1
1

CSCul95082: DMAR faults during mtt testing

usnic_channel_finalize() was deregistering recv buffers before
destroying the QP to which they were posted. The QP needs to be
destroyed first so that the NIC does not attempt to write to
deregistered memory, causing the DMAR messages.

Submitted by Reese, reviewed by Jeff.

cmr=v1.7.4:reviewer=ompi-rm1.7

This commit was SVN r29963.
Этот коммит содержится в:
Jeff Squyres 2013-12-19 00:01:35 +00:00
родитель d8c0c919e1
Коммит 515fd00411

Просмотреть файл

@ -931,6 +931,8 @@ static int usnic_finalize(struct mca_btl_base_module_t* btl)
OBJ_DESTRUCT(&module->chunk_segs);
OBJ_DESTRUCT(&module->senders);
mca_mpool_base_module_destroy(module->super.btl_mpool);
/* destroy the PD after all the CQs and AHs have been destroyed, otherwise
* we get a minor leak in libusnic_verbs */
rc = ibv_dealloc_pd(module->pd);
@ -943,8 +945,6 @@ static int usnic_finalize(struct mca_btl_base_module_t* btl)
BTL_ERROR(("failed to ibv_close_device"));
}
mca_mpool_base_module_destroy(module->super.btl_mpool);
return OMPI_SUCCESS;
}
@ -1446,8 +1446,8 @@ static int usnic_dereg_mr(void* reg_data,
if (ud_reg->mr != NULL) {
if (ibv_dereg_mr(ud_reg->mr)) {
opal_output(0, "%s: error unpinning UD memory: %s\n",
__func__, strerror(errno));
opal_output(0, "%s: error unpinning UD memory mr=%p: %s\n",
__func__, ud_reg->mr, strerror(errno));
return OMPI_ERROR;
}
}
@ -1670,13 +1670,6 @@ ompi_btl_usnic_channel_finalize(
ompi_btl_usnic_module_t *module,
struct ompi_btl_usnic_channel_t *channel)
{
/* gets set right after constructor called, lets us know recv_segs
* have been constructed
*/
if (channel->recv_segs.ctx == module) {
OBJ_DESTRUCT(&channel->recv_segs);
}
if (NULL != channel->qp) {
ibv_destroy_qp(channel->qp);
channel->qp = NULL;
@ -1687,6 +1680,15 @@ ompi_btl_usnic_channel_finalize(
ibv_destroy_cq(channel->cq);
channel->cq = NULL;
}
/* gets set right after constructor called, lets us know recv_segs
* have been constructed. Make sure to wait until queues destroyed to destroy
* the recv_segs
*/
if (channel->recv_segs.ctx == module) {
assert(NULL == channel->qp && NULL == channel->cq);
OBJ_DESTRUCT(&channel->recv_segs);
}
}
/*