CSCul95082: DMAR faults during mtt testing
usnic_channel_finalize() was deregistering recv buffers before destroying the QP to which they were posted. The QP needs to be destroyed first so that the NIC does not attempt to write to deregistered memory, causing the DMAR messages. Submitted by Reese, reviewed by Jeff. cmr=v1.7.4:reviewer=ompi-rm1.7 This commit was SVN r29963.
Этот коммит содержится в:
родитель
d8c0c919e1
Коммит
515fd00411
@ -931,6 +931,8 @@ static int usnic_finalize(struct mca_btl_base_module_t* btl)
|
||||
OBJ_DESTRUCT(&module->chunk_segs);
|
||||
OBJ_DESTRUCT(&module->senders);
|
||||
|
||||
mca_mpool_base_module_destroy(module->super.btl_mpool);
|
||||
|
||||
/* destroy the PD after all the CQs and AHs have been destroyed, otherwise
|
||||
* we get a minor leak in libusnic_verbs */
|
||||
rc = ibv_dealloc_pd(module->pd);
|
||||
@ -943,8 +945,6 @@ static int usnic_finalize(struct mca_btl_base_module_t* btl)
|
||||
BTL_ERROR(("failed to ibv_close_device"));
|
||||
}
|
||||
|
||||
mca_mpool_base_module_destroy(module->super.btl_mpool);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -1446,8 +1446,8 @@ static int usnic_dereg_mr(void* reg_data,
|
||||
|
||||
if (ud_reg->mr != NULL) {
|
||||
if (ibv_dereg_mr(ud_reg->mr)) {
|
||||
opal_output(0, "%s: error unpinning UD memory: %s\n",
|
||||
__func__, strerror(errno));
|
||||
opal_output(0, "%s: error unpinning UD memory mr=%p: %s\n",
|
||||
__func__, ud_reg->mr, strerror(errno));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
}
|
||||
@ -1670,13 +1670,6 @@ ompi_btl_usnic_channel_finalize(
|
||||
ompi_btl_usnic_module_t *module,
|
||||
struct ompi_btl_usnic_channel_t *channel)
|
||||
{
|
||||
/* gets set right after constructor called, lets us know recv_segs
|
||||
* have been constructed
|
||||
*/
|
||||
if (channel->recv_segs.ctx == module) {
|
||||
OBJ_DESTRUCT(&channel->recv_segs);
|
||||
}
|
||||
|
||||
if (NULL != channel->qp) {
|
||||
ibv_destroy_qp(channel->qp);
|
||||
channel->qp = NULL;
|
||||
@ -1687,6 +1680,15 @@ ompi_btl_usnic_channel_finalize(
|
||||
ibv_destroy_cq(channel->cq);
|
||||
channel->cq = NULL;
|
||||
}
|
||||
|
||||
/* gets set right after constructor called, lets us know recv_segs
|
||||
* have been constructed. Make sure to wait until queues destroyed to destroy
|
||||
* the recv_segs
|
||||
*/
|
||||
if (channel->recv_segs.ctx == module) {
|
||||
assert(NULL == channel->qp && NULL == channel->cq);
|
||||
OBJ_DESTRUCT(&channel->recv_segs);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user