1
1

Fix mca_btl_ofi_finalize clean-up logic

This fix is from John L. Byrne (john.l.byrne@hpe.com).

When OFI Libfabric binds an object to an endpoint, the endpoint must be
freed before that object can be closed successfully.  For scalable
endpoints, objects can also be bound to transmit and receive contexts;
for objects that are bound to contexts, we need to free the contexts
first and only then free the endpoint. We also need to clear the memory
registration cache.

If we don't clean up properly, then fi_close may not be able to close
the domain, because the domain will still have a non-zero reference count.

Signed-off-by: harumi kuno <harumi.kuno@hpe.com>
Этот коммит содержится в:
harumi kuno 2020-03-04 17:51:08 -07:00
родитель 77bf3f08f5
Коммит 3095fabf94
2 изменённых файлов: 24 добавлений и 13 удалений

Просмотреть файл

@ -581,6 +581,10 @@ static int mca_btl_ofi_init_device(struct fi_info *info)
fail:
/* clean up */
if (NULL != ep && !module->is_scalable_ep) {
fi_close(&ep->fid);
}
/* if the contexts have not been initiated, num_contexts should
* be zero and we skip this. */
for (int i=0; i < module->num_contexts; i++) {
@ -588,14 +592,14 @@ fail:
}
free(module->contexts);
if (NULL != av) {
fi_close(&av->fid);
}
if (NULL != ep) {
fi_close(&ep->fid);
}
if (NULL != av) {
fi_close(&av->fid);
}
if (NULL != domain) {
fi_close(&domain->fid);
}

Просмотреть файл

@ -277,20 +277,32 @@ int mca_btl_ofi_finalize (mca_btl_base_module_t* btl)
assert(btl);
/* clear the rcache */
if (ofi_btl->rcache) {
mca_rcache_base_module_destroy (ofi_btl->rcache);
ofi_btl->rcache = NULL;
}
/* For a standard ep, we need to close the ep first. */
if (NULL != ofi_btl->ofi_endpoint && !ofi_btl->is_scalable_ep) {
fi_close(&ofi_btl->ofi_endpoint->fid);
ofi_btl->ofi_endpoint = NULL;
}
/* loop over all the contexts */
for (i=0; i < ofi_btl->num_contexts; i++) {
mca_btl_ofi_context_finalize(&ofi_btl->contexts[i], ofi_btl->is_scalable_ep);
}
free(ofi_btl->contexts);
if (NULL != ofi_btl->av) {
fi_close(&ofi_btl->av->fid);
}
if (NULL != ofi_btl->ofi_endpoint) {
fi_close(&ofi_btl->ofi_endpoint->fid);
}
if (NULL != ofi_btl->av) {
fi_close(&ofi_btl->av->fid);
}
if (NULL != ofi_btl->domain) {
fi_close(&ofi_btl->domain->fid);
}
@ -313,11 +325,6 @@ int mca_btl_ofi_finalize (mca_btl_base_module_t* btl)
OBJ_DESTRUCT(&ofi_btl->id_to_endpoint);
OBJ_DESTRUCT(&ofi_btl->module_lock);
if (ofi_btl->rcache) {
mca_rcache_base_module_destroy (ofi_btl->rcache);
ofi_btl->rcache = NULL;
}
free (btl);
return OPAL_SUCCESS;