1
1

Keep the mxm context alive as long as the pml_yalla component is open.

pml_yalla_del_comm may be called after the yalla module is finalized, which
leads to an invalid memory access if the mxm context has already been
destroyed at that point.
Этот коммит содержится в:
yosefe 2015-03-05 12:21:51 +02:00
родитель d6ae0a5e05
Коммит 976144dca7
3 изменённых файлов: 54 добавлений и 16 удалений

Просмотреть файл

@ -45,6 +45,8 @@ mca_pml_yalla_module_t ompi_pml_yalla = {
1ul << (sizeof(mxm_tag_t)*8 - 1) - 1, 1ul << (sizeof(mxm_tag_t)*8 - 1) - 1,
}, },
NULL, NULL,
NULL,
NULL,
NULL NULL
}; };
@ -94,14 +96,11 @@ static void mca_pml_yalla_mem_release_cb(void *buf, size_t length,
from_alloc ? MXM_MEM_UNMAP_MARK_INVALID : 0); from_alloc ? MXM_MEM_UNMAP_MARK_INVALID : 0);
} }
int mca_pml_yalla_init(void) int mca_pml_yalla_open(void)
{ {
mxm_context_opts_t *ctx_opts;
mxm_ep_opts_t *ep_opts;
mxm_error_t error; mxm_error_t error;
int rc;
PML_YALLA_VERBOSE(1, "mca_pml_yalla_init"); PML_YALLA_VERBOSE(1, "mca_pml_yalla_open");
/* Set memory hooks */ /* Set memory hooks */
if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) == if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) ==
@ -119,28 +118,54 @@ int mca_pml_yalla_init(void)
false, &environ); false, &environ);
/* Read options */ /* Read options */
error = mxm_config_read_opts(&ctx_opts, &ep_opts, "PML", NULL, 0); error = mxm_config_read_opts(&ompi_pml_yalla.ctx_opts, &ompi_pml_yalla.ep_opts,
"PML", NULL, 0);
if (MXM_OK != error) { if (MXM_OK != error) {
return OMPI_ERROR; return OMPI_ERROR;
} }
error = mxm_init(ctx_opts, &ompi_pml_yalla.mxm_context); error = mxm_init(ompi_pml_yalla.ctx_opts, &ompi_pml_yalla.mxm_context);
if (MXM_OK != error) { if (MXM_OK != error) {
return OMPI_ERROR; return OMPI_ERROR;
} }
return OMPI_SUCCESS;
}
int mca_pml_yalla_close(void)
{
PML_YALLA_VERBOSE(1, "mca_pml_yalla_close");
if (ompi_pml_yalla.ctx_opts != NULL) {
mxm_config_free_context_opts(ompi_pml_yalla.ctx_opts);
}
if (ompi_pml_yalla.ep_opts != NULL) {
mxm_config_free_ep_opts(ompi_pml_yalla.ep_opts);
}
if (ompi_pml_yalla.mxm_context != NULL) {
mxm_cleanup(ompi_pml_yalla.mxm_context);
ompi_pml_yalla.mxm_context = NULL;
}
return 0;
}
int mca_pml_yalla_init(void)
{
mxm_error_t error;
int rc;
PML_YALLA_VERBOSE(1, "mca_pml_yalla_init");
if (ompi_pml_yalla.using_mem_hooks) { if (ompi_pml_yalla.using_mem_hooks) {
opal_mem_hooks_register_release(mca_pml_yalla_mem_release_cb, NULL); opal_mem_hooks_register_release(mca_pml_yalla_mem_release_cb, NULL);
} }
error = mxm_ep_create(ompi_pml_yalla.mxm_context, ep_opts, &ompi_pml_yalla.mxm_ep); error = mxm_ep_create(ompi_pml_yalla.mxm_context, ompi_pml_yalla.ep_opts,
&ompi_pml_yalla.mxm_ep);
if (MXM_OK != error) { if (MXM_OK != error) {
return OMPI_ERROR; return OMPI_ERROR;
} }
mxm_config_free_context_opts(ctx_opts);
mxm_config_free_ep_opts(ep_opts);
rc = send_ep_address(); rc = send_ep_address();
if (rc < 0) { if (rc < 0) {
return rc; return rc;
@ -176,10 +201,7 @@ int mca_pml_yalla_cleanup(void)
if (ompi_pml_yalla.using_mem_hooks) { if (ompi_pml_yalla.using_mem_hooks) {
opal_mem_hooks_unregister_release(mca_pml_yalla_mem_release_cb); opal_mem_hooks_unregister_release(mca_pml_yalla_mem_release_cb);
} }
if (ompi_pml_yalla.mxm_context) {
mxm_cleanup(ompi_pml_yalla.mxm_context);
ompi_pml_yalla.mxm_context = NULL;
}
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
@ -277,6 +299,11 @@ int mca_pml_yalla_del_comm(struct ompi_communicator_t* comm)
{ {
mxm_mq_h mq = (void*)comm->c_pml_comm; mxm_mq_h mq = (void*)comm->c_pml_comm;
if (ompi_pml_yalla.mxm_context == NULL) {
PML_YALLA_ERROR("Destroying communicator after MXM context is destroyed");
return OMPI_ERROR;
}
PML_YALLA_VERBOSE(2, "destroying mq ctxid %d of comm %s", comm->c_contextid, PML_YALLA_VERBOSE(2, "destroying mq ctxid %d of comm %s", comm->c_contextid,
comm->c_name); comm->c_name);
mxm_mq_destroy(mq); mxm_mq_destroy(mq);

Просмотреть файл

@ -37,6 +37,8 @@ struct mca_pml_yalla_module {
mca_pml_base_module_t super; mca_pml_base_module_t super;
/* MXM global objects */ /* MXM global objects */
mxm_context_opts_t *ctx_opts;
mxm_ep_opts_t *ep_opts;
mxm_h mxm_context; mxm_h mxm_context;
mxm_ep_h mxm_ep; mxm_ep_h mxm_ep;
@ -79,6 +81,8 @@ extern mca_pml_yalla_module_t ompi_pml_yalla;
__FILE__, __LINE__, __FUNCTION__, ## __VA_ARGS__); \ __FILE__, __LINE__, __FUNCTION__, ## __VA_ARGS__); \
} }
int mca_pml_yalla_open(void);
int mca_pml_yalla_close(void);
int mca_pml_yalla_init(void); int mca_pml_yalla_init(void);
int mca_pml_yalla_cleanup(void); int mca_pml_yalla_cleanup(void);

Просмотреть файл

@ -69,11 +69,18 @@ static int mca_pml_yalla_component_open(void)
{ {
ompi_pml_yalla.output = opal_output_open(NULL); ompi_pml_yalla.output = opal_output_open(NULL);
opal_output_set_verbosity(ompi_pml_yalla.output, ompi_pml_yalla.verbose); opal_output_set_verbosity(ompi_pml_yalla.output, ompi_pml_yalla.verbose);
return 0; return mca_pml_yalla_open();
} }
static int mca_pml_yalla_component_close(void) static int mca_pml_yalla_component_close(void)
{ {
int rc;
rc = mca_pml_yalla_close();
if (rc != 0) {
return rc;
}
opal_output_close(ompi_pml_yalla.output); opal_output_close(ompi_pml_yalla.output);
return 0; return 0;
} }