We should never block when requesting dmodex data from the PMIx server as this will block it from being able to accept connections from local clients. Do not deregister standing dmodx requests when a fence completes unless we actually collected the data in the fence
Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
родитель
70c455938b
Коммит
292983261a
@ -1019,10 +1019,8 @@ static void _dmodex_req(int sd, short args, void *cbdata)
|
||||
rc = PMIX_ERR_NOMEM;
|
||||
goto cleanup;
|
||||
}
|
||||
PMIX_RETAIN(cd);
|
||||
dcd->cd = cd;
|
||||
pmix_list_append(&pmix_server_globals.remote_pnd, &dcd->super);
|
||||
PMIX_WAKEUP_THREAD(&cd->lock); // ensure the request doesn't hang
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1051,13 +1049,7 @@ static void _dmodex_req(int sd, short args, void *cbdata)
|
||||
PMIX_DESTRUCT(&cb);
|
||||
PMIX_UNLOAD_BUFFER(&pbkt, data, sz);
|
||||
PMIX_DESTRUCT(&pbkt);
|
||||
/* execute the callback */
|
||||
cd->cbfunc(rc, data, sz, cd->cbdata);
|
||||
PMIX_WAKEUP_THREAD(&cd->lock); // ensure the request doesn't hang
|
||||
if (NULL != data) {
|
||||
free(data);
|
||||
}
|
||||
return;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* see if we have this peer in our list */
|
||||
@ -1072,10 +1064,8 @@ static void _dmodex_req(int sd, short args, void *cbdata)
|
||||
/* rank isn't known yet - defer
|
||||
* the request until we do */
|
||||
dcd = PMIX_NEW(pmix_dmdx_remote_t);
|
||||
PMIX_RETAIN(cd);
|
||||
dcd->cd = cd;
|
||||
pmix_list_append(&pmix_server_globals.remote_pnd, &dcd->super);
|
||||
PMIX_WAKEUP_THREAD(&cd->lock); // ensure the request doesn't hang
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1085,10 +1075,8 @@ static void _dmodex_req(int sd, short args, void *cbdata)
|
||||
/* track the request so we can fulfill it once
|
||||
* data is recvd */
|
||||
dcd = PMIX_NEW(pmix_dmdx_remote_t);
|
||||
PMIX_RETAIN(cd);
|
||||
dcd->cd = cd;
|
||||
pmix_list_append(&pmix_server_globals.remote_pnd, &dcd->super);
|
||||
PMIX_WAKEUP_THREAD(&cd->lock); // ensure the request doesn't hang
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1120,7 +1108,7 @@ static void _dmodex_req(int sd, short args, void *cbdata)
|
||||
if (NULL != data) {
|
||||
free(data);
|
||||
}
|
||||
PMIX_WAKEUP_THREAD(&cd->lock);
|
||||
PMIX_RELEASE(cd);
|
||||
}
|
||||
|
||||
PMIX_EXPORT pmix_status_t PMIx_server_dmodex_request(const pmix_proc_t *proc,
|
||||
@ -1154,9 +1142,6 @@ PMIX_EXPORT pmix_status_t PMIx_server_dmodex_request(const pmix_proc_t *proc,
|
||||
/* we have to push this into our event library to avoid
|
||||
* potential threading issues */
|
||||
PMIX_THREADSHIFT(cd, _dmodex_req);
|
||||
|
||||
PMIX_WAIT_THREAD(&cd->lock);
|
||||
PMIX_RELEASE(cd);
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -285,17 +285,21 @@ static void opmdx_response(int status, const char *data, size_t sz, void *cbdata
|
||||
opalcaddy->ocbdata = relcbdata;
|
||||
opalcaddy->mdxcbfunc(rc, data, sz, opalcaddy->cbdata,
|
||||
_data_release, opalcaddy);
|
||||
} else {
|
||||
OBJ_RELEASE(opalcaddy);
|
||||
}
|
||||
if (opal_pmix_collect_all_data) {
|
||||
/* if we were collecting all data, then check for any pending
|
||||
* dmodx requests that we cached and notify them that the
|
||||
* data has arrived */
|
||||
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
|
||||
while (NULL != (dmdx = (opal_pmix3x_dmx_trkr_t*)opal_list_remove_first(&mca_pmix_pmix3x_component.dmdx))) {
|
||||
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
|
||||
dmdx->cbfunc(PMIX_SUCCESS, NULL, 0, dmdx->cbdata, NULL, NULL);
|
||||
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
|
||||
OBJ_RELEASE(dmdx);
|
||||
}
|
||||
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
|
||||
} else {
|
||||
OBJ_RELEASE(opalcaddy);
|
||||
}
|
||||
}
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user