From 292983261a7372692b91403677d673edd45ec9a3 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 24 Oct 2017 06:42:39 -0700 Subject: [PATCH] We should never block when requesting dmodex data from the PMIx server as this will block it from being able to accept connections from local clients. Do not deregister standing dmodx requests when a fence completes unless we actually collected the data in the fence Signed-off-by: Ralph Castain --- .../pmix/pmix3x/pmix/src/server/pmix_server.c | 19 ++----------------- opal/mca/pmix/pmix3x/pmix3x_server_north.c | 8 ++++++-- 2 files changed, 8 insertions(+), 19 deletions(-) diff --git a/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server.c b/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server.c index fc125df3d8..9fced62be5 100644 --- a/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server.c +++ b/opal/mca/pmix/pmix3x/pmix/src/server/pmix_server.c @@ -1019,10 +1019,8 @@ static void _dmodex_req(int sd, short args, void *cbdata) rc = PMIX_ERR_NOMEM; goto cleanup; } - PMIX_RETAIN(cd); dcd->cd = cd; pmix_list_append(&pmix_server_globals.remote_pnd, &dcd->super); - PMIX_WAKEUP_THREAD(&cd->lock); // ensure the request doesn't hang return; } @@ -1051,13 +1049,7 @@ static void _dmodex_req(int sd, short args, void *cbdata) PMIX_DESTRUCT(&cb); PMIX_UNLOAD_BUFFER(&pbkt, data, sz); PMIX_DESTRUCT(&pbkt); - /* execute the callback */ - cd->cbfunc(rc, data, sz, cd->cbdata); - PMIX_WAKEUP_THREAD(&cd->lock); // ensure the request doesn't hang - if (NULL != data) { - free(data); - } - return; + goto cleanup; } /* see if we have this peer in our list */ @@ -1072,10 +1064,8 @@ static void _dmodex_req(int sd, short args, void *cbdata) /* rank isn't known yet - defer * the request until we do */ dcd = PMIX_NEW(pmix_dmdx_remote_t); - PMIX_RETAIN(cd); dcd->cd = cd; pmix_list_append(&pmix_server_globals.remote_pnd, &dcd->super); - PMIX_WAKEUP_THREAD(&cd->lock); // ensure the request doesn't hang return; } @@ -1085,10 +1075,8 @@ static void _dmodex_req(int sd, short args, void *cbdata) /* track the request so we can fulfill it once * data is recvd */ dcd = PMIX_NEW(pmix_dmdx_remote_t); - PMIX_RETAIN(cd); dcd->cd = cd; pmix_list_append(&pmix_server_globals.remote_pnd, &dcd->super); - PMIX_WAKEUP_THREAD(&cd->lock); // ensure the request doesn't hang return; } @@ -1120,7 +1108,7 @@ static void _dmodex_req(int sd, short args, void *cbdata) if (NULL != data) { free(data); } - PMIX_WAKEUP_THREAD(&cd->lock); + PMIX_RELEASE(cd); } PMIX_EXPORT pmix_status_t PMIx_server_dmodex_request(const pmix_proc_t *proc, @@ -1154,9 +1142,6 @@ PMIX_EXPORT pmix_status_t PMIx_server_dmodex_request(const pmix_proc_t *proc, /* we have to push this into our event library to avoid * potential threading issues */ PMIX_THREADSHIFT(cd, _dmodex_req); - - PMIX_WAIT_THREAD(&cd->lock); - PMIX_RELEASE(cd); return PMIX_SUCCESS; } diff --git a/opal/mca/pmix/pmix3x/pmix3x_server_north.c b/opal/mca/pmix/pmix3x/pmix3x_server_north.c index f508385297..46bf652dbf 100644 --- a/opal/mca/pmix/pmix3x/pmix3x_server_north.c +++ b/opal/mca/pmix/pmix3x/pmix3x_server_north.c @@ -285,17 +285,21 @@ static void opmdx_response(int status, const char *data, size_t sz, void *cbdata opalcaddy->ocbdata = relcbdata; opalcaddy->mdxcbfunc(rc, data, sz, opalcaddy->cbdata, _data_release, opalcaddy); + } else { + OBJ_RELEASE(opalcaddy); + } + if (opal_pmix_collect_all_data) { /* if we were collecting all data, then check for any pending * dmodx requests that we cached and notify them that the * data has arrived */ OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); while (NULL != (dmdx = (opal_pmix3x_dmx_trkr_t*)opal_list_remove_first(&mca_pmix_pmix3x_component.dmdx))) { + OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); dmdx->cbfunc(PMIX_SUCCESS, NULL, 0, dmdx->cbdata, NULL, NULL); + OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock); OBJ_RELEASE(dmdx); } OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock); - } else { - OBJ_RELEASE(opalcaddy); } }