1
1

We should never block when requesting dmodex data from the PMIx server, as this will block it from being able to accept connections from local clients. Do not deregister standing dmodex requests when a fence completes unless we actually collected the data in the fence.

Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
Ralph Castain 2017-10-24 06:42:39 -07:00
родитель 70c455938b
Коммит 292983261a
2 изменённых файла: 8 добавлений и 19 удалений

Просмотреть файл

@ -1019,10 +1019,8 @@ static void _dmodex_req(int sd, short args, void *cbdata)
rc = PMIX_ERR_NOMEM;
goto cleanup;
}
PMIX_RETAIN(cd);
dcd->cd = cd;
pmix_list_append(&pmix_server_globals.remote_pnd, &dcd->super);
PMIX_WAKEUP_THREAD(&cd->lock); // ensure the request doesn't hang
return;
}
@ -1051,13 +1049,7 @@ static void _dmodex_req(int sd, short args, void *cbdata)
PMIX_DESTRUCT(&cb);
PMIX_UNLOAD_BUFFER(&pbkt, data, sz);
PMIX_DESTRUCT(&pbkt);
/* execute the callback */
cd->cbfunc(rc, data, sz, cd->cbdata);
PMIX_WAKEUP_THREAD(&cd->lock); // ensure the request doesn't hang
if (NULL != data) {
free(data);
}
return;
goto cleanup;
}
/* see if we have this peer in our list */
@ -1072,10 +1064,8 @@ static void _dmodex_req(int sd, short args, void *cbdata)
/* rank isn't known yet - defer
* the request until we do */
dcd = PMIX_NEW(pmix_dmdx_remote_t);
PMIX_RETAIN(cd);
dcd->cd = cd;
pmix_list_append(&pmix_server_globals.remote_pnd, &dcd->super);
PMIX_WAKEUP_THREAD(&cd->lock); // ensure the request doesn't hang
return;
}
@ -1085,10 +1075,8 @@ static void _dmodex_req(int sd, short args, void *cbdata)
/* track the request so we can fulfill it once
* data is recvd */
dcd = PMIX_NEW(pmix_dmdx_remote_t);
PMIX_RETAIN(cd);
dcd->cd = cd;
pmix_list_append(&pmix_server_globals.remote_pnd, &dcd->super);
PMIX_WAKEUP_THREAD(&cd->lock); // ensure the request doesn't hang
return;
}
@ -1120,7 +1108,7 @@ static void _dmodex_req(int sd, short args, void *cbdata)
if (NULL != data) {
free(data);
}
PMIX_WAKEUP_THREAD(&cd->lock);
PMIX_RELEASE(cd);
}
PMIX_EXPORT pmix_status_t PMIx_server_dmodex_request(const pmix_proc_t *proc,
@ -1154,9 +1142,6 @@ PMIX_EXPORT pmix_status_t PMIx_server_dmodex_request(const pmix_proc_t *proc,
/* we have to push this into our event library to avoid
* potential threading issues */
PMIX_THREADSHIFT(cd, _dmodex_req);
PMIX_WAIT_THREAD(&cd->lock);
PMIX_RELEASE(cd);
return PMIX_SUCCESS;
}

Просмотреть файл

@ -285,17 +285,21 @@ static void opmdx_response(int status, const char *data, size_t sz, void *cbdata
opalcaddy->ocbdata = relcbdata;
opalcaddy->mdxcbfunc(rc, data, sz, opalcaddy->cbdata,
_data_release, opalcaddy);
} else {
OBJ_RELEASE(opalcaddy);
}
if (opal_pmix_collect_all_data) {
/* if we were collecting all data, then check for any pending
* dmodx requests that we cached and notify them that the
* data has arrived */
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
while (NULL != (dmdx = (opal_pmix3x_dmx_trkr_t*)opal_list_remove_first(&mca_pmix_pmix3x_component.dmdx))) {
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
dmdx->cbfunc(PMIX_SUCCESS, NULL, 0, dmdx->cbdata, NULL, NULL);
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
OBJ_RELEASE(dmdx);
}
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
} else {
OBJ_RELEASE(opalcaddy);
}
}