Support timeout values when performing connect/accept operations. Bump the default timeout to 10 minutes so that users have time to start the partner application.
Этот коммит содержится в:
родитель
c281bd3c7f
Коммит
cacb582ecd
@ -875,7 +875,7 @@ static int ompi_comm_allreduce_pmix_reduce_complete (ompi_comm_request_t *reques
|
||||
|
||||
/* this macro is not actually non-blocking. if a non-blocking version becomes available this function
|
||||
* needs to be reworked to take advantage of it. */
|
||||
OPAL_PMIX_EXCHANGE(rc, &info, &pdat, 60);
|
||||
OPAL_PMIX_EXCHANGE(rc, &info, &pdat, 600); // give them 10 minutes
|
||||
OBJ_DESTRUCT(&info);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
OBJ_DESTRUCT(&pdat);
|
||||
|
@ -211,7 +211,7 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
|
||||
info.data.string = opal_argv_join(members, ':');
|
||||
pdat.value.type = OPAL_STRING;
|
||||
|
||||
OPAL_PMIX_EXCHANGE(rc, &info, &pdat, 60);
|
||||
OPAL_PMIX_EXCHANGE(rc, &info, &pdat, 600); // give them 10 minutes
|
||||
OBJ_DESTRUCT(&info);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
OBJ_DESTRUCT(&pdat);
|
||||
|
@ -162,8 +162,7 @@ int opal_pmix_base_exchange(opal_value_t *indat,
|
||||
info->type = OPAL_BOOL;
|
||||
info->data.flag = true;
|
||||
opal_list_append(&mlist, &info->super);
|
||||
if (0 < timeout) {
|
||||
/* give it a decent timeout as we don't know when
|
||||
/* pass along the given timeout as we don't know when
|
||||
* the other side will publish - it doesn't
|
||||
* have to be simultaneous */
|
||||
info = OBJ_NEW(opal_value_t);
|
||||
@ -171,7 +170,6 @@ int opal_pmix_base_exchange(opal_value_t *indat,
|
||||
info->type = OPAL_INT;
|
||||
info->data.integer = timeout;
|
||||
opal_list_append(&mlist, &info->super);
|
||||
}
|
||||
|
||||
/* if a non-blocking version of lookup isn't
|
||||
* available, then use the blocking version */
|
||||
|
@ -437,7 +437,8 @@ int pmix2x_get(const opal_process_name_t *proc, const char *key,
|
||||
n=0;
|
||||
OPAL_LIST_FOREACH(ival, info, opal_value_t) {
|
||||
(void)strncpy(pinfo[n].key, ival->key, PMIX_MAX_KEYLEN);
|
||||
pmix2x_value_load(&pinfo[n++].value, ival);
|
||||
pmix2x_value_load(&pinfo[n].value, ival);
|
||||
++n;
|
||||
}
|
||||
} else {
|
||||
pinfo = NULL;
|
||||
@ -534,6 +535,7 @@ int pmix2x_getnb(const opal_process_name_t *proc, const char *key,
|
||||
OPAL_LIST_FOREACH(ival, info, opal_value_t) {
|
||||
(void)strncpy(op->info[n].key, ival->key, PMIX_MAX_KEYLEN);
|
||||
pmix2x_value_load(&op->info[n].value, ival);
|
||||
++n;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -151,11 +151,18 @@ static void eviction_cbfunc(struct opal_hotel_t *hotel,
|
||||
int room_num, void *occupant)
|
||||
{
|
||||
pmix_server_req_t *req = (pmix_server_req_t*)occupant;
|
||||
bool timeout = false;
|
||||
int rc;
|
||||
|
||||
/* decrement the request timeout */
|
||||
req->timeout -= orte_pmix_server_globals.timeout;
|
||||
if (0 < req->timeout) {
|
||||
if (req->timeout > 0) {
|
||||
req->timeout -= orte_pmix_server_globals.timeout;
|
||||
if (0 >= req->timeout) {
|
||||
timeout = true;
|
||||
}
|
||||
}
|
||||
if (!timeout) {
|
||||
/* not done yet - check us back in */
|
||||
if (OPAL_SUCCESS == (rc = opal_hotel_checkin(&orte_pmix_server_globals.reqs, req, &req->room_num))) {
|
||||
return;
|
||||
|
@ -156,13 +156,18 @@ int pmix_server_publish_fn(opal_process_name_t *proc,
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* if we have items, pack those too - ignore persistence
|
||||
/* if we have items, pack those too - ignore persistence, timeout
|
||||
* and range values */
|
||||
OPAL_LIST_FOREACH(iptr, info, opal_value_t) {
|
||||
if (0 == strcmp(iptr->key, OPAL_PMIX_RANGE) ||
|
||||
0 == strcmp(iptr->key, OPAL_PMIX_PERSISTENCE)) {
|
||||
continue;
|
||||
}
|
||||
if (0 == strcmp(iptr->key, OPAL_PMIX_TIMEOUT)) {
|
||||
/* record the timeout value, but don't pack it */
|
||||
req->timeout = iptr->data.integer;
|
||||
continue;
|
||||
}
|
||||
opal_output_verbose(5, orte_pmix_server_globals.output,
|
||||
"%s publishing data %s of type %d from source %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), iptr->key, iptr->type,
|
||||
@ -257,11 +262,16 @@ int pmix_server_lookup_fn(opal_process_name_t *proc, char **keys,
|
||||
}
|
||||
}
|
||||
|
||||
/* if we have items, pack those too - ignore range value */
|
||||
/* if we have items, pack those too - ignore range and timeout value */
|
||||
OPAL_LIST_FOREACH(iptr, info, opal_value_t) {
|
||||
if (0 == strcmp(iptr->key, OPAL_PMIX_RANGE)) {
|
||||
continue;
|
||||
}
|
||||
if (0 == strcmp(iptr->key, OPAL_PMIX_TIMEOUT)) {
|
||||
/* record the timeout value, but don't pack it */
|
||||
req->timeout = iptr->data.integer;
|
||||
continue;
|
||||
}
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &iptr, 1, OPAL_VALUE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(req);
|
||||
@ -347,11 +357,16 @@ int pmix_server_unpublish_fn(opal_process_name_t *proc, char **keys,
|
||||
}
|
||||
}
|
||||
|
||||
/* if we have items, pack those too - ignore range value */
|
||||
/* if we have items, pack those too - ignore range and timeout value */
|
||||
OPAL_LIST_FOREACH(iptr, info, opal_value_t) {
|
||||
if (0 == strcmp(iptr->key, OPAL_PMIX_RANGE)) {
|
||||
continue;
|
||||
}
|
||||
if (0 == strcmp(iptr->key, OPAL_PMIX_TIMEOUT)) {
|
||||
/* record the timeout value, but don't pack it */
|
||||
req->timeout = iptr->data.integer;
|
||||
continue;
|
||||
}
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &iptr, 1, OPAL_VALUE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(req);
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user