1
1

Support timeout values when performing connect/accept operations. Bump the default timeout to 10 minutes so folks have time to start the partnering application.

Этот коммит содержится в:
Ralph Castain 2016-07-28 14:07:35 -07:00
родитель c281bd3c7f
Коммит cacb582ecd
6 изменённых файлов: 621 добавлений и 599 удалений

Просмотреть файл

@ -875,7 +875,7 @@ static int ompi_comm_allreduce_pmix_reduce_complete (ompi_comm_request_t *reques
/* this macro is not actually non-blocking. if a non-blocking version becomes available this function
* needs to be reworked to take advantage of it. */
OPAL_PMIX_EXCHANGE(rc, &info, &pdat, 60);
OPAL_PMIX_EXCHANGE(rc, &info, &pdat, 600); // give them 10 minutes
OBJ_DESTRUCT(&info);
if (OPAL_SUCCESS != rc) {
OBJ_DESTRUCT(&pdat);

Просмотреть файл

@ -211,7 +211,7 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
info.data.string = opal_argv_join(members, ':');
pdat.value.type = OPAL_STRING;
OPAL_PMIX_EXCHANGE(rc, &info, &pdat, 60);
OPAL_PMIX_EXCHANGE(rc, &info, &pdat, 600); // give them 10 minutes
OBJ_DESTRUCT(&info);
if (OPAL_SUCCESS != rc) {
OBJ_DESTRUCT(&pdat);

Просмотреть файл

@ -162,8 +162,7 @@ int opal_pmix_base_exchange(opal_value_t *indat,
info->type = OPAL_BOOL;
info->data.flag = true;
opal_list_append(&mlist, &info->super);
if (0 < timeout) {
/* give it a decent timeout as we don't know when
/* pass along the given timeout as we don't know when
* the other side will publish - it doesn't
* have to be simultaneous */
info = OBJ_NEW(opal_value_t);
@ -171,7 +170,6 @@ int opal_pmix_base_exchange(opal_value_t *indat,
info->type = OPAL_INT;
info->data.integer = timeout;
opal_list_append(&mlist, &info->super);
}
/* if a non-blocking version of lookup isn't
* available, then use the blocking version */

Просмотреть файл

@ -437,7 +437,8 @@ int pmix2x_get(const opal_process_name_t *proc, const char *key,
n=0;
OPAL_LIST_FOREACH(ival, info, opal_value_t) {
(void)strncpy(pinfo[n].key, ival->key, PMIX_MAX_KEYLEN);
pmix2x_value_load(&pinfo[n++].value, ival);
pmix2x_value_load(&pinfo[n].value, ival);
++n;
}
} else {
pinfo = NULL;
@ -534,6 +535,7 @@ int pmix2x_getnb(const opal_process_name_t *proc, const char *key,
OPAL_LIST_FOREACH(ival, info, opal_value_t) {
(void)strncpy(op->info[n].key, ival->key, PMIX_MAX_KEYLEN);
pmix2x_value_load(&op->info[n].value, ival);
++n;
}
}
}

Просмотреть файл

@ -151,11 +151,18 @@ static void eviction_cbfunc(struct opal_hotel_t *hotel,
int room_num, void *occupant)
{
pmix_server_req_t *req = (pmix_server_req_t*)occupant;
bool timeout = false;
int rc;
/* decrement the request timeout */
req->timeout -= orte_pmix_server_globals.timeout;
if (0 < req->timeout) {
if (req->timeout > 0) {
req->timeout -= orte_pmix_server_globals.timeout;
if (0 >= req->timeout) {
timeout = true;
}
}
if (!timeout) {
/* not done yet - check us back in */
if (OPAL_SUCCESS == (rc = opal_hotel_checkin(&orte_pmix_server_globals.reqs, req, &req->room_num))) {
return;

Просмотреть файл

@ -156,13 +156,18 @@ int pmix_server_publish_fn(opal_process_name_t *proc,
return rc;
}
/* if we have items, pack those too - ignore persistence
/* if we have items, pack those too - ignore persistence, timeout
* and range values */
OPAL_LIST_FOREACH(iptr, info, opal_value_t) {
if (0 == strcmp(iptr->key, OPAL_PMIX_RANGE) ||
0 == strcmp(iptr->key, OPAL_PMIX_PERSISTENCE)) {
continue;
}
if (0 == strcmp(iptr->key, OPAL_PMIX_TIMEOUT)) {
/* record the timeout value, but don't pack it */
req->timeout = iptr->data.integer;
continue;
}
opal_output_verbose(5, orte_pmix_server_globals.output,
"%s publishing data %s of type %d from source %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), iptr->key, iptr->type,
@ -257,11 +262,16 @@ int pmix_server_lookup_fn(opal_process_name_t *proc, char **keys,
}
}
/* if we have items, pack those too - ignore range value */
/* if we have items, pack those too - ignore range and timeout value */
OPAL_LIST_FOREACH(iptr, info, opal_value_t) {
if (0 == strcmp(iptr->key, OPAL_PMIX_RANGE)) {
continue;
}
if (0 == strcmp(iptr->key, OPAL_PMIX_TIMEOUT)) {
/* record the timeout value, but don't pack it */
req->timeout = iptr->data.integer;
continue;
}
if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &iptr, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(req);
@ -347,11 +357,16 @@ int pmix_server_unpublish_fn(opal_process_name_t *proc, char **keys,
}
}
/* if we have items, pack those too - ignore range value */
/* if we have items, pack those too - ignore range and timeout value */
OPAL_LIST_FOREACH(iptr, info, opal_value_t) {
if (0 == strcmp(iptr->key, OPAL_PMIX_RANGE)) {
continue;
}
if (0 == strcmp(iptr->key, OPAL_PMIX_TIMEOUT)) {
/* record the timeout value, but don't pack it */
req->timeout = iptr->data.integer;
continue;
}
if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &iptr, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(req);