1
1

Merge pull request #3281 from rhc54/topic/dmx

Adjust the timeout for direct modex requests to reflect the size of the job
Этот коммит содержится в:
Ralph Castain 2017-04-04 19:04:33 -07:00 коммит произвёл GitHub
родитель 9cb18b8348 734b90aa6b
Коммит 9132bb26fe
3 изменённых файлов: 20 добавлений и 1 удалений

Просмотреть файл

@ -536,6 +536,9 @@ static void pmix_server_dmdx_recv(int status, orte_process_name_t* sender,
req->proxy = *sender;
req->target = idreq;
req->remote_room_num = room_num;
/* adjust the timeout to reflect the size of the job as it can take some
* amount of time to start the job */
ORTE_ADJUST_TIMEOUT(req);
if (OPAL_SUCCESS != (rc = opal_hotel_checkin(&orte_pmix_server_globals.reqs, req, &req->room_num))) {
OBJ_RELEASE(req);
send_error(rc, &idreq, sender);
@ -558,6 +561,9 @@ static void pmix_server_dmdx_recv(int status, orte_process_name_t* sender,
req->proxy = *sender;
req->target = idreq;
req->remote_room_num = room_num;
/* adjust the timeout to reflect the size of the job as it can take some
* amount of time to start the job */
ORTE_ADJUST_TIMEOUT(req);
if (OPAL_SUCCESS != (rc = opal_hotel_checkin(&orte_pmix_server_globals.reqs, req, &req->room_num))) {
OBJ_RELEASE(req);
send_error(rc, &idreq, sender);

Просмотреть файл

@ -13,7 +13,7 @@
* All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Mellanox Technologies, Inc.
* All rights reserved.
* Copyright (c) 2014-2017 Research Organization for Information Science
@ -148,6 +148,10 @@ static void dmodex_req(int sd, short args, void *cbdata)
return;
}
/* adjust the timeout to reflect the size of the job as it can take some
* amount of time to start the job */
ORTE_ADJUST_TIMEOUT(req);
/* has anyone already requested data for this target? If so,
* then the data is already on its way */
for (rnum=0; rnum < orte_pmix_server_globals.reqs.num_rooms; rnum++) {

Просмотреть файл

@ -48,6 +48,15 @@
BEGIN_C_DECLS
/* Floor applied to the timeout computed by ORTE_ADJUST_TIMEOUT. */
#define ORTED_PMIX_MIN_DMX_TIMEOUT 10

/* Scale the timeout of request `a` with the number of daemons.
 *
 * Sets (a)->timeout to (2 * orte_process_info.num_daemons) / 1000 and
 * then raises it to ORTED_PMIX_MIN_DMX_TIMEOUT if the result is smaller.
 *
 * `a` must be a pointer to an object with an assignable `timeout` member.
 * The argument is expanded more than once, so pass an expression that
 * has no side effects.
 *
 * NOTE(review): the unit of `timeout` is not visible here - presumably
 * seconds; confirm against the code that consumes it.
 */
#define ORTE_ADJUST_TIMEOUT(a) \
    do { \
        (a)->timeout = (orte_process_info.num_daemons * 2) / 1000; \
        if (ORTED_PMIX_MIN_DMX_TIMEOUT > (a)->timeout) { \
            (a)->timeout = ORTED_PMIX_MIN_DMX_TIMEOUT; \
        } \
    } while (0)
/* object for tracking requests so we can
* correctly route the eventual reply */
typedef struct {