From b7e9711f4520b202a5c7d5b6b72f263aa4179e27 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 4 Apr 2017 21:09:02 -0700 Subject: [PATCH] Resolve the direct modex race condition. The request hotel was running out of rooms, thereby returning an error upon checkin - and we had missed error_logging in a couple of those places. Hence no error message and things just hung. Output a (hopefully) helpful message when we time out an operation. Thanks to Nathan for tracking it down. Signed-off-by: Ralph Castain --- orte/orted/help-orted.txt | 10 +++++++++- orte/orted/pmix/pmix_server.c | 27 ++++++++++++++++++++++++-- orte/orted/pmix/pmix_server_internal.h | 3 +++ orte/orted/pmix/pmix_server_pub.c | 6 ++++-- 4 files changed, 41 insertions(+), 5 deletions(-) diff --git a/orte/orted/help-orted.txt b/orte/orted/help-orted.txt index fb271f90d8..6ab53cba82 100644 --- a/orte/orted/help-orted.txt +++ b/orte/orted/help-orted.txt @@ -10,7 +10,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2014-2015 Intel, Inc. All rights reserved. +# Copyright (c) 2014-2017 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -60,3 +60,11 @@ info key: key: %s The operation will continue, but may not behave completely as expected. +# +[timedout] +A request has timed out and will therefore fail: + + Operation: %s + +Your job may terminate as a result of this problem. You may want to +adjust the MCA parameter pmix_server_max_wait and try again. 
diff --git a/orte/orted/pmix/pmix_server.c b/orte/orted/pmix/pmix_server.c index 8754ded276..f8d81025ec 100644 --- a/orte/orted/pmix/pmix_server.c +++ b/orte/orted/pmix/pmix_server.c @@ -83,6 +83,8 @@ static void pmix_server_dmdx_resp(int status, orte_process_name_t* sender, opal_buffer_t *buffer, orte_rml_tag_t tg, void *cbdata); +#define ORTE_PMIX_SERVER_MIN_ROOMS 4096 + pmix_server_globals_t orte_pmix_server_globals = {0}; static opal_pmix_server_module_t pmix_server = { @@ -122,7 +124,7 @@ void pmix_server_register_params(void) orte_pmix_server_globals.verbosity); } /* specify the size of the hotel */ - orte_pmix_server_globals.num_rooms = 256; + orte_pmix_server_globals.num_rooms = -1; (void) mca_base_var_register ("orte", "pmix", NULL, "server_max_reqs", "Maximum number of backlogged PMIx server direct modex requests", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, @@ -158,7 +160,7 @@ static void eviction_cbfunc(struct opal_hotel_t *hotel, { pmix_server_req_t *req = (pmix_server_req_t*)occupant; bool timeout = false; - int rc; + int rc=OPAL_ERR_TIMEOUT; /* decrement the request timeout */ req->timeout -= orte_pmix_server_globals.timeout; @@ -175,6 +177,8 @@ static void eviction_cbfunc(struct opal_hotel_t *hotel, } ORTE_ERROR_LOG(rc); /* fall thru and return an error so the caller doesn't hang */ + } else { + orte_show_help("help-orted.txt", "timedout", true, req->operation); } /* don't let the caller hang */ if (NULL != req->opcbfunc) { @@ -205,6 +209,17 @@ int pmix_server_init(void) /* setup the server's state variables */ OBJ_CONSTRUCT(&orte_pmix_server_globals.reqs, opal_hotel_t); + /* by the time we init the server, we should know how many nodes we + * have in our environment - with the exception of mpirun. If the + * user specified the size of the hotel, then use that value. 
Otherwise, + * set the value to something large to avoid running out of rooms on + * large machines */ + if (-1 == orte_pmix_server_globals.num_rooms) { + orte_pmix_server_globals.num_rooms = orte_process_info.num_procs * 2; + if (orte_pmix_server_globals.num_rooms < ORTE_PMIX_SERVER_MIN_ROOMS) { + orte_pmix_server_globals.num_rooms = ORTE_PMIX_SERVER_MIN_ROOMS; + } + } if (OPAL_SUCCESS != (rc = opal_hotel_init(&orte_pmix_server_globals.reqs, orte_pmix_server_globals.num_rooms, orte_event_base, orte_pmix_server_globals.timeout*1000000, @@ -533,6 +548,7 @@ static void pmix_server_dmdx_recv(int status, orte_process_name_t* sender, * condition, so just log the request and we will fill * it later */ req = OBJ_NEW(pmix_server_req_t); + (void)asprintf(&req->operation, "DMDX: %s:%d", __FILE__, __LINE__); req->proxy = *sender; req->target = idreq; req->remote_room_num = room_num; @@ -540,6 +556,7 @@ static void pmix_server_dmdx_recv(int status, orte_process_name_t* sender, * amount of time to start the job */ ORTE_ADJUST_TIMEOUT(req); if (OPAL_SUCCESS != (rc = opal_hotel_checkin(&orte_pmix_server_globals.reqs, req, &req->room_num))) { + ORTE_ERROR_LOG(rc); OBJ_RELEASE(req); send_error(rc, &idreq, sender); } @@ -558,6 +575,7 @@ static void pmix_server_dmdx_recv(int status, orte_process_name_t* sender, /* track the request since the call down to the PMIx server * is asynchronous */ req = OBJ_NEW(pmix_server_req_t); + (void)asprintf(&req->operation, "DMDX: %s:%d", __FILE__, __LINE__); req->proxy = *sender; req->target = idreq; req->remote_room_num = room_num; @@ -565,6 +583,7 @@ static void pmix_server_dmdx_recv(int status, orte_process_name_t* sender, * amount of time to start the job */ ORTE_ADJUST_TIMEOUT(req); if (OPAL_SUCCESS != (rc = opal_hotel_checkin(&orte_pmix_server_globals.reqs, req, &req->room_num))) { + ORTE_ERROR_LOG(rc); OBJ_RELEASE(req); send_error(rc, &idreq, sender); return; @@ -696,6 +715,7 @@ OBJ_CLASS_INSTANCE(orte_pmix_server_op_caddy_t, static void 
rqcon(pmix_server_req_t *p) { + p->operation = NULL; p->target = *ORTE_NAME_INVALID; p->proxy = *ORTE_NAME_INVALID; p->timeout = orte_pmix_server_globals.timeout; @@ -710,6 +730,9 @@ static void rqcon(pmix_server_req_t *p) } static void rqdes(pmix_server_req_t *p) { + if (NULL != p->operation) { + free(p->operation); + } if (NULL != p->jdata) { OBJ_RELEASE(p->jdata); } diff --git a/orte/orted/pmix/pmix_server_internal.h b/orte/orted/pmix/pmix_server_internal.h index 53da91595c..5712529b5c 100644 --- a/orte/orted/pmix/pmix_server_internal.h +++ b/orte/orted/pmix/pmix_server_internal.h @@ -62,6 +62,7 @@ typedef struct { opal_object_t super; opal_event_t ev; + char *operation; int status; int timeout; int room_num; @@ -109,6 +110,7 @@ OBJ_CLASS_DECLARATION(orte_pmix_mdx_caddy_t); do { \ pmix_server_req_t *_req; \ _req = OBJ_NEW(pmix_server_req_t); \ + (void)asprintf(&_req->operation, "DMDX: %s:%d", __FILE__, __LINE__); \ _req->target = (p); \ _req->mdxcbfunc = (ocf); \ _req->cbdata = (ocd); \ @@ -122,6 +124,7 @@ OBJ_CLASS_DECLARATION(orte_pmix_mdx_caddy_t); do { \ pmix_server_req_t *_req; \ _req = OBJ_NEW(pmix_server_req_t); \ + (void)asprintf(&_req->operation, "SPAWN: %s:%d", __FILE__, __LINE__); \ _req->jdata = (j); \ _req->spcbfunc = (ocf); \ _req->cbdata = (ocd); \ diff --git a/orte/orted/pmix/pmix_server_pub.c b/orte/orted/pmix/pmix_server_pub.c index 0b3ec8d109..86d07cccb7 100644 --- a/orte/orted/pmix/pmix_server_pub.c +++ b/orte/orted/pmix/pmix_server_pub.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014 Mellanox Technologies, Inc. * All rights reserved. 
* Copyright (c) 2014-2016 Research Organization for Information Science @@ -100,6 +100,7 @@ int pmix_server_publish_fn(opal_process_name_t *proc, /* create the caddy */ req = OBJ_NEW(pmix_server_req_t); + (void)asprintf(&req->operation, "PUBLISH: %s:%d", __FILE__, __LINE__); req->opcbfunc = cbfunc; req->cbdata = cbdata; @@ -207,6 +208,7 @@ int pmix_server_lookup_fn(opal_process_name_t *proc, char **keys, /* create the caddy */ req = OBJ_NEW(pmix_server_req_t); + (void)asprintf(&req->operation, "LOOKUP: %s:%d", __FILE__, __LINE__); req->lkcbfunc = cbfunc; req->cbdata = cbdata; @@ -302,6 +304,7 @@ int pmix_server_unpublish_fn(opal_process_name_t *proc, char **keys, /* create the caddy */ req = OBJ_NEW(pmix_server_req_t); + (void)asprintf(&req->operation, "UNPUBLISH: %s:%d", __FILE__, __LINE__); req->opcbfunc = cbfunc; req->cbdata = cbdata; @@ -468,4 +471,3 @@ void pmix_server_keyval_client(int status, orte_process_name_t* sender, OBJ_RELEASE(req); } } -