From 399de0738eae0466a6ba7a69f68f3dbe60c8a679 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 25 Jan 2017 19:19:22 -0800 Subject: [PATCH] Cleanup launch Given that we only set OOB contact info from inside of events, or before we begin threaded operations (e.g., in the ess), allow set_contact_info to directly update the oob/base framework globals. Correct the nidmap regex decompression routine. Ensure that rank=1 daemon always sends back its topology as this is the most common use-case. Signed-off-by: Ralph Castain --- orte/mca/oob/base/base.h | 11 +++--- orte/mca/oob/base/oob_base_frame.c | 12 +------ orte/mca/plm/base/plm_base_launch_support.c | 39 +++++++++------------ orte/orted/orted_main.c | 6 ++-- orte/util/nidmap.c | 24 ++++++------- 5 files changed, 37 insertions(+), 55 deletions(-) diff --git a/orte/mca/oob/base/base.h b/orte/mca/oob/base/base.h index ce49f50a75..21595f26ad 100644 --- a/orte/mca/oob/base/base.h +++ b/orte/mca/oob/base/base.h @@ -126,7 +126,7 @@ ORTE_DECLSPEC void orte_oob_base_send_nb(int fd, short args, void *cbdata); orte_oob_base_send_nb, cd); \ opal_event_set_priority(&cd->ev, ORTE_MSG_PRI); \ opal_event_active(&cd->ev, OPAL_EV_WRITE, 1); \ - }while(0); + }while(0) /* Our contact info is actually subject to change as transports * can fail at any time. So a request to obtain our URI requires @@ -175,12 +175,9 @@ OBJ_CLASS_DECLARATION(mca_oob_uri_req_t); mca_oob_uri_req_t *rq; \ rq = OBJ_NEW(mca_oob_uri_req_t); \ rq->uri = strdup((u)); \ - opal_event_set(orte_oob_base.ev_base, &(rq)->ev, -1, \ - OPAL_EV_WRITE, \ - orte_oob_base_set_addr, (rq)); \ - opal_event_set_priority(&(rq)->ev, ORTE_MSG_PRI); \ - opal_event_active(&(rq)->ev, OPAL_EV_WRITE, 1); \ - }while(0); + orte_oob_base_set_addr(0, 0, (void*)rq); \ + }while(0) + ORTE_DECLSPEC void orte_oob_base_set_addr(int fd, short args, void *cbdata); diff --git a/orte/mca/oob/base/oob_base_frame.c b/orte/mca/oob/base/oob_base_frame.c index 7af6b10b92..a20d20010e 100644 --- a/orte/mca/oob/base/oob_base_frame.c +++ b/orte/mca/oob/base/oob_base_frame.c @@ -109,12 +109,6 @@ static int orte_oob_base_close(void) OBJ_DESTRUCT(&orte_oob_base.peers); - if (ORTE_PROC_IS_APP || ORTE_PROC_IS_TOOL) { - opal_progress_thread_finalize(NULL); - } else { - opal_progress_thread_finalize("OOB-BASE"); - } - OPAL_TIMING_EVENT((&tm_oob, "Finish")); OPAL_TIMING_REPORT(orte_oob_base.timing, &tm_oob); @@ -133,11 +127,7 @@ static int orte_oob_base_open(mca_base_open_flag_t flags) opal_hash_table_init(&orte_oob_base.peers, 128); OBJ_CONSTRUCT(&orte_oob_base.actives, opal_list_t); - if (ORTE_PROC_IS_APP || ORTE_PROC_IS_TOOL) { - orte_oob_base.ev_base = opal_progress_thread_init(NULL); - } else { - orte_oob_base.ev_base = opal_progress_thread_init("OOB-BASE"); - } + orte_oob_base.ev_base = orte_event_base; #if OPAL_ENABLE_FT_CR == 1 diff --git a/orte/mca/plm/base/plm_base_launch_support.c b/orte/mca/plm/base/plm_base_launch_support.c index 3138c98045..3985521eee 100644 --- a/orte/mca/plm/base/plm_base_launch_support.c +++ b/orte/mca/plm/base/plm_base_launch_support.c @@ -835,7 +835,7 @@ void orte_plm_base_daemon_topology(int status, orte_process_name_t* sender, orte_job_t *jdata; OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, - "%s plm:base:daemon_topology for daemon %s", + "%s plm:base:daemon_topology recvd for daemon %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(sender))); @@ -938,7 +938,7 @@ void orte_plm_base_daemon_topology(int status, orte_process_name_t* sender, CLEANUP: OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, - "%s plm:base:orted_report_launch %s for daemon %s", + "%s plm:base:orted:report_topo launch %s for daemon %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), orted_failed_launch ? "failed" : "completed", ORTE_NAME_PRINT(sender))); @@ -985,7 +985,6 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, char *rml_uri = NULL, *ptr; int rc, idx; orte_proc_t *daemon=NULL; - orte_node_t *node; orte_job_t *jdata; orte_process_name_t dname; opal_buffer_t *relay; @@ -994,7 +993,7 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, hwloc_topology_t topo; int i; bool found; - orte_daemon_cmd_flag_t cmd = ORTE_DAEMON_REPORT_TOPOLOGY_CMD; + orte_daemon_cmd_flag_t cmd; /* get the daemon job, if necessary */ if (NULL == jdatorted) { @@ -1054,8 +1053,6 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&daemon->name), nodename)); - node = daemon->node; - /* look this node up, if necessary */ if (!orte_plm_globals.daemon_nodes_assigned_at_launch) { OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, @@ -1067,21 +1064,11 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, free(daemon->node->name); daemon->node->name = strdup(nodename); /* mark that it was verified */ - ORTE_FLAG_SET(node, ORTE_NODE_FLAG_LOC_VERIFIED); - } - - if (NULL == node) { - /* this shouldn't happen - it indicates an error in the - * prior node matching logic, so report it and error out - */ - orte_show_help("help-plm-base.txt", "daemon-no-assigned-node", true, - ORTE_NAME_PRINT(&daemon->name), nodename); - orted_failed_launch = true; - goto CLEANUP; + ORTE_FLAG_SET(daemon->node, ORTE_NODE_FLAG_LOC_VERIFIED); } /* mark the daemon as launched */ - ORTE_FLAG_SET(node, ORTE_NODE_FLAG_DAEMON_LAUNCHED); + ORTE_FLAG_SET(daemon->node, ORTE_NODE_FLAG_DAEMON_LAUNCHED); if (orte_retain_aliases) { char *alias, **atmp=NULL; @@ -1113,7 +1100,7 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, } alias = opal_argv_join(atmp, ','); opal_argv_free(atmp); - orte_set_attribute(&node->attributes, ORTE_NODE_ALIAS, ORTE_ATTR_LOCAL, alias, OPAL_STRING); + orte_set_attribute(&daemon->node->attributes, ORTE_NODE_ALIAS, ORTE_ATTR_LOCAL, alias, OPAL_STRING); free(alias); } @@ -1130,7 +1117,7 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, /* rank=1 always sends its topology back */ topo = NULL; - if (1 == sender->vpid) { + if (1 == dname.vpid) { idx=1; if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &topo, &idx, OPAL_HWLOC_TOPO))) { ORTE_ERROR_LOG(rc); @@ -1151,7 +1138,7 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, "%s TOPOLOGY ALREADY RECORDED", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); found = true; - node->topology = t; + daemon->node->topology = t; if (NULL != topo) { hwloc_topology_destroy(topo); } @@ -1167,12 +1154,18 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, t = OBJ_NEW(orte_topology_t); t->sig = sig; opal_pointer_array_add(orte_node_topologies, t); - node->topology = t; + daemon->node->topology = t; if (NULL != topo) { t->topo = topo; } else { + /* nope - save the signature and request the complete topology from that node */ + OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, + "%s REQUESTING TOPOLOGY FROM %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(&dname))); /* construct the request */ relay = OBJ_NEW(opal_buffer_t); + cmd = ORTE_DAEMON_REPORT_TOPOLOGY_CMD; if (OPAL_SUCCESS != (rc = opal_dss.pack(relay, &cmd, 1, ORTE_DAEMON_CMD))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(relay); @@ -1181,7 +1174,7 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, } /* send it */ orte_rml.send_buffer_nb(orte_mgmt_conduit, - sender, relay, + &dname, relay, ORTE_RML_TAG_DAEMON, orte_rml_send_callback, NULL); /* we will count this node as completed diff --git a/orte/orted/orted_main.c b/orte/orted/orted_main.c index 741d7bcf40..51c3b4240d 100644 --- a/orte/orted/orted_main.c +++ b/orte/orted/orted_main.c @@ -761,8 +761,10 @@ int orte_daemon(int argc, char *argv[]) /* if we are rank=1, then send our topology back - otherwise, mpirun * will request it if necessary */ - if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &opal_hwloc_topology, 1, OPAL_HWLOC_TOPO))) { - ORTE_ERROR_LOG(ret); + if (1 == ORTE_PROC_MY_NAME->vpid) { + if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &opal_hwloc_topology, 1, OPAL_HWLOC_TOPO))) { + ORTE_ERROR_LOG(ret); + } } /* send to the HNP's callback - will be routed if routes are available */ diff --git a/orte/util/nidmap.c b/orte/util/nidmap.c index 320c1b17d1..7d1c0f1de6 100644 --- a/orte/util/nidmap.c +++ b/orte/util/nidmap.c @@ -516,9 +516,9 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer) /* decode a nodemap for a daemon */ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) { - int k, m, n, rc, start, endpt; + int m, n, rc; orte_node_t *node; - size_t num_nodes; + size_t k, num_nodes, endpt; orte_job_t *daemons; orte_proc_t *dptr; char **nodes, *indices, *dvpids; @@ -601,16 +601,16 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) for (n=0; NULL != tmp[n]; n++) { /* convert the number - since it might be a range, * save the remainder pointer */ - nodeids[k] = strtoul(tmp[n], &rmndr, 10); + nodeids[k++] = strtoul(tmp[n], &rmndr, 10); if (NULL != rmndr) { /* it must be a range - find the endpoint */ ++rmndr; + m = nodeids[k-1] + 1; endpt = strtoul(rmndr, NULL, 10); - start = nodeids[k] + 1; - for (m=0; m < endpt; m++) { - ++k; - nodeids[k] = start + m; + while (k <= endpt && k < num_nodes) { + nodeids[k++] = m++; } + --k; // step back to compensate for later increment } ++k; } @@ -624,16 +624,16 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) for (n=0; NULL != tmp[n]; n++) { /* convert the number - since it might be a range, * save the remainder pointer */ - dids[k] = strtoul(tmp[n], &rmndr, 10); + dids[k++] = strtoul(tmp[n], &rmndr, 10); if (NULL != rmndr) { /* it must be a range - find the endpoint */ ++rmndr; endpt = strtoul(rmndr, NULL, 10); - start = dids[k] + 1; - for (m=0; m < endpt; m++) { - ++k; - dids[k] = start + m; + m = dids[k-1] + 1; + while (k <= endpt && k < num_nodes) { + dids[k++] = m++; } + --k; // step back to compensate for later increment } ++k; }