From 709b36efb481edc44619c1164d85be9634b490db Mon Sep 17 00:00:00 2001 From: Ralph Castain <rhc@open-mpi.org> Date: Fri, 25 Sep 2009 01:00:09 +0000 Subject: [PATCH] Cleanup auto-wireup and enable tools to "discover" the HNP via multicast This commit was SVN r22012. --- orte/mca/ess/base/ess_base_std_tool.c | 4 +- orte/mca/ess/cm/ess_cm_module.c | 222 +++++++++++++++++--------- orte/mca/odls/odls_types.h | 3 +- orte/mca/rmcast/basic/rmcast_basic.c | 14 +- orte/mca/routed/cm/routed_cm.c | 48 ++++-- 5 files changed, 194 insertions(+), 97 deletions(-) diff --git a/orte/mca/ess/base/ess_base_std_tool.c b/orte/mca/ess/base/ess_base_std_tool.c index dc95556be2..a965e2c4d4 100644 --- a/orte/mca/ess/base/ess_base_std_tool.c +++ b/orte/mca/ess/base/ess_base_std_tool.c @@ -127,9 +127,7 @@ int orte_ess_base_tool_setup(void) /* setup I/O forwarding system - must come after we init routes */ if (NULL != orte_process_info.my_hnp_uri) { - /* only do this if we were NOT given an HNP - i.e., if we - * are a standalone tool - */ + /* only do this if we were given an HNP */ if (ORTE_SUCCESS != (ret = orte_iof_base_open())) { ORTE_ERROR_LOG(ret); error = "orte_iof_base_open"; diff --git a/orte/mca/ess/cm/ess_cm_module.c b/orte/mca/ess/cm/ess_cm_module.c index ad3a59a832..f2adbce6f9 100644 --- a/orte/mca/ess/cm/ess_cm_module.c +++ b/orte/mca/ess/cm/ess_cm_module.c @@ -31,6 +31,7 @@ #include "orte/mca/rmcast/base/base.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/odls/odls_types.h" +#include "orte/mca/plm/base/base.h" #include "orte/util/show_help.h" #include "orte/util/proc_info.h" #include "orte/util/name_fns.h" @@ -81,10 +82,6 @@ static int rte_init(void) * be calling this module */ - /* initialize the global list of local children and job data */ - OBJ_CONSTRUCT(&orte_local_children, opal_list_t); - OBJ_CONSTRUCT(&orte_local_jobdata, opal_list_t); - /* run the prolog */ if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { error = "orte_ess_base_std_prolog"; @@ -106,25 +103,65 @@ static int rte_init(void) goto error; } - /* get a name for ourselves */ - if (ORTE_SUCCESS != (ret = cm_set_name())) { - error = "set_name"; - goto error; + if (ORTE_PROC_IS_DAEMON) { + /* get a name for ourselves */ + if (ORTE_SUCCESS != (ret = cm_set_name())) { + error = "set_name"; + goto error; + } + + /* initialize the global list of local children and job data */ + OBJ_CONSTRUCT(&orte_local_children, opal_list_t); + OBJ_CONSTRUCT(&orte_local_jobdata, opal_list_t); + + /* get the list of nodes used for this job */ + nodelist = getenv("OMPI_MCA_orte_nodelist"); + + if (NULL != nodelist) { + /* split the node list into an argv array */ + hosts = opal_argv_split(nodelist, ','); + } + if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup(hosts))) { + ORTE_ERROR_LOG(ret); + error = "orte_ess_base_orted_setup"; + goto error; + } + opal_argv_free(hosts); + } else if (ORTE_PROC_IS_TOOL) { + if (ORTE_SUCCESS != (ret = orte_plm_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_plm_base_open"; + goto error; + } + + if (ORTE_SUCCESS != (ret = orte_plm_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_plm_base_select"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_plm.set_hnp_name())) { + ORTE_ERROR_LOG(ret); + error = "orte_plm_set_hnp_name"; + goto error; + } + /* close the plm since we opened it to set our + * name, but have no further use for it + */ + orte_plm_base_close(); + + /* checkin with the HNP */ + if (ORTE_SUCCESS != (ret = cm_set_name())) { + error = "set_name"; + goto error; + } + + /* do the rest of the standard tool init */ + if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) { + ORTE_ERROR_LOG(ret); + error = "orte_ess_base_tool_setup"; + goto error; + } } - - /* get the list of nodes used for this job */ - nodelist = getenv("OMPI_MCA_orte_nodelist"); - - if (NULL != nodelist) { - /* split the node list into an argv array */ - hosts = opal_argv_split(nodelist, ','); - } - if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup(hosts))) { - ORTE_ERROR_LOG(ret); - error = "orte_ess_base_orted_setup"; - goto error; - } - opal_argv_free(hosts); return ORTE_SUCCESS; error: @@ -139,13 +176,19 @@ static int rte_finalize(void) { int ret; - if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) { - ORTE_ERROR_LOG(ret); + if (ORTE_PROC_IS_DAEMON) { + if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) { + ORTE_ERROR_LOG(ret); + } + + /* deconstruct the nidmap and jobmap arrays */ + orte_util_nidmap_finalize(); + } else if (ORTE_PROC_IS_TOOL) { + if (ORTE_SUCCESS != (ret = orte_ess_base_tool_finalize())) { + ORTE_ERROR_LOG(ret); + } } - /* deconstruct the nidmap and jobmap arrays */ - orte_util_nidmap_finalize(); - return ret; } @@ -320,6 +363,7 @@ static bool name_success = false; static void cbfunc(int channel, opal_buffer_t *buf, void *cbdata) { int32_t n; + orte_daemon_cmd_flag_t cmd; orte_process_name_t name; int rc; char *uri; @@ -327,18 +371,26 @@ static void cbfunc(int channel, opal_buffer_t *buf, void *cbdata) /* ensure we default to failure */ name_success = false; - /* unpack the name */ + /* unpack the cmd */ n = 1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(buf, &name, &n, ORTE_NAME))) { + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buf, &cmd, &n, ORTE_DAEMON_CMD_T))) { ORTE_ERROR_LOG(rc); return; } - ORTE_PROC_MY_NAME->jobid = name.jobid; - ORTE_PROC_MY_NAME->vpid = name.vpid; - OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output, - "set my name to %s", ORTE_NAME_PRINT(&name))); - + if (ORTE_DAEMON_NAME_REQ_CMD == cmd) { + /* unpack the name */ + n = 1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buf, &name, &n, ORTE_NAME))) { + ORTE_ERROR_LOG(rc); + return; + } + ORTE_PROC_MY_NAME->jobid = name.jobid; + ORTE_PROC_MY_NAME->vpid = name.vpid; + OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output, + "set my name to %s", ORTE_NAME_PRINT(&name))); + } + /* unpack the HNP uri */ n = 1; if (ORTE_SUCCESS != (rc = opal_dss.unpack(buf, &uri, &n, OPAL_STRING))) { @@ -370,47 +422,67 @@ static int cm_set_name(void) opal_buffer_t buf; orte_daemon_cmd_flag_t cmd; - /* try constructing the name from the IP address - first, - * find an appropriate interface - */ - for (i=0; NULL != ifnames[i]; i++) { - if (ORTE_SUCCESS != (rc = opal_ifnametoaddr(ifnames[i], - (struct sockaddr*)&if_addr, - sizeof(struct sockaddr_in)))) { - continue; - } - addr = htonl(if_addr.sin_addr.s_addr); - - /* break address into sections */ - net = 0x000000FF & ((0xFF000000 & addr) >> 24); - rack = 0x000000FF & ((0x00FF0000 & addr) >> 16); - slot = 0x000000FF & ((0x0000FF00 & addr) >> 8); - function = 0x000000FF & addr; - - /* is this an appropriate interface to use */ - if (10 == net) { - /* set our vpid - add 1 to ensure it cannot be zero */ - ORTE_PROC_MY_NAME->vpid = (rack * mca_ess_cm_component.max_slots) + slot + function + 1; - /* set our jobid to 0 */ - ORTE_PROC_MY_NAME->jobid = 0; - return ORTE_SUCCESS; - } else if (192 == net && 168 == rack) { - /* just use function */ - ORTE_PROC_MY_NAME->vpid = function + 1; - /* set our jobid to 0 */ - ORTE_PROC_MY_NAME->jobid = 0; - return ORTE_SUCCESS; - } - } - - /* if we get here, then we didn't find a usable interface. - * use the reliable multicast system to contact the HNP and - * get a name - */ + /* setup the query */ OBJ_CONSTRUCT(&buf, opal_buffer_t); - cmd = ORTE_DAEMON_NAME_REQ_CMD; - opal_dss.pack(&buf, &cmd, 1, ORTE_DAEMON_CMD_T); - + + if (ORTE_PROC_IS_DAEMON) { + /* try constructing the name from the IP address - first, + * find an appropriate interface + */ + for (i=0; NULL != ifnames[i]; i++) { + if (ORTE_SUCCESS != (rc = opal_ifnametoaddr(ifnames[i], + (struct sockaddr*)&if_addr, + sizeof(struct sockaddr_in)))) { + continue; + } + addr = htonl(if_addr.sin_addr.s_addr); + + /* break address into sections */ + net = 0x000000FF & ((0xFF000000 & addr) >> 24); + rack = 0x000000FF & ((0x00FF0000 & addr) >> 16); + slot = 0x000000FF & ((0x0000FF00 & addr) >> 8); + function = 0x000000FF & addr; + + /* is this an appropriate interface to use */ + if (10 == net) { + /* set our vpid - add 1 to ensure it cannot be zero */ + ORTE_PROC_MY_NAME->vpid = (rack * mca_ess_cm_component.max_slots) + slot + function + 1; + /* set our jobid to 0 */ + ORTE_PROC_MY_NAME->jobid = 0; + /* notify the HNP of our existence */ + cmd = ORTE_DAEMON_CHECKIN_CMD; + opal_dss.pack(&buf, &cmd, 1, ORTE_DAEMON_CMD_T); + opal_dss.pack(&buf, ORTE_PROC_MY_NAME, 1, ORTE_NAME); + goto checkin; + } else if (192 == net && 168 == rack) { + /* just use function */ + ORTE_PROC_MY_NAME->vpid = function + 1; + /* set our jobid to 0 */ + ORTE_PROC_MY_NAME->jobid = 0; + /* notify the HNP of our existence */ + cmd = ORTE_DAEMON_CHECKIN_CMD; + opal_dss.pack(&buf, &cmd, 1, ORTE_DAEMON_CMD_T); + opal_dss.pack(&buf, ORTE_PROC_MY_NAME, 1, ORTE_NAME); + goto checkin; + } + } + /* if we get here, then we didn't find a usable interface. + * use the reliable multicast system to contact the HNP and + * get a name + */ + cmd = ORTE_DAEMON_NAME_REQ_CMD; + opal_dss.pack(&buf, &cmd, 1, ORTE_DAEMON_CMD_T); + } else if (ORTE_PROC_IS_TOOL) { + cmd = ORTE_TOOL_CHECKIN_CMD; + opal_dss.pack(&buf, &cmd, 1, ORTE_DAEMON_CMD_T); + /* provide our name */ + opal_dss.pack(&buf, ORTE_PROC_MY_NAME, 1, ORTE_NAME); + } + +checkin: + /* always include our node name */ + opal_dss.pack(&buf, &orte_process_info.nodename, 1, OPAL_STRING); + /* set the recv to get the answer */ if (ORTE_SUCCESS != (rc = orte_rmcast.recv_nb(ORTE_RMCAST_SYS_ADDR, ORTE_RMCAST_NON_PERSISTENT, @@ -420,6 +492,7 @@ static int cm_set_name(void) OBJ_DESTRUCT(&buf); return rc; } + /* send the request */ if (ORTE_SUCCESS != (rc = orte_rmcast.send(ORTE_RMCAST_SYS_ADDR, ORTE_RMCAST_TAG_BOOTSTRAP, @@ -439,4 +512,3 @@ static int cm_set_name(void) } return ORTE_ERR_NOT_FOUND; } - diff --git a/orte/mca/odls/odls_types.h b/orte/mca/odls/odls_types.h index 34f7e774a5..e22385c7b2 100644 --- a/orte/mca/odls/odls_types.h +++ b/orte/mca/odls/odls_types.h @@ -75,7 +75,8 @@ typedef uint8_t orte_daemon_cmd_flag_t; /* bootstrap */ #define ORTE_DAEMON_NAME_REQ_CMD (orte_daemon_cmd_flag_t) 23 #define ORTE_DAEMON_CHECKIN_CMD (orte_daemon_cmd_flag_t) 24 - +#define ORTE_TOOL_CHECKIN_CMD (orte_daemon_cmd_flag_t) 25 + /* * List object to locally store the process names and pids of * our children. This can subsequently be used to order termination diff --git a/orte/mca/rmcast/basic/rmcast_basic.c b/orte/mca/rmcast/basic/rmcast_basic.c index 7cc2f31019..e6a9ba38ac 100644 --- a/orte/mca/rmcast/basic/rmcast_basic.c +++ b/orte/mca/rmcast/basic/rmcast_basic.c @@ -41,7 +41,6 @@ static opal_mutex_t lock; static opal_list_t recvs; static opal_list_t channels; static unsigned int next_channel; -static uint8_t my_packed_name[8]; static bool init_completed = false; /* LOCAL FUNCTIONS */ @@ -237,7 +236,6 @@ static int init(void) rmcast_basic_channel_t *chan; int channel; char *name; - uint32_t tmp; if (init_completed) { return ORTE_SUCCESS; @@ -252,12 +250,6 @@ static int init(void) OBJ_CONSTRUCT(&recvs, opal_list_t); OBJ_CONSTRUCT(&channels, opal_list_t); - /* convert my name to get it into network-byte-order */ - tmp = htonl(ORTE_PROC_MY_NAME->jobid); - memcpy(&my_packed_name[0], &tmp, 4); - tmp = htonl(ORTE_PROC_MY_NAME->vpid); - memcpy(&my_packed_name[4], &tmp, 4); - /* define the starting point for new channels */ next_channel = ORTE_RMCAST_DYNAMIC_CHANNELS; @@ -783,6 +775,7 @@ static void xmit_data(int sd, short flags, void* send_req) int32_t sz; int rc; uint16_t tmp; + uint32_t nm; OPAL_THREAD_LOCK(&chan->send_lock); while (NULL != (item = opal_list_remove_first(&chan->pending_sends))) { @@ -792,7 +785,10 @@ static void xmit_data(int sd, short flags, void* send_req) opal_dss.unload(snd->data, (void**)&bytes, &sz); /* start the send data area with our name in network-byte-order */ - memcpy(chan->send_data, my_packed_name, 8); + nm = htonl(ORTE_PROC_MY_NAME->jobid); + memcpy(&chan->send_data[0], &nm, 4); + nm = htonl(ORTE_PROC_MY_NAME->vpid); + memcpy(&chan->send_data[4], &nm, 4); /* add the tag data, also converted */ tmp = htons(snd->tag); diff --git a/orte/mca/routed/cm/routed_cm.c b/orte/mca/routed/cm/routed_cm.c index d266f816be..982b3c2556 100644 --- a/orte/mca/routed/cm/routed_cm.c +++ b/orte/mca/routed/cm/routed_cm.c @@ -97,9 +97,16 @@ static int finalize(void) { int rc; + /* if I am a tool without a daemon, just cleanout + * the basics and leave + */ + if (ORTE_PROC_IS_TOOL && NULL == orte_process_info.my_daemon_uri) { + goto cleanup; + } + /* if I am an application process, indicate that I am - * truly finalizing prior to departure - */ + * truly finalizing prior to departure + */ if (ORTE_PROC_IS_APP) { if (ORTE_SUCCESS != (rc = orte_routed_base_register_sync(false))) { ORTE_ERROR_LOG(rc); @@ -112,6 +119,7 @@ static int finalize(void) orte_routed_base_comm_stop(); } +cleanup: OBJ_DESTRUCT(&jobfam_list); /* destruct the global condition and lock */ OBJ_DESTRUCT(&cond); @@ -291,18 +299,18 @@ static orte_process_name_t get_route(orte_process_name_t *target) goto found; } + /* if I am a tool without a daemon, the route is direct */ + if (ORTE_PROC_IS_TOOL && NULL == orte_process_info.my_daemon_uri) { + ret = target; + goto found; + } + /* if I am an application process, always route via my local daemon */ if (ORTE_PROC_IS_APP) { ret = ORTE_PROC_MY_DAEMON; goto found; } - /* if I am a tool, the route is direct */ - if (ORTE_PROC_IS_TOOL) { - ret = target; - goto found; - } - /* if the job family is zero, then this is going to a local slave, * so the path is direct */ @@ -469,8 +477,30 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat) */ int rc; - /* if I am a tool, then I stand alone - there is nothing to do */ + /* if I am a tool, then see if I stand alone - otherwise, + * setup the HNP info + */ if (ORTE_PROC_IS_TOOL) { + if (NULL == orte_process_info.my_hnp_uri) { + return ORTE_SUCCESS; + } + + /* set the contact info into the hash table */ + if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(orte_process_info.my_hnp_uri))) { + ORTE_ERROR_LOG(rc); + return(rc); + } + + /* extract the hnp name and store it */ + if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_process_info.my_hnp_uri, + ORTE_PROC_MY_HNP, NULL))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + /* set our lifeline to the HNP - we will abort if that connection is lost */ + lifeline = ORTE_PROC_MY_HNP; + return ORTE_SUCCESS; }