diff --git a/orte/mca/ess/base/ess_base_std_orted.c b/orte/mca/ess/base/ess_base_std_orted.c index d3b0055469..7270b664ec 100644 --- a/orte/mca/ess/base/ess_base_std_orted.c +++ b/orte/mca/ess/base/ess_base_std_orted.c @@ -51,6 +51,7 @@ #include "orte/util/session_dir.h" #include "orte/util/name_fns.h" #include "orte/util/nidmap.h" +#include "orte/util/regex.h" #include "orte/util/show_help.h" #include "orte/mca/notifier/base/base.h" @@ -184,18 +185,29 @@ int orte_ess_base_orted_setup(char **hosts) error = "orte_util_nidmap_init"; goto error; } - if (ORTE_SUCCESS != (ret = orte_util_setup_local_nidmap_entries())) { - ORTE_ERROR_LOG(ret); - error = "orte_util_nidmap_init"; - goto error; - } - /* extract the node info from the environment and - * build a nidmap from it - */ - if (ORTE_SUCCESS != (ret = orte_util_build_daemon_nidmap(hosts))) { - ORTE_ERROR_LOG(ret); - error = "construct daemon map from static ports"; - goto error; + if (NULL != orted_launch_cmd) { + /* the launch cmd was given via regexp on the cmd line - parse + * it to get the contact info + */ + if (ORTE_SUCCESS != (ret = orte_regex_decode_maps(orted_launch_cmd, NULL))) { + ORTE_ERROR_LOG(ret); + error = "orte_regex_decode_maps"; + goto error; + } + } else { + if (ORTE_SUCCESS != (ret = orte_util_setup_local_nidmap_entries())) { + ORTE_ERROR_LOG(ret); + error = "orte_util_nidmap_init"; + goto error; + } + /* extract the node info from the environment and + * build a nidmap from it + */ + if (ORTE_SUCCESS != (ret = orte_util_build_daemon_nidmap(hosts))) { + ORTE_ERROR_LOG(ret); + error = "construct daemon map from static ports"; + goto error; + } } /* be sure to update the routing tree so the initial "phone home" * to mpirun goes through the tree! diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c index 83c42ea57e..29dbe28ff1 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -60,6 +60,7 @@ #include "orte/util/session_dir.h" #include "orte/util/proc_info.h" #include "orte/util/nidmap.h" +#include "orte/util/regex.h" #include "orte/util/show_help.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_wait.h" @@ -103,6 +104,85 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *data, /* get a pointer to the job map */ map = jdata->map; + /* are we passing a regexp? */ + if (orte_use_regexp && jdata->num_apps < 2 && NULL == orte_debugger_daemon) { + char *regexp; + flag = 1; + opal_dss.pack(data, &flag, 1, OPAL_INT8); + regexp = orte_regex_encode_maps(jdata); + opal_dss.pack(data, ®exp, 1, OPAL_STRING); + free(regexp); + /* if we are not using static ports, then we need to add the daemon wireup info */ + if (!orte_static_ports) { + /* pack a flag indicating that wiring info is provided */ + flag = 1; + opal_dss.pack(data, &flag, 1, OPAL_INT8); + /* get wireup info for daemons per the selected routing module */ + wireup = OBJ_NEW(opal_buffer_t); + if (ORTE_SUCCESS != (rc = orte_routed.get_wireup_info(wireup))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(wireup); + return rc; + } + /* if anything was inserted, put it in a byte object for xmission */ + if (0 < wireup->bytes_used) { + opal_dss.unload(wireup, (void**)&bo.bytes, &numbytes); + /* pack the number of bytes required by payload */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &numbytes, 1, OPAL_INT32))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(wireup); + return rc; + } + /* pack the byte object */ + bo.size = numbytes; + boptr = &bo; + if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &boptr, 1, OPAL_BYTE_OBJECT))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(wireup); + return rc; + } + /* release the data since it has now been copied into our buffer */ + free(bo.bytes); + } else { + /* pack numbytes=0 so the unpack routine remains sync'd to us */ + numbytes = 0; + if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &numbytes, 1, OPAL_INT32))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(wireup); + return rc; + } + } + OBJ_RELEASE(wireup); + } else { + /* pack a flag indicating no wireup info is provided */ + flag = 0; + opal_dss.pack(data, &flag, 1, OPAL_INT8); + } + /* insert an "add-procs" command here so we can cleanly process it on the + * other end + */ + command = ORTE_DAEMON_ADD_LOCAL_PROCS; + if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &command, 1, ORTE_DAEMON_CMD))) { + ORTE_ERROR_LOG(rc); + return rc; + } + /* since we will have processed this to update daemons, flag that we don't + * have the regexp again + */ + flag = 2; + opal_dss.pack(data, &flag, 1, OPAL_INT8); + /* pack the jobid so it can be extracted later */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &job, 1, ORTE_JOBID))) { + ORTE_ERROR_LOG(rc); + return rc; + } + /* all done */ + return ORTE_SUCCESS; + } + + /* if we are not passing a regexp, then pass the nodemap */ + flag = 0; + opal_dss.pack(data, &flag, 1, OPAL_INT8); /* construct a nodemap */ if (ORTE_SUCCESS != (rc = orte_util_encode_nodemap(&bo))) { ORTE_ERROR_LOG(rc); @@ -117,43 +197,53 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *data, } /* release the data since it has now been copied into our buffer */ free(bo.bytes); - - /* get wireup info for daemons per the selected routing module */ - wireup = OBJ_NEW(opal_buffer_t); - if (ORTE_SUCCESS != (rc = orte_routed.get_wireup_info(wireup))) { - ORTE_ERROR_LOG(rc); + + /* if we are not using static ports, we need to send the wireup info */ + if (!orte_static_ports) { + /* pack a flag indicating wiring info is provided */ + flag = 1; + opal_dss.pack(data, &flag, 1, OPAL_INT8); + /* get wireup info for daemons per the selected routing module */ + wireup = OBJ_NEW(opal_buffer_t); + if (ORTE_SUCCESS != (rc = orte_routed.get_wireup_info(wireup))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(wireup); + return rc; + } + /* if anything was inserted, put it in a byte object for xmission */ + if (0 < wireup->bytes_used) { + opal_dss.unload(wireup, (void**)&bo.bytes, &numbytes); + /* pack the number of bytes required by payload */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &numbytes, 1, OPAL_INT32))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(wireup); + return rc; + } + /* pack the byte object */ + bo.size = numbytes; + boptr = &bo; + if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &boptr, 1, OPAL_BYTE_OBJECT))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(wireup); + return rc; + } + /* release the data since it has now been copied into our buffer */ + free(bo.bytes); + } else { + /* pack numbytes=0 so the unpack routine remains sync'd to us */ + numbytes = 0; + if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &numbytes, 1, OPAL_INT32))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(wireup); + return rc; + } + } OBJ_RELEASE(wireup); - return rc; - } - /* if anything was inserted, put it in a byte object for xmission */ - if (0 < wireup->bytes_used) { - opal_dss.unload(wireup, (void**)&bo.bytes, &numbytes); - /* pack the number of bytes required by payload */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &numbytes, 1, OPAL_INT32))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(wireup); - return rc; - } - /* pack the byte object */ - bo.size = numbytes; - boptr = &bo; - if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &boptr, 1, OPAL_BYTE_OBJECT))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(wireup); - return rc; - } - /* release the data since it has now been copied into our buffer */ - free(bo.bytes); } else { - /* pack numbytes=0 so the unpack routine remains sync'd to us */ - numbytes = 0; - if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &numbytes, 1, OPAL_INT32))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(wireup); - return rc; - } + /* pack a flag indicating no wireup data is provided */ + flag = 0; + opal_dss.pack(data, &flag, 1, OPAL_INT8); } - OBJ_RELEASE(wireup); /* insert an "add-procs" command here so we can cleanly process it on the * other end @@ -164,6 +254,13 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *data, return rc; } + /* pack the flag indicating that we are not using regexps - required to + * keep things in order when unpacking due to different ways the data + * can get to the unpacking routine + */ + flag = 0; + opal_dss.pack(data, &flag, 1, OPAL_INT8); + /* are we co-locating debugger daemons? */ if (NULL != orte_debugger_daemon) { orte_app_context_t **apps; @@ -306,6 +403,23 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *data, return ORTE_SUCCESS; } +static int unpack_regexp(orte_odls_job_t **jobdat, opal_buffer_t *data) +{ + char *regexp; + int rc, cnt; + + cnt=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, ®exp, &cnt, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + return rc; + } + if (ORTE_SUCCESS != (rc = orte_regex_decode_maps(regexp, jobdat))) { + ORTE_ERROR_LOG(rc); + } + free(regexp); + return rc; +} + int orte_odls_base_default_update_daemon_info(opal_buffer_t *data) { opal_buffer_t wireup; @@ -313,8 +427,42 @@ int orte_odls_base_default_update_daemon_info(opal_buffer_t *data) int rc; orte_std_cntr_t cnt; int32_t numbytes; + int8_t flag; - /* extract the byte object holding the daemonmap */ + /* unpack the flag for regexp */ + cnt=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &flag, &cnt, OPAL_INT8))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + /* if we have a regexp, then process it so we know the daemonmap */ + if (0 < flag) { + OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, + "%s odls:update:daemon:info updating nidmap from regexp", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + if (ORTE_SUCCESS != (rc = unpack_regexp(NULL, data))) { + ORTE_ERROR_LOG(rc); + } + /* update the routing tree */ + if (ORTE_SUCCESS != (rc = orte_routed.update_routing_tree())) { + ORTE_ERROR_LOG(rc); + return rc; + } + /* see if we have wiring info as well */ + cnt=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &flag, &cnt, OPAL_INT8))) { + ORTE_ERROR_LOG(rc); + return rc; + } + if (0 < flag) { + /* yes - extract and process it */ + goto wireup; + } + return rc; + } + + /* otherwise, extract the byte object holding the daemonmap */ cnt=1; if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &bo, &cnt, OPAL_BYTE_OBJECT))) { ORTE_ERROR_LOG(rc); @@ -345,6 +493,18 @@ int orte_odls_base_default_update_daemon_info(opal_buffer_t *data) return rc; } + /* see if we have wiring info as well */ + cnt=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &flag, &cnt, OPAL_INT8))) { + ORTE_ERROR_LOG(rc); + return rc; + } + if (0 == flag) { + /* no - just return */ + return rc; + } + +wireup: /* unpack the #bytes of daemon wireup info in the message */ cnt=1; if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &numbytes, &cnt, OPAL_INT32))) { @@ -396,14 +556,69 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data, "%s odls:constructing child list", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - /* unpack the returned data to create the required structures - * for a fork launch. Since the data will contain information - * on procs for ALL nodes, we first have to find the value - * struct that contains info for our node. - */ + /* unpack the flag for regexp */ + cnt=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &flag, &cnt, OPAL_INT8))) { + ORTE_ERROR_LOG(rc); + goto REPORT_ERROR; + } - /* set the default values since they may not be included in the data */ - *job = ORTE_JOBID_INVALID; + if (0 < flag) { + if (1 == flag) { + OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, + "%s odls: constructing jobdat from regexp", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + + /* need to setup the job from the regexp */ + if (ORTE_SUCCESS != (rc = unpack_regexp(&jobdat, data))) { + ORTE_ERROR_LOG(rc); + goto REPORT_ERROR; + } + /* record the jobid */ + *job = jobdat->jobid; + } else { + OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, + "%s odls: using jobdat previously extracted from regexp", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + /* unpack the jobid */ + cnt=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, job, &cnt, ORTE_JOBID))) { + *job = ORTE_JOBID_INVALID; + ORTE_ERROR_LOG(rc); + goto REPORT_ERROR; + } + /* find the corresponding jobdat */ + for (item = opal_list_get_first(&orte_local_jobdata); + item != opal_list_get_end(&orte_local_jobdata); + item = opal_list_get_next(item)) { + orte_odls_job_t *jdat = (orte_odls_job_t*)item; + + /* is this the specified job? */ + if (jdat->jobid == *job) { + OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, + "%s odls:construct_child_list found existing jobdat for job %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(*job))); + jobdat = jdat; + break; + } + } + if (NULL == jobdat) { + /* we have a problem */ + rc = ORTE_ERR_NOT_FOUND; + ORTE_ERROR_LOG(rc); + goto REPORT_ERROR; + } + } + /* fake an app_idx array */ + app_idx = (int8_t*)malloc(jobdat->num_procs * sizeof(int8_t)); + memset(app_idx, 0, jobdat->num_procs * sizeof(int8_t)); + /* if we are doing a timing test, store the time the msg was recvd */ + if (orte_timing) { + jobdat->launch_msg_recvd.tv_sec = orte_daemon_msg_recvd.tv_sec; + jobdat->launch_msg_recvd.tv_usec = orte_daemon_msg_recvd.tv_usec; + } + goto find_my_procs; + } /* unpack the flag - are we co-locating debugger daemons? */ cnt=1; @@ -451,6 +666,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data, /* unpack the jobid we are to launch */ cnt=1; if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, job, &cnt, ORTE_JOBID))) { + *job = ORTE_JOBID_INVALID; ORTE_ERROR_LOG(rc); goto REPORT_ERROR; } @@ -475,6 +691,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data, OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, "%s odls:construct_child_list found existing jobdat for job %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(*job))); + jobdat = jdat; break; } } @@ -487,13 +704,13 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data, jobdat->jobid = *job; opal_list_append(&orte_local_jobdata, &jobdat->super); } - /* if we are doing a timing test, store the time the msg was recvd */ if (orte_timing) { jobdat->launch_msg_recvd.tv_sec = orte_daemon_msg_recvd.tv_sec; jobdat->launch_msg_recvd.tv_usec = orte_daemon_msg_recvd.tv_usec; } + /* UNPACK JOB-SPECIFIC DATA */ /* unpack the number of nodes involved in this job */ cnt=1; @@ -590,6 +807,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data, } } +find_my_procs: /* cycle through the procs and find mine */ proc.jobid = jobdat->jobid; for (j=0; j < jobdat->num_procs; j++) { @@ -1807,6 +2025,7 @@ int orte_odls_base_default_require_sync(orte_process_name_t *proc, orte_std_cntr_t cnt; int rc; bool found=false; + int8_t flag; /* protect operations involving the global list of children */ OPAL_THREAD_LOCK(&orte_odls_globals.mutex); @@ -1888,8 +2107,25 @@ int orte_odls_base_default_require_sync(orte_process_name_t *proc, /* the proc needs a copy of both the daemon/node map, and * the process map for its peers */ - opal_dss.pack(&buffer, &orte_odls_globals.dmap, 1, OPAL_BYTE_OBJECT); - opal_dss.pack(&buffer, &jobdat->pmap, 1, OPAL_BYTE_OBJECT); + if (NULL != jobdat->regexp) { + /* the data is in a regexp - send that */ + OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, + "%s odls:sync sending regexp %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + jobdat->regexp)); + flag = 1; + opal_dss.pack(&buffer, &flag, 1, OPAL_INT8); + opal_dss.pack(&buffer, &jobdat->regexp, 1, OPAL_STRING); + } else { + /* the data is in the local byte objects - send them */ + OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, + "%s odls:sync sending byte object", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + flag = 0; + opal_dss.pack(&buffer, &flag, 1, OPAL_INT8); + opal_dss.pack(&buffer, &orte_odls_globals.dmap, 1, OPAL_BYTE_OBJECT); + opal_dss.pack(&buffer, &jobdat->pmap, 1, OPAL_BYTE_OBJECT); + } } OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output, diff --git a/orte/mca/odls/base/odls_base_open.c b/orte/mca/odls/base/odls_base_open.c index b7531b3016..167d40b363 100644 --- a/orte/mca/odls/base/odls_base_open.c +++ b/orte/mca/odls/base/odls_base_open.c @@ -111,6 +111,7 @@ static void orte_odls_job_constructor(orte_odls_job_t *ptr) ptr->total_slots_alloc = 0; ptr->num_procs = 0; ptr->num_local_procs = 0; + ptr->regexp = NULL; ptr->pmap = NULL; OBJ_CONSTRUCT(&ptr->collection_bucket, opal_buffer_t); OBJ_CONSTRUCT(&ptr->local_collection, opal_buffer_t); @@ -132,6 +133,10 @@ static void orte_odls_job_destructor(orte_odls_job_t *ptr) } } + if (NULL != ptr->regexp) { + free(ptr->regexp); + } + if (NULL != ptr->pmap && NULL != ptr->pmap->bytes) { free(ptr->pmap->bytes); free(ptr->pmap); diff --git a/orte/mca/odls/odls_types.h b/orte/mca/odls/odls_types.h index e4da6615da..b9ef8ebf81 100644 --- a/orte/mca/odls/odls_types.h +++ b/orte/mca/odls/odls_types.h @@ -110,6 +110,7 @@ typedef struct orte_odls_job_t { orte_std_cntr_t num_nodes; /* number of nodes involved in the job */ orte_vpid_t num_procs; int32_t num_local_procs; + char *regexp; /* the regular expression describing the job */ opal_byte_object_t *pmap; /* local copy of pidmap byte object */ opal_buffer_t collection_bucket; opal_buffer_t local_collection; diff --git a/orte/mca/oob/tcp/oob_tcp.c b/orte/mca/oob/tcp/oob_tcp.c index a570849351..ec929a145c 100644 --- a/orte/mca/oob/tcp/oob_tcp.c +++ b/orte/mca/oob/tcp/oob_tcp.c @@ -659,7 +659,14 @@ mca_oob_tcp_create_listen(int *target_sd, unsigned short *target_port, uint16_t * system pick any port */ opal_argv_append_nosize(&ports, "0"); - orte_static_ports = false; + /* if static ports were specified, flag it + * so the HNP does the right thing + */ + if (NULL != mca_oob_tcp_component.tcp4_static_ports) { + orte_static_ports = true; + } else { + orte_static_ports = false; + } } } @@ -715,7 +722,14 @@ mca_oob_tcp_create_listen(int *target_sd, unsigned short *target_port, uint16_t * system pick any port */ opal_argv_append_nosize(&ports, "0"); - orte_static_ports = false; + /* if static ports were specified, flag it + * so the HNP does the right thing + */ + if (NULL != mca_oob_tcp_component.tcp6_static_ports) { + orte_static_ports = true; + } else { + orte_static_ports = false; + } } } #endif /* OPAL_WANT_IPV6 */ diff --git a/orte/mca/plm/base/plm_base_launch_support.c b/orte/mca/plm/base/plm_base_launch_support.c index 785ff63eac..5bb5cbf84e 100644 --- a/orte/mca/plm/base/plm_base_launch_support.c +++ b/orte/mca/plm/base/plm_base_launch_support.c @@ -58,13 +58,13 @@ #include "orte/util/proc_info.h" #include "orte/util/regex.h" +#include "orte/mca/odls/odls_types.h" + #include "orte/mca/plm/base/plm_private.h" #include "orte/mca/plm/base/base.h" static bool active_job_completed_callback = false; -static int orte_plm_base_report_launched(orte_jobid_t job); - static char *pretty_print_timing(int64_t secs, int64_t usecs); int orte_plm_base_setup_job(orte_job_t *jdata) @@ -109,6 +109,7 @@ int orte_plm_base_setup_job(orte_job_t *jdata) */ { char *crud; + orte_odls_job_t *jobdat; crud = orte_regex_encode_maps(jdata); opal_output(0, "maps regex: %s", (NULL == crud) ? "NULL" : crud); if (NULL == crud) { @@ -118,18 +119,30 @@ int orte_plm_base_setup_job(orte_job_t *jdata) return ORTE_ERROR; } orte_util_nidmap_init(NULL); - orte_regex_decode_maps(crud); + orte_regex_decode_maps(crud, &jobdat); free(crud); /* print-out the map */ orte_nidmap_dump(); orte_jobmap_dump(); + /* printout the jobdat */ + opal_output(orte_clean_output, "**** DUMP OF JOBDAT %s (%d nodes %d procs) ***", + ORTE_JOBID_PRINT(jobdat->jobid), (int)jobdat->num_nodes, (int)(jobdat->num_procs)); + opal_output(orte_clean_output, "\tNum slots: %d\tControl: %x\tStdin: %d", + (int)jobdat->total_slots_alloc, jobdat->controls, (int)jobdat->stdin_target); + opal_output(orte_clean_output, "\tApp: %s", jobdat->apps[0]->app); + opal_output(orte_clean_output, "\tCwd: %s", jobdat->apps[0]->cwd); + crud = opal_argv_join(jobdat->apps[0]->argv, ','); + opal_output(orte_clean_output, "\tArgv: %s", crud); + free(crud); + crud = opal_argv_join(jobdat->apps[0]->env, ','); + opal_output(orte_clean_output, "\tEnv: %s", crud); + free(crud); orte_never_launched = true; ORTE_UPDATE_EXIT_STATUS(0); orte_trigger_event(&orte_exit); return ORTE_ERROR; } - - + { opal_byte_object_t bo; @@ -928,7 +941,7 @@ static void app_report_launch(int status, orte_process_name_t* sender, } -static int orte_plm_base_report_launched(orte_jobid_t job) +int orte_plm_base_report_launched(orte_jobid_t job) { int rc; orte_job_t *jdata; diff --git a/orte/mca/plm/base/plm_private.h b/orte/mca/plm/base/plm_private.h index 7ea173f0f0..2904aec7cc 100644 --- a/orte/mca/plm/base/plm_private.h +++ b/orte/mca/plm/base/plm_private.h @@ -94,6 +94,7 @@ ORTE_DECLSPEC int orte_plm_base_set_progress_sched(int sched); ORTE_DECLSPEC int orte_plm_base_setup_job(orte_job_t *jdata); ORTE_DECLSPEC int orte_plm_base_launch_apps(orte_jobid_t job); ORTE_DECLSPEC void orte_plm_base_launch_failed(orte_jobid_t job, pid_t pid, int status, orte_job_state_t state); +ORTE_DECLSPEC int orte_plm_base_report_launched(orte_jobid_t job); ORTE_DECLSPEC int orte_plm_base_daemon_callback(orte_std_cntr_t num_daemons); diff --git a/orte/mca/plm/slurm/plm_slurm_module.c b/orte/mca/plm/slurm/plm_slurm_module.c index 9436c2f331..355372c067 100644 --- a/orte/mca/plm/slurm/plm_slurm_module.c +++ b/orte/mca/plm/slurm/plm_slurm_module.c @@ -63,6 +63,7 @@ #include "orte/types.h" #include "orte/util/show_help.h" #include "orte/util/name_fns.h" +#include "orte/util/regex.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_wait.h" #include "orte/mca/errmgr/errmgr.h" @@ -159,6 +160,7 @@ static int plm_slurm_launch_job(orte_job_t *jdata) int proc_vpid_index; orte_jobid_t failed_job; bool failed_launch=true; + bool using_regexp=false; if (jdata->controls & ORTE_JOB_CONTROL_LOCAL_SLAVE) { /* if this is a request to launch a local slave, @@ -320,15 +322,6 @@ static int plm_slurm_launch_job(orte_job_t *jdata) argv[proc_vpid_index] = strdup(name_string); free(name_string); - if (0 < opal_output_get_verbosity(orte_plm_globals.output)) { - param = opal_argv_join(argv, ' '); - OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output, - "%s plm:slurm: final top-level argv:\n\t%s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (NULL == param) ? "NULL" : param)); - if (NULL != param) free(param); - } - /* Copy the prefix-directory specified in the corresponding app_context. If there are multiple, different prefix's in the app context, complain (i.e., only @@ -369,6 +362,29 @@ static int plm_slurm_launch_job(orte_job_t *jdata) opal_setenv(var, "rsh", true, &env); free(var); + /* if we can do it, use the regexp to launch the apps - this + * requires that the user requested this mode, that we were + * provided with static ports, and that we only have one + * app_context + */ + if (orte_use_regexp && orte_static_ports && jdata->num_apps < 2) { + char *regexp; + regexp = orte_regex_encode_maps(jdata); + opal_argv_append(&argc, &argv, "--launch"); + opal_argv_append(&argc, &argv, regexp); + free(regexp); + using_regexp = true; + } + + if (0 < opal_output_get_verbosity(orte_plm_globals.output)) { + param = opal_argv_join(argv, ' '); + OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output, + "%s plm:slurm: final top-level argv:\n\t%s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + (NULL == param) ? "NULL" : param)); + if (NULL != param) free(param); + } + /* exec the daemon(s) */ if (ORTE_SUCCESS != (rc = plm_slurm_start_proc(argc, argv, env, cur_prefix))) { ORTE_ERROR_LOG(rc); @@ -393,12 +409,25 @@ launch_apps: /* get here if daemons launch okay - any failures now by apps */ launching_daemons = false; failed_job = active_job; - if (ORTE_SUCCESS != (rc = orte_plm_base_launch_apps(active_job))) { - OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output, - "%s plm:slurm: launch of apps failed for job %s on error %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_JOBID_PRINT(active_job), ORTE_ERROR_NAME(rc))); - goto cleanup; + if (using_regexp) { + /* daemons already have launch cmd - just wait for them to + * report back + */ + if (ORTE_SUCCESS != (rc = orte_plm_base_report_launched(jdata->jobid))) { + OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output, + "%s plm:slurm:launch failed for job %s on error %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_JOBID_PRINT(jdata->jobid), ORTE_ERROR_NAME(rc))); + goto cleanup; + } + } else { + if (ORTE_SUCCESS != (rc = orte_plm_base_launch_apps(active_job))) { + OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output, + "%s plm:slurm: launch of apps failed for job %s on error %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_JOBID_PRINT(active_job), ORTE_ERROR_NAME(rc))); + goto cleanup; + } } /* declare the launch a success */ diff --git a/orte/orted/orted_comm.c b/orte/orted/orted_comm.c index c022dc830c..c3ecdf61ac 100644 --- a/orte/orted/orted_comm.c +++ b/orte/orted/orted_comm.c @@ -276,7 +276,7 @@ void orte_daemon_cmd_processor(int fd, short event, void *data) orte_daemon_msg_recvd.tv_sec = mesg_recvd.tv_sec; orte_daemon_msg_recvd.tv_usec = mesg_recvd.tv_usec; } - /* cmd contains daemon update info - process it */ + /* the cmd contains daemon update info - process it */ if (ORTE_SUCCESS != (ret = orte_odls_base_default_update_daemon_info(buffer))) { ORTE_ERROR_LOG(ret); goto CLEANUP; diff --git a/orte/orted/orted_main.c b/orte/orted/orted_main.c index 91c1de2585..bc84bea2b0 100644 --- a/orte/orted/orted_main.c +++ b/orte/orted/orted_main.c @@ -193,6 +193,10 @@ opal_cmd_line_init_t orte_cmd_line_opts[] = { NULL, OPAL_CMD_LINE_TYPE_STRING, "Create a new xterm window and display output from the specified ranks there" }, + { NULL, NULL, NULL, '\0', "launch", "launch", 1, + &orted_launch_cmd, OPAL_CMD_LINE_TYPE_STRING, + "A regular expression describing the job to be launched at startup" }, + /* End of list */ { NULL, NULL, NULL, '\0', NULL, NULL, 0, NULL, OPAL_CMD_LINE_TYPE_NULL, NULL } @@ -684,6 +688,20 @@ int orte_daemon(int argc, char *argv[]) if (0 < orted_globals.heartbeat) { ORTE_TIMER_EVENT(orted_globals.heartbeat, 0, orte_plm_base_heartbeat); } + + /* if we were given a launch string, then process it */ + if (NULL != orted_launch_cmd) { + opal_buffer_t launch; + int8_t flag; + orte_daemon_cmd_flag_t command = ORTE_DAEMON_ADD_LOCAL_PROCS; + OBJ_CONSTRUCT(&launch, opal_buffer_t); + opal_dss.pack(&launch, &command, 1, ORTE_DAEMON_CMD); + flag = 1; + opal_dss.pack(&launch, &flag, 1, OPAL_INT8); + opal_dss.pack(&launch, &orted_launch_cmd, 1, OPAL_STRING); + ORTE_MESSAGE_EVENT(ORTE_PROC_MY_NAME, &launch, ORTE_RML_TAG_DAEMON, orte_daemon_cmd_processor); + OBJ_DESTRUCT(&launch); + } /* wait to hear we are done */ opal_event_dispatch(); diff --git a/orte/runtime/orte_globals.c b/orte/runtime/orte_globals.c index fea505e092..9af951712b 100644 --- a/orte/runtime/orte_globals.c +++ b/orte/runtime/orte_globals.c @@ -64,7 +64,6 @@ bool orte_hetero_apps = false; bool orte_never_launched = false; bool orte_devel_level_output = false; -int32_t orte_contiguous_nodes; char **orte_launch_environ; bool orte_hnp_is_allocated = false; @@ -105,6 +104,8 @@ bool orte_send_profile; /* Nidmap and job maps */ opal_pointer_array_t orte_nidmap; opal_pointer_array_t orte_jobmap; +bool orte_use_regexp; +char *orted_launch_cmd = NULL; /* list of local children on a daemon */ opal_list_t orte_local_children; diff --git a/orte/runtime/orte_globals.h b/orte/runtime/orte_globals.h index e50e4caf5c..4d1963ef24 100644 --- a/orte/runtime/orte_globals.h +++ b/orte/runtime/orte_globals.h @@ -456,7 +456,6 @@ ORTE_DECLSPEC extern bool orte_leave_session_attached; ORTE_DECLSPEC extern bool orte_do_not_launch; ORTE_DECLSPEC extern bool orted_spin_flag; ORTE_DECLSPEC extern bool orte_static_ports; -ORTE_DECLSPEC extern int32_t orte_contiguous_nodes; ORTE_DECLSPEC extern bool orte_keep_fqdn_hostnames; ORTE_DECLSPEC extern bool orte_show_resolved_nodenames; ORTE_DECLSPEC extern int orted_debug_failure; @@ -508,6 +507,8 @@ ORTE_DECLSPEC extern bool orte_send_profile; /* Nidmap and job maps */ ORTE_DECLSPEC extern opal_pointer_array_t orte_nidmap; ORTE_DECLSPEC extern opal_pointer_array_t orte_jobmap; +ORTE_DECLSPEC extern bool orte_use_regexp; +ORTE_DECLSPEC extern char *orted_launch_cmd; /* list of local children on a daemon */ ORTE_DECLSPEC extern opal_list_t orte_local_children; diff --git a/orte/runtime/orte_mca_params.c b/orte/runtime/orte_mca_params.c index fb02abc093..f6a0c9dab3 100644 --- a/orte/runtime/orte_mca_params.c +++ b/orte/runtime/orte_mca_params.c @@ -205,11 +205,12 @@ int orte_register_params(void) false, false, (int)false, &value); orte_keep_fqdn_hostnames = OPAL_INT_TO_BOOL(value); - /* whether or not contiguous nodenames are in use */ - mca_base_param_reg_int_name("orte", "contiguous_nodes", - "Number of nodes after which contiguous nodename encoding will automatically be used [default: INT_MAX]", - false, false, INT32_MAX, &orte_contiguous_nodes); - + /* whether or not to use regular expressions for launch */ + mca_base_param_reg_int_name("orte", "use_regexp", + "Whether or not to use regular expressions for launch [default: no]", + false, false, (int)false, &value); + orte_use_regexp = OPAL_INT_TO_BOOL(value); + /* whether to tag output */ mca_base_param_reg_int_name("orte", "tag_output", "Tag all output with [job,rank] (default: false)", diff --git a/orte/tools/orterun/orterun.c b/orte/tools/orterun/orterun.c index 06c74fe8e7..e0b96e7bba 100644 --- a/orte/tools/orterun/orterun.c +++ b/orte/tools/orterun/orterun.c @@ -353,6 +353,10 @@ static opal_cmd_line_init_t cmd_line_init[] = { NULL, OPAL_CMD_LINE_TYPE_BOOL, "Output a brief periodic report on launch progress" }, + { "orte", "use", "regexp", '\0', "use-regexp", "use-regexp", 0, + NULL, OPAL_CMD_LINE_TYPE_BOOL, + "Use regular expressions for launch" }, + /* End of list */ { NULL, NULL, NULL, '\0', NULL, NULL, 0, NULL, OPAL_CMD_LINE_TYPE_NULL, NULL } diff --git a/orte/util/nidmap.c b/orte/util/nidmap.c index df03c03cc8..9384cac644 100644 --- a/orte/util/nidmap.c +++ b/orte/util/nidmap.c @@ -53,6 +53,7 @@ #include "orte/util/show_help.h" #include "orte/util/proc_info.h" #include "orte/util/name_fns.h" +#include "orte/util/regex.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/rml/base/rml_contact.h" @@ -65,6 +66,8 @@ int orte_util_nidmap_init(opal_buffer_t *buffer) int32_t cnt; int rc; opal_byte_object_t *bo; + int8_t flag; + char *regexp; if (!initialized) { /* need to construct the global arrays */ @@ -85,6 +88,29 @@ int orte_util_nidmap_init(opal_buffer_t *buffer) return ORTE_SUCCESS; } + /* extract the flag indicating the type of info in the buffer */ + cnt=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &flag, &cnt, OPAL_INT8))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + if (0 < flag) { + /* the data is a regular expression - extract and parse it + * to get the daemonmap and process map + */ + cnt=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, ®exp, &cnt, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + return rc; + } + if (ORTE_SUCCESS != (rc = orte_regex_decode_maps(regexp, NULL))) { + ORTE_ERROR_LOG(rc); + } + free(regexp); + return rc; + } + /* extract the byte object holding the daemonmap */ cnt=1; if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &bo, &cnt, OPAL_BYTE_OBJECT))) { @@ -263,15 +289,11 @@ int orte_util_encode_nodemap(opal_byte_object_t *boptr) { orte_vpid_t *vpids; orte_node_t **nodes; - char prefix[ORTE_MAX_NODE_PREFIX], *tmp; - int32_t i, len, firstnode, lastnode, nodenum, num_nodes; - uint8_t command = ORTE_CONTIG_NODE_CMD; + int32_t i, num_nodes; uint8_t num_digs; - uint8_t incdec; int rc; char *nodename; opal_buffer_t buf; - int step; int32_t *arch; /* setup a buffer for tmp use */ @@ -314,160 +336,25 @@ int orte_util_encode_nodemap(opal_byte_object_t *boptr) } } - /* see if the cluster is configured with contiguous - * node names and we have more than the HNP - */ - if (orte_contiguous_nodes < num_nodes) { - /* discover the prefix - find first non-alpha character */ - len = strlen(nodes[1]->name); - memset(prefix, 0, ORTE_MAX_NODE_PREFIX); - prefix[0] = nodes[1]->name[0]; /* must start with alpha */ - for (i=1; i < len; i++) { - if (!isalpha(nodes[1]->name[i])) { - /* found a non-alpha char */ - if (!isdigit(nodes[1]->name[i])) { - /* if it is anything but a digit, - * then that's not good - */ - opal_output(0, "%s encode:nidmap Nodename pattern is nonstandard", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - return ORTE_ERROR; - } - /* okay, this defines end of the prefix. - * convert rest of name to an offset - */ - firstnode = strtol(&(nodes[1]->name[i]), NULL, 10); - /* figure out how many digits are in the index */ - for (num_digs=0; isdigit(nodes[1]->name[i+num_digs]); num_digs++); - goto PACK; + /* pack every nodename individually */ + for (i=1; i < num_nodes; i++) { + if (!orte_keep_fqdn_hostnames) { + char *ptr; + nodename = strdup(nodes[i]->name); + if (NULL != (ptr = strchr(nodename, '.'))) { + *ptr = '\0'; } - prefix[i] = nodes[1]->name[i]; - } - - PACK: - /* begin encoding rest of map by indicating that this will - * be a contiguous node map - */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &command, 1, OPAL_UINT8))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* pack the prefix */ - tmp = &prefix[0]; - if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &tmp, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - len = strlen(prefix); - - /* pack the number of digits in the index */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &num_digs, 1, OPAL_UINT8))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* and the starting offset */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &firstnode, 1, OPAL_INT32))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - OPAL_OUTPUT_VERBOSE((2, orte_debug_output, - "%s encode:nidmap:contig_nodes prefix %s num_digits %d offset %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), prefix, num_digs, firstnode)); - - lastnode = strtol(&(nodes[2]->name[i]), NULL, 10); - if ((lastnode - firstnode) < 0) { - /* we are decrementing */ - incdec = 0; - if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &incdec, 1, OPAL_INT8))) { + if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &nodename, 1, OPAL_STRING))) { ORTE_ERROR_LOG(rc); return rc; } + free(nodename); } else { - /* we are incrementing */ - incdec = 1; - if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &incdec, 1, OPAL_INT8))) { + if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &nodes[i]->name, 1, OPAL_STRING))) { ORTE_ERROR_LOG(rc); return rc; } } - - lastnode = firstnode; - /* cycle through the nodes - pack the starting offset - * and total number of nodes in each contiguous range - */ - for (i=2; i < num_nodes; i++) { - nodenum = strtol(&(nodes[i]->name[len]), NULL, 10); - step = nodenum -lastnode; - if (step < 0) { - /* we are decrementing */ - step = lastnode - nodenum; - } - if (step > 1) { - /* have a break - indicate end of range */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &lastnode, 1, OPAL_INT32))) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* indicate start of new range */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &nodenum, 1, OPAL_INT32))) { - ORTE_ERROR_LOG(rc); - return rc; - } - OPAL_OUTPUT_VERBOSE((2, orte_debug_output, - "%s encode:nidmap:contig_nodes end range %d start next range %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lastnode, nodenum)); - } - lastnode = nodenum; - } - /* pack end of range */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &lastnode, 1, OPAL_INT32))) { - ORTE_ERROR_LOG(rc); - return rc; - } - OPAL_OUTPUT_VERBOSE((2, orte_debug_output, - "%s encode:nidmap:contig_nodes end range %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), lastnode)); - /* pack flag end of ranges */ - lastnode = -1; - if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &lastnode, 1, OPAL_INT32))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } else { - /* if the nodes aren't contiguous, then we need - * to simply pack every nodename individually - */ - OPAL_OUTPUT_VERBOSE((2, orte_debug_output, - "%s encode:nidmap non_contig_nodes - packing all names", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - /* indicate that this will not be a contiguous node map */ - command = ORTE_NON_CONTIG_NODE_CMD; - if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &command, 1, OPAL_UINT8))) { - ORTE_ERROR_LOG(rc); - return rc; - } - for (i=1; i < num_nodes; i++) { - if (!orte_keep_fqdn_hostnames) { - char *ptr; - nodename = strdup(nodes[i]->name); - if (NULL != (ptr = strchr(nodename, '.'))) { - *ptr = '\0'; - } - if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &nodename, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - free(nodename); - } else { - if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &nodes[i]->name, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - } } /* since the daemon vpids may not correspond to the node @@ -588,15 +475,12 @@ int orte_util_encode_nodemap(opal_byte_object_t *boptr) int orte_util_decode_nodemap(opal_byte_object_t *bo) { - int n, loc, k, diglen, namelen; - char *prefix, digits[10]; - int32_t num_nodes, lastnode, endrange, i, num_daemons; + int n; + int32_t num_nodes, i, num_daemons; orte_nid_t *node; orte_vpid_t *vpids; - uint8_t command, num_digs; + uint8_t num_digs; orte_nid_t **nd, *ndptr; - uint8_t incdec; - int32_t index, step; int32_t *arch; opal_buffer_t buf; opal_byte_object_t *boptr; @@ -655,123 +539,22 @@ int orte_util_decode_nodemap(opal_byte_object_t *bo) return rc; } - /* unpack flag to see if this is a contiguous node map or not */ - n=1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &command, &n, OPAL_UINT8))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_CONTIG_NODE_CMD == command) { - /* unpack the prefix */ - n=1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &prefix, &n, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* the number of digits in the index */ - n=1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &num_digs, &n, OPAL_UINT8))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* and the starting offset */ - n=1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &lastnode, &n, OPAL_INT32))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* unpack increment/decrement flag */ - n=1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &incdec, &n, OPAL_INT8))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* unpack the end of the range */ - n=1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &endrange, &n, OPAL_INT32))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* setup loop params */ - if (0 == incdec) { - endrange -= 1; - step = -1; - } else { - endrange += 1; - step = 1; - } - - OPAL_OUTPUT_VERBOSE((2, orte_debug_output, - "%s decode:nidmap:contig_nodes prefix %s num_digits %d offset %d endrange %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), prefix, num_digs, lastnode, endrange)); - - namelen = strlen(prefix) + num_digs + 1; - /* cycle through the ranges */ - index = 1; - while (1) { - for (i=lastnode; i != endrange; i += step) { - node = OBJ_NEW(orte_nid_t); - /* allocate space for the nodename */ - node->name = (char*)malloc(namelen); - memset(node->name, 0, namelen); - loc = snprintf(node->name, namelen, "%s", prefix); - diglen = num_digs - snprintf(digits, 10, "%d", i); - for (k=0; k < diglen && loc < namelen; k++) { - node->name[loc] = '0'; - loc++; - } - strncat(node->name, digits, num_digs); - /* the arch defaults to our arch so that non-hetero - * case will yield correct behavior - */ - opal_pointer_array_set_item(&orte_nidmap, index, node); - index++; - } - /* unpack start of new range */ - n=1; - opal_dss.unpack(&buf, &lastnode, &n, OPAL_INT32); - /* if that is -1, then it flags no more ranges */ - if (-1 == lastnode) { - goto process_daemons; - } - n=1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &endrange, &n, OPAL_INT32))) { - ORTE_ERROR_LOG(rc); - return rc; - } - if (0 == incdec) { - endrange -= 1; - } else { - endrange += 1; - } - } - } else { - /* not contiguous - just loop over nodes and - * unpack the raw nodename + /* loop over nodes and unpack the raw nodename */ + for (i=1; i < num_nodes; i++) { + node = OBJ_NEW(orte_nid_t); + /* the arch defaults to our arch so that non-hetero + * case will yield correct behavior */ - for (i=1; i < num_nodes; i++) { - node = OBJ_NEW(orte_nid_t); - /* the arch defaults to our arch so that non-hetero - * case will yield correct behavior - */ - opal_pointer_array_set_item(&orte_nidmap, i, node); - - /* unpack the node's name */ - n=1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &(node->name), &n, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } + opal_pointer_array_set_item(&orte_nidmap, i, node); + + /* unpack the node's name */ + n=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &(node->name), &n, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + return rc; } } -process_daemons: /* unpack the daemon names */ vpids = (orte_vpid_t*)malloc(num_nodes * sizeof(orte_vpid_t)); n=num_nodes; @@ -1141,7 +924,7 @@ cleanup: orte_jmap_t* orte_util_lookup_jmap(orte_jobid_t job) { int i; - orte_jmap_t **jmaps; + orte_jmap_t *jmap; /* unfortunately, job objects cannot be stored * by index number as the jobid is a constructed @@ -1151,17 +934,16 @@ orte_jmap_t* orte_util_lookup_jmap(orte_jobid_t job) * left-justified as cleanup is done - and array * entries set to NULL - upon job completion. */ - jmaps = (orte_jmap_t**)orte_jobmap.addr; for (i=0; i < orte_jobmap.size; i++) { - if (NULL == jmaps[i]) { + if (NULL == (jmap = (orte_jmap_t*)opal_pointer_array_get_item(&orte_jobmap, i))) { continue; } OPAL_OUTPUT_VERBOSE((10, orte_debug_output, "%s lookup:pmap: checking job %s for job %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_JOBID_PRINT(jmaps[i]->job), ORTE_JOBID_PRINT(job))); - if (job == jmaps[i]->job) { - return jmaps[i]; + ORTE_JOBID_PRINT(jmap->job), ORTE_JOBID_PRINT(job))); + if (job == jmap->job) { + return jmap; } } @@ -1192,19 +974,18 @@ orte_pmap_t* orte_util_lookup_pmap(orte_process_name_t *proc) static orte_nid_t* find_daemon_node(orte_process_name_t *proc) { int32_t i; - orte_nid_t **nids; + orte_nid_t *nid; - nids = (orte_nid_t**)orte_nidmap.addr; for (i=0; i < orte_nidmap.size; i++) { - if (NULL == nids[i]) { + if (NULL == (nid = (orte_nid_t*)opal_pointer_array_get_item(&orte_nidmap, i))) { continue; } OPAL_OUTPUT_VERBOSE((10, orte_debug_output, "%s find:daemon:node: checking daemon %s for %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_VPID_PRINT(nids[i]->daemon), ORTE_VPID_PRINT(proc->vpid))); - if (nids[i]->daemon == proc->vpid) { - return nids[i]; + ORTE_VPID_PRINT(nid->daemon), ORTE_VPID_PRINT(proc->vpid))); + if (nid->daemon == proc->vpid) { + return nid; } } diff --git a/orte/util/regex.c b/orte/util/regex.c index b5fd7600e6..70405616a2 100644 --- a/orte/util/regex.c +++ b/orte/util/regex.c @@ -26,12 +26,30 @@ #ifdef HAVE_UNISTD_H #include #endif +#ifdef HAVE_SYS_SOCKET_H +#include +#endif +#ifdef HAVE_NETINET_IN_H +#include +#endif +#ifdef HAVE_ARPA_INET_H +#include +#endif +#ifdef HAVE_NETDB_H +#include +#endif +#ifdef HAVE_IFADDRS_H +#include +#endif #include "opal/util/argv.h" #include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/odls/odls_types.h" +#include "orte/mca/rml/base/rml_contact.h" #include "orte/util/show_help.h" #include "orte/util/name_fns.h" +#include "orte/util/nidmap.h" #include "orte/runtime/orte_globals.h" #include "orte/util/regex.h" @@ -445,19 +463,23 @@ char* orte_regex_encode_maps(orte_job_t *jdata) char prefix[ORTE_MAX_NODE_PREFIX]; int startnum; opal_list_item_t *item; - char **regexargs = NULL, *tmp; + char **regexargs = NULL, *tmp, *tmp2; int32_t num_nodes, start, cnt, ppn, nppn; orte_vpid_t vpid_start, start_vpid, end_vpid, base; char *regexp = NULL; bool byslot; orte_node_rank_t node_rank, nrank; char suffix, sfx; + orte_app_context_t *app; /* this is only supported with regular maps - i.e., when * the mapping is byslot or bynode. Irregular maps cannot * be expressed in a regular expression + * + * Also only supported for one app_context */ - if (jdata->map->policy & ORTE_RMAPS_BYUSER) { + if (jdata->map->policy & ORTE_RMAPS_BYUSER || + jdata->num_apps > 1) { return NULL; } @@ -598,6 +620,39 @@ char* orte_regex_encode_maps(orte_job_t *jdata) opal_argv_append_nosize(®exargs, tmp); free(tmp); + /* next comes the total slots allocated to us */ + asprintf(&tmp, "SLOTS=%d", (int)jdata->total_slots_alloc); + opal_argv_append_nosize(®exargs, tmp); + free(tmp); + + /* the control flags for this job */ + asprintf(&tmp, "CTRLS=%d", (int)jdata->controls); + opal_argv_append_nosize(®exargs, tmp); + free(tmp); + + /* the stdin target for the job */ + asprintf(&tmp, "STDIN=%d", (int)jdata->stdin_target); + opal_argv_append_nosize(®exargs, tmp); + free(tmp); + + /* the app_context for the job - can only be one! Just include + * the required portions + */ + app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, 0); + asprintf(&tmp, "APP=\"%s:%s\"", app->app, app->cwd); + opal_argv_append_nosize(®exargs, tmp); + free(tmp); + tmp2 = opal_argv_join(app->argv, '#'); + asprintf(&tmp, "ARGV=\"%s\"", (NULL == tmp2) ? "NULL" : tmp2); + free(tmp2); + opal_argv_append_nosize(®exargs, tmp); + free(tmp); + tmp2 = opal_argv_join(app->env, '#'); + asprintf(&tmp, "ENV=\"%s\"", (NULL == tmp2) ? "NULL" : tmp2); + free(tmp2); + opal_argv_append_nosize(®exargs, tmp); + free(tmp); + /* next comes the starting daemon vpid */ asprintf(&tmp, "DVPID=%s", ORTE_VPID_PRINT(jdata->map->daemon_vpid_start)); opal_argv_append_nosize(®exargs, tmp); @@ -855,10 +910,10 @@ cleanup: return rc; } -int orte_regex_decode_maps(char *regexp) +int orte_regex_decode_maps(char *regexp, orte_odls_job_t **jobdat) { - char **seqs, *ptr, **names; - int i, j, k, n, rc; + char **seqs, *ptr, **names, *ptr2, check[5]; + int i, j, k, n, entry, rc; int ppn, step, start_nrank, nrank; int32_t tmp32; orte_vpid_t daemon_vpid, vpid; @@ -867,27 +922,46 @@ int orte_regex_decode_maps(char *regexp) orte_jmap_t *jmap; orte_pmap_t *pmap; bool found; - + orte_odls_job_t *jdat; + orte_app_context_t *app; + opal_list_item_t *item; + int num_procs, num_nodes; + struct hostent *h; + opal_buffer_t buf; + char *uri, *addr; + orte_process_name_t proc; + char *proc_name; + bool hnp_entry; + /* if regexp is NULL, then nothing to parse */ if (NULL == regexp) { return ORTE_ERR_SILENT; } + /* ensure the global nidmap/pidmap arrays are initialized */ + if (ORTE_SUCCESS != (rc = orte_util_nidmap_init(NULL))) { + ORTE_ERROR_LOG(rc); + return rc; + } + /* break the regexp into its component parts - this is trivial * because they are all separated by commas! */ seqs = opal_argv_split(regexp, ','); - /* we need to have at least three elements or something is wrong */ - if (opal_argv_count(seqs) < 3) { + /* we need to have at least six elements or something is wrong */ + if (opal_argv_count(seqs) < 6) { opal_argv_free(seqs); return ORTE_ERROR; } + /* start parsing with the first entry */ + entry=0; + /* the first entry is the local jobid, so we extract that and * convert it into a global jobid */ - ptr = strchr(seqs[0], '='); + ptr = strchr(seqs[entry++], '='); if (NULL == ptr) { opal_argv_free(seqs); return ORTE_ERROR; @@ -915,8 +989,146 @@ int orte_regex_decode_maps(char *regexp) opal_pointer_array_add(&orte_jobmap, jmap); } - /* the second entry is the starting daemon vpid for the job being launched */ - ptr = strchr(seqs[0], '='); + jdat = NULL; + if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_DAEMON) { + /* even though we are unpacking an add_local_procs cmd, we cannot assume + * that no job record for this jobid exists. A race condition exists that + * could allow another daemon's procs to call us with a collective prior + * to our unpacking add_local_procs. So lookup the job record for this jobid + * and see if it already exists + */ + for (item = opal_list_get_first(&orte_local_jobdata); + item != opal_list_get_end(&orte_local_jobdata); + item = opal_list_get_next(item)) { + orte_odls_job_t *jdt = (orte_odls_job_t*)item; + + /* is this the specified job? */ + if (jdt->jobid == jobid) { + jdat = jdt; + break; + } + } + if (NULL == jdat) { + /* setup jobdat object for this job */ + jdat = OBJ_NEW(orte_odls_job_t); + jdat->jobid = jobid; + opal_list_append(&orte_local_jobdata, &jdat->super); + } + if (NULL != jobdat) { + *jobdat = jdat; + } + /* see if this was previously decoded */ + if (NULL != jdat->regexp) { + /* yep - don't decode it again */ + opal_argv_free(seqs); + return ORTE_SUCCESS; + } + + /* next entry is the total slots allocated to this job */ + ptr = strchr(seqs[entry++], '='); + if (NULL == ptr) { + opal_argv_free(seqs); + return ORTE_ERROR; + } + ptr++; + jdat->total_slots_alloc = strtol(ptr, NULL, 10); + + /* next entry is the control flags for the job */ + ptr = strchr(seqs[entry++], '='); + if (NULL == ptr) { + opal_argv_free(seqs); + return ORTE_ERROR; + } + ptr++; + jdat->controls = strtol(ptr, NULL, 10); + + /* next entry - stdin target */ + ptr = strchr(seqs[entry++], '='); + if (NULL == ptr) { + opal_argv_free(seqs); + return ORTE_ERROR; + } + ptr++; + jdat->stdin_target = strtol(ptr, NULL, 10); + + /* next entry - the app_context itself */ + ptr = strchr(seqs[entry++], '='); + if (NULL == ptr) { + opal_argv_free(seqs); + return ORTE_ERROR; + } + ptr++; + /* some shells will strip the starting and ending quotes, and some won't - + * so check for them here + */ + if ('\"' == *ptr) ptr++; + if ('\"' == ptr[strlen(ptr)-1]) ptr[strlen(ptr)-1] = '\0'; + /* create the app_context object */ + app = OBJ_NEW(orte_app_context_t); + jdat->apps = (orte_app_context_t**)malloc(sizeof(orte_app_context_t*)); + jdat->apps[0] = app; + jdat->num_apps = 1; + /* get the app and the cwd by hand */ + ptr2 = strchr(ptr, ':'); + *ptr2 = '\0'; + app->app = strdup(ptr); + ptr = ++ptr2; + app->cwd = strdup(ptr); + + /* the next entry is the argv for the app_context, separated by '#'. We + * assume we can use argv_split for this purpose. First check, though, for + * NULL, indicating there were no argvs + */ + ptr = strchr(seqs[entry++], '='); + if (NULL == ptr) { + opal_argv_free(seqs); + return ORTE_ERROR; + } + ptr++; + /* some shells will strip the starting and ending quotes, and some won't - + * so check for them here + */ + if ('\"' == *ptr) ptr++; + if ('\"' == ptr[strlen(ptr)-1]) ptr[strlen(ptr)-1] = '\0'; + for (i=0; i < 4; i++) { + check[i] = ptr[i]; + } + check[4] = '\0'; + if (0 != strcmp("NULL", check)) { + /* there are argvs */ + app->argv = opal_argv_split(ptr, '#'); + + } + + /* the next entry is the env for the app_context, also separated by '#'. + * Again, start by checking for NULL + */ + ptr = strchr(seqs[entry++], '='); + if (NULL == ptr) { + opal_argv_free(seqs); + return ORTE_ERROR; + } + ptr++; + /* some shells will strip the starting and ending quotes, and some won't - + * so check for them here + */ + if ('\"' == *ptr) ptr++; + if ('\"' == ptr[strlen(ptr)-1]) ptr[strlen(ptr)-1] = '\0'; + for (i=0; i < 4; i++) { + check[i] = ptr[i]; + } + check[4] = '\0'; + if (0 != strcmp("NULL", check)) { + /* there are argvs */ + app->env = opal_argv_split(ptr, '#'); + } + + } else { + entry += 6; + } + + /* next entry is the starting daemon vpid for the job being launched */ + ptr = strchr(seqs[entry++], '='); if (NULL == ptr) { opal_argv_free(seqs); return ORTE_ERROR; @@ -930,14 +1142,18 @@ int orte_regex_decode_maps(char *regexp) opal_argv_free(seqs); return rc; } - + /* the remaining entries contain the name of the nodes in the system, how * many procs (if any) on each of those nodes, the starting vpid of the * procs on those nodes, and the starting node rank for the procs on * each node */ names = NULL; - for (n=2; n < opal_argv_count(seqs); n++) { + num_procs = 0; + num_nodes = 0; + hnp_entry = true; + OBJ_CONSTRUCT(&buf, opal_buffer_t); + for (n=entry; n < opal_argv_count(seqs); n++) { /* parse the node entry to get a list of all node names in it */ if (ORTE_SUCCESS != (rc = parse_node_range(seqs[n], &names, &vpid, &ppn, &step, &start_nrank))) { ORTE_ERROR_LOG(rc); @@ -963,22 +1179,64 @@ int orte_regex_decode_maps(char *regexp) nid->name = strdup(names[i]); nid->index = opal_pointer_array_add(&orte_nidmap, nid); } - /* are there any procs on this node? */ - if (ORTE_VPID_INVALID != vpid) { + /* is this the hnp entry (very first one), or are there any procs on this node? */ + if (hnp_entry || ORTE_VPID_INVALID != vpid) { /* yep - add a daemon if we don't already one, otherwise * this is just adding procs to an existing daemon */ if (ORTE_VPID_INVALID != daemon_vpid && ORTE_VPID_INVALID == nid->daemon) { /* no daemon assigned yet - add it */ - nid->daemon = daemon_vpid; - daemon_vpid++; + if (hnp_entry) { + /* the hnp is always daemon=0 */ + nid->daemon = 0; + hnp_entry = false; /* only do this once */ + } else { + nid->daemon = daemon_vpid++; + } + /* if we are using static ports, create the contact info + * for the daemon on this node + */ + if (orte_static_ports) { + /* lookup the address of this node */ + if (NULL == (h = gethostbyname(nid->name))) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + return ORTE_ERR_NOT_FOUND; + } + addr = inet_ntoa(*(struct in_addr*)h->h_addr_list[0]); + + OPAL_OUTPUT_VERBOSE((0, orte_debug_output, + "%s orte:regex: constructing static path to node %s daemon %d addr %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + nid->name, (int)nid->daemon, addr)); + + /* since we are using static ports, all my fellow daemons will be on my + * port. Setup the contact info for each daemon in my hash tables. Note + * that this will -not- open a port to those daemons, but will only + * define the info necessary for opening such a port if/when I communicate + * to them + */ + /* construct the URI */ + proc.jobid = ORTE_PROC_MY_NAME->jobid; + proc.vpid = nid->daemon; + orte_util_convert_process_name_to_string(&proc_name, &proc); + asprintf(&uri, "%s;tcp://%s:%d", proc_name, addr, (int)orte_process_info.my_port); + opal_dss.pack(&buf, &uri, 1, OPAL_STRING); + free(proc_name); + free(uri); + } } + /* cycle through the ppn, adding a pmap * for each new rank */ nrank = start_nrank; for (k=0; k < ppn; k++) { + if (NULL != opal_pointer_array_get_item(&jmap->pmap, vpid)) { + /* this proc was already entered via some earlier step */ + vpid += step; + continue; + } pmap = OBJ_NEW(orte_pmap_t); pmap->node = nid->index; pmap->local_rank = k; @@ -986,12 +1244,35 @@ int orte_regex_decode_maps(char *regexp) jmap->num_procs++; opal_pointer_array_set_item(&jmap->pmap, vpid, pmap); vpid += step; + /* increment #procs in the job */ + num_procs++; } + /* increment #nodes in the job */ + num_nodes++; } } opal_argv_free(names); names = NULL; } + + /* if we are using static ports, load the hash tables */ + if (orte_static_ports) { + if (ORTE_SUCCESS != (rc = orte_rml_base_update_contact_info(&buf))) { + ORTE_ERROR_LOG(rc); + } + } + OBJ_DESTRUCT(&buf); + + opal_argv_free(seqs); + + if (NULL != jdat) { + /* record the regexp so it can be sent to the local procs */ + jdat->regexp = strdup(regexp); + /* save the job data */ + jdat->num_procs += num_procs; + jdat->num_nodes += num_nodes; + } + return ORTE_SUCCESS; } diff --git a/orte/util/regex.h b/orte/util/regex.h index 75b798e87d..5742831e46 100644 --- a/orte/util/regex.h +++ b/orte/util/regex.h @@ -25,6 +25,7 @@ #include "orte_config.h" +#include "orte/mca/odls/odls_types.h" #include "orte/runtime/orte_globals.h" BEGIN_C_DECLS @@ -35,7 +36,7 @@ ORTE_DECLSPEC int orte_regex_extract_ppn(int num_nodes, char *regexp, int **ppn) ORTE_DECLSPEC char* orte_regex_encode_maps(orte_job_t *jdata); -ORTE_DECLSPEC int orte_regex_decode_maps(char *regexp); +ORTE_DECLSPEC int orte_regex_decode_maps(char *regexp, orte_odls_job_t **jobdat); END_C_DECLS #endif