diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c index 9e4b220ad1..582207ae40 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c @@ -990,11 +990,11 @@ static void _dmodex_req(int sd, short args, void *cbdata) * may not be a contribution */ if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->server->myremote, info->rank, "modex", &val)) && NULL != val) { - data = val->data.bo.bytes; - sz = val->data.bo.size; - /* protect the data */ - val->data.bo.bytes = NULL; - val->data.bo.size = 0; + data = val->data.bo.bytes; + sz = val->data.bo.size; + /* protect the data */ + val->data.bo.bytes = NULL; + val->data.bo.size = 0; PMIX_VALUE_RELEASE(val); } diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c index 10c3a62741..bf6be3ab39 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_ops.c @@ -138,7 +138,6 @@ pmix_status_t pmix_server_commit(pmix_peer_t *peer, pmix_buffer_t *buf) pmix_nspace_t *nptr; pmix_rank_info_t *info; pmix_dmdx_remote_t *dcd, *dcdnext; - pmix_buffer_t *pbkt; pmix_value_t *val; char *data; size_t sz; @@ -236,16 +235,19 @@ pmix_status_t pmix_server_commit(pmix_peer_t *peer, pmix_buffer_t *buf) if (dcd->cd->proc.rank == info->rank) { /* we can now fulfill this request - collect the * remote/global data from this proc */ - pbkt = PMIX_NEW(pmix_buffer_t); /* get any remote contribution - note that there * may not be a contribution */ + data = NULL; + sz = 0; if (PMIX_SUCCESS == pmix_hash_fetch(&nptr->server->myremote, info->rank, "modex", &val) && NULL != val) { - PMIX_LOAD_BUFFER(pbkt, val->data.bo.bytes, val->data.bo.size); + data = val->data.bo.bytes; + sz = val->data.bo.size; + /* protect the data */ + val->data.bo.bytes = NULL; + val->data.bo.size = 0; PMIX_VALUE_RELEASE(val); } 
- PMIX_UNLOAD_BUFFER(pbkt, data, sz); - PMIX_RELEASE(pbkt); /* execute the callback */ dcd->cd->cbfunc(PMIX_SUCCESS, data, sz, dcd->cd->cbdata); if (NULL != data) { diff --git a/opal/mca/pmix/pmix2x/pmix/src/util/compress.h b/opal/mca/pmix/pmix2x/pmix/src/util/compress.h index b07b0d2ea7..630cdc990c 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/util/compress.h +++ b/opal/mca/pmix/pmix2x/pmix/src/util/compress.h @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -31,8 +31,8 @@ BEGIN_C_DECLS -/* define a limit for storing raw strings */ -#define PMIX_STRING_LIMIT 512 +/* define a limit of 128k for raw strings */ +#define PMIX_STRING_LIMIT 131072 /* define a macro for quickly checking if a string exceeds the * compression limit */ diff --git a/orte/mca/ess/alps/ess_alps_module.c b/orte/mca/ess/alps/ess_alps_module.c index 4f0f47b501..1109c360e2 100644 --- a/orte/mca/ess/alps/ess_alps_module.c +++ b/orte/mca/ess/alps/ess_alps_module.c @@ -12,6 +12,7 @@ * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -58,7 +59,6 @@ static int rte_init(void) { int ret; char *error = NULL; - char **hosts = NULL; OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output, "ess:alps in rte_init")); @@ -90,23 +90,11 @@ static int rte_init(void) * default procedure */ if (ORTE_PROC_IS_DAEMON) { - if (NULL != orte_node_regex) { - /* extract the nodes */ - if (ORTE_SUCCESS != (ret = - orte_regex_extract_node_names(orte_node_regex, &hosts)) || - NULL == hosts) { - error = "orte_regex_extract_node_names"; - goto fn_fail; - } - } - if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup(hosts))) { + if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_orted_setup"; goto fn_fail; } - if (NULL != hosts) { - opal_argv_free(hosts); - } /* * now synchronize with aprun. diff --git a/orte/mca/ess/base/base.h b/orte/mca/ess/base/base.h index 4387a5e98d..2fefed0845 100644 --- a/orte/mca/ess/base/base.h +++ b/orte/mca/ess/base/base.h @@ -12,7 +12,7 @@ * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. - * Copyright (c) 2013 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -67,7 +67,7 @@ ORTE_DECLSPEC void orte_ess_base_app_abort(int status, bool report); ORTE_DECLSPEC int orte_ess_base_tool_setup(void); ORTE_DECLSPEC int orte_ess_base_tool_finalize(void); -ORTE_DECLSPEC int orte_ess_base_orted_setup(char **hosts); +ORTE_DECLSPEC int orte_ess_base_orted_setup(void); ORTE_DECLSPEC int orte_ess_base_orted_finalize(void); /* Detect whether or not this proc is bound - if not, diff --git a/orte/mca/ess/base/ess_base_std_orted.c b/orte/mca/ess/base/ess_base_std_orted.c index ce6bdd5fe9..a3e3e2d44f 100644 --- a/orte/mca/ess/base/ess_base_std_orted.c +++ b/orte/mca/ess/base/ess_base_std_orted.c @@ -103,7 +103,7 @@ static void setup_sighandler(int signal, opal_event_t *ev, } -int orte_ess_base_orted_setup(char **hosts) +int orte_ess_base_orted_setup(void) { int ret = ORTE_ERROR; int fd; @@ -113,7 +113,6 @@ int orte_ess_base_orted_setup(char **hosts) orte_job_t *jdata; orte_proc_t *proc; orte_app_context_t *app; - orte_node_t *node; char *param; hwloc_obj_t obj; unsigned i, j; @@ -218,12 +217,9 @@ int orte_ess_base_orted_setup(char **hosts) * a specific module to use */ (void) mca_base_var_env_name("plm", &param); - plm_in_use = !!(getenv(param)); free (param); - if (plm_in_use) { - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_plm_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orte_plm_base_open"; @@ -332,11 +328,6 @@ int orte_ess_base_orted_setup(char **hosts) app = OBJ_NEW(orte_app_context_t); opal_pointer_array_set_item(jdata->apps, 0, app); jdata->num_apps++; - /* create and store a node object where we are */ - node = OBJ_NEW(orte_node_t); - node->name = strdup(orte_process_info.nodename); - node->index = ORTE_PROC_MY_NAME->vpid; - opal_pointer_array_set_item(orte_node_pool, ORTE_PROC_MY_NAME->vpid, node); /* create and store a proc object for us */ proc = OBJ_NEW(orte_proc_t); @@ -345,19 +336,6 @@ int orte_ess_base_orted_setup(char **hosts) proc->pid =
orte_process_info.pid; proc->state = ORTE_PROC_STATE_RUNNING; opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc); - /* record that the daemon (i.e., us) is on this node - * NOTE: we do not add the proc object to the node's - * proc array because we are not an application proc. - * Instead, we record it in the daemon field of the - * node object - */ - OBJ_RETAIN(proc); /* keep accounting straight */ - node->daemon = proc; - ORTE_FLAG_SET(node, ORTE_NODE_FLAG_DAEMON_LAUNCHED); - node->state = ORTE_NODE_STATE_UP; - /* now point our proc node field to the node */ - OBJ_RETAIN(node); /* keep accounting straight */ - proc->node = node; /* record that the daemon job is running */ jdata->num_procs = 1; jdata->state = ORTE_JOB_STATE_RUNNING; @@ -514,7 +492,6 @@ int orte_ess_base_orted_setup(char **hosts) orte_topo_signature = opal_hwloc_base_get_topo_signature(opal_hwloc_topology); t->sig = strdup(orte_topo_signature); opal_pointer_array_add(orte_node_topologies, t); - node->topology = t; if (15 < opal_output_get_verbosity(orte_ess_base_framework.framework_output)) { opal_output(0, "%s Topology Info:", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); opal_dss.dump(0, opal_hwloc_topology, OPAL_HWLOC_TOPO); @@ -526,12 +503,25 @@ int orte_ess_base_orted_setup(char **hosts) * after we enable_comm as that function determines our * own port, which we need in order to construct the nidmap */ - if (NULL != hosts) { + if (NULL != orte_node_regex) { + if (ORTE_SUCCESS != (ret = orte_util_nidmap_parse(orte_node_regex))) { + ORTE_ERROR_LOG(ret); + error = "construct nidmap"; + goto error; + } + } + + if (orte_static_ports) { + if (NULL == orte_node_regex) { + /* we didn't get the node info */ + error = "cannot construct daemon map for static ports - no node map info"; + goto error; + } /* extract the node info from the environment and * build a nidmap from it - this will update the * routing plan as well */ - if (ORTE_SUCCESS != (ret = orte_util_build_daemon_nidmap(hosts))) { + if 
(ORTE_SUCCESS != (ret = orte_util_build_daemon_nidmap())) { ORTE_ERROR_LOG(ret); error = "construct daemon map from static ports"; goto error; @@ -635,6 +625,7 @@ int orte_ess_base_orted_setup(char **hosts) } return ORTE_SUCCESS; + error: orte_show_help("help-orte-runtime.txt", "orte_init:startup:internal-failure", diff --git a/orte/mca/ess/env/ess_env_module.c b/orte/mca/ess/env/ess_env_module.c index c04b8c0c83..bc4152e23e 100644 --- a/orte/mca/ess/env/ess_env_module.c +++ b/orte/mca/ess/env/ess_env_module.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -98,7 +98,6 @@ static int rte_init(void) { int ret; char *error = NULL; - char **hosts = NULL; /* run the prolog */ if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { @@ -112,19 +111,11 @@ static int rte_init(void) /* if I am a daemon, complete my setup using the * default procedure */ - if (NULL != orte_node_regex) { - /* extract the nodes */ - if (ORTE_SUCCESS != (ret = orte_regex_extract_node_names(orte_node_regex, &hosts))) { - error = "orte_regex_extract_node_names"; - goto error; - } - } - if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup(hosts))) { + if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_orted_setup"; goto error; } - opal_argv_free(hosts); return ORTE_SUCCESS; error: diff --git a/orte/mca/ess/lsf/ess_lsf_module.c b/orte/mca/ess/lsf/ess_lsf_module.c index f9aef64269..cb200e4df3 100644 --- a/orte/mca/ess/lsf/ess_lsf_module.c +++ b/orte/mca/ess/lsf/ess_lsf_module.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2011 Cisco Systems, Inc. 
All rights reserved. - * Copyright (c) 2013 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -68,7 +68,6 @@ static int rte_init(void) { int ret; char *error = NULL; - char **hosts = NULL; /* run the prolog */ if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { @@ -83,19 +82,11 @@ static int rte_init(void) * default procedure */ if (ORTE_PROC_IS_DAEMON) { - if (NULL != orte_node_regex) { - /* extract the nodes */ - if (ORTE_SUCCESS != (ret = orte_regex_extract_node_names(orte_node_regex, &hosts))) { - error = "orte_regex_extract_node_names"; - goto error; - } - } - if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup(hosts))) { + if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_orted_setup"; goto error; } - opal_argv_free(hosts); return ORTE_SUCCESS; } diff --git a/orte/mca/ess/slurm/ess_slurm_module.c b/orte/mca/ess/slurm/ess_slurm_module.c index 472b6aa9ee..c645c4ecaa 100644 --- a/orte/mca/ess/slurm/ess_slurm_module.c +++ b/orte/mca/ess/slurm/ess_slurm_module.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -62,7 +62,6 @@ static int rte_init(void) { int ret; char *error = NULL; - char **hosts = NULL; /* run the prolog */ if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { @@ -77,23 +76,11 @@ static int rte_init(void) * default procedure */ if (ORTE_PROC_IS_DAEMON) { - if (NULL != orte_node_regex) { - /* extract the nodes */ - if (ORTE_SUCCESS != (ret = - orte_regex_extract_node_names(orte_node_regex, &hosts)) || - NULL == hosts) { - error = "orte_regex_extract_node_names"; - goto error; - } - } - if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup(hosts))) { + if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_orted_setup"; goto error; } - if (NULL != hosts) { - opal_argv_free(hosts); - } return ORTE_SUCCESS; } diff --git a/orte/mca/ess/tm/ess_tm_module.c b/orte/mca/ess/tm/ess_tm_module.c index 0ebad54b7a..b9fe8e0cbe 100644 --- a/orte/mca/ess/tm/ess_tm_module.c +++ b/orte/mca/ess/tm/ess_tm_module.c @@ -67,7 +67,6 @@ static int rte_init(void) { int ret; char *error = NULL; - char **hosts = NULL; /* run the prolog */ if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { @@ -82,21 +81,11 @@ static int rte_init(void) * default procedure */ if (ORTE_PROC_IS_DAEMON) { - if (NULL != orte_node_regex) { - /* extract the nodes */ - if (ORTE_SUCCESS != (ret = - orte_regex_extract_node_names(orte_node_regex, &hosts)) || - NULL == hosts) { - error = "orte_regex_extract_node_names"; - goto error; - } - } - if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup(hosts))) { + if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_orted_setup"; goto error; } - opal_argv_free(hosts); return ORTE_SUCCESS; } @@ -194,4 +183,3 @@ static int tm_set_name(void) return ORTE_SUCCESS; } - diff --git a/orte/mca/grpcomm/direct/grpcomm_direct.c b/orte/mca/grpcomm/direct/grpcomm_direct.c index e69068a711..967d590bc8 100644 --- 
a/orte/mca/grpcomm/direct/grpcomm_direct.c +++ b/orte/mca/grpcomm/direct/grpcomm_direct.c @@ -270,7 +270,7 @@ static void xcast_recv(int status, orte_process_name_t* sender, opal_list_t coll; orte_grpcomm_signature_t *sig; orte_rml_tag_t tag; - char *rtmod; + char *rtmod, *nidmap; size_t inlen, cmplen; uint8_t *packed_data, *cmpdata; @@ -392,7 +392,8 @@ static void xcast_recv(int status, orte_process_name_t* sender, } opal_dss.copy_payload(relay, data); } else if (ORTE_DAEMON_ADD_LOCAL_PROCS == command || - ORTE_DAEMON_DVM_NIDMAP_CMD == command) { + ORTE_DAEMON_DVM_NIDMAP_CMD == command || + ORTE_DAEMON_DVM_ADD_PROCS == command) { /* setup our internal relay buffer */ relay = OBJ_NEW(opal_buffer_t); /* repack the command */ @@ -400,14 +401,25 @@ static void xcast_recv(int status, orte_process_name_t* sender, ORTE_ERROR_LOG(ret); goto relay; } - /* see if any daemons were launched */ + /* unpack the nidmap string - may be NULL */ + cnt = 1; + if (OPAL_SUCCESS != (ret = opal_dss.unpack(data, &nidmap, &cnt, OPAL_STRING))) { + ORTE_ERROR_LOG(ret); + goto relay; + } + if (NULL != nidmap) { + if (ORTE_SUCCESS != (ret = orte_util_nidmap_parse(nidmap))) { + ORTE_ERROR_LOG(ret); + goto relay; + } + free(nidmap); + } + /* see if they included info on node capabilities */ cnt = 1; if (OPAL_SUCCESS != (ret = opal_dss.unpack(data, &flag, &cnt, OPAL_INT8))) { ORTE_ERROR_LOG(ret); goto relay; } - /* add it to our relay buffer as we will need it later */ - opal_dss.pack(relay, &flag, 1, OPAL_INT8); if (0 != flag) { /* update our local nidmap, if required - the decode function * knows what to do diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c index ece314f518..175473cf5e 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -113,6 +113,7 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer, int8_t flag; void *nptr; uint32_t key; + char *nidmap; /* get the job 
data pointer */ if (NULL == (jdata = orte_get_job_data_object(job))) { @@ -127,19 +128,32 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer, return ORTE_SUCCESS; } - /* if we launched new daemons... */ - if (orte_get_attribute(&jdata->attributes, ORTE_JOB_LAUNCHED_DAEMONS, NULL, OPAL_BOOL)) { - /* flag that we did */ + /* if we couldn't provide the allocation regex on the orted + * cmd line, then we need to provide all the info here */ + if (!orte_nidmap_communicated) { + if (ORTE_SUCCESS != (rc = orte_util_nidmap_create(&nidmap))) { + ORTE_ERROR_LOG(rc); + return rc; + } + orte_nidmap_communicated = true; + } else { + nidmap = NULL; + } + opal_dss.pack(buffer, &nidmap, 1, OPAL_STRING); + if (NULL != nidmap) { + free(nidmap); + } + + /* if we haven't already done so, provide the info on the + * capabilities of each node */ + if (!orte_node_info_communicated || + orte_get_attribute(&jdata->attributes, ORTE_JOB_LAUNCHED_DAEMONS, NULL, OPAL_BOOL)) { flag = 1; opal_dss.pack(buffer, &flag, 1, OPAL_INT8); - - /* include a nodemap of the daemons */ if (ORTE_SUCCESS != (rc = orte_util_encode_nodemap(buffer))) { ORTE_ERROR_LOG(rc); return rc; } - - /* if we are not using static ports, we need to send the wireup info */ if (!orte_static_ports && !orte_fwd_mpirun_port) { /* pack a flag indicating wiring info is provided */ flag = 1; @@ -176,41 +190,52 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer, * copy of all active jobs so the grpcomm collectives can * properly work should a proc from one of the other jobs * interact with this one */ - OBJ_CONSTRUCT(&jobdata, opal_buffer_t); - numjobs = 0; - rc = opal_hash_table_get_first_key_uint32(orte_job_data, &key, (void **)&jptr, &nptr); - while (OPAL_SUCCESS == rc) { - /* skip the one we are launching now */ - if (NULL != jptr && jptr != jdata && - ORTE_PROC_MY_NAME->jobid != jptr->jobid) { - /* pack the job struct */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(&jobdata, &jptr, 1, 
ORTE_JOB))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&jobdata); - return rc; + if (orte_get_attribute(&jdata->attributes, ORTE_JOB_LAUNCHED_DAEMONS, NULL, OPAL_BOOL)) { + flag = 1; + opal_dss.pack(buffer, &flag, 1, OPAL_INT8); + OBJ_CONSTRUCT(&jobdata, opal_buffer_t); + numjobs = 0; + rc = opal_hash_table_get_first_key_uint32(orte_job_data, &key, (void **)&jptr, &nptr); + while (OPAL_SUCCESS == rc) { + /* skip the one we are launching now */ + if (NULL != jptr && jptr != jdata && + ORTE_PROC_MY_NAME->jobid != jptr->jobid) { + /* pack the job struct */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(&jobdata, &jptr, 1, ORTE_JOB))) { + ORTE_ERROR_LOG(rc); + OBJ_DESTRUCT(&jobdata); + return rc; + } + ++numjobs; } - ++numjobs; + rc = opal_hash_table_get_next_key_uint32(orte_job_data, &key, (void **)&jptr, nptr, &nptr); } - rc = opal_hash_table_get_next_key_uint32(orte_job_data, &key, (void **)&jptr, nptr, &nptr); - } - /* pack the number of jobs */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &numjobs, 1, OPAL_INT32))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&jobdata); - return rc; - } - if (0 < numjobs) { - /* pack the jobdata buffer */ - wireup = &jobdata; - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &wireup, 1, OPAL_BUFFER))) { + /* pack the number of jobs */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &numjobs, 1, OPAL_INT32))) { ORTE_ERROR_LOG(rc); OBJ_DESTRUCT(&jobdata); return rc; } - OBJ_DESTRUCT(&jobdata); + if (0 < numjobs) { + /* pack the jobdata buffer */ + wireup = &jobdata; + if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &wireup, 1, OPAL_BUFFER))) { + ORTE_ERROR_LOG(rc); + OBJ_DESTRUCT(&jobdata); + return rc; + } + OBJ_DESTRUCT(&jobdata); + } + } else { + flag = 0; + opal_dss.pack(buffer, &flag, 1, OPAL_INT8); } + orte_node_info_communicated = true; } else { - /* include a sentinel */ + /* mark that we didn't */ + flag = 0; + opal_dss.pack(buffer, &flag, 1, OPAL_INT8); + /* and that we didn't launch daemons */ flag = 0; opal_dss.pack(buffer, 
&flag, 1, OPAL_INT8); } diff --git a/orte/mca/plm/alps/plm_alps_module.c b/orte/mca/plm/alps/plm_alps_module.c index 2549944212..2592cf5363 100644 --- a/orte/mca/plm/alps/plm_alps_module.c +++ b/orte/mca/plm/alps/plm_alps_module.c @@ -306,41 +306,42 @@ static void launch_daemons(int fd, short args, void *cbdata) opal_argv_append(&argc, &argv, "-e"); opal_argv_append(&argc, &argv, "OMPI_NO_USE_CRAY_PMI=1"); - /* create nodelist */ - nodelist_argv = NULL; - nodelist_argc = 0; - - for (nnode=0; nnode < map->nodes->size; nnode++) { - if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, nnode))) { - continue; - } - - /* if the daemon already exists on this node, then - * don't include it - */ - if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_DAEMON_LAUNCHED)) { - continue; - } - - /* otherwise, add it to the list of nodes upon which - * we need to launch a daemon - */ - opal_argv_append(&nodelist_argc, &nodelist_argv, node->name); - } - if (0 == opal_argv_count(nodelist_argv)) { - orte_show_help("help-plm-alps.txt", "no-hosts-in-list", true); - rc = ORTE_ERR_FAILED_TO_START; - goto cleanup; - } - nodelist_flat = opal_argv_join(nodelist_argv, ','); - opal_argv_free(nodelist_argv); - /* if we are using all allocated nodes, then alps * doesn't need a nodelist, or if running without a batch scheduler */ if ((map->num_new_daemons < orte_num_allocated_nodes) || (orte_num_allocated_nodes == 0)) { + /* create nodelist */ + nodelist_argv = NULL; + nodelist_argc = 0; + + for (nnode=0; nnode < map->nodes->size; nnode++) { + if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, nnode))) { + continue; + } + + /* if the daemon already exists on this node, then + * don't include it + */ + if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_DAEMON_LAUNCHED)) { + continue; + } + + /* otherwise, add it to the list of nodes upon which + * we need to launch a daemon + */ + opal_argv_append(&nodelist_argc, &nodelist_argv, node->name); + } + if (0 == 
opal_argv_count(nodelist_argv)) { + orte_show_help("help-plm-alps.txt", "no-hosts-in-list", true); + rc = ORTE_ERR_FAILED_TO_START; + goto cleanup; + } + nodelist_flat = opal_argv_join(nodelist_argv, ','); + opal_argv_free(nodelist_argv); + opal_argv_append(&argc, &argv, "-L"); opal_argv_append(&argc, &argv, nodelist_flat); + free(nodelist_flat); } @@ -351,20 +352,10 @@ static void launch_daemons(int fd, short args, void *cbdata) /* add the daemon command (as specified by user) */ orte_plm_base_setup_orted_cmd(&argc, &argv); - /* ensure that mpirun is - * on the list. Since alps won't be launching a daemon on it, - * it won't have been placed on the list, so create a new - * version here that includes it */ - asprintf(&ltmp, "%s,%s", orte_process_info.nodename, nodelist_flat); - free(nodelist_flat); - nodelist_flat = ltmp; - /* Add basic orted command line options, including debug flags */ orte_plm_base_orted_append_basic_args(&argc, &argv, NULL, - &proc_vpid_index, - nodelist_flat); - free(nodelist_flat); + &proc_vpid_index); /* tell the new daemons the base of the name list so they can compute * their own name on the other end diff --git a/orte/mca/plm/base/plm_base_launch_support.c b/orte/mca/plm/base/plm_base_launch_support.c index a9dbc4f041..8bedfef7d0 100644 --- a/orte/mca/plm/base/plm_base_launch_support.c +++ b/orte/mca/plm/base/plm_base_launch_support.c @@ -1037,20 +1037,6 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&daemon->name), nodename)); - /* look this node up, if necessary */ - if (!orte_plm_globals.daemon_nodes_assigned_at_launch) { - OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, - "%s plm:base:orted_report_launch attempting to assign daemon %s to node %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&dname), nodename)); - /* to "relocate" the daemon, we just update the name of - * the node object pointed to by this daemon */ -
free(daemon->node->name); - daemon->node->name = strdup(nodename); - /* mark that it was verified */ - ORTE_FLAG_SET(daemon->node, ORTE_NODE_FLAG_LOC_VERIFIED); - } - /* mark the daemon as launched */ ORTE_FLAG_SET(daemon->node, ORTE_NODE_FLAG_DAEMON_LAUNCHED); @@ -1312,8 +1298,7 @@ int orte_plm_base_setup_orted_cmd(int *argc, char ***argv) */ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv, char *ess, - int *proc_vpid_index, - char *nodes) + int *proc_vpid_index) { char *param = NULL; const char **tmp_value, **tmp_value2; @@ -1321,7 +1306,6 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv, char *tmp_force = NULL; int i, j, cnt, rc; orte_job_t *jdata; - char *rml_uri; unsigned long num_procs; bool ignore; @@ -1411,39 +1395,32 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv, opal_argv_append(argc, argv, param); free(param); - /* pass the uri of the hnp */ - if (ORTE_PROC_IS_HNP) { - rml_uri = orte_rml.get_contact_info(); - } else { - rml_uri = orte_rml.get_contact_info(); - opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID); - opal_argv_append(argc, argv, "orte_parent_uri"); - opal_argv_append(argc, argv, rml_uri); - free(rml_uri); - - rml_uri = strdup(orte_process_info.my_hnp_uri); - } - opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID); - opal_argv_append(argc, argv, "orte_hnp_uri"); - opal_argv_append(argc, argv, rml_uri); - free(rml_uri); - - /* pass the node list if one was given*/ + /* convert the nodes with daemons to a regex */ param = NULL; - if (NULL != nodes) { - /* convert the nodes to a regex */ - if (ORTE_SUCCESS != (rc = orte_regex_create(nodes, &param))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } else if (NULL != orte_node_regex) { - param = strdup(orte_node_regex); + if (ORTE_SUCCESS != (rc = orte_util_nidmap_create(&param))) { + ORTE_ERROR_LOG(rc); + return rc; } - if (NULL != param) { + /* if this is too long, then we'll have to do it with + * a phone home operation instead */ + if
(strlen(param) < ORTE_MAX_REGEX_CMD_LENGTH) { opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID); opal_argv_append(argc, argv, "orte_node_regex"); opal_argv_append(argc, argv, param); - free(param); + /* mark that the nidmap has been communicated */ + orte_nidmap_communicated = true; + } + free(param); + + if (!orte_static_ports && !orte_fwd_mpirun_port) { + /* if we are using static ports, or we are forwarding + * mpirun's port, then we would have built all the + * connection info and so there is nothing to be passed. + * Otherwise, we have to pass the HNP uri so we can + * phone home */ + opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID); + opal_argv_append(argc, argv, "orte_hnp_uri"); + opal_argv_append(argc, argv, orte_process_info.my_hnp_uri); } /* if requested, pass our port */ @@ -1994,7 +1971,7 @@ int orte_plm_base_setup_virtual_machine(orte_job_t *jdata) if (orte_hnp_is_allocated) { node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0); OBJ_RETAIN(node); - opal_list_append(&nodes, &node->super); + opal_list_prepend(&nodes, &node->super); } for (i=0; i < jdata->apps->size; i++) { if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) { @@ -2028,15 +2005,11 @@ int orte_plm_base_setup_virtual_machine(orte_job_t *jdata) } /* ensure we are not on the list */ - for (item = opal_list_get_first(&nodes); - item != opal_list_get_end(&nodes); - item = opal_list_get_next(item)) { - node = (orte_node_t*)item; - if (0 == node->index) { - opal_list_remove_item(&nodes, item); - OBJ_RELEASE(item); - break; - } + item = opal_list_get_first(&nodes); + node = (orte_node_t*)item; + if (0 == node->index) { + opal_list_remove_item(&nodes, item); + OBJ_RELEASE(item); } /* if we didn't get anything, then we are the only node in the diff --git a/orte/mca/plm/base/plm_private.h b/orte/mca/plm/base/plm_private.h index 835c6de843..047a508394 100644 --- a/orte/mca/plm/base/plm_private.h +++ b/orte/mca/plm/base/plm_private.h @@ -114,8 
+114,7 @@ ORTE_DECLSPEC void orte_plm_base_recv(int status, orte_process_name_t* sender, */ ORTE_DECLSPEC int orte_plm_base_orted_append_basic_args(int *argc, char ***argv, char *ess_module, - int *proc_vpid_index, - char *nodes); + int *proc_vpid_index); /* * Proxy functions for use by daemons and application procs diff --git a/orte/mca/plm/lsf/plm_lsf_module.c b/orte/mca/plm/lsf/plm_lsf_module.c index df5e0d9500..461feda868 100644 --- a/orte/mca/plm/lsf/plm_lsf_module.c +++ b/orte/mca/plm/lsf/plm_lsf_module.c @@ -160,7 +160,6 @@ static void launch_daemons(int fd, short args, void *cbdata) int rc; char** env = NULL; char **nodelist_argv; - char *nodelist; int nodelist_argc; char *vpid_string; int i; @@ -257,19 +256,11 @@ static void launch_daemons(int fd, short args, void *cbdata) /* add the daemon command (as specified by user) */ orte_plm_base_setup_orted_cmd(&argc, &argv); - /* we need mpirun to be the first node on this list - since we - * aren't launching mpirun via TM, it won't be there now */ - opal_argv_prepend_nosize(&nodelist_argv, orte_process_info.nodename); - nodelist = opal_argv_join(nodelist_argv, ','); - opal_argv_free(nodelist_argv); - /* Add basic orted command line options */ orte_plm_base_orted_append_basic_args(&argc, &argv, "lsf", - &proc_vpid_index, - nodelist); - free(nodelist); + &proc_vpid_index); /* tell the new daemons the base of the name list so they can compute * their own name on the other end diff --git a/orte/mca/plm/rsh/plm_rsh_module.c b/orte/mca/plm/rsh/plm_rsh_module.c index a8cd21e002..ac1f501c39 100644 --- a/orte/mca/plm/rsh/plm_rsh_module.c +++ b/orte/mca/plm/rsh/plm_rsh_module.c @@ -328,8 +328,7 @@ static void rsh_wait_daemon(orte_proc_t *daemon, void* cbdata) static int setup_launch(int *argcptr, char ***argvptr, char *nodename, int *node_name_index1, - int *proc_vpid_index, char *prefix_dir, - char *nodelist) + int *proc_vpid_index, char *prefix_dir) { int argc; char **argv; @@ -613,8 +612,7 @@ static int 
setup_launch(int *argcptr, char ***argvptr, */ orte_plm_base_orted_append_basic_args(&argc, &argv, "env", - proc_vpid_index, - nodelist); + proc_vpid_index); /* ensure that only the ssh plm is selected on the remote daemon */ opal_argv_append_nosize(&argv, "-"OPAL_MCA_CMD_LINE_ID); @@ -828,8 +826,9 @@ static int remote_spawn(opal_buffer_t *launch) } /* setup the launch */ - if (ORTE_SUCCESS != (rc = setup_launch(&argc, &argv, orte_process_info.nodename, &node_name_index1, - &proc_vpid_index, prefix, NULL))) { + if (ORTE_SUCCESS != (rc = setup_launch(&argc, &argv, + orte_process_info.nodename, &node_name_index1, + &proc_vpid_index, prefix))) { ORTE_ERROR_LOG(rc); OBJ_DESTRUCT(&coll); goto cleanup; @@ -1030,7 +1029,6 @@ static void launch_daemons(int fd, short args, void *cbdata) int port, *portptr; orte_namelist_t *child; char *rtmod; - char *nlistflat; /* if we are launching debugger daemons, then just go * do it - no new daemons will be launched @@ -1199,33 +1197,12 @@ static void launch_daemons(int fd, short args, void *cbdata) orte_routed.get_routing_list(rtmod, &coll); } - /* create a list of all nodes involved so we can pass it along */ - char **nodelist = NULL; - orte_node_t *n2; - for (nnode=0; nnode < map->nodes->size; nnode++) { - if (NULL != (n2 = (orte_node_t*)opal_pointer_array_get_item(map->nodes, nnode))) { - opal_argv_append_nosize(&nodelist, n2->name); - } - } - /* we need mpirun to be the first node on this list */ - if (NULL == nodelist || 0 != strcmp(nodelist[0], orte_process_info.nodename)) { - opal_argv_prepend_nosize(&nodelist, orte_process_info.nodename); - } - nlistflat = opal_argv_join(nodelist, ','); - opal_argv_free(nodelist); - /* setup the launch */ if (ORTE_SUCCESS != (rc = setup_launch(&argc, &argv, node->name, &node_name_index1, - &proc_vpid_index, prefix_dir, nlistflat))) { + &proc_vpid_index, prefix_dir))) { ORTE_ERROR_LOG(rc); - if (NULL != nlistflat) { - free(nlistflat); - } goto cleanup; } - if (NULL != nlistflat) { - 
free(nlistflat); - } /* * Iterate through each of the nodes diff --git a/orte/mca/plm/slurm/plm_slurm_module.c b/orte/mca/plm/slurm/plm_slurm_module.c index 75bebac472..1008ef09ee 100644 --- a/orte/mca/plm/slurm/plm_slurm_module.c +++ b/orte/mca/plm/slurm/plm_slurm_module.c @@ -323,6 +323,7 @@ static void launch_daemons(int fd, short args, void *cbdata) goto cleanup; } nodelist_flat = opal_argv_join(nodelist_argv, ','); + opal_argv_free(nodelist_argv); /* if we are using all allocated nodes, then srun doesn't * require any further arguments @@ -336,6 +337,7 @@ static void launch_daemons(int fd, short args, void *cbdata) opal_argv_append(&argc, &argv, tmp); free(tmp); } + free(nodelist_flat); /* tell srun how many tasks to run */ asprintf(&tmp, "--ntasks=%lu", (unsigned long)map->num_new_daemons); @@ -353,18 +355,9 @@ static void launch_daemons(int fd, short args, void *cbdata) /* add the daemon command (as specified by user) */ orte_plm_base_setup_orted_cmd(&argc, &argv); - /* we need mpirun to be the first node on this list - since we - * aren't launching mpirun via srun, it won't be there now */ - opal_argv_prepend_nosize(&nodelist_argv, orte_process_info.nodename); - free(nodelist_flat); - nodelist_flat = opal_argv_join(nodelist_argv, ','); - opal_argv_free(nodelist_argv); - /* Add basic orted command line options, including debug flags */ orte_plm_base_orted_append_basic_args(&argc, &argv, - "slurm", &proc_vpid_index, - nodelist_flat); - free(nodelist_flat); + "slurm", &proc_vpid_index); /* tell the new daemons the base of the name list so they can compute * their own name on the other end diff --git a/orte/mca/plm/tm/plm_tm_module.c b/orte/mca/plm/tm/plm_tm_module.c index e3e0c422da..cf16c60561 100644 --- a/orte/mca/plm/tm/plm_tm_module.c +++ b/orte/mca/plm/tm/plm_tm_module.c @@ -171,7 +171,6 @@ static void launch_daemons(int fd, short args, void *cbdata) char **env = NULL; char *var; char **argv = NULL; - char **nodeargv; int argc = 0; int rc; orte_std_cntr_t 
i; @@ -180,7 +179,6 @@ static void launch_daemons(int fd, short args, void *cbdata) tm_task_id *tm_task_ids = NULL; bool failed_launch = true; mode_t current_umask; - char *nodelist; char* vpid_string; orte_job_t *daemons, *jdata; orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata; @@ -260,32 +258,9 @@ static void launch_daemons(int fd, short args, void *cbdata) /* add the daemon command (as specified by user) */ orte_plm_base_setup_orted_cmd(&argc, &argv); - /* create a list of nodes in this launch */ - nodeargv = NULL; - for (i = 0; i < map->nodes->size; i++) { - if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) { - continue; - } - - /* if this daemon already exists, don't launch it! */ - if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_DAEMON_LAUNCHED)) { - continue; - } - - /* add to list */ - opal_argv_append_nosize(&nodeargv, node->name); - } - /* we need mpirun to be the first node on this list - since we - * aren't launching mpirun via TM, it won't be there now */ - opal_argv_prepend_nosize(&nodeargv, orte_process_info.nodename); - nodelist = opal_argv_join(nodeargv, ','); - opal_argv_free(nodeargv); - - /* Add basic orted command line options */ orte_plm_base_orted_append_basic_args(&argc, &argv, "tm", - &proc_vpid_index, - nodelist); + &proc_vpid_index); free(nodelist); if (0 < opal_output_get_verbosity(orte_plm_base_framework.framework_output)) { diff --git a/orte/mca/ras/alps/ras_alps_module.c b/orte/mca/ras/alps/ras_alps_module.c index 681c80fc9f..a8273dfd3c 100644 --- a/orte/mca/ras/alps/ras_alps_module.c +++ b/orte/mca/ras/alps/ras_alps_module.c @@ -365,25 +365,6 @@ ras_alps_getline(FILE *fp) return NULL; } -static int compare_nodes (opal_list_item_t **a, opal_list_item_t **b) -{ - orte_node_t *nodea = (orte_node_t *) *a; - orte_node_t *nodeb = (orte_node_t *) *b; - int32_t launcha, launchb, *ldptr; - - ldptr = &launcha; - if (!orte_get_attribute(&nodea->attributes, ORTE_NODE_LAUNCH_ID, (void**)&ldptr, OPAL_INT32)) { - return 
0; - } - - ldptr = &launchb; - if (!orte_get_attribute(&nodeb->attributes, ORTE_NODE_LAUNCH_ID, (void**)&ldptr, OPAL_INT32)) { - return 0; - } - - return (launcha > launchb) ? 1 : -1; -} - #if ALPS_APPINFO_VERSION > 0 && ALPS_APPINFO_VERSION < 3 typedef placeNodeList_t orte_ras_alps_placeNodeList_t; #else @@ -602,8 +583,6 @@ orte_ras_alps_read_appinfo_file(opal_list_t *nodes, char *filename, break; /* Extended details ignored */ } - opal_list_sort (nodes, compare_nodes); - free(cpBuf); /* Free the buffer */ return ORTE_SUCCESS; @@ -617,4 +596,3 @@ orte_ras_alps_finalize(void) "ras:alps:finalize: success (nothing to do)"); return ORTE_SUCCESS; } - diff --git a/orte/mca/state/dvm/state_dvm.c b/orte/mca/state/dvm/state_dvm.c index cb6b5b9fdd..bdadbc0028 100644 --- a/orte/mca/state/dvm/state_dvm.c +++ b/orte/mca/state/dvm/state_dvm.c @@ -243,6 +243,7 @@ static void vm_ready(int fd, short args, void *cbdata) opal_byte_object_t bo, *boptr; int8_t flag; int32_t numbytes; + char *nidmap; /* if this is my job, then we are done */ if (ORTE_PROC_MY_NAME->jobid == caddy->jdata->jobid) { @@ -250,50 +251,65 @@ static void vm_ready(int fd, short args, void *cbdata) * do this here so we don't have to do it for every * job we are going to launch */ buf = OBJ_NEW(opal_buffer_t); - /* pack the "load nidmap" cmd */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &command, 1, ORTE_DAEMON_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - return; + opal_dss.pack(buf, &command, 1, ORTE_DAEMON_CMD); + /* if we couldn't provide the allocation regex on the orted + * cmd line, then we need to provide all the info here */ + if (!orte_nidmap_communicated) { + if (ORTE_SUCCESS != (rc = orte_util_nidmap_create(&nidmap))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(buf); + return; + } + orte_nidmap_communicated = true; + } else { + nidmap = NULL; } - /* flag that daemons were launched so we will update the nidmap */ - flag = 1; - opal_dss.pack(buf, &flag, 1, OPAL_INT8); - /* construct a nodemap with 
everything in it */ - if (ORTE_SUCCESS != (rc = orte_util_encode_nodemap(buf))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - return; + opal_dss.pack(buf, &nidmap, 1, OPAL_STRING); + if (NULL != nidmap) { + free(nidmap); } - - if (!orte_static_ports && !orte_fwd_mpirun_port) { - /* pack a flag indicating wiring info is provided */ + /* provide the info on the capabilities of each node */ + if (!orte_node_info_communicated) { flag = 1; opal_dss.pack(buf, &flag, 1, OPAL_INT8); - /* get wireup info for daemons per the selected routing module */ - wireup = OBJ_NEW(opal_buffer_t); - if (ORTE_SUCCESS != (rc = orte_rml_base_get_contact_info(ORTE_PROC_MY_NAME->jobid, wireup))) { + if (ORTE_SUCCESS != (rc = orte_util_encode_nodemap(buf))) { ORTE_ERROR_LOG(rc); - OBJ_RELEASE(wireup); OBJ_RELEASE(buf); return; } - /* put it in a byte object for xmission */ - opal_dss.unload(wireup, (void**)&bo.bytes, &numbytes); - /* pack the byte object - zero-byte objects are fine */ - bo.size = numbytes; - boptr = &bo; - if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &boptr, 1, OPAL_BYTE_OBJECT))) { - ORTE_ERROR_LOG(rc); + orte_node_info_communicated = true; + if (!orte_static_ports && !orte_fwd_mpirun_port) { + /* pack a flag indicating wiring info is provided */ + flag = 1; + opal_dss.pack(buf, &flag, 1, OPAL_INT8); + /* get wireup info for daemons per the selected routing module */ + wireup = OBJ_NEW(opal_buffer_t); + if (ORTE_SUCCESS != (rc = orte_rml_base_get_contact_info(ORTE_PROC_MY_NAME->jobid, wireup))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(wireup); + OBJ_RELEASE(buf); + return; + } + /* put it in a byte object for xmission */ + opal_dss.unload(wireup, (void**)&bo.bytes, &numbytes); + /* pack the byte object - zero-byte objects are fine */ + bo.size = numbytes; + boptr = &bo; + if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &boptr, 1, OPAL_BYTE_OBJECT))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(wireup); + OBJ_RELEASE(buf); + return; + } + /* release the data since it has now been copied 
into our buffer */ + if (NULL != bo.bytes) { + free(bo.bytes); + } OBJ_RELEASE(wireup); - OBJ_RELEASE(buf); - return; + } else { + flag = 0; + opal_dss.pack(buf, &flag, 1, OPAL_INT8); } - /* release the data since it has now been copied into our buffer */ - if (NULL != bo.bytes) { - free(bo.bytes); - } - OBJ_RELEASE(wireup); } else { flag = 0; opal_dss.pack(buf, &flag, 1, OPAL_INT8); diff --git a/orte/runtime/orte_globals.c b/orte/runtime/orte_globals.c index d4a740f386..68826c4abf 100644 --- a/orte/runtime/orte_globals.c +++ b/orte/runtime/orte_globals.c @@ -108,6 +108,8 @@ bool orte_display_allocation = false; bool orte_display_devel_allocation = false; bool orte_soft_locations = false; int orted_pmi_version = 0; +bool orte_nidmap_communicated = false; +bool orte_node_info_communicated = false; /* launch agents */ char *orte_launch_agent = NULL; diff --git a/orte/runtime/orte_globals.h b/orte/runtime/orte_globals.h index b864e5cd8e..0b46dfc73d 100644 --- a/orte/runtime/orte_globals.h +++ b/orte/runtime/orte_globals.h @@ -491,6 +491,8 @@ ORTE_DECLSPEC extern bool orte_display_allocation; ORTE_DECLSPEC extern bool orte_display_devel_allocation; ORTE_DECLSPEC extern bool orte_soft_locations; ORTE_DECLSPEC extern bool orte_hnp_connected; +ORTE_DECLSPEC extern bool orte_nidmap_communicated; +ORTE_DECLSPEC extern bool orte_node_info_communicated; /* launch agents */ ORTE_DECLSPEC extern char *orte_launch_agent; diff --git a/orte/util/nidmap.c b/orte/util/nidmap.c index d82f0601ce..c2f9abae2a 100644 --- a/orte/util/nidmap.c +++ b/orte/util/nidmap.c @@ -62,6 +62,7 @@ #include "orte/mca/dfs/dfs.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/odls/base/odls_private.h" +#include "orte/mca/rmaps/base/base.h" #include "orte/mca/routed/routed.h" #include "orte/util/show_help.h" #include "orte/util/proc_info.h" @@ -73,24 +74,18 @@ #include "orte/util/nidmap.h" -int orte_util_build_daemon_nidmap(char **nodes) +int orte_util_build_daemon_nidmap(void) { - int i, 
num_nodes; + int i; int rc; struct hostent *h; + orte_node_t *node; opal_buffer_t buf; opal_process_name_t proc; char *uri, *addr; char *proc_name; opal_value_t kv; - num_nodes = opal_argv_count(nodes); - - if (0 == num_nodes) { - /* nothing to do */ - return ORTE_SUCCESS; - } - /* install the entry for the HNP */ proc.jobid = ORTE_PROC_MY_NAME->jobid; proc.vpid = 0; @@ -105,16 +100,22 @@ int orte_util_build_daemon_nidmap(char **nodes) } OBJ_DESTRUCT(&kv); - /* the daemon vpids will be assigned in order, - * starting with vpid=0 for the HNP */ + /* we must have already built the node pool, so cycle across it */ OBJ_CONSTRUCT(&buf, opal_buffer_t); - for (i=0; i < num_nodes; i++) { + for (i=0; i < orte_node_pool->size; i++) { + if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) { + continue; + } + if (NULL == node->daemon) { + /* this node isn't occupied */ + continue; + } /* define the vpid for this daemon */ - proc.vpid = i; + proc.vpid = node->daemon->name.vpid; /* store the hostname for the proc */ OBJ_CONSTRUCT(&kv, opal_value_t); kv.key = strdup(OPAL_PMIX_HOSTNAME); - kv.data.string = strdup(nodes[i]); + kv.data.string = strdup(node->name); kv.type = OPAL_STRING; if (OPAL_SUCCESS != (rc = opal_pmix.store_local(&proc, &kv))) { ORTE_ERROR_LOG(rc); @@ -138,7 +139,7 @@ int orte_util_build_daemon_nidmap(char **nodes) OBJ_DESTRUCT(&kv); /* lookup the address of this node */ - if (NULL == (h = gethostbyname(nodes[i]))) { + if (NULL == (h = gethostbyname(node->name))) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } @@ -157,7 +158,11 @@ int orte_util_build_daemon_nidmap(char **nodes) OPAL_OUTPUT_VERBOSE((2, orte_debug_verbosity, "%s orte:util:build:daemon:nidmap node %s daemon %d addr %s uri %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - nodes[i], i+1, addr, uri)); + node->name, i+1, addr, uri)); + /* if this is the HNP, then store it */ + if (!ORTE_PROC_IS_HNP && 0 == i) { + orte_process_info.my_hnp_uri = strdup(uri); + } 
opal_dss.pack(&buf, &uri, 1, OPAL_STRING); free(proc_name); free(uri); @@ -172,136 +177,69 @@ int orte_util_build_daemon_nidmap(char **nodes) return rc; } -int orte_util_encode_nodemap(opal_buffer_t *buffer) +int orte_util_nidmap_create(char **regex) { char *node; char prefix[ORTE_MAX_NODE_PREFIX]; int i, j, n, len, startnum, nodenum, numdigits; - bool found, fullname, test; - char *suffix, *sfx; + bool found, fullname; + char *suffix, *sfx, *nodenames; orte_regex_node_t *ndreg; - orte_regex_range_t *range, *rng, *slt, *tp, *flg; - opal_list_t nodenms, dvpids, slots, topos, flags; + orte_regex_range_t *range, *rng; + opal_list_t nodenms, dvpids; opal_list_item_t *item, *itm2; char **regexargs = NULL, *tmp, *tmp2; orte_node_t *nptr; - int rc; - uint8_t ui8; + orte_vpid_t vpid; - /* setup the list of results */ OBJ_CONSTRUCT(&nodenms, opal_list_t); OBJ_CONSTRUCT(&dvpids, opal_list_t); - OBJ_CONSTRUCT(&slots, opal_list_t); - OBJ_CONSTRUCT(&topos, opal_list_t); - OBJ_CONSTRUCT(&flags, opal_list_t); rng = NULL; - slt = NULL; - tp = NULL; - flg = NULL; for (n=0; n < orte_node_pool->size; n++) { if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) { continue; } /* if no daemon has been assigned, then this node is not being used */ if (NULL == nptr->daemon) { - continue; + vpid = -1; // indicates no daemon assigned + } else { + vpid = nptr->daemon->name.vpid; } /* deal with the daemon vpid - see if it is next in the * current range */ if (NULL == rng) { /* just starting */ rng = OBJ_NEW(orte_regex_range_t); - rng->start = nptr->daemon->name.vpid; + rng->vpid = vpid; rng->cnt = 1; opal_list_append(&dvpids, &rng->super); - } else { - /* is this the next in line */ - if (nptr->daemon->name.vpid == (orte_vpid_t)(rng->start + rng->cnt)) { + } else if (UINT32_MAX == vpid) { + if (-1 == rng->vpid) { rng->cnt++; } else { /* need to start another range */ rng = OBJ_NEW(orte_regex_range_t); - rng->start = nptr->daemon->name.vpid; + rng->vpid = vpid; 
rng->cnt = 1; opal_list_append(&dvpids, &rng->super); } - } - /* check the #slots */ - if (NULL == slt) { - /* just starting */ - slt = OBJ_NEW(orte_regex_range_t); - slt->start = nptr->daemon->name.vpid; - slt->slots = nptr->slots; - slt->cnt = 1; - opal_list_append(&slots, &slt->super); + } else if (-1 == rng->vpid) { + /* need to start another range */ + rng = OBJ_NEW(orte_regex_range_t); + rng->vpid = vpid; + rng->cnt = 1; + opal_list_append(&dvpids, &rng->super); } else { /* is this the next in line */ - if (nptr->slots == slt->slots) { - slt->cnt++; + if (vpid == (orte_vpid_t)(rng->vpid + rng->cnt)) { + rng->cnt++; } else { /* need to start another range */ - slt = OBJ_NEW(orte_regex_range_t); - slt->start = nptr->daemon->name.vpid; - slt->slots = nptr->slots; - slt->cnt = 1; - opal_list_append(&slots, &slt->super); - } - } - /* check the topologies */ - if (NULL == tp) { - if (NULL != nptr->topology) { - /* just starting */ - tp = OBJ_NEW(orte_regex_range_t); - tp->start = nptr->daemon->name.vpid; - tp->t = nptr->topology; - tp->cnt = 1; - opal_list_append(&topos, &tp->super); - } - } else { - if (NULL != nptr->topology) { - /* is this the next in line */ - if (tp->t == nptr->topology) { - tp->cnt++; - } else { - /* need to start another range */ - tp = OBJ_NEW(orte_regex_range_t); - tp->start = nptr->daemon->name.vpid; - tp->t = nptr->topology; - tp->cnt = 1; - opal_list_append(&topos, &tp->super); - } - } - } - /* check the flags */ - test = ORTE_FLAG_TEST(nptr, ORTE_NODE_FLAG_SLOTS_GIVEN); - if (NULL == flg) { - /* just starting */ - flg = OBJ_NEW(orte_regex_range_t); - flg->start = nptr->daemon->name.vpid; - if (test) { - flg->slots = 1; - } else { - flg->slots = 0; - } - flg->cnt = 1; - opal_list_append(&flags, &flg->super); - } else { - /* is this the next in line */ - if ((test && 1 == flg->slots) || - (!test && 0 == flg->slots)) { - flg->cnt++; - } else { - /* need to start another range */ - flg = OBJ_NEW(orte_regex_range_t); - flg->start = 
nptr->daemon->name.vpid; - if (test) { - flg->slots = 1; - } else { - flg->slots = 0; - } - flg->cnt = 1; - opal_list_append(&flags, &flg->super); + rng = OBJ_NEW(orte_regex_range_t); + rng->vpid = vpid; + rng->cnt = 1; + opal_list_append(&dvpids, &rng->super); } } node = nptr->name; @@ -387,16 +325,16 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer) if (NULL == range) { /* first range for this nodeid */ range = OBJ_NEW(orte_regex_range_t); - range->start = nodenum; + range->vpid = nodenum; range->cnt = 1; opal_list_append(&ndreg->ranges, &range->super); break; } /* see if the node number is out of sequence */ - if (nodenum != (range->start + range->cnt)) { + if (nodenum != (range->vpid + range->cnt)) { /* start a new range */ range = OBJ_NEW(orte_regex_range_t); - range->start = nodenum; + range->vpid = nodenum; range->cnt = 1; opal_list_append(&ndreg->ranges, &range->super); break; @@ -420,7 +358,7 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer) * care of names we can't compress above */ range = OBJ_NEW(orte_regex_range_t); - range->start = nodenum; + range->vpid = nodenum; range->cnt = 1; opal_list_append(&ndreg->ranges, &range->super); } @@ -428,7 +366,6 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer) free(suffix); } } - /* begin constructing the regular expression */ while (NULL != (item = opal_list_remove_first(&nodenms))) { ndreg = (orte_regex_node_t*)item; @@ -454,9 +391,9 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer) while (NULL != (itm2 = opal_list_remove_first(&ndreg->ranges))) { range = (orte_regex_range_t*)itm2; if (1 == range->cnt) { - asprintf(&tmp2, "%s%d,", tmp, range->start); + asprintf(&tmp2, "%s%u,", tmp, range->vpid); } else { - asprintf(&tmp2, "%s%d-%d,", tmp, range->start, range->start + range->cnt - 1); + asprintf(&tmp2, "%s%u-%u,", tmp, range->vpid, range->vpid + range->cnt - 1); } free(tmp); tmp = tmp2; @@ -476,39 +413,28 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer) } /* assemble final result */ - tmp 
= opal_argv_join(regexargs, ','); + nodenames = opal_argv_join(regexargs, ','); /* cleanup */ opal_argv_free(regexargs); OBJ_DESTRUCT(&nodenms); - /* pack the string */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &tmp, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - OPAL_LIST_DESTRUCT(&dvpids); - OPAL_LIST_DESTRUCT(&slots); - return rc; - } - if (NULL != tmp) { - free(tmp); - } - /* do the same for the vpids */ tmp = NULL; while (NULL != (item = opal_list_remove_first(&dvpids))) { rng = (orte_regex_range_t*)item; if (1 < rng->cnt) { if (NULL == tmp) { - asprintf(&tmp, "%d-%d", rng->start, rng->start + rng->cnt - 1); + asprintf(&tmp, "%u(%u)", rng->vpid, rng->cnt); } else { - asprintf(&tmp2, "%s,%d-%d", tmp, rng->start, rng->start + rng->cnt - 1); + asprintf(&tmp2, "%s,%u(%u)", tmp, rng->vpid, rng->cnt); free(tmp); tmp = tmp2; } } else { if (NULL == tmp) { - asprintf(&tmp, "%d", rng->start); + asprintf(&tmp, "%u", rng->vpid); } else { - asprintf(&tmp2, "%s,%d", tmp, rng->start); + asprintf(&tmp2, "%s,%u", tmp, rng->vpid); free(tmp); tmp = tmp2; } @@ -517,83 +443,35 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer) } OPAL_LIST_DESTRUCT(&dvpids); - /* pack the string */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &tmp, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - OPAL_LIST_DESTRUCT(&slots); - return rc; - } - if (NULL != tmp) { - free(tmp); - } + /* now concatenate the results into one string */ + asprintf(&tmp2, "%s@%s", nodenames, tmp); + free(nodenames); + free(tmp); - /* do the same to pass #slots on each node */ - tmp = NULL; - while (NULL != (item = opal_list_remove_first(&slots))) { - rng = (orte_regex_range_t*)item; - if (1 < rng->cnt) { - if (NULL == tmp) { - asprintf(&tmp, "%d-%d[%d]", rng->start, rng->start + rng->cnt - 1, rng->slots); - } else { - asprintf(&tmp2, "%s,%d-%d[%d]", tmp, rng->start, rng->start + rng->cnt - 1, rng->slots); - free(tmp); - tmp = tmp2; - } - } else { - if (NULL == tmp) { - asprintf(&tmp, "%d[%d]", rng->start, 
rng->slots); - } else { - asprintf(&tmp2, "%s,%d[%d]", tmp, rng->start, rng->slots); - free(tmp); - tmp = tmp2; - } - } - OBJ_RELEASE(rng); - } - OPAL_LIST_DESTRUCT(&slots); + *regex = tmp2; + return ORTE_SUCCESS; +} - /* pack the string */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &tmp, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - if (NULL != tmp) { - free(tmp); - } +int orte_util_encode_nodemap(opal_buffer_t *buffer) +{ + int n; + bool test; + orte_regex_range_t *rng, *slt, *tp, *flg; + opal_list_t slots, topos, flags; + opal_list_item_t *item; + char *tmp, *tmp2; + orte_node_t *nptr; + int rc; + uint8_t ui8; - /* do the same to pass the flags for each node */ - tmp = NULL; - while (NULL != (item = opal_list_remove_first(&flags))) { - rng = (orte_regex_range_t*)item; - if (1 < rng->cnt) { - if (NULL == tmp) { - asprintf(&tmp, "%d-%d[%x]", rng->start, rng->start + rng->cnt - 1, rng->slots); - } else { - asprintf(&tmp2, "%s,%d-%d[%x]", tmp, rng->start, rng->start + rng->cnt - 1, rng->slots); - free(tmp); - tmp = tmp2; - } - } else { - if (NULL == tmp) { - asprintf(&tmp, "%d[%x]", rng->start, rng->slots); - } else { - asprintf(&tmp2, "%s,%d[%x]", tmp, rng->start, rng->slots); - free(tmp); - tmp = tmp2; - } - } - OBJ_RELEASE(rng); - } - OPAL_LIST_DESTRUCT(&flags); + /* setup the list of results */ + OBJ_CONSTRUCT(&slots, opal_list_t); + OBJ_CONSTRUCT(&topos, opal_list_t); + OBJ_CONSTRUCT(&flags, opal_list_t); - /* pack the string */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &tmp, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - if (NULL != tmp) { - free(tmp); - } + slt = NULL; + tp = NULL; + flg = NULL; /* pack a flag indicating if the HNP was included in the allocation */ if (orte_hnp_is_allocated) { @@ -617,31 +495,147 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer) return rc; } + for (n=0; n < orte_node_pool->size; n++) { + if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) { + 
continue; + } + /* check the #slots */ + if (NULL == slt) { + /* just starting */ + slt = OBJ_NEW(orte_regex_range_t); + slt->slots = nptr->slots; + slt->cnt = 1; + opal_list_append(&slots, &slt->super); + } else { + /* is this the next in line */ + if (nptr->slots == slt->slots) { + slt->cnt++; + } else { + /* need to start another range */ + slt = OBJ_NEW(orte_regex_range_t); + slt->slots = nptr->slots; + slt->cnt = 1; + opal_list_append(&slots, &slt->super); + } + } + /* check the topologies */ + if (NULL == tp) { + /* just starting */ + tp = OBJ_NEW(orte_regex_range_t); + tp->t = nptr->topology; + tp->cnt = 1; + opal_list_append(&topos, &tp->super); + } else { + /* is this the next in line */ + if (tp->t == nptr->topology) { + tp->cnt++; + } else { + /* need to start another range */ + tp = OBJ_NEW(orte_regex_range_t); + tp->t = nptr->topology; + tp->cnt = 1; + opal_list_append(&topos, &tp->super); + } + } + /* check the flags */ + test = ORTE_FLAG_TEST(nptr, ORTE_NODE_FLAG_SLOTS_GIVEN); + if (NULL == flg) { + /* just starting */ + flg = OBJ_NEW(orte_regex_range_t); + if (test) { + flg->slots = 1; + } else { + flg->slots = 0; + } + flg->cnt = 1; + opal_list_append(&flags, &flg->super); + } else { + /* is this the next in line */ + if ((test && 1 == flg->slots) || + (!test && 0 == flg->slots)) { + flg->cnt++; + } else { + /* need to start another range */ + flg = OBJ_NEW(orte_regex_range_t); + if (test) { + flg->slots = 1; + } else { + flg->slots = 0; + } + flg->cnt = 1; + opal_list_append(&flags, &flg->super); + } + } + } + + /* pass #slots on each node */ + tmp = NULL; + while (NULL != (item = opal_list_remove_first(&slots))) { + rng = (orte_regex_range_t*)item; + if (NULL == tmp) { + asprintf(&tmp, "%d[%d]", rng->cnt, rng->slots); + } else { + asprintf(&tmp2, "%s,%d[%d]", tmp, rng->cnt, rng->slots); + free(tmp); + tmp = tmp2; + } + OBJ_RELEASE(rng); + } + OPAL_LIST_DESTRUCT(&slots); + + /* pack the string */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, 
&tmp, 1, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + return rc; + } + if (NULL != tmp) { + free(tmp); + } + + /* do the same to pass the flags for each node */ + tmp = NULL; + while (NULL != (item = opal_list_remove_first(&flags))) { + rng = (orte_regex_range_t*)item; + if (NULL == tmp) { + asprintf(&tmp, "%d[%d]", rng->cnt, rng->slots); + } else { + asprintf(&tmp2, "%s,%d[%d]", tmp, rng->cnt, rng->slots); + free(tmp); + tmp = tmp2; + } + OBJ_RELEASE(rng); + } + OPAL_LIST_DESTRUCT(&flags); + + /* pack the string */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &tmp, 1, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + return rc; + } + if (NULL != tmp) { + free(tmp); + } + /* handle the topologies - as the most common case by far * is to have homogeneous topologies, we only send them * if something is different */ + if (orte_hnp_is_allocated && !(ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping) & ORTE_MAPPING_NO_USE_LOCAL)) { + ui8 = 2; + } else { + ui8 = 1; + } tmp = NULL; - if (1 < opal_list_get_size(&topos)) { + if (ui8 < opal_list_get_size(&topos)) { opal_buffer_t bucket, *bptr; OBJ_CONSTRUCT(&bucket, opal_buffer_t); while (NULL != (item = opal_list_remove_first(&topos))) { rng = (orte_regex_range_t*)item; - if (1 < rng->cnt) { - if (NULL == tmp) { - asprintf(&tmp, "%d-%d", rng->start, rng->start + rng->cnt - 1); - } else { - asprintf(&tmp2, "%s,%d-%d", tmp, rng->start, rng->start + rng->cnt - 1); - free(tmp); - tmp = tmp2; - } + if (NULL == tmp) { + asprintf(&tmp, "%d", rng->cnt); } else { - if (NULL == tmp) { - asprintf(&tmp, "%d", rng->start); - } else { - asprintf(&tmp2, "%s,%d", tmp, rng->start); - free(tmp); - tmp = tmp2; - } + asprintf(&tmp2, "%s,%d", tmp, rng->cnt); + free(tmp); + tmp = tmp2; } /* pack this topology string */ if (ORTE_SUCCESS != (rc = opal_dss.pack(&bucket, &rng->t->sig, 1, OPAL_STRING))) { @@ -693,49 +687,173 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer) return ORTE_SUCCESS; } -/* decode a nodemap for a daemon */ -int 
orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) +int orte_util_nidmap_parse(char *regex) { - int n, nn, rc; - orte_node_t *node; - size_t k, endpt, start; + char *nodelist, *vpids, *ptr; + char **nodes, **dvpids; + int rc, n, cnt; + orte_regex_range_t *rng; + opal_list_t dids; orte_job_t *daemons; - orte_proc_t *dptr; - char **nodes=NULL, *dvpids=NULL, *slots=NULL, *topos=NULL, *flags=NULL; - char *ndnames, *rmndr, **tmp; - opal_list_t dids, slts, flgs;; - opal_buffer_t *bptr=NULL; - orte_topology_t *t2; - orte_regex_range_t *rng, *drng, *srng, *frng; - uint8_t ui8; + orte_node_t *nd; + orte_proc_t *proc; - /* unpack the node regex */ - n = 1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &ndnames, &n, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* it is okay for this to be NULL */ - if (NULL == ndnames) { + /* if we are the HNP, we don't need to parse this */ + if (ORTE_PROC_IS_HNP) { return ORTE_SUCCESS; } + /* split the regex into its node and vpid parts */ + nodelist = regex; + vpids = strchr(regex, '@'); + if (NULL == vpids) { + /* indicates the regex got mangled somewhere */ + return ORTE_ERR_BAD_PARAM; + } + *vpids = '\0'; // terminate the nodelist string + ++vpids; // step over the separator + if (NULL == vpids || '\0' == *vpids) { + /* indicates the regex got mangled somewhere */ + return ORTE_ERR_BAD_PARAM; + } + + /* decompress the nodes regex */ + nodes = NULL; + if (ORTE_SUCCESS != (rc = orte_regex_extract_node_names(nodelist, &nodes))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + if (NULL == nodes) { + /* should not happen */ + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + return ORTE_ERR_NOT_FOUND; + } + + /* decompress the vpids */ OBJ_CONSTRUCT(&dids, opal_list_t); + dvpids = opal_argv_split(vpids, ','); + for (n=0; NULL != dvpids[n]; n++) { + rng = OBJ_NEW(orte_regex_range_t); + opal_list_append(&dids, &rng->super); + /* check for a count */ + if (NULL != (ptr = strchr(dvpids[n], '('))) { + *ptr = '\0'; + 
dvpids[n][strlen(dvpids[n])-2] = '\0'; // remove trailing paren + ++ptr; + rng->cnt = strtoul(ptr, NULL, 10); + } + /* convert the number - since it might be a range, + * save the remainder pointer */ + rng->vpid = strtoul(dvpids[n], NULL, 10); + } + opal_argv_free(dvpids); + + /* get the daemon job object */ + daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid); + + /* create the node pool array - this will include + * _all_ nodes known to the allocation */ + rng = (orte_regex_range_t*)opal_list_get_first(&dids); + cnt = 0; + for (n=0; NULL != nodes[n]; n++) { + nd = OBJ_NEW(orte_node_t); + nd->name = nodes[n]; + opal_pointer_array_set_item(orte_node_pool, n, nd); + /* see if it has a daemon on it */ + if (-1 != rng->vpid) { + /* we have a daemon, so let's create the tracker for it */ + if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, rng->vpid+cnt))) { + proc = OBJ_NEW(orte_proc_t); + proc->name.jobid = ORTE_PROC_MY_NAME->jobid; + proc->name.vpid = rng->vpid + cnt; + proc->state = ORTE_PROC_STATE_RUNNING; + ORTE_FLAG_SET(proc, ORTE_PROC_FLAG_ALIVE); + daemons->num_procs++; + opal_pointer_array_set_item(daemons->procs, proc->name.vpid, proc); + } + nd->index = proc->name.vpid; + OBJ_RETAIN(nd); + proc->node = nd; + OBJ_RETAIN(proc); + nd->daemon = proc; + } + ++cnt; + if (cnt == rng->cnt) { + rng = (orte_regex_range_t*)opal_list_get_next(&rng->super); + if (NULL == rng) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + return ORTE_ERR_NOT_FOUND; + } + } + } + + /* unpdate num procs */ + if (orte_process_info.num_procs != daemons->num_procs) { + orte_process_info.num_procs = daemons->num_procs; + /* need to update the routing plan */ + orte_routed.update_routing_plan(NULL); + } + + if (orte_process_info.max_procs < orte_process_info.num_procs) { + orte_process_info.max_procs = orte_process_info.num_procs; + } + + if (0 < opal_output_get_verbosity(orte_debug_verbosity)) { + int i; + for (i=0; i < orte_node_pool->size; i++) { + if (NULL 
== (nd = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) { + continue; + } + opal_output(0, "%s node[%d].name %s daemon %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), i, + (NULL == nd->name) ? "NULL" : nd->name, + (NULL == nd->daemon) ? "NONE" : ORTE_VPID_PRINT(nd->daemon->name.vpid)); + } + } + + return ORTE_SUCCESS; +} + +/* decode a nodemap for a daemon */ +int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) +{ + int n, nn, rc, cnt, offset; + orte_node_t *node; + char *slots=NULL, *topos=NULL, *flags=NULL; + char *rmndr, **tmp; + opal_list_t slts, flgs;; + opal_buffer_t *bptr=NULL; + orte_topology_t *t2; + orte_regex_range_t *rng, *srng, *frng; + uint8_t ui8; + OBJ_CONSTRUCT(&slts, opal_list_t); OBJ_CONSTRUCT(&flgs, opal_list_t); - /* unpack the daemon vpid regex */ + /* unpack the flag indicating if the HNP was allocated */ n = 1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &dvpids, &n, OPAL_STRING))) { + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &ui8, &n, OPAL_UINT8))) { ORTE_ERROR_LOG(rc); goto cleanup; } - /* this is not allowed to be NULL */ - if (NULL == dvpids) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - rc = ORTE_ERR_BAD_PARAM; + if (0 == ui8) { + orte_hnp_is_allocated = false; + } else { + orte_hnp_is_allocated = true; + } + + /* unpack the flag indicating we are in a managed allocation */ + n = 1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &ui8, &n, OPAL_UINT8))) { + ORTE_ERROR_LOG(rc); goto cleanup; } + if (0 == ui8) { + orte_managed_allocation = false; + } else { + orte_managed_allocation = true; + } /* unpack the slots regex */ n = 1; @@ -763,30 +881,6 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) goto cleanup; } - /* unpack the flag indicating if the HNP was allocated */ - n = 1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &ui8, &n, OPAL_UINT8))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - if (0 == ui8) { - orte_hnp_is_allocated = false; - } else { - orte_hnp_is_allocated = true; - 
} - - /* unpack the flag indicating we are in a managed allocation */ - n = 1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &ui8, &n, OPAL_UINT8))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - if (0 == ui8) { - orte_managed_allocation = false; - } else { - orte_managed_allocation = true; - } - /* unpack the topos regex - this may not have been * provided (e.g., for a homogeneous machine) */ n = 1; @@ -811,38 +905,6 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) goto cleanup; } - /* decompress the regex */ - nodes = NULL; - if (ORTE_SUCCESS != (rc = orte_regex_extract_node_names(ndnames, &nodes))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - - if (NULL == nodes) { - /* should not happen */ - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - rc = ORTE_ERR_NOT_FOUND; - goto cleanup; - } - - /* decompress the vpids */ - tmp = opal_argv_split(dvpids, ','); - for (n=0; NULL != tmp[n]; n++) { - rng = OBJ_NEW(orte_regex_range_t); - opal_list_append(&dids, &rng->super); - /* convert the number - since it might be a range, - * save the remainder pointer */ - rng->start = strtoul(tmp[n], &rmndr, 10); - if (NULL == rmndr || 0 == strlen(rmndr)) { - rng->endpt = rng->start; - } else { - /* it must be a range - find the endpoint */ - ++rmndr; - rng->endpt = strtoul(rmndr, NULL, 10); - } - } - opal_argv_free(tmp); - /* decompress the slots */ tmp = opal_argv_split(slots, ','); for (n=0; NULL != tmp[n]; n++) { @@ -861,16 +923,8 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) /* convert that number as this is the number of * slots for this range */ rng->slots = strtoul(rmndr, NULL, 10); - /* convert the starting pt - since it might be a range, - * save the remainder pointer */ - rng->start = strtoul(tmp[n], &rmndr, 10); - if (NULL == rmndr || 0 == strlen(rmndr)) { - rng->endpt = rng->start; - } else { - /* it must be a range - find the endpoint */ - ++rmndr; - rng->endpt = strtoul(rmndr, NULL, 10); - } + /* convert the initial number as that is the cnt */ + 
rng->cnt = strtoul(tmp[n], NULL, 10); } opal_argv_free(tmp); @@ -878,7 +932,7 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) tmp = opal_argv_split(flags, ','); for (n=0; NULL != tmp[n]; n++) { rng = OBJ_NEW(orte_regex_range_t); - opal_list_append(&dids, &rng->super); + opal_list_append(&flgs, &rng->super); /* find the '[' as that delimits the value */ rmndr = strchr(tmp[n], '['); if (NULL == rmndr) { @@ -895,43 +949,23 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) } else { rng->slots = 0; } - /* convert the starting pt - since it might be a range, - * save the remainder pointer */ - rng->start = strtoul(tmp[n], &rmndr, 10); - if (NULL == rmndr || 0 == strlen(rmndr)) { - rng->endpt = rng->start; - } else { - /* it must be a range - find the endpoint */ - ++rmndr; - rng->endpt = strtoul(rmndr, NULL, 10); - } + /* convert the initial number as that is the cnt */ + rng->cnt = strtoul(tmp[n], NULL, 10); } opal_argv_free(tmp); free(flags); - /* get the daemon job object */ - daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid); - /* update the node array */ - drng = (orte_regex_range_t*)opal_list_get_first(&dids); srng = (orte_regex_range_t*)opal_list_get_first(&slts); frng = (orte_regex_range_t*)opal_list_get_first(&flgs); - for (n=0; NULL != nodes[n]; n++) { - /* the daemon vpids for these nodes will be in the dids array, so - * use those to lookup the nodes */ - nn = drng->start + n; - if (nn == drng->endpt) { - drng = (orte_regex_range_t*)opal_list_get_next(&drng->super); - } - if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, nn))) { - node = OBJ_NEW(orte_node_t); - node->name = nodes[n]; - node->index = nn; - opal_pointer_array_set_item(orte_node_pool, nn, node); + for (n=0; n < orte_node_pool->size; n++) { + if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) { + continue; } /* set the number of slots */ node->slots = srng->slots; - if (srng->endpt == nn) { + 
srng->cnt--; + if (0 == srng->cnt) { srng = (orte_regex_range_t*)opal_list_get_next(&srng->super); } /* set the flags */ @@ -940,41 +974,11 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) } else { ORTE_FLAG_SET(node, ORTE_NODE_FLAG_SLOTS_GIVEN); } - if (frng->endpt == nn) { + frng->cnt--; + if (0 == frng->cnt) { frng = (orte_regex_range_t*)opal_list_get_next(&frng->super); } - ++orte_process_info.num_nodes; - /* if this is me, just ignore the rest as we are all setup */ - if (nn == (int)ORTE_PROC_MY_NAME->vpid) { - continue; - } - if (NULL != node->daemon) { - OBJ_RELEASE(node->daemon); - node->daemon = NULL; - } - if (NULL == (dptr = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, nn))) { - /* create a daemon object for this node */ - dptr = OBJ_NEW(orte_proc_t); - dptr->name.jobid = ORTE_PROC_MY_NAME->jobid; - dptr->name.vpid = nn; - ORTE_FLAG_SET(dptr, ORTE_PROC_FLAG_ALIVE); // assume the daemon is alive until discovered otherwise - opal_pointer_array_set_item(daemons->procs, nn, dptr); - ++daemons->num_procs; - } else if (NULL != dptr->node) { - OBJ_RELEASE(dptr->node); - dptr->node = NULL; - } - /* link the node to the daemon */ - OBJ_RETAIN(dptr); - node->daemon = dptr; - /* link the node to the daemon */ - OBJ_RETAIN(node); - dptr->node = node; } - /* we cannot use opal_argv_free here as this would release - * all the node names themselves. 
Instead, we just free the - * array of string pointers, leaving the strings alone */ - free(nodes); /* if no topology info was passed, then everyone shares our topology */ if (NULL == bptr) { @@ -994,7 +998,9 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) /* decompress the topology regex */ tmp = opal_argv_split(topos, ','); /* there must be a topology definition for each range */ + offset = 0; for (nn=0; NULL != tmp[nn]; nn++) { + cnt = strtoul(tmp[nn], NULL, 10); /* unpack the signature */ n = 1; if (ORTE_SUCCESS != (rc = opal_dss.unpack(bptr, &sig, &n, OPAL_STRING))) { @@ -1039,57 +1045,25 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) t2->topo = topo; opal_pointer_array_add(orte_node_topologies, t2); } - /* point each of the nodes in the regex to this topology */ - start = strtoul(tmp[nn], &rmndr, 10); - if (NULL != rmndr) { - /* it must be a range - find the endpoint */ - ++rmndr; - endpt = strtoul(rmndr, NULL, 10); - } else { - endpt = start; - } - for (k=start; k <= endpt; k++) { - if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, k))) { - if (NULL == node->topology) { - OBJ_RETAIN(t2); - node->topology = t2; - } + /* point each of the nodes in this range to this topology */ + n=0; + while (n < cnt && (n+offset) < orte_node_pool->size) { + if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n+offset))) { + continue; } + if (NULL == node->topology) { + OBJ_RETAIN(t2); + node->topology = t2; + } + ++n; } + offset += cnt; } OBJ_RELEASE(bptr); opal_argv_free(tmp); } - /* unpdate num procs */ - if (orte_process_info.num_procs != daemons->num_procs) { - orte_process_info.num_procs = daemons->num_procs; - /* need to update the routing plan */ - orte_routed.update_routing_plan(NULL); - } - - if (orte_process_info.max_procs < orte_process_info.num_procs) { - orte_process_info.max_procs = orte_process_info.num_procs; - } - - /* update num_daemons */ - orte_process_info.num_daemons = 
daemons->num_procs; - - if (0 < opal_output_get_verbosity(orte_debug_verbosity)) { - int i; - for (i=0; i < orte_node_pool->size; i++) { - if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) { - continue; - } - opal_output(0, "%s node[%d].name %s daemon %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), i, - (NULL == node->name) ? "NULL" : node->name, - (NULL == node->daemon) ? "NONE" : ORTE_VPID_PRINT(node->daemon->name.vpid)); - } - } - cleanup: - OPAL_LIST_DESTRUCT(&dids); OPAL_LIST_DESTRUCT(&slts); OPAL_LIST_DESTRUCT(&flgs); return rc; diff --git a/orte/util/nidmap.h b/orte/util/nidmap.h index e91be60e00..521cc352c0 100644 --- a/orte/util/nidmap.h +++ b/orte/util/nidmap.h @@ -37,10 +37,16 @@ BEGIN_C_DECLS +#define ORTE_MAX_REGEX_CMD_LENGTH 1024 + #define ORTE_MAX_NODE_PREFIX 50 #define ORTE_CONTIG_NODE_CMD 0x01 #define ORTE_NON_CONTIG_NODE_CMD 0x02 + +ORTE_DECLSPEC int orte_util_nidmap_create(char **regex); +ORTE_DECLSPEC int orte_util_nidmap_parse(char *regex); + /* create a regular expression describing the nodes in the * allocation */ ORTE_DECLSPEC int orte_util_encode_nodemap(opal_buffer_t *buffer); @@ -49,11 +55,7 @@ ORTE_DECLSPEC int orte_util_encode_nodemap(opal_buffer_t *buffer); * into the orte_node_pool array */ ORTE_DECLSPEC int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer); -ORTE_DECLSPEC int orte_util_build_daemon_nidmap(char **nodes); - -ORTE_DECLSPEC int orte_util_encode_topologies(opal_buffer_t *buffer); - -ORTE_DECLSPEC int orte_util_decode_topologies(opal_buffer_t *buffer); +ORTE_DECLSPEC int orte_util_build_daemon_nidmap(void); END_C_DECLS diff --git a/orte/util/regex.c b/orte/util/regex.c index a723c877db..f59ed0000f 100644 --- a/orte/util/regex.c +++ b/orte/util/regex.c @@ -63,230 +63,6 @@ static int regex_parse_node_ranges(char *base, char *ranges, int num_digits, char *suffix, char ***names); static int regex_parse_node_range(char *base, char *range, int num_digits, char *suffix, char ***names); -int 
orte_regex_create(char *nodelist, char **regexp) -{ - char *node; - char prefix[ORTE_MAX_NODE_PREFIX]; - int i, j, len, startnum, nodenum, numdigits; - bool found, fullname; - char *suffix, *sfx; - orte_regex_node_t *ndreg; - orte_regex_range_t *range; - opal_list_t nodeids; - opal_list_item_t *item, *itm2; - char **regexargs = NULL, *tmp, *tmp2; - char *cptr; - - /* define the default */ - *regexp = NULL; - - cptr = strchr(nodelist, ','); - if (NULL == cptr) { - /* if there is only one node, don't bother */ - *regexp = strdup(nodelist); - return ORTE_SUCCESS; - } - - /* setup the list of results */ - OBJ_CONSTRUCT(&nodeids, opal_list_t); - - /* cycle thru the array of nodenames */ - node = nodelist; - while (NULL != (cptr = strchr(node, ',')) || 0 < strlen(node)) { - if (NULL != cptr) { - *cptr = '\0'; - } - /* determine this node's prefix by looking for first non-alpha char */ - fullname = false; - len = strlen(node); - startnum = -1; - memset(prefix, 0, ORTE_MAX_NODE_PREFIX); - numdigits = 0; - for (i=0, j=0; i < len; i++) { - if (!isalpha(node[i])) { - /* found a non-alpha char */ - if (!isdigit(node[i])) { - /* if it is anything but a digit, we just use - * the entire name - */ - fullname = true; - break; - } - /* count the size of the numeric field - but don't - * add the digits to the prefix - */ - numdigits++; - if (startnum < 0) { - /* okay, this defines end of the prefix */ - startnum = i; - } - continue; - } - if (startnum < 0) { - prefix[j++] = node[i]; - } - } - if (fullname || startnum < 0) { - /* can't compress this name - just add it to the list */ - ndreg = OBJ_NEW(orte_regex_node_t); - ndreg->prefix = strdup(node); - opal_list_append(&nodeids, &ndreg->super); - /* move to the next posn */ - if (NULL == cptr) { - break; - } - node = cptr + 1; - continue; - } - /* convert the digits and get any suffix */ - nodenum = strtol(&node[startnum], &sfx, 10); - if (NULL != sfx) { - suffix = strdup(sfx); - } else { - suffix = NULL; - } - /* is this nodeid 
already on our list? */ - found = false; - for (item = opal_list_get_first(&nodeids); - !found && item != opal_list_get_end(&nodeids); - item = opal_list_get_next(item)) { - ndreg = (orte_regex_node_t*)item; - if (0 < strlen(prefix) && NULL == ndreg->prefix) { - continue; - } - if (0 == strlen(prefix) && NULL != ndreg->prefix) { - continue; - } - if (0 < strlen(prefix) && NULL != ndreg->prefix - && 0 != strcmp(prefix, ndreg->prefix)) { - continue; - } - if (NULL == suffix && NULL != ndreg->suffix) { - continue; - } - if (NULL != suffix && NULL == ndreg->suffix) { - continue; - } - if (NULL != suffix && NULL != ndreg->suffix && - 0 != strcmp(suffix, ndreg->suffix)) { - continue; - } - if (numdigits != ndreg->num_digits) { - continue; - } - /* found a match - flag it */ - found = true; - /* get the last range on this nodeid - we do this - * to preserve order - */ - range = (orte_regex_range_t*)opal_list_get_last(&ndreg->ranges); - if (NULL == range) { - /* first range for this nodeid */ - range = OBJ_NEW(orte_regex_range_t); - range->start = nodenum; - range->cnt = 1; - opal_list_append(&ndreg->ranges, &range->super); - break; - } - /* see if the node number is out of sequence */ - if (nodenum != (range->start + range->cnt)) { - /* start a new range */ - range = OBJ_NEW(orte_regex_range_t); - range->start = nodenum; - range->cnt = 1; - opal_list_append(&ndreg->ranges, &range->super); - break; - } - /* everything matches - just increment the cnt */ - range->cnt++; - break; - } - if (!found) { - /* need to add it */ - ndreg = OBJ_NEW(orte_regex_node_t); - if (0 < strlen(prefix)) { - ndreg->prefix = strdup(prefix); - } - if (NULL != suffix) { - ndreg->suffix = strdup(suffix); - } - ndreg->num_digits = numdigits; - opal_list_append(&nodeids, &ndreg->super); - /* record the first range for this nodeid - we took - * care of names we can't compress above - */ - range = OBJ_NEW(orte_regex_range_t); - range->start = nodenum; - range->cnt = 1; - 
opal_list_append(&ndreg->ranges, &range->super); - } - if (NULL != suffix) { - free(suffix); - } - /* move to the next posn */ - if (NULL == cptr) { - break; - } - node = cptr + 1; - } - - /* begin constructing the regular expression */ - while (NULL != (item = opal_list_remove_first(&nodeids))) { - ndreg = (orte_regex_node_t*)item; - - /* if no ranges, then just add the name */ - if (0 == opal_list_get_size(&ndreg->ranges)) { - if (NULL != ndreg->prefix) { - /* solitary node */ - asprintf(&tmp, "%s", ndreg->prefix); - opal_argv_append_nosize(®exargs, tmp); - free(tmp); - } - OBJ_RELEASE(ndreg); - continue; - } - /* start the regex for this nodeid with the prefix */ - if (NULL != ndreg->prefix) { - asprintf(&tmp, "%s[%d:", ndreg->prefix, ndreg->num_digits); - } else { - asprintf(&tmp, "[%d:", ndreg->num_digits); - } - /* add the ranges */ - while (NULL != (itm2 = opal_list_remove_first(&ndreg->ranges))) { - range = (orte_regex_range_t*)itm2; - if (1 == range->cnt) { - asprintf(&tmp2, "%s%d,", tmp, range->start); - } else { - asprintf(&tmp2, "%s%d-%d,", tmp, range->start, range->start + range->cnt - 1); - } - free(tmp); - tmp = tmp2; - OBJ_RELEASE(range); - } - /* replace the final comma */ - tmp[strlen(tmp)-1] = ']'; - if (NULL != ndreg->suffix) { - /* add in the suffix, if provided */ - asprintf(&tmp2, "%s%s", tmp, ndreg->suffix); - free(tmp); - tmp = tmp2; - } - opal_argv_append_nosize(®exargs, tmp); - free(tmp); - OBJ_RELEASE(ndreg); - } - - /* assemble final result */ - *regexp = opal_argv_join(regexargs, ','); - /* cleanup */ - opal_argv_free(regexargs); - - OBJ_DESTRUCT(&nodeids); - - - return ORTE_SUCCESS; -} - int orte_regex_extract_node_names(char *regexp, char ***names) { int i, j, k, len, ret; @@ -592,7 +368,7 @@ static int regex_parse_node_range(char *base, char *range, int num_digits, char static void range_construct(orte_regex_range_t *ptr) { - ptr->start = 0; + ptr->vpid = 0; ptr->cnt = 0; } OBJ_CLASS_INSTANCE(orte_regex_range_t, diff --git 
a/orte/util/regex.h b/orte/util/regex.h index 1e8ab8bc85..b58cacb807 100644 --- a/orte/util/regex.h +++ b/orte/util/regex.h @@ -36,8 +36,7 @@ BEGIN_C_DECLS typedef struct { opal_list_item_t super; - int start; - int endpt; + int vpid; int cnt; int slots; orte_topology_t *t; @@ -54,11 +53,6 @@ typedef struct { } orte_regex_node_t; ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_regex_node_t); -/* NOTE: this is a destructive call for the nodes param - the - * function will search and replace all commas with '\0' - */ -ORTE_DECLSPEC int orte_regex_create(char *nodes, char **regexp); - ORTE_DECLSPEC int orte_regex_extract_node_names(char *regexp, char ***names); END_C_DECLS