From a210f8046ff35546437d8dbc469cb3f3d337fdd8 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 8 Apr 2020 08:37:25 -0700 Subject: [PATCH 1/2] Cleanup ompi/dpm operations Do some code cleanup in the connect/accept code. Ensure that the OMPI layer has access to the PMIx identifier for the process. Add macros for converting PMIx names to/from strings. Cleanup a few of the simple test programs. Add a little more info to a btl/tcp error message. Signed-off-by: Ralph Castain --- ompi/dpm/dpm.c | 169 +++++++++------------------- ompi/runtime/ompi_rte.c | 7 +- ompi/runtime/ompi_rte.h | 1 + opal/mca/btl/tcp/btl_tcp_endpoint.c | 22 ++-- opal/mca/pmix/base/pmix_base_fns.c | 23 +++- opal/mca/pmix/pmix-internal.h | 57 ++++++++-- test/simple/hello.c | 20 +--- test/simple/intercomm_create.c | 2 + test/simple/simple_spawn.c | 5 +- 9 files changed, 143 insertions(+), 163 deletions(-) diff --git a/ompi/dpm/dpm.c b/ompi/dpm/dpm.c index 09435e8b17..409ea14018 100644 --- a/ompi/dpm/dpm.c +++ b/ompi/dpm/dpm.c @@ -106,8 +106,7 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, pmix_proc_t *procs, pxproc; size_t nprocs, n; pmix_status_t pret; - opal_namelist_t *nm; - opal_jobid_t jobid; + opal_proclist_t *plt; ompi_communicator_t *newcomp=MPI_COMM_NULL; ompi_proc_t *proc; @@ -131,24 +130,14 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, * procs is used to complete construction of the intercommunicator. */ /* everyone constructs the list of members from their communicator */ + pname.jobid = OMPI_PROC_MY_NAME->jobid; + pname.vpid = OPAL_VPID_WILDCARD; if (MPI_COMM_WORLD == comm) { - pname.jobid = OMPI_PROC_MY_NAME->jobid; - pname.vpid = OPAL_VPID_WILDCARD; - rc = opal_convert_process_name_to_string(&nstring, &pname); - if (OPAL_SUCCESS != rc) { - return OMPI_ERROR; - } + PMIX_LOAD_PROCID(&pxproc, ompi_process_info.myprocid.nspace, PMIX_RANK_WILDCARD); + OPAL_PMIX_CONVERT_PROCT_TO_STRING(&nstring, &pxproc); opal_argv_append_nosize(&members, nstring); free(nstring); - /* have to add the number of procs in the job so the remote side - * can correctly add the procs by computing their names, and our nspace - * so they can update their records */ - nstring = opal_jobid_print(pname.jobid); - if (NULL == nstring) { - opal_argv_free(members); - return OMPI_ERROR; - } - opal_argv_append_nosize(&members, nstring); + /* add the number of procs in this job */ (void)opal_asprintf(&nstring, "%d", size); opal_argv_append_nosize(&members, nstring); free(nstring); @@ -176,22 +165,10 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, } else { proc_name = proc_list[i]->super.proc_name; } - rc = opal_convert_process_name_to_string(&nstring, &proc_name); - if (OPAL_SUCCESS != rc) { - if (!dense) { - free(proc_list); - proc_list = NULL; - } - return OMPI_ERROR; - } + OPAL_PMIX_CONVERT_NAME(&pxproc, &proc_name); + OPAL_PMIX_CONVERT_PROCT_TO_STRING(&nstring, &pxproc); opal_argv_append_nosize(&members, nstring); free(nstring); - nstring = opal_jobid_print(pname.jobid); - if (OPAL_SUCCESS != rc) { - opal_argv_free(members); - return OMPI_ERROR; - } - opal_argv_append_nosize(&members, nstring); } if (!dense) { free(proc_list); @@ -260,64 +237,18 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, * starting with our own members */ OBJ_CONSTRUCT(&mlist, opal_list_t); for (i=0; NULL != members[i]; i++) { - nm = OBJ_NEW(opal_namelist_t); - if (OPAL_SUCCESS != (rc = opal_convert_string_to_process_name(&nm->name, members[i]))) { - OMPI_ERROR_LOG(rc); - opal_argv_free(members); - free(rport); - OPAL_LIST_DESTRUCT(&mlist); - goto exit; - } - /* step over the nspace */ - ++i; - if (NULL == members[i]) { - /* this shouldn't happen and is an error */ - OMPI_ERROR_LOG(OMPI_ERR_BAD_PARAM); - OPAL_LIST_DESTRUCT(&mlist); - opal_argv_free(members); - free(rport); - rc = OMPI_ERR_BAD_PARAM; - goto exit; - } - /* if the rank is wildcard, then we need to add all procs - * in that job to the list */ - if (OPAL_VPID_WILDCARD == nm->name.vpid) { - jobid = nm->name.jobid; - OBJ_RELEASE(nm); - for (k=0; k < size; k++) { - nm = OBJ_NEW(opal_namelist_t); - nm->name.jobid = jobid; - nm->name.vpid = k; - opal_list_append(&mlist, &nm->super); - } - /* now step over the size */ - if (NULL == members[i+1]) { - /* this shouldn't happen and is an error */ - OMPI_ERROR_LOG(OMPI_ERR_BAD_PARAM); - OPAL_LIST_DESTRUCT(&mlist); - opal_argv_free(members); - free(rport); - rc = OMPI_ERR_BAD_PARAM; - goto exit; - } + OPAL_PMIX_CONVERT_STRING_TO_PROCT(&pxproc, members[i]); + plt = OBJ_NEW(opal_proclist_t); + memcpy(&plt->procid, &pxproc, sizeof(pmix_proc_t)); + opal_list_append(&mlist, &plt->super); + /* if the rank is wildcard, then we need to skip + * the next position */ + if (PMIX_RANK_WILDCARD == pxproc.rank) { ++i; - } else { - opal_list_append(&mlist, &nm->super); } } opal_argv_free(members); members = NULL; - - /* convert the list of members to a pmix_proc_t array */ - nprocs = opal_list_get_size(&mlist); - PMIX_PROC_CREATE(procs, nprocs); - n = 0; - OPAL_LIST_FOREACH(nm, &mlist, opal_namelist_t) { - OPAL_PMIX_CONVERT_NAME(&procs[n], &nm->name); - ++n; - } - OPAL_LIST_DESTRUCT(&mlist); - /* rport contains a colon-delimited list * of process names for the remote procs - convert it * into an argv array */ @@ -330,29 +261,13 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, OBJ_CONSTRUCT(&rlist, opal_list_t); for (i=0; NULL != members[i]; i++) { - nm = OBJ_NEW(opal_namelist_t); - if (OPAL_SUCCESS != (rc = opal_convert_string_to_process_name(&nm->name, members[i]))) { - OMPI_ERROR_LOG(rc); - opal_argv_free(members); - OPAL_LIST_DESTRUCT(&ilist); - OPAL_LIST_DESTRUCT(&rlist); - PMIX_PROC_FREE(procs, nprocs); - goto exit; - } - /* next entry is the nspace - register it */ - ++i; - if (NULL == members[i]) { - OMPI_ERROR_LOG(OMPI_ERR_NOT_SUPPORTED); - opal_argv_free(members); - OPAL_LIST_DESTRUCT(&ilist); - OPAL_LIST_DESTRUCT(&rlist); - PMIX_PROC_FREE(procs, nprocs); - goto exit; - } - if (OPAL_VPID_WILDCARD == nm->name.vpid) { - jobid = nm->name.jobid; - OBJ_RELEASE(nm); - /* if the vpid is wildcard, then we are including all ranks + OPAL_PMIX_CONVERT_STRING_TO_PROCT(&pxproc, members[i]); + plt = OBJ_NEW(opal_proclist_t); + memcpy(&plt->procid, &pxproc, sizeof(pmix_proc_t)); + opal_list_append(&mlist, &plt->super); + + if (PMIX_RANK_WILDCARD == pxproc.rank) { + /* if the rank is wildcard, then we are including all ranks * of that job, and the next entry in members should be the * number of procs in the job */ if (NULL == members[i+1]) { @@ -361,19 +276,25 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, opal_argv_free(members); OPAL_LIST_DESTRUCT(&ilist); OPAL_LIST_DESTRUCT(&rlist); + OPAL_LIST_DESTRUCT(&mlist); rc = OMPI_ERR_BAD_PARAM; - PMIX_PROC_FREE(procs, nprocs); goto exit; } rsize = strtoul(members[i+1], NULL, 10); ++i; for (k=0; k < rsize; k++) { - nm = OBJ_NEW(opal_namelist_t); - nm->name.jobid = jobid; - nm->name.vpid = k; - opal_list_append(&mlist, &nm->super); + pxproc.rank = k; + OPAL_PMIX_CONVERT_PROCT(rc, &pname, &pxproc); + if (OPAL_SUCCESS != rc) { + OMPI_ERROR_LOG(rc); + opal_argv_free(members); + OPAL_LIST_DESTRUCT(&ilist); + OPAL_LIST_DESTRUCT(&rlist); + OPAL_LIST_DESTRUCT(&mlist); + goto exit; + } /* see if this needs to be added to our ompi_proc_t array */ - proc = ompi_proc_find_and_add(&nm->name, &isnew); + proc = ompi_proc_find_and_add(&pname, &isnew); if (isnew) { cd = OBJ_NEW(ompi_dpm_proct_caddy_t); cd->p = proc; @@ -385,9 +306,17 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, opal_list_append(&rlist, &cd->super); } } else { - opal_list_append(&mlist, &nm->super); + OPAL_PMIX_CONVERT_PROCT(rc, &pname, &pxproc); + if (OPAL_SUCCESS != rc) { + OMPI_ERROR_LOG(rc); + opal_argv_free(members); + OPAL_LIST_DESTRUCT(&ilist); + OPAL_LIST_DESTRUCT(&rlist); + OPAL_LIST_DESTRUCT(&mlist); + goto exit; + } /* see if this needs to be added to our ompi_proc_t array */ - proc = ompi_proc_find_and_add(&nm->name, &isnew); + proc = ompi_proc_find_and_add(&pname, &isnew); if (isnew) { cd = OBJ_NEW(ompi_dpm_proct_caddy_t); cd->p = proc; @@ -401,6 +330,16 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, } opal_argv_free(members); + /* convert the list of members to a pmix_proc_t array */ + nprocs = opal_list_get_size(&mlist); + PMIX_PROC_CREATE(procs, nprocs); + n = 0; + OPAL_LIST_FOREACH(plt, &mlist, opal_proclist_t) { + memcpy(&procs[n], &plt->procid, sizeof(pmix_proc_t)); + ++n; + } + OPAL_LIST_DESTRUCT(&mlist); + /* tell the host RTE to connect us - this will download * all known data for the nspace's of participating procs * so that add_procs will not result in a slew of lookups */ diff --git a/ompi/runtime/ompi_rte.c b/ompi/runtime/ompi_rte.c index 346b434de0..a2feca8285 100644 --- a/ompi/runtime/ompi_rte.c +++ b/ompi/runtime/ompi_rte.c @@ -62,6 +62,7 @@ opal_process_name_t pmix_name_invalid = {UINT32_MAX, UINT32_MAX}; hwloc_cpuset_t ompi_proc_applied_binding = NULL; pmix_process_info_t pmix_process_info = { .my_name = {OPAL_JOBID_INVALID, OPAL_VPID_INVALID}, + .myprocid = {{0}, PMIX_RANK_INVALID}, .nodename = NULL, .pid = 0, .top_session_dir = NULL, @@ -84,8 +85,6 @@ pmix_process_info_t pmix_process_info = { bool pmix_proc_is_bound = false; bool ompi_singleton = false; -static pmix_proc_t myprocid; - static int _setup_top_session_dir(char **sdir); static int _setup_job_session_dir(char **sdir); static int _setup_proc_session_dir(char **sdir); @@ -550,7 +549,7 @@ int ompi_rte_init(int *pargc, char ***pargv) opal_pmix_setup_nspace_tracker(); /* initialize the selected module */ - if (!PMIx_Initialized() && (PMIX_SUCCESS != (ret = PMIx_Init(&myprocid, NULL, 0)))) { + if (!PMIx_Initialized() && (PMIX_SUCCESS != (ret = PMIx_Init(&pmix_process_info.myprocid, NULL, 0)))) { /* if we get PMIX_ERR_UNREACH indicating that we cannot reach the * server, then we assume we are operating as a singleton */ if (PMIX_ERR_UNREACH == ret) { @@ -565,7 +564,7 @@ int ompi_rte_init(int *pargc, char ***pargv) } /* setup the process name fields - also registers the new nspace */ - OPAL_PMIX_CONVERT_PROCT(rc, &pname, &myprocid); + OPAL_PMIX_CONVERT_PROCT(rc, &pname, &pmix_process_info.myprocid); if (OPAL_SUCCESS != rc) { return rc; } diff --git a/ompi/runtime/ompi_rte.h b/ompi/runtime/ompi_rte.h index 3425939f7e..616df8af34 100644 --- a/ompi/runtime/ompi_rte.h +++ b/ompi/runtime/ompi_rte.h @@ -245,6 +245,7 @@ typedef uint16_t ompi_local_rank_t; typedef struct { opal_process_name_t my_name; + pmix_proc_t myprocid; char *nodename; pid_t pid; char *top_session_dir; diff --git a/opal/mca/btl/tcp/btl_tcp_endpoint.c b/opal/mca/btl/tcp/btl_tcp_endpoint.c index db009b218c..2a381e77dd 100644 --- a/opal/mca/btl/tcp/btl_tcp_endpoint.c +++ b/opal/mca/btl/tcp/btl_tcp_endpoint.c @@ -398,19 +398,19 @@ mca_btl_tcp_endpoint_send_blocking(mca_btl_base_endpoint_t* btl_endpoint, * Send the globally unique identifier for this process to a endpoint on * a newly connected socket. */ -static int +static int mca_btl_tcp_endpoint_send_connect_ack(mca_btl_base_endpoint_t* btl_endpoint) { opal_process_name_t guid = opal_proc_local_get()->proc_name; OPAL_PROCESS_NAME_HTON(guid); - + mca_btl_tcp_endpoint_hs_msg_t hs_msg; opal_string_copy(hs_msg.magic_id, mca_btl_tcp_magic_id_string, sizeof(hs_msg.magic_id)); hs_msg.guid = guid; - - if(sizeof(hs_msg) != - mca_btl_tcp_endpoint_send_blocking(btl_endpoint, + + if(sizeof(hs_msg) != + mca_btl_tcp_endpoint_send_blocking(btl_endpoint, &hs_msg, sizeof(hs_msg))) { opal_show_help("help-mpi-btl-tcp.txt", "client handshake fail", true, opal_process_info.nodename, @@ -649,8 +649,8 @@ static int mca_btl_tcp_endpoint_recv_connect_ack(mca_btl_base_endpoint_t* btl_en * to be able to exchange the opal_process_name_t over the network. */ if (0 != opal_compare_proc(btl_proc->proc_opal->proc_name, guid)) { - BTL_ERROR(("received unexpected process identifier %s", - OPAL_NAME_PRINT(guid))); + BTL_ERROR(("received unexpected process identifier: got %s expected %s", + OPAL_NAME_PRINT(guid), OPAL_NAME_PRINT(btl_proc->proc_opal->proc_name))); btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED; mca_btl_tcp_endpoint_close(btl_endpoint); return OPAL_ERR_UNREACH; @@ -758,9 +758,9 @@ static int mca_btl_tcp_endpoint_start_connect(mca_btl_base_endpoint_t* btl_endpo mca_btl_tcp_proc_tosocks(btl_endpoint->endpoint_addr, &endpoint_addr); /* Bind the socket to one of the addresses associated with - * this btl module. This sets the source IP to one of the - * addresses shared in modex, so that the destination rank - * can properly pair btl modules, even in cases where Linux + * this btl module. This sets the source IP to one of the + * addresses shared in modex, so that the destination rank + * can properly pair btl modules, even in cases where Linux * might do something unexpected with routing */ if (endpoint_addr.ss_family == AF_INET) { assert(NULL != &btl_endpoint->endpoint_btl->tcp_ifaddr); @@ -965,7 +965,7 @@ static void mca_btl_tcp_endpoint_recv_handler(int sd, short flags, void* user) the magic string ID failed). recv_connect_ack already cleaned up the socket. */ /* If we get OPAL_ERROR, the other end closed the connection - * because it has initiated a symetrical connexion on its end. + * because it has initiated a symetrical connexion on its end. * recv_connect_ack already cleaned up the socket. */ } else { diff --git a/opal/mca/pmix/base/pmix_base_fns.c b/opal/mca/pmix/base/pmix_base_fns.c index fa9b446a8f..618c3f559b 100644 --- a/opal/mca/pmix/base/pmix_base_fns.c +++ b/opal/mca/pmix/base/pmix_base_fns.c @@ -137,21 +137,28 @@ int opal_pmix_convert_nspace(opal_jobid_t *jobid, pmix_nspace_t nspace) return OPAL_SUCCESS; } if (NULL != strstr(nspace, "JOBID_WILDCARD")) { - *jobid = OPAL_JOBID_WILDCARD; + if (NULL != jobid) { + *jobid = OPAL_JOBID_WILDCARD; + } return OPAL_SUCCESS; } if (NULL != strstr(nspace, "JOBID_INVALID")) { - *jobid = OPAL_JOBID_INVALID; + if (NULL != jobid) { + *jobid = OPAL_JOBID_INVALID; + } return OPAL_SUCCESS; } - /* cycle across our list of known jobids */ + /* cycle across our list of known nspace's */ OPAL_LIST_FOREACH(nptr, &localnspaces, opal_nptr_t) { if (PMIX_CHECK_NSPACE(nspace, nptr->nspace)) { - *jobid = nptr->jobid; + if (NULL != jobid) { + *jobid = nptr->jobid; + } return OPAL_SUCCESS; } } + /* if we get here, we don't know this nspace */ /* find the "." at the end that indicates the child job */ if (NULL != (p = strrchr(nspace, '.'))) { @@ -167,7 +174,9 @@ int opal_pmix_convert_nspace(opal_jobid_t *jobid, pmix_nspace_t nspace) /* now compress to 16-bits */ jobfam = (uint16_t)(((0x0000ffff & (0xffff0000 & hash32) >> 16)) ^ (0x0000ffff & hash32)); jid = (0xffff0000 & ((uint32_t)jobfam << 16)) | (0x0000ffff & localjob); - *jobid = jid; + if (NULL != jobid) { + *jobid = jid; + } /* save this jobid/nspace pair */ nptr = OBJ_NEW(opal_nptr_t); nptr->jobid = jid; @@ -956,3 +965,7 @@ static void infoitdecon(opal_info_item_t *p) OBJ_CLASS_INSTANCE(opal_info_item_t, opal_list_item_t, infoitmcon, infoitdecon); + +OBJ_CLASS_INSTANCE(opal_proclist_t, + opal_list_item_t, + NULL, NULL); diff --git a/opal/mca/pmix/pmix-internal.h b/opal/mca/pmix/pmix-internal.h index 20f2ce2f9a..5077aced12 100644 --- a/opal/mca/pmix/pmix-internal.h +++ b/opal/mca/pmix/pmix-internal.h @@ -64,6 +64,14 @@ typedef struct { } opal_info_item_t; OBJ_CLASS_DECLARATION(opal_info_item_t); +/* define the equivalent to opal_namelist_t for pmix_proc_t */ +typedef struct { + opal_list_item_t super; + pmix_proc_t procid; +} opal_proclist_t; +OBJ_CLASS_DECLARATION(opal_proclist_t); + + typedef opal_cond_t opal_pmix_condition_t; typedef struct { @@ -599,18 +607,26 @@ OPAL_DECLSPEC int opal_pmix_convert_nspace(opal_jobid_t *jobid, pmix_nspace_t ns OPAL_DECLSPEC void opal_pmix_setup_nspace_tracker(void); OPAL_DECLSPEC void opal_pmix_finalize_nspace_tracker(void); +#define OPAL_SCHEMA_DELIMITER_CHAR '.' +#define OPAL_SCHEMA_WILDCARD_CHAR '*' +#define OPAL_SCHEMA_WILDCARD_STRING "*" +#define OPAL_SCHEMA_INVALID_CHAR '$' +#define OPAL_SCHEMA_INVALID_STRING "$" + /* convert jobid to nspace */ #define OPAL_PMIX_CONVERT_JOBID(n, j) \ opal_pmix_convert_jobid((n), (j)) /* convert vpid to rank */ -#define OPAL_PMIX_CONVERT_VPID(r, v) \ - do { \ - if (OPAL_VPID_WILDCARD == (v)) { \ - (r) = PMIX_RANK_WILDCARD; \ - } else { \ - (r) = (v); \ - } \ +#define OPAL_PMIX_CONVERT_VPID(r, v) \ + do { \ + if (OPAL_VPID_WILDCARD == (v)) { \ + (r) = PMIX_RANK_WILDCARD; \ + } else if (OPAL_VPID_INVALID == (v)) { \ + (r) = PMIX_RANK_INVALID; \ + } else { \ + (r) = (v); \ + } \ } while(0) /* convert opal_process_name_t to pmix_proc_t */ @@ -646,6 +662,33 @@ OPAL_DECLSPEC void opal_pmix_finalize_nspace_tracker(void); } \ } while(0) +#define OPAL_PMIX_CONVERT_PROCT_TO_STRING(s, p) \ + do { \ + if (PMIX_RANK_WILDCARD == (p)->rank) { \ + (void)opal_asprintf((s), "%s.*", (p)->nspace); \ + } else if (PMIX_RANK_INVALID == (p)->rank) { \ + (void)opal_asprintf((s), "%s.$", (p)->nspace); \ + } else { \ + (void)opal_asprintf((s), "%s.%u", (p)->nspace, (p)->rank); \ + } \ + } while(0) + +#define OPAL_PMIX_CONVERT_STRING_TO_PROCT(p, s) \ + do { \ + char *_ptr; \ + _ptr = strrchr((s), '.'); \ + *_ptr = '\0'; \ + _ptr++; \ + PMIX_LOAD_NSPACE((p)->nspace, (s)); \ + if ('*' == *_ptr) { \ + (p)->rank = PMIX_RANK_WILDCARD; \ + } else if ('$' == *_ptr) { \ + (p)->rank = PMIX_RANK_INVALID; \ + } else { \ + (p)->rank = strtoul(_ptr, NULL, 10); \ + } \ + } while(0) + OPAL_DECLSPEC void opal_pmix_value_load(pmix_value_t *v, opal_value_t *kv); diff --git a/test/simple/hello.c b/test/simple/hello.c index 8f832d970d..f1bd7fd3e2 100644 --- a/test/simple/hello.c +++ b/test/simple/hello.c @@ -6,34 +6,18 @@ */ #include -#include "opal/mca/hwloc/base/base.h" #include "mpi.h" -#include "orte/util/proc_info.h" int main(int argc, char* argv[]) { - int rank, size, rc; - hwloc_cpuset_t cpus; - char *bindings = NULL; - pid_t pid; + int rank, size; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); - pid = getpid(); - printf("[%lu] Rank %d: getting topology\n", (unsigned long)pid, rank); - fflush(stdout); - if (OPAL_SUCCESS == opal_hwloc_base_get_topology()) { - cpus = hwloc_bitmap_alloc(); - rc = hwloc_get_cpubind(opal_hwloc_topology, cpus, HWLOC_CPUBIND_PROCESS); - hwloc_bitmap_list_asprintf(&bindings, cpus); - } - - printf("Hello, World, I am %d of %d [%d local peers]: get_cpubind: %d bitmap %s\n", - rank, size, orte_process_info.num_local_peers, rc, - (NULL == bindings) ? "NULL" : bindings); + printf("Hello, World, I am %d of %d\n", rank, size); MPI_Finalize(); return 0; diff --git a/test/simple/intercomm_create.c b/test/simple/intercomm_create.c index 7bd1dc9be6..8e6e2cc3b8 100644 --- a/test/simple/intercomm_create.c +++ b/test/simple/intercomm_create.c @@ -51,6 +51,8 @@ main(int argc, char *argv[]) MPI_Comm_size(MPI_COMM_WORLD, &size); MPI_Comm_rank(MPI_COMM_WORLD, &rank); + printf("Intercomm: Rank %d of %d\n", rank, size); + /* Check to see if we *were* spawned -- because this is a test, we can only assume the existence of this one executable. Hence, we both mpirun it and spawn it. */ diff --git a/test/simple/simple_spawn.c b/test/simple/simple_spawn.c index b57761294f..7ce9a34288 100644 --- a/test/simple/simple_spawn.c +++ b/test/simple/simple_spawn.c @@ -4,7 +4,6 @@ #include #include -#include "opal/runtime/opal.h" #include @@ -13,14 +12,14 @@ int main(int argc, char* argv[]) int msg, rc; MPI_Comm parent, child; int rank, size; - const char *hostname; + char hostname[1024]; pid_t pid; char *env_rank,*env_nspace; env_rank = getenv("PMIX_RANK"); env_nspace = getenv("PMIX_NAMESPACE"); pid = getpid(); - hostname = opal_gethostname(); + gethostname(hostname, 1024); printf("[%s:%s pid %ld] starting up on node %s!\n", env_nspace, env_rank, (long)pid, hostname); From f32febd7f741131c10922a8aa17dc4fac29fd8a4 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 8 Apr 2020 09:24:44 -0700 Subject: [PATCH 2/2] Update PMIx and PRRTE PMIx: - restore OPA support PRRTE: Restore support for several options * -N for ppr:N:node * INHERIT modifier for --map-by option, indicating that the spawned job should inherit the placement options of its parent. Only applicable to dynamically spawned jobs Signed-off-by: Ralph Castain --- opal/mca/pmix/pmix4x/openpmix | 2 +- prrte | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/opal/mca/pmix/pmix4x/openpmix b/opal/mca/pmix/pmix4x/openpmix index 8532539624..5aec0689a5 160000 --- a/opal/mca/pmix/pmix4x/openpmix +++ b/opal/mca/pmix/pmix4x/openpmix @@ -1 +1 @@ -Subproject commit 85325396240dcc155622ec289f9c4d95e056a088 +Subproject commit 5aec0689a5d6aa51a113ea31866f459652d1ded9 diff --git a/prrte b/prrte index 4f7d38b6e9..84de10eb6e 160000 --- a/prrte +++ b/prrte @@ -1 +1 @@ -Subproject commit 4f7d38b6e9b7520496f1222f4adb076e5a604cdd +Subproject commit 84de10eb6ecff6d52a0b1a46a0d0eac007fba519