diff --git a/ompi/mca/dpm/dpm.h b/ompi/mca/dpm/dpm.h index c0bd9e75b1..0fc1e28b46 100644 --- a/ompi/mca/dpm/dpm.h +++ b/ompi/mca/dpm/dpm.h @@ -38,19 +38,22 @@ BEGIN_C_DECLS /* OMPI port definitions */ -#define OMPI_RML_TAG_BASE ORTE_RML_TAG_MAX +#define OMPI_RML_TAG_BASE ORTE_RML_TAG_MAX -#define OMPI_RML_TAG_UDAPL OMPI_RML_TAG_BASE+1 -#define OMPI_RML_TAG_OPENIB OMPI_RML_TAG_BASE+2 -#define OMPI_RML_TAG_XOPENIB OMPI_RML_TAG_BASE+3 -#define OMPI_RML_TAG_COMM_CID_INTRA OMPI_RML_TAG_BASE+4 -#define OMPI_RML_TAG_XOOB OMPI_RML_TAG_BASE+5 -#define OMPI_RML_TAG_SM_BACK_FILE_CREATED OMPI_RML_TAG_BASE+6 -#define OMPI_RML_TAG_WIREUP OMPI_RML_TAG_BASE+7 -#define OMPI_CRCP_COORD_BOOKMARK_TAG OMPI_RML_TAG_BASE+8 -#define OMPI_COMM_JOIN_TAG OMPI_RML_TAG_BASE+9 +#define OMPI_RML_TAG_UDAPL OMPI_RML_TAG_BASE+1 +#define OMPI_RML_TAG_OPENIB OMPI_RML_TAG_BASE+2 +#define OMPI_RML_TAG_XOPENIB OMPI_RML_TAG_BASE+3 +#define OMPI_RML_TAG_COMM_CID_INTRA OMPI_RML_TAG_BASE+4 +#define OMPI_RML_TAG_XOOB OMPI_RML_TAG_BASE+5 +#define OMPI_RML_TAG_SM_BACK_FILE_CREATED OMPI_RML_TAG_BASE+6 +#define OMPI_RML_TAG_WIREUP OMPI_RML_TAG_BASE+7 +#define OMPI_CRCP_COORD_BOOKMARK_TAG OMPI_RML_TAG_BASE+8 +#define OMPI_COMM_JOIN_TAG OMPI_RML_TAG_BASE+9 -#define OMPI_RML_TAG_DYNAMIC OMPI_RML_TAG_BASE+200 +/* support for shared memory collectives */ +#define OMPI_RML_TAG_COLL_SM2_BACK_FILE_CREATED OMPI_RML_TAG_BASE+10 + +#define OMPI_RML_TAG_DYNAMIC OMPI_RML_TAG_BASE+200 /* diff --git a/ompi/mca/dpm/orte/dpm_orte.c b/ompi/mca/dpm/orte/dpm_orte.c index e0124b4082..8ba25a9061 100644 --- a/ompi/mca/dpm/orte/dpm_orte.c +++ b/ompi/mca/dpm/orte/dpm_orte.c @@ -90,9 +90,10 @@ static int connect_accept ( ompi_communicator_t *comm, int root, OPAL_OUTPUT_VERBOSE((1, ompi_dpm_base_output, - "%s dpm:orte:connect_accept with port %s", + "%s dpm:orte:connect_accept with port %s %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(port))); + ORTE_NAME_PRINT(port), + send_first ? "sending first" : "recv first")); size = ompi_comm_size ( comm ); rank = ompi_comm_rank ( comm ); @@ -112,6 +113,11 @@ static int connect_accept ( ompi_communicator_t *comm, int root, sizeof (ompi_proc_t *)); for(i=0 ; igrp_proc_count ; i++) proc_list[i] = ompi_group_peer_lookup(group,i); + + OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_output, + "%s dpm:orte:connect_accept adding %s to proc list", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(&proc_list[i]->proc_name))); } if ( OMPI_COMM_JOIN_TAG != tag ) { @@ -160,9 +166,17 @@ static int connect_accept ( ompi_communicator_t *comm, int root, /* Exchange the number and the list of processes in the groups */ if ( send_first ) { + OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_output, + "%s dpm:orte:connect_accept sending first to %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(rport))); rc = orte_rml.send_buffer(rport, nbuf, tag, 0); rc = orte_rml.recv_buffer(rport, nrbuf, tag, 0); } else { + OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_output, + "%s dpm:orte:connect_accept recving first from %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(rport))); rc = orte_rml.recv_buffer(rport, nrbuf, tag, 0); rc = orte_rml.send_buffer(rport, nbuf, tag, 0); } @@ -237,29 +251,45 @@ static int connect_accept ( ompi_communicator_t *comm, int root, OBJ_CONSTRUCT(&all_procs, opal_list_t); if (send_first) { - for (i = 0 ; i < group->grp_proc_count ; ++i) { - name = OBJ_NEW(orte_namelist_t); - name->name = ompi_group_peer_lookup(group, i)->proc_name; - opal_list_append(&all_procs, &name->item); - } - for (i = 0 ; i < rsize ; ++i) { name = OBJ_NEW(orte_namelist_t); name->name = rprocs[i]->proc_name; opal_list_append(&all_procs, &name->item); + OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_output, + "%s dpm:orte:connect_accept send first adding %s to allgather list", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(&name->name))); } + for (i = 0 ; i < group->grp_proc_count ; ++i) { + name = OBJ_NEW(orte_namelist_t); + name->name = ompi_group_peer_lookup(group, i)->proc_name; + opal_list_append(&all_procs, &name->item); + OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_output, + "%s dpm:orte:connect_accept send first adding %s to allgather list", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(&name->name))); + } + } else { - for (i = 0 ; i < rsize ; ++i) { - name = OBJ_NEW(orte_namelist_t); - name->name = rprocs[i]->proc_name; - opal_list_append(&all_procs, &name->item); - } - for (i = 0 ; i < group->grp_proc_count ; ++i) { name = OBJ_NEW(orte_namelist_t); name->name = ompi_group_peer_lookup(group, i)->proc_name; opal_list_append(&all_procs, &name->item); + OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_output, + "%s dpm:orte:connect_accept recv first adding %s to allgather list", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(&name->name))); } + for (i = 0 ; i < rsize ; ++i) { + name = OBJ_NEW(orte_namelist_t); + name->name = rprocs[i]->proc_name; + opal_list_append(&all_procs, &name->item); + OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_output, + "%s dpm:orte:connect_accept recv first adding %s to allgather list", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(&name->name))); + } + } if (OMPI_SUCCESS != (rc = orte_grpcomm.modex(&all_procs))) { diff --git a/ompi/mpi/c/comm_accept.c b/ompi/mpi/c/comm_accept.c index 48619cfaf3..7eb49a0b0a 100644 --- a/ompi/mpi/c/comm_accept.c +++ b/ompi/mpi/c/comm_accept.c @@ -39,7 +39,7 @@ int MPI_Comm_accept(char *port_name, MPI_Info info, int root, MPI_Comm comm, MPI_Comm *newcomm) { int rank, rc; - int send_first=0; /*wrong, we receive first */ + bool send_first=false; /* we receive first */ ompi_communicator_t *newcomp=MPI_COMM_NULL; char *tmp_port=NULL; orte_rml_tag_t tag; diff --git a/ompi/mpi/c/comm_connect.c b/ompi/mpi/c/comm_connect.c index 63f79b4439..20b67003cc 100644 --- a/ompi/mpi/c/comm_connect.c +++ b/ompi/mpi/c/comm_connect.c @@ -43,7 +43,7 @@ int MPI_Comm_connect(char *port_name, MPI_Info info, int root, MPI_Comm comm, MPI_Comm *newcomm) { int rank, rc; - int send_first=1; /* yes, we are the active part in this game */ + bool send_first=true; /* yes, we are the active part in this game */ ompi_communicator_t *newcomp=MPI_COMM_NULL; orte_process_name_t port_proc_name; char *tmp_port=NULL; diff --git a/ompi/mpi/c/comm_spawn_multiple.c b/ompi/mpi/c/comm_spawn_multiple.c index f8d618b7fe..8dea5430bc 100644 --- a/ompi/mpi/c/comm_spawn_multiple.c +++ b/ompi/mpi/c/comm_spawn_multiple.c @@ -43,7 +43,7 @@ int MPI_Comm_spawn_multiple(int count, char **array_of_commands, char ***array_o { int i=0, rc=0, rank=0, flag; ompi_communicator_t *newcomp=NULL; - int send_first=0; /* they are contacting us first */ + bool send_first=false; /* they are contacting us first */ char port_name[MPI_MAX_PORT_NAME]; char *tmp_port; orte_rml_tag_t tag = 0; diff --git a/orte/mca/plm/base/plm_base_receive.c b/orte/mca/plm/base/plm_base_receive.c index d87fe3dd59..9f27bd8e3d 100644 --- a/orte/mca/plm/base/plm_base_receive.c +++ b/orte/mca/plm/base/plm_base_receive.c @@ -106,7 +106,7 @@ void orte_plm_base_receive_process_msg(int fd, short event, void *data) orte_std_cntr_t count; orte_jobid_t job; orte_job_t *jdata; - opal_buffer_t answer, xchg; + opal_buffer_t answer; orte_vpid_t vpid; orte_proc_t **procs; orte_proc_state_t state; @@ -145,24 +145,6 @@ void orte_plm_base_receive_process_msg(int fd, short event, void *data) /* if the child is an ORTE job, wait for the procs to report they are alive */ if (!(jdata->controls & ORTE_JOB_CONTROL_NON_ORTE_JOB)) { ORTE_PROGRESSED_WAIT(false, jdata->num_reported, jdata->num_procs); - /* pack the update command */ - OBJ_CONSTRUCT(&xchg, opal_buffer_t); - cmd = ORTE_RML_UPDATE_CMD; - if (ORTE_SUCCESS != (rc = opal_dss.pack(&xchg, &cmd, 1, ORTE_RML_CMD))) { - ORTE_ERROR_LOG(rc); - goto ANSWER_LAUNCH; - } - /* get the contact data of the child job */ - if (ORTE_SUCCESS != (rc = orte_rml_base_get_contact_info(job, &xchg))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&xchg); - goto ANSWER_LAUNCH; - } - /* send it to the parents */ - if (ORTE_SUCCESS != (ret = orte_grpcomm.xcast(mev->sender.jobid, &xchg, ORTE_RML_TAG_RML_INFO_UPDATE))) { - ORTE_ERROR_LOG(rc); - } - OBJ_DESTRUCT(&xchg); } ANSWER_LAUNCH: