
Fix an issue with comm_spawn over who sends/receives first in the modex. The modex assumes that the first name on the list is the "root" that will serve as the allgather collector/distributor. The dpm was putting that entity last, which forced us to pre-inform the parent procs of the child procs' contact info since the parent was trying to send to the child.
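To illustrate the ordering requirement, here is a standalone sketch (not OMPI code; the process names and the modex_root() helper are made up for illustration): both sides of connect/accept must build their allgather participant list so that the same name comes first, because the modex takes the first entry as the root that collects and redistributes contact info.

/*
 * Standalone illustration (NOT the OMPI implementation): the modex treats
 * the FIRST name on the participant list as the "root" that gathers and
 * redistributes everyone's contact info, so the two sides of
 * connect/accept have to agree on who that is.
 */
#include <stdio.h>
#include <string.h>

/* pick the allgather root the way the modex does: the first name */
static const char *modex_root(const char **list)
{
    return list[0];
}

int main(void)
{
    /* the accept side (e.g. the spawn parent) lists its own procs first ... */
    const char *parent_view[] = { "parent.0", "parent.1", "child.0", "child.1" };
    /* ... and the connect side lists the remote (parent) procs first,
     * so both lists start with the same name */
    const char *child_view[]  = { "parent.0", "parent.1", "child.0", "child.1" };

    const char *a = modex_root(parent_view);
    const char *b = modex_root(child_view);

    /* if one side had appended the root last, a and b would differ and the
     * designated root would wait for contact info nobody sends it */
    printf("parent picks %s, child picks %s -> %s\n",
           a, b, 0 == strcmp(a, b) ? "agree" : "disagree");
    return 0;
}

The diffs below make the dpm follow exactly this rule: the connecting side now puts the remote procs first on its list, so both sides name the same root.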

Clarify the setting of send_first in the MPI bindings (trivial, I know, but helpful).

Remove the extra xcast of child contact info to the parent job.

This commit was SVN r17952.
Ralph Castain 2008-03-25 14:57:34 +00:00
parent f1e3045296
commit 90107f3c14
6 changed files with 62 additions and 47 deletions

View file

@@ -38,19 +38,22 @@
BEGIN_C_DECLS
/* OMPI port definitions */
#define OMPI_RML_TAG_BASE ORTE_RML_TAG_MAX
#define OMPI_RML_TAG_UDAPL OMPI_RML_TAG_BASE+1
#define OMPI_RML_TAG_OPENIB OMPI_RML_TAG_BASE+2
#define OMPI_RML_TAG_XOPENIB OMPI_RML_TAG_BASE+3
#define OMPI_RML_TAG_COMM_CID_INTRA OMPI_RML_TAG_BASE+4
#define OMPI_RML_TAG_XOOB OMPI_RML_TAG_BASE+5
#define OMPI_RML_TAG_SM_BACK_FILE_CREATED OMPI_RML_TAG_BASE+6
#define OMPI_RML_TAG_WIREUP OMPI_RML_TAG_BASE+7
#define OMPI_CRCP_COORD_BOOKMARK_TAG OMPI_RML_TAG_BASE+8
#define OMPI_COMM_JOIN_TAG OMPI_RML_TAG_BASE+9
+ /* support for shared memory collectives */
+ #define OMPI_RML_TAG_COLL_SM2_BACK_FILE_CREATED OMPI_RML_TAG_BASE+10
#define OMPI_RML_TAG_DYNAMIC OMPI_RML_TAG_BASE+200
/*

View file

@@ -90,9 +90,10 @@ static int connect_accept ( ompi_communicator_t *comm, int root,
OPAL_OUTPUT_VERBOSE((1, ompi_dpm_base_output,
- "%s dpm:orte:connect_accept with port %s",
+ "%s dpm:orte:connect_accept with port %s %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
- ORTE_NAME_PRINT(port)));
+ ORTE_NAME_PRINT(port),
+ send_first ? "sending first" : "recv first"));
size = ompi_comm_size ( comm );
rank = ompi_comm_rank ( comm );
@@ -112,6 +113,11 @@ static int connect_accept ( ompi_communicator_t *comm, int root,
sizeof (ompi_proc_t *));
for(i=0 ; i<group->grp_proc_count ; i++) {
proc_list[i] = ompi_group_peer_lookup(group,i);
+ OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_output,
+ "%s dpm:orte:connect_accept adding %s to proc list",
+ ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+ ORTE_NAME_PRINT(&proc_list[i]->proc_name)));
}
if ( OMPI_COMM_JOIN_TAG != tag ) {
@@ -160,9 +166,17 @@ static int connect_accept ( ompi_communicator_t *comm, int root,
/* Exchange the number and the list of processes in the groups */
if ( send_first ) {
+ OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_output,
+ "%s dpm:orte:connect_accept sending first to %s",
+ ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+ ORTE_NAME_PRINT(rport)));
rc = orte_rml.send_buffer(rport, nbuf, tag, 0);
rc = orte_rml.recv_buffer(rport, nrbuf, tag, 0);
} else {
+ OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_output,
+ "%s dpm:orte:connect_accept recving first from %s",
+ ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+ ORTE_NAME_PRINT(rport)));
rc = orte_rml.recv_buffer(rport, nrbuf, tag, 0);
rc = orte_rml.send_buffer(rport, nbuf, tag, 0);
}
@@ -237,29 +251,45 @@ static int connect_accept ( ompi_communicator_t *comm, int root,
OBJ_CONSTRUCT(&all_procs, opal_list_t);
if (send_first) {
- for (i = 0 ; i < group->grp_proc_count ; ++i) {
- name = OBJ_NEW(orte_namelist_t);
- name->name = ompi_group_peer_lookup(group, i)->proc_name;
- opal_list_append(&all_procs, &name->item);
- }
for (i = 0 ; i < rsize ; ++i) {
name = OBJ_NEW(orte_namelist_t);
name->name = rprocs[i]->proc_name;
opal_list_append(&all_procs, &name->item);
+ OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_output,
+ "%s dpm:orte:connect_accept send first adding %s to allgather list",
+ ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+ ORTE_NAME_PRINT(&name->name)));
}
+ for (i = 0 ; i < group->grp_proc_count ; ++i) {
+ name = OBJ_NEW(orte_namelist_t);
+ name->name = ompi_group_peer_lookup(group, i)->proc_name;
+ opal_list_append(&all_procs, &name->item);
+ OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_output,
+ "%s dpm:orte:connect_accept send first adding %s to allgather list",
+ ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+ ORTE_NAME_PRINT(&name->name)));
+ }
} else {
- for (i = 0 ; i < rsize ; ++i) {
- name = OBJ_NEW(orte_namelist_t);
- name->name = rprocs[i]->proc_name;
- opal_list_append(&all_procs, &name->item);
- }
for (i = 0 ; i < group->grp_proc_count ; ++i) {
name = OBJ_NEW(orte_namelist_t);
name->name = ompi_group_peer_lookup(group, i)->proc_name;
opal_list_append(&all_procs, &name->item);
+ OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_output,
+ "%s dpm:orte:connect_accept recv first adding %s to allgather list",
+ ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+ ORTE_NAME_PRINT(&name->name)));
}
+ for (i = 0 ; i < rsize ; ++i) {
+ name = OBJ_NEW(orte_namelist_t);
+ name->name = rprocs[i]->proc_name;
+ opal_list_append(&all_procs, &name->item);
+ OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_output,
+ "%s dpm:orte:connect_accept recv first adding %s to allgather list",
+ ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+ ORTE_NAME_PRINT(&name->name)));
+ }
}
if (OMPI_SUCCESS != (rc = orte_grpcomm.modex(&all_procs))) {

View file

@@ -39,7 +39,7 @@ int MPI_Comm_accept(char *port_name, MPI_Info info, int root,
MPI_Comm comm, MPI_Comm *newcomm)
{
int rank, rc;
- int send_first=0; /*wrong, we receive first */
+ bool send_first=false; /* we receive first */
ompi_communicator_t *newcomp=MPI_COMM_NULL;
char *tmp_port=NULL;
orte_rml_tag_t tag;

View file

@@ -43,7 +43,7 @@ int MPI_Comm_connect(char *port_name, MPI_Info info, int root,
MPI_Comm comm, MPI_Comm *newcomm)
{
int rank, rc;
- int send_first=1; /* yes, we are the active part in this game */
+ bool send_first=true; /* yes, we are the active part in this game */
ompi_communicator_t *newcomp=MPI_COMM_NULL;
orte_process_name_t port_proc_name;
char *tmp_port=NULL;

View file

@@ -43,7 +43,7 @@ int MPI_Comm_spawn_multiple(int count, char **array_of_commands, char ***array_o
{
int i=0, rc=0, rank=0, flag;
ompi_communicator_t *newcomp=NULL;
- int send_first=0; /* they are contacting us first */
+ bool send_first=false; /* they are contacting us first */
char port_name[MPI_MAX_PORT_NAME];
char *tmp_port;
orte_rml_tag_t tag = 0;

View file

@@ -106,7 +106,7 @@ void orte_plm_base_receive_process_msg(int fd, short event, void *data)
orte_std_cntr_t count;
orte_jobid_t job;
orte_job_t *jdata;
- opal_buffer_t answer, xchg;
+ opal_buffer_t answer;
orte_vpid_t vpid;
orte_proc_t **procs;
orte_proc_state_t state;
@@ -145,24 +145,6 @@ void orte_plm_base_receive_process_msg(int fd, short event, void *data)
/* if the child is an ORTE job, wait for the procs to report they are alive */
if (!(jdata->controls & ORTE_JOB_CONTROL_NON_ORTE_JOB)) {
ORTE_PROGRESSED_WAIT(false, jdata->num_reported, jdata->num_procs);
- /* pack the update command */
- OBJ_CONSTRUCT(&xchg, opal_buffer_t);
- cmd = ORTE_RML_UPDATE_CMD;
- if (ORTE_SUCCESS != (rc = opal_dss.pack(&xchg, &cmd, 1, ORTE_RML_CMD))) {
- ORTE_ERROR_LOG(rc);
- goto ANSWER_LAUNCH;
- }
- /* get the contact data of the child job */
- if (ORTE_SUCCESS != (rc = orte_rml_base_get_contact_info(job, &xchg))) {
- ORTE_ERROR_LOG(rc);
- OBJ_DESTRUCT(&xchg);
- goto ANSWER_LAUNCH;
- }
- /* send it to the parents */
- if (ORTE_SUCCESS != (ret = orte_grpcomm.xcast(mev->sender.jobid, &xchg, ORTE_RML_TAG_RML_INFO_UPDATE))) {
- ORTE_ERROR_LOG(rc);
- }
- OBJ_DESTRUCT(&xchg);
}
ANSWER_LAUNCH: