1
1

Yet one more fix to intercommunicator splitting logic.

The previous commit f2794740 reverts Nathan's changes. However, it turns out
that I was unable to trace his logic until I started investigating the
icsplit hang. The bug was triggered when splitting an intercommunicator produced
a group where one side of the communicator was empty (icsplit, intercom create #2).
In this case remote_size == 0 and there is no way to distinguish between
an inter- and an intra-communicator.
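Conclusion: we do need to distinguish between intra- and inter-communicators,
so we should pass &ompi_mpi_group_null.group as the remote group to mark the
inter-communicator case (NULL continues to mean intra-communicator).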
Этот коммит содержится в:
Artem Polyakov 2015-12-08 08:38:01 +02:00
родитель 63d8feb31c
Коммит 7690f4027a

Просмотреть файл

@ -148,7 +148,7 @@ int ompi_comm_set_nb ( ompi_communicator_t **ncomm,
local_size = ompi_group_size (local_group);
}
if (NULL != remote_group) {
if ( (NULL != remote_group) && (&ompi_mpi_group_null.group != remote_group) ) {
remote_size = ompi_group_size (remote_group);
}
@ -177,10 +177,10 @@ int ompi_comm_set_nb ( ompi_communicator_t **ncomm,
newcomm->c_my_rank = newcomm->c_local_group->grp_my_rank;
/* Set remote group and duplicate the local comm, if applicable */
if (0 < remote_size) {
if ( NULL != remote_group ) {
ompi_communicator_t *old_localcomm;
if (NULL == remote_group) {
if (&ompi_mpi_group_null.group == remote_group) {
ret = ompi_group_incl(oldcomm->c_remote_group, remote_size,
remote_ranks, &newcomm->c_remote_group);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
@ -432,7 +432,7 @@ int ompi_comm_split( ompi_communicator_t* comm, int color, int key,
int rc=OMPI_SUCCESS;
ompi_communicator_t *newcomp = NULL;
int *lranks=NULL, *rranks=NULL;
ompi_group_t * local_group=NULL;
ompi_group_t * local_group=NULL, *remote_group=NULL;
ompi_comm_allgatherfct *allgatherfct=NULL;
@ -508,6 +508,7 @@ int ompi_comm_split( ompi_communicator_t* comm, int color, int key,
/* Step 2: determine all the information for the remote group */
/* --------------------------------------------------------- */
if ( inter ) {
remote_group = &ompi_mpi_group_null.group;
rsize = comm->c_remote_group->grp_proc_count;
rresults = (int *) malloc ( rsize * 2 * sizeof(int));
if ( NULL == rresults ) {
@ -591,7 +592,7 @@ int ompi_comm_split( ompi_communicator_t* comm, int color, int key,
comm->error_handler,/* error handler */
pass_on_topo,
local_group, /* local group */
NULL); /* remote group */
remote_group); /* remote group */
if ( NULL == newcomp ) {
rc = MPI_ERR_INTERN;