1
1

Merge pull request #896 from hjelmn/comm_set_fix

ompi/comm: fix bug in ompi_comm_set
Этот коммит содержится в:
rhc54 2015-09-15 12:25:55 -07:00
родитель ca4be77ff1 9c45c63143
Коммит 3b4e982f86
3 изменённых файлов: 61 добавлений и 29 удалений

Просмотреть файл

@ -84,8 +84,8 @@ static int ompi_comm_copy_topo (ompi_communicator_t *oldcomm,
ompi_communicator_t *newcomm);
/* idup with local group and info. the local group support is provided to support ompi_comm_set_nb */
static int ompi_comm_idup_internal (ompi_communicator_t *comm, ompi_group_t *group, ompi_info_t *info,
ompi_communicator_t **newcomm, ompi_request_t **req);
static int ompi_comm_idup_internal (ompi_communicator_t *comm, ompi_group_t *group, ompi_group_t *remote_group,
ompi_info_t *info, ompi_communicator_t **newcomm, ompi_request_t **req);
/**********************************************************************/
@ -144,6 +144,14 @@ int ompi_comm_set_nb ( ompi_communicator_t **ncomm,
ompi_communicator_t *newcomm = NULL;
int ret;
if (NULL != local_group) {
local_size = ompi_group_size (local_group);
}
if (NULL != remote_group) {
remote_size = ompi_group_size (remote_group);
}
*req = NULL;
/* ompi_comm_allocate */
@ -170,6 +178,8 @@ int ompi_comm_set_nb ( ompi_communicator_t **ncomm,
/* Set remote group and duplicate the local comm, if applicable */
if (0 < remote_size) {
ompi_communicator_t *old_localcomm;
if (NULL == remote_group || &ompi_mpi_group_null.group == remote_group) {
ret = ompi_group_incl(oldcomm->c_remote_group, remote_size,
remote_ranks, &newcomm->c_remote_group);
@ -181,18 +191,14 @@ int ompi_comm_set_nb ( ompi_communicator_t **ncomm,
OBJ_RETAIN(newcomm->c_remote_group);
ompi_group_increment_proc_count(newcomm->c_remote_group);
}
}
if (0 < remote_size || &ompi_mpi_group_null.group == remote_group) {
newcomm->c_flags |= OMPI_COMM_INTER;
if ( OMPI_COMM_IS_INTRA(oldcomm) ) {
ompi_comm_idup(oldcomm, &newcomm->c_local_comm, req);
} else if (NULL == local_group) {
ompi_comm_idup(oldcomm->c_local_comm, &newcomm->c_local_comm, req);
} else {
old_localcomm = OMPI_COMM_IS_INTRA(oldcomm) ? oldcomm : oldcomm->c_local_comm;
/* NTH: use internal idup function that takes a local group argument */
ompi_comm_idup_internal (oldcomm->c_local_comm, local_group, NULL,
ompi_comm_idup_internal (old_localcomm, newcomm->c_local_group, NULL, NULL,
&newcomm->c_local_comm, req);
}
} else {
newcomm->c_remote_group = newcomm->c_local_group;
OBJ_RETAIN(newcomm->c_remote_group);
@ -267,7 +273,7 @@ int ompi_comm_create ( ompi_communicator_t *comm, ompi_group_t *group,
ompi_communicator_t **newcomm )
{
ompi_communicator_t *newcomp = NULL;
int rsize , lsize;
int rsize;
int mode,i,j;
int *allranks=NULL;
int *rranks=NULL;
@ -278,8 +284,6 @@ int ompi_comm_create ( ompi_communicator_t *comm, ompi_group_t *group,
return OMPI_ERR_BAD_PARAM;
}
lsize = group->grp_proc_count;
if ( OMPI_COMM_IS_INTER(comm) ) {
int tsize;
@ -336,7 +340,7 @@ int ompi_comm_create ( ompi_communicator_t *comm, ompi_group_t *group,
rc = ompi_comm_set ( &newcomp, /* new comm */
comm, /* old comm */
lsize, /* local_size */
0, /* local array size */
NULL, /* local_ranks */
rsize, /* remote_size */
rranks, /* remote_ranks */
@ -999,10 +1003,9 @@ int ompi_comm_dup ( ompi_communicator_t * comm, ompi_communicator_t **newcomm )
int ompi_comm_dup_with_info ( ompi_communicator_t * comm, ompi_info_t *info, ompi_communicator_t **newcomm )
{
ompi_communicator_t *newcomp = NULL;
int rsize = 0, mode = OMPI_COMM_CID_INTRA, rc = OMPI_SUCCESS;
int mode = OMPI_COMM_CID_INTRA, rc = OMPI_SUCCESS;
if ( OMPI_COMM_IS_INTER ( comm ) ){
rsize = comm->c_remote_group->grp_proc_count;
mode = OMPI_COMM_CID_INTER;
}
@ -1010,9 +1013,9 @@ int ompi_comm_dup_with_info ( ompi_communicator_t * comm, ompi_info_t *info, omp
rc = ompi_comm_set ( &newcomp, /* new comm */
comm, /* old comm */
comm->c_local_group->grp_proc_count, /* local_size */
0, /* local array size */
NULL, /* local_procs*/
rsize, /* remote_size */
0, /* remote array size */
NULL, /* remote_procs */
comm->c_keyhash, /* attrs */
comm->error_handler, /* error handler */
@ -1075,17 +1078,17 @@ int ompi_comm_idup (ompi_communicator_t *comm, ompi_communicator_t **newcomm, om
int ompi_comm_idup_with_info (ompi_communicator_t *comm, ompi_info_t *info, ompi_communicator_t **newcomm, ompi_request_t **req)
{
return ompi_comm_idup_internal (comm, comm->c_local_group, info, newcomm, req);
return ompi_comm_idup_internal (comm, comm->c_local_group, comm->c_remote_group, info, newcomm, req);
}
/* NTH: we need a way to idup with a smaller local group so this function takes a local group */
static int ompi_comm_idup_internal (ompi_communicator_t *comm, ompi_group_t *group, ompi_info_t *info,
ompi_communicator_t **newcomm, ompi_request_t **req)
static int ompi_comm_idup_internal (ompi_communicator_t *comm, ompi_group_t *group, ompi_group_t *remote_group,
ompi_info_t *info, ompi_communicator_t **newcomm, ompi_request_t **req)
{
struct ompi_comm_idup_with_info_context *context;
ompi_comm_request_t *request;
ompi_request_t *subreq[1];
int rsize = 0, rc;
int rc;
*newcomm = MPI_COMM_NULL;
@ -1106,15 +1109,15 @@ static int ompi_comm_idup_internal (ompi_communicator_t *comm, ompi_group_t *gro
rc = ompi_comm_set_nb (&context->newcomp, /* new comm */
comm, /* old comm */
group->grp_proc_count, /* local_size */
0, /* local array size */
NULL, /* local_procs */
rsize, /* remote_size */
0, /* remote array size */
NULL, /* remote_procs */
comm->c_keyhash, /* attrs */
comm->error_handler, /* error handler */
true, /* copy the topo */
group, /* local group */
comm->c_remote_group, /* remote group */
remote_group, /* remote group */
subreq); /* new subrequest */
if (NULL == context->newcomp) {
ompi_comm_request_return (request);

Просмотреть файл

@ -535,6 +535,18 @@ int ompi_comm_finalize (void);
/**
* This is THE routine, where all the communicator stuff
* is really set.
*
* @param[out] newcomm new ompi communicator object
* @param[in] oldcomm old communicator
* @param[in] local_size size of local_ranks array
* @param[in] local_ranks local ranks (not used if local_group != NULL)
* @param[in] remote_size size of remote_ranks array
* @param[in] remote_ranks remote ranks (intercomm) (not used if remote_group != NULL)
* @param[in] attr attributes (can be NULL)
* @param[in] errh error handler
* @param[in] copy_topocomponent whether to copy the topology
* @param[in] local_group local process group (may be NULL if local_ranks array supplied)
* @param[in] remote_group remote process group (may be NULL)
*/
OMPI_DECLSPEC int ompi_comm_set ( ompi_communicator_t** newcomm,
ompi_communicator_t* oldcomm,
@ -548,6 +560,23 @@ OMPI_DECLSPEC int ompi_comm_set ( ompi_communicator_t** newcomm,
ompi_group_t *local_group,
ompi_group_t *remote_group );
/**
* This is THE routine, where all the communicator stuff
* is really set. Non-blocking version.
*
* @param[out] newcomm new ompi communicator object
* @param[in] oldcomm old communicator
* @param[in] local_size size of local_ranks array
* @param[in] local_ranks local ranks (not used if local_group != NULL)
* @param[in] remote_size size of remote_ranks array
* @param[in] remote_ranks remote ranks (intercomm) (not used if remote_group != NULL)
* @param[in] attr attributes (can be NULL)
* @param[in] errh error handler
* @param[in] copy_topocomponent whether to copy the topology
* @param[in] local_group local process group (may be NULL if local_ranks array supplied)
* @param[in] remote_group remote process group (may be NULL)
* @param[out] req ompi_request_t object for tracking completion
*/
OMPI_DECLSPEC int ompi_comm_set_nb ( ompi_communicator_t **ncomm,
ompi_communicator_t *oldcomm,
int local_size,

Просмотреть файл

@ -1297,7 +1297,7 @@ static bool ompi_dpm_group_is_dyn (ompi_group_t *group, ompi_jobid_t thisjobid)
{
int size = group ? ompi_group_size (group) : 0;
for (int i = 1 ; i < size ; ++i) {
for (int i = 0 ; i < size ; ++i) {
opal_process_name_t name = ompi_group_get_proc_name (group, i);
if (thisjobid != ((ompi_process_name_t *) &name)->jobid) {