From 2f67f29b858ab4966621485b2ecf40c0ccd4c534 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Fri, 10 Oct 2014 11:29:06 +0900 Subject: [PATCH 1/3] Revert "coll/basic: fix segmentation fault in neighborhood collectives if the degree" This reverts commit 9c788ff9400960823637dc0eebb3a8640414fa05. --- ompi/mca/coll/basic/coll_basic.h | 21 +------------------ ompi/mca/coll/basic/coll_basic_module.c | 13 +++++------- .../basic/coll_basic_neighbor_allgather.c | 20 +----------------- .../basic/coll_basic_neighbor_allgatherv.c | 20 +----------------- .../coll/basic/coll_basic_neighbor_alltoall.c | 20 +----------------- .../basic/coll_basic_neighbor_alltoallv.c | 20 +----------------- .../basic/coll_basic_neighbor_alltoallw.c | 19 +---------------- 7 files changed, 11 insertions(+), 122 deletions(-) diff --git a/ompi/mca/coll/basic/coll_basic.h b/ompi/mca/coll/basic/coll_basic.h index 425dd3066d..ca0b6d558f 100644 --- a/ompi/mca/coll/basic/coll_basic.h +++ b/ompi/mca/coll/basic/coll_basic.h @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ * @@ -354,25 +354,6 @@ struct mca_coll_basic_module_t { int mccb_num_reqs; }; typedef struct mca_coll_basic_module_t mca_coll_basic_module_t; - -static inline int mca_coll_basic_check_for_requests (mca_coll_basic_module_t *basic_module, int max_reqs) -{ - if (basic_module->mccb_num_reqs < max_reqs) { - void *tmp; - - basic_module->mccb_num_reqs = max_reqs; - - tmp = realloc (basic_module->mccb_reqs, sizeof(ompi_request_t *) * basic_module->mccb_num_reqs); - if (NULL == tmp) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - basic_module->mccb_reqs = tmp; - } - - return OMPI_SUCCESS; -} - OBJ_CLASS_DECLARATION(mca_coll_basic_module_t); END_C_DECLS diff --git a/ompi/mca/coll/basic/coll_basic_module.c b/ompi/mca/coll/basic/coll_basic_module.c index cb9b0d769b..8bb929a1f1 100644 --- a/ompi/mca/coll/basic/coll_basic_module.c +++ b/ompi/mca/coll/basic/coll_basic_module.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ * @@ -55,7 +55,7 @@ mca_coll_base_module_t * mca_coll_basic_comm_query(struct ompi_communicator_t *comm, int *priority) { - int size, ret; + int size; mca_coll_basic_module_t *basic_module; basic_module = OBJ_NEW(mca_coll_basic_module_t); @@ -70,12 +70,9 @@ mca_coll_basic_comm_query(struct ompi_communicator_t *comm, } else { size = ompi_comm_size(comm); } - - ret = mca_coll_basic_check_for_requests (basic_module, size * 2); - if (OMPI_SUCCESS != ret) { - OBJ_RELEASE(basic_module); - return NULL; - } + basic_module->mccb_num_reqs = size * 2; + basic_module->mccb_reqs = (ompi_request_t**) + malloc(sizeof(ompi_request_t *) * basic_module->mccb_num_reqs); /* Choose whether to use [intra|inter], and [linear|log]-based * algorithms. */ diff --git a/ompi/mca/coll/basic/coll_basic_neighbor_allgather.c b/ompi/mca/coll/basic/coll_basic_neighbor_allgather.c index 31aba0d4bf..8d8242dc9b 100644 --- a/ompi/mca/coll/basic/coll_basic_neighbor_allgather.c +++ b/ompi/mca/coll/basic/coll_basic_neighbor_allgather.c @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -49,12 +49,6 @@ mca_coll_basic_neighbor_allgather_cart(const void *sbuf, int scount, ptrdiff_t lb, extent; int rc = MPI_SUCCESS, dim, nreqs; - /* ensure we have enough storage for requests */ - rc = mca_coll_basic_check_for_requests (basic_module, cart->ndims * 4); - if (OMPI_SUCCESS != rc) { - return rc; - } - ompi_datatype_get_extent(rdtype, &lb, &extent); /* The ordering is defined as -1 then +1 in each dimension in @@ -132,12 +126,6 @@ mca_coll_basic_neighbor_allgather_graph(const void *sbuf, int scount, mca_topo_base_graph_neighbors_count (comm, rank, °ree); - /* ensure we have enough storage for requests */ - rc = mca_coll_basic_check_for_requests (basic_module, degree * 2); - if (OMPI_SUCCESS != rc) { - return rc; - } - edges = graph->edges; if (rank > 0) { edges += graph->index[rank - 1]; @@ -185,12 +173,6 @@ mca_coll_basic_neighbor_allgather_dist_graph(const void *sbuf, int scount, indegree = dist_graph->indegree; outdegree = dist_graph->outdegree; - /* ensure we have enough storage for requests */ - rc = mca_coll_basic_check_for_requests (basic_module, indegree + outdegree); - if (OMPI_SUCCESS != rc) { - return rc; - } - inedges = dist_graph->in; outedges = dist_graph->out; diff --git a/ompi/mca/coll/basic/coll_basic_neighbor_allgatherv.c b/ompi/mca/coll/basic/coll_basic_neighbor_allgatherv.c index 89e4d584af..cdcf91de95 100644 --- a/ompi/mca/coll/basic/coll_basic_neighbor_allgatherv.c +++ b/ompi/mca/coll/basic/coll_basic_neighbor_allgatherv.c @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -48,12 +48,6 @@ mca_coll_basic_neighbor_allgatherv_cart(const void *sbuf, int scount, struct omp ptrdiff_t lb, extent; int rc = MPI_SUCCESS, dim, i, nreqs; - /* ensure we have enough storage for requests */ - rc = mca_coll_basic_check_for_requests (basic_module, cart->ndims * 4); - if (OMPI_SUCCESS != rc) { - return rc; - } - ompi_datatype_get_extent(rdtype, &lb, &extent); reqs = basic_module->mccb_reqs; @@ -119,12 +113,6 @@ mca_coll_basic_neighbor_allgatherv_graph(const void *sbuf, int scount, struct om mca_topo_base_graph_neighbors_count (comm, rank, °ree); - /* ensure we have enough storage for requests */ - rc = mca_coll_basic_check_for_requests (basic_module, degree * 2); - if (OMPI_SUCCESS != rc) { - return rc; - } - edges = graph->edges; if (rank > 0) { edges += graph->index[rank - 1]; @@ -170,12 +158,6 @@ mca_coll_basic_neighbor_allgatherv_dist_graph(const void *sbuf, int scount, stru indegree = dist_graph->indegree; outdegree = dist_graph->outdegree; - /* ensure we have enough storage for requests */ - rc = mca_coll_basic_check_for_requests (basic_module, indegree + outdegree); - if (OMPI_SUCCESS != rc) { - return rc; - } - inedges = dist_graph->in; outedges = dist_graph->out; diff --git a/ompi/mca/coll/basic/coll_basic_neighbor_alltoall.c b/ompi/mca/coll/basic/coll_basic_neighbor_alltoall.c index 4517fd8f4e..289f60acbc 100644 --- a/ompi/mca/coll/basic/coll_basic_neighbor_alltoall.c +++ b/ompi/mca/coll/basic/coll_basic_neighbor_alltoall.c @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -47,12 +47,6 @@ mca_coll_basic_neighbor_alltoall_cart(const void *sbuf, int scount, struct ompi_ ptrdiff_t lb, rdextent, sdextent; int rc = MPI_SUCCESS, dim, nreqs; - /* ensure we have enough storage for requests */ - rc = mca_coll_basic_check_for_requests (basic_module, cart->ndims * 4); - if (OMPI_SUCCESS != rc) { - return rc; - } - ompi_datatype_get_extent(rdtype, &lb, &rdextent); ompi_datatype_get_extent(sdtype, &lb, &sdextent); @@ -149,12 +143,6 @@ mca_coll_basic_neighbor_alltoall_graph(const void *sbuf, int scount, struct ompi mca_topo_base_graph_neighbors_count (comm, rank, °ree); - /* ensure we have enough storage for requests */ - rc = mca_coll_basic_check_for_requests (basic_module, degree * 2); - if (OMPI_SUCCESS != rc) { - return rc; - } - edges = graph->edges; if (rank > 0) { edges += graph->index[rank - 1]; @@ -205,12 +193,6 @@ mca_coll_basic_neighbor_alltoall_dist_graph(const void *sbuf, int scount,struct indegree = dist_graph->indegree; outdegree = dist_graph->outdegree; - /* ensure we have enough storage for requests */ - rc = mca_coll_basic_check_for_requests (basic_module, indegree + outdegree); - if (OMPI_SUCCESS != rc) { - return rc; - } - inedges = dist_graph->in; outedges = dist_graph->out; diff --git a/ompi/mca/coll/basic/coll_basic_neighbor_alltoallv.c b/ompi/mca/coll/basic/coll_basic_neighbor_alltoallv.c index 3c909968cc..9ace900662 100644 --- a/ompi/mca/coll/basic/coll_basic_neighbor_alltoallv.c +++ b/ompi/mca/coll/basic/coll_basic_neighbor_alltoallv.c @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -48,12 +48,6 @@ mca_coll_basic_neighbor_alltoallv_cart(const void *sbuf, const int scounts[], co ptrdiff_t lb, rdextent, sdextent; ompi_request_t **reqs; - /* ensure we have enough storage for requests */ - rc = mca_coll_basic_check_for_requests (basic_module, cart->ndims * 4); - if (OMPI_SUCCESS != rc) { - return rc; - } - ompi_datatype_get_extent(rdtype, &lb, &rdextent); ompi_datatype_get_extent(sdtype, &lb, &sdextent); @@ -136,12 +130,6 @@ mca_coll_basic_neighbor_alltoallv_graph(const void *sbuf, const int scounts[], c mca_topo_base_graph_neighbors_count (comm, rank, °ree); - /* ensure we have enough storage for requests */ - rc = mca_coll_basic_check_for_requests (basic_module, 2 * degree); - if (OMPI_SUCCESS != rc) { - return rc; - } - edges = graph->edges; if (rank > 0) { edges += graph->index[rank - 1]; @@ -195,12 +183,6 @@ mca_coll_basic_neighbor_alltoallv_dist_graph(const void *sbuf, const int scounts indegree = dist_graph->indegree; outdegree = dist_graph->outdegree; - /* ensure we have enough storage for requests */ - rc = mca_coll_basic_check_for_requests (basic_module, indegree + outdegree); - if (OMPI_SUCCESS != rc) { - return rc; - } - inedges = dist_graph->in; outedges = dist_graph->out; diff --git a/ompi/mca/coll/basic/coll_basic_neighbor_alltoallw.c b/ompi/mca/coll/basic/coll_basic_neighbor_alltoallw.c index dcf3592250..28ecf04cbb 100644 --- a/ompi/mca/coll/basic/coll_basic_neighbor_alltoallw.c +++ b/ompi/mca/coll/basic/coll_basic_neighbor_alltoallw.c @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -47,11 +47,6 @@ mca_coll_basic_neighbor_alltoallw_cart(const void *sbuf, const int scounts[], co int rc = MPI_SUCCESS, dim, i, nreqs; ompi_request_t **reqs; - /* ensure we have enough storage for requests */ - rc = mca_coll_basic_check_for_requests (basic_module, cart->ndims * 4); - if (OMPI_SUCCESS != rc) { - return rc; - } /* post receives first */ for (dim = 0, i = 0, nreqs = 0, reqs = basic_module->mccb_reqs ; dim < cart->ndims ; ++dim, i += 2) { @@ -131,12 +126,6 @@ mca_coll_basic_neighbor_alltoallw_graph(const void *sbuf, const int scounts[], c mca_topo_base_graph_neighbors_count (comm, rank, °ree); - /* ensure we have enough storage for requests */ - rc = mca_coll_basic_check_for_requests (basic_module, 2 * degree); - if (OMPI_SUCCESS != rc) { - return rc; - } - edges = graph->edges; if (rank > 0) { edges += graph->index[rank - 1]; @@ -186,12 +175,6 @@ mca_coll_basic_neighbor_alltoallw_dist_graph(const void *sbuf, const int scounts indegree = dist_graph->indegree; outdegree = dist_graph->outdegree; - /* ensure we have enough storage for requests */ - rc = mca_coll_basic_check_for_requests (basic_module, indegree + outdegree); - if (OMPI_SUCCESS != rc) { - return rc; - } - inedges = dist_graph->in; outedges = dist_graph->out; From 76204dfafe48c2859fe3c8250e2d9f3c23799a76 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Fri, 10 Oct 2014 11:56:04 +0900 Subject: [PATCH 2/3] coll/basic: fix segmentation fault in neighborhood collectives if the degree of the topology is higher than the communicator size It is possible to have a topology degree higher than the size of the communicator. For example, a periodic cartesian communicator on MPI_COMM_SELF. This will leave the neighborhood collectives with a request buffer that is too small. This commits introduces a semantic change : from now, c_topo must be set before invoking coll_select --- ompi/mca/coll/basic/coll_basic_module.c | 35 ++++++++- ompi/mca/topo/base/topo_base_cart_create.c | 12 ++-- .../topo/base/topo_base_dist_graph_create.c | 71 +++++++++++++++---- ompi/mca/topo/base/topo_base_graph_create.c | 13 ++-- 4 files changed, 107 insertions(+), 24 deletions(-) diff --git a/ompi/mca/coll/basic/coll_basic_module.c b/ompi/mca/coll/basic/coll_basic_module.c index 8bb929a1f1..574a0e4f98 100644 --- a/ompi/mca/coll/basic/coll_basic_module.c +++ b/ompi/mca/coll/basic/coll_basic_module.c @@ -13,6 +13,8 @@ * Copyright (c) 2012 Sandia National Laboratories. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2014 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -28,6 +30,8 @@ #include "mpi.h" #include "ompi/mca/coll/coll.h" #include "ompi/mca/coll/base/base.h" +#include "ompi/mca/topo/topo.h" +#include "ompi/mca/topo/base/base.h" #include "coll_basic.h" @@ -70,7 +74,36 @@ mca_coll_basic_comm_query(struct ompi_communicator_t *comm, } else { size = ompi_comm_size(comm); } - basic_module->mccb_num_reqs = size * 2; + size *= 2; + if (OMPI_COMM_IS_CART(comm)) { + int cart_size; + mca_topo_base_comm_cart_2_2_0_t *cart; + assert (NULL != comm->c_topo); + cart = comm->c_topo->mtc.cart; + cart_size = cart->ndims * 4; + if (cart_size > size) { + size = cart_size; + } + } else if (OMPI_COMM_IS_GRAPH(comm)) { + int rank, degree; + assert (NULL != comm->c_topo); + rank = ompi_comm_rank (comm); + mca_topo_base_graph_neighbors_count (comm, rank, °ree); + degree *= 2; + if (degree > size) { + size = degree; + } + } else if (OMPI_COMM_IS_DIST_GRAPH(comm)) { + int dist_graph_size; + mca_topo_base_comm_dist_graph_2_2_0_t *dist_graph; + assert (NULL != comm->c_topo); + dist_graph = comm->c_topo->mtc.dist_graph; + dist_graph_size = dist_graph->indegree + dist_graph->outdegree; + if (dist_graph_size > size) { + size = dist_graph_size; + } + } + basic_module->mccb_num_reqs = size; basic_module->mccb_reqs = (ompi_request_t**) malloc(sizeof(ompi_request_t *) * basic_module->mccb_num_reqs); diff --git a/ompi/mca/topo/base/topo_base_cart_create.c b/ompi/mca/topo/base/topo_base_cart_create.c index f0d8d92793..6a678da7ca 100644 --- a/ompi/mca/topo/base/topo_base_cart_create.c +++ b/ompi/mca/topo/base/topo_base_cart_create.c @@ -162,20 +162,24 @@ int mca_topo_base_cart_create(mca_topo_base_module_t *topo, return MPI_ERR_INTERN; } + assert(NULL == new_comm->c_topo); + assert(!(new_comm->c_flags & OMPI_COMM_CART)); + new_comm->c_topo = topo; + new_comm->c_topo->mtc.cart = cart; + new_comm->c_topo->reorder = reorder; + new_comm->c_flags |= OMPI_COMM_CART; ret = ompi_comm_enable(old_comm, new_comm, new_rank, num_procs, topo_procs); if (OMPI_SUCCESS != ret) { /* something wrong happened during setting the communicator */ + new_comm->c_topo = NULL; + new_comm->c_flags &= ~OMPI_COMM_CART; free(topo_procs); OBJ_RELEASE(cart); ompi_comm_free (&new_comm); return ret; } - new_comm->c_topo = topo; - new_comm->c_topo->mtc.cart = cart; - new_comm->c_topo->reorder = reorder; - new_comm->c_flags |= OMPI_COMM_CART; *comm_topo = new_comm; if( MPI_UNDEFINED == new_rank ) { diff --git a/ompi/mca/topo/base/topo_base_dist_graph_create.c b/ompi/mca/topo/base/topo_base_dist_graph_create.c index f8e6dab932..641b62ab83 100644 --- a/ompi/mca/topo/base/topo_base_dist_graph_create.c +++ b/ompi/mca/topo/base/topo_base_dist_graph_create.c @@ -288,28 +288,71 @@ int mca_topo_base_dist_graph_create(mca_topo_base_module_t* module, { int err; - if( OMPI_SUCCESS != (err = ompi_comm_create(comm_old, - comm_old->c_local_group, - newcomm)) ) { - OBJ_RELEASE(module); - return err; + ompi_proc_t **topo_procs = NULL; + int num_procs, ret, rank, i; + ompi_communicator_t *new_comm; + mca_topo_base_comm_dist_graph_2_2_0_t* topo; + num_procs = ompi_comm_size(comm_old); + rank = ompi_comm_rank(comm_old); + topo_procs = (ompi_proc_t**)malloc(num_procs * sizeof(ompi_proc_t *)); + if(OMPI_GROUP_IS_DENSE(comm_old->c_local_group)) { + memcpy(topo_procs, + comm_old->c_local_group->grp_proc_pointers, + num_procs * sizeof(ompi_proc_t *)); + } else { + for(i = 0 ; i < num_procs; i++) { + topo_procs[i] = ompi_group_peer_lookup(comm_old->c_local_group,i); + } + } + new_comm = ompi_comm_allocate(num_procs, 0); + if (NULL == new_comm) { + free(topo_procs); + return OMPI_ERR_OUT_OF_RESOURCE; } - - assert(NULL == (*newcomm)->c_topo); - (*newcomm)->c_topo = module; - (*newcomm)->c_topo->reorder = reorder; - (*newcomm)->c_flags |= OMPI_COMM_DIST_GRAPH; - err = mca_topo_base_dist_graph_distribute(module, - *newcomm, + comm_old, n, nodes, degrees, targets, weights, - &((*newcomm)->c_topo->mtc.dist_graph)); + &topo); if( OMPI_SUCCESS != err ) { + free(topo_procs); ompi_comm_free(newcomm); + return err; } - return err; + + assert(NULL == new_comm->c_topo); + new_comm->c_topo = module; + new_comm->c_topo->reorder = reorder; + new_comm->c_flags |= OMPI_COMM_DIST_GRAPH; + new_comm->c_topo->mtc.dist_graph = topo; + + ret = ompi_comm_enable(comm_old, new_comm, + rank, num_procs, topo_procs); + if (OMPI_SUCCESS != ret) { + if ( NULL != topo->in ) { + free(topo->in); + } + if ( NULL != topo->out ) { + free(topo->out); + } + if ( NULL != topo->inw ) { + free(topo->inw); + } + if ( NULL != topo->outw ) { + free(topo->outw); + } + free(topo); + free(topo_procs); + new_comm->c_topo = NULL; + new_comm->c_flags &= ~OMPI_COMM_DIST_GRAPH; + new_comm->c_topo->mtc.dist_graph = NULL; + ompi_comm_free (&new_comm); + return ret; + } + *newcomm = new_comm; + + return OMPI_SUCCESS; } static void mca_topo_base_comm_dist_graph_2_2_0_construct(mca_topo_base_comm_dist_graph_2_2_0_t * dist_graph) { diff --git a/ompi/mca/topo/base/topo_base_graph_create.c b/ompi/mca/topo/base/topo_base_graph_create.c index d83a9f4738..990ac7228f 100644 --- a/ompi/mca/topo/base/topo_base_graph_create.c +++ b/ompi/mca/topo/base/topo_base_graph_create.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. - * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -123,19 +123,22 @@ int mca_topo_base_graph_create(mca_topo_base_module_t *topo, return OMPI_ERR_OUT_OF_RESOURCE; } + new_comm->c_topo = topo; + new_comm->c_topo->mtc.graph = graph; + new_comm->c_flags |= OMPI_COMM_GRAPH; + new_comm->c_topo->reorder = reorder; + ret = ompi_comm_enable(old_comm, new_comm, new_rank, num_procs, topo_procs); if (OMPI_SUCCESS != ret) { + new_comm->c_topo = NULL; + new_comm->c_flags &= ~OMPI_COMM_GRAPH; free(topo_procs); OBJ_RELEASE(graph); ompi_comm_free (&new_comm); return ret; } - new_comm->c_topo = topo; - new_comm->c_topo->mtc.graph = graph; - new_comm->c_flags |= OMPI_COMM_GRAPH; - new_comm->c_topo->reorder = reorder; *comm_topo = new_comm; if( MPI_UNDEFINED == new_rank ) { From 27e4389259ff3117f6143ca4512bf0685a7a1abd Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Fri, 10 Oct 2014 16:07:20 +0900 Subject: [PATCH 3/3] * comment on communicator creation in mca_topo_base_dist_graph_create(...) * use accesors to retrieve topo info --- ompi/mca/coll/basic/coll_basic_module.c | 16 +++++----- .../topo/base/topo_base_dist_graph_create.c | 29 +++++++++++-------- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/ompi/mca/coll/basic/coll_basic_module.c b/ompi/mca/coll/basic/coll_basic_module.c index 574a0e4f98..80fb7b584e 100644 --- a/ompi/mca/coll/basic/coll_basic_module.c +++ b/ompi/mca/coll/basic/coll_basic_module.c @@ -76,11 +76,10 @@ mca_coll_basic_comm_query(struct ompi_communicator_t *comm, } size *= 2; if (OMPI_COMM_IS_CART(comm)) { - int cart_size; - mca_topo_base_comm_cart_2_2_0_t *cart; + int cart_size, ndims; assert (NULL != comm->c_topo); - cart = comm->c_topo->mtc.cart; - cart_size = cart->ndims * 4; + comm->c_topo->topo.cart.cartdim_get(comm, &ndims); + cart_size = ndims * 4; if (cart_size > size) { size = cart_size; } @@ -88,17 +87,16 @@ mca_coll_basic_comm_query(struct ompi_communicator_t *comm, int rank, degree; assert (NULL != comm->c_topo); rank = ompi_comm_rank (comm); - mca_topo_base_graph_neighbors_count (comm, rank, °ree); + comm->c_topo->topo.graph.graph_neighbors_count (comm, rank, °ree); degree *= 2; if (degree > size) { size = degree; } } else if (OMPI_COMM_IS_DIST_GRAPH(comm)) { - int dist_graph_size; - mca_topo_base_comm_dist_graph_2_2_0_t *dist_graph; + int dist_graph_size, inneighbors, outneighbors, weighted; assert (NULL != comm->c_topo); - dist_graph = comm->c_topo->mtc.dist_graph; - dist_graph_size = dist_graph->indegree + dist_graph->outdegree; + comm->c_topo->topo.dist_graph.dist_graph_neighbors_count(comm, &inneighbors, &outneighbors, &weighted); + dist_graph_size = inneighbors + outneighbors; if (dist_graph_size > size) { size = dist_graph_size; } diff --git a/ompi/mca/topo/base/topo_base_dist_graph_create.c b/ompi/mca/topo/base/topo_base_dist_graph_create.c index 641b62ab83..37e2b4adf8 100644 --- a/ompi/mca/topo/base/topo_base_dist_graph_create.c +++ b/ompi/mca/topo/base/topo_base_dist_graph_create.c @@ -287,23 +287,16 @@ int mca_topo_base_dist_graph_create(mca_topo_base_module_t* module, ompi_communicator_t **newcomm) { int err; - ompi_proc_t **topo_procs = NULL; int num_procs, ret, rank, i; ompi_communicator_t *new_comm; mca_topo_base_comm_dist_graph_2_2_0_t* topo; - num_procs = ompi_comm_size(comm_old); - rank = ompi_comm_rank(comm_old); + topo_procs = (ompi_proc_t**)malloc(num_procs * sizeof(ompi_proc_t *)); - if(OMPI_GROUP_IS_DENSE(comm_old->c_local_group)) { - memcpy(topo_procs, - comm_old->c_local_group->grp_proc_pointers, - num_procs * sizeof(ompi_proc_t *)); - } else { - for(i = 0 ; i < num_procs; i++) { - topo_procs[i] = ompi_group_peer_lookup(comm_old->c_local_group,i); - } + if (NULL == topo_procs) { + return OMPI_ERR_OUT_OF_RESOURCE; } + num_procs = ompi_comm_size(comm_old); new_comm = ompi_comm_allocate(num_procs, 0); if (NULL == new_comm) { free(topo_procs); @@ -317,10 +310,22 @@ int mca_topo_base_dist_graph_create(mca_topo_base_module_t* module, &topo); if( OMPI_SUCCESS != err ) { free(topo_procs); - ompi_comm_free(newcomm); + ompi_comm_free(&new_comm); return err; } + /* we cannot simply call ompi_comm_create because c_topo + must be set before invoking ompi_comm_enable */ + rank = ompi_comm_rank(comm_old); + if(OMPI_GROUP_IS_DENSE(comm_old->c_local_group)) { + memcpy(topo_procs, + comm_old->c_local_group->grp_proc_pointers, + num_procs * sizeof(ompi_proc_t *)); + } else { + for(i = 0 ; i < num_procs; i++) { + topo_procs[i] = ompi_group_peer_lookup(comm_old->c_local_group,i); + } + } assert(NULL == new_comm->c_topo); new_comm->c_topo = module; new_comm->c_topo->reorder = reorder;