1
1

coll/basic: fix segmentation fault in neighborhood collectives if the degree

of the topology is higher than the communicator size

It is possible to have a topology degree higher than the size of the communicator,
for example, a periodic Cartesian communicator on MPI_COMM_SELF. This leaves
the neighborhood collectives with a request buffer that is too small. This commit
adds a call that dynamically increases the size of the request buffer if it
is too small.

A better fix would be to create the topology *before* calling the coll_select
routine on a communicator. That will take some discussion, and the solution is
unlikely to be ready anytime soon.

Thanks to Lisandro Dalcin for reporting this.

Original thread: http://www.open-mpi.org/community/lists/devel/2014/08/15713.php

cmr=v1.8.3:reviewer=jsquyres

This commit was SVN r32796.
Этот коммит содержится в:
Nathan Hjelm 2014-09-25 17:43:29 +00:00
родитель 53e012ae97
Коммит 9c788ff940
7 изменённых файлов: 122 добавлений и 11 удалений

Просмотреть файл

@ -12,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
@ -354,6 +354,25 @@ struct mca_coll_basic_module_t {
int mccb_num_reqs;
};
typedef struct mca_coll_basic_module_t mca_coll_basic_module_t;
/**
 * Ensure the module's cached request array can hold at least max_reqs entries.
 *
 * The array is only ever grown, never shrunk. If the current capacity
 * (mccb_num_reqs) is already >= max_reqs nothing is done.
 *
 * @param basic_module  module whose mccb_reqs / mccb_num_reqs are updated
 * @param max_reqs      number of request slots the caller needs
 *
 * @return OMPI_SUCCESS on success (including the no-op case),
 *         OMPI_ERR_OUT_OF_RESOURCE if the array could not be grown. On
 *         failure both mccb_reqs and mccb_num_reqs are left untouched, so
 *         the recorded capacity always matches the real allocation.
 */
static inline int mca_coll_basic_check_for_requests (mca_coll_basic_module_t *basic_module, int max_reqs)
{
    if (basic_module->mccb_num_reqs < max_reqs) {
        void *tmp;

        tmp = realloc (basic_module->mccb_reqs, sizeof(ompi_request_t *) * max_reqs);
        if (NULL == tmp) {
            /* realloc leaves the old block valid on failure; keep the old
             * capacity so a later call retries instead of assuming the
             * buffer was already enlarged. */
            return OMPI_ERR_OUT_OF_RESOURCE;
        }

        /* commit the new buffer and capacity together, only after success */
        basic_module->mccb_reqs = tmp;
        basic_module->mccb_num_reqs = max_reqs;
    }

    return OMPI_SUCCESS;
}
OBJ_CLASS_DECLARATION(mca_coll_basic_module_t);
END_C_DECLS

Просмотреть файл

@ -11,7 +11,7 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2012 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
@ -55,7 +55,7 @@ mca_coll_base_module_t *
mca_coll_basic_comm_query(struct ompi_communicator_t *comm,
int *priority)
{
int size;
int size, ret;
mca_coll_basic_module_t *basic_module;
basic_module = OBJ_NEW(mca_coll_basic_module_t);
@ -70,9 +70,12 @@ mca_coll_basic_comm_query(struct ompi_communicator_t *comm,
} else {
size = ompi_comm_size(comm);
}
basic_module->mccb_num_reqs = size * 2;
basic_module->mccb_reqs = (ompi_request_t**)
malloc(sizeof(ompi_request_t *) * basic_module->mccb_num_reqs);
ret = mca_coll_basic_check_for_requests (basic_module, size * 2);
if (OMPI_SUCCESS != ret) {
OBJ_RELEASE(basic_module);
return NULL;
}
/* Choose whether to use [intra|inter], and [linear|log]-based
* algorithms. */

Просмотреть файл

@ -10,7 +10,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
@ -49,6 +49,12 @@ mca_coll_basic_neighbor_allgather_cart(const void *sbuf, int scount,
ptrdiff_t lb, extent;
int rc = MPI_SUCCESS, dim, nreqs;
/* ensure we have enough storage for requests */
rc = mca_coll_basic_check_for_requests (basic_module, cart->ndims * 4);
if (OMPI_SUCCESS != rc) {
return rc;
}
ompi_datatype_get_extent(rdtype, &lb, &extent);
/* The ordering is defined as -1 then +1 in each dimension in
@ -126,6 +132,12 @@ mca_coll_basic_neighbor_allgather_graph(const void *sbuf, int scount,
mca_topo_base_graph_neighbors_count (comm, rank, &degree);
/* ensure we have enough storage for requests */
rc = mca_coll_basic_check_for_requests (basic_module, degree * 2);
if (OMPI_SUCCESS != rc) {
return rc;
}
edges = graph->edges;
if (rank > 0) {
edges += graph->index[rank - 1];
@ -173,6 +185,12 @@ mca_coll_basic_neighbor_allgather_dist_graph(const void *sbuf, int scount,
indegree = dist_graph->indegree;
outdegree = dist_graph->outdegree;
/* ensure we have enough storage for requests */
rc = mca_coll_basic_check_for_requests (basic_module, indegree + outdegree);
if (OMPI_SUCCESS != rc) {
return rc;
}
inedges = dist_graph->in;
outedges = dist_graph->out;

Просмотреть файл

@ -10,7 +10,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
@ -48,6 +48,12 @@ mca_coll_basic_neighbor_allgatherv_cart(const void *sbuf, int scount, struct omp
ptrdiff_t lb, extent;
int rc = MPI_SUCCESS, dim, i, nreqs;
/* ensure we have enough storage for requests */
rc = mca_coll_basic_check_for_requests (basic_module, cart->ndims * 4);
if (OMPI_SUCCESS != rc) {
return rc;
}
ompi_datatype_get_extent(rdtype, &lb, &extent);
reqs = basic_module->mccb_reqs;
@ -113,6 +119,12 @@ mca_coll_basic_neighbor_allgatherv_graph(const void *sbuf, int scount, struct om
mca_topo_base_graph_neighbors_count (comm, rank, &degree);
/* ensure we have enough storage for requests */
rc = mca_coll_basic_check_for_requests (basic_module, degree * 2);
if (OMPI_SUCCESS != rc) {
return rc;
}
edges = graph->edges;
if (rank > 0) {
edges += graph->index[rank - 1];
@ -158,6 +170,12 @@ mca_coll_basic_neighbor_allgatherv_dist_graph(const void *sbuf, int scount, stru
indegree = dist_graph->indegree;
outdegree = dist_graph->outdegree;
/* ensure we have enough storage for requests */
rc = mca_coll_basic_check_for_requests (basic_module, indegree + outdegree);
if (OMPI_SUCCESS != rc) {
return rc;
}
inedges = dist_graph->in;
outedges = dist_graph->out;

Просмотреть файл

@ -10,7 +10,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
@ -47,6 +47,12 @@ mca_coll_basic_neighbor_alltoall_cart(const void *sbuf, int scount, struct ompi_
ptrdiff_t lb, rdextent, sdextent;
int rc = MPI_SUCCESS, dim, nreqs;
/* ensure we have enough storage for requests */
rc = mca_coll_basic_check_for_requests (basic_module, cart->ndims * 4);
if (OMPI_SUCCESS != rc) {
return rc;
}
ompi_datatype_get_extent(rdtype, &lb, &rdextent);
ompi_datatype_get_extent(sdtype, &lb, &sdextent);
@ -143,6 +149,12 @@ mca_coll_basic_neighbor_alltoall_graph(const void *sbuf, int scount, struct ompi
mca_topo_base_graph_neighbors_count (comm, rank, &degree);
/* ensure we have enough storage for requests */
rc = mca_coll_basic_check_for_requests (basic_module, degree * 2);
if (OMPI_SUCCESS != rc) {
return rc;
}
edges = graph->edges;
if (rank > 0) {
edges += graph->index[rank - 1];
@ -193,6 +205,12 @@ mca_coll_basic_neighbor_alltoall_dist_graph(const void *sbuf, int scount,struct
indegree = dist_graph->indegree;
outdegree = dist_graph->outdegree;
/* ensure we have enough storage for requests */
rc = mca_coll_basic_check_for_requests (basic_module, indegree + outdegree);
if (OMPI_SUCCESS != rc) {
return rc;
}
inedges = dist_graph->in;
outedges = dist_graph->out;

Просмотреть файл

@ -10,7 +10,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
@ -48,6 +48,12 @@ mca_coll_basic_neighbor_alltoallv_cart(const void *sbuf, const int scounts[], co
ptrdiff_t lb, rdextent, sdextent;
ompi_request_t **reqs;
/* ensure we have enough storage for requests */
rc = mca_coll_basic_check_for_requests (basic_module, cart->ndims * 4);
if (OMPI_SUCCESS != rc) {
return rc;
}
ompi_datatype_get_extent(rdtype, &lb, &rdextent);
ompi_datatype_get_extent(sdtype, &lb, &sdextent);
@ -130,6 +136,12 @@ mca_coll_basic_neighbor_alltoallv_graph(const void *sbuf, const int scounts[], c
mca_topo_base_graph_neighbors_count (comm, rank, &degree);
/* ensure we have enough storage for requests */
rc = mca_coll_basic_check_for_requests (basic_module, 2 * degree);
if (OMPI_SUCCESS != rc) {
return rc;
}
edges = graph->edges;
if (rank > 0) {
edges += graph->index[rank - 1];
@ -183,6 +195,12 @@ mca_coll_basic_neighbor_alltoallv_dist_graph(const void *sbuf, const int scounts
indegree = dist_graph->indegree;
outdegree = dist_graph->outdegree;
/* ensure we have enough storage for requests */
rc = mca_coll_basic_check_for_requests (basic_module, indegree + outdegree);
if (OMPI_SUCCESS != rc) {
return rc;
}
inedges = dist_graph->in;
outedges = dist_graph->out;

Просмотреть файл

@ -10,7 +10,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
@ -47,6 +47,11 @@ mca_coll_basic_neighbor_alltoallw_cart(const void *sbuf, const int scounts[], co
int rc = MPI_SUCCESS, dim, i, nreqs;
ompi_request_t **reqs;
/* ensure we have enough storage for requests */
rc = mca_coll_basic_check_for_requests (basic_module, cart->ndims * 4);
if (OMPI_SUCCESS != rc) {
return rc;
}
/* post receives first */
for (dim = 0, i = 0, nreqs = 0, reqs = basic_module->mccb_reqs ; dim < cart->ndims ; ++dim, i += 2) {
@ -126,6 +131,12 @@ mca_coll_basic_neighbor_alltoallw_graph(const void *sbuf, const int scounts[], c
mca_topo_base_graph_neighbors_count (comm, rank, &degree);
/* ensure we have enough storage for requests */
rc = mca_coll_basic_check_for_requests (basic_module, 2 * degree);
if (OMPI_SUCCESS != rc) {
return rc;
}
edges = graph->edges;
if (rank > 0) {
edges += graph->index[rank - 1];
@ -175,6 +186,12 @@ mca_coll_basic_neighbor_alltoallw_dist_graph(const void *sbuf, const int scounts
indegree = dist_graph->indegree;
outdegree = dist_graph->outdegree;
/* ensure we have enough storage for requests */
rc = mca_coll_basic_check_for_requests (basic_module, indegree + outdegree);
if (OMPI_SUCCESS != rc) {
return rc;
}
inedges = dist_graph->in;
outedges = dist_graph->out;