From c71125acfd5a3abfc4f98904b275d1867259d016 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Fri, 1 Nov 2013 16:45:54 +0000 Subject: [PATCH] Using MPI_* functions in iallreduce can cause comm-spawned processes to crash. Update libnbc's iallreduce function to use ompi_* functions instead. cmr=v1.7.4:reviewer=brbarret This commit was SVN r29582. --- ompi/mca/coll/libnbc/nbc_iallreduce.c | 23 +++++++++++++---------- ompi/mca/coll/libnbc/nbc_internal.h | 7 ++++--- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/ompi/mca/coll/libnbc/nbc_iallreduce.c b/ompi/mca/coll/libnbc/nbc_iallreduce.c index ab72a00be6..d463765c99 100644 --- a/ompi/mca/coll/libnbc/nbc_iallreduce.c +++ b/ompi/mca/coll/libnbc/nbc_iallreduce.c @@ -11,6 +11,9 @@ * */ #include "nbc_internal.h" +#include "ompi/communicator/communicator.h" +#include "ompi/datatype/ompi_datatype.h" + #include static inline int allred_sched_diss(int rank, int p, int count, MPI_Datatype datatype, void *sendbuf, void *recvbuf, MPI_Op op, NBC_Schedule *schedule, NBC_Handle *handle); @@ -39,9 +42,10 @@ int ompi_coll_libnbc_iallreduce(void* sendbuf, void* recvbuf, int count, MPI_Dat struct ompi_communicator_t *comm, ompi_request_t ** request, struct mca_coll_base_module_2_0_0_t *module) { - int rank, p, res, size; - MPI_Aint ext; + int rank, p, res; + OPAL_PTRDIFF_TYPE ext, lb; NBC_Schedule *schedule; + size_t size; #ifdef NBC_CACHE_SCHEDULE NBC_Allreduce_args *args, *found, search; #endif @@ -56,14 +60,13 @@ int ompi_coll_libnbc_iallreduce(void* sendbuf, void* recvbuf, int count, MPI_Dat res = NBC_Init_handle(comm, coll_req, libnbc_module); if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; } handle = (*coll_req); - res = MPI_Comm_rank(comm, &rank); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; } - res = MPI_Comm_size(comm, &p); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; } - res = MPI_Type_extent(datatype, &ext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } - res = MPI_Type_size(datatype, &size); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_size() (%i)\n", res); return res; } + + rank = ompi_comm_rank (comm); + p = ompi_comm_size (comm); + res = ompi_datatype_get_extent(datatype, &lb, &ext); + if (OMPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } + res = ompi_datatype_type_size (datatype, &size); + if (OMPI_SUCCESS != res) { printf("MPI Error in MPI_Type_size() (%i)\n", res); return res; } handle->tmpbuf = malloc(ext*count); if(handle->tmpbuf == NULL) { printf("Error in malloc() (%i)\n", res); return NBC_OOR; } diff --git a/ompi/mca/coll/libnbc/nbc_internal.h b/ompi/mca/coll/libnbc/nbc_internal.h index f6d2c9a9a2..002b051777 100644 --- a/ompi/mca/coll/libnbc/nbc_internal.h +++ b/ompi/mca/coll/libnbc/nbc_internal.h @@ -22,6 +22,7 @@ #include "coll_libnbc.h" #include "ompi/include/ompi/constants.h" #include "ompi/request/request.h" +#include "ompi/datatype/ompi_datatype.h" #include #include @@ -479,14 +480,14 @@ static inline int NBC_Type_intrinsic(MPI_Datatype type) { /* let's give a try to inline functions */ static inline int NBC_Copy(void *src, int srccount, MPI_Datatype srctype, void *tgt, int tgtcount, MPI_Datatype tgttype, MPI_Comm comm) { int size, pos, res; - MPI_Aint ext; + OPAL_PTRDIFF_TYPE ext, lb; void *packbuf; if((srctype == tgttype) && NBC_Type_intrinsic(srctype)) { /* if we have the same types and they are contiguous (intrinsic * types are contiguous), we can just use a single memcpy */ - res = MPI_Type_extent(srctype, &ext); - if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } + res = ompi_datatype_get_extent(srctype, &lb, &ext); + if (OMPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; } memcpy(tgt, src, srccount*ext); } else { /* we have to pack and unpack */