Using MPI_* functions in iallreduce can cause comm-spawned processes to
crash. Update libnbc's iallreduce function to use ompi_* functions instead.

cmr=v1.7.4:reviewer=brbarret

This commit was SVN r29582.
Этот коммит содержится в:
родитель
ee7510b025
Коммит
c71125acfd
@ -11,6 +11,9 @@
|
||||
*
|
||||
*/
|
||||
#include "nbc_internal.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
#include "ompi/datatype/ompi_datatype.h"
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
static inline int allred_sched_diss(int rank, int p, int count, MPI_Datatype datatype, void *sendbuf, void *recvbuf, MPI_Op op, NBC_Schedule *schedule, NBC_Handle *handle);
|
||||
@ -39,9 +42,10 @@ int ompi_coll_libnbc_iallreduce(void* sendbuf, void* recvbuf, int count, MPI_Dat
|
||||
struct ompi_communicator_t *comm, ompi_request_t ** request,
|
||||
struct mca_coll_base_module_2_0_0_t *module)
|
||||
{
|
||||
int rank, p, res, size;
|
||||
MPI_Aint ext;
|
||||
int rank, p, res;
|
||||
OPAL_PTRDIFF_TYPE ext, lb;
|
||||
NBC_Schedule *schedule;
|
||||
size_t size;
|
||||
#ifdef NBC_CACHE_SCHEDULE
|
||||
NBC_Allreduce_args *args, *found, search;
|
||||
#endif
|
||||
@ -56,14 +60,13 @@ int ompi_coll_libnbc_iallreduce(void* sendbuf, void* recvbuf, int count, MPI_Dat
|
||||
res = NBC_Init_handle(comm, coll_req, libnbc_module);
|
||||
if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; }
|
||||
handle = (*coll_req);
|
||||
res = MPI_Comm_rank(comm, &rank);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }
|
||||
res = MPI_Comm_size(comm, &p);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; }
|
||||
res = MPI_Type_extent(datatype, &ext);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
|
||||
res = MPI_Type_size(datatype, &size);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_size() (%i)\n", res); return res; }
|
||||
|
||||
rank = ompi_comm_rank (comm);
|
||||
p = ompi_comm_size (comm);
|
||||
res = ompi_datatype_get_extent(datatype, &lb, &ext);
|
||||
if (OMPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
|
||||
res = ompi_datatype_type_size (datatype, &size);
|
||||
if (OMPI_SUCCESS != res) { printf("MPI Error in MPI_Type_size() (%i)\n", res); return res; }
|
||||
|
||||
handle->tmpbuf = malloc(ext*count);
|
||||
if(handle->tmpbuf == NULL) { printf("Error in malloc() (%i)\n", res); return NBC_OOR; }
|
||||
|
@ -22,6 +22,7 @@
|
||||
#include "coll_libnbc.h"
|
||||
#include "ompi/include/ompi/constants.h"
|
||||
#include "ompi/request/request.h"
|
||||
#include "ompi/datatype/ompi_datatype.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
@ -479,14 +480,14 @@ static inline int NBC_Type_intrinsic(MPI_Datatype type) {
|
||||
/* let's give a try to inline functions */
|
||||
static inline int NBC_Copy(void *src, int srccount, MPI_Datatype srctype, void *tgt, int tgtcount, MPI_Datatype tgttype, MPI_Comm comm) {
|
||||
int size, pos, res;
|
||||
MPI_Aint ext;
|
||||
OPAL_PTRDIFF_TYPE ext, lb;
|
||||
void *packbuf;
|
||||
|
||||
if((srctype == tgttype) && NBC_Type_intrinsic(srctype)) {
|
||||
/* if we have the same types and they are contiguous (intrinsic
|
||||
* types are contiguous), we can just use a single memcpy */
|
||||
res = MPI_Type_extent(srctype, &ext);
|
||||
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
|
||||
res = ompi_datatype_get_extent(srctype, &lb, &ext);
|
||||
if (OMPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
|
||||
memcpy(tgt, src, srccount*ext);
|
||||
} else {
|
||||
/* we have to pack and unpack */
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user