1
1

Add basic algorithms for the remaining non-blocking collectives.

The algorithms are intended for MPI-3.0 compliance and are not
optimized. We should aim to add better algorithms in the future through
cheetah.

MPI_Iallreduce and MPI_Igatherv on intercommunicators are required for
MPI_Comm_idup support.

cmr=v1.7.4:reviewer=brbarret:ticket=trac:2715

This commit was SVN r29333.

The following Trac tickets were found above:
  Ticket 2715 --> https://svn.open-mpi.org/trac/ompi/ticket/2715
Этот коммит содержится в:
Nathan Hjelm 2013-10-02 14:26:23 +00:00
родитель 37d9a727f2
Коммит 7bedf62dd8
18 изменённых файлов: 1318 добавлений и 294 удалений

Просмотреть файл

@ -22,7 +22,6 @@
sources = \
coll_libnbc.h \
coll_libnbc_component.c \
coll_libnbc_not_implemented.c \
coll_libnbc_ireduce_scatter_block.c \
nbc.c \
nbc_internal.h \
@ -36,9 +35,11 @@ sources = \
nbc_iallreduce.c \
nbc_ialltoall.c \
nbc_ialltoallv.c \
nbc_ialltoallw.c \
nbc_ibarrier.c \
nbc_ibcast.c \
nbc_ibcast_inter.c \
nbc_iexscan.c \
nbc_igather.c \
nbc_igatherv.c \
nbc_ineighbor_allgather.c \

Просмотреть файл

@ -240,25 +240,22 @@ libnbc_module_enable(mca_coll_base_module_t *module,
int
ompi_coll_libnbc_progress(void)
{
opal_list_item_t *item;
ompi_coll_libnbc_request_t* request, *next;
if (opal_atomic_trylock(&mca_coll_libnbc_component.progress_lock)) return 0;
for (item = opal_list_get_first(&mca_coll_libnbc_component.active_requests) ;
item != opal_list_get_end(&mca_coll_libnbc_component.active_requests) ;
item = opal_list_get_next(item)) {
ompi_coll_libnbc_request_t* request = (ompi_coll_libnbc_request_t*) item;
OPAL_LIST_FOREACH_SAFE(request, next, &mca_coll_libnbc_component.active_requests,
ompi_coll_libnbc_request_t) {
if (NBC_OK == NBC_Progress(request)) {
/* done, remove and complete */
item = opal_list_remove_item(&mca_coll_libnbc_component.active_requests,
&request->super.super.super);
opal_list_remove_item(&mca_coll_libnbc_component.active_requests,
&request->super.super.super);
request->super.req_status.MPI_ERROR = OMPI_SUCCESS;
OPAL_THREAD_LOCK(&ompi_request_lock);
ompi_request_complete(&request->super, true);
OPAL_THREAD_UNLOCK(&ompi_request_lock);
}
item = opal_list_get_next(item);
}
opal_atomic_unlock(&mca_coll_libnbc_component.progress_lock);

Просмотреть файл

@ -5,6 +5,8 @@
* Copyright (c) 2006 The Technical University of Chemnitz. All
* rights reserved.
* Copyright (c) 2012 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
*
* Author(s): Torsten Hoefler <htor@cs.indiana.edu>
*
@ -146,3 +148,93 @@ int ompi_coll_libnbc_ireduce_scatter_block(void* sendbuf, void* recvbuf, int rec
/* tmpbuf is freed with the handle */
return NBC_OK;
}
/*
 * Non-blocking reduce_scatter_block on an intercommunicator (linear schedule).
 *
 * Schedule built here, with count = rcount * (remote group size):
 *   - every rank sends count elements from sbuf to remote rank 0;
 *   - local rank 0 receives the vector of remote rank 0 into the first half
 *     of handle->tmpbuf, receives the vector of every other remote rank into
 *     the second half and folds it into the first half with op, swaps the
 *     reduced vector with remote rank 0 (the incoming one lands in the second
 *     half) and finally sends one rcount-sized block of the second half to
 *     each remote rank;
 *   - every rank receives rcount elements from remote rank 0 into rbuf.
 *
 * handle->tmpbuf holds 2 * count elements. On success it stays attached to
 * the handle; on any failure after its allocation it is released here and the
 * pointer is reset so it cannot be freed a second time.
 *
 * NOTE(review): the schedule object is not released on the error paths; the
 * matching release routine is not visible from this file - confirm and add.
 *
 * Returns NBC_OK on success, NBC_OOR when an allocation fails, otherwise the
 * error code of the call that failed.
 */
int ompi_coll_libnbc_ireduce_scatter_block_inter(void *sbuf, void *rbuf, int rcount, struct ompi_datatype_t *dtype,
                                                 struct ompi_op_t *op, struct ompi_communicator_t *comm,
                                                 ompi_request_t **request, struct mca_coll_base_module_2_0_0_t *module) {
  int peer, rank, res, count, rsize;
  MPI_Aint ext;
  NBC_Schedule *schedule;
  NBC_Handle *handle;
  ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request;
  ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;

  res = NBC_Init_handle(comm, coll_req, libnbc_module);
  if (NBC_OK != res) { printf("Error in NBC_Init_handle(%i)\n", res); return res; }
  handle = (*coll_req);

  res = MPI_Comm_rank(comm, &rank);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }
  res = MPI_Comm_remote_size(comm, &rsize);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_remote_size() (%i)\n", res); return res; }
  /* keep the return code: the check below must not test a stale value */
  res = MPI_Type_extent(dtype, &ext);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }

  schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule));
  if (NULL == schedule) { printf("Error in malloc()\n"); return NBC_OOR; }

  res = NBC_Sched_create(schedule);
  if (NBC_OK != res) { printf("Error in NBC_Sched_create (%i)\n", res); return res; }

  count = rcount * rsize;

  /* two vectors of count elements: accumulator followed by scratch space */
  handle->tmpbuf = malloc(2*ext*count);
  if (NULL == handle->tmpbuf) { printf("Error in malloc()\n"); return NBC_OOR; }

  /* send my data to the remote root */
  res = NBC_Sched_send(sbuf, false, count, dtype, 0, schedule);
  if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); goto error; }

  if (0 == rank) {
    /* vector of the remote root goes straight into the accumulator */
    res = NBC_Sched_recv((void *) 0, true, count, dtype, 0, schedule);
    if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); goto error; }

    res = NBC_Sched_barrier(schedule);
    if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); goto error; }

    /* receive the remaining remote vectors one by one and reduce them */
    for (peer = 1 ; peer < rsize ; ++peer) {
      res = NBC_Sched_recv((void *)(ext * count), true, count, dtype, peer, schedule);
      if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); goto error; }

      res = NBC_Sched_barrier(schedule);
      if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); goto error; }

      res = NBC_Sched_op((void *) 0, true, (void *)(ext * count), true, (void *) 0, true, count, dtype, op, schedule);
      if (NBC_OK != res) { printf("Error in NBC_Sched_op() (%i)\n", res); goto error; }

      res = NBC_Sched_barrier(schedule);
      if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); goto error; }
    }

    /* exchange data with remote root for scatter phase (we *could* use the local communicator to do the scatter) */
    res = NBC_Sched_recv((void *)(ext * count), true, count, dtype, 0, schedule);
    if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); goto error; }

    res = NBC_Sched_send((void *) 0, true, count, dtype, 0, schedule);
    if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); goto error; }

    res = NBC_Sched_barrier(schedule);
    if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); goto error; }

    /* scatter: block `peer` of the vector received from the remote root */
    for (peer = 0 ; peer < rsize ; ++peer) {
      res = NBC_Sched_send((void *)(ext * (count + peer * rcount)), true, rcount, dtype, peer, schedule);
      if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); goto error; }
    }
  }

  /* receive my block; rbuf is a caller buffer, not an offset into tmpbuf */
  res = NBC_Sched_recv(rbuf, false, rcount, dtype, 0, schedule);
  if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); goto error; }

  /*NBC_PRINT_SCHED(*schedule);*/

  res = NBC_Sched_commit(schedule);
  if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); goto error; }

  res = NBC_Start(handle, schedule);
  if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); goto error; }

  /* tmpbuf is freed with the handle */
  return NBC_OK;

error:
  /* release the scratch buffer exactly once and leave no dangling pointer */
  free(handle->tmpbuf);
  handle->tmpbuf = NULL;
  return res;
}

Просмотреть файл

@ -1,151 +0,0 @@
#include "ompi_config.h"
#include "coll_libnbc.h"
#include "ompi/constants.h"
/* Placeholder for ompi_coll_libnbc_ialltoallw: every argument is ignored and
 * OMPI_ERR_NOT_IMPLEMENTED is returned unconditionally. */
int
ompi_coll_libnbc_ialltoallw(void *sbuf, int *scounts, int *sdisps, struct ompi_datatype_t **sdtypes,
void *rbuf, int *rcounts, int *rdisps, struct ompi_datatype_t **rdtypes,
struct ompi_communicator_t *comm, ompi_request_t **request,
struct mca_coll_base_module_2_0_0_t *module)
{
return OMPI_ERR_NOT_IMPLEMENTED;
}
/* Placeholder for ompi_coll_libnbc_iexscan: every argument is ignored and
 * OMPI_ERR_NOT_IMPLEMENTED is returned unconditionally. */
int
ompi_coll_libnbc_iexscan(void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype,
struct ompi_op_t *op, struct ompi_communicator_t *comm, ompi_request_t **request,
struct mca_coll_base_module_2_0_0_t *module)
{
return OMPI_ERR_NOT_IMPLEMENTED;
}
/* Placeholder for ompi_coll_libnbc_iallgather_inter: every argument is ignored
 * and OMPI_ERR_NOT_IMPLEMENTED is returned unconditionally. */
int
ompi_coll_libnbc_iallgather_inter(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
MPI_Datatype recvtype, struct ompi_communicator_t *comm, ompi_request_t ** request,
struct mca_coll_base_module_2_0_0_t *module)
{
return OMPI_ERR_NOT_IMPLEMENTED;
}
/* Placeholder for ompi_coll_libnbc_iallgatherv_inter: every argument is
 * ignored and OMPI_ERR_NOT_IMPLEMENTED is returned unconditionally. */
int
ompi_coll_libnbc_iallgatherv_inter(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int *recvcounts, int *displs,
MPI_Datatype recvtype, struct ompi_communicator_t *comm, ompi_request_t ** request,
struct mca_coll_base_module_2_0_0_t *module)
{
return OMPI_ERR_NOT_IMPLEMENTED;
}
/* Placeholder for ompi_coll_libnbc_iallreduce_inter: every argument is ignored
 * and OMPI_ERR_NOT_IMPLEMENTED is returned unconditionally. */
int
ompi_coll_libnbc_iallreduce_inter(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
struct ompi_communicator_t *comm, ompi_request_t ** request,
struct mca_coll_base_module_2_0_0_t *module)
{
return OMPI_ERR_NOT_IMPLEMENTED;
}
/* Placeholder for ompi_coll_libnbc_ialltoall_inter: every argument is ignored
 * and OMPI_ERR_NOT_IMPLEMENTED is returned unconditionally. */
int
ompi_coll_libnbc_ialltoall_inter(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
MPI_Datatype recvtype, struct ompi_communicator_t *comm, ompi_request_t ** request,
struct mca_coll_base_module_2_0_0_t *module)
{
return OMPI_ERR_NOT_IMPLEMENTED;
}
/* Placeholder for ompi_coll_libnbc_ialltoallv_inter: every argument is ignored
 * and OMPI_ERR_NOT_IMPLEMENTED is returned unconditionally. */
int
ompi_coll_libnbc_ialltoallv_inter(void* sendbuf, int *sendcounts, int *sdispls,
MPI_Datatype sendtype, void* recvbuf, int *recvcounts, int *rdispls,
MPI_Datatype recvtype, struct ompi_communicator_t *comm, ompi_request_t ** request,
struct mca_coll_base_module_2_0_0_t *module)
{
return OMPI_ERR_NOT_IMPLEMENTED;
}
/* Placeholder for ompi_coll_libnbc_ialltoallw_inter: every argument is ignored
 * and OMPI_ERR_NOT_IMPLEMENTED is returned unconditionally. */
int
ompi_coll_libnbc_ialltoallw_inter(void *sbuf, int *scounts, int *sdisps, struct ompi_datatype_t **sdtypes,
void *rbuf, int *rcounts, int *rdisps, struct ompi_datatype_t **rdtypes,
struct ompi_communicator_t *comm, ompi_request_t **request,
struct mca_coll_base_module_2_0_0_t *module)
{
return OMPI_ERR_NOT_IMPLEMENTED;
}
/* Placeholder for ompi_coll_libnbc_ibarrier_inter: every argument is ignored
 * and OMPI_ERR_NOT_IMPLEMENTED is returned unconditionally. */
int
ompi_coll_libnbc_ibarrier_inter(struct ompi_communicator_t *comm, ompi_request_t ** request,
struct mca_coll_base_module_2_0_0_t *module)
{
return OMPI_ERR_NOT_IMPLEMENTED;
}
/* Placeholder for ompi_coll_libnbc_igather_inter: every argument is ignored
 * and OMPI_ERR_NOT_IMPLEMENTED is returned unconditionally. */
int
ompi_coll_libnbc_igather_inter(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
MPI_Datatype recvtype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request,
struct mca_coll_base_module_2_0_0_t *module)
{
return OMPI_ERR_NOT_IMPLEMENTED;
}
/* Placeholder for ompi_coll_libnbc_igatherv_inter: every argument is ignored
 * and OMPI_ERR_NOT_IMPLEMENTED is returned unconditionally. */
int
ompi_coll_libnbc_igatherv_inter(void* sendbuf, int sendcount, MPI_Datatype sendtype,
void* recvbuf, int *recvcounts, int *displs, MPI_Datatype recvtype,
int root, struct ompi_communicator_t *comm, ompi_request_t ** request,
struct mca_coll_base_module_2_0_0_t *module)
{
return OMPI_ERR_NOT_IMPLEMENTED;
}
/* Placeholder for ompi_coll_libnbc_ireduce_inter: every argument is ignored
 * and OMPI_ERR_NOT_IMPLEMENTED is returned unconditionally. */
int
ompi_coll_libnbc_ireduce_inter(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype,
MPI_Op op, int root, struct ompi_communicator_t *comm, ompi_request_t ** request,
struct mca_coll_base_module_2_0_0_t *module)
{
return OMPI_ERR_NOT_IMPLEMENTED;
}
/* Placeholder for ompi_coll_libnbc_ireduce_scatter_inter: every argument is
 * ignored and OMPI_ERR_NOT_IMPLEMENTED is returned unconditionally. */
int
ompi_coll_libnbc_ireduce_scatter_inter(void* sendbuf, void* recvbuf, int *recvcounts, MPI_Datatype datatype,
MPI_Op op, struct ompi_communicator_t *comm, ompi_request_t ** request,
struct mca_coll_base_module_2_0_0_t *module)
{
return OMPI_ERR_NOT_IMPLEMENTED;
}
/* Placeholder for ompi_coll_libnbc_ireduce_scatter_block_inter: every argument
 * is ignored and OMPI_ERR_NOT_IMPLEMENTED is returned unconditionally. */
int
ompi_coll_libnbc_ireduce_scatter_block_inter(void *sbuf, void *rbuf, int rcount, struct ompi_datatype_t *dtype,
struct ompi_op_t *op, struct ompi_communicator_t *comm,
ompi_request_t **request, struct mca_coll_base_module_2_0_0_t *module)
{
return OMPI_ERR_NOT_IMPLEMENTED;
}
/* Placeholder for ompi_coll_libnbc_iscatter_inter: every argument is ignored
 * and OMPI_ERR_NOT_IMPLEMENTED is returned unconditionally. */
int ompi_coll_libnbc_iscatter_inter(void* sendbuf, int sendcount, MPI_Datatype sendtype,
void* recvbuf, int recvcount, MPI_Datatype recvtype, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
struct mca_coll_base_module_2_0_0_t *module)
{
return OMPI_ERR_NOT_IMPLEMENTED;
}
/* Placeholder for ompi_coll_libnbc_iscatterv_inter: every argument is ignored
 * and OMPI_ERR_NOT_IMPLEMENTED is returned unconditionally. */
int
ompi_coll_libnbc_iscatterv_inter(void* sendbuf, int *sendcounts, int *displs, MPI_Datatype sendtype,
void* recvbuf, int recvcount, MPI_Datatype recvtype, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
struct mca_coll_base_module_2_0_0_t *module)
{
return OMPI_ERR_NOT_IMPLEMENTED;
}

Просмотреть файл

@ -135,3 +135,56 @@ int ompi_coll_libnbc_iallgather(void* sendbuf, int sendcount, MPI_Datatype sendt
return NBC_OK;
}
/*
 * Non-blocking allgather on an intercommunicator (linear schedule).
 *
 * For every rank r of the remote group the schedule contains one receive of
 * recvcount elements of recvtype into block r of recvbuf and one send of the
 * local contribution (sendcount elements of sendtype taken from sendbuf) to r.
 * No scratch memory is needed, so handle->tmpbuf is set to NULL.
 *
 * Returns NBC_OK on success, NBC_OOR when the schedule cannot be allocated,
 * otherwise the error code of the call that failed.
 */
int ompi_coll_libnbc_iallgather_inter(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
                                      MPI_Datatype recvtype, struct ompi_communicator_t *comm, ompi_request_t ** request,
                                      struct mca_coll_base_module_2_0_0_t *module)
{
  int rank, res, r, rsize;
  MPI_Aint rcvext;
  NBC_Schedule *schedule;
  char *rbuf;
  NBC_Handle *handle;
  ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request;
  ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;

  res = NBC_Init_handle(comm, coll_req, libnbc_module);
  if (NBC_OK != res) { printf("Error in NBC_Init_handle(%i)\n", res); return res; }
  handle = (*coll_req);

  res = MPI_Comm_rank(comm, &rank);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }
  res = MPI_Comm_remote_size(comm, &rsize);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_remote_size() (%i)\n", res); return res; }
  res = MPI_Type_extent(recvtype, &rcvext);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }

  handle->tmpbuf = NULL;

  schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule));
  /* res still holds MPI_SUCCESS here, so report the allocation failure explicitly */
  if (NULL == schedule) { printf("Error in malloc()\n"); return NBC_OOR; }

  res = NBC_Sched_create(schedule);
  if (NBC_OK != res) { printf("Error in NBC_Sched_create, (%i)\n", res); return res; }

  /* one round per rank of the remote group */
  for (r = 0 ; r < rsize ; ++r) {
    /* recv from rank r */
    rbuf = ((char *)recvbuf) + r*(recvcount*rcvext);
    res = NBC_Sched_recv(rbuf, false, recvcount, recvtype, r, schedule);
    if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }

    /* send to rank r: the outgoing message is described by the send signature */
    res = NBC_Sched_send(sendbuf, false, sendcount, sendtype, r, schedule);
    if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
  }

  res = NBC_Sched_commit(schedule);
  if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; }

  /*NBC_PRINT_SCHED(*schedule);*/

  res = NBC_Start(handle, schedule);
  if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; }

  return NBC_OK;
}

Просмотреть файл

@ -8,6 +8,8 @@
* Author(s): Torsten Hoefler <htor@cs.indiana.edu>
*
* Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
*
*/
#include "nbc_internal.h"
@ -87,3 +89,58 @@ int ompi_coll_libnbc_iallgatherv(void* sendbuf, int sendcount, MPI_Datatype send
return NBC_OK;
}
/*
 * Non-blocking allgatherv on an intercommunicator (linear schedule).
 *
 * For every remote rank r with recvcounts[r] != 0 one receive of recvcounts[r]
 * elements of recvtype is scheduled at displacement displs[r] (in units of
 * the recvtype extent) inside recvbuf. If sendcount is non-zero the local
 * contribution (sendcount elements of sendtype from sendbuf) is sent to every
 * remote rank. No scratch memory is needed, so handle->tmpbuf is set to NULL.
 *
 * Returns NBC_OK on success, NBC_OOR when the schedule cannot be allocated,
 * otherwise the error code of the call that failed.
 */
int ompi_coll_libnbc_iallgatherv_inter(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int *recvcounts, int *displs,
                                       MPI_Datatype recvtype, struct ompi_communicator_t *comm, ompi_request_t ** request,
                                       struct mca_coll_base_module_2_0_0_t *module)
{
  int rank, res, r, rsize;
  MPI_Aint rcvext;
  NBC_Schedule *schedule;
  NBC_Handle *handle;
  ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request;
  ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;

  res = NBC_Init_handle(comm, coll_req, libnbc_module);
  if (NBC_OK != res) { printf("Error in NBC_Init_handle(%i)\n", res); return res; }
  handle = (*coll_req);

  res = MPI_Comm_rank(comm, &rank);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }
  res = MPI_Comm_remote_size(comm, &rsize);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_remote_size() (%i)\n", res); return res; }
  res = MPI_Type_extent(recvtype, &rcvext);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }

  schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule));
  if (NULL == schedule) {
    /* res still holds MPI_SUCCESS here; switch to the out-of-resources code
     * so neither the message nor the return value claims success */
    res = NBC_OOR;
    printf("Error in malloc() (%i)\n", res);
    return res;
  }

  handle->tmpbuf=NULL;

  res = NBC_Sched_create(schedule);
  if (NBC_OK != res) { printf("Error in NBC_Sched_create, (%i)\n", res); return res; }

  /* do rsize rounds */
  for (r = 0 ; r < rsize ; ++r) {
    char *rbuf = ((char *)recvbuf) + (displs[r]*rcvext);

    /* zero-sized contributions are skipped entirely */
    if (recvcounts[r]) {
      res = NBC_Sched_recv(rbuf, false, recvcounts[r], recvtype, r, schedule);
      if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
    }
  }

  if (sendcount) {
    for (r = 0 ; r < rsize ; ++r) {
      res = NBC_Sched_send(sendbuf, false, sendcount, sendtype, r, schedule);
      if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
    }
  }

  res = NBC_Sched_commit(schedule);
  if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; }

  res = NBC_Start(handle, schedule);
  if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; }

  return NBC_OK;
}

Просмотреть файл

@ -1,9 +1,11 @@
/*
* Copyright (c) 2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2006 The Technical University of Chemnitz. All
* rights reserved.
* Copyright (c) 2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2006 The Technical University of Chemnitz. All
* rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
*
* Author(s): Torsten Hoefler <htor@cs.indiana.edu>
*
@ -13,26 +15,27 @@
static inline int allred_sched_diss(int rank, int p, int count, MPI_Datatype datatype, void *sendbuf, void *recvbuf, MPI_Op op, NBC_Schedule *schedule, NBC_Handle *handle);
static inline int allred_sched_ring(int rank, int p, int count, MPI_Datatype datatype, void *sendbuf, void *recvbuf, MPI_Op op, int size, int ext, NBC_Schedule *schedule, NBC_Handle *handle);
static inline int allred_sched_linear(int rank, int p, void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int ext, int size, NBC_Schedule *schedule, NBC_Handle *handle);
#ifdef NBC_CACHE_SCHEDULE
/* tree comparison function for schedule cache */
int NBC_Allreduce_args_compare(NBC_Allreduce_args *a, NBC_Allreduce_args *b, void *param) {
if( (a->sendbuf == b->sendbuf) &&
if( (a->sendbuf == b->sendbuf) &&
(a->recvbuf == b->recvbuf) &&
(a->count == b->count) &&
(a->count == b->count) &&
(a->datatype == b->datatype) &&
(a->op == b->op) ) {
return 0;
}
if( a->sendbuf < b->sendbuf ) {
if( a->sendbuf < b->sendbuf ) {
return -1;
}
return +1;
}
return +1;
}
#endif
int ompi_coll_libnbc_iallreduce(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
int ompi_coll_libnbc_iallreduce(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
struct ompi_communicator_t *comm, ompi_request_t ** request,
struct mca_coll_base_module_2_0_0_t *module)
{
@ -49,7 +52,7 @@ int ompi_coll_libnbc_iallreduce(void* sendbuf, void* recvbuf, int count, MPI_Dat
ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;
NBC_IN_PLACE(sendbuf, recvbuf, inplace);
res = NBC_Init_handle(comm, coll_req, libnbc_module);
if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; }
handle = (*coll_req);
@ -61,7 +64,7 @@ int ompi_coll_libnbc_iallreduce(void* sendbuf, void* recvbuf, int count, MPI_Dat
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
res = MPI_Type_size(datatype, &size);
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_size() (%i)\n", res); return res; }
handle->tmpbuf = malloc(ext*count);
if(handle->tmpbuf == NULL) { printf("Error in malloc() (%i)\n", res); return NBC_OOR; }
@ -70,14 +73,14 @@ int ompi_coll_libnbc_iallreduce(void* sendbuf, void* recvbuf, int count, MPI_Dat
res = NBC_Copy(sendbuf, count, datatype, recvbuf, count, datatype, comm);
if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; }
}
/* algorithm selection */
if(p < 4 || size*count < 65536 || inplace) {
alg = NBC_ARED_BINOMIAL;
} else {
alg = NBC_ARED_RING;
}
#ifdef NBC_CACHE_SCHEDULE
/* search schedule in communicator specific tree */
search.sendbuf=sendbuf;
@ -103,10 +106,10 @@ int ompi_coll_libnbc_iallreduce(void* sendbuf, void* recvbuf, int count, MPI_Dat
break;
}
if (NBC_OK != res) { printf("Error in Schedule creation() (%i)\n", res); return res; }
res = NBC_Sched_commit(schedule);
if(res != NBC_OK) { free(handle->tmpbuf); printf("Error in NBC_Sched_commit() (%i)\n", res); return res; }
#ifdef NBC_CACHE_SCHEDULE
/* save schedule to tree */
args = (NBC_Allreduce_args*)malloc(sizeof(NBC_Allreduce_args));
@ -116,7 +119,7 @@ int ompi_coll_libnbc_iallreduce(void* sendbuf, void* recvbuf, int count, MPI_Dat
args->datatype=datatype;
args->op=op;
args->schedule=schedule;
res = hb_tree_insert ((hb_tree*)handle->comminfo->NBC_Dict[NBC_ALLREDUCE], args, args, 0);
res = hb_tree_insert ((hb_tree*)handle->comminfo->NBC_Dict[NBC_ALLREDUCE], args, args, 0);
if(res != 0) printf("error in dict_insert() (%i)\n", res);
/* increase number of elements for A2A */
if(++handle->comminfo->NBC_Dict_size[NBC_ALLREDUCE] > NBC_SCHED_DICT_UPPER) {
@ -127,10 +130,55 @@ int ompi_coll_libnbc_iallreduce(void* sendbuf, void* recvbuf, int count, MPI_Dat
schedule=found->schedule;
}
#endif
res = NBC_Start(handle, schedule);
if(res != NBC_OK) { free(handle->tmpbuf); printf("Error in NBC_Start() (%i)\n", res); return res; }
/* tmpbuf is freed with the handle */
return NBC_OK;
}
/*
 * Non-blocking allreduce on an intercommunicator.
 *
 * Allocates a scratch buffer of count elements (extent-sized) in
 * handle->tmpbuf, lets allred_sched_linear() build the schedule using the
 * remote group size, then commits and starts it.
 *
 * On success the scratch buffer stays attached to the handle. On any failure
 * after its allocation it is released here and the pointer is reset so it
 * cannot be freed a second time.
 *
 * NOTE(review): the schedule object is not released on the error paths; the
 * matching release routine is not visible from this file - confirm and add.
 *
 * Returns NBC_OK on success, NBC_OOR when an allocation fails, otherwise the
 * error code of the call that failed.
 */
int ompi_coll_libnbc_iallreduce_inter(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
                                      struct ompi_communicator_t *comm, ompi_request_t ** request,
                                      struct mca_coll_base_module_2_0_0_t *module)
{
  int rank, res, size, rsize;
  MPI_Aint ext;
  NBC_Schedule *schedule;
  NBC_Handle *handle;
  ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request;
  ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;

  res = NBC_Init_handle(comm, coll_req, libnbc_module);
  if (NBC_OK != res) { printf("Error in NBC_Init_handle(%i)\n", res); return res; }
  handle = (*coll_req);

  res = MPI_Comm_rank(comm, &rank);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }
  res = MPI_Comm_remote_size(comm, &rsize);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_remote_size() (%i)\n", res); return res; }
  res = MPI_Type_extent(datatype, &ext);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
  res = MPI_Type_size(datatype, &size);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_size() (%i)\n", res); return res; }

  handle->tmpbuf = malloc(ext*count);
  if (NULL == handle->tmpbuf) {
    /* res still holds MPI_SUCCESS here; report the real failure code */
    res = NBC_OOR;
    printf("Error in malloc() (%i)\n", res);
    return res;
  }

  schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule));
  if (NULL == schedule) {
    /* must not fall out with the stale success code held in res */
    printf("Error in malloc()\n");
    res = NBC_OOR;
    goto error;
  }

  res = NBC_Sched_create(schedule);
  if (NBC_OK != res) { printf("Error in NBC_Sched_create (%i)\n", res); goto error; }

  res = allred_sched_linear(rank, rsize, sendbuf, recvbuf, count, datatype, op, ext, size, schedule, handle);
  if (NBC_OK != res) { printf("Error in Schedule creation() (%i)\n", res); goto error; }

  res = NBC_Sched_commit(schedule);
  if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); goto error; }

  res = NBC_Start(handle, schedule);
  if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); goto error; }

  /* tmpbuf is freed with the handle */
  return NBC_OK;

error:
  /* release the scratch buffer exactly once and leave no dangling pointer */
  free(handle->tmpbuf);
  handle->tmpbuf = NULL;
  return res;
}
@ -139,25 +187,25 @@ int ompi_coll_libnbc_iallreduce(void* sendbuf, void* recvbuf, int count, MPI_Dat
/* binomial allreduce (binomial tree up and binomial bcast down)
* working principle:
* - each node gets a virtual rank vrank
* - the 'root' node get vrank 0
* - the 'root' node get vrank 0
* - node 0 gets the vrank of the 'root'
* - all other ranks stay identical (they do not matter)
*
* Algorithm:
* pairwise exchange
* round r:
* round r:
* grp = rank % 2^r
* if grp == 0: receive from rank + 2^(r-1) if it exists and reduce value
* if grp == 1: send to rank - 2^(r-1) and exit function
*
*
* do this for R=log_2(p) rounds
* followed by a Bcast:
* Algorithm:
* - each node with vrank > 2^r and vrank < 2^r+1 receives from node
* vrank - 2^r (vrank=1 receives from 0, vrank 0 receives never)
* - each node sends each round r to node vrank + 2^r
* - a node stops to send if 2^r > commsize
*
* - a node stops to send if 2^r > commsize
*
*/
#define RANK2VRANK(rank, vrank, root) \
{ \
@ -173,7 +221,7 @@ int ompi_coll_libnbc_iallreduce(void* sendbuf, void* recvbuf, int count, MPI_Dat
}
static inline int allred_sched_diss(int rank, int p, int count, MPI_Datatype datatype, void *sendbuf, void *recvbuf, MPI_Op op, NBC_Schedule *schedule, NBC_Handle *handle) {
int root, vrank, r, maxr, firstred, vpeer, peer, res;
root = 0; /* this makes the code for ireduce and iallreduce nearly identical - could be changed to improve performance */
RANK2VRANK(rank, vrank, root);
maxr = (int)ceil((log((double)p)/LOG2));
@ -219,8 +267,8 @@ static inline int allred_sched_diss(int rank, int p, int count, MPI_Datatype dat
break;
}
}
/* this is the Bcast part - copied with minor changes from nbc_ibcast.c
/* this is the Bcast part - copied with minor changes from nbc_ibcast.c
* changed: buffer -> recvbuf */
RANK2VRANK(rank, vrank, root);
@ -246,7 +294,7 @@ static inline int allred_sched_diss(int rank, int p, int count, MPI_Datatype dat
}
}
/* end of the bcast */
return NBC_OK;
}
@ -254,7 +302,7 @@ static inline int allred_sched_ring(int r, int p, int count, MPI_Datatype dataty
int i; /* runner */
int segsize, *segsizes, *segoffsets; /* segment sizes and offsets per segment (number of segments == number of nodes */
int speer, rpeer; /* send and recvpeer */
if(count == 0) return NBC_OK;
{
@ -285,9 +333,9 @@ static inline int allred_sched_ring(int r, int p, int count, MPI_Datatype dataty
* / -> sum (reduced in a previous step)
*
* *** round 0 ***
* 0 1 2
*
* 00 10 20 0: [1] -> 1
* 0 1 2
*
* 00 10 20 0: [1] -> 1
* 01 11 21 1: [2] -> 2
* 02 12 22 2: [0] -> 0 --> send element (r+1)%p to node (r+1)%p
*
@ -301,7 +349,7 @@ static inline int allred_sched_ring(int r, int p, int count, MPI_Datatype dataty
* *** round 2 ***
* 0 1 2
*
* 00/20 all 20 0: red(2), [2] -> 1
* 00/20 all 20 0: red(2), [2] -> 1
* 01 11/01 all 1: red(0), [0] -> 2
* all 12 22/12 2: red(1), [1] -> 0 --> reduce and send (r-1)%p to node (r+1)%p
*
@ -321,11 +369,11 @@ static inline int allred_sched_ring(int r, int p, int count, MPI_Datatype dataty
*
* -> 4
* *** round 0 ***
* 0 1 2 3
*
* 00 10 20 30 0: [1] -> 1
* 0 1 2 3
*
* 00 10 20 30 0: [1] -> 1
* 01 11 21 31 1: [2] -> 2
* 02 12 22 32 2: [3] -> 3
* 02 12 22 32 2: [3] -> 3
* 03 13 23 33 3: [0] -> 0 --> send element (r+1)%p to node (r+1)%p
*
* *** round 1 ***
@ -333,13 +381,13 @@ static inline int allred_sched_ring(int r, int p, int count, MPI_Datatype dataty
*
* 00+30 10 20 30 0: red(0), [0] -> 1
* 01 11+01 21 31 1: red(1), [1] -> 2
* 02 12 22+12 32 2: red(2), [2] -> 3
* 02 12 22+12 32 2: red(2), [2] -> 3
* 03 13 23 33+23 3: red(3), [3] -> 0 --> reduce and send element (r+0)%p to node (r+1)%p
*
* *** round 2 ***
* 0 1 2 3
*
* 00/30 10+00/30 20 30 0: red(3), [3] -> 1
* 00/30 10+00/30 20 30 0: red(3), [3] -> 1
* 01 11/01 21+11/01 31 1: red(0), [0] -> 2
* 02 12 22/12 32+22/12 2: red(1), [1] -> 3
* 03+33/23 13 23 33/23 3: red(2), [2] -> 0 --> reduce and send (r-1)%p to node (r+1)%p
@ -347,14 +395,14 @@ static inline int allred_sched_ring(int r, int p, int count, MPI_Datatype dataty
* *** round 3 ***
* 0 1 2 3
*
* 00/30 10/00/30 all 30 0: red(2), [2] -> 1
* 00/30 10/00/30 all 30 0: red(2), [2] -> 1
* 01 11/01 21/11/01 all 1: red(3), [3] -> 2
* all 12 22/12 32/22/12 2: red(0), [0] -> 3
* 03/33/23 all 23 33/23 3: red(1), [1] -> 0 --> reduce and send (r-2)%p to node (r+1)%p
* 03/33/23 all 23 33/23 3: red(1), [1] -> 0 --> reduce and send (r-2)%p to node (r+1)%p
*
* *** round 4 ***
* 0 1 2 3
*
* 0 1 2 3
*
* 00/30 10/00/30 all all 0: [1] -> 1
* all 11/01 21/11/01 all 1: [2] -> 2
* all all 22/12 32/22/12 2: [3] -> 3
@ -362,7 +410,7 @@ static inline int allred_sched_ring(int r, int p, int count, MPI_Datatype dataty
*
* *** round 5 ***
* 0 1 2 3
*
*
* all 10/00/30 all all 0: [0] -> 1
* all all 21/11/01 all 1: [1] -> 2
* all all all 32/22/12 2: [3] -> 3
@ -370,10 +418,10 @@ static inline int allred_sched_ring(int r, int p, int count, MPI_Datatype dataty
*
* *** round 6 ***
* 0 1 2 3
*
* all all all all
* all all all all
* all all all all
*
* all all all all
* all all all all
* all all all all
* all all all all receive element (r-1)%p
*
* 2p-2 rounds ... every node does p-1 reductions and p-1 sends
@ -412,9 +460,63 @@ static inline int allred_sched_ring(int r, int p, int count, MPI_Datatype dataty
NBC_Sched_send((char*)recvbuf+soffset, false, segsizes[selement], datatype, speer, schedule);
NBC_Sched_recv((char*)recvbuf+roffset, false, segsizes[relement], datatype, rpeer, schedule);
NBC_Sched_barrier(schedule);
round++;
round++;
} while (round < 2*p-2);
}
return NBC_OK;
}
/*
 * Build the linear allreduce schedule used on intercommunicators.
 *
 * Every rank sends its input to remote rank 0 and receives count elements
 * from remote rank 0 into recvbuf. Local rank 0 additionally reduces the
 * inputs of the remaining remote ranks into recvbuf (staging each one at
 * offset 0 of the temporary buffer), swaps its result with remote rank 0 and
 * forwards the vector it got back to remote ranks 1 .. rsize-1.
 *
 * ext, size and handle are accepted but not used here.
 * Returns NBC_OK, or the code of the first scheduling call that fails.
 */
static inline int allred_sched_linear(int rank, int rsize, void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
                                      MPI_Op op, int ext, int size, NBC_Schedule *schedule, NBC_Handle *handle) {
  int rc;
  int remote;

  /* an empty message needs no schedule entries */
  if (0 == count) {
    return NBC_OK;
  }

  /* send my data to the remote root */
  rc = NBC_Sched_send (sendbuf, false, count, datatype, 0, schedule);
  if (rc != NBC_OK) {
    printf("Error in NBC_Sched_send() (%i)\n", rc);
    return rc;
  }

  rc = NBC_Sched_recv (recvbuf, false, count, datatype, 0, schedule);
  if (rc != NBC_OK) {
    printf("Error in NBC_Sched_recv() (%i)\n", rc);
    return rc;
  }

  /* everything below is the job of the local root only */
  if (rank != 0) {
    return NBC_OK;
  }

  /* wait for data from the remote root */
  rc = NBC_Sched_barrier (schedule);
  if (rc != NBC_OK) {
    printf("Error in NBC_Sched_barrier() (%i)\n", rc);
    return rc;
  }

  /* get data from remote peers and reduce */
  remote = 1;
  while (remote < rsize) {
    rc = NBC_Sched_recv (0, true, count, datatype, remote, schedule);
    if (rc != NBC_OK) {
      printf("Error in NBC_Sched_recv() (%i)\n", rc);
      return rc;
    }

    rc = NBC_Sched_barrier (schedule);
    if (rc != NBC_OK) {
      printf("Error in NBC_Sched_barrier() (%i)\n", rc);
      return rc;
    }

    rc = NBC_Sched_op (recvbuf, false, 0, true, recvbuf, false, count, datatype, op, schedule);
    if (rc != NBC_OK) {
      printf("Error in NBC_Sched_op() (%i)\n", rc);
      return rc;
    }

    rc = NBC_Sched_barrier (schedule);
    if (rc != NBC_OK) {
      printf("Error in NBC_Sched_barrier() (%i)\n", rc);
      return rc;
    }

    ++remote;
  }

  /* exchange our result with the remote root (each root will broadcast to the other's peers) */
  rc = NBC_Sched_recv (0, true, count, datatype, 0, schedule);
  if (rc != NBC_OK) {
    printf("Error in NBC_Sched_recv() (%i)\n", rc);
    return rc;
  }

  rc = NBC_Sched_send (recvbuf, false, count, datatype, 0, schedule);
  if (rc != NBC_OK) {
    printf("Error in NBC_Sched_send() (%i)\n", rc);
    return rc;
  }

  /* wait for data from remote root */
  rc = NBC_Sched_barrier (schedule);
  if (rc != NBC_OK) {
    printf("Error in NBC_Sched_barrier() (%i)\n", rc);
    return rc;
  }

  /* broadcast the result to all remote peers */
  for (remote = 1 ; remote < rsize ; remote++) {
    rc = NBC_Sched_send (0, true, count, datatype, remote, schedule);
    if (rc != NBC_OK) {
      printf("Error in NBC_Sched_send() (%i)\n", rc);
      return rc;
    }
  }

  return NBC_OK;
}

Просмотреть файл

@ -1,9 +1,11 @@
/*
* Copyright (c) 2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2006 The Technical University of Chemnitz. All
* rights reserved.
* Copyright (c) 2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2006 The Technical University of Chemnitz. All
* rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
*
* Author(s): Torsten Hoefler <htor@cs.indiana.edu>
*
@ -191,6 +193,61 @@ int ompi_coll_libnbc_ialltoall(void* sendbuf, int sendcount, MPI_Datatype sendty
return NBC_OK;
}
/*
 * Non-blocking alltoall on an intercommunicator (linear schedule).
 *
 * For every rank i of the remote group the schedule contains one send of
 * block i of sendbuf (sendcount elements of sendtype) to i and one receive of
 * recvcount elements of recvtype from i into block i of recvbuf.
 * No scratch memory is needed, so handle->tmpbuf is set to NULL.
 *
 * Returns NBC_OK on success, NBC_OOR when the schedule cannot be allocated,
 * otherwise the error code of the call that failed.
 */
int ompi_coll_libnbc_ialltoall_inter (void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
                                      MPI_Datatype recvtype, struct ompi_communicator_t *comm, ompi_request_t ** request,
                                      struct mca_coll_base_module_2_0_0_t *module)
{
  int rank, res, i, rsize;
  MPI_Aint sndext, rcvext;
  NBC_Schedule *schedule;
  char *rbuf, *sbuf;
  NBC_Handle *handle;
  ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request;
  ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;

  res = NBC_Init_handle(comm, coll_req, libnbc_module);
  if (NBC_OK != res) { printf("Error in NBC_Init_handle(%i)\n", res); return res; }
  handle = (*coll_req);

  res = MPI_Comm_remote_size (comm, &rsize);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_remote_size() (%i)\n", res); return res; }
  res = MPI_Comm_rank(comm, &rank);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }
  res = MPI_Type_extent(sendtype, &sndext);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
  res = MPI_Type_extent(recvtype, &rcvext);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }

  schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule));
  if (NULL == schedule) {
    /* res still holds MPI_SUCCESS here; switch to the out-of-resources code
     * so neither the message nor the return value claims success */
    res = NBC_OOR;
    printf("Error in malloc() (%i)\n", res);
    return res;
  }

  handle->tmpbuf=NULL;

  res = NBC_Sched_create(schedule);
  if (NBC_OK != res) { printf("Error in NBC_Sched_create (%i)\n", res); return res; }

  for (i = 0; i < rsize; i++) {
    /* post all sends */
    sbuf = ((char *) sendbuf) + (i * sendcount * sndext);
    res = NBC_Sched_send(sbuf, false, sendcount, sendtype, i, schedule);
    if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }

    /* post all receives */
    rbuf = ((char *) recvbuf) + (i * recvcount * rcvext);
    res = NBC_Sched_recv(rbuf, false, recvcount, recvtype, i, schedule);
    if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
  }

  /*NBC_PRINT_SCHED(*schedule);*/

  res = NBC_Sched_commit(schedule);
  if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; }

  res = NBC_Start(handle, schedule);
  if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; }

  return NBC_OK;
}
static inline int a2a_sched_pairwise(int rank, int p, MPI_Aint sndext, MPI_Aint rcvext, NBC_Schedule* schedule, void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm) {
int res, r, sndpeer, rcvpeer;
char *rbuf, *sbuf;

Просмотреть файл

@ -84,3 +84,63 @@ int ompi_coll_libnbc_ialltoallv(void* sendbuf, int *sendcounts, int *sdispls,
return NBC_OK;
}
/* Simple linear Alltoallv for intercommunicators.
 *
 * For every rank i of the remote group a send of sendcounts[i] elements
 * (taken sdispls[i] send-type extents into sendbuf) and a receive of
 * recvcounts[i] elements (placed rdispls[i] receive-type extents into recvbuf)
 * is scheduled. Peers with a zero count are skipped.
 *
 * Returns NBC_OK once the schedule has been started, NBC_OOR if the schedule
 * object cannot be allocated, and otherwise the error code of the failing call.
 */
int ompi_coll_libnbc_ialltoallv_inter (void* sendbuf, int *sendcounts, int *sdispls,
                                       MPI_Datatype sendtype, void* recvbuf, int *recvcounts, int *rdispls,
                                       MPI_Datatype recvtype, struct ompi_communicator_t *comm, ompi_request_t ** request,
                                       struct mca_coll_base_module_2_0_0_t *module)
{
  int rank, res, i, rsize;
  MPI_Aint sndext, rcvext;
  NBC_Schedule *schedule;
  char *rbuf, *sbuf;
  NBC_Handle *handle;
  ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request;
  ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;

  res = NBC_Init_handle(comm, coll_req, libnbc_module);
  if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; }
  handle = (*coll_req);

  res = MPI_Comm_rank(comm, &rank);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }
  res = MPI_Type_extent(sendtype, &sndext);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
  res = MPI_Type_extent(recvtype, &rcvext);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }

  /* rsize bounds the loop below, so a failed query must not be ignored */
  res = MPI_Comm_remote_size (comm, &rsize);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_remote_size() (%i)\n", res); return res; }

  schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule));
  if (NULL == schedule) {
    /* res holds a success code at this point; report out-of-resources */
    res = NBC_OOR;
    printf("Error in malloc() (%i)\n", res);
    return res;
  }

  /* this algorithm needs no temporary buffer */
  handle->tmpbuf=NULL;

  res = NBC_Sched_create(schedule);
  if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; }

  for (i = 0; i < rsize; i++) {
    /* post all sends */
    if(sendcounts[i] != 0) {
      sbuf = ((char *) sendbuf) + (sdispls[i] * sndext);
      res = NBC_Sched_send(sbuf, false, sendcounts[i], sendtype, i, schedule);
      if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
    }
    /* post all receives */
    if(recvcounts[i] != 0) {
      rbuf = ((char *) recvbuf) + (rdispls[i] * rcvext);
      res = NBC_Sched_recv(rbuf, false, recvcounts[i], recvtype, i, schedule);
      if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
    }
  }

  /*NBC_PRINT_SCHED(*schedule);*/

  res = NBC_Sched_commit(schedule);
  if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; }

  res = NBC_Start(handle, schedule);
  if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; }

  return NBC_OK;
}

136
ompi/mca/coll/libnbc/nbc_ialltoallw.c Обычный файл
Просмотреть файл

@ -0,0 +1,136 @@
/*
* Copyright (c) 2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2006 The Technical University of Chemnitz. All
* rights reserved.
*
* Author(s): Torsten Hoefler <htor@cs.indiana.edu>
*
*/
#include "nbc_internal.h"
/* an alltoallw schedule can not be cached easily because the contents
 * of the recvcounts array may change, so a comparison of the address
 * would not be sufficient ... we simply do not cache it */
/* Simple linear Alltoallw for intracommunicators.
 *
 * The local contribution is copied directly (unless the operation is flagged
 * in-place); for every other rank i a send of sendcounts[i] elements of
 * sendtypes[i] and a receive of recvcounts[i] elements of recvtypes[i] is
 * scheduled. Displacements are applied as byte offsets. Peers with a zero
 * count are skipped.
 *
 * Returns NBC_OK once the schedule has been started, NBC_OOR if the schedule
 * object cannot be allocated, and otherwise the error code of the failing call.
 *
 * NOTE(review): in the in-place case the loop below still reads the send-side
 * arrays and sends from / receives into the same buffer -- confirm that
 * in-place calls are really supported by this algorithm.
 */
int ompi_coll_libnbc_ialltoallw(void* sendbuf, int *sendcounts, int *sdispls,
                                MPI_Datatype sendtypes[], void* recvbuf, int *recvcounts, int *rdispls,
                                MPI_Datatype recvtypes[], struct ompi_communicator_t *comm, ompi_request_t ** request,
                                struct mca_coll_base_module_2_0_0_t *module)
{
  int rank, p, res, i;
  NBC_Schedule *schedule;
  char *rbuf, *sbuf, inplace;
  NBC_Handle *handle;
  ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request;
  ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;

  NBC_IN_PLACE(sendbuf, recvbuf, inplace);

  res = NBC_Init_handle(comm, coll_req, libnbc_module);
  if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; }
  handle = (*coll_req);

  res = MPI_Comm_rank(comm, &rank);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }
  res= MPI_Comm_size(comm, &p);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; }

  schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule));
  if (NULL == schedule) {
    /* res holds a success code at this point; report out-of-resources */
    res = NBC_OOR;
    printf("Error in malloc() (%i)\n", res);
    return res;
  }

  /* this algorithm needs no temporary buffer */
  handle->tmpbuf=NULL;

  res = NBC_Sched_create(schedule);
  if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; }

  /* copy the local contribution to the receive buffer; test the in-place flag
   * first so that the send-side arrays are not touched when no copy is needed */
  if(!inplace && (sendcounts[rank] != 0)) {
    rbuf = ((char *) recvbuf) + rdispls[rank];
    sbuf = ((char *) sendbuf) + sdispls[rank];
    res = NBC_Copy(sbuf, sendcounts[rank], sendtypes[rank], rbuf, recvcounts[rank], recvtypes[rank], comm);
    if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; }
  }

  for (i = 0; i < p; i++) {
    if (i == rank) { continue; }
    /* post all sends */
    if(sendcounts[i] != 0) {
      sbuf = ((char *) sendbuf) + sdispls[i];
      res = NBC_Sched_send(sbuf, false, sendcounts[i], sendtypes[i], i, schedule);
      if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
    }
    /* post all receives */
    if(recvcounts[i] != 0) {
      rbuf = ((char *) recvbuf) + rdispls[i];
      res = NBC_Sched_recv(rbuf, false, recvcounts[i], recvtypes[i], i, schedule);
      if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
    }
  }

  /*NBC_PRINT_SCHED(*schedule);*/

  res = NBC_Sched_commit(schedule);
  if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; }

  res = NBC_Start(handle, schedule);
  if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; }

  return NBC_OK;
}
/* Simple linear Alltoallw for intercommunicators.
 *
 * For every rank i of the remote group a send of sendcounts[i] elements of
 * sendtypes[i] and a receive of recvcounts[i] elements of recvtypes[i] is
 * scheduled. Displacements are applied as byte offsets. Peers with a zero
 * count are skipped.
 *
 * Returns NBC_OK once the schedule has been started, NBC_OOR if the schedule
 * object cannot be allocated, and otherwise the error code of the failing call.
 */
int ompi_coll_libnbc_ialltoallw_inter (void* sendbuf, int *sendcounts, int *sdispls,
                                       MPI_Datatype sendtypes[], void* recvbuf, int *recvcounts, int *rdispls,
                                       MPI_Datatype recvtypes[], struct ompi_communicator_t *comm, ompi_request_t ** request,
                                       struct mca_coll_base_module_2_0_0_t *module)
{
  int rank, res, i, rsize;
  NBC_Schedule *schedule;
  char *rbuf, *sbuf;
  NBC_Handle *handle;
  ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request;
  ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;

  res = NBC_Init_handle(comm, coll_req, libnbc_module);
  if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; }
  handle = (*coll_req);

  res = MPI_Comm_rank(comm, &rank);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }

  /* rsize bounds the loop below, so a failed query must not be ignored */
  res = MPI_Comm_remote_size (comm, &rsize);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_remote_size() (%i)\n", res); return res; }

  schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule));
  if (NULL == schedule) {
    /* res holds a success code at this point; report out-of-resources */
    res = NBC_OOR;
    printf("Error in malloc() (%i)\n", res);
    return res;
  }

  /* this algorithm needs no temporary buffer */
  handle->tmpbuf=NULL;

  res = NBC_Sched_create(schedule);
  if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; }

  for (i = 0; i < rsize; i++) {
    /* post all sends */
    if(sendcounts[i] != 0) {
      sbuf = ((char *) sendbuf) + sdispls[i];
      res = NBC_Sched_send(sbuf, false, sendcounts[i], sendtypes[i], i, schedule);
      if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
    }
    /* post all receives */
    if(recvcounts[i] != 0) {
      rbuf = ((char *) recvbuf) + rdispls[i];
      res = NBC_Sched_recv(rbuf, false, recvcounts[i], recvtypes[i], i, schedule);
      if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
    }
  }

  /*NBC_PRINT_SCHED(*schedule);*/

  res = NBC_Sched_commit(schedule);
  if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; }

  res = NBC_Start(handle, schedule);
  if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; }

  return NBC_OK;
}

Просмотреть файл

@ -1,9 +1,11 @@
/*
* Copyright (c) 2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2006 The Technical University of Chemnitz. All
* rights reserved.
* Copyright (c) 2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2006 The Technical University of Chemnitz. All
* rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
*
* Author(s): Torsten Hoefler <htor@cs.indiana.edu>
*
@ -28,7 +30,7 @@ int ompi_coll_libnbc_ibarrier(struct ompi_communicator_t *comm, ompi_request_t *
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }
res = MPI_Comm_size(comm, &p);
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; }
handle->tmpbuf=(void*)malloc(2*sizeof(char));
#ifdef NBC_CACHE_SCHEDULE
@ -84,6 +86,67 @@ int ompi_coll_libnbc_ibarrier(struct ompi_communicator_t *comm, ompi_request_t *
res = NBC_Start(handle, schedule);
if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; }
return NBC_OK;
}
/* Non-blocking barrier for intercommunicators (linear algorithm).
 *
 * Schedule built by every process (peer numbers are ranks in the remote group):
 *   - local rank 0 posts a one-byte receive from each remote rank 1..rsize-1;
 *   - every rank posts a one-byte receive from, and a one-byte send to,
 *     remote rank 0;
 *   - local rank 0 then waits for that round to finish and sends one byte to
 *     each remote rank 1..rsize-1 to release it.
 * All messages use the first byte of the handle's temporary buffer.
 *
 * Returns NBC_OK once the schedule has been started, NBC_OOR if an allocation
 * fails, and otherwise the error code of the failing call.
 */
int ompi_coll_libnbc_ibarrier_inter(struct ompi_communicator_t *comm, ompi_request_t ** request,
                                    struct mca_coll_base_module_2_0_0_t *module)
{
  int rank, res, rsize, peer;
  NBC_Schedule *schedule;
  NBC_Handle *handle;
  ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request;
  ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;

  res = NBC_Init_handle(comm, coll_req, libnbc_module);
  if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; }
  handle = (*coll_req);

  res = MPI_Comm_rank(comm, &rank);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }
  res = MPI_Comm_remote_size(comm, &rsize);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_remote_size() (%i)\n", res); return res; }

  /* scratch bytes used as source and target of the synchronization messages */
  handle->tmpbuf=(void*)malloc(2*sizeof(char));
  if (NULL == handle->tmpbuf) { printf("Error in malloc()\n"); return NBC_OOR; }

  schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule));
  /* res holds a success code at this point; report out-of-resources instead */
  if (NULL == schedule) { printf("Error in malloc()\n"); return NBC_OOR; }

  res = NBC_Sched_create(schedule);
  if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; }

  if (0 == rank) {
    /* collect the arrival notifications of the remote non-root ranks */
    for (peer = 1 ; peer < rsize ; ++peer) {
      res = NBC_Sched_recv (0, true, 1, MPI_BYTE, peer, schedule);
      if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
    }
  }

  /* synchronize with the remote root */
  res = NBC_Sched_recv (0, true, 1, MPI_BYTE, 0, schedule);
  if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
  res = NBC_Sched_send (0, true, 1, MPI_BYTE, 0, schedule);
  if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }

  if (0 == rank) {
    /* wait for the remote root */
    res = NBC_Sched_barrier(schedule);
    if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; }

    /* inform remote peers that all local peers have entered the barrier.
     * Start at 1: remote rank 0 posts exactly one receive from this rank and
     * that one is already matched by the send above, so another message to
     * it would never be received by this barrier. */
    for (peer = 1 ; peer < rsize ; ++peer) {
      res = NBC_Sched_send (0, true, 1, MPI_BYTE, peer, schedule);
      if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
    }
  }

  res = NBC_Sched_commit(schedule);
  if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; }

  res = NBC_Start(handle, schedule);
  if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; }

  return NBC_OK;
}

162
ompi/mca/coll/libnbc/nbc_iexscan.c Обычный файл
Просмотреть файл

@ -0,0 +1,162 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2006 The Technical University of Chemnitz. All
* rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
*
* Author(s): Torsten Hoefler <htor@cs.indiana.edu>
*
*/
#include "nbc_internal.h"
#ifdef NBC_CACHE_SCHEDULE
/* Comparison callback used by the schedule cache tree.
 *
 * Two argument sets are equal (0) when send buffer, receive buffer, count,
 * datatype and op all match. Otherwise the result depends on the send buffer
 * address alone: -1 if a's is lower, +1 in every other case. param is unused. */
int NBC_Scan_args_compare(NBC_Scan_args *a, NBC_Scan_args *b, void *param) {
  if( (a->sendbuf != b->sendbuf) ||
      (a->recvbuf != b->recvbuf) ||
      (a->count != b->count) ||
      (a->datatype != b->datatype) ||
      (a->op != b->op) ) {
    /* not the same call signature: order by send buffer address */
    return (a->sendbuf < b->sendbuf) ? -1 : +1;
  }

  return 0;
}
#endif
/* linear iexscan
 * working principle:
 * 1. each rank except rank 0 receives from its left neighbor
 * 2. ranks 1 .. p-2 combine the received data with their own contribution
 * 3. every rank except rank p-1 sends to its right neighbor
 *
 * The received data ends up in recvbuf; the combined value that is passed on
 * is built at the start of the handle's temporary buffer. In the in-place
 * case the incoming data is first staged in the second half of the temporary
 * buffer so that the local contribution is not overwritten before it has been
 * combined.
 *
 * Returns NBC_OK once the schedule has been started, NBC_OOR if an allocation
 * fails, and otherwise the error code of the failing call.
 */
int ompi_coll_libnbc_iexscan(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
                             struct ompi_communicator_t *comm, ompi_request_t ** request,
                             struct mca_coll_base_module_2_0_0_t *module) {
  int rank, p, res;
  MPI_Aint ext;
  NBC_Schedule *schedule;
#ifdef NBC_CACHE_SCHEDULE
  NBC_Scan_args *args, *found, search;
#endif
  char inplace;
  NBC_Handle *handle;
  ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request;
  ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;

  NBC_IN_PLACE(sendbuf, recvbuf, inplace);

  res = NBC_Init_handle(comm, coll_req, libnbc_module);
  if (res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; }
  handle = (*coll_req);

  res = MPI_Comm_rank(comm, &rank);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }
  res = MPI_Comm_size(comm, &p);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; }
  res = MPI_Type_extent(datatype, &ext);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }

  if (inplace && rank < p - 1) {
    /* need more buffer space for the inplace case */
    handle->tmpbuf = malloc(ext * count * 2);
  } else {
    handle->tmpbuf = malloc(ext * count);
  }
  if (handle->tmpbuf == NULL) { printf("Error in malloc()\n"); return NBC_OOR; }

#ifdef NBC_CACHE_SCHEDULE
  /* search schedule in communicator specific tree */
  search.sendbuf=sendbuf;
  search.recvbuf=recvbuf;
  search.count=count;
  search.datatype=datatype;
  search.op=op;
  found = (NBC_Scan_args*)hb_tree_search((hb_tree*)handle->comminfo->NBC_Dict[NBC_EXSCAN], &search);
  if (found == NULL) {
#endif
    schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule));
    /* res holds a success code at this point; report out-of-resources instead */
    if (NULL == schedule) { free(handle->tmpbuf); printf("Error in malloc()\n"); return NBC_OOR; }

    res = NBC_Sched_create(schedule);
    if (res != NBC_OK) { free(handle->tmpbuf); printf("Error in NBC_Sched_create (%i)\n", res); return res; }

    if (rank != 0) {
      if (inplace && rank < p - 1) {
        /* if sendbuf == recvbuf do not clobber the send buffer until it has been combined
         * with the incoming data. */
        res = NBC_Sched_recv((void *)(ext * count), true, count, datatype, rank-1, schedule);
      } else {
        res = NBC_Sched_recv(recvbuf, false, count, datatype, rank-1, schedule);
      }
      if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }

      if (rank < p - 1) {
        /* we have to wait until we have the data */
        res = NBC_Sched_barrier(schedule);
        if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; }

        /* perform the reduce in my temporary buffer */
        if (inplace) {
          res = NBC_Sched_op(0, true, sendbuf, false, (void *)(ext * count), true, count, datatype, op, schedule);
        } else {
          res = NBC_Sched_op(0, true, sendbuf, false, recvbuf, false, count, datatype, op, schedule);
        }
        if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_op() (%i)\n", res); return res; }

        /* this cannot be done until handle->tmpbuf is unused :-( */
        res = NBC_Sched_barrier(schedule);
        if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; }

        /* send reduced data onward */
        res = NBC_Sched_send(0, true, count, datatype, rank + 1, schedule);
        if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_send() (%i)\n", res); return res; }

        if (inplace) {
          /* copy the received data into the receive buffer */
          res = NBC_Sched_copy ((void *)(ext * count), true, count, datatype, recvbuf, false, count, datatype, schedule);
          if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_copy() (%i)\n", res); return res; }
        }
      }
    } else if (p > 1) {
      /* rank 0 only forwards its own contribution */
      res = NBC_Sched_send(sendbuf, false, count, datatype, 1, schedule);
      if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
    }

    res = NBC_Sched_commit(schedule);
    if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_commit() (%i)\n", res); return res; }

#ifdef NBC_CACHE_SCHEDULE
    /* save schedule to tree */
    args = (NBC_Scan_args*)malloc(sizeof(NBC_Scan_args));
    if (NULL == args) { free(handle->tmpbuf); printf("Error in malloc()\n"); return NBC_OOR; }
    args->sendbuf=sendbuf;
    args->recvbuf=recvbuf;
    args->count=count;
    args->datatype=datatype;
    args->op=op;
    args->schedule=schedule;
    res = hb_tree_insert ((hb_tree*)handle->comminfo->NBC_Dict[NBC_EXSCAN], args, args, 0);
    if(res != 0) printf("error in dict_insert() (%i)\n", res);
    /* increase number of cached exscan schedules */
    if(++handle->comminfo->NBC_Dict_size[NBC_EXSCAN] > NBC_SCHED_DICT_UPPER) {
      NBC_SchedCache_dictwipe((hb_tree*)handle->comminfo->NBC_Dict[NBC_EXSCAN], &handle->comminfo->NBC_Dict_size[NBC_EXSCAN]);
    }
  } else {
    /* found schedule */
    schedule=found->schedule;
  }
#endif

  res = NBC_Start(handle, schedule);
  if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Start() (%i)\n", res); return res; }

  /* tmpbuf is freed with the handle */
  return NBC_OK;
}

Просмотреть файл

@ -2,7 +2,7 @@
* Copyright (c) 2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2006 The Technical University of Chemnitz. All
* Copyright (c) 2006 The Technical University of Chemnitz. All
* rights reserved.
*
* Author(s): Torsten Hoefler <htor@cs.indiana.edu>
@ -14,23 +14,23 @@
/* tree comparison function for schedule cache */
int NBC_Gather_args_compare(NBC_Gather_args *a, NBC_Gather_args *b, void *param) {
if( (a->sendbuf == b->sendbuf) &&
(a->sendcount == b->sendcount) &&
if( (a->sendbuf == b->sendbuf) &&
(a->sendcount == b->sendcount) &&
(a->sendtype == b->sendtype) &&
(a->recvbuf == b->recvbuf) &&
(a->recvcount == b->recvcount) &&
(a->recvtype == b->recvtype) &&
(a->recvtype == b->recvtype) &&
(a->root == b->root) ) {
return 0;
}
if( a->sendbuf < b->sendbuf ) {
if( a->sendbuf < b->sendbuf ) {
return -1;
}
return +1;
}
return +1;
}
#endif
int ompi_coll_libnbc_igather(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
int ompi_coll_libnbc_igather(void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
MPI_Datatype recvtype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request,
struct mca_coll_base_module_2_0_0_t *module) {
int rank, p, res, i;
@ -43,9 +43,9 @@ int ompi_coll_libnbc_igather(void* sendbuf, int sendcount, MPI_Datatype sendtype
NBC_Handle *handle;
ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request;
ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;
NBC_IN_PLACE(sendbuf, recvbuf, inplace);
res = NBC_Init_handle(comm, coll_req, libnbc_module);
if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; }
handle = (*coll_req);
@ -68,7 +68,7 @@ int ompi_coll_libnbc_igather(void* sendbuf, int sendcount, MPI_Datatype sendtype
res = NBC_Copy(sendbuf, sendcount, sendtype, rbuf, recvcount, recvtype, comm);
if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; }
}
#ifdef NBC_CACHE_SCHEDULE
/* search schedule in communicator specific tree */
search.sendbuf=sendbuf;
@ -102,10 +102,10 @@ int ompi_coll_libnbc_igather(void* sendbuf, int sendcount, MPI_Datatype sendtype
}
}
}
res = NBC_Sched_commit(schedule);
if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; }
#ifdef NBC_CACHE_SCHEDULE
/* save schedule to tree */
args = (NBC_Gather_args*)malloc(sizeof(NBC_Gather_args));
@ -117,7 +117,7 @@ int ompi_coll_libnbc_igather(void* sendbuf, int sendcount, MPI_Datatype sendtype
args->recvtype=recvtype;
args->root=root;
args->schedule=schedule;
res = hb_tree_insert ((hb_tree*)handle->comminfo->NBC_Dict[NBC_GATHER], args, args, 0);
res = hb_tree_insert ((hb_tree*)handle->comminfo->NBC_Dict[NBC_GATHER], args, args, 0);
if(res != 0) printf("error in dict_insert() (%i)\n", res);
/* increase number of elements for A2A */
if(++handle->comminfo->NBC_Dict_size[NBC_GATHER] > NBC_SCHED_DICT_UPPER) {
@ -131,6 +131,62 @@ int ompi_coll_libnbc_igather(void* sendbuf, int sendcount, MPI_Datatype sendtype
res = NBC_Start(handle, schedule);
if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; }
return NBC_OK;
}
/* Non-blocking gather for intercommunicators (linear algorithm).
 *
 * The behavior depends on root:
 *   - MPI_ROOT: this process is the root; it schedules one receive per rank
 *     of the remote group, block i going i * recvcount * extent(recvtype)
 *     bytes into recvbuf;
 *   - MPI_PROC_NULL: nothing is scheduled;
 *   - anything else: root is the rank of the root in the remote group and a
 *     single send of sendbuf to it is scheduled.
 *
 * Returns NBC_OK once the schedule has been started, NBC_OOR if the schedule
 * object cannot be allocated, and otherwise the error code of the failing call.
 */
int ompi_coll_libnbc_igather_inter (void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount,
                                    MPI_Datatype recvtype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request,
                                    struct mca_coll_base_module_2_0_0_t *module) {
  int rank, p, res, i, rsize;
  MPI_Aint rcvext = 0;
  NBC_Schedule *schedule;
  char *rbuf;
  NBC_Handle *handle;
  ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request;
  ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;

  res = NBC_Init_handle(comm, coll_req, libnbc_module);
  if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; }
  handle = (*coll_req);

  res = MPI_Comm_rank(comm, &rank);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }
  res = MPI_Comm_size(comm, &p);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; }
  res = MPI_Comm_remote_size (comm, &rsize);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_remote_size() (%i)\n", res); return res; }

  /* the receive type is only looked at on the root */
  if (root == MPI_ROOT) {
    res = MPI_Type_extent(recvtype, &rcvext);
    if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
  }

  /* this algorithm needs no temporary buffer */
  handle->tmpbuf = NULL;

  schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule));
  if (NULL == schedule) {
    /* res holds a success code at this point; report out-of-resources */
    res = NBC_OOR;
    printf("Error in malloc() (%i)\n", res);
    return res;
  }

  res = NBC_Sched_create(schedule);
  if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; }

  /* send to root */
  if(root != MPI_ROOT && root != MPI_PROC_NULL) {
    /* send msg to root */
    res = NBC_Sched_send(sendbuf, false, sendcount, sendtype, root, schedule);
    if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
  } else if (MPI_ROOT == root) {
    for (i = 0 ; i < rsize ; ++i) {
      /* widen before multiplying so that the byte offset is not computed in
       * int arithmetic */
      rbuf = ((char *)recvbuf) + ((MPI_Aint) i * recvcount * rcvext);
      /* root receives message to the right buffer */
      res = NBC_Sched_recv(rbuf, false, recvcount, recvtype, i, schedule);
      if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
    }
  }

  res = NBC_Sched_commit(schedule);
  if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; }

  res = NBC_Start(handle, schedule);
  if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; }

  return NBC_OK;
}

Просмотреть файл

@ -77,3 +77,58 @@ int ompi_coll_libnbc_igatherv(void* sendbuf, int sendcount, MPI_Datatype sendtyp
return NBC_OK;
}
/* Non-blocking gatherv for intercommunicators (linear algorithm).
 *
 * The behavior depends on root:
 *   - MPI_ROOT: this process is the root; for every rank i of the remote group
 *     a receive of recvcounts[i] elements is scheduled, placed displs[i]
 *     receive-type extents into recvbuf;
 *   - MPI_PROC_NULL: nothing is scheduled;
 *   - anything else: root is the rank of the root in the remote group and a
 *     single send of sendbuf to it is scheduled.
 *
 * Returns NBC_OK once the schedule has been started, NBC_OOR if the schedule
 * object cannot be allocated, and otherwise the error code of the failing call.
 */
int ompi_coll_libnbc_igatherv_inter (void* sendbuf, int sendcount, MPI_Datatype sendtype,
                                     void* recvbuf, int *recvcounts, int *displs, MPI_Datatype recvtype,
                                     int root, struct ompi_communicator_t *comm, ompi_request_t ** request,
                                     struct mca_coll_base_module_2_0_0_t *module) {
  int rank, p, res, i, rsize;
  MPI_Aint rcvext = 0;
  NBC_Schedule *schedule;
  char *rbuf;
  NBC_Handle *handle;
  ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request;
  ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;

  res = NBC_Init_handle(comm, coll_req, libnbc_module);
  if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; }
  handle = (*coll_req);

  res = MPI_Comm_rank(comm, &rank);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }
  res = MPI_Comm_size(comm, &p);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; }

  /* the extent is only used in the MPI_ROOT branch below, so the receive type
   * is not queried on processes that merely send */
  if (MPI_ROOT == root) {
    res = MPI_Type_extent(recvtype, &rcvext);
    if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
  }

  res = MPI_Comm_remote_size (comm, &rsize);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_remote_size() (%i)\n", res); return res; }

  schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule));
  if (NULL == schedule) {
    /* res holds a success code at this point; report out-of-resources */
    res = NBC_OOR;
    printf("Error in malloc() (%i)\n", res);
    return res;
  }

  /* this algorithm needs no temporary buffer */
  handle->tmpbuf=NULL;

  res = NBC_Sched_create(schedule);
  if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; }

  /* send to root */
  if (MPI_ROOT != root && MPI_PROC_NULL != root) {
    /* send msg to root */
    res = NBC_Sched_send(sendbuf, false, sendcount, sendtype, root, schedule);
    if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
  } else if (MPI_ROOT == root) {
    for (i = 0 ; i < rsize ; ++i) {
      rbuf = ((char *)recvbuf) + (displs[i]*rcvext);
      /* root receives message to the right buffer */
      res = NBC_Sched_recv(rbuf, false, recvcounts[i], recvtype, i, schedule);
      if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
    }
  }

  res = NBC_Sched_commit(schedule);
  if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; }

  res = NBC_Start(handle, schedule);
  if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; }

  return NBC_OK;
}

Просмотреть файл

@ -1,9 +1,11 @@
/*
* Copyright (c) 2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2006 The Technical University of Chemnitz. All
* rights reserved.
* Copyright (c) 2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2006 The Technical University of Chemnitz. All
* rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
*
* Author(s): Torsten Hoefler <htor@cs.indiana.edu>
*
@ -13,11 +15,12 @@
static inline int red_sched_binomial(int rank, int p, int root, void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, void *redbuf, NBC_Schedule *schedule, NBC_Handle *handle);
static inline int red_sched_chain(int rank, int p, int root, void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int ext, int size, NBC_Schedule *schedule, NBC_Handle *handle, int fragsize);
static inline int red_sched_linear(int rank, int rsize, int root, void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, NBC_Schedule *schedule, NBC_Handle *handle);
#ifdef NBC_CACHE_SCHEDULE
/* tree comparison function for schedule cache */
int NBC_Reduce_args_compare(NBC_Reduce_args *a, NBC_Reduce_args *b, void *param) {
if( (a->sendbuf == b->sendbuf) &&
if( (a->sendbuf == b->sendbuf) &&
(a->recvbuf == b->recvbuf) &&
(a->count == b->count) &&
(a->datatype == b->datatype) &&
@ -25,10 +28,10 @@ int NBC_Reduce_args_compare(NBC_Reduce_args *a, NBC_Reduce_args *b, void *param)
(a->root == b->root) ) {
return 0;
}
if( a->sendbuf < b->sendbuf ) {
if( a->sendbuf < b->sendbuf ) {
return -1;
}
return +1;
}
return +1;
}
#endif
@ -144,6 +147,49 @@ int ompi_coll_libnbc_ireduce(void* sendbuf, void* recvbuf, int count, MPI_Dataty
return NBC_OK;
}
/* Non-blocking reduce for intercommunicators.
 *
 * Allocates a temporary buffer of count * extent(datatype) bytes on the
 * handle, lets red_sched_linear() build the schedule and starts it.
 *
 * Returns NBC_OK once the schedule has been started, NBC_OOR if an allocation
 * fails, and otherwise the error code of the failing call.
 */
int ompi_coll_libnbc_ireduce_inter(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype,
                                   MPI_Op op, int root, struct ompi_communicator_t *comm, ompi_request_t ** request,
                                   struct mca_coll_base_module_2_0_0_t *module) {
  int rank, res, rsize;
  NBC_Schedule *schedule;
  MPI_Aint ext;
  NBC_Handle *handle;
  ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request;
  ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;

  res = NBC_Init_handle(comm, coll_req, libnbc_module);
  if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; }
  handle = (*coll_req);

  res = MPI_Comm_rank(comm, &rank);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }
  res = MPI_Comm_remote_size(comm, &rsize);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_remote_size() (%i)\n", res); return res; }
  res = MPI_Type_extent(datatype, &ext);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }

  handle->tmpbuf = malloc(ext*count);
  /* malloc may legitimately return NULL for a zero-byte request, so only a
   * non-empty request counts as a failure; res holds a success code here and
   * must be replaced by an out-of-resources code */
  if (NULL == handle->tmpbuf && 0 != ext*count) {
    res = NBC_OOR;
    printf("Error in malloc() (%i)\n", res);
    return res;
  }

  schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule));
  if (NULL == schedule) {
    free(handle->tmpbuf);
    res = NBC_OOR;
    printf("Error in malloc() (%i)\n", res);
    return res;
  }

  res = NBC_Sched_create(schedule);
  if(res != NBC_OK) { free(handle->tmpbuf); printf("Error in NBC_Sched_create (%i)\n", res); return res; }

  res = red_sched_linear (rank, rsize, root, sendbuf, recvbuf, count, datatype, op, schedule, handle);
  if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in Schedule creation() (%i)\n", res); return res; }

  res = NBC_Sched_commit(schedule);
  if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_commit() (%i)\n", res); return res; }

  res = NBC_Start(handle, schedule);
  if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Start() (%i)\n", res); return res; }

  /* tmpbuf is freed with the handle */
  return NBC_OK;
}
/* binomial reduce
* working principle:
@ -292,3 +338,38 @@ static inline int red_sched_chain(int rank, int p, int root, void *sendbuf, void
return NBC_OK;
}
/* Linear reduce schedule for intercommunicators.
 *
 * A process that passes a remote rank as root contributes a single send of
 * sendbuf. The root itself (MPI_ROOT) receives the contribution of remote
 * rank 0 straight into recvbuf and then, one remote rank at a time, receives
 * into the temporary buffer and folds that into recvbuf. MPI_PROC_NULL and a
 * zero count leave the schedule untouched. rank and handle are not used. */
static inline int red_sched_linear(int rank, int rsize, int root, void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, NBC_Schedule *schedule, NBC_Handle *handle) {
  int rc;
  int src;

  if (0 == count) {
    return NBC_OK;
  }

  if (MPI_ROOT != root && MPI_PROC_NULL != root) {
    /* contributor: hand the local data to the remote root */
    rc = NBC_Sched_send (sendbuf, false, count, datatype, root, schedule);
    if (NBC_OK != rc) { printf("Error in NBC_Sched_send() (%i)\n", rc); return rc; }
    return NBC_OK;
  }

  if (MPI_ROOT != root) {
    /* MPI_PROC_NULL: this process takes no part in the data movement */
    return NBC_OK;
  }

  /* root: the first contribution initializes the result ... */
  rc = NBC_Sched_recv (recvbuf, false, count, datatype, 0, schedule);
  if (NBC_OK != rc) { printf("Error in NBC_Sched_recv() (%i)\n", rc); return rc; }

  rc = NBC_Sched_barrier (schedule);
  if (NBC_OK != rc) { printf("Error in NBC_Sched_barrier() (%i)\n", rc); return rc; }

  /* ... and every further one is staged in the temporary buffer first */
  src = 1;
  while (src < rsize) {
    rc = NBC_Sched_recv (0, true, count, datatype, src, schedule);
    if (NBC_OK != rc) { printf("Error in NBC_Sched_recv() (%i)\n", rc); return rc; }

    rc = NBC_Sched_barrier(schedule);
    if (NBC_OK != rc) { printf("Error in NBC_Sched_barrier() (%i)\n", rc); return rc; }

    rc = NBC_Sched_op (recvbuf, false, 0, true, recvbuf, false, count, datatype, op, schedule);
    if (NBC_OK != rc) { printf("Error in NBC_Sched_op() (%i)\n", rc); return rc; }

    rc = NBC_Sched_barrier(schedule);
    if (NBC_OK != rc) { printf("Error in NBC_Sched_barrier() (%i)\n", rc); return rc; }

    ++src;
  }

  return NBC_OK;
}

Просмотреть файл

@ -1,9 +1,11 @@
/*
* Copyright (c) 2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2006 The Technical University of Chemnitz. All
* rights reserved.
* Copyright (c) 2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2006 The Technical University of Chemnitz. All
* rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
*
* Author(s): Torsten Hoefler <htor@cs.indiana.edu>
*
@ -145,3 +147,95 @@ int ompi_coll_libnbc_ireduce_scatter(void* sendbuf, void* recvbuf, int *recvcoun
/* tmpbuf is freed with the handle */
return NBC_OK;
}
/*
 * Non-blocking reduce_scatter on an intercommunicator (simple linear schedule).
 *
 * Schedule outline:
 *   1. every process sends its full sendbuf (count elements) to remote rank 0;
 *   2. local rank 0 receives the contributions of all remote ranks and reduces
 *      them in the first half of handle->tmpbuf;
 *   3. local rank 0 swaps its reduced vector with remote rank 0 (the received
 *      vector lands in the second half of tmpbuf);
 *   4. local rank 0 sends consecutive recvcounts[peer]-sized pieces of the
 *      second half to the remote ranks;
 *   5. every process receives recvcounts[rank] elements from remote rank 0
 *      into recvbuf.
 *
 * Returns NBC_OK on success, NBC_OOR when an allocation fails, or the error
 * code of the failing MPI / NBC call.
 *
 * NOTE(review): recvcounts is indexed by the local rank in step 5 but by
 * remote ranks (0..rsize-1) when summing and when scattering. That only lines
 * up when both groups have the same size and counts - confirm against the MPI
 * intercommunicator semantics of MPI_Reduce_scatter.
 * NOTE(review): only some error paths free handle->tmpbuf; the ownership rules
 * of the handle are not visible here, so they are left as they were.
 */
int ompi_coll_libnbc_ireduce_scatter_inter(void* sendbuf, void* recvbuf, int *recvcounts, MPI_Datatype datatype,
                                           MPI_Op op, struct ompi_communicator_t *comm, ompi_request_t ** request,
                                           struct mca_coll_base_module_2_0_0_t *module) {
  int peer, rank, r, res, count, rsize;
  /* offset is a byte offset derived from ext, so keep it as wide as ext */
  MPI_Aint ext, offset;
  NBC_Schedule *schedule;
  NBC_Handle *handle;
  ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request;
  ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;

  res = NBC_Init_handle(comm, coll_req, libnbc_module);
  if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; }
  handle = (*coll_req);

  res = MPI_Comm_rank(comm, &rank);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }
  res = MPI_Comm_remote_size(comm, &rsize);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_remote_size() (%i)\n", res); return res; }
  /* the result must be stored in res, otherwise the check below re-tests the previous call */
  res = MPI_Type_extent(datatype, &ext);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }

  schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule));
  if (NULL == schedule) { printf("Error in malloc()\n"); return NBC_OOR; }

  res = NBC_Sched_create(schedule);
  if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; }

  /* total number of elements in the vector being reduced */
  count = 0;
  for (r = 0 ; r < rsize ; ++r) count += recvcounts[r];

  /* two vectors of count elements: [0, ext*count) and [ext*count, 2*ext*count) */
  handle->tmpbuf = malloc(2 * ext * count);
  if(handle->tmpbuf == NULL) { printf("Error in malloc()\n"); return NBC_OOR; }

  /* send my data to the remote root */
  res = NBC_Sched_send(sendbuf, false, count, datatype, 0, schedule);
  if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }

  if (0 == rank) {
    /* first remote contribution goes straight into the accumulator half */
    res = NBC_Sched_recv((void *) 0, true, count, datatype, 0, schedule);
    if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }

    res = NBC_Sched_barrier(schedule);
    if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; }

    /* remaining contributions are staged in the second half and folded in */
    for (peer = 1 ; peer < rsize ; ++peer) {
      res = NBC_Sched_recv((void *)(ext * count), true, count, datatype, peer, schedule);
      if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }

      res = NBC_Sched_barrier(schedule);
      if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; }

      res = NBC_Sched_op((void *) 0, true, (void *)(ext * count), true, (void *) 0, true, count, datatype, op, schedule);
      if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_op() (%i)\n", res); return res; }

      res = NBC_Sched_barrier(schedule);
      if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; }
    }

    /* exchange data with remote root for scatter phase (we *could* use the local communicator to do the scatter) */
    res = NBC_Sched_recv((void *)(ext * count), true, count, datatype, 0, schedule);
    if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }

    res = NBC_Sched_send((void *) 0, true, count, datatype, 0, schedule);
    if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }

    res = NBC_Sched_barrier(schedule);
    if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; }

    /* scatter: walk the second half of tmpbuf, one recvcounts[peer] slice per remote rank */
    for (peer = 0, offset = ext * count ; peer < rsize ; ++peer) {
      res = NBC_Sched_send((void *)(uintptr_t) offset, true, recvcounts[peer], datatype, peer, schedule);
      if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }

      offset += recvcounts[peer] * ext;
    }
  }

  /* receive my block */
  res = NBC_Sched_recv(recvbuf, false, recvcounts[rank], datatype, 0, schedule);
  if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }

  /*NBC_PRINT_SCHED(*schedule);*/

  res = NBC_Sched_commit(schedule);
  if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Sched_commit() (%i)\n", res); return res; }

  res = NBC_Start(handle, schedule);
  if (NBC_OK != res) { free(handle->tmpbuf); printf("Error in NBC_Start() (%i)\n", res); return res; }

  /* tmpbuf is freed with the handle */
  return NBC_OK;
}

Просмотреть файл

@ -1,9 +1,11 @@
/*
* Copyright (c) 2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2006 The Technical University of Chemnitz. All
* rights reserved.
* Copyright (c) 2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2006 The Technical University of Chemnitz. All
* rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
*
* Author(s): Torsten Hoefler <htor@cs.indiana.edu>
*
@ -13,26 +15,25 @@
#ifdef NBC_CACHE_SCHEDULE
/*
 * Tree comparison function for the scatter schedule cache.
 *
 * Orders two argument records lexicographically over
 * (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root) and
 * returns -1, 0 or +1. Zero is returned only when every field matches.
 *
 * Comparing on every field keeps the ordering consistent: two records that
 * share sendbuf but differ elsewhere now compare as -1 one way and +1 the
 * other, instead of +1 in both directions.
 *
 * param is unused.
 */
int NBC_Scatter_args_compare(NBC_Scatter_args *a, NBC_Scatter_args *b, void *param) {
  (void) param;

  if (a->sendbuf != b->sendbuf) {
    return (a->sendbuf < b->sendbuf) ? -1 : +1;
  }
  if (a->sendcount != b->sendcount) {
    return (a->sendcount < b->sendcount) ? -1 : +1;
  }
  if (a->sendtype != b->sendtype) {
    return (a->sendtype < b->sendtype) ? -1 : +1;
  }
  if (a->recvbuf != b->recvbuf) {
    return (a->recvbuf < b->recvbuf) ? -1 : +1;
  }
  if (a->recvcount != b->recvcount) {
    return (a->recvcount < b->recvcount) ? -1 : +1;
  }
  if (a->recvtype != b->recvtype) {
    return (a->recvtype < b->recvtype) ? -1 : +1;
  }
  if (a->root != b->root) {
    return (a->root < b->root) ? -1 : +1;
  }

  return 0;
}
#endif
/* simple linear MPI_Iscatter */
int ompi_coll_libnbc_iscatter(void* sendbuf, int sendcount, MPI_Datatype sendtype,
void* recvbuf, int recvcount, MPI_Datatype recvtype, int root,
int ompi_coll_libnbc_iscatter(void* sendbuf, int sendcount, MPI_Datatype sendtype,
void* recvbuf, int recvcount, MPI_Datatype recvtype, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
struct mca_coll_base_module_2_0_0_t *module) {
int rank, p, res, i;
@ -45,9 +46,9 @@ int ompi_coll_libnbc_iscatter(void* sendbuf, int sendcount, MPI_Datatype sendtyp
NBC_Handle *handle;
ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request;
ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;
NBC_IN_PLACE(sendbuf, recvbuf, inplace);
res = NBC_Init_handle(comm, coll_req, libnbc_module);
if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; }
handle = (*coll_req);
@ -59,14 +60,14 @@ int ompi_coll_libnbc_iscatter(void* sendbuf, int sendcount, MPI_Datatype sendtyp
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
handle->tmpbuf=NULL;
if((rank == root) && (!inplace)) {
sbuf = ((char *)sendbuf) + (rank*sendcount*sndext);
/* if I am the root - just copy the message (not for MPI_IN_PLACE) */
res = NBC_Copy(sbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, comm);
if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; }
}
#ifdef NBC_CACHE_SCHEDULE
/* search schedule in communicator specific tree */
search.sendbuf=sendbuf;
@ -100,7 +101,7 @@ int ompi_coll_libnbc_iscatter(void* sendbuf, int sendcount, MPI_Datatype sendtyp
}
}
}
res = NBC_Sched_commit(schedule);
if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; }
#ifdef NBC_CACHE_SCHEDULE
@ -114,7 +115,7 @@ int ompi_coll_libnbc_iscatter(void* sendbuf, int sendcount, MPI_Datatype sendtyp
args->recvtype=recvtype;
args->root=root;
args->schedule=schedule;
res = hb_tree_insert ((hb_tree*)handle->comminfo->NBC_Dict[NBC_SCATTER], args, args, 0);
res = hb_tree_insert ((hb_tree*)handle->comminfo->NBC_Dict[NBC_SCATTER], args, args, 0);
if(res != 0) printf("error in dict_insert() (%i)\n", res);
/* increase number of elements for A2A */
if(++handle->comminfo->NBC_Dict_size[NBC_SCATTER] > NBC_SCHED_DICT_UPPER) {
@ -126,9 +127,62 @@ int ompi_coll_libnbc_iscatter(void* sendbuf, int sendcount, MPI_Datatype sendtyp
}
#endif
res = NBC_Start(handle, schedule);
if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; }
return NBC_OK;
}
/*
 * Non-blocking linear scatter on an intercommunicator.
 *
 * - root == MPI_ROOT: this process sends sendcount elements of sendtype to
 *   each of the rsize remote ranks; block i starts at
 *   sendbuf + i * sendcount * extent(sendtype).
 * - root == MPI_PROC_NULL: nothing is scheduled.
 * - any other root: this process receives recvcount elements of recvtype
 *   from remote rank root into recvbuf.
 *
 * Returns NBC_OK on success, NBC_OOR if the schedule cannot be allocated, or
 * the error code of the failing MPI / NBC call.
 */
int ompi_coll_libnbc_iscatter_inter(void* sendbuf, int sendcount, MPI_Datatype sendtype,
                                    void* recvbuf, int recvcount, MPI_Datatype recvtype, int root,
                                    struct ompi_communicator_t *comm, ompi_request_t ** request,
                                    struct mca_coll_base_module_2_0_0_t *module) {
  int rank, res, i, rsize;
  MPI_Aint sndext;
  NBC_Schedule *schedule;
  char *sbuf;
  NBC_Handle *handle;
  ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request;
  ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;

  res = NBC_Init_handle(comm, coll_req, libnbc_module);
  if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; }
  handle = (*coll_req);

  res = MPI_Comm_rank(comm, &rank);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }
  res = MPI_Type_extent(sendtype, &sndext);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
  res = MPI_Comm_remote_size (comm, &rsize);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_remote_size() (%i)\n", res); return res; }

  /* this algorithm needs no temporary buffer */
  handle->tmpbuf = NULL;

  schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule));
  /* res still holds MPI_SUCCESS here, so report the allocation failure explicitly */
  if (NULL == schedule) { printf("Error in malloc()\n"); return NBC_OOR; }

  res = NBC_Sched_create(schedule);
  if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; }

  /* receive from root */
  if (MPI_ROOT != root && MPI_PROC_NULL != root) {
    /* recv msg from remote root */
    res = NBC_Sched_recv(recvbuf, false, recvcount, recvtype, root, schedule);
    if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
  } else if (MPI_ROOT == root) {
    for (i = 0 ; i < rsize ; ++i) {
      /* widen before multiplying so i * sendcount is not evaluated in int */
      sbuf = ((char *)sendbuf) + ((MPI_Aint) i * sendcount * sndext);
      /* root sends the right buffer to the right receiver */
      res = NBC_Sched_send(sbuf, false, sendcount, sendtype, i, schedule);
      if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
    }
  }

  res = NBC_Sched_commit(schedule);
  if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; }

  res = NBC_Start(handle, schedule);
  if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; }

  return NBC_OK;
}

Просмотреть файл

@ -1,9 +1,11 @@
/*
* Copyright (c) 2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2006 The Technical University of Chemnitz. All
* rights reserved.
* Copyright (c) 2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2006 The Technical University of Chemnitz. All
* rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
*
* Author(s): Torsten Hoefler <htor@cs.indiana.edu>
*
@ -15,8 +17,8 @@
* would not be sufficient ... we simply do not cache it */
/* simple linear MPI_Iscatterv */
int ompi_coll_libnbc_iscatterv(void* sendbuf, int *sendcounts, int *displs, MPI_Datatype sendtype,
void* recvbuf, int recvcount, MPI_Datatype recvtype, int root,
int ompi_coll_libnbc_iscatterv(void* sendbuf, int *sendcounts, int *displs, MPI_Datatype sendtype,
void* recvbuf, int recvcount, MPI_Datatype recvtype, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
struct mca_coll_base_module_2_0_0_t *module) {
int rank, p, res, i;
@ -26,9 +28,9 @@ int ompi_coll_libnbc_iscatterv(void* sendbuf, int *sendcounts, int *displs, MPI_
NBC_Handle *handle;
ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request;
ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;
NBC_IN_PLACE(sendbuf, recvbuf, inplace);
res = NBC_Init_handle(comm, coll_req, libnbc_module);
if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; }
handle = (*coll_req);
@ -43,7 +45,7 @@ int ompi_coll_libnbc_iscatterv(void* sendbuf, int *sendcounts, int *displs, MPI_
if (NULL == schedule) { printf("Error in malloc()\n"); return res; }
handle->tmpbuf=NULL;
res = NBC_Sched_create(schedule);
if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; }
@ -68,12 +70,65 @@ int ompi_coll_libnbc_iscatterv(void* sendbuf, int *sendcounts, int *displs, MPI_
}
}
}
res = NBC_Sched_commit(schedule);
if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; }
res = NBC_Start(handle, schedule);
if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; }
return NBC_OK;
}
/*
 * Non-blocking linear scatterv on an intercommunicator.
 *
 * - root == MPI_ROOT: this process sends sendcounts[i] elements of sendtype,
 *   starting at sendbuf + displs[i] * extent(sendtype), to each remote rank i
 *   in 0..rsize-1.
 * - root == MPI_PROC_NULL: nothing is scheduled.
 * - any other root: this process receives recvcount elements of recvtype
 *   from remote rank root into recvbuf.
 *
 * Returns NBC_OK on success, NBC_OOR if the schedule cannot be allocated, or
 * the error code of the failing MPI / NBC call.
 */
int ompi_coll_libnbc_iscatterv_inter (void* sendbuf, int *sendcounts, int *displs, MPI_Datatype sendtype,
                                      void* recvbuf, int recvcount, MPI_Datatype recvtype, int root,
                                      struct ompi_communicator_t *comm, ompi_request_t ** request,
                                      struct mca_coll_base_module_2_0_0_t *module) {
  int rank, res, i, rsize;
  MPI_Aint sndext;
  NBC_Schedule *schedule;
  char *sbuf;
  NBC_Handle *handle;
  ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request;
  ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;

  res = NBC_Init_handle(comm, coll_req, libnbc_module);
  if(res != NBC_OK) { printf("Error in NBC_Init_handle(%i)\n", res); return res; }
  handle = (*coll_req);

  res = MPI_Comm_rank(comm, &rank);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }
  res = MPI_Comm_remote_size(comm, &rsize);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_remote_size() (%i)\n", res); return res; }
  res = MPI_Type_extent(sendtype, &sndext);
  if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }

  schedule = (NBC_Schedule*)malloc(sizeof(NBC_Schedule));
  /* res still holds MPI_SUCCESS here, so report the allocation failure explicitly */
  if (NULL == schedule) { printf("Error in malloc()\n"); return NBC_OOR; }

  /* this algorithm needs no temporary buffer */
  handle->tmpbuf=NULL;

  res = NBC_Sched_create(schedule);
  if(res != NBC_OK) { printf("Error in NBC_Sched_create (%i)\n", res); return res; }

  /* receive from root */
  if(MPI_ROOT != root && MPI_PROC_NULL != root) {
    /* recv msg from root */
    res = NBC_Sched_recv(recvbuf, false, recvcount, recvtype, root, schedule);
    if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
  } else if (MPI_ROOT == root) {
    for (i = 0 ; i < rsize ; ++i) {
      /* displs[] is expressed in units of the send type's extent */
      sbuf = ((char *)sendbuf) + (displs[i] * sndext);
      /* root sends the right buffer to the right receiver */
      res = NBC_Sched_send(sbuf, false, sendcounts[i], sendtype, i, schedule);
      if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
    }
  }

  res = NBC_Sched_commit(schedule);
  if (NBC_OK != res) { printf("Error in NBC_Sched_commit() (%i)\n", res); return res; }

  res = NBC_Start(handle, schedule);
  if (NBC_OK != res) { printf("Error in NBC_Start() (%i)\n", res); return res; }

  return NBC_OK;
}