
Fix a number of issues with IN_PLACE

This commit was SVN r26814.
This commit is contained in:
Brian Barrett 2012-07-19 21:29:43 +00:00
parent c6cf561a17
commit 2518014037
4 changed files with 26 additions and 199 deletions
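
Background for the fixes below: when a caller passes MPI_IN_PLACE as the send buffer of a collective, the send count and datatype are ignored and each rank's contribution is taken from (and the result left in) the receive buffer; for gather-style operations MPI_IN_PLACE is valid only at the root. libnbc's nonblocking collectives must honor this both when building the communication schedule (no reads through sendbuf) and when selecting an algorithm, which is what the four changes below address.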

View file

@@ -38,7 +38,7 @@ int ompi_coll_libnbc_iallgather(void* sendbuf, int sendcount, MPI_Datatype sendt
struct mca_coll_base_module_2_0_0_t *module)
{
int rank, p, res, r;
- MPI_Aint rcvext, sndext;
+ MPI_Aint rcvext;
NBC_Schedule *schedule;
char *rbuf, *sbuf, inplace;
#ifdef NBC_CACHE_SCHEDULE
@@ -57,14 +57,15 @@ int ompi_coll_libnbc_iallgather(void* sendbuf, int sendcount, MPI_Datatype sendt
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }
res = MPI_Comm_size(comm, &p);
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; }
- res = MPI_Type_extent(sendtype, &sndext);
- if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
res = MPI_Type_extent(recvtype, &rcvext);
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
handle->tmpbuf = NULL;
- if(!((rank == 0) && inplace)) {
+ if (inplace) {
+ sendtype = recvtype;
+ sendcount = recvcount;
+ } else {
/* copy my data to receive buffer */
rbuf = ((char *)recvbuf) + (rank*recvcount*rcvext);
res = NBC_Copy(sendbuf, sendcount, sendtype, rbuf, recvcount, recvtype, comm);
@@ -134,86 +135,3 @@ int ompi_coll_libnbc_iallgather(void* sendbuf, int sendcount, MPI_Datatype sendt
return NBC_OK;
}
- /* this is a new possible dissemination based allgather algorithm - we should
- * try it some time (big comm, small data) */
- #if 0
- static inline void diss_unpack(int rank, int vrank, int round, int p, int *pos, void *tmpbuf, int datasize, int slotsize, void *recvbuf, int sendcount, MPI_Datatype sendtype, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, NBC_Schedule *schedule) {
- int r, res;
- char *sbuf, *rbuf;
- sbuf = (char *)tmpbuf + (*pos*datasize);
- rbuf = (char *)recvbuf + (vrank*slotsize);
- printf("[%i] unpacking tmpbuf pos: %i (%lu) to rbuf elem: %i (%lu) - %i elems, datasize %i\n", rank, *pos, (unsigned long)sbuf, vrank, (unsigned long)rbuf, recvcount, datasize);
- res = NBC_Sched_unpack(sbuf, recvcount, recvtype, rbuf, schedule);
- if (NBC_OK != res) { printf("Error in NBC_Unpack() (%i)\n", res); }
- *pos=*pos+1;
- for(r=0; r<=round; r++) {
- if(r != 0) {
- diss_unpack(rank, (vrank-(1<<(r-1))+p)%p, r-1, p, pos, tmpbuf, datasize, slotsize, recvbuf, sendcount, sendtype, recvcount, recvtype, comm, schedule);
- }
- }
- }
- static inline int a2a_sched_diss(int rank, int p, MPI_Aint sndext, MPI_Aint rcvext, NBC_Schedule* schedule, void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, NBC_Handle *handle) {
- int res, r, maxround, size, speer, rpeer, pos, datasize;
- char *sbuf, *rbuf;
- res = NBC_OK;
- if(p < 2) return res;
- maxround = (int)ceil((log(p)/LOG2));
- if(NBC_Type_intrinsic(sendtype)) {
- datasize = sndext*sendcount;
- } else {
- res = MPI_Pack_size(sendcount, sendtype, comm, &datasize);
- if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Pack_size() (%i)\n", res); return res; }
- }
- /* tmpbuf is probably bigger than p -> next power of 2 */
- handle->tmpbuf=malloc(datasize*(1<<maxround));
- /* copy my send - data to temp send/recv buffer */
- sbuf = ((char *)sendbuf) + (rank*sendcount*sndext);
- /* pack send buffer */
- if(NBC_Type_intrinsic(sendtype)) {
- /* it is contiguous - we can just memcpy it */
- memcpy(handle->tmpbuf, sbuf, datasize);
- } else {
- pos = 0;
- res = MPI_Pack(sbuf, sendcount, sendtype, handle->tmpbuf, datasize, &pos, comm);
- if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Pack() (%i)\n", res); return res; }
- }
- printf("[%i] receive buffer is at %lu of size %i, maxround: %i\n", rank, (unsigned long)handle->tmpbuf, (int)sndext*sendcount*(1<<maxround), maxround);
- for(r = 0; r < maxround; r++) {
- size = datasize*(1<<r); /* size doubles every round */
- rbuf = (char*)handle->tmpbuf+size;
- sbuf = (char*)handle->tmpbuf;
- speer = (rank + (1<<r)) % p;
- /* add p because modulo does not work with negative values */
- rpeer = ((rank - (1<<r))+p) % p;
- printf("[%i] receiving %i bytes from host %i into rbuf %lu\n", rank, size, rpeer, (unsigned long)rbuf);
- res = NBC_Sched_recv(rbuf, size, MPI_BYTE, rpeer, schedule);
- if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
- printf("[%i] sending %i bytes to host %i from sbuf %lu\n", rank, size, speer, (unsigned long)sbuf);
- res = NBC_Sched_send(sbuf, size, MPI_BYTE, speer, schedule);
- if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
- res = NBC_Sched_barrier(schedule);
- if (NBC_OK != res) { printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; }
- }
- pos = 0;
- diss_unpack(rank, rank, r, p, &pos, handle->tmpbuf, datasize, recvcount*rcvext, recvbuf, sendcount, sendtype, recvcount, recvtype, comm, schedule);
- return NBC_OK;
- }
- #endif
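
The iallgather change drops the unused sendtype extent lookup and replaces the odd rank-0 special case: under MPI_IN_PLACE each rank's contribution already sits in its slot of recvbuf, so the send count/type are aliased to the receive count/type and the local copy is skipped. A minimal caller-side sketch of the case being fixed (standard MPI-3 API; the 64-slot buffer and values are illustrative):

#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv) {
  int rank, size;
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  int buf[64] = {0};      /* assumes size <= 64 */
  buf[rank] = 100 + rank; /* my contribution, already in my slot */

  /* send count/type are ignored when sendbuf is MPI_IN_PLACE */
  MPI_Request req;
  MPI_Iallgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
                 buf, 1, MPI_INT, MPI_COMM_WORLD, &req);
  MPI_Wait(&req, MPI_STATUS_IGNORE);

  printf("[%i] buf[0] = %i, buf[%i] = %i\n", rank, buf[0], size - 1, buf[size - 1]);
  MPI_Finalize();
  return 0;
}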

View file

@@ -27,9 +27,9 @@ int ompi_coll_libnbc_iallgatherv(void* sendbuf, int sendcount, MPI_Datatype send
struct mca_coll_base_module_2_0_0_t *module)
{
int rank, p, res, r, speer, rpeer;
- MPI_Aint rcvext, sndext;
+ MPI_Aint rcvext;
NBC_Schedule *schedule;
- char *rbuf, inplace;
+ char *rbuf, *sbuf, inplace;
NBC_Handle *handle;
ompi_coll_libnbc_request_t **coll_req = (ompi_coll_libnbc_request_t**) request;
ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;
@@ -43,8 +43,6 @@ int ompi_coll_libnbc_iallgatherv(void* sendbuf, int sendcount, MPI_Datatype send
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }
res = MPI_Comm_size(comm, &p);
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; }
- res = MPI_Type_extent(sendtype, &sndext);
- if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
res = MPI_Type_extent(recvtype, &rcvext);
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
@@ -56,12 +54,16 @@ int ompi_coll_libnbc_iallgatherv(void* sendbuf, int sendcount, MPI_Datatype send
res = NBC_Sched_create(schedule);
if(res != NBC_OK) { printf("Error in NBC_Sched_create, (%i)\n", res); return res; }
- if(!inplace) {
+ if (inplace) {
+ sendtype = recvtype;
+ sendcount = recvcounts[rank];
+ } else {
/* copy my data to receive buffer */
rbuf = ((char *)recvbuf) + (displs[rank]*rcvext);
res = NBC_Copy(sendbuf, sendcount, sendtype, rbuf, recvcounts[rank], recvtype, comm);
if (NBC_OK != res) { printf("Error in NBC_Copy() (%i)\n", res); return res; }
}
+ sbuf = ((char*) recvbuf) + (displs[rank]*rcvext);
/* do p-1 rounds */
for(r=1;r<p;r++) {
@@ -71,7 +73,7 @@ int ompi_coll_libnbc_iallgatherv(void* sendbuf, int sendcount, MPI_Datatype send
res = NBC_Sched_recv(rbuf, false, recvcounts[rpeer], recvtype, rpeer, schedule);
if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
- res = NBC_Sched_send(sendbuf, false, sendcount, sendtype, speer, schedule);
+ res = NBC_Sched_send(sbuf, false, sendcount, sendtype, speer, schedule);
if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
}
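
The iallgatherv fix follows the same pattern, with one extra subtlety visible in the last hunk: every round used to send from sendbuf, which is not a valid pointer under MPI_IN_PLACE, so a new sbuf pointing at this rank's block inside the receive buffer (recvbuf + displs[rank]*rcvext) is used instead. Caller-side sketch (standard API; the per-rank counts and displacements are illustrative):

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv) {
  int rank, size;
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  /* rank r contributes r+1 integers */
  int *recvcounts = malloc(size * sizeof(int));
  int *displs = malloc(size * sizeof(int));
  int total = 0;
  for (int r = 0; r < size; r++) {
    recvcounts[r] = r + 1;
    displs[r] = total;
    total += r + 1;
  }

  int *buf = malloc(total * sizeof(int));
  for (int i = 0; i < recvcounts[rank]; i++)
    buf[displs[rank] + i] = rank; /* my block, already in place */

  MPI_Request req;
  MPI_Iallgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
                  buf, recvcounts, displs, MPI_INT, MPI_COMM_WORLD, &req);
  MPI_Wait(&req, MPI_STATUS_IGNORE);

  printf("[%i] last element = %i (expected %i)\n", rank, buf[total - 1], size - 1);
  free(buf); free(recvcounts); free(displs);
  MPI_Finalize();
  return 0;
}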

View file

@@ -12,7 +12,6 @@
#include <assert.h>
static inline int allred_sched_diss(int rank, int p, int count, MPI_Datatype datatype, void *sendbuf, void *recvbuf, MPI_Op op, NBC_Schedule *schedule, NBC_Handle *handle);
- static inline int allred_sched_chain(int rank, int p, int count, MPI_Datatype datatype, void *sendbuf, void *recvbuf, MPI_Op op, int size, int ext, NBC_Schedule *schedule, NBC_Handle *handle, int fragsize);
static inline int allred_sched_ring(int rank, int p, int count, MPI_Datatype datatype, void *sendbuf, void *recvbuf, MPI_Op op, int size, int ext, NBC_Schedule *schedule, NBC_Handle *handle);
#ifdef NBC_CACHE_SCHEDULE
@@ -73,7 +72,7 @@ int ompi_coll_libnbc_iallreduce(void* sendbuf, void* recvbuf, int count, MPI_Dat
}
/* algorithm selection */
- if(p < 4 || size*count < 65536) {
+ if(p < 4 || size*count < 65536 || inplace) {
alg = NBC_ARED_BINOMIAL;
} else {
alg = NBC_ARED_RING;
@@ -191,7 +190,7 @@ static inline int allred_sched_diss(int rank, int p, int count, MPI_Datatype dat
/* we have to wait until we have the data */
res = NBC_Sched_barrier(schedule);
if(res != NBC_OK) { free(handle->tmpbuf); printf("Error in NBC_Sched_barrier() (%i)\n", res); return res; }
- if(firstred) {
+ if(firstred && MPI_IN_PLACE != sendbuf) {
/* perform the reduce with the sendbuf */
res = NBC_Sched_op(recvbuf, false, sendbuf, false, 0, true, count, datatype, op, schedule);
firstred = 0;
@@ -208,7 +207,7 @@ static inline int allred_sched_diss(int rank, int p, int count, MPI_Datatype dat
/* we have to send this round */
vpeer = vrank - (1<<(r-1));
VRANK2RANK(peer, vpeer, root)
- if(firstred) {
+ if(firstred && MPI_IN_PLACE != sendbuf) {
/* we have to use the sendbuf in the first round .. */
res = NBC_Sched_send(sendbuf, false, count, datatype, peer, schedule);
} else {
@@ -251,102 +250,6 @@ static inline int allred_sched_diss(int rank, int p, int count, MPI_Datatype dat
return NBC_OK;
}
- static inline int allred_sched_chain(int rank, int p, int count, MPI_Datatype datatype, void *sendbuf, void *recvbuf, MPI_Op op, int size, int ext, NBC_Schedule *schedule, NBC_Handle *handle, int fragsize) {
- int res, rrpeer, rbpeer, srpeer, sbpeer, numfrag, fragnum, fragcount, thiscount, bstart, bend;
- long roffset, boffset;
- /* reduce peers */
- rrpeer = rank+1;
- srpeer = rank-1;
- /* bcast peers */
- rbpeer = rank-1;
- sbpeer = rank+1;
- if(count == 0) return NBC_OK;
- numfrag = count*size/fragsize;
- if((count*size)%fragsize != 0) numfrag++;
- fragcount = count/numfrag;
- /* determine the starting round of bcast ... the first reduced packet
- * is after p-1 rounds at rank 0 and will be sent back ... */
- bstart = p-1+rank;
- /* determine the ending round of bcast ... after arrival of the first
- * packet, each rank has to forward numfrag packets */
- bend = bstart+numfrag;
- /*printf("[%i] numfrag: %i, count: %i, size: %i, fragcount: %i, bstart: %i, bend: %i\n", rank, numfrag, count, size, fragcount, bstart, bend);*/
- /* these are two loops in one - this is a little nasty :-( */
- for(fragnum = 0; fragnum < bend; fragnum++) {
- roffset = fragnum*fragcount*ext;
- boffset = (fragnum-bstart)*fragcount*ext;
- thiscount = fragcount;
- /* first numfrag rounds ... REDUCE to rank 0 */
- if(fragnum < numfrag) {
- if(fragnum == numfrag-1) {
- /* last fragment may not be full */
- thiscount = count-fragcount*fragnum;
- }
- /*printf("[%i] reduce %i elements from %lu\n", rank, thiscount, roffset); */
- /* REDUCE - PART last node does not recv */
- if(rank != p-1) {
- res = NBC_Sched_recv((char*)roffset, true, thiscount, datatype, rrpeer, schedule);
- if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
- res = NBC_Sched_barrier(schedule);
- /* root reduces into receivebuf */
- if(rank == 0) {
- res = NBC_Sched_op((char*)recvbuf+roffset, false, (char*)sendbuf+roffset, false, (char*)roffset, true, thiscount, datatype, op, schedule);
- } else {
- res = NBC_Sched_op((char*)roffset, true, (char*)sendbuf+roffset, false, (char*)roffset, true, thiscount, datatype, op, schedule);
- }
- res = NBC_Sched_barrier(schedule);
- }
- /* REDUCE PART root does not send */
- if(rank != 0) {
- /* rank p-1 has to send out of sendbuffer :) */
- if(rank == p-1) {
- res = NBC_Sched_send((char*)sendbuf+roffset, false, thiscount, datatype, srpeer, schedule);
- } else {
- res = NBC_Sched_send((char*)roffset, true, thiscount, datatype, srpeer, schedule);
- }
- if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
- /* this barrier here seems awkward but isn't!!!! */
- /*res = NBC_Sched_barrier(schedule);*/
- }
- }
- /* BCAST from rank 0 */
- if(fragnum >= bstart) {
- /*printf("[%i] bcast %i elements from %lu\n", rank, thiscount, boffset); */
- if(fragnum == bend-1) {
- /* last fragment may not be full */
- thiscount = count-fragcount*(fragnum-bstart);
- }
- /* BCAST PART root does not receive */
- if(rank != 0) {
- res = NBC_Sched_recv((char*)recvbuf+boffset, false, thiscount, datatype, rbpeer, schedule);
- if (NBC_OK != res) { printf("Error in NBC_Sched_recv() (%i)\n", res); return res; }
- res = NBC_Sched_barrier(schedule);
- }
- /* BCAST PART last rank does not send */
- if(rank != p-1) {
- res = NBC_Sched_send((char*)recvbuf+boffset, false, thiscount, datatype, sbpeer, schedule);
- if (NBC_OK != res) { printf("Error in NBC_Sched_send() (%i)\n", res); return res; }
- res = NBC_Sched_barrier(schedule);
- }
- }
- }
- /*NBC_PRINT_SCHED(*schedule);*/
- return NBC_OK;
- }
static inline int allred_sched_ring(int r, int p, int count, MPI_Datatype datatype, void *sendbuf, void *recvbuf, MPI_Op op, int size, int ext, NBC_Schedule *schedule, NBC_Handle *handle) {
int i; /* runner */
int segsize, *segsizes, *segoffsets; /* segment sizes and offsets per segment (number of segments == number of nodes) */
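
Two separate problems are addressed in iallreduce: the binomial (dissemination) schedule read sendbuf after the first reduction round even when it was MPI_IN_PLACE (hence the extra firstred guards), and algorithm selection now falls back to NBC_ARED_BINOMIAL whenever inplace is set, presumably because the ring schedule was not made in-place safe; the unused allred_sched_chain is deleted outright. Caller-side sketch (standard API; operand values illustrative):

#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv) {
  int rank;
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  /* input and result share one buffer under MPI_IN_PLACE */
  int vals[4] = {rank, rank, rank, rank};

  MPI_Request req;
  MPI_Iallreduce(MPI_IN_PLACE, vals, 4, MPI_INT, MPI_SUM, MPI_COMM_WORLD, &req);
  MPI_Wait(&req, MPI_STATUS_IGNORE);

  /* every rank now holds 0 + 1 + ... + (p-1) in each slot */
  printf("[%i] vals[0] = %i\n", rank, vals[0]);
  MPI_Finalize();
  return 0;
}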

View file

@@ -34,7 +34,7 @@ int ompi_coll_libnbc_igather(void* sendbuf, int sendcount, MPI_Datatype sendtype
MPI_Datatype recvtype, int root, struct ompi_communicator_t *comm, ompi_request_t ** request,
struct mca_coll_base_module_2_0_0_t *module) {
int rank, p, res, i;
- MPI_Aint rcvext;
+ MPI_Aint rcvext = 0;
NBC_Schedule *schedule;
char *rbuf, inplace;
#ifdef NBC_CACHE_SCHEDULE
@@ -53,12 +53,16 @@ int ompi_coll_libnbc_igather(void* sendbuf, int sendcount, MPI_Datatype sendtype
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_rank() (%i)\n", res); return res; }
res = MPI_Comm_size(comm, &p);
if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Comm_size() (%i)\n", res); return res; }
- res = MPI_Type_extent(recvtype, &rcvext);
- if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
+ if (rank == root) {
+ res = MPI_Type_extent(recvtype, &rcvext);
+ if (MPI_SUCCESS != res) { printf("MPI Error in MPI_Type_extent() (%i)\n", res); return res; }
+ }
handle->tmpbuf = NULL;
- if((rank == root) && (!inplace)) {
+ if (inplace) {
+ sendcount = recvcount;
+ sendtype = recvtype;
+ } else if (rank == root) {
rbuf = ((char *)recvbuf) + (rank*recvcount*rcvext);
/* if I am the root - just copy the message (only without MPI_IN_PLACE) */
res = NBC_Copy(sendbuf, sendcount, sendtype, rbuf, recvcount, recvtype, comm);
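
In igather, MPI_IN_PLACE is meaningful only at the root, and recvcount/recvtype are significant only there as well, which is why the extent query now runs only when rank == root (with rcvext zero-initialized so the non-root path stays well defined). Caller-side sketch (standard API; values illustrative):

#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv) {
  int rank, size, root = 0;
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  int gathered[64] = {0}; /* significant only at root; assumes size <= 64 */
  int myval = 100 + rank;

  MPI_Request req;
  if (rank == root) {
    gathered[root] = myval; /* root's contribution is already in place */
    MPI_Igather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
                gathered, 1, MPI_INT, root, MPI_COMM_WORLD, &req);
  } else {
    /* non-roots still send normally; recv arguments are ignored here */
    MPI_Igather(&myval, 1, MPI_INT, NULL, 0, MPI_DATATYPE_NULL,
                root, MPI_COMM_WORLD, &req);
  }
  MPI_Wait(&req, MPI_STATUS_IGNORE);

  if (rank == root)
    printf("root gathered %i .. %i\n", gathered[0], gathered[size - 1]);
  MPI_Finalize();
  return 0;
}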