coll/libnbc: cleanup handling of the second temporary buffer in ireduce
Этот коммит содержится в:
родитель
ed9139ca13
Коммит
917d96ba50
@@ -19,7 +19,7 @@
|
|||||||
|
|
||||||
#include "nbc_internal.h"
|
#include "nbc_internal.h"
|
||||||
|
|
||||||
static inline int red_sched_binomial (int rank, int p, int root, const void *sendbuf, void *redbuf, int count, MPI_Datatype datatype,
|
static inline int red_sched_binomial (int rank, int p, int root, const void *sendbuf, void *redbuf, char tmpredbuf, int count, MPI_Datatype datatype,
|
||||||
MPI_Op op, char inplace, NBC_Schedule *schedule, NBC_Handle *handle);
|
MPI_Op op, char inplace, NBC_Schedule *schedule, NBC_Handle *handle);
|
||||||
static inline int red_sched_chain (int rank, int p, int root, const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
|
static inline int red_sched_chain (int rank, int p, int root, const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
|
||||||
MPI_Op op, int ext, size_t size, NBC_Schedule *schedule, NBC_Handle *handle, int fragsize);
|
MPI_Op op, int ext, size_t size, NBC_Schedule *schedule, NBC_Handle *handle, int fragsize);
|
||||||
@@ -56,6 +56,7 @@ int ompi_coll_libnbc_ireduce(const void* sendbuf, void* recvbuf, int count, MPI_
|
|||||||
MPI_Aint ext;
|
MPI_Aint ext;
|
||||||
NBC_Schedule *schedule;
|
NBC_Schedule *schedule;
|
||||||
char *redbuf=NULL, inplace;
|
char *redbuf=NULL, inplace;
|
||||||
|
char tmpredbuf = 0;
|
||||||
enum { NBC_RED_BINOMIAL, NBC_RED_CHAIN } alg;
|
enum { NBC_RED_BINOMIAL, NBC_RED_CHAIN } alg;
|
||||||
NBC_Handle *handle;
|
NBC_Handle *handle;
|
||||||
ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;
|
ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;
|
||||||
@@ -107,7 +108,8 @@ int ompi_coll_libnbc_ireduce(const void* sendbuf, void* recvbuf, int count, MPI_
|
|||||||
/* recvbuf may not be valid on non-root nodes */
|
/* recvbuf may not be valid on non-root nodes */
|
||||||
ptrdiff_t span_align = OPAL_ALIGN(span, datatype->super.align, ptrdiff_t);
|
ptrdiff_t span_align = OPAL_ALIGN(span, datatype->super.align, ptrdiff_t);
|
||||||
handle->tmpbuf = malloc (span_align + span);
|
handle->tmpbuf = malloc (span_align + span);
|
||||||
redbuf = (char*) handle->tmpbuf + span_align - gap;
|
redbuf = (char*)span_align - gap;
|
||||||
|
tmpredbuf = 1;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
handle->tmpbuf = malloc (span);
|
handle->tmpbuf = malloc (span);
|
||||||
@@ -144,7 +146,7 @@ int ompi_coll_libnbc_ireduce(const void* sendbuf, void* recvbuf, int count, MPI_
|
|||||||
|
|
||||||
switch(alg) {
|
switch(alg) {
|
||||||
case NBC_RED_BINOMIAL:
|
case NBC_RED_BINOMIAL:
|
||||||
res = red_sched_binomial(rank, p, root, sendbuf, redbuf, count, datatype, op, inplace, schedule, handle);
|
res = red_sched_binomial(rank, p, root, sendbuf, redbuf, tmpredbuf, count, datatype, op, inplace, schedule, handle);
|
||||||
break;
|
break;
|
||||||
case NBC_RED_CHAIN:
|
case NBC_RED_CHAIN:
|
||||||
res = red_sched_chain(rank, p, root, sendbuf, recvbuf, count, datatype, op, ext, size, schedule, handle, segsize);
|
res = red_sched_chain(rank, p, root, sendbuf, recvbuf, count, datatype, op, ext, size, schedule, handle, segsize);
|
||||||
@@ -291,10 +293,10 @@ int ompi_coll_libnbc_ireduce_inter(const void* sendbuf, void* recvbuf, int count
|
|||||||
if (vrank == 0) rank = root; \
|
if (vrank == 0) rank = root; \
|
||||||
if (vrank == root) rank = 0; \
|
if (vrank == root) rank = 0; \
|
||||||
}
|
}
|
||||||
static inline int red_sched_binomial (int rank, int p, int root, const void *sendbuf, void *redbuf, int count, MPI_Datatype datatype,
|
static inline int red_sched_binomial (int rank, int p, int root, const void *sendbuf, void *redbuf, char tmpredbuf, int count, MPI_Datatype datatype,
|
||||||
MPI_Op op, char inplace, NBC_Schedule *schedule, NBC_Handle *handle) {
|
MPI_Op op, char inplace, NBC_Schedule *schedule, NBC_Handle *handle) {
|
||||||
int vroot, vrank, vpeer, peer, res, maxr;
|
int vroot, vrank, vpeer, peer, res, maxr;
|
||||||
char *rbuf, *lbuf, *buf;
|
char *rbuf, *lbuf, *buf, tmpbuf;
|
||||||
int tmprbuf, tmplbuf;
|
int tmprbuf, tmplbuf;
|
||||||
ptrdiff_t gap;
|
ptrdiff_t gap;
|
||||||
(void)opal_datatype_span(&datatype->super, count, &gap);
|
(void)opal_datatype_span(&datatype->super, count, &gap);
|
||||||
@@ -312,12 +314,12 @@ static inline int red_sched_binomial (int rank, int p, int root, const void *sen
|
|||||||
rbuf = (void *)(-gap);
|
rbuf = (void *)(-gap);
|
||||||
tmprbuf = true;
|
tmprbuf = true;
|
||||||
lbuf = redbuf;
|
lbuf = redbuf;
|
||||||
tmplbuf = false;
|
tmplbuf = tmpredbuf;
|
||||||
} else {
|
} else {
|
||||||
lbuf = (void *)(-gap);
|
lbuf = (void *)(-gap);
|
||||||
tmplbuf = true;
|
tmplbuf = true;
|
||||||
rbuf = redbuf;
|
rbuf = redbuf;
|
||||||
tmprbuf = false;
|
tmprbuf = tmpredbuf;
|
||||||
if (inplace) {
|
if (inplace) {
|
||||||
res = NBC_Copy(rbuf, count, datatype, ((char *)handle->tmpbuf)-gap, count, datatype, MPI_COMM_SELF);
|
res = NBC_Copy(rbuf, count, datatype, ((char *)handle->tmpbuf)-gap, count, datatype, MPI_COMM_SELF);
|
||||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
|
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
|
||||||
@@ -354,7 +356,7 @@ static inline int red_sched_binomial (int rank, int p, int root, const void *sen
|
|||||||
}
|
}
|
||||||
/* swap left and right buffers */
|
/* swap left and right buffers */
|
||||||
buf = rbuf; rbuf = lbuf ; lbuf = buf;
|
buf = rbuf; rbuf = lbuf ; lbuf = buf;
|
||||||
tmprbuf ^= 1; tmplbuf ^= 1;
|
tmpbuf = tmprbuf; tmprbuf = tmplbuf; tmplbuf = tmpbuf;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
/* we have to send this round */
|
/* we have to send this round */
|
||||||
@@ -379,9 +381,9 @@ static inline int red_sched_binomial (int rank, int p, int root, const void *sen
|
|||||||
/* send to root if vroot ! root */
|
/* send to root if vroot ! root */
|
||||||
if (vroot != root) {
|
if (vroot != root) {
|
||||||
if (0 == rank) {
|
if (0 == rank) {
|
||||||
res = NBC_Sched_send (redbuf, false, count, datatype, root, schedule, false);
|
res = NBC_Sched_send (redbuf, tmpredbuf, count, datatype, root, schedule, false);
|
||||||
} else if (root == rank) {
|
} else if (root == rank) {
|
||||||
res = NBC_Sched_recv (redbuf, false, count, datatype, vroot, schedule, false);
|
res = NBC_Sched_recv (redbuf, tmpredbuf, count, datatype, vroot, schedule, false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user