coll/libnbc: correctly handle datatype alignment when allocating two buffers at once
Этот коммит содержится в:
родитель
cb0086685f
Коммит
ed9139ca13
@ -14,6 +14,7 @@
|
|||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include "opal/include/opal/align.h"
|
||||||
#include "ompi/op/op.h"
|
#include "ompi/op/op.h"
|
||||||
|
|
||||||
#include "nbc_internal.h"
|
#include "nbc_internal.h"
|
||||||
@ -104,8 +105,9 @@ int ompi_coll_libnbc_ireduce(const void* sendbuf, void* recvbuf, int count, MPI_
|
|||||||
redbuf = recvbuf;
|
redbuf = recvbuf;
|
||||||
} else {
|
} else {
|
||||||
/* recvbuf may not be valid on non-root nodes */
|
/* recvbuf may not be valid on non-root nodes */
|
||||||
handle->tmpbuf = malloc (2*span);
|
ptrdiff_t span_align = OPAL_ALIGN(span, datatype->super.align, ptrdiff_t);
|
||||||
redbuf = (char*) handle->tmpbuf + span - gap;
|
handle->tmpbuf = malloc (span_align + span);
|
||||||
|
redbuf = (char*) handle->tmpbuf + span_align - gap;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
handle->tmpbuf = malloc (span);
|
handle->tmpbuf = malloc (span);
|
||||||
|
@ -16,6 +16,8 @@
|
|||||||
* Author(s): Torsten Hoefler <htor@cs.indiana.edu>
|
* Author(s): Torsten Hoefler <htor@cs.indiana.edu>
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
#include "opal/include/opal/align.h"
|
||||||
|
|
||||||
#include "nbc_internal.h"
|
#include "nbc_internal.h"
|
||||||
|
|
||||||
/* a reduce_scatter schedule cannot be cached easily because the contents
|
/* a reduce_scatter schedule cannot be cached easily because the contents
|
||||||
@ -40,7 +42,7 @@ int ompi_coll_libnbc_ireduce_scatter(const void* sendbuf, void* recvbuf, const i
|
|||||||
struct mca_coll_base_module_2_1_0_t *module) {
|
struct mca_coll_base_module_2_1_0_t *module) {
|
||||||
int peer, rank, maxr, p, res, count;
|
int peer, rank, maxr, p, res, count;
|
||||||
MPI_Aint ext;
|
MPI_Aint ext;
|
||||||
ptrdiff_t gap, span;
|
ptrdiff_t gap, span, span_align;
|
||||||
char *sbuf, inplace;
|
char *sbuf, inplace;
|
||||||
NBC_Schedule *schedule;
|
NBC_Schedule *schedule;
|
||||||
NBC_Handle *handle;
|
NBC_Handle *handle;
|
||||||
@ -84,14 +86,15 @@ int ompi_coll_libnbc_ireduce_scatter(const void* sendbuf, void* recvbuf, const i
|
|||||||
maxr = (int) ceil ((log((double) p) / LOG2));
|
maxr = (int) ceil ((log((double) p) / LOG2));
|
||||||
|
|
||||||
span = opal_datatype_span(&datatype->super, count, &gap);
|
span = opal_datatype_span(&datatype->super, count, &gap);
|
||||||
handle->tmpbuf = malloc (span * 2);
|
span_align = OPAL_ALIGN(span, datatype->super.align, ptrdiff_t);
|
||||||
|
handle->tmpbuf = malloc (span_align + span);
|
||||||
if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) {
|
if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) {
|
||||||
NBC_Return_handle (handle);
|
NBC_Return_handle (handle);
|
||||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||||
}
|
}
|
||||||
|
|
||||||
rbuf = (char *)(-gap);
|
rbuf = (char *)(-gap);
|
||||||
lbuf = (char *)(span - gap);
|
lbuf = (char *)(span_align - gap);
|
||||||
|
|
||||||
schedule = OBJ_NEW(NBC_Schedule);
|
schedule = OBJ_NEW(NBC_Schedule);
|
||||||
if (OPAL_UNLIKELY(NULL == schedule)) {
|
if (OPAL_UNLIKELY(NULL == schedule)) {
|
||||||
@ -205,7 +208,7 @@ int ompi_coll_libnbc_ireduce_scatter_inter (const void* sendbuf, void* recvbuf,
|
|||||||
struct mca_coll_base_module_2_1_0_t *module) {
|
struct mca_coll_base_module_2_1_0_t *module) {
|
||||||
int rank, res, count, lsize, rsize;
|
int rank, res, count, lsize, rsize;
|
||||||
MPI_Aint ext;
|
MPI_Aint ext;
|
||||||
ptrdiff_t gap, span;
|
ptrdiff_t gap, span, span_align;
|
||||||
NBC_Schedule *schedule;
|
NBC_Schedule *schedule;
|
||||||
NBC_Handle *handle;
|
NBC_Handle *handle;
|
||||||
ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;
|
ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;
|
||||||
@ -226,6 +229,7 @@ int ompi_coll_libnbc_ireduce_scatter_inter (const void* sendbuf, void* recvbuf,
|
|||||||
}
|
}
|
||||||
|
|
||||||
span = opal_datatype_span(&datatype->super, count, &gap);
|
span = opal_datatype_span(&datatype->super, count, &gap);
|
||||||
|
span_align = OPAL_ALIGN(span, datatype->super.align, ptrdiff_t);
|
||||||
|
|
||||||
res = NBC_Init_handle(comm, &handle, libnbc_module);
|
res = NBC_Init_handle(comm, &handle, libnbc_module);
|
||||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
|
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
|
||||||
@ -233,7 +237,7 @@ int ompi_coll_libnbc_ireduce_scatter_inter (const void* sendbuf, void* recvbuf,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (count > 0) {
|
if (count > 0) {
|
||||||
handle->tmpbuf = malloc (2 * span);
|
handle->tmpbuf = malloc (span_align + span);
|
||||||
if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) {
|
if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) {
|
||||||
NBC_Return_handle (handle);
|
NBC_Return_handle (handle);
|
||||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||||
@ -259,7 +263,7 @@ int ompi_coll_libnbc_ireduce_scatter_inter (const void* sendbuf, void* recvbuf,
|
|||||||
if (0 == rank) {
|
if (0 == rank) {
|
||||||
char *lbuf, *rbuf;
|
char *lbuf, *rbuf;
|
||||||
lbuf = (char *)(-gap);
|
lbuf = (char *)(-gap);
|
||||||
rbuf = (char *)(span-gap);
|
rbuf = (char *)(span_align-gap);
|
||||||
res = NBC_Sched_recv (lbuf, true, count, datatype, 0, schedule, true);
|
res = NBC_Sched_recv (lbuf, true, count, datatype, 0, schedule, true);
|
||||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
|
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
|
||||||
NBC_Return_handle (handle);
|
NBC_Return_handle (handle);
|
||||||
|
@ -14,6 +14,8 @@
|
|||||||
* Author(s): Torsten Hoefler <htor@cs.indiana.edu>
|
* Author(s): Torsten Hoefler <htor@cs.indiana.edu>
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
#include "opal/include/opal/align.h"
|
||||||
|
|
||||||
#include "nbc_internal.h"
|
#include "nbc_internal.h"
|
||||||
|
|
||||||
/* a reduce_scatter schedule cannot be cached easily because the contents
|
/* a reduce_scatter schedule cannot be cached easily because the contents
|
||||||
@ -75,9 +77,11 @@ int ompi_coll_libnbc_ireduce_scatter_block(const void* sendbuf, void* recvbuf, i
|
|||||||
|
|
||||||
if (0 < count) {
|
if (0 < count) {
|
||||||
char *rbuf, *lbuf, *buf;
|
char *rbuf, *lbuf, *buf;
|
||||||
|
ptrdiff_t span_align;
|
||||||
|
|
||||||
span = opal_datatype_span(&datatype->super, count, &gap);
|
span = opal_datatype_span(&datatype->super, count, &gap);
|
||||||
handle->tmpbuf = malloc (2*span);
|
span_align = OPAL_ALIGN(span, datatype->super.align, ptrdiff_t);
|
||||||
|
handle->tmpbuf = malloc (span_align + span);
|
||||||
if (NULL == handle->tmpbuf) {
|
if (NULL == handle->tmpbuf) {
|
||||||
OMPI_COLL_LIBNBC_REQUEST_RETURN(handle);
|
OMPI_COLL_LIBNBC_REQUEST_RETURN(handle);
|
||||||
OBJ_RELEASE(schedule);
|
OBJ_RELEASE(schedule);
|
||||||
@ -85,8 +89,8 @@ int ompi_coll_libnbc_ireduce_scatter_block(const void* sendbuf, void* recvbuf, i
|
|||||||
}
|
}
|
||||||
|
|
||||||
rbuf = (void *)(-gap);
|
rbuf = (void *)(-gap);
|
||||||
lbuf = (char *)(span - gap);
|
lbuf = (char *)(span_align - gap);
|
||||||
redbuf = (char *) handle->tmpbuf + span - gap;
|
redbuf = (char *) handle->tmpbuf + span_align - gap;
|
||||||
|
|
||||||
/* copy data to redbuf if we only have a single node */
|
/* copy data to redbuf if we only have a single node */
|
||||||
if ((p == 1) && !inplace) {
|
if ((p == 1) && !inplace) {
|
||||||
@ -206,7 +210,7 @@ int ompi_coll_libnbc_ireduce_scatter_block_inter(const void *sendbuf, void *recv
|
|||||||
ompi_request_t **request, struct mca_coll_base_module_2_1_0_t *module) {
|
ompi_request_t **request, struct mca_coll_base_module_2_1_0_t *module) {
|
||||||
int rank, res, count, lsize, rsize;
|
int rank, res, count, lsize, rsize;
|
||||||
MPI_Aint ext;
|
MPI_Aint ext;
|
||||||
ptrdiff_t gap, span;
|
ptrdiff_t gap, span, span_align;
|
||||||
NBC_Schedule *schedule;
|
NBC_Schedule *schedule;
|
||||||
NBC_Handle *handle;
|
NBC_Handle *handle;
|
||||||
ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;
|
ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;
|
||||||
@ -229,9 +233,10 @@ int ompi_coll_libnbc_ireduce_scatter_block_inter(const void *sendbuf, void *recv
|
|||||||
count = rcount * lsize;
|
count = rcount * lsize;
|
||||||
|
|
||||||
span = opal_datatype_span(&dtype->super, count, &gap);
|
span = opal_datatype_span(&dtype->super, count, &gap);
|
||||||
|
span_align = OPAL_ALIGN(span, dtype->super.align, ptrdiff_t);
|
||||||
|
|
||||||
if (count > 0) {
|
if (count > 0) {
|
||||||
handle->tmpbuf = malloc (2 * span);
|
handle->tmpbuf = malloc (span_align + span);
|
||||||
if (NULL == handle->tmpbuf) {
|
if (NULL == handle->tmpbuf) {
|
||||||
NBC_Return_handle (handle);
|
NBC_Return_handle (handle);
|
||||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||||
@ -257,7 +262,7 @@ int ompi_coll_libnbc_ireduce_scatter_block_inter(const void *sendbuf, void *recv
|
|||||||
if (0 == rank) {
|
if (0 == rank) {
|
||||||
char *lbuf, *rbuf;
|
char *lbuf, *rbuf;
|
||||||
lbuf = (char *)(-gap);
|
lbuf = (char *)(-gap);
|
||||||
rbuf = (char *)(span-gap);
|
rbuf = (char *)(span_align-gap);
|
||||||
res = NBC_Sched_recv (lbuf, true, count, dtype, 0, schedule, true);
|
res = NBC_Sched_recv (lbuf, true, count, dtype, 0, schedule, true);
|
||||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
|
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
|
||||||
NBC_Return_handle (handle);
|
NBC_Return_handle (handle);
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user