1
1
true_lb while computing the lower bound.
Этот коммит содержится в:
George Bosilca 2014-11-13 23:22:01 -05:00 коммит произвёл Gilles Gouaillardet
родитель 705147e98b
Коммит d622db783d
20 изменённых файлов: 72 добавлений и 68 удалений

Просмотреть файл

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
* Corporation. All rights reserved. * Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University * Copyright (c) 2004-2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights * of Tennessee Research Foundation. All rights
* reserved. * reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -80,6 +80,7 @@ mca_coll_basic_allreduce_inter(void *sbuf, void *rbuf, int count,
{ {
int err, i, rank, root = 0, rsize; int err, i, rank, root = 0, rsize;
ptrdiff_t lb, extent; ptrdiff_t lb, extent;
ptrdiff_t true_lb, true_extent;
char *tmpbuf = NULL, *pml_buffer = NULL; char *tmpbuf = NULL, *pml_buffer = NULL;
ompi_request_t *req[2]; ompi_request_t *req[2];
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module; mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module;
@ -102,12 +103,16 @@ mca_coll_basic_allreduce_inter(void *sbuf, void *rbuf, int count,
if (OMPI_SUCCESS != err) { if (OMPI_SUCCESS != err) {
return OMPI_ERROR; return OMPI_ERROR;
} }
err = ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent);
if (OMPI_SUCCESS != err) {
return OMPI_ERROR;
}
tmpbuf = (char *) malloc(count * extent); tmpbuf = (char *) malloc(true_extent + (count - 1) * extent);
if (NULL == tmpbuf) { if (NULL == tmpbuf) {
return OMPI_ERR_OUT_OF_RESOURCE; return OMPI_ERR_OUT_OF_RESOURCE;
} }
pml_buffer = tmpbuf - lb; pml_buffer = tmpbuf - true_lb;
/* Do a send-recv between the two root procs. to avoid deadlock */ /* Do a send-recv between the two root procs. to avoid deadlock */
err = MCA_PML_CALL(irecv(rbuf, count, dtype, 0, err = MCA_PML_CALL(irecv(rbuf, count, dtype, 0,

Просмотреть файл

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
* Corporation. All rights reserved. * Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University * Copyright (c) 2004-2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights * of Tennessee Research Foundation. All rights
* reserved. * reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -88,7 +88,7 @@ mca_coll_basic_exscan_intra(void *sbuf, void *rbuf, int count,
if (NULL == free_buffer) { if (NULL == free_buffer) {
return OMPI_ERR_OUT_OF_RESOURCE; return OMPI_ERR_OUT_OF_RESOURCE;
} }
reduce_buffer = free_buffer - lb; reduce_buffer = free_buffer - true_lb;
err = ompi_datatype_copy_content_same_ddt(dtype, count, err = ompi_datatype_copy_content_same_ddt(dtype, count,
reduce_buffer, (char*)sbuf); reduce_buffer, (char*)sbuf);

Просмотреть файл

@ -81,8 +81,7 @@ mca_coll_basic_reduce_lin_intra(void *sbuf, void *rbuf, int count,
* in the layout pattern in the datatype. * in the layout pattern in the datatype.
* *
* For example, consider the following buffer (just talking about * For example, consider the following buffer (just talking about
* LB, extent, and true extent -- extrapolate for UB; i.e., assume * true_lb, extent, and true extent -- extrapolate for true_ub):
* the UB equals exactly where the data ends):
* *
* A B C * A B C
* -------------------------------------------------------- * --------------------------------------------------------
@ -96,7 +95,7 @@ mca_coll_basic_reduce_lin_intra(void *sbuf, void *rbuf, int count,
* *
* - extent: C-A * - extent: C-A
* - true extent: C-A * - true extent: C-A
* - LB: 0 * - true_lb: 0
* *
* A C * A C
* -------------------------------------------------------- * --------------------------------------------------------
@ -110,7 +109,7 @@ mca_coll_basic_reduce_lin_intra(void *sbuf, void *rbuf, int count,
* *
* - extent: C-A * - extent: C-A
* - true extent: C-B * - true extent: C-B
* - LB: positive * - true_lb: positive
* *
* A B C * A B C
* -------------------------------------------------------- * --------------------------------------------------------
@ -124,7 +123,7 @@ mca_coll_basic_reduce_lin_intra(void *sbuf, void *rbuf, int count,
* *
* - extent: C-A * - extent: C-A
* - true extent: C-A * - true extent: C-A
* - LB: negative * - true_lb: negative
* *
* A B C * A B C
* -------------------------------------------------------- * --------------------------------------------------------
@ -139,7 +138,7 @@ mca_coll_basic_reduce_lin_intra(void *sbuf, void *rbuf, int count,
* *
* - extent: C-MPI_BOTTOM * - extent: C-MPI_BOTTOM
* - true extent: C-B * - true extent: C-B
* - LB: [potentially very large] positive * - true_lb: [potentially very large] positive
* *
* MPI_BOTTOM B C * MPI_BOTTOM B C
* -------------------------------------------------------- * --------------------------------------------------------
@ -165,19 +164,19 @@ mca_coll_basic_reduce_lin_intra(void *sbuf, void *rbuf, int count,
* *
* 2. If B is what we get back from malloc, but we give A to * 2. If B is what we get back from malloc, but we give A to
* MPI_Send, then the buffer range [A,B) represents "dead space" * MPI_Send, then the buffer range [A,B) represents "dead space"
* -- no data will be put there. So it's safe to give B-LB to * -- no data will be put there. So it's safe to give B-true_lb to
* MPI_Send. More specifically, the LB is positive, so B-LB is * MPI_Send. More specifically, the true_lb is positive, so B-true_lb is
* actually A. * actually A.
* *
* 3. If A is what we get back from malloc, and B is what we give to * 3. If A is what we get back from malloc, and B is what we give to
* MPI_Send, then the LB is negative, so A-LB will actually equal * MPI_Send, then the true_lb is negative, so A-true_lb will actually equal
* B. * B.
* *
* 4. Although this seems like the weirdest case, it's actually * 4. Although this seems like the weirdest case, it's actually
* quite similar to case #2 -- the pointer we give to MPI_Send is * quite similar to case #2 -- the pointer we give to MPI_Send is
* smaller than the pointer we got back from malloc(). * smaller than the pointer we got back from malloc().
* *
* Hence, in all cases, we give (return_from_malloc - LB) to MPI_Send. * Hence, in all cases, we give (return_from_malloc - true_lb) to MPI_Send.
* *
* This works fine and dandy if we only have (count==1), which we * This works fine and dandy if we only have (count==1), which we
* rarely do. ;-) So we really need to allocate (true_extent + * rarely do. ;-) So we really need to allocate (true_extent +
@ -197,7 +196,7 @@ mca_coll_basic_reduce_lin_intra(void *sbuf, void *rbuf, int count,
if (NULL == inplace_temp) { if (NULL == inplace_temp) {
return OMPI_ERR_OUT_OF_RESOURCE; return OMPI_ERR_OUT_OF_RESOURCE;
} }
rbuf = inplace_temp - lb; rbuf = inplace_temp - true_lb;
} }
if (size > 1) { if (size > 1) {
@ -208,7 +207,7 @@ mca_coll_basic_reduce_lin_intra(void *sbuf, void *rbuf, int count,
} }
return OMPI_ERR_OUT_OF_RESOURCE; return OMPI_ERR_OUT_OF_RESOURCE;
} }
pml_buffer = free_buffer - lb; pml_buffer = free_buffer - true_lb;
} }
/* Initialize the receive buffer. */ /* Initialize the receive buffer. */
@ -361,7 +360,7 @@ mca_coll_basic_reduce_log_intra(void *sbuf, void *rbuf, int count,
return OMPI_ERR_OUT_OF_RESOURCE; return OMPI_ERR_OUT_OF_RESOURCE;
} }
pml_buffer = free_buffer - lb; pml_buffer = free_buffer - true_lb;
/* read the comment about commutative operations (few lines down /* read the comment about commutative operations (few lines down
* the page) */ * the page) */
if (ompi_op_is_commute(op)) { if (ompi_op_is_commute(op)) {
@ -377,7 +376,7 @@ mca_coll_basic_reduce_log_intra(void *sbuf, void *rbuf, int count,
err = OMPI_ERR_OUT_OF_RESOURCE; err = OMPI_ERR_OUT_OF_RESOURCE;
goto cleanup_and_return; goto cleanup_and_return;
} }
sbuf = inplace_temp - lb; sbuf = inplace_temp - true_lb;
err = ompi_datatype_copy_content_same_ddt(dtype, count, (char*)sbuf, (char*)rbuf); err = ompi_datatype_copy_content_same_ddt(dtype, count, (char*)sbuf, (char*)rbuf);
} }
snd_buffer = (char*)sbuf; snd_buffer = (char*)sbuf;
@ -391,7 +390,7 @@ mca_coll_basic_reduce_log_intra(void *sbuf, void *rbuf, int count,
err = OMPI_ERR_OUT_OF_RESOURCE; err = OMPI_ERR_OUT_OF_RESOURCE;
goto cleanup_and_return; goto cleanup_and_return;
} }
rbuf = free_rbuf - lb; rbuf = free_rbuf - true_lb;
} }
/* Loop over cube dimensions. High processes send to low ones in the /* Loop over cube dimensions. High processes send to low ones in the
@ -546,7 +545,7 @@ mca_coll_basic_reduce_lin_inter(void *sbuf, void *rbuf, int count,
if (NULL == free_buffer) { if (NULL == free_buffer) {
return OMPI_ERR_OUT_OF_RESOURCE; return OMPI_ERR_OUT_OF_RESOURCE;
} }
pml_buffer = free_buffer - lb; pml_buffer = free_buffer - true_lb;
/* Initialize the receive buffer. */ /* Initialize the receive buffer. */

Просмотреть файл

@ -111,7 +111,7 @@ mca_coll_basic_reduce_scatter_intra(void *sbuf, void *rbuf, int *rcounts,
/* temporary receive buffer. See coll_basic_reduce.c for details on sizing */ /* temporary receive buffer. See coll_basic_reduce.c for details on sizing */
recv_buf_free = (char*) malloc(buf_size); recv_buf_free = (char*) malloc(buf_size);
recv_buf = recv_buf_free - lb; recv_buf = recv_buf_free - true_lb;
if (NULL == recv_buf_free) { if (NULL == recv_buf_free) {
err = OMPI_ERR_OUT_OF_RESOURCE; err = OMPI_ERR_OUT_OF_RESOURCE;
goto cleanup; goto cleanup;
@ -119,7 +119,7 @@ mca_coll_basic_reduce_scatter_intra(void *sbuf, void *rbuf, int *rcounts,
/* allocate temporary buffer for results */ /* allocate temporary buffer for results */
result_buf_free = (char*) malloc(buf_size); result_buf_free = (char*) malloc(buf_size);
result_buf = result_buf_free - lb; result_buf = result_buf_free - true_lb;
/* copy local buffer into the temporary results */ /* copy local buffer into the temporary results */
err = ompi_datatype_sndrcv(sbuf, count, dtype, result_buf, count, dtype); err = ompi_datatype_sndrcv(sbuf, count, dtype, result_buf, count, dtype);
@ -323,7 +323,7 @@ mca_coll_basic_reduce_scatter_intra(void *sbuf, void *rbuf, int *rcounts,
/* temporary receive buffer. See coll_basic_reduce.c for /* temporary receive buffer. See coll_basic_reduce.c for
details on sizing */ details on sizing */
recv_buf_free = (char*) malloc(buf_size); recv_buf_free = (char*) malloc(buf_size);
recv_buf = recv_buf_free - lb; recv_buf = recv_buf_free - true_lb;
if (NULL == recv_buf_free) { if (NULL == recv_buf_free) {
err = OMPI_ERR_OUT_OF_RESOURCE; err = OMPI_ERR_OUT_OF_RESOURCE;
goto cleanup; goto cleanup;

Просмотреть файл

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
* Corporation. All rights reserved. * Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University * Copyright (c) 2004-2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights * of Tennessee Research Foundation. All rights
* reserved. * reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -85,7 +85,7 @@ mca_coll_basic_reduce_scatter_block_intra(void *sbuf, void *rbuf, int rcount,
/* temporary receive buffer. See coll_basic_reduce.c for /* temporary receive buffer. See coll_basic_reduce.c for
details on sizing */ details on sizing */
recv_buf_free = (char*) malloc(buf_size); recv_buf_free = (char*) malloc(buf_size);
recv_buf = recv_buf_free - lb; recv_buf = recv_buf_free - true_lb;
if (NULL == recv_buf_free) { if (NULL == recv_buf_free) {
err = OMPI_ERR_OUT_OF_RESOURCE; err = OMPI_ERR_OUT_OF_RESOURCE;
goto cleanup; goto cleanup;

Просмотреть файл

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
* Corporation. All rights reserved. * Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University * Copyright (c) 2004-2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights * of Tennessee Research Foundation. All rights
* reserved. * reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -79,7 +79,7 @@ mca_coll_basic_scan_intra(void *sbuf, void *rbuf, int count,
if (NULL == free_buffer) { if (NULL == free_buffer) {
return OMPI_ERR_OUT_OF_RESOURCE; return OMPI_ERR_OUT_OF_RESOURCE;
} }
pml_buffer = free_buffer - lb; pml_buffer = free_buffer - true_lb;
/* Copy the send buffer into the receive buffer. */ /* Copy the send buffer into the receive buffer. */

Просмотреть файл

@ -50,7 +50,7 @@ mca_coll_cuda_allreduce(void *sbuf, void *rbuf, int count,
} }
opal_cuda_memcpy_sync(sbuf1, sbuf, bufsize); opal_cuda_memcpy_sync(sbuf1, sbuf, bufsize);
sbuf2 = sbuf; /* save away original buffer */ sbuf2 = sbuf; /* save away original buffer */
sbuf = sbuf1 - lb; sbuf = sbuf1 - true_lb;
} }
if (opal_cuda_check_bufs(rbuf, NULL)) { if (opal_cuda_check_bufs(rbuf, NULL)) {
@ -61,7 +61,7 @@ mca_coll_cuda_allreduce(void *sbuf, void *rbuf, int count,
} }
opal_cuda_memcpy_sync(rbuf1, rbuf, bufsize); opal_cuda_memcpy_sync(rbuf1, rbuf, bufsize);
rbuf2 = rbuf; /* save away original buffer */ rbuf2 = rbuf; /* save away original buffer */
rbuf = rbuf1 - lb; rbuf = rbuf1 - true_lb;
} }
rc = s->c_coll.coll_allreduce(sbuf, rbuf, count, dtype, op, comm, s->c_coll.coll_allreduce_module); rc = s->c_coll.coll_allreduce(sbuf, rbuf, count, dtype, op, comm, s->c_coll.coll_allreduce_module);
if (NULL != sbuf1) { if (NULL != sbuf1) {

Просмотреть файл

@ -42,7 +42,7 @@ int mca_coll_cuda_exscan(void *sbuf, void *rbuf, int count,
} }
opal_cuda_memcpy_sync(sbuf1, sbuf, bufsize); opal_cuda_memcpy_sync(sbuf1, sbuf, bufsize);
sbuf2 = sbuf; /* save away original buffer */ sbuf2 = sbuf; /* save away original buffer */
sbuf = sbuf1 - lb; sbuf = sbuf1 - true_lb;
} }
if (opal_cuda_check_bufs(rbuf, NULL)) { if (opal_cuda_check_bufs(rbuf, NULL)) {
@ -53,7 +53,7 @@ int mca_coll_cuda_exscan(void *sbuf, void *rbuf, int count,
} }
opal_cuda_memcpy_sync(rbuf1, rbuf, bufsize); opal_cuda_memcpy_sync(rbuf1, rbuf, bufsize);
rbuf2 = rbuf; /* save away original buffer */ rbuf2 = rbuf; /* save away original buffer */
rbuf = rbuf1 - lb; rbuf = rbuf1 - true_lb;
} }
rc = s->c_coll.coll_exscan(sbuf, rbuf, count, dtype, op, comm, rc = s->c_coll.coll_exscan(sbuf, rbuf, count, dtype, op, comm,

Просмотреть файл

@ -55,7 +55,7 @@ mca_coll_cuda_reduce_scatter_block(void *sbuf, void *rbuf, int rcount,
} }
opal_cuda_memcpy_sync(sbuf1, sbuf, sbufsize); opal_cuda_memcpy_sync(sbuf1, sbuf, sbufsize);
sbuf2 = sbuf; /* save away original buffer */ sbuf2 = sbuf; /* save away original buffer */
sbuf = sbuf1 - lb; sbuf = sbuf1 - true_lb;
} }
if (opal_cuda_check_bufs(rbuf, NULL)) { if (opal_cuda_check_bufs(rbuf, NULL)) {
@ -66,7 +66,7 @@ mca_coll_cuda_reduce_scatter_block(void *sbuf, void *rbuf, int rcount,
} }
opal_cuda_memcpy_sync(rbuf1, rbuf, rbufsize); opal_cuda_memcpy_sync(rbuf1, rbuf, rbufsize);
rbuf2 = rbuf; /* save away original buffer */ rbuf2 = rbuf; /* save away original buffer */
rbuf = rbuf1 - lb; rbuf = rbuf1 - true_lb;
} }
rc = s->c_coll.coll_reduce_scatter_block(sbuf, rbuf, rcount, dtype, op, comm, rc = s->c_coll.coll_reduce_scatter_block(sbuf, rbuf, rcount, dtype, op, comm,
s->c_coll.coll_reduce_scatter_block_module); s->c_coll.coll_reduce_scatter_block_module);

Просмотреть файл

@ -49,7 +49,7 @@ int mca_coll_cuda_scan(void *sbuf, void *rbuf, int count,
} }
opal_cuda_memcpy_sync(sbuf1, sbuf, bufsize); opal_cuda_memcpy_sync(sbuf1, sbuf, bufsize);
sbuf2 = sbuf; /* save away original buffer */ sbuf2 = sbuf; /* save away original buffer */
sbuf = sbuf1 - lb; sbuf = sbuf1 - true_lb;
} }
if (opal_cuda_check_bufs(rbuf, NULL)) { if (opal_cuda_check_bufs(rbuf, NULL)) {
@ -60,7 +60,7 @@ int mca_coll_cuda_scan(void *sbuf, void *rbuf, int count,
} }
opal_cuda_memcpy_sync(rbuf1, rbuf, bufsize); opal_cuda_memcpy_sync(rbuf1, rbuf, bufsize);
rbuf2 = rbuf; /* save away original buffer */ rbuf2 = rbuf; /* save away original buffer */
rbuf = rbuf1 - lb; rbuf = rbuf1 - true_lb;
} }
rc = s->c_coll.coll_scan(sbuf, rbuf, count, dtype, op, comm, rc = s->c_coll.coll_scan(sbuf, rbuf, count, dtype, op, comm,
s->c_coll.coll_scan_module); s->c_coll.coll_scan_module);

Просмотреть файл

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
* Corporation. All rights reserved. * Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University * Copyright (c) 2004-2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights * of Tennessee Research Foundation. All rights
* reserved. * reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -70,7 +70,7 @@ int mca_coll_hierarch_allreduce_intra(void *sbuf, void *rbuf, int count,
if (NULL == tbuf) { if (NULL == tbuf) {
return OMPI_ERR_OUT_OF_RESOURCE; return OMPI_ERR_OUT_OF_RESOURCE;
} }
tmpbuf = tbuf - lb; tmpbuf = tbuf - true_lb;
if ( MPI_IN_PLACE != sbuf ) { if ( MPI_IN_PLACE != sbuf ) {
ret = lcomm->c_coll.coll_reduce (sbuf, tmpbuf, count, dtype, ret = lcomm->c_coll.coll_reduce (sbuf, tmpbuf, count, dtype,

Просмотреть файл

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
* Corporation. All rights reserved. * Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University * Copyright (c) 2004-2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights * of Tennessee Research Foundation. All rights
* reserved. * reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -69,7 +69,7 @@ int mca_coll_hierarch_reduce_intra(void *sbuf, void *rbuf, int count,
if (NULL == tbuf) { if (NULL == tbuf) {
return OMPI_ERR_OUT_OF_RESOURCE; return OMPI_ERR_OUT_OF_RESOURCE;
} }
tmpbuf = tbuf - lb; tmpbuf = tbuf - true_lb;
if ( MPI_IN_PLACE != sbuf ) { if ( MPI_IN_PLACE != sbuf ) {

Просмотреть файл

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
* Corporation. All rights reserved. * Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University * Copyright (c) 2004-2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights * of Tennessee Research Foundation. All rights
* reserved. * reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -64,7 +64,7 @@ mca_coll_inter_reduce_inter(void *sbuf, void *rbuf, int count,
if (NULL == free_buffer) { if (NULL == free_buffer) {
return OMPI_ERR_OUT_OF_RESOURCE; return OMPI_ERR_OUT_OF_RESOURCE;
} }
pml_buffer = free_buffer - lb; pml_buffer = free_buffer - true_lb;
err = comm->c_local_comm->c_coll.coll_reduce(sbuf, pml_buffer, count, err = comm->c_local_comm->c_coll.coll_reduce(sbuf, pml_buffer, count,
dtype, op, 0, comm->c_local_comm, dtype, op, 0, comm->c_local_comm,

Просмотреть файл

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
* Corporation. All rights reserved. * Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University * Copyright (c) 2004-2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights * of Tennessee Research Foundation. All rights
* reserved. * reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -271,7 +271,7 @@ static int reduce_inorder(void *sbuf, void* rbuf, int count,
if (NULL == free_buffer) { if (NULL == free_buffer) {
return OMPI_ERR_OUT_OF_RESOURCE; return OMPI_ERR_OUT_OF_RESOURCE;
} }
reduce_temp_buffer = free_buffer - lb; reduce_temp_buffer = free_buffer - true_lb;
/* Trickery here: we use a potentially smaller count than /* Trickery here: we use a potentially smaller count than
the user count -- use the largest count that is <= the user count -- use the largest count that is <=
@ -319,7 +319,7 @@ static int reduce_inorder(void *sbuf, void* rbuf, int count,
} }
return OMPI_ERR_OUT_OF_RESOURCE; return OMPI_ERR_OUT_OF_RESOURCE;
} }
sbuf = inplace_temp - lb; sbuf = inplace_temp - true_lb;
ompi_datatype_copy_content_same_ddt(dtype, count, (char *) sbuf, (char *) rbuf); ompi_datatype_copy_content_same_ddt(dtype, count, (char *) sbuf, (char *) rbuf);
} else { } else {
inplace_temp = NULL; inplace_temp = NULL;

Просмотреть файл

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
* Corporation. All rights reserved. * Corporation. All rights reserved.
* Copyright (c) 2004-2012 The University of Tennessee and The University * Copyright (c) 2004-2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights * of Tennessee Research Foundation. All rights
* reserved. * reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -181,8 +181,8 @@ int ompi_coll_tuned_allgather_intra_bruck(void *sbuf, int scount,
- create temporary shift buffer, - create temporary shift buffer,
see discussion in coll_basic_reduce.c about the size and beginning see discussion in coll_basic_reduce.c about the size and beginning
of temporary buffer. of temporary buffer.
- copy blocks [0 .. (size - rank - 1)] in rbuf to shift buffer - copy blocks [0 .. (size - rank - 1)] from rbuf to shift buffer
- move blocks [(size - rank) .. size] in rbuf to beginning of rbuf - move blocks [(size - rank) .. size] from rbuf to beginning of rbuf
- copy blocks from shift buffer starting at block [rank] in rbuf. - copy blocks from shift buffer starting at block [rank] in rbuf.
*/ */
if (0 != rank) { if (0 != rank) {
@ -192,14 +192,14 @@ int ompi_coll_tuned_allgather_intra_bruck(void *sbuf, int scount,
err = ompi_datatype_get_true_extent(rdtype, &true_lb, &true_extent); err = ompi_datatype_get_true_extent(rdtype, &true_lb, &true_extent);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
free_buf = (char*) calloc(((true_extent + free_buf = (char*) calloc(((true_extent +
((ptrdiff_t)(size - rank) * (ptrdiff_t)rcount - 1) * rext)), ((ptrdiff_t)(size - rank) * (ptrdiff_t)rcount - 1) * rext)),
sizeof(char)); sizeof(char));
if (NULL == free_buf) { if (NULL == free_buf) {
line = __LINE__; err = OMPI_ERR_OUT_OF_RESOURCE; goto err_hndl; line = __LINE__; err = OMPI_ERR_OUT_OF_RESOURCE; goto err_hndl;
} }
shift_buf = free_buf - true_lb; shift_buf = free_buf - true_lb;
/* 1. copy blocks [0 .. (size - rank - 1)] from rbuf to shift buffer */ /* 1. copy blocks [0 .. (size - rank - 1)] from rbuf to shift buffer */
err = ompi_datatype_copy_content_same_ddt(rdtype, ((ptrdiff_t)(size - rank) * (ptrdiff_t)rcount), err = ompi_datatype_copy_content_same_ddt(rdtype, ((ptrdiff_t)(size - rank) * (ptrdiff_t)rcount),
shift_buf, rbuf); shift_buf, rbuf);

Просмотреть файл

@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
* Corporation. All rights reserved. * Corporation. All rights reserved.
* Copyright (c) 2004-2012 The University of Tennessee and The University * Copyright (c) 2004-2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights * of Tennessee Research Foundation. All rights
* reserved. * reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -412,8 +412,8 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count,
Early blocks are at most 1 element larger than the late ones. Early blocks are at most 1 element larger than the late ones.
*/ */
COLL_TUNED_COMPUTE_BLOCKCOUNT( count, size, split_rank, COLL_TUNED_COMPUTE_BLOCKCOUNT( count, size, split_rank,
early_segcount, late_segcount ) early_segcount, late_segcount );
max_segcount = early_segcount; max_segcount = early_segcount;
max_real_segsize = true_extent + (max_segcount - 1) * extent; max_real_segsize = true_extent + (max_segcount - 1) * extent;
@ -698,10 +698,10 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count
out of the largest one will have max_segcount elements. out of the largest one will have max_segcount elements.
*/ */
COLL_TUNED_COMPUTE_BLOCKCOUNT( count, size, split_rank, COLL_TUNED_COMPUTE_BLOCKCOUNT( count, size, split_rank,
early_blockcount, late_blockcount ) early_blockcount, late_blockcount );
COLL_TUNED_COMPUTE_BLOCKCOUNT( early_blockcount, num_phases, inbi, COLL_TUNED_COMPUTE_BLOCKCOUNT( early_blockcount, num_phases, inbi,
max_segcount, k) max_segcount, k);
max_real_segsize = true_extent + (ptrdiff_t)(max_segcount - 1) * extent; max_real_segsize = true_extent + (ptrdiff_t)(max_segcount - 1) * extent;
/* Allocate and initialize temporary buffers */ /* Allocate and initialize temporary buffers */
inbuf[0] = (char*)malloc(max_real_segsize); inbuf[0] = (char*)malloc(max_real_segsize);

Просмотреть файл

@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
* Corporation. All rights reserved. * Corporation. All rights reserved.
* Copyright (c) 2004-2012 The University of Tennessee and The University * Copyright (c) 2004-2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights * of Tennessee Research Foundation. All rights
* reserved. * reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -102,7 +102,7 @@ ompi_coll_tuned_gather_intra_binomial(void *sbuf, int scount,
err= OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl; err= OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl;
} }
ptmp = tempbuf - rlb; ptmp = tempbuf - rtrue_lb;
if (sbuf != MPI_IN_PLACE) { if (sbuf != MPI_IN_PLACE) {
/* copy from sbuf to temp buffer */ /* copy from sbuf to temp buffer */
err = ompi_datatype_sndrcv(sbuf, scount, sdtype, err = ompi_datatype_sndrcv(sbuf, scount, sdtype,
@ -125,7 +125,7 @@ ompi_coll_tuned_gather_intra_binomial(void *sbuf, int scount,
err= OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl; err= OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl;
} }
ptmp = tempbuf - slb; ptmp = tempbuf - strue_lb;
/* local copy to tempbuf */ /* local copy to tempbuf */
err = ompi_datatype_sndrcv(sbuf, scount, sdtype, err = ompi_datatype_sndrcv(sbuf, scount, sdtype,
ptmp, scount, sdtype); ptmp, scount, sdtype);

Просмотреть файл

@ -656,7 +656,7 @@ ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
if (NULL == inplace_temp) { if (NULL == inplace_temp) {
return OMPI_ERR_OUT_OF_RESOURCE; return OMPI_ERR_OUT_OF_RESOURCE;
} }
rbuf = inplace_temp - lb; rbuf = inplace_temp - true_lb;
} }
if (size > 1) { if (size > 1) {
@ -665,7 +665,7 @@ ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
err = OMPI_ERR_OUT_OF_RESOURCE; err = OMPI_ERR_OUT_OF_RESOURCE;
goto exit; goto exit;
} }
pml_buffer = free_buffer - lb; pml_buffer = free_buffer - true_lb;
} }
/* Initialize the receive buffer. */ /* Initialize the receive buffer. */

Просмотреть файл

@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
* Corporation. All rights reserved. * Corporation. All rights reserved.
* Copyright (c) 2004-2012 The University of Tennessee and The University * Copyright (c) 2004-2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights * of Tennessee Research Foundation. All rights
* reserved. * reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -186,7 +186,7 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf,
/* Allocate temporary receive buffer. */ /* Allocate temporary receive buffer. */
recv_buf_free = (char*) malloc(buf_size); recv_buf_free = (char*) malloc(buf_size);
recv_buf = recv_buf_free - lb; recv_buf = recv_buf_free - true_lb;
if (NULL == recv_buf_free) { if (NULL == recv_buf_free) {
err = OMPI_ERR_OUT_OF_RESOURCE; err = OMPI_ERR_OUT_OF_RESOURCE;
goto cleanup; goto cleanup;
@ -194,7 +194,7 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf,
/* allocate temporary buffer for results */ /* allocate temporary buffer for results */
result_buf_free = (char*) malloc(buf_size); result_buf_free = (char*) malloc(buf_size);
result_buf = result_buf_free - lb; result_buf = result_buf_free - true_lb;
/* copy local buffer into the temporary results */ /* copy local buffer into the temporary results */
err = ompi_datatype_sndrcv(sbuf, count, dtype, result_buf, count, dtype); err = ompi_datatype_sndrcv(sbuf, count, dtype, result_buf, count, dtype);

Просмотреть файл

@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
* Corporation. All rights reserved. * Corporation. All rights reserved.
* Copyright (c) 2004-2012 The University of Tennessee and The University * Copyright (c) 2004-2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights * of Tennessee Research Foundation. All rights
* reserved. * reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -100,7 +100,7 @@ ompi_coll_tuned_scatter_intra_binomial(void *sbuf, int scount,
err = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl; err = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl;
} }
ptmp = tempbuf - slb; ptmp = tempbuf - strue_lb;
/* and rotate data so they will eventually be in the right place */ /* and rotate data so they will eventually be in the right place */
err = ompi_datatype_copy_content_same_ddt(sdtype, (ptrdiff_t)scount * (ptrdiff_t)(size - root), err = ompi_datatype_copy_content_same_ddt(sdtype, (ptrdiff_t)scount * (ptrdiff_t)(size - root),
@ -128,7 +128,7 @@ ompi_coll_tuned_scatter_intra_binomial(void *sbuf, int scount,
err= OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl; err= OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl;
} }
ptmp = tempbuf - rlb; ptmp = tempbuf - rtrue_lb;
sdtype = rdtype; sdtype = rdtype;
scount = rcount; scount = rcount;