The tuned collectives can now deal with more than 2 GB of data.
This commit was SVN r26103.
Этот коммит содержится в:
родитель
762b3e13a9
Коммит
a78a7bd8e8
@ -117,7 +117,7 @@ int ompi_coll_tuned_allgather_intra_bruck(void *sbuf, int scount,
|
||||
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
|
||||
|
||||
} else if (0 != rank) {
|
||||
tmpsend = ((char*)rbuf) + rank * rcount * rext;
|
||||
tmpsend = ((char*)rbuf) + (ptrdiff_t)rank * (ptrdiff_t)rcount * rext;
|
||||
err = ompi_datatype_copy_content_same_ddt(rdtype, rcount, tmprecv, tmpsend);
|
||||
if (err < 0) { line = __LINE__; goto err_hndl; }
|
||||
}
|
||||
@ -139,7 +139,7 @@ int ompi_coll_tuned_allgather_intra_bruck(void *sbuf, int scount,
|
||||
recvfrom = (rank + distance) % size;
|
||||
sendto = (rank - distance + size) % size;
|
||||
|
||||
tmprecv = tmpsend + distance * rcount * rext;
|
||||
tmprecv = tmpsend + (ptrdiff_t)distance * (ptrdiff_t)rcount * rext;
|
||||
|
||||
if (distance <= (size >> 1)) {
|
||||
blockcount = distance;
|
||||
@ -174,7 +174,7 @@ int ompi_coll_tuned_allgather_intra_bruck(void *sbuf, int scount,
|
||||
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
|
||||
|
||||
free_buf = (char*) calloc(((true_extent + true_lb +
|
||||
((size - rank) * rcount - 1) * rext)),
|
||||
((ptrdiff_t)(size - rank) * (ptrdiff_t)rcount - 1) * rext)),
|
||||
sizeof(char));
|
||||
if (NULL == free_buf) {
|
||||
line = __LINE__; err = OMPI_ERR_OUT_OF_RESOURCE; goto err_hndl;
|
||||
@ -182,19 +182,19 @@ int ompi_coll_tuned_allgather_intra_bruck(void *sbuf, int scount,
|
||||
shift_buf = free_buf - rlb;
|
||||
|
||||
tmpsend = (char*) rbuf;
|
||||
err = ompi_datatype_copy_content_same_ddt(rdtype, ((size - rank) * rcount),
|
||||
err = ompi_datatype_copy_content_same_ddt(rdtype, ((ptrdiff_t)(size - rank) * (ptrdiff_t)rcount),
|
||||
shift_buf, tmpsend);
|
||||
if (err < 0) { line = __LINE__; goto err_hndl; }
|
||||
|
||||
tmprecv = (char*) rbuf;
|
||||
tmpsend = (char*) rbuf + (size - rank) * rcount * rext;
|
||||
tmpsend = (char*) rbuf + (ptrdiff_t)(size - rank) * (ptrdiff_t)rcount * rext;
|
||||
|
||||
err = ompi_datatype_copy_content_same_ddt(rdtype, rank * rcount,
|
||||
err = ompi_datatype_copy_content_same_ddt(rdtype, (ptrdiff_t)rank * (ptrdiff_t)rcount,
|
||||
tmprecv, tmpsend);
|
||||
if (err < 0) { line = __LINE__; goto err_hndl; }
|
||||
|
||||
tmprecv = (char*) rbuf + rank * rcount * rext;
|
||||
err = ompi_datatype_copy_content_same_ddt(rdtype, (size - rank) * rcount,
|
||||
tmprecv = (char*) rbuf + (ptrdiff_t)rank * (ptrdiff_t)rcount * rext;
|
||||
err = ompi_datatype_copy_content_same_ddt(rdtype, (ptrdiff_t)(size - rank) * (ptrdiff_t)rcount,
|
||||
tmprecv, shift_buf);
|
||||
if (err < 0) { line = __LINE__; goto err_hndl; }
|
||||
|
||||
@ -305,7 +305,7 @@ ompi_coll_tuned_allgather_intra_recursivedoubling(void *sbuf, int scount,
|
||||
*/
|
||||
if (MPI_IN_PLACE != sbuf) {
|
||||
tmpsend = (char*) sbuf;
|
||||
tmprecv = (char*) rbuf + rank * rcount * rext;
|
||||
tmprecv = (char*) rbuf + (ptrdiff_t)rank * (ptrdiff_t)rcount * rext;
|
||||
err = ompi_datatype_sndrcv(tmpsend, scount, sdtype, tmprecv, rcount, rdtype);
|
||||
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
|
||||
|
||||
@ -321,18 +321,18 @@ ompi_coll_tuned_allgather_intra_recursivedoubling(void *sbuf, int scount,
|
||||
remote = rank ^ distance;
|
||||
|
||||
if (rank < remote) {
|
||||
tmpsend = (char*)rbuf + sendblocklocation * rcount * rext;
|
||||
tmprecv = (char*)rbuf + (sendblocklocation + distance) * rcount * rext;
|
||||
tmpsend = (char*)rbuf + (ptrdiff_t)sendblocklocation * (ptrdiff_t)rcount * rext;
|
||||
tmprecv = (char*)rbuf + (ptrdiff_t)(sendblocklocation + distance) * (ptrdiff_t)rcount * rext;
|
||||
} else {
|
||||
tmpsend = (char*)rbuf + sendblocklocation * rcount * rext;
|
||||
tmprecv = (char*)rbuf + (sendblocklocation - distance) * rcount * rext;
|
||||
tmpsend = (char*)rbuf + (ptrdiff_t)sendblocklocation * (ptrdiff_t)rcount * rext;
|
||||
tmprecv = (char*)rbuf + (ptrdiff_t)(sendblocklocation - distance) * (ptrdiff_t)rcount * rext;
|
||||
sendblocklocation -= distance;
|
||||
}
|
||||
|
||||
/* Sendreceive */
|
||||
err = ompi_coll_tuned_sendrecv(tmpsend, distance * rcount, rdtype,
|
||||
err = ompi_coll_tuned_sendrecv(tmpsend, (ptrdiff_t)distance * (ptrdiff_t)rcount, rdtype,
|
||||
remote, MCA_COLL_BASE_TAG_ALLGATHER,
|
||||
tmprecv, distance * rcount, rdtype,
|
||||
tmprecv, (ptrdiff_t)distance * (ptrdiff_t)rcount, rdtype,
|
||||
remote, MCA_COLL_BASE_TAG_ALLGATHER,
|
||||
comm, MPI_STATUS_IGNORE, rank);
|
||||
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
|
||||
@ -394,7 +394,7 @@ int ompi_coll_tuned_allgather_intra_ring(void *sbuf, int scount,
|
||||
- if send buffer is not MPI_IN_PLACE, copy send buffer to appropriate block
|
||||
of receive buffer
|
||||
*/
|
||||
tmprecv = (char*) rbuf + rank * rcount * rext;
|
||||
tmprecv = (char*) rbuf + (ptrdiff_t)rank * (ptrdiff_t)rcount * rext;
|
||||
if (MPI_IN_PLACE != sbuf) {
|
||||
tmpsend = (char*) sbuf;
|
||||
err = ompi_datatype_sndrcv(tmpsend, scount, sdtype, tmprecv, rcount, rdtype);
|
||||
@ -416,8 +416,8 @@ int ompi_coll_tuned_allgather_intra_ring(void *sbuf, int scount,
|
||||
recvdatafrom = (rank - i - 1 + size) % size;
|
||||
senddatafrom = (rank - i + size) % size;
|
||||
|
||||
tmprecv = (char*)rbuf + recvdatafrom * rcount * rext;
|
||||
tmpsend = (char*)rbuf + senddatafrom * rcount * rext;
|
||||
tmprecv = (char*)rbuf + (ptrdiff_t)recvdatafrom * (ptrdiff_t)rcount * rext;
|
||||
tmpsend = (char*)rbuf + (ptrdiff_t)senddatafrom * (ptrdiff_t)rcount * rext;
|
||||
|
||||
/* Sendreceive */
|
||||
err = ompi_coll_tuned_sendrecv(tmpsend, rcount, rdtype, sendto,
|
||||
@ -535,7 +535,7 @@ ompi_coll_tuned_allgather_intra_neighborexchange(void *sbuf, int scount,
|
||||
- if send buffer is not MPI_IN_PLACE, copy send buffer to appropriate block
|
||||
of receive buffer
|
||||
*/
|
||||
tmprecv = (char*) rbuf + rank * rcount * rext;
|
||||
tmprecv = (char*) rbuf + (ptrdiff_t)rank *(ptrdiff_t) rcount * rext;
|
||||
if (MPI_IN_PLACE != sbuf) {
|
||||
tmpsend = (char*) sbuf;
|
||||
err = ompi_datatype_sndrcv(tmpsend, scount, sdtype, tmprecv, rcount, rdtype);
|
||||
@ -567,8 +567,8 @@ ompi_coll_tuned_allgather_intra_neighborexchange(void *sbuf, int scount,
|
||||
exchange two blocks with appropriate neighbor.
|
||||
the send location becomes the previous receive location.
|
||||
*/
|
||||
tmprecv = (char*)rbuf + neighbor[0] * rcount * rext;
|
||||
tmpsend = (char*)rbuf + rank * rcount * rext;
|
||||
tmprecv = (char*)rbuf + (ptrdiff_t)neighbor[0] * (ptrdiff_t)rcount * rext;
|
||||
tmpsend = (char*)rbuf + (ptrdiff_t)rank * (ptrdiff_t)rcount * rext;
|
||||
/* Sendreceive */
|
||||
err = ompi_coll_tuned_sendrecv(tmpsend, rcount, rdtype, neighbor[0],
|
||||
MCA_COLL_BASE_TAG_ALLGATHER,
|
||||
@ -589,14 +589,14 @@ ompi_coll_tuned_allgather_intra_neighborexchange(void *sbuf, int scount,
|
||||
recv_data_from[i_parity] =
|
||||
(recv_data_from[i_parity] + offset_at_step[i_parity] + size) % size;
|
||||
|
||||
tmprecv = (char*)rbuf + recv_data_from[i_parity] * rcount * rext;
|
||||
tmpsend = (char*)rbuf + send_data_from * rcount * rext;
|
||||
tmprecv = (char*)rbuf + (ptrdiff_t)recv_data_from[i_parity] * (ptrdiff_t)rcount * rext;
|
||||
tmpsend = (char*)rbuf + (ptrdiff_t)send_data_from * rcount * rext;
|
||||
|
||||
/* Sendreceive */
|
||||
err = ompi_coll_tuned_sendrecv(tmpsend, 2 * rcount, rdtype,
|
||||
err = ompi_coll_tuned_sendrecv(tmpsend, (ptrdiff_t)2 * (ptrdiff_t)rcount, rdtype,
|
||||
neighbor[i_parity],
|
||||
MCA_COLL_BASE_TAG_ALLGATHER,
|
||||
tmprecv, 2 * rcount, rdtype,
|
||||
tmprecv, (ptrdiff_t)2 * (ptrdiff_t)rcount, rdtype,
|
||||
neighbor[i_parity],
|
||||
MCA_COLL_BASE_TAG_ALLGATHER,
|
||||
comm, MPI_STATUS_IGNORE, rank);
|
||||
@ -646,11 +646,11 @@ int ompi_coll_tuned_allgather_intra_two_procs(void *sbuf, int scount,
|
||||
|
||||
tmpsend = (char*)sbuf;
|
||||
if (MPI_IN_PLACE == sbuf) {
|
||||
tmpsend = (char*)rbuf + rank * rcount * rext;
|
||||
tmpsend = (char*)rbuf + (ptrdiff_t)rank * (ptrdiff_t)rcount * rext;
|
||||
scount = rcount;
|
||||
sdtype = rdtype;
|
||||
}
|
||||
tmprecv = (char*)rbuf + remote * rcount * rext;
|
||||
tmprecv = (char*)rbuf + (ptrdiff_t)remote * (ptrdiff_t)rcount * rext;
|
||||
|
||||
err = ompi_coll_tuned_sendrecv(tmpsend, scount, sdtype, remote,
|
||||
MCA_COLL_BASE_TAG_ALLGATHER,
|
||||
@ -662,7 +662,7 @@ int ompi_coll_tuned_allgather_intra_two_procs(void *sbuf, int scount,
|
||||
/* Place your data in correct location if necessary */
|
||||
if (MPI_IN_PLACE != sbuf) {
|
||||
err = ompi_datatype_sndrcv((char*)sbuf, scount, sdtype,
|
||||
(char*)rbuf + rank * rcount * rext, rcount, rdtype);
|
||||
(char*)rbuf + (ptrdiff_t)rank * (ptrdiff_t)rcount * rext, rcount, rdtype);
|
||||
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
|
||||
}
|
||||
|
||||
@ -726,9 +726,9 @@ ompi_coll_tuned_allgather_intra_basic_linear(void *sbuf, int scount,
|
||||
rbuf, rcount, rdtype,
|
||||
0, comm, comm->c_coll.coll_gather_module);
|
||||
if (MPI_SUCCESS == err) {
|
||||
size_t length = (size_t)rcount * ompi_comm_size(comm);
|
||||
size_t length = (ptrdiff_t)rcount * ompi_comm_size(comm);
|
||||
if( length < (size_t)INT_MAX ) {
|
||||
err = comm->c_coll.coll_bcast(rbuf, rcount * ompi_comm_size(comm), rdtype,
|
||||
err = comm->c_coll.coll_bcast(rbuf, (ptrdiff_t)rcount * ompi_comm_size(comm), rdtype,
|
||||
0, comm, comm->c_coll.coll_bcast_module);
|
||||
} else {
|
||||
ompi_datatype_t* temptype;
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2009 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2012 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -117,7 +117,7 @@ int ompi_coll_tuned_allgatherv_intra_bruck(void *sbuf, int scount,
|
||||
- if send buffer is not MPI_IN_PLACE, copy send buffer to block rank of
|
||||
the receive buffer.
|
||||
*/
|
||||
tmprecv = (char*) rbuf + rdispls[rank] * rext;
|
||||
tmprecv = (char*) rbuf + (ptrdiff_t)rdispls[rank] * rext;
|
||||
if (MPI_IN_PLACE != sbuf) {
|
||||
tmpsend = (char*) sbuf;
|
||||
err = ompi_datatype_sndrcv(tmpsend, scount, sdtype,
|
||||
@ -246,7 +246,7 @@ int ompi_coll_tuned_allgatherv_intra_ring(void *sbuf, int scount,
|
||||
- if send buffer is not MPI_IN_PLACE, copy send buffer to
|
||||
the appropriate block of receive buffer
|
||||
*/
|
||||
tmprecv = (char*) rbuf + rdisps[rank] * rext;
|
||||
tmprecv = (char*) rbuf + (ptrdiff_t)rdisps[rank] * rext;
|
||||
if (MPI_IN_PLACE != sbuf) {
|
||||
tmpsend = (char*) sbuf;
|
||||
err = ompi_datatype_sndrcv(tmpsend, scount, sdtype,
|
||||
@ -392,7 +392,7 @@ ompi_coll_tuned_allgatherv_intra_neighborexchange(void *sbuf, int scount,
|
||||
- if send buffer is not MPI_IN_PLACE, copy send buffer to
|
||||
the appropriate block of receive buffer
|
||||
*/
|
||||
tmprecv = (char*) rbuf + rdispls[rank] * rext;
|
||||
tmprecv = (char*) rbuf + (ptrdiff_t)rdispls[rank] * rext;
|
||||
if (MPI_IN_PLACE != sbuf) {
|
||||
tmpsend = (char*) sbuf;
|
||||
err = ompi_datatype_sndrcv(tmpsend, scount, sdtype,
|
||||
@ -427,8 +427,8 @@ ompi_coll_tuned_allgatherv_intra_neighborexchange(void *sbuf, int scount,
|
||||
Note, we need to create indexed datatype to send and receive these
|
||||
blocks properly.
|
||||
*/
|
||||
tmprecv = (char*)rbuf + rdispls[neighbor[0]] * rext;
|
||||
tmpsend = (char*)rbuf + rdispls[rank] * rext;
|
||||
tmprecv = (char*)rbuf + (ptrdiff_t)rdispls[neighbor[0]] * rext;
|
||||
tmpsend = (char*)rbuf + (ptrdiff_t)rdispls[rank] * rext;
|
||||
err = ompi_coll_tuned_sendrecv(tmpsend, rcounts[rank], rdtype,
|
||||
neighbor[0], MCA_COLL_BASE_TAG_ALLGATHERV,
|
||||
tmprecv, rcounts[neighbor[0]], rdtype,
|
||||
@ -532,11 +532,11 @@ int ompi_coll_tuned_allgatherv_intra_two_procs(void *sbuf, int scount,
|
||||
|
||||
tmpsend = (char*)sbuf;
|
||||
if (MPI_IN_PLACE == sbuf) {
|
||||
tmpsend = (char*)rbuf + rdispls[rank] * rext;
|
||||
tmpsend = (char*)rbuf + (ptrdiff_t)rdispls[rank] * rext;
|
||||
scount = rcounts[rank];
|
||||
sdtype = rdtype;
|
||||
}
|
||||
tmprecv = (char*)rbuf + rdispls[remote] * rext;
|
||||
tmprecv = (char*)rbuf + (ptrdiff_t)rdispls[remote] * rext;
|
||||
|
||||
err = ompi_coll_tuned_sendrecv(tmpsend, scount, sdtype, remote,
|
||||
MCA_COLL_BASE_TAG_ALLGATHERV,
|
||||
@ -548,7 +548,7 @@ int ompi_coll_tuned_allgatherv_intra_two_procs(void *sbuf, int scount,
|
||||
/* Place your data in correct location if necessary */
|
||||
if (MPI_IN_PLACE != sbuf) {
|
||||
err = ompi_datatype_sndrcv((char*)sbuf, scount, sdtype,
|
||||
(char*)rbuf + rdispls[rank] * rext,
|
||||
(char*)rbuf + (ptrdiff_t)rdispls[rank] * rext,
|
||||
rcounts[rank], rdtype);
|
||||
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
|
||||
}
|
||||
@ -616,7 +616,7 @@ ompi_coll_tuned_allgatherv_intra_basic_default(void *sbuf, int scount,
|
||||
send_type = rdtype;
|
||||
send_buf = (char*)rbuf;
|
||||
for (i = 0; i < rank; ++i) {
|
||||
send_buf += (rcounts[i] * extent);
|
||||
send_buf += ((ptrdiff_t)rcounts[i] * extent);
|
||||
}
|
||||
} else {
|
||||
send_buf = (char*)sbuf;
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2009 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2012 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -156,7 +156,7 @@ ompi_coll_tuned_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf,
|
||||
ret = ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent);
|
||||
if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; }
|
||||
|
||||
inplacebuf = (char*) malloc(true_extent + (count - 1) * extent);
|
||||
inplacebuf = (char*) malloc(true_extent + (ptrdiff_t)(count - 1) * extent);
|
||||
if (NULL == inplacebuf) { ret = -1; line = __LINE__; goto error_hndl; }
|
||||
|
||||
if (MPI_IN_PLACE == sbuf) {
|
||||
@ -441,8 +441,8 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count,
|
||||
if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; }
|
||||
/* Send first block (my block) to the neighbor on the right */
|
||||
block_offset = ((rank < split_rank)?
|
||||
(rank * early_segcount) :
|
||||
(rank * late_segcount + split_rank));
|
||||
((ptrdiff_t)rank * (ptrdiff_t)early_segcount) :
|
||||
((ptrdiff_t)rank * (ptrdiff_t)late_segcount + split_rank));
|
||||
block_count = ((rank < split_rank)? early_segcount : late_segcount);
|
||||
tmpsend = ((char*)rbuf) + block_offset * extent;
|
||||
ret = MCA_PML_CALL(send(tmpsend, block_count, dtype, send_to,
|
||||
@ -468,10 +468,10 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count,
|
||||
rbuf[prevblock] = inbuf[inbi ^ 0x1] (op) rbuf[prevblock]
|
||||
*/
|
||||
block_offset = ((prevblock < split_rank)?
|
||||
(prevblock * early_segcount) :
|
||||
(prevblock * late_segcount + split_rank));
|
||||
((ptrdiff_t)prevblock * early_segcount) :
|
||||
((ptrdiff_t)prevblock * late_segcount + split_rank));
|
||||
block_count = ((prevblock < split_rank)? early_segcount : late_segcount);
|
||||
tmprecv = ((char*)rbuf) + block_offset * extent;
|
||||
tmprecv = ((char*)rbuf) + (ptrdiff_t)block_offset * extent;
|
||||
ompi_op_reduce(op, inbuf[inbi ^ 0x1], tmprecv, block_count, dtype);
|
||||
|
||||
/* send previous block to send_to */
|
||||
@ -489,10 +489,10 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count,
|
||||
rbuf[rank+1] = inbuf[inbi] (op) rbuf[rank + 1] */
|
||||
recv_from = (rank + 1) % size;
|
||||
block_offset = ((recv_from < split_rank)?
|
||||
(recv_from * early_segcount) :
|
||||
(recv_from * late_segcount + split_rank));
|
||||
((ptrdiff_t)recv_from * early_segcount) :
|
||||
((ptrdiff_t)recv_from * late_segcount + split_rank));
|
||||
block_count = ((recv_from < split_rank)? early_segcount : late_segcount);
|
||||
tmprecv = ((char*)rbuf) + block_offset * extent;
|
||||
tmprecv = ((char*)rbuf) + (ptrdiff_t)block_offset * extent;
|
||||
ompi_op_reduce(op, inbuf[inbi], tmprecv, block_count, dtype);
|
||||
|
||||
/* Distribution loop - variation of ring allgather */
|
||||
@ -503,17 +503,17 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count,
|
||||
const int send_data_from = (rank + 1 + size - k) % size;
|
||||
const int send_block_offset =
|
||||
((send_data_from < split_rank)?
|
||||
(send_data_from * early_segcount) :
|
||||
(send_data_from * late_segcount + split_rank));
|
||||
((ptrdiff_t)send_data_from * early_segcount) :
|
||||
((ptrdiff_t)send_data_from * late_segcount + split_rank));
|
||||
const int recv_block_offset =
|
||||
((recv_data_from < split_rank)?
|
||||
(recv_data_from * early_segcount) :
|
||||
(recv_data_from * late_segcount + split_rank));
|
||||
((ptrdiff_t)recv_data_from * early_segcount) :
|
||||
((ptrdiff_t)recv_data_from * late_segcount + split_rank));
|
||||
block_count = ((send_data_from < split_rank)?
|
||||
early_segcount : late_segcount);
|
||||
|
||||
tmprecv = (char*)rbuf + recv_block_offset * extent;
|
||||
tmpsend = (char*)rbuf + send_block_offset * extent;
|
||||
tmprecv = (char*)rbuf + (ptrdiff_t)recv_block_offset * extent;
|
||||
tmpsend = (char*)rbuf + (ptrdiff_t)send_block_offset * extent;
|
||||
|
||||
ret = ompi_coll_tuned_sendrecv(tmpsend, block_count, dtype, send_to,
|
||||
MCA_COLL_BASE_TAG_ALLREDUCE,
|
||||
@ -663,7 +663,7 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count
|
||||
COLL_TUNED_COMPUTED_SEGCOUNT(segsize, typelng, segcount)
|
||||
|
||||
/* Special case for count less than size * segcount - use regular ring */
|
||||
if (count < size * segcount) {
|
||||
if (count < (size * segcount)) {
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:allreduce_ring_segmented rank %d/%d, count %d, switching to regular ring", rank, size, count));
|
||||
return (ompi_coll_tuned_allreduce_intra_ring(sbuf, rbuf, count, dtype, op,
|
||||
comm, module));
|
||||
@ -689,7 +689,7 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count
|
||||
early_blockcount, late_blockcount )
|
||||
COLL_TUNED_COMPUTE_BLOCKCOUNT( early_blockcount, num_phases, inbi,
|
||||
max_segcount, k)
|
||||
max_real_segsize = true_extent + (max_segcount - 1) * extent;
|
||||
max_real_segsize = true_extent + (ptrdiff_t)(max_segcount - 1) * extent;
|
||||
|
||||
/* Allocate and initialize temporary buffers */
|
||||
inbuf[0] = (char*)malloc(max_real_segsize);
|
||||
@ -739,17 +739,17 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count
|
||||
- compute my block and phase offset
|
||||
- send data */
|
||||
block_offset = ((rank < split_rank)?
|
||||
(rank * early_blockcount) :
|
||||
(rank * late_blockcount + split_rank));
|
||||
((ptrdiff_t)rank * (ptrdiff_t)early_blockcount) :
|
||||
((ptrdiff_t)rank * (ptrdiff_t)late_blockcount + split_rank));
|
||||
block_count = ((rank < split_rank)? early_blockcount : late_blockcount);
|
||||
COLL_TUNED_COMPUTE_BLOCKCOUNT(block_count, num_phases, split_phase,
|
||||
early_phase_segcount, late_phase_segcount)
|
||||
phase_count = ((phase < split_phase)?
|
||||
(early_phase_segcount) : (late_phase_segcount));
|
||||
phase_offset = ((phase < split_phase)?
|
||||
(phase * early_phase_segcount) :
|
||||
(phase * late_phase_segcount + split_phase));
|
||||
tmpsend = ((char*)rbuf) + (block_offset + phase_offset) * extent;
|
||||
((ptrdiff_t)phase * (ptrdiff_t)early_phase_segcount) :
|
||||
((ptrdiff_t)phase * (ptrdiff_t)late_phase_segcount + split_phase));
|
||||
tmpsend = ((char*)rbuf) + (ptrdiff_t)(block_offset + phase_offset) * extent;
|
||||
ret = MCA_PML_CALL(send(tmpsend, phase_count, dtype, send_to,
|
||||
MCA_COLL_BASE_TAG_ALLREDUCE,
|
||||
MCA_PML_BASE_SEND_STANDARD, comm));
|
||||
@ -774,8 +774,8 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count
|
||||
rbuf[prevblock] = inbuf[inbi ^ 0x1] (op) rbuf[prevblock]
|
||||
*/
|
||||
block_offset = ((prevblock < split_rank)?
|
||||
(prevblock * early_blockcount) :
|
||||
(prevblock * late_blockcount + split_rank));
|
||||
((ptrdiff_t)prevblock * (ptrdiff_t)early_blockcount) :
|
||||
((ptrdiff_t)prevblock * (ptrdiff_t)late_blockcount + split_rank));
|
||||
block_count = ((prevblock < split_rank)?
|
||||
early_blockcount : late_blockcount);
|
||||
COLL_TUNED_COMPUTE_BLOCKCOUNT(block_count, num_phases, split_phase,
|
||||
@ -783,9 +783,9 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count
|
||||
phase_count = ((phase < split_phase)?
|
||||
(early_phase_segcount) : (late_phase_segcount));
|
||||
phase_offset = ((phase < split_phase)?
|
||||
(phase * early_phase_segcount) :
|
||||
(phase * late_phase_segcount + split_phase));
|
||||
tmprecv = ((char*)rbuf) + (block_offset + phase_offset) * extent;
|
||||
((ptrdiff_t)phase * (ptrdiff_t)early_phase_segcount) :
|
||||
((ptrdiff_t)phase * (ptrdiff_t)late_phase_segcount + split_phase));
|
||||
tmprecv = ((char*)rbuf) + (ptrdiff_t)(block_offset + phase_offset) * extent;
|
||||
ompi_op_reduce(op, inbuf[inbi ^ 0x1], tmprecv, phase_count, dtype);
|
||||
|
||||
/* send previous block to send_to */
|
||||
@ -803,8 +803,8 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count
|
||||
rbuf[rank+1] = inbuf[inbi] (op) rbuf[rank + 1] */
|
||||
recv_from = (rank + 1) % size;
|
||||
block_offset = ((recv_from < split_rank)?
|
||||
(recv_from * early_blockcount) :
|
||||
(recv_from * late_blockcount + split_rank));
|
||||
((ptrdiff_t)recv_from * (ptrdiff_t)early_blockcount) :
|
||||
((ptrdiff_t)recv_from * (ptrdiff_t)late_blockcount + split_rank));
|
||||
block_count = ((recv_from < split_rank)?
|
||||
early_blockcount : late_blockcount);
|
||||
COLL_TUNED_COMPUTE_BLOCKCOUNT(block_count, num_phases, split_phase,
|
||||
@ -812,9 +812,9 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count
|
||||
phase_count = ((phase < split_phase)?
|
||||
(early_phase_segcount) : (late_phase_segcount));
|
||||
phase_offset = ((phase < split_phase)?
|
||||
(phase * early_phase_segcount) :
|
||||
(phase * late_phase_segcount + split_phase));
|
||||
tmprecv = ((char*)rbuf) + (block_offset + phase_offset) * extent;
|
||||
((ptrdiff_t)phase * (ptrdiff_t)early_phase_segcount) :
|
||||
((ptrdiff_t)phase * (ptrdiff_t)late_phase_segcount + split_phase));
|
||||
tmprecv = ((char*)rbuf) + (ptrdiff_t)(block_offset + phase_offset) * extent;
|
||||
ompi_op_reduce(op, inbuf[inbi], tmprecv, phase_count, dtype);
|
||||
}
|
||||
|
||||
@ -826,17 +826,17 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count
|
||||
const int send_data_from = (rank + 1 + size - k) % size;
|
||||
const int send_block_offset =
|
||||
((send_data_from < split_rank)?
|
||||
(send_data_from * early_blockcount) :
|
||||
(send_data_from * late_blockcount + split_rank));
|
||||
((ptrdiff_t)send_data_from * (ptrdiff_t)early_blockcount) :
|
||||
((ptrdiff_t)send_data_from * (ptrdiff_t)late_blockcount + split_rank));
|
||||
const int recv_block_offset =
|
||||
((recv_data_from < split_rank)?
|
||||
(recv_data_from * early_blockcount) :
|
||||
(recv_data_from * late_blockcount + split_rank));
|
||||
((ptrdiff_t)recv_data_from * (ptrdiff_t)early_blockcount) :
|
||||
((ptrdiff_t)recv_data_from * (ptrdiff_t)late_blockcount + split_rank));
|
||||
block_count = ((send_data_from < split_rank)?
|
||||
early_blockcount : late_blockcount);
|
||||
|
||||
tmprecv = (char*)rbuf + recv_block_offset * extent;
|
||||
tmpsend = (char*)rbuf + send_block_offset * extent;
|
||||
tmprecv = (char*)rbuf + (ptrdiff_t)recv_block_offset * extent;
|
||||
tmpsend = (char*)rbuf + (ptrdiff_t)send_block_offset * extent;
|
||||
|
||||
ret = ompi_coll_tuned_sendrecv(tmpsend, block_count, dtype, send_to,
|
||||
MCA_COLL_BASE_TAG_ALLREDUCE,
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2012 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -62,8 +62,8 @@ int ompi_coll_tuned_alltoall_intra_pairwise(void *sbuf, int scount,
|
||||
recvfrom = (rank + size - step) % size;
|
||||
|
||||
/* Determine sending and receiving locations */
|
||||
tmpsend = (char*)sbuf + sendto * sext * scount;
|
||||
tmprecv = (char*)rbuf + recvfrom * rext * rcount;
|
||||
tmpsend = (char*)sbuf + (ptrdiff_t)sendto * sext * (ptrdiff_t)scount;
|
||||
tmprecv = (char*)rbuf + (ptrdiff_t)recvfrom * rext * (ptrdiff_t)rcount;
|
||||
|
||||
/* send and receive */
|
||||
err = ompi_coll_tuned_sendrecv( tmpsend, scount, sdtype, sendto,
|
||||
@ -141,22 +141,22 @@ int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount,
|
||||
#endif
|
||||
|
||||
/* tmp buffer allocation for message data */
|
||||
tmpbuf_free = (char *) malloc(tsext + (scount * size - 1) * sext);
|
||||
tmpbuf_free = (char *) malloc(tsext + ((ptrdiff_t)scount * (ptrdiff_t)size - 1) * sext);
|
||||
if (tmpbuf_free == NULL) { line = __LINE__; err = -1; goto err_hndl; }
|
||||
tmpbuf = tmpbuf_free - slb;
|
||||
|
||||
/* Step 1 - local rotation - shift up by rank */
|
||||
err = ompi_datatype_copy_content_same_ddt (sdtype,
|
||||
(int32_t) ((size - rank) * scount),
|
||||
(int32_t) ((ptrdiff_t)(size - rank) * (ptrdiff_t)scount),
|
||||
tmpbuf,
|
||||
((char*) sbuf) + rank * scount * sext);
|
||||
((char*) sbuf) + (ptrdiff_t)rank * (ptrdiff_t)scount * sext);
|
||||
if (err<0) {
|
||||
line = __LINE__; err = -1; goto err_hndl;
|
||||
}
|
||||
|
||||
if (rank != 0) {
|
||||
err = ompi_datatype_copy_content_same_ddt (sdtype, (int32_t) (rank * scount),
|
||||
tmpbuf + (size - rank) * scount* sext,
|
||||
err = ompi_datatype_copy_content_same_ddt (sdtype, (ptrdiff_t)rank * (ptrdiff_t)scount,
|
||||
tmpbuf + (ptrdiff_t)(size - rank) * (ptrdiff_t)scount* sext,
|
||||
(char*) sbuf);
|
||||
if (err<0) {
|
||||
line = __LINE__; err = -1; goto err_hndl;
|
||||
@ -173,7 +173,7 @@ int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount,
|
||||
/* create indexed datatype */
|
||||
for (i = 1; i < size; i++) {
|
||||
if (( i & distance) == distance) {
|
||||
displs[k] = i * scount;
|
||||
displs[k] = (ptrdiff_t)i * (ptrdiff_t)scount;
|
||||
blen[k] = scount;
|
||||
k++;
|
||||
}
|
||||
@ -206,8 +206,8 @@ int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount,
|
||||
for (i = 0; i < size; i++) {
|
||||
|
||||
err = ompi_datatype_copy_content_same_ddt (rdtype, (int32_t) rcount,
|
||||
((char*)rbuf) + (((rank - i + size) % size) * rcount * rext),
|
||||
tmpbuf + i * rcount * rext);
|
||||
((char*)rbuf) + ((ptrdiff_t)((rank - i + size) % size) * (ptrdiff_t)rcount * rext),
|
||||
tmpbuf + (ptrdiff_t)i * (ptrdiff_t)rcount * rext);
|
||||
if (err < 0) { line = __LINE__; err = -1; goto err_hndl; }
|
||||
}
|
||||
|
||||
@ -290,8 +290,8 @@ int ompi_coll_tuned_alltoall_intra_linear_sync(void *sbuf, int scount,
|
||||
|
||||
/* simple optimization */
|
||||
|
||||
psnd = ((char *) sbuf) + (rank * sext);
|
||||
prcv = ((char *) rbuf) + (rank * rext);
|
||||
psnd = ((char *) sbuf) + (ptrdiff_t)rank * sext;
|
||||
prcv = ((char *) rbuf) + (ptrdiff_t)rank * rext;
|
||||
|
||||
error = ompi_datatype_sndrcv(psnd, scount, sdtype, prcv, rcount, rdtype);
|
||||
if (MPI_SUCCESS != error) {
|
||||
@ -320,7 +320,7 @@ int ompi_coll_tuned_alltoall_intra_linear_sync(void *sbuf, int scount,
|
||||
ri = (ri + 1) % size, ++nreqs, ++nrreqs) {
|
||||
error =
|
||||
MCA_PML_CALL(irecv
|
||||
(prcv + (ri * rext), rcount, rdtype, ri,
|
||||
(prcv + (ptrdiff_t)ri * rext, rcount, rdtype, ri,
|
||||
MCA_COLL_BASE_TAG_ALLTOALL, comm, &reqs[nreqs]));
|
||||
if (MPI_SUCCESS != error) { line = __LINE__; goto error_hndl; }
|
||||
}
|
||||
@ -328,7 +328,7 @@ int ompi_coll_tuned_alltoall_intra_linear_sync(void *sbuf, int scount,
|
||||
si = (si + size - 1) % size, ++nreqs, ++nsreqs) {
|
||||
error =
|
||||
MCA_PML_CALL(isend
|
||||
(psnd + (si * sext), scount, sdtype, si,
|
||||
(psnd + (ptrdiff_t)si * sext, scount, sdtype, si,
|
||||
MCA_COLL_BASE_TAG_ALLTOALL,
|
||||
MCA_PML_BASE_SEND_STANDARD, comm, &reqs[nreqs]));
|
||||
if (MPI_SUCCESS != error) { line = __LINE__; goto error_hndl; }
|
||||
@ -360,7 +360,7 @@ int ompi_coll_tuned_alltoall_intra_linear_sync(void *sbuf, int scount,
|
||||
if (nrreqs < (size - 1)) {
|
||||
error =
|
||||
MCA_PML_CALL(irecv
|
||||
(prcv + (ri * rext), rcount, rdtype, ri,
|
||||
(prcv + (ptrdiff_t)ri * rext, rcount, rdtype, ri,
|
||||
MCA_COLL_BASE_TAG_ALLTOALL, comm,
|
||||
&reqs[completed]));
|
||||
if (MPI_SUCCESS != error) { line = __LINE__; goto error_hndl; }
|
||||
@ -370,7 +370,7 @@ int ompi_coll_tuned_alltoall_intra_linear_sync(void *sbuf, int scount,
|
||||
} else {
|
||||
if (nsreqs < (size - 1)) {
|
||||
error = MCA_PML_CALL(isend
|
||||
(psnd + (si * sext), scount, sdtype, si,
|
||||
(psnd + (ptrdiff_t)si * sext, scount, sdtype, si,
|
||||
MCA_COLL_BASE_TAG_ALLTOALL,
|
||||
MCA_PML_BASE_SEND_STANDARD, comm,
|
||||
&reqs[completed]));
|
||||
@ -423,8 +423,8 @@ int ompi_coll_tuned_alltoall_intra_two_procs(void *sbuf, int scount,
|
||||
/* exchange data */
|
||||
remote = rank ^ 1;
|
||||
|
||||
tmpsend = (char*)sbuf + remote * sext * scount;
|
||||
tmprecv = (char*)rbuf + remote * rext * rcount;
|
||||
tmpsend = (char*)sbuf + (ptrdiff_t)remote * sext * (ptrdiff_t)scount;
|
||||
tmprecv = (char*)rbuf + (ptrdiff_t)remote * rext * (ptrdiff_t)rcount;
|
||||
|
||||
/* send and receive */
|
||||
err = ompi_coll_tuned_sendrecv ( tmpsend, scount, sdtype, remote,
|
||||
@ -435,9 +435,9 @@ int ompi_coll_tuned_alltoall_intra_two_procs(void *sbuf, int scount,
|
||||
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
|
||||
|
||||
/* ddt sendrecv your own data */
|
||||
err = ompi_datatype_sndrcv((char*) sbuf + rank * sext * scount,
|
||||
err = ompi_datatype_sndrcv((char*) sbuf + (ptrdiff_t)rank * sext * (ptrdiff_t)scount,
|
||||
(int32_t) scount, sdtype,
|
||||
(char*) rbuf + rank * rext * rcount,
|
||||
(char*) rbuf + (ptrdiff_t)rank * rext * (ptrdiff_t)rcount,
|
||||
(int32_t) rcount, rdtype);
|
||||
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
|
||||
|
||||
@ -515,8 +515,8 @@ int ompi_coll_tuned_alltoall_intra_basic_linear(void *sbuf, int scount,
|
||||
|
||||
/* simple optimization */
|
||||
|
||||
psnd = ((char *) sbuf) + (rank * sndinc);
|
||||
prcv = ((char *) rbuf) + (rank * rcvinc);
|
||||
psnd = ((char *) sbuf) + (ptrdiff_t)rank * sndinc;
|
||||
prcv = ((char *) rbuf) + (ptrdiff_t)rank * rcvinc;
|
||||
|
||||
err = ompi_datatype_sndrcv(psnd, scount, sdtype, prcv, rcount, rdtype);
|
||||
if (MPI_SUCCESS != err) {
|
||||
@ -543,7 +543,7 @@ int ompi_coll_tuned_alltoall_intra_basic_linear(void *sbuf, int scount,
|
||||
i = (i + 1) % size, ++rreq, ++nreqs) {
|
||||
err =
|
||||
MCA_PML_CALL(irecv_init
|
||||
(prcv + (i * rcvinc), rcount, rdtype, i,
|
||||
(prcv + (ptrdiff_t)i * rcvinc, rcount, rdtype, i,
|
||||
MCA_COLL_BASE_TAG_ALLTOALL, comm, rreq));
|
||||
if (MPI_SUCCESS != err) {
|
||||
ompi_coll_tuned_free_reqs(req, rreq - req);
|
||||
@ -559,7 +559,7 @@ int ompi_coll_tuned_alltoall_intra_basic_linear(void *sbuf, int scount,
|
||||
i = (i + size - 1) % size, ++sreq, ++nreqs) {
|
||||
err =
|
||||
MCA_PML_CALL(isend_init
|
||||
(psnd + (i * sndinc), scount, sdtype, i,
|
||||
(psnd + (ptrdiff_t)i * sndinc, scount, sdtype, i,
|
||||
MCA_COLL_BASE_TAG_ALLTOALL,
|
||||
MCA_PML_BASE_SEND_STANDARD, comm, sreq));
|
||||
if (MPI_SUCCESS != err) {
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2012 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -53,8 +53,8 @@ ompi_coll_tuned_alltoallv_intra_pairwise(void *sbuf, int *scounts, int *sdisps,
|
||||
ompi_datatype_type_extent(sdtype, &sext);
|
||||
ompi_datatype_type_extent(rdtype, &rext);
|
||||
|
||||
psnd = ((char *) sbuf) + (sdisps[rank] * sext);
|
||||
prcv = ((char *) rbuf) + (rdisps[rank] * rext);
|
||||
psnd = ((char *) sbuf) + (ptrdiff_t)sdisps[rank] * sext;
|
||||
prcv = ((char *) rbuf) + (ptrdiff_t)rdisps[rank] * rext;
|
||||
|
||||
if (0 != scounts[rank]) {
|
||||
err = ompi_datatype_sndrcv(psnd, scounts[rank], sdtype,
|
||||
@ -77,8 +77,8 @@ ompi_coll_tuned_alltoallv_intra_pairwise(void *sbuf, int *scounts, int *sdisps,
|
||||
recvfrom = (rank + size - step) % size;
|
||||
|
||||
/* Determine sending and receiving locations */
|
||||
psnd = (char*)sbuf + sdisps[sendto] * sext;
|
||||
prcv = (char*)rbuf + rdisps[recvfrom] * rext;
|
||||
psnd = (char*)sbuf + (ptrdiff_t)sdisps[sendto] * sext;
|
||||
prcv = (char*)rbuf + (ptrdiff_t)rdisps[recvfrom] * rext;
|
||||
|
||||
/* send and receive */
|
||||
err = ompi_coll_tuned_sendrecv( psnd, scounts[sendto], sdtype, sendto,
|
||||
@ -133,8 +133,8 @@ ompi_coll_tuned_alltoallv_intra_basic_linear(void *sbuf, int *scounts, int *sdis
|
||||
ompi_datatype_type_extent(rdtype, &rext);
|
||||
|
||||
/* Simple optimization - handle send to self first */
|
||||
psnd = ((char *) sbuf) + (sdisps[rank] * sext);
|
||||
prcv = ((char *) rbuf) + (rdisps[rank] * rext);
|
||||
psnd = ((char *) sbuf) + (ptrdiff_t)sdisps[rank] * sext;
|
||||
prcv = ((char *) rbuf) + (ptrdiff_t)rdisps[rank] * rext;
|
||||
if (0 != scounts[rank]) {
|
||||
err = ompi_datatype_sndrcv(psnd, scounts[rank], sdtype,
|
||||
prcv, rcounts[rank], rdtype);
|
||||
@ -158,7 +158,7 @@ ompi_coll_tuned_alltoallv_intra_basic_linear(void *sbuf, int *scounts, int *sdis
|
||||
continue;
|
||||
}
|
||||
|
||||
prcv = ((char *) rbuf) + (rdisps[i] * rext);
|
||||
prcv = ((char *) rbuf) + (ptrdiff_t)rdisps[i] * rext;
|
||||
err = MCA_PML_CALL(irecv_init(prcv, rcounts[i], rdtype,
|
||||
i, MCA_COLL_BASE_TAG_ALLTOALLV, comm,
|
||||
preq++));
|
||||
@ -175,7 +175,7 @@ ompi_coll_tuned_alltoallv_intra_basic_linear(void *sbuf, int *scounts, int *sdis
|
||||
continue;
|
||||
}
|
||||
|
||||
psnd = ((char *) sbuf) + (sdisps[i] * sext);
|
||||
psnd = ((char *) sbuf) + (ptrdiff_t)sdisps[i] * sext;
|
||||
err = MCA_PML_CALL(isend_init(psnd, scounts[i], sdtype,
|
||||
i, MCA_COLL_BASE_TAG_ALLTOALLV,
|
||||
MCA_PML_BASE_SEND_STANDARD, comm,
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2012 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -61,14 +61,14 @@ ompi_coll_tuned_bcast_intra_generic( void* buffer,
|
||||
ompi_datatype_get_extent (datatype, &lb, &extent);
|
||||
ompi_datatype_type_size( datatype, &type_size );
|
||||
num_segments = (original_count + count_by_segment - 1) / count_by_segment;
|
||||
realsegsize = count_by_segment * extent;
|
||||
realsegsize = (ptrdiff_t)count_by_segment * extent;
|
||||
|
||||
/* Set the buffer pointers */
|
||||
tmpbuf = (char *) buffer;
|
||||
|
||||
#if !defined(COLL_TUNED_BCAST_USE_BLOCKING)
|
||||
if( tree->tree_nextsize != 0 ) {
|
||||
send_reqs = (ompi_request_t**)malloc( tree->tree_nextsize *
|
||||
send_reqs = (ompi_request_t**)malloc( (ptrdiff_t)tree->tree_nextsize *
|
||||
sizeof(ompi_request_t*) );
|
||||
}
|
||||
#endif
|
||||
@ -180,7 +180,7 @@ ompi_coll_tuned_bcast_intra_generic( void* buffer,
|
||||
/* Process the last segment */
|
||||
err = ompi_request_wait( &recv_reqs[req_index], MPI_STATUSES_IGNORE );
|
||||
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
|
||||
sendcount = original_count - (num_segments - 1) * count_by_segment;
|
||||
sendcount = original_count - (ptrdiff_t)(num_segments - 1) * count_by_segment;
|
||||
for( i = 0; i < tree->tree_nextsize; i++ ) {
|
||||
#if defined(COLL_TUNED_BCAST_USE_BLOCKING)
|
||||
err = MCA_PML_CALL(send(tmpbuf, sendcount, datatype,
|
||||
@ -433,8 +433,8 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
|
||||
|
||||
/* if the message is too small to be split into segments */
|
||||
if( (counts[0] == 0 || counts[1] == 0) ||
|
||||
(segsize > counts[0] * type_size) ||
|
||||
(segsize > counts[1] * type_size) ) {
|
||||
(segsize > ((ptrdiff_t)counts[0] * type_size)) ||
|
||||
(segsize > ((ptrdiff_t)counts[1] * type_size)) ) {
|
||||
/* call linear version here ! */
|
||||
return (ompi_coll_tuned_bcast_intra_chain ( buffer, count, datatype,
|
||||
root, comm, module,
|
||||
@ -444,12 +444,12 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
|
||||
err = ompi_datatype_get_extent (datatype, &lb, &type_extent);
|
||||
|
||||
/* Determine real segment size */
|
||||
realsegsize[0] = segcount[0] * type_extent;
|
||||
realsegsize[1] = segcount[1] * type_extent;
|
||||
realsegsize[0] = (ptrdiff_t)segcount[0] * type_extent;
|
||||
realsegsize[1] = (ptrdiff_t)segcount[1] * type_extent;
|
||||
|
||||
/* set the buffer pointers */
|
||||
tmpbuf[0] = (char *) buffer;
|
||||
tmpbuf[1] = (char *) buffer+counts[0] * type_extent;
|
||||
tmpbuf[1] = (char *) buffer + (ptrdiff_t)counts[0] * type_extent;
|
||||
|
||||
/* Step 1:
|
||||
Root splits the buffer in 2 and sends segmented message down the branches.
|
||||
@ -508,7 +508,7 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
|
||||
for( segindex = 1; segindex < num_segments[lr]; segindex++ ) {
|
||||
/* determine how many elements to expect in this round */
|
||||
if( segindex == (num_segments[lr] - 1))
|
||||
sendcount[lr] = counts[lr] - segindex*segcount[lr];
|
||||
sendcount[lr] = counts[lr] - (ptrdiff_t)segindex * (ptrdiff_t)segcount[lr];
|
||||
/* post new irecv */
|
||||
MCA_PML_CALL(irecv( tmpbuf[lr] + realsegsize[lr], sendcount[lr],
|
||||
datatype, tree->tree_prev, MCA_COLL_BASE_TAG_BCAST,
|
||||
@ -546,7 +546,8 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
|
||||
sendcount[lr] = segcount[lr];
|
||||
for (segindex = 0; segindex < num_segments[lr]; segindex++) {
|
||||
/* determine how many elements to expect in this round */
|
||||
if (segindex == (num_segments[lr] - 1)) sendcount[lr] = counts[lr] - segindex*segcount[lr];
|
||||
if (segindex == (num_segments[lr] - 1))
|
||||
sendcount[lr] = counts[lr] - (ptrdiff_t)segindex * (ptrdiff_t)segcount[lr];
|
||||
/* receive segments */
|
||||
MCA_PML_CALL(recv(tmpbuf[lr], sendcount[lr], datatype,
|
||||
tree->tree_prev, MCA_COLL_BASE_TAG_BCAST,
|
||||
@ -559,7 +560,7 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
|
||||
|
||||
/* reset the buffer pointers */
|
||||
tmpbuf[0] = (char *) buffer;
|
||||
tmpbuf[1] = (char *) buffer+counts[0] * type_extent;
|
||||
tmpbuf[1] = (char *) buffer + (ptrdiff_t)counts[0] * type_extent;
|
||||
|
||||
/* Step 2:
|
||||
Find your immediate pair (identical node in opposite subtree) and SendRecv
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2009 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2012 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -120,7 +120,7 @@ int ompi_coll_tuned_alltoall_intra_dec_dynamic(void *sbuf, int scount,
|
||||
|
||||
ompi_datatype_type_size (sdtype, &dsize);
|
||||
comsize = ompi_comm_size(comm);
|
||||
dsize *= comsize * scount;
|
||||
dsize *= (ptrdiff_t)comsize * (ptrdiff_t)scount;
|
||||
|
||||
alg = ompi_coll_tuned_get_target_method_params (data->com_rules[ALLTOALL],
|
||||
dsize, &faninout, &segsize, &max_requests);
|
||||
@ -414,7 +414,7 @@ int ompi_coll_tuned_allgather_intra_dec_dynamic(void *sbuf, int scount,
|
||||
|
||||
ompi_datatype_type_size (sdtype, &dsize);
|
||||
comsize = ompi_comm_size(comm);
|
||||
dsize *= comsize * scount;
|
||||
dsize *= (ptrdiff_t)comsize * (ptrdiff_t)scount;
|
||||
|
||||
alg = ompi_coll_tuned_get_target_method_params (data->com_rules[ALLGATHER],
|
||||
dsize, &faninout, &segsize, &ignoreme);
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2009 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2012 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -56,7 +56,7 @@ ompi_coll_tuned_allreduce_intra_dec_fixed (void *sbuf, void *rbuf, int count,
|
||||
* Ring algorithm does not support non-commutative operations.
|
||||
*/
|
||||
ompi_datatype_type_size(dtype, &dsize);
|
||||
block_dsize = dsize * count;
|
||||
block_dsize = dsize * (ptrdiff_t)count;
|
||||
|
||||
if (block_dsize < intermediate_message) {
|
||||
return (ompi_coll_tuned_allreduce_intra_recursivedoubling (sbuf, rbuf,
|
||||
@ -66,7 +66,7 @@ ompi_coll_tuned_allreduce_intra_dec_fixed (void *sbuf, void *rbuf, int count,
|
||||
|
||||
if( ompi_op_is_commute(op) && (count > comm_size) ) {
|
||||
const size_t segment_size = 1 << 20; /* 1 MB */
|
||||
if ((comm_size * segment_size >= block_dsize)) {
|
||||
if (((size_t)comm_size * (size_t)segment_size >= block_dsize)) {
|
||||
return (ompi_coll_tuned_allreduce_intra_ring (sbuf, rbuf, count, dtype,
|
||||
op, comm, module));
|
||||
} else {
|
||||
@ -116,7 +116,7 @@ int ompi_coll_tuned_alltoall_intra_dec_fixed(void *sbuf, int scount,
|
||||
Has better performance for messages of intermediate sizes than the old one */
|
||||
/* determine block size */
|
||||
ompi_datatype_type_size(sdtype, &dsize);
|
||||
block_dsize = dsize * scount;
|
||||
block_dsize = dsize * (ptrdiff_t)scount;
|
||||
|
||||
if ((block_dsize < 200) && (communicator_size > 12)) {
|
||||
return ompi_coll_tuned_alltoall_intra_bruck(sbuf, scount, sdtype,
|
||||
@ -360,7 +360,7 @@ int ompi_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf,
|
||||
|
||||
/* need data size for decision function */
|
||||
ompi_datatype_type_size(datatype, &dsize);
|
||||
message_size = dsize * count; /* needed for decision */
|
||||
message_size = dsize * (ptrdiff_t)count; /* needed for decision */
|
||||
|
||||
/**
|
||||
* If the operation is non commutative we currently have choice of linear
|
||||
@ -535,7 +535,7 @@ int ompi_coll_tuned_allgather_intra_dec_fixed(void *sbuf, int scount,
|
||||
|
||||
/* Determine complete data size */
|
||||
ompi_datatype_type_size(sdtype, &dsize);
|
||||
total_dsize = dsize * scount * communicator_size;
|
||||
total_dsize = dsize * (ptrdiff_t)scount * (ptrdiff_t)communicator_size;
|
||||
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_allgather_intra_dec_fixed"
|
||||
" rank %d com_size %d msg_length %lu",
|
||||
@ -632,7 +632,7 @@ int ompi_coll_tuned_allgatherv_intra_dec_fixed(void *sbuf, int scount,
|
||||
ompi_datatype_type_size(sdtype, &dsize);
|
||||
total_dsize = 0;
|
||||
for (i = 0; i < communicator_size; i++) {
|
||||
total_dsize += dsize * rcounts[i];
|
||||
total_dsize += dsize * (ptrdiff_t)rcounts[i];
|
||||
}
|
||||
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,
|
||||
@ -697,10 +697,10 @@ int ompi_coll_tuned_gather_intra_dec_fixed(void *sbuf, int scount,
|
||||
/* Determine block size */
|
||||
if (rank == root) {
|
||||
ompi_datatype_type_size(rdtype, &dsize);
|
||||
block_size = dsize * rcount;
|
||||
block_size = dsize * (ptrdiff_t)rcount;
|
||||
} else {
|
||||
ompi_datatype_type_size(sdtype, &dsize);
|
||||
block_size = dsize * scount;
|
||||
block_size = dsize * (ptrdiff_t)scount;
|
||||
}
|
||||
|
||||
if (block_size > large_block_size) {
|
||||
@ -758,10 +758,10 @@ int ompi_coll_tuned_scatter_intra_dec_fixed(void *sbuf, int scount,
|
||||
/* Determine block size */
|
||||
if (root == rank) {
|
||||
ompi_datatype_type_size(sdtype, &dsize);
|
||||
block_size = dsize * scount;
|
||||
block_size = dsize * (ptrdiff_t)scount;
|
||||
} else {
|
||||
ompi_datatype_type_size(rdtype, &dsize);
|
||||
block_size = dsize * rcount;
|
||||
block_size = dsize * (ptrdiff_t)rcount;
|
||||
}
|
||||
|
||||
if ((communicator_size > small_comm_size) &&
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2009 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2012 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -86,7 +86,7 @@ ompi_coll_tuned_gather_intra_binomial(void *sbuf, int scount,
|
||||
} else {
|
||||
/* root is not on 0, allocate temp buffer for recv,
|
||||
* rotate data at the end */
|
||||
tempbuf = (char *) malloc(rtrue_extent + (rcount*size - 1) * rextent);
|
||||
tempbuf = (char *) malloc(rtrue_extent + ((ptrdiff_t)rcount * (ptrdiff_t)size - 1) * rextent);
|
||||
if (NULL == tempbuf) {
|
||||
err= OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl;
|
||||
}
|
||||
@ -100,7 +100,7 @@ ompi_coll_tuned_gather_intra_binomial(void *sbuf, int scount,
|
||||
} else {
|
||||
/* copy from rbuf to temp buffer */
|
||||
err = ompi_datatype_copy_content_same_ddt(rdtype, rcount, ptmp,
|
||||
(char *) rbuf + rank*rextent*rcount);
|
||||
(char *)rbuf + (ptrdiff_t)rank * rextent * (ptrdiff_t)rcount);
|
||||
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
|
||||
}
|
||||
}
|
||||
@ -109,7 +109,7 @@ ompi_coll_tuned_gather_intra_binomial(void *sbuf, int scount,
|
||||
/* other non-leaf nodes, allocate temp buffer for data received from
|
||||
* children, the most we need is half of the total data elements due
|
||||
* to the property of binomial tree */
|
||||
tempbuf = (char *) malloc(strue_extent + (scount*size - 1) * sextent);
|
||||
tempbuf = (char *) malloc(strue_extent + ((ptrdiff_t)scount * (ptrdiff_t)size - 1) * sextent);
|
||||
if (NULL == tempbuf) {
|
||||
err= OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl;
|
||||
}
|
||||
@ -148,7 +148,7 @@ ompi_coll_tuned_gather_intra_binomial(void *sbuf, int scount,
|
||||
"ompi_coll_tuned_gather_intra_binomial rank %d recv %d mycount = %d",
|
||||
rank, bmtree->tree_next[i], mycount));
|
||||
|
||||
err = MCA_PML_CALL(recv(ptmp + total_recv*rextent, rcount*size-total_recv, rdtype,
|
||||
err = MCA_PML_CALL(recv(ptmp + total_recv*rextent, (ptrdiff_t)rcount * size - total_recv, rdtype,
|
||||
bmtree->tree_next[i], MCA_COLL_BASE_TAG_GATHER,
|
||||
comm, &status));
|
||||
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
|
||||
@ -173,13 +173,13 @@ ompi_coll_tuned_gather_intra_binomial(void *sbuf, int scount,
|
||||
if (rank == root) {
|
||||
if (root != 0) {
|
||||
/* rotate received data on root if root != 0 */
|
||||
err = ompi_datatype_copy_content_same_ddt(rdtype, rcount*(size - root),
|
||||
(char *) rbuf + rextent*root*rcount, ptmp);
|
||||
err = ompi_datatype_copy_content_same_ddt(rdtype, (ptrdiff_t)rcount * (ptrdiff_t)(size - root),
|
||||
(char *)rbuf + rextent * (ptrdiff_t)root * (ptrdiff_t)rcount, ptmp);
|
||||
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
|
||||
|
||||
|
||||
err = ompi_datatype_copy_content_same_ddt(rdtype, rcount*root,
|
||||
(char *) rbuf, ptmp + rextent*rcount*(size-root));
|
||||
err = ompi_datatype_copy_content_same_ddt(rdtype, (ptrdiff_t)rcount * (ptrdiff_t)root,
|
||||
(char *) rbuf, ptmp + rextent * (ptrdiff_t)rcount * (ptrdiff_t)(size-root));
|
||||
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
|
||||
|
||||
free(tempbuf);
|
||||
@ -291,7 +291,7 @@ ompi_coll_tuned_gather_intra_linear_sync(void *sbuf, int scount,
|
||||
}
|
||||
|
||||
/* irecv for the first segment from i */
|
||||
ptmp = (char*)rbuf + i * rcount * extent;
|
||||
ptmp = (char*)rbuf + (ptrdiff_t)i * (ptrdiff_t)rcount * extent;
|
||||
ret = MCA_PML_CALL(irecv(ptmp, first_segment_count, rdtype, i,
|
||||
MCA_COLL_BASE_TAG_GATHER, comm,
|
||||
&first_segment_req));
|
||||
@ -304,7 +304,7 @@ ompi_coll_tuned_gather_intra_linear_sync(void *sbuf, int scount,
|
||||
if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
|
||||
|
||||
/* irecv for the second segment */
|
||||
ptmp = (char*)rbuf + (i * rcount + first_segment_count) * extent;
|
||||
ptmp = (char*)rbuf + ((ptrdiff_t)i * (ptrdiff_t)rcount + first_segment_count) * extent;
|
||||
ret = MCA_PML_CALL(irecv(ptmp, (rcount - first_segment_count),
|
||||
rdtype, i, MCA_COLL_BASE_TAG_GATHER, comm,
|
||||
&reqs[i]));
|
||||
@ -318,7 +318,7 @@ ompi_coll_tuned_gather_intra_linear_sync(void *sbuf, int scount,
|
||||
/* copy local data if necessary */
|
||||
if (MPI_IN_PLACE != sbuf) {
|
||||
ret = ompi_datatype_sndrcv(sbuf, scount, sdtype,
|
||||
(char*)rbuf + rank * rcount * extent,
|
||||
(char*)rbuf + (ptrdiff_t)rank * (ptrdiff_t)rcount * extent,
|
||||
rcount, rdtype);
|
||||
if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
|
||||
}
|
||||
@ -394,7 +394,7 @@ ompi_coll_tuned_gather_intra_basic_linear(void *sbuf, int scount,
|
||||
/* I am the root, loop receiving the data. */
|
||||
|
||||
ompi_datatype_get_extent(rdtype, &lb, &extent);
|
||||
incr = extent * rcount;
|
||||
incr = extent * (ptrdiff_t)rcount;
|
||||
for (i = 0, ptmp = (char *) rbuf; i < size; ++i, ptmp += incr) {
|
||||
if (i == rank) {
|
||||
if (MPI_IN_PLACE != sbuf) {
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2009 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2012 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -63,14 +63,16 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c
|
||||
ompi_datatype_get_extent( datatype, &lower_bound, &extent );
|
||||
ompi_datatype_type_size( datatype, &typelng );
|
||||
num_segments = (original_count + count_by_segment - 1) / count_by_segment;
|
||||
segment_increment = count_by_segment * extent;
|
||||
segment_increment = (ptrdiff_t)count_by_segment * extent;
|
||||
|
||||
sendtmpbuf = (char*) sendbuf;
|
||||
if( sendbuf == MPI_IN_PLACE ) {
|
||||
sendtmpbuf = (char *)recvbuf;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:reduce_generic count %d, msg size %ld, segsize %ld, max_requests %d", original_count, (unsigned long)(num_segments * segment_increment), (unsigned long)segment_increment, max_outstanding_reqs));
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:reduce_generic count %d, msg size %ld, segsize %ld, max_requests %d",
|
||||
original_count, (unsigned long)((ptrdiff_t)num_segments * (ptrdiff_t)segment_increment),
|
||||
(unsigned long)segment_increment, max_outstanding_reqs));
|
||||
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
||||
@ -87,7 +89,7 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c
|
||||
if( (NULL == accumbuf) || (root != rank) ) {
|
||||
/* Allocate temporary accumulator buffer. */
|
||||
accumbuf_free = (char*)malloc(true_extent +
|
||||
(original_count - 1) * extent);
|
||||
(ptrdiff_t)(original_count - 1) * extent);
|
||||
if (accumbuf_free == NULL) {
|
||||
line = __LINE__; ret = -1; goto error_hndl;
|
||||
}
|
||||
@ -102,7 +104,7 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c
|
||||
(char*)sendtmpbuf);
|
||||
}
|
||||
/* Allocate two buffers for incoming segments */
|
||||
real_segment_size = true_extent + (count_by_segment - 1) * extent;
|
||||
real_segment_size = true_extent + (ptrdiff_t)(count_by_segment - 1) * extent;
|
||||
inbuf_free[0] = (char*) malloc(real_segment_size);
|
||||
if( inbuf_free[0] == NULL ) {
|
||||
line = __LINE__; ret = -1; goto error_hndl;
|
||||
@ -127,7 +129,7 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c
|
||||
/* recvcount - number of elements in current segment */
|
||||
recvcount = count_by_segment;
|
||||
if( segindex == (num_segments-1) )
|
||||
recvcount = original_count - count_by_segment * segindex;
|
||||
recvcount = original_count - (ptrdiff_t)count_by_segment * (ptrdiff_t)segindex;
|
||||
|
||||
/* for each child */
|
||||
for( i = 0; i < tree->tree_nextsize; i++ ) {
|
||||
@ -151,7 +153,7 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c
|
||||
*/
|
||||
if( (ompi_op_is_commute(op)) &&
|
||||
!((MPI_IN_PLACE == sendbuf) && (rank == tree->tree_root)) ) {
|
||||
local_recvbuf = accumbuf + segindex * segment_increment;
|
||||
local_recvbuf = accumbuf + (ptrdiff_t)segindex * (ptrdiff_t)segment_increment;
|
||||
}
|
||||
}
|
||||
|
||||
@ -178,19 +180,19 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c
|
||||
if( 1 == i ) {
|
||||
if( (ompi_op_is_commute(op)) &&
|
||||
!((MPI_IN_PLACE == sendbuf) && (rank == tree->tree_root)) ) {
|
||||
local_op_buffer = sendtmpbuf + segindex * segment_increment;
|
||||
local_op_buffer = sendtmpbuf + (ptrdiff_t)segindex * (ptrdiff_t)segment_increment;
|
||||
}
|
||||
}
|
||||
/* apply operation */
|
||||
ompi_op_reduce(op, local_op_buffer,
|
||||
accumbuf + segindex * segment_increment,
|
||||
accumbuf + (ptrdiff_t)segindex * (ptrdiff_t)segment_increment,
|
||||
recvcount, datatype );
|
||||
} else if ( segindex > 0 ) {
|
||||
void* accumulator = accumbuf + (segindex-1) * segment_increment;
|
||||
void* accumulator = accumbuf + (ptrdiff_t)(segindex-1) * (ptrdiff_t)segment_increment;
|
||||
if( tree->tree_nextsize <= 1 ) {
|
||||
if( (ompi_op_is_commute(op)) &&
|
||||
!((MPI_IN_PLACE == sendbuf) && (rank == tree->tree_root)) ) {
|
||||
local_op_buffer = sendtmpbuf + (segindex-1) * segment_increment;
|
||||
local_op_buffer = sendtmpbuf + (ptrdiff_t)(segindex-1) * (ptrdiff_t)segment_increment;
|
||||
}
|
||||
}
|
||||
ompi_op_reduce(op, local_op_buffer, accumulator, prevcount,
|
||||
@ -251,7 +253,7 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c
|
||||
count_by_segment = original_count;
|
||||
}
|
||||
ret = MCA_PML_CALL( send((char*)sendbuf +
|
||||
segindex * segment_increment,
|
||||
(ptrdiff_t)segindex * (ptrdiff_t)segment_increment,
|
||||
count_by_segment, datatype,
|
||||
tree->tree_prev,
|
||||
MCA_COLL_BASE_TAG_REDUCE,
|
||||
@ -281,7 +283,7 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c
|
||||
/* post first group of requests */
|
||||
for (segindex = 0; segindex < max_outstanding_reqs; segindex++) {
|
||||
ret = MCA_PML_CALL( isend((char*)sendbuf +
|
||||
segindex * segment_increment,
|
||||
(ptrdiff_t)segindex * (ptrdiff_t)segment_increment,
|
||||
count_by_segment, datatype,
|
||||
tree->tree_prev,
|
||||
MCA_COLL_BASE_TAG_REDUCE,
|
||||
@ -302,7 +304,7 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c
|
||||
count_by_segment = original_count;
|
||||
}
|
||||
ret = MCA_PML_CALL( isend((char*)sendbuf +
|
||||
segindex * segment_increment,
|
||||
(ptrdiff_t)segindex * (ptrdiff_t)segment_increment,
|
||||
count_by_segment, datatype,
|
||||
tree->tree_prev,
|
||||
MCA_COLL_BASE_TAG_REDUCE,
|
||||
@ -521,7 +523,7 @@ int ompi_coll_tuned_reduce_intra_in_order_binary( void *sendbuf, void *recvbuf,
|
||||
ompi_datatype_get_true_extent(datatype, &tlb, &text);
|
||||
|
||||
if ((root == rank) && (MPI_IN_PLACE == sendbuf)) {
|
||||
tmpbuf = (char *) malloc(text + (count - 1) * ext);
|
||||
tmpbuf = (char *) malloc(text + (ptrdiff_t)(count - 1) * ext);
|
||||
if (NULL == tmpbuf) {
|
||||
return MPI_ERR_INTERN;
|
||||
}
|
||||
@ -530,7 +532,7 @@ int ompi_coll_tuned_reduce_intra_in_order_binary( void *sendbuf, void *recvbuf,
|
||||
(char*)recvbuf);
|
||||
use_this_sendbuf = tmpbuf;
|
||||
} else if (io_root == rank) {
|
||||
tmpbuf = (char *) malloc(text + (count - 1) * ext);
|
||||
tmpbuf = (char *) malloc(text + (ptrdiff_t)(count - 1) * ext);
|
||||
if (NULL == tmpbuf) {
|
||||
return MPI_ERR_INTERN;
|
||||
}
|
||||
@ -631,7 +633,7 @@ ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
|
||||
|
||||
if (MPI_IN_PLACE == sbuf) {
|
||||
sbuf = rbuf;
|
||||
inplace_temp = (char*)malloc(true_extent + (count - 1) * extent);
|
||||
inplace_temp = (char*)malloc(true_extent + (ptrdiff_t)(count - 1) * extent);
|
||||
if (NULL == inplace_temp) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
@ -639,7 +641,7 @@ ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
|
||||
}
|
||||
|
||||
if (size > 1) {
|
||||
free_buffer = (char*)malloc(true_extent + (count - 1) * extent);
|
||||
free_buffer = (char*)malloc(true_extent + (ptrdiff_t)(count - 1) * extent);
|
||||
if (NULL == free_buffer) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2009 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2012 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -81,7 +81,7 @@ int ompi_coll_tuned_reduce_scatter_intra_nonoverlapping(void *sbuf, void *rbuf,
|
||||
ompi_datatype_get_extent(dtype, &lb, &extent);
|
||||
ompi_datatype_get_true_extent(dtype, &tlb, &textent);
|
||||
|
||||
tmprbuf_free = (char*) malloc(textent + (total_count - 1)*extent);
|
||||
tmprbuf_free = (char*) malloc(textent + (ptrdiff_t)(total_count - 1) * extent);
|
||||
tmprbuf = tmprbuf_free - lb;
|
||||
}
|
||||
err = comm->c_coll.coll_reduce (sbuf, tmprbuf, total_count,
|
||||
@ -164,7 +164,7 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf,
|
||||
/* get datatype information */
|
||||
ompi_datatype_get_extent(dtype, &lb, &extent);
|
||||
ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent);
|
||||
buf_size = true_extent + (count - 1) * extent;
|
||||
buf_size = true_extent + (ptrdiff_t)(count - 1) * extent;
|
||||
|
||||
/* Handle MPI_IN_PLACE */
|
||||
if (MPI_IN_PLACE == sbuf) {
|
||||
@ -296,7 +296,7 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf,
|
||||
/* actual data transfer. Send from result_buf,
|
||||
receive into recv_buf */
|
||||
if (send_count > 0 && recv_count != 0) {
|
||||
err = MCA_PML_CALL(irecv(recv_buf + tmp_disps[recv_index] * extent,
|
||||
err = MCA_PML_CALL(irecv(recv_buf + (ptrdiff_t)tmp_disps[recv_index] * extent,
|
||||
recv_count, dtype, peer,
|
||||
MCA_COLL_BASE_TAG_REDUCE_SCATTER,
|
||||
comm, &request));
|
||||
@ -307,7 +307,7 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf,
|
||||
}
|
||||
}
|
||||
if (recv_count > 0 && send_count != 0) {
|
||||
err = MCA_PML_CALL(send(result_buf + tmp_disps[send_index] * extent,
|
||||
err = MCA_PML_CALL(send(result_buf + (ptrdiff_t)tmp_disps[send_index] * extent,
|
||||
send_count, dtype, peer,
|
||||
MCA_COLL_BASE_TAG_REDUCE_SCATTER,
|
||||
MCA_PML_BASE_SEND_STANDARD,
|
||||
@ -331,8 +331,8 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf,
|
||||
the results buffer */
|
||||
if (recv_count > 0) {
|
||||
ompi_op_reduce(op,
|
||||
recv_buf + tmp_disps[recv_index] * extent,
|
||||
result_buf + tmp_disps[recv_index] * extent,
|
||||
recv_buf + (ptrdiff_t)tmp_disps[recv_index] * extent,
|
||||
result_buf + (ptrdiff_t)tmp_disps[recv_index] * extent,
|
||||
recv_count, dtype);
|
||||
}
|
||||
|
||||
@ -360,7 +360,7 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf,
|
||||
|
||||
/* Now fix up the non-power of two case, by having the odd
|
||||
procs send the even procs the proper results */
|
||||
if (rank < 2 * remain) {
|
||||
if (rank < (2 * remain)) {
|
||||
if ((rank & 1) == 0) {
|
||||
if (rcounts[rank]) {
|
||||
err = MCA_PML_CALL(recv(rbuf, rcounts[rank], dtype, rank + 1,
|
||||
@ -514,9 +514,9 @@ ompi_coll_tuned_reduce_scatter_intra_ring(void *sbuf, void *rbuf, int *rcounts,
|
||||
ret = ompi_datatype_type_size( dtype, &typelng);
|
||||
if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; }
|
||||
|
||||
max_real_segsize = true_extent + (max_block_count - 1) * extent;
|
||||
max_real_segsize = true_extent + (ptrdiff_t)(max_block_count - 1) * extent;
|
||||
|
||||
accumbuf_free = (char*)malloc(true_extent + (total_count - 1) * extent);
|
||||
accumbuf_free = (char*)malloc(true_extent + (ptrdiff_t)(total_count - 1) * extent);
|
||||
if (NULL == accumbuf_free) { ret = -1; line = __LINE__; goto error_hndl; }
|
||||
accumbuf = accumbuf_free - lb;
|
||||
|
||||
@ -564,7 +564,7 @@ ompi_coll_tuned_reduce_scatter_intra_ring(void *sbuf, void *rbuf, int *rcounts,
|
||||
MCA_COLL_BASE_TAG_REDUCE_SCATTER, comm,
|
||||
&reqs[inbi]));
|
||||
if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; }
|
||||
tmpsend = accumbuf + displs[recv_from] * extent;
|
||||
tmpsend = accumbuf + (ptrdiff_t)displs[recv_from] * extent;
|
||||
ret = MCA_PML_CALL(send(tmpsend, rcounts[recv_from], dtype, send_to,
|
||||
MCA_COLL_BASE_TAG_REDUCE_SCATTER,
|
||||
MCA_PML_BASE_SEND_STANDARD, comm));
|
||||
@ -588,7 +588,7 @@ ompi_coll_tuned_reduce_scatter_intra_ring(void *sbuf, void *rbuf, int *rcounts,
|
||||
/* Apply operation on previous block: result goes to rbuf
|
||||
rbuf[prevblock] = inbuf[inbi ^ 0x1] (op) rbuf[prevblock]
|
||||
*/
|
||||
tmprecv = accumbuf + displs[prevblock] * extent;
|
||||
tmprecv = accumbuf + (ptrdiff_t)displs[prevblock] * extent;
|
||||
ompi_op_reduce(op, inbuf[inbi ^ 0x1], tmprecv, rcounts[prevblock], dtype);
|
||||
|
||||
/* send previous block to send_to */
|
||||
@ -604,11 +604,11 @@ ompi_coll_tuned_reduce_scatter_intra_ring(void *sbuf, void *rbuf, int *rcounts,
|
||||
|
||||
/* Apply operation on the last block (my block)
|
||||
rbuf[rank] = inbuf[inbi] (op) rbuf[rank] */
|
||||
tmprecv = accumbuf + displs[rank] * extent;
|
||||
tmprecv = accumbuf + (ptrdiff_t)displs[rank] * extent;
|
||||
ompi_op_reduce(op, inbuf[inbi], tmprecv, rcounts[rank], dtype);
|
||||
|
||||
/* Copy result from tmprecv to rbuf */
|
||||
ret = ompi_datatype_copy_content_same_ddt(dtype, rcounts[rank], (char *) rbuf, tmprecv);
|
||||
ret = ompi_datatype_copy_content_same_ddt(dtype, rcounts[rank], (char *)rbuf, tmprecv);
|
||||
if (ret < 0) { line = __LINE__; goto error_hndl; }
|
||||
|
||||
if (NULL != displs) free(displs);
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2012 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -84,7 +84,7 @@ ompi_coll_tuned_scatter_intra_binomial(void *sbuf, int scount,
|
||||
}
|
||||
} else {
|
||||
/* root is not on 0, allocate temp buffer for send */
|
||||
tempbuf = (char *) malloc(strue_extent + (scount*size - 1) * sextent);
|
||||
tempbuf = (char *) malloc(strue_extent + ((ptrdiff_t)scount * (ptrdiff_t)size - 1) * sextent);
|
||||
if (NULL == tempbuf) {
|
||||
err = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl;
|
||||
}
|
||||
@ -92,13 +92,13 @@ ompi_coll_tuned_scatter_intra_binomial(void *sbuf, int scount,
|
||||
ptmp = tempbuf - slb;
|
||||
|
||||
/* and rotate data so they will eventually in the right place */
|
||||
err = ompi_datatype_copy_content_same_ddt(sdtype, scount*(size - root),
|
||||
ptmp, (char *) sbuf + sextent*root*scount);
|
||||
err = ompi_datatype_copy_content_same_ddt(sdtype, (ptrdiff_t)scount * (ptrdiff_t)(size - root),
|
||||
ptmp, (char *) sbuf + sextent * (ptrdiff_t)root * (ptrdiff_t)scount);
|
||||
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
|
||||
|
||||
|
||||
err = ompi_datatype_copy_content_same_ddt(sdtype, scount*root,
|
||||
ptmp + sextent*scount*(size - root), (char *) sbuf);
|
||||
err = ompi_datatype_copy_content_same_ddt(sdtype, (ptrdiff_t)scount * (ptrdiff_t)root,
|
||||
ptmp + sextent * (ptrdiff_t)scount * (ptrdiff_t)(size - root), (char *)sbuf);
|
||||
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
|
||||
|
||||
if (rbuf != MPI_IN_PLACE) {
|
||||
@ -112,7 +112,7 @@ ompi_coll_tuned_scatter_intra_binomial(void *sbuf, int scount,
|
||||
} else if (!(vrank % 2)) {
|
||||
/* non-root, non-leaf nodes, allocte temp buffer for recv
|
||||
* the most we need is rcount*size/2 */
|
||||
tempbuf = (char *) malloc(rtrue_extent + (rcount*size - 1) * rextent);
|
||||
tempbuf = (char *) malloc(rtrue_extent + ((ptrdiff_t)rcount * (ptrdiff_t)size - 1) * rextent);
|
||||
if (NULL == tempbuf) {
|
||||
err= OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl;
|
||||
}
|
||||
@ -131,7 +131,7 @@ ompi_coll_tuned_scatter_intra_binomial(void *sbuf, int scount,
|
||||
if (!(vrank % 2)) {
|
||||
if (rank != root) {
|
||||
/* recv from parent on non-root */
|
||||
err = MCA_PML_CALL(recv(ptmp, rcount*size, rdtype, bmtree->tree_prev,
|
||||
err = MCA_PML_CALL(recv(ptmp, (ptrdiff_t)rcount * (ptrdiff_t)size, rdtype, bmtree->tree_prev,
|
||||
MCA_COLL_BASE_TAG_SCATTER, comm, &status));
|
||||
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
|
||||
/* local copy to rbuf */
|
||||
@ -141,15 +141,16 @@ ompi_coll_tuned_scatter_intra_binomial(void *sbuf, int scount,
|
||||
}
|
||||
/* send to children on all non-leaf */
|
||||
for (i = 0; i < bmtree->tree_nextsize; i++) {
|
||||
int mycount = 0, vkid;
|
||||
size_t mycount = 0;
|
||||
int vkid;
|
||||
/* figure out how much data I have to send to this child */
|
||||
vkid = (bmtree->tree_next[i] - root + size) % size;
|
||||
mycount = vkid - vrank;
|
||||
if (mycount > (size - vkid))
|
||||
if( (int)mycount > (size - vkid) )
|
||||
mycount = size - vkid;
|
||||
mycount *= scount;
|
||||
|
||||
err = MCA_PML_CALL(send(ptmp + total_send*sextent, mycount, sdtype,
|
||||
err = MCA_PML_CALL(send(ptmp + (ptrdiff_t)total_send * sextent, mycount, sdtype,
|
||||
bmtree->tree_next[i],
|
||||
MCA_COLL_BASE_TAG_SCATTER,
|
||||
MCA_PML_BASE_SEND_STANDARD, comm));
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2012 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -25,37 +25,37 @@
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
typedef struct ompi_coll_tree_t {
|
||||
int32_t tree_root;
|
||||
int32_t tree_fanout;
|
||||
int32_t tree_bmtree;
|
||||
int32_t tree_prev;
|
||||
int32_t tree_next[MAXTREEFANOUT];
|
||||
int32_t tree_nextsize;
|
||||
} ompi_coll_tree_t;
|
||||
typedef struct ompi_coll_tree_t {
|
||||
int32_t tree_root;
|
||||
int32_t tree_fanout;
|
||||
int32_t tree_bmtree;
|
||||
int32_t tree_prev;
|
||||
int32_t tree_next[MAXTREEFANOUT];
|
||||
int32_t tree_nextsize;
|
||||
} ompi_coll_tree_t;
|
||||
|
||||
ompi_coll_tree_t*
|
||||
ompi_coll_tuned_topo_build_tree( int fanout,
|
||||
struct ompi_communicator_t* com,
|
||||
int root );
|
||||
ompi_coll_tree_t*
|
||||
ompi_coll_tuned_topo_build_in_order_bintree( struct ompi_communicator_t* comm );
|
||||
ompi_coll_tree_t*
|
||||
ompi_coll_tuned_topo_build_tree( int fanout,
|
||||
struct ompi_communicator_t* com,
|
||||
int root );
|
||||
ompi_coll_tree_t*
|
||||
ompi_coll_tuned_topo_build_in_order_bintree( struct ompi_communicator_t* comm );
|
||||
|
||||
ompi_coll_tree_t*
|
||||
ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
|
||||
int root );
|
||||
ompi_coll_tree_t*
|
||||
ompi_coll_tuned_topo_build_in_order_bmtree( struct ompi_communicator_t* comm,
|
||||
int root );
|
||||
ompi_coll_tree_t*
|
||||
ompi_coll_tuned_topo_build_chain( int fanout,
|
||||
struct ompi_communicator_t* com,
|
||||
int root );
|
||||
ompi_coll_tree_t*
|
||||
ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
|
||||
int root );
|
||||
ompi_coll_tree_t*
|
||||
ompi_coll_tuned_topo_build_in_order_bmtree( struct ompi_communicator_t* comm,
|
||||
int root );
|
||||
ompi_coll_tree_t*
|
||||
ompi_coll_tuned_topo_build_chain( int fanout,
|
||||
struct ompi_communicator_t* com,
|
||||
int root );
|
||||
|
||||
int ompi_coll_tuned_topo_destroy_tree( ompi_coll_tree_t** tree );
|
||||
int ompi_coll_tuned_topo_destroy_tree( ompi_coll_tree_t** tree );
|
||||
|
||||
/* debugging stuff, will be removed later */
|
||||
int ompi_coll_tuned_topo_dump_tree (ompi_coll_tree_t* tree, int rank);
|
||||
/* debugging stuff, will be removed later */
|
||||
int ompi_coll_tuned_topo_dump_tree (ompi_coll_tree_t* tree, int rank);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2012 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2007 High Performance Computing Center Stuttgart,
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user