
The tuned collectives can now deal with more than 2GB of data.

This commit was SVN r26103.
This commit is contained in:
George Bosilca 2012-03-05 22:23:44 +00:00
parent 762b3e13a9
commit a78a7bd8e8
14 changed files with 228 additions and 224 deletions
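
The change itself is mechanical: products of ranks, counts and extents that used to be evaluated with int operands are now promoted to ptrdiff_t before the multiplication, so element counts and byte offsets past INT_MAX no longer wrap. Below is a minimal standalone sketch of the before/after pattern (not taken from the commit); the names rank, rcount and rext mirror the variables in the diff, and the values are hypothetical.

#include <limits.h>
#include <stddef.h>
#include <stdio.h>

int main(void)
{
    int rank = 8;                        /* rank of the block being addressed */
    int rcount = 300000000;              /* 300M elements per block           */
    ptrdiff_t rext = 4;                  /* datatype extent in bytes          */

    /* Old pattern: rank * rcount is evaluated in 32-bit int before being
     * widened by the ptrdiff_t extent, so it wraps once the element count
     * passes INT_MAX.  Checked here without actually triggering the wrap. */
    long long elems = (long long)rank * rcount;
    if (elems > INT_MAX)
        printf("%lld elements: rank * rcount would overflow int\n", elems);

    /* New pattern used throughout the diff: promote the operands to
     * ptrdiff_t first, so the whole product is computed in pointer width. */
    ptrdiff_t offset = (ptrdiff_t)rank * (ptrdiff_t)rcount * rext;
    printf("byte offset: %td\n", offset);
    return 0;
}

The same promotion is applied to displacement products such as rdispls[rank] * rext, to the segment and block offsets in the ring algorithms, and to the dsize products in the decision functions, which is what lets the tuned collectives address buffers larger than 2GB.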

View file

@ -117,7 +117,7 @@ int ompi_coll_tuned_allgather_intra_bruck(void *sbuf, int scount,
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
} else if (0 != rank) {
tmpsend = ((char*)rbuf) + rank * rcount * rext;
tmpsend = ((char*)rbuf) + (ptrdiff_t)rank * (ptrdiff_t)rcount * rext;
err = ompi_datatype_copy_content_same_ddt(rdtype, rcount, tmprecv, tmpsend);
if (err < 0) { line = __LINE__; goto err_hndl; }
}
@ -139,7 +139,7 @@ int ompi_coll_tuned_allgather_intra_bruck(void *sbuf, int scount,
recvfrom = (rank + distance) % size;
sendto = (rank - distance + size) % size;
tmprecv = tmpsend + distance * rcount * rext;
tmprecv = tmpsend + (ptrdiff_t)distance * (ptrdiff_t)rcount * rext;
if (distance <= (size >> 1)) {
blockcount = distance;
@ -174,7 +174,7 @@ int ompi_coll_tuned_allgather_intra_bruck(void *sbuf, int scount,
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
free_buf = (char*) calloc(((true_extent + true_lb +
((size - rank) * rcount - 1) * rext)),
((ptrdiff_t)(size - rank) * (ptrdiff_t)rcount - 1) * rext)),
sizeof(char));
if (NULL == free_buf) {
line = __LINE__; err = OMPI_ERR_OUT_OF_RESOURCE; goto err_hndl;
@ -182,19 +182,19 @@ int ompi_coll_tuned_allgather_intra_bruck(void *sbuf, int scount,
shift_buf = free_buf - rlb;
tmpsend = (char*) rbuf;
err = ompi_datatype_copy_content_same_ddt(rdtype, ((size - rank) * rcount),
err = ompi_datatype_copy_content_same_ddt(rdtype, ((ptrdiff_t)(size - rank) * (ptrdiff_t)rcount),
shift_buf, tmpsend);
if (err < 0) { line = __LINE__; goto err_hndl; }
tmprecv = (char*) rbuf;
tmpsend = (char*) rbuf + (size - rank) * rcount * rext;
tmpsend = (char*) rbuf + (ptrdiff_t)(size - rank) * (ptrdiff_t)rcount * rext;
err = ompi_datatype_copy_content_same_ddt(rdtype, rank * rcount,
err = ompi_datatype_copy_content_same_ddt(rdtype, (ptrdiff_t)rank * (ptrdiff_t)rcount,
tmprecv, tmpsend);
if (err < 0) { line = __LINE__; goto err_hndl; }
tmprecv = (char*) rbuf + rank * rcount * rext;
err = ompi_datatype_copy_content_same_ddt(rdtype, (size - rank) * rcount,
tmprecv = (char*) rbuf + (ptrdiff_t)rank * (ptrdiff_t)rcount * rext;
err = ompi_datatype_copy_content_same_ddt(rdtype, (ptrdiff_t)(size - rank) * (ptrdiff_t)rcount,
tmprecv, shift_buf);
if (err < 0) { line = __LINE__; goto err_hndl; }
@ -305,7 +305,7 @@ ompi_coll_tuned_allgather_intra_recursivedoubling(void *sbuf, int scount,
*/
if (MPI_IN_PLACE != sbuf) {
tmpsend = (char*) sbuf;
tmprecv = (char*) rbuf + rank * rcount * rext;
tmprecv = (char*) rbuf + (ptrdiff_t)rank * (ptrdiff_t)rcount * rext;
err = ompi_datatype_sndrcv(tmpsend, scount, sdtype, tmprecv, rcount, rdtype);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
@ -321,18 +321,18 @@ ompi_coll_tuned_allgather_intra_recursivedoubling(void *sbuf, int scount,
remote = rank ^ distance;
if (rank < remote) {
tmpsend = (char*)rbuf + sendblocklocation * rcount * rext;
tmprecv = (char*)rbuf + (sendblocklocation + distance) * rcount * rext;
tmpsend = (char*)rbuf + (ptrdiff_t)sendblocklocation * (ptrdiff_t)rcount * rext;
tmprecv = (char*)rbuf + (ptrdiff_t)(sendblocklocation + distance) * (ptrdiff_t)rcount * rext;
} else {
tmpsend = (char*)rbuf + sendblocklocation * rcount * rext;
tmprecv = (char*)rbuf + (sendblocklocation - distance) * rcount * rext;
tmpsend = (char*)rbuf + (ptrdiff_t)sendblocklocation * (ptrdiff_t)rcount * rext;
tmprecv = (char*)rbuf + (ptrdiff_t)(sendblocklocation - distance) * (ptrdiff_t)rcount * rext;
sendblocklocation -= distance;
}
/* Sendreceive */
err = ompi_coll_tuned_sendrecv(tmpsend, distance * rcount, rdtype,
err = ompi_coll_tuned_sendrecv(tmpsend, (ptrdiff_t)distance * (ptrdiff_t)rcount, rdtype,
remote, MCA_COLL_BASE_TAG_ALLGATHER,
tmprecv, distance * rcount, rdtype,
tmprecv, (ptrdiff_t)distance * (ptrdiff_t)rcount, rdtype,
remote, MCA_COLL_BASE_TAG_ALLGATHER,
comm, MPI_STATUS_IGNORE, rank);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
@ -394,7 +394,7 @@ int ompi_coll_tuned_allgather_intra_ring(void *sbuf, int scount,
- if send buffer is not MPI_IN_PLACE, copy send buffer to appropriate block
of receive buffer
*/
tmprecv = (char*) rbuf + rank * rcount * rext;
tmprecv = (char*) rbuf + (ptrdiff_t)rank * (ptrdiff_t)rcount * rext;
if (MPI_IN_PLACE != sbuf) {
tmpsend = (char*) sbuf;
err = ompi_datatype_sndrcv(tmpsend, scount, sdtype, tmprecv, rcount, rdtype);
@ -416,8 +416,8 @@ int ompi_coll_tuned_allgather_intra_ring(void *sbuf, int scount,
recvdatafrom = (rank - i - 1 + size) % size;
senddatafrom = (rank - i + size) % size;
tmprecv = (char*)rbuf + recvdatafrom * rcount * rext;
tmpsend = (char*)rbuf + senddatafrom * rcount * rext;
tmprecv = (char*)rbuf + (ptrdiff_t)recvdatafrom * (ptrdiff_t)rcount * rext;
tmpsend = (char*)rbuf + (ptrdiff_t)senddatafrom * (ptrdiff_t)rcount * rext;
/* Sendreceive */
err = ompi_coll_tuned_sendrecv(tmpsend, rcount, rdtype, sendto,
@ -535,7 +535,7 @@ ompi_coll_tuned_allgather_intra_neighborexchange(void *sbuf, int scount,
- if send buffer is not MPI_IN_PLACE, copy send buffer to appropriate block
of receive buffer
*/
tmprecv = (char*) rbuf + rank * rcount * rext;
tmprecv = (char*) rbuf + (ptrdiff_t)rank *(ptrdiff_t) rcount * rext;
if (MPI_IN_PLACE != sbuf) {
tmpsend = (char*) sbuf;
err = ompi_datatype_sndrcv(tmpsend, scount, sdtype, tmprecv, rcount, rdtype);
@ -567,8 +567,8 @@ ompi_coll_tuned_allgather_intra_neighborexchange(void *sbuf, int scount,
exchange two blocks with appropriate neighbor.
the send location becomes the previous receive location.
*/
tmprecv = (char*)rbuf + neighbor[0] * rcount * rext;
tmpsend = (char*)rbuf + rank * rcount * rext;
tmprecv = (char*)rbuf + (ptrdiff_t)neighbor[0] * (ptrdiff_t)rcount * rext;
tmpsend = (char*)rbuf + (ptrdiff_t)rank * (ptrdiff_t)rcount * rext;
/* Sendreceive */
err = ompi_coll_tuned_sendrecv(tmpsend, rcount, rdtype, neighbor[0],
MCA_COLL_BASE_TAG_ALLGATHER,
@ -589,14 +589,14 @@ ompi_coll_tuned_allgather_intra_neighborexchange(void *sbuf, int scount,
recv_data_from[i_parity] =
(recv_data_from[i_parity] + offset_at_step[i_parity] + size) % size;
tmprecv = (char*)rbuf + recv_data_from[i_parity] * rcount * rext;
tmpsend = (char*)rbuf + send_data_from * rcount * rext;
tmprecv = (char*)rbuf + (ptrdiff_t)recv_data_from[i_parity] * (ptrdiff_t)rcount * rext;
tmpsend = (char*)rbuf + (ptrdiff_t)send_data_from * rcount * rext;
/* Sendreceive */
err = ompi_coll_tuned_sendrecv(tmpsend, 2 * rcount, rdtype,
err = ompi_coll_tuned_sendrecv(tmpsend, (ptrdiff_t)2 * (ptrdiff_t)rcount, rdtype,
neighbor[i_parity],
MCA_COLL_BASE_TAG_ALLGATHER,
tmprecv, 2 * rcount, rdtype,
tmprecv, (ptrdiff_t)2 * (ptrdiff_t)rcount, rdtype,
neighbor[i_parity],
MCA_COLL_BASE_TAG_ALLGATHER,
comm, MPI_STATUS_IGNORE, rank);
@ -646,11 +646,11 @@ int ompi_coll_tuned_allgather_intra_two_procs(void *sbuf, int scount,
tmpsend = (char*)sbuf;
if (MPI_IN_PLACE == sbuf) {
tmpsend = (char*)rbuf + rank * rcount * rext;
tmpsend = (char*)rbuf + (ptrdiff_t)rank * (ptrdiff_t)rcount * rext;
scount = rcount;
sdtype = rdtype;
}
tmprecv = (char*)rbuf + remote * rcount * rext;
tmprecv = (char*)rbuf + (ptrdiff_t)remote * (ptrdiff_t)rcount * rext;
err = ompi_coll_tuned_sendrecv(tmpsend, scount, sdtype, remote,
MCA_COLL_BASE_TAG_ALLGATHER,
@ -662,7 +662,7 @@ int ompi_coll_tuned_allgather_intra_two_procs(void *sbuf, int scount,
/* Place your data in correct location if necessary */
if (MPI_IN_PLACE != sbuf) {
err = ompi_datatype_sndrcv((char*)sbuf, scount, sdtype,
(char*)rbuf + rank * rcount * rext, rcount, rdtype);
(char*)rbuf + (ptrdiff_t)rank * (ptrdiff_t)rcount * rext, rcount, rdtype);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
}
@ -726,9 +726,9 @@ ompi_coll_tuned_allgather_intra_basic_linear(void *sbuf, int scount,
rbuf, rcount, rdtype,
0, comm, comm->c_coll.coll_gather_module);
if (MPI_SUCCESS == err) {
size_t length = (size_t)rcount * ompi_comm_size(comm);
size_t length = (ptrdiff_t)rcount * ompi_comm_size(comm);
if( length < (size_t)INT_MAX ) {
err = comm->c_coll.coll_bcast(rbuf, rcount * ompi_comm_size(comm), rdtype,
err = comm->c_coll.coll_bcast(rbuf, (ptrdiff_t)rcount * ompi_comm_size(comm), rdtype,
0, comm, comm->c_coll.coll_bcast_module);
} else {
ompi_datatype_t* temptype;

View file

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2009 The University of Tennessee and The University
* Copyright (c) 2004-2012 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -117,7 +117,7 @@ int ompi_coll_tuned_allgatherv_intra_bruck(void *sbuf, int scount,
- if send buffer is not MPI_IN_PLACE, copy send buffer to block rank of
the receive buffer.
*/
tmprecv = (char*) rbuf + rdispls[rank] * rext;
tmprecv = (char*) rbuf + (ptrdiff_t)rdispls[rank] * rext;
if (MPI_IN_PLACE != sbuf) {
tmpsend = (char*) sbuf;
err = ompi_datatype_sndrcv(tmpsend, scount, sdtype,
@ -246,7 +246,7 @@ int ompi_coll_tuned_allgatherv_intra_ring(void *sbuf, int scount,
- if send buffer is not MPI_IN_PLACE, copy send buffer to
the appropriate block of receive buffer
*/
tmprecv = (char*) rbuf + rdisps[rank] * rext;
tmprecv = (char*) rbuf + (ptrdiff_t)rdisps[rank] * rext;
if (MPI_IN_PLACE != sbuf) {
tmpsend = (char*) sbuf;
err = ompi_datatype_sndrcv(tmpsend, scount, sdtype,
@ -392,7 +392,7 @@ ompi_coll_tuned_allgatherv_intra_neighborexchange(void *sbuf, int scount,
- if send buffer is not MPI_IN_PLACE, copy send buffer to
the appropriate block of receive buffer
*/
tmprecv = (char*) rbuf + rdispls[rank] * rext;
tmprecv = (char*) rbuf + (ptrdiff_t)rdispls[rank] * rext;
if (MPI_IN_PLACE != sbuf) {
tmpsend = (char*) sbuf;
err = ompi_datatype_sndrcv(tmpsend, scount, sdtype,
@ -427,8 +427,8 @@ ompi_coll_tuned_allgatherv_intra_neighborexchange(void *sbuf, int scount,
Note, we need to create indexed datatype to send and receive these
blocks properly.
*/
tmprecv = (char*)rbuf + rdispls[neighbor[0]] * rext;
tmpsend = (char*)rbuf + rdispls[rank] * rext;
tmprecv = (char*)rbuf + (ptrdiff_t)rdispls[neighbor[0]] * rext;
tmpsend = (char*)rbuf + (ptrdiff_t)rdispls[rank] * rext;
err = ompi_coll_tuned_sendrecv(tmpsend, rcounts[rank], rdtype,
neighbor[0], MCA_COLL_BASE_TAG_ALLGATHERV,
tmprecv, rcounts[neighbor[0]], rdtype,
@ -532,11 +532,11 @@ int ompi_coll_tuned_allgatherv_intra_two_procs(void *sbuf, int scount,
tmpsend = (char*)sbuf;
if (MPI_IN_PLACE == sbuf) {
tmpsend = (char*)rbuf + rdispls[rank] * rext;
tmpsend = (char*)rbuf + (ptrdiff_t)rdispls[rank] * rext;
scount = rcounts[rank];
sdtype = rdtype;
}
tmprecv = (char*)rbuf + rdispls[remote] * rext;
tmprecv = (char*)rbuf + (ptrdiff_t)rdispls[remote] * rext;
err = ompi_coll_tuned_sendrecv(tmpsend, scount, sdtype, remote,
MCA_COLL_BASE_TAG_ALLGATHERV,
@ -548,7 +548,7 @@ int ompi_coll_tuned_allgatherv_intra_two_procs(void *sbuf, int scount,
/* Place your data in correct location if necessary */
if (MPI_IN_PLACE != sbuf) {
err = ompi_datatype_sndrcv((char*)sbuf, scount, sdtype,
(char*)rbuf + rdispls[rank] * rext,
(char*)rbuf + (ptrdiff_t)rdispls[rank] * rext,
rcounts[rank], rdtype);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
}
@ -616,7 +616,7 @@ ompi_coll_tuned_allgatherv_intra_basic_default(void *sbuf, int scount,
send_type = rdtype;
send_buf = (char*)rbuf;
for (i = 0; i < rank; ++i) {
send_buf += (rcounts[i] * extent);
send_buf += ((ptrdiff_t)rcounts[i] * extent);
}
} else {
send_buf = (char*)sbuf;

View file

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2009 The University of Tennessee and The University
* Copyright (c) 2004-2012 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -156,7 +156,7 @@ ompi_coll_tuned_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf,
ret = ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent);
if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; }
inplacebuf = (char*) malloc(true_extent + (count - 1) * extent);
inplacebuf = (char*) malloc(true_extent + (ptrdiff_t)(count - 1) * extent);
if (NULL == inplacebuf) { ret = -1; line = __LINE__; goto error_hndl; }
if (MPI_IN_PLACE == sbuf) {
@ -441,8 +441,8 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count,
if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; }
/* Send first block (my block) to the neighbor on the right */
block_offset = ((rank < split_rank)?
(rank * early_segcount) :
(rank * late_segcount + split_rank));
((ptrdiff_t)rank * (ptrdiff_t)early_segcount) :
((ptrdiff_t)rank * (ptrdiff_t)late_segcount + split_rank));
block_count = ((rank < split_rank)? early_segcount : late_segcount);
tmpsend = ((char*)rbuf) + block_offset * extent;
ret = MCA_PML_CALL(send(tmpsend, block_count, dtype, send_to,
@ -468,10 +468,10 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count,
rbuf[prevblock] = inbuf[inbi ^ 0x1] (op) rbuf[prevblock]
*/
block_offset = ((prevblock < split_rank)?
(prevblock * early_segcount) :
(prevblock * late_segcount + split_rank));
((ptrdiff_t)prevblock * early_segcount) :
((ptrdiff_t)prevblock * late_segcount + split_rank));
block_count = ((prevblock < split_rank)? early_segcount : late_segcount);
tmprecv = ((char*)rbuf) + block_offset * extent;
tmprecv = ((char*)rbuf) + (ptrdiff_t)block_offset * extent;
ompi_op_reduce(op, inbuf[inbi ^ 0x1], tmprecv, block_count, dtype);
/* send previous block to send_to */
@ -489,10 +489,10 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count,
rbuf[rank+1] = inbuf[inbi] (op) rbuf[rank + 1] */
recv_from = (rank + 1) % size;
block_offset = ((recv_from < split_rank)?
(recv_from * early_segcount) :
(recv_from * late_segcount + split_rank));
((ptrdiff_t)recv_from * early_segcount) :
((ptrdiff_t)recv_from * late_segcount + split_rank));
block_count = ((recv_from < split_rank)? early_segcount : late_segcount);
tmprecv = ((char*)rbuf) + block_offset * extent;
tmprecv = ((char*)rbuf) + (ptrdiff_t)block_offset * extent;
ompi_op_reduce(op, inbuf[inbi], tmprecv, block_count, dtype);
/* Distribution loop - variation of ring allgather */
@ -503,17 +503,17 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count,
const int send_data_from = (rank + 1 + size - k) % size;
const int send_block_offset =
((send_data_from < split_rank)?
(send_data_from * early_segcount) :
(send_data_from * late_segcount + split_rank));
((ptrdiff_t)send_data_from * early_segcount) :
((ptrdiff_t)send_data_from * late_segcount + split_rank));
const int recv_block_offset =
((recv_data_from < split_rank)?
(recv_data_from * early_segcount) :
(recv_data_from * late_segcount + split_rank));
((ptrdiff_t)recv_data_from * early_segcount) :
((ptrdiff_t)recv_data_from * late_segcount + split_rank));
block_count = ((send_data_from < split_rank)?
early_segcount : late_segcount);
tmprecv = (char*)rbuf + recv_block_offset * extent;
tmpsend = (char*)rbuf + send_block_offset * extent;
tmprecv = (char*)rbuf + (ptrdiff_t)recv_block_offset * extent;
tmpsend = (char*)rbuf + (ptrdiff_t)send_block_offset * extent;
ret = ompi_coll_tuned_sendrecv(tmpsend, block_count, dtype, send_to,
MCA_COLL_BASE_TAG_ALLREDUCE,
@ -663,7 +663,7 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count
COLL_TUNED_COMPUTED_SEGCOUNT(segsize, typelng, segcount)
/* Special case for count less than size * segcount - use regular ring */
if (count < size * segcount) {
if (count < (size * segcount)) {
OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:allreduce_ring_segmented rank %d/%d, count %d, switching to regular ring", rank, size, count));
return (ompi_coll_tuned_allreduce_intra_ring(sbuf, rbuf, count, dtype, op,
comm, module));
@ -689,7 +689,7 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count
early_blockcount, late_blockcount )
COLL_TUNED_COMPUTE_BLOCKCOUNT( early_blockcount, num_phases, inbi,
max_segcount, k)
max_real_segsize = true_extent + (max_segcount - 1) * extent;
max_real_segsize = true_extent + (ptrdiff_t)(max_segcount - 1) * extent;
/* Allocate and initialize temporary buffers */
inbuf[0] = (char*)malloc(max_real_segsize);
@ -739,17 +739,17 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count
- compute my block and phase offset
- send data */
block_offset = ((rank < split_rank)?
(rank * early_blockcount) :
(rank * late_blockcount + split_rank));
((ptrdiff_t)rank * (ptrdiff_t)early_blockcount) :
((ptrdiff_t)rank * (ptrdiff_t)late_blockcount + split_rank));
block_count = ((rank < split_rank)? early_blockcount : late_blockcount);
COLL_TUNED_COMPUTE_BLOCKCOUNT(block_count, num_phases, split_phase,
early_phase_segcount, late_phase_segcount)
phase_count = ((phase < split_phase)?
(early_phase_segcount) : (late_phase_segcount));
phase_offset = ((phase < split_phase)?
(phase * early_phase_segcount) :
(phase * late_phase_segcount + split_phase));
tmpsend = ((char*)rbuf) + (block_offset + phase_offset) * extent;
((ptrdiff_t)phase * (ptrdiff_t)early_phase_segcount) :
((ptrdiff_t)phase * (ptrdiff_t)late_phase_segcount + split_phase));
tmpsend = ((char*)rbuf) + (ptrdiff_t)(block_offset + phase_offset) * extent;
ret = MCA_PML_CALL(send(tmpsend, phase_count, dtype, send_to,
MCA_COLL_BASE_TAG_ALLREDUCE,
MCA_PML_BASE_SEND_STANDARD, comm));
@ -774,8 +774,8 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count
rbuf[prevblock] = inbuf[inbi ^ 0x1] (op) rbuf[prevblock]
*/
block_offset = ((prevblock < split_rank)?
(prevblock * early_blockcount) :
(prevblock * late_blockcount + split_rank));
((ptrdiff_t)prevblock * (ptrdiff_t)early_blockcount) :
((ptrdiff_t)prevblock * (ptrdiff_t)late_blockcount + split_rank));
block_count = ((prevblock < split_rank)?
early_blockcount : late_blockcount);
COLL_TUNED_COMPUTE_BLOCKCOUNT(block_count, num_phases, split_phase,
@ -783,9 +783,9 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count
phase_count = ((phase < split_phase)?
(early_phase_segcount) : (late_phase_segcount));
phase_offset = ((phase < split_phase)?
(phase * early_phase_segcount) :
(phase * late_phase_segcount + split_phase));
tmprecv = ((char*)rbuf) + (block_offset + phase_offset) * extent;
((ptrdiff_t)phase * (ptrdiff_t)early_phase_segcount) :
((ptrdiff_t)phase * (ptrdiff_t)late_phase_segcount + split_phase));
tmprecv = ((char*)rbuf) + (ptrdiff_t)(block_offset + phase_offset) * extent;
ompi_op_reduce(op, inbuf[inbi ^ 0x1], tmprecv, phase_count, dtype);
/* send previous block to send_to */
@ -803,8 +803,8 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count
rbuf[rank+1] = inbuf[inbi] (op) rbuf[rank + 1] */
recv_from = (rank + 1) % size;
block_offset = ((recv_from < split_rank)?
(recv_from * early_blockcount) :
(recv_from * late_blockcount + split_rank));
((ptrdiff_t)recv_from * (ptrdiff_t)early_blockcount) :
((ptrdiff_t)recv_from * (ptrdiff_t)late_blockcount + split_rank));
block_count = ((recv_from < split_rank)?
early_blockcount : late_blockcount);
COLL_TUNED_COMPUTE_BLOCKCOUNT(block_count, num_phases, split_phase,
@ -812,9 +812,9 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count
phase_count = ((phase < split_phase)?
(early_phase_segcount) : (late_phase_segcount));
phase_offset = ((phase < split_phase)?
(phase * early_phase_segcount) :
(phase * late_phase_segcount + split_phase));
tmprecv = ((char*)rbuf) + (block_offset + phase_offset) * extent;
((ptrdiff_t)phase * (ptrdiff_t)early_phase_segcount) :
((ptrdiff_t)phase * (ptrdiff_t)late_phase_segcount + split_phase));
tmprecv = ((char*)rbuf) + (ptrdiff_t)(block_offset + phase_offset) * extent;
ompi_op_reduce(op, inbuf[inbi], tmprecv, phase_count, dtype);
}
@ -826,17 +826,17 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count
const int send_data_from = (rank + 1 + size - k) % size;
const int send_block_offset =
((send_data_from < split_rank)?
(send_data_from * early_blockcount) :
(send_data_from * late_blockcount + split_rank));
((ptrdiff_t)send_data_from * (ptrdiff_t)early_blockcount) :
((ptrdiff_t)send_data_from * (ptrdiff_t)late_blockcount + split_rank));
const int recv_block_offset =
((recv_data_from < split_rank)?
(recv_data_from * early_blockcount) :
(recv_data_from * late_blockcount + split_rank));
((ptrdiff_t)recv_data_from * (ptrdiff_t)early_blockcount) :
((ptrdiff_t)recv_data_from * (ptrdiff_t)late_blockcount + split_rank));
block_count = ((send_data_from < split_rank)?
early_blockcount : late_blockcount);
tmprecv = (char*)rbuf + recv_block_offset * extent;
tmpsend = (char*)rbuf + send_block_offset * extent;
tmprecv = (char*)rbuf + (ptrdiff_t)recv_block_offset * extent;
tmpsend = (char*)rbuf + (ptrdiff_t)send_block_offset * extent;
ret = ompi_coll_tuned_sendrecv(tmpsend, block_count, dtype, send_to,
MCA_COLL_BASE_TAG_ALLREDUCE,

View file

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* Copyright (c) 2004-2012 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -62,8 +62,8 @@ int ompi_coll_tuned_alltoall_intra_pairwise(void *sbuf, int scount,
recvfrom = (rank + size - step) % size;
/* Determine sending and receiving locations */
tmpsend = (char*)sbuf + sendto * sext * scount;
tmprecv = (char*)rbuf + recvfrom * rext * rcount;
tmpsend = (char*)sbuf + (ptrdiff_t)sendto * sext * (ptrdiff_t)scount;
tmprecv = (char*)rbuf + (ptrdiff_t)recvfrom * rext * (ptrdiff_t)rcount;
/* send and receive */
err = ompi_coll_tuned_sendrecv( tmpsend, scount, sdtype, sendto,
@ -141,22 +141,22 @@ int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount,
#endif
/* tmp buffer allocation for message data */
tmpbuf_free = (char *) malloc(tsext + (scount * size - 1) * sext);
tmpbuf_free = (char *) malloc(tsext + ((ptrdiff_t)scount * (ptrdiff_t)size - 1) * sext);
if (tmpbuf_free == NULL) { line = __LINE__; err = -1; goto err_hndl; }
tmpbuf = tmpbuf_free - slb;
/* Step 1 - local rotation - shift up by rank */
err = ompi_datatype_copy_content_same_ddt (sdtype,
(int32_t) ((size - rank) * scount),
(int32_t) ((ptrdiff_t)(size - rank) * (ptrdiff_t)scount),
tmpbuf,
((char*) sbuf) + rank * scount * sext);
((char*) sbuf) + (ptrdiff_t)rank * (ptrdiff_t)scount * sext);
if (err<0) {
line = __LINE__; err = -1; goto err_hndl;
}
if (rank != 0) {
err = ompi_datatype_copy_content_same_ddt (sdtype, (int32_t) (rank * scount),
tmpbuf + (size - rank) * scount* sext,
err = ompi_datatype_copy_content_same_ddt (sdtype, (ptrdiff_t)rank * (ptrdiff_t)scount,
tmpbuf + (ptrdiff_t)(size - rank) * (ptrdiff_t)scount* sext,
(char*) sbuf);
if (err<0) {
line = __LINE__; err = -1; goto err_hndl;
@ -173,7 +173,7 @@ int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount,
/* create indexed datatype */
for (i = 1; i < size; i++) {
if (( i & distance) == distance) {
displs[k] = i * scount;
displs[k] = (ptrdiff_t)i * (ptrdiff_t)scount;
blen[k] = scount;
k++;
}
@ -206,8 +206,8 @@ int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount,
for (i = 0; i < size; i++) {
err = ompi_datatype_copy_content_same_ddt (rdtype, (int32_t) rcount,
((char*)rbuf) + (((rank - i + size) % size) * rcount * rext),
tmpbuf + i * rcount * rext);
((char*)rbuf) + ((ptrdiff_t)((rank - i + size) % size) * (ptrdiff_t)rcount * rext),
tmpbuf + (ptrdiff_t)i * (ptrdiff_t)rcount * rext);
if (err < 0) { line = __LINE__; err = -1; goto err_hndl; }
}
@ -290,8 +290,8 @@ int ompi_coll_tuned_alltoall_intra_linear_sync(void *sbuf, int scount,
/* simple optimization */
psnd = ((char *) sbuf) + (rank * sext);
prcv = ((char *) rbuf) + (rank * rext);
psnd = ((char *) sbuf) + (ptrdiff_t)rank * sext;
prcv = ((char *) rbuf) + (ptrdiff_t)rank * rext;
error = ompi_datatype_sndrcv(psnd, scount, sdtype, prcv, rcount, rdtype);
if (MPI_SUCCESS != error) {
@ -320,7 +320,7 @@ int ompi_coll_tuned_alltoall_intra_linear_sync(void *sbuf, int scount,
ri = (ri + 1) % size, ++nreqs, ++nrreqs) {
error =
MCA_PML_CALL(irecv
(prcv + (ri * rext), rcount, rdtype, ri,
(prcv + (ptrdiff_t)ri * rext, rcount, rdtype, ri,
MCA_COLL_BASE_TAG_ALLTOALL, comm, &reqs[nreqs]));
if (MPI_SUCCESS != error) { line = __LINE__; goto error_hndl; }
}
@ -328,7 +328,7 @@ int ompi_coll_tuned_alltoall_intra_linear_sync(void *sbuf, int scount,
si = (si + size - 1) % size, ++nreqs, ++nsreqs) {
error =
MCA_PML_CALL(isend
(psnd + (si * sext), scount, sdtype, si,
(psnd + (ptrdiff_t)si * sext, scount, sdtype, si,
MCA_COLL_BASE_TAG_ALLTOALL,
MCA_PML_BASE_SEND_STANDARD, comm, &reqs[nreqs]));
if (MPI_SUCCESS != error) { line = __LINE__; goto error_hndl; }
@ -360,7 +360,7 @@ int ompi_coll_tuned_alltoall_intra_linear_sync(void *sbuf, int scount,
if (nrreqs < (size - 1)) {
error =
MCA_PML_CALL(irecv
(prcv + (ri * rext), rcount, rdtype, ri,
(prcv + (ptrdiff_t)ri * rext, rcount, rdtype, ri,
MCA_COLL_BASE_TAG_ALLTOALL, comm,
&reqs[completed]));
if (MPI_SUCCESS != error) { line = __LINE__; goto error_hndl; }
@ -370,7 +370,7 @@ int ompi_coll_tuned_alltoall_intra_linear_sync(void *sbuf, int scount,
} else {
if (nsreqs < (size - 1)) {
error = MCA_PML_CALL(isend
(psnd + (si * sext), scount, sdtype, si,
(psnd + (ptrdiff_t)si * sext, scount, sdtype, si,
MCA_COLL_BASE_TAG_ALLTOALL,
MCA_PML_BASE_SEND_STANDARD, comm,
&reqs[completed]));
@ -423,8 +423,8 @@ int ompi_coll_tuned_alltoall_intra_two_procs(void *sbuf, int scount,
/* exchange data */
remote = rank ^ 1;
tmpsend = (char*)sbuf + remote * sext * scount;
tmprecv = (char*)rbuf + remote * rext * rcount;
tmpsend = (char*)sbuf + (ptrdiff_t)remote * sext * (ptrdiff_t)scount;
tmprecv = (char*)rbuf + (ptrdiff_t)remote * rext * (ptrdiff_t)rcount;
/* send and receive */
err = ompi_coll_tuned_sendrecv ( tmpsend, scount, sdtype, remote,
@ -435,9 +435,9 @@ int ompi_coll_tuned_alltoall_intra_two_procs(void *sbuf, int scount,
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
/* ddt sendrecv your own data */
err = ompi_datatype_sndrcv((char*) sbuf + rank * sext * scount,
err = ompi_datatype_sndrcv((char*) sbuf + (ptrdiff_t)rank * sext * (ptrdiff_t)scount,
(int32_t) scount, sdtype,
(char*) rbuf + rank * rext * rcount,
(char*) rbuf + (ptrdiff_t)rank * rext * (ptrdiff_t)rcount,
(int32_t) rcount, rdtype);
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
@ -515,8 +515,8 @@ int ompi_coll_tuned_alltoall_intra_basic_linear(void *sbuf, int scount,
/* simple optimization */
psnd = ((char *) sbuf) + (rank * sndinc);
prcv = ((char *) rbuf) + (rank * rcvinc);
psnd = ((char *) sbuf) + (ptrdiff_t)rank * sndinc;
prcv = ((char *) rbuf) + (ptrdiff_t)rank * rcvinc;
err = ompi_datatype_sndrcv(psnd, scount, sdtype, prcv, rcount, rdtype);
if (MPI_SUCCESS != err) {
@ -543,7 +543,7 @@ int ompi_coll_tuned_alltoall_intra_basic_linear(void *sbuf, int scount,
i = (i + 1) % size, ++rreq, ++nreqs) {
err =
MCA_PML_CALL(irecv_init
(prcv + (i * rcvinc), rcount, rdtype, i,
(prcv + (ptrdiff_t)i * rcvinc, rcount, rdtype, i,
MCA_COLL_BASE_TAG_ALLTOALL, comm, rreq));
if (MPI_SUCCESS != err) {
ompi_coll_tuned_free_reqs(req, rreq - req);
@ -559,7 +559,7 @@ int ompi_coll_tuned_alltoall_intra_basic_linear(void *sbuf, int scount,
i = (i + size - 1) % size, ++sreq, ++nreqs) {
err =
MCA_PML_CALL(isend_init
(psnd + (i * sndinc), scount, sdtype, i,
(psnd + (ptrdiff_t)i * sndinc, scount, sdtype, i,
MCA_COLL_BASE_TAG_ALLTOALL,
MCA_PML_BASE_SEND_STANDARD, comm, sreq));
if (MPI_SUCCESS != err) {

View file

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* Copyright (c) 2004-2012 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -53,8 +53,8 @@ ompi_coll_tuned_alltoallv_intra_pairwise(void *sbuf, int *scounts, int *sdisps,
ompi_datatype_type_extent(sdtype, &sext);
ompi_datatype_type_extent(rdtype, &rext);
psnd = ((char *) sbuf) + (sdisps[rank] * sext);
prcv = ((char *) rbuf) + (rdisps[rank] * rext);
psnd = ((char *) sbuf) + (ptrdiff_t)sdisps[rank] * sext;
prcv = ((char *) rbuf) + (ptrdiff_t)rdisps[rank] * rext;
if (0 != scounts[rank]) {
err = ompi_datatype_sndrcv(psnd, scounts[rank], sdtype,
@ -77,8 +77,8 @@ ompi_coll_tuned_alltoallv_intra_pairwise(void *sbuf, int *scounts, int *sdisps,
recvfrom = (rank + size - step) % size;
/* Determine sending and receiving locations */
psnd = (char*)sbuf + sdisps[sendto] * sext;
prcv = (char*)rbuf + rdisps[recvfrom] * rext;
psnd = (char*)sbuf + (ptrdiff_t)sdisps[sendto] * sext;
prcv = (char*)rbuf + (ptrdiff_t)rdisps[recvfrom] * rext;
/* send and receive */
err = ompi_coll_tuned_sendrecv( psnd, scounts[sendto], sdtype, sendto,
@ -133,8 +133,8 @@ ompi_coll_tuned_alltoallv_intra_basic_linear(void *sbuf, int *scounts, int *sdis
ompi_datatype_type_extent(rdtype, &rext);
/* Simple optimization - handle send to self first */
psnd = ((char *) sbuf) + (sdisps[rank] * sext);
prcv = ((char *) rbuf) + (rdisps[rank] * rext);
psnd = ((char *) sbuf) + (ptrdiff_t)sdisps[rank] * sext;
prcv = ((char *) rbuf) + (ptrdiff_t)rdisps[rank] * rext;
if (0 != scounts[rank]) {
err = ompi_datatype_sndrcv(psnd, scounts[rank], sdtype,
prcv, rcounts[rank], rdtype);
@ -158,7 +158,7 @@ ompi_coll_tuned_alltoallv_intra_basic_linear(void *sbuf, int *scounts, int *sdis
continue;
}
prcv = ((char *) rbuf) + (rdisps[i] * rext);
prcv = ((char *) rbuf) + (ptrdiff_t)rdisps[i] * rext;
err = MCA_PML_CALL(irecv_init(prcv, rcounts[i], rdtype,
i, MCA_COLL_BASE_TAG_ALLTOALLV, comm,
preq++));
@ -175,7 +175,7 @@ ompi_coll_tuned_alltoallv_intra_basic_linear(void *sbuf, int *scounts, int *sdis
continue;
}
psnd = ((char *) sbuf) + (sdisps[i] * sext);
psnd = ((char *) sbuf) + (ptrdiff_t)sdisps[i] * sext;
err = MCA_PML_CALL(isend_init(psnd, scounts[i], sdtype,
i, MCA_COLL_BASE_TAG_ALLTOALLV,
MCA_PML_BASE_SEND_STANDARD, comm,

View file

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* Copyright (c) 2004-2012 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -61,14 +61,14 @@ ompi_coll_tuned_bcast_intra_generic( void* buffer,
ompi_datatype_get_extent (datatype, &lb, &extent);
ompi_datatype_type_size( datatype, &type_size );
num_segments = (original_count + count_by_segment - 1) / count_by_segment;
realsegsize = count_by_segment * extent;
realsegsize = (ptrdiff_t)count_by_segment * extent;
/* Set the buffer pointers */
tmpbuf = (char *) buffer;
#if !defined(COLL_TUNED_BCAST_USE_BLOCKING)
if( tree->tree_nextsize != 0 ) {
send_reqs = (ompi_request_t**)malloc( tree->tree_nextsize *
send_reqs = (ompi_request_t**)malloc( (ptrdiff_t)tree->tree_nextsize *
sizeof(ompi_request_t*) );
}
#endif
@ -180,7 +180,7 @@ ompi_coll_tuned_bcast_intra_generic( void* buffer,
/* Process the last segment */
err = ompi_request_wait( &recv_reqs[req_index], MPI_STATUSES_IGNORE );
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
sendcount = original_count - (num_segments - 1) * count_by_segment;
sendcount = original_count - (ptrdiff_t)(num_segments - 1) * count_by_segment;
for( i = 0; i < tree->tree_nextsize; i++ ) {
#if defined(COLL_TUNED_BCAST_USE_BLOCKING)
err = MCA_PML_CALL(send(tmpbuf, sendcount, datatype,
@ -433,8 +433,8 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
/* if the message is too small to be split into segments */
if( (counts[0] == 0 || counts[1] == 0) ||
(segsize > counts[0] * type_size) ||
(segsize > counts[1] * type_size) ) {
(segsize > ((ptrdiff_t)counts[0] * type_size)) ||
(segsize > ((ptrdiff_t)counts[1] * type_size)) ) {
/* call linear version here ! */
return (ompi_coll_tuned_bcast_intra_chain ( buffer, count, datatype,
root, comm, module,
@ -444,12 +444,12 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
err = ompi_datatype_get_extent (datatype, &lb, &type_extent);
/* Determine real segment size */
realsegsize[0] = segcount[0] * type_extent;
realsegsize[1] = segcount[1] * type_extent;
realsegsize[0] = (ptrdiff_t)segcount[0] * type_extent;
realsegsize[1] = (ptrdiff_t)segcount[1] * type_extent;
/* set the buffer pointers */
tmpbuf[0] = (char *) buffer;
tmpbuf[1] = (char *) buffer+counts[0] * type_extent;
tmpbuf[1] = (char *) buffer + (ptrdiff_t)counts[0] * type_extent;
/* Step 1:
Root splits the buffer in 2 and sends segmented message down the branches.
@ -508,7 +508,7 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
for( segindex = 1; segindex < num_segments[lr]; segindex++ ) {
/* determine how many elements to expect in this round */
if( segindex == (num_segments[lr] - 1))
sendcount[lr] = counts[lr] - segindex*segcount[lr];
sendcount[lr] = counts[lr] - (ptrdiff_t)segindex * (ptrdiff_t)segcount[lr];
/* post new irecv */
MCA_PML_CALL(irecv( tmpbuf[lr] + realsegsize[lr], sendcount[lr],
datatype, tree->tree_prev, MCA_COLL_BASE_TAG_BCAST,
@ -546,7 +546,8 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
sendcount[lr] = segcount[lr];
for (segindex = 0; segindex < num_segments[lr]; segindex++) {
/* determine how many elements to expect in this round */
if (segindex == (num_segments[lr] - 1)) sendcount[lr] = counts[lr] - segindex*segcount[lr];
if (segindex == (num_segments[lr] - 1))
sendcount[lr] = counts[lr] - (ptrdiff_t)segindex * (ptrdiff_t)segcount[lr];
/* receive segments */
MCA_PML_CALL(recv(tmpbuf[lr], sendcount[lr], datatype,
tree->tree_prev, MCA_COLL_BASE_TAG_BCAST,
@ -559,7 +560,7 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
/* reset the buffer pointers */
tmpbuf[0] = (char *) buffer;
tmpbuf[1] = (char *) buffer+counts[0] * type_extent;
tmpbuf[1] = (char *) buffer + (ptrdiff_t)counts[0] * type_extent;
/* Step 2:
Find your immediate pair (identical node in opposite subtree) and SendRecv

View file

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2009 The University of Tennessee and The University
* Copyright (c) 2004-2012 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -120,7 +120,7 @@ int ompi_coll_tuned_alltoall_intra_dec_dynamic(void *sbuf, int scount,
ompi_datatype_type_size (sdtype, &dsize);
comsize = ompi_comm_size(comm);
dsize *= comsize * scount;
dsize *= (ptrdiff_t)comsize * (ptrdiff_t)scount;
alg = ompi_coll_tuned_get_target_method_params (data->com_rules[ALLTOALL],
dsize, &faninout, &segsize, &max_requests);
@ -414,7 +414,7 @@ int ompi_coll_tuned_allgather_intra_dec_dynamic(void *sbuf, int scount,
ompi_datatype_type_size (sdtype, &dsize);
comsize = ompi_comm_size(comm);
dsize *= comsize * scount;
dsize *= (ptrdiff_t)comsize * (ptrdiff_t)scount;
alg = ompi_coll_tuned_get_target_method_params (data->com_rules[ALLGATHER],
dsize, &faninout, &segsize, &ignoreme);

View file

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2009 The University of Tennessee and The University
* Copyright (c) 2004-2012 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -56,7 +56,7 @@ ompi_coll_tuned_allreduce_intra_dec_fixed (void *sbuf, void *rbuf, int count,
* Ring algorithm does not support non-commutative operations.
*/
ompi_datatype_type_size(dtype, &dsize);
block_dsize = dsize * count;
block_dsize = dsize * (ptrdiff_t)count;
if (block_dsize < intermediate_message) {
return (ompi_coll_tuned_allreduce_intra_recursivedoubling (sbuf, rbuf,
@ -66,7 +66,7 @@ ompi_coll_tuned_allreduce_intra_dec_fixed (void *sbuf, void *rbuf, int count,
if( ompi_op_is_commute(op) && (count > comm_size) ) {
const size_t segment_size = 1 << 20; /* 1 MB */
if ((comm_size * segment_size >= block_dsize)) {
if (((size_t)comm_size * (size_t)segment_size >= block_dsize)) {
return (ompi_coll_tuned_allreduce_intra_ring (sbuf, rbuf, count, dtype,
op, comm, module));
} else {
@ -116,7 +116,7 @@ int ompi_coll_tuned_alltoall_intra_dec_fixed(void *sbuf, int scount,
Has better performance for messages of intermediate sizes than the old one */
/* determine block size */
ompi_datatype_type_size(sdtype, &dsize);
block_dsize = dsize * scount;
block_dsize = dsize * (ptrdiff_t)scount;
if ((block_dsize < 200) && (communicator_size > 12)) {
return ompi_coll_tuned_alltoall_intra_bruck(sbuf, scount, sdtype,
@ -360,7 +360,7 @@ int ompi_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf,
/* need data size for decision function */
ompi_datatype_type_size(datatype, &dsize);
message_size = dsize * count; /* needed for decision */
message_size = dsize * (ptrdiff_t)count; /* needed for decision */
/**
* If the operation is non commutative we currently have choice of linear
@ -535,7 +535,7 @@ int ompi_coll_tuned_allgather_intra_dec_fixed(void *sbuf, int scount,
/* Determine complete data size */
ompi_datatype_type_size(sdtype, &dsize);
total_dsize = dsize * scount * communicator_size;
total_dsize = dsize * (ptrdiff_t)scount * (ptrdiff_t)communicator_size;
OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_allgather_intra_dec_fixed"
" rank %d com_size %d msg_length %lu",
@ -632,7 +632,7 @@ int ompi_coll_tuned_allgatherv_intra_dec_fixed(void *sbuf, int scount,
ompi_datatype_type_size(sdtype, &dsize);
total_dsize = 0;
for (i = 0; i < communicator_size; i++) {
total_dsize += dsize * rcounts[i];
total_dsize += dsize * (ptrdiff_t)rcounts[i];
}
OPAL_OUTPUT((ompi_coll_tuned_stream,
@ -697,10 +697,10 @@ int ompi_coll_tuned_gather_intra_dec_fixed(void *sbuf, int scount,
/* Determine block size */
if (rank == root) {
ompi_datatype_type_size(rdtype, &dsize);
block_size = dsize * rcount;
block_size = dsize * (ptrdiff_t)rcount;
} else {
ompi_datatype_type_size(sdtype, &dsize);
block_size = dsize * scount;
block_size = dsize * (ptrdiff_t)scount;
}
if (block_size > large_block_size) {
@ -758,10 +758,10 @@ int ompi_coll_tuned_scatter_intra_dec_fixed(void *sbuf, int scount,
/* Determine block size */
if (root == rank) {
ompi_datatype_type_size(sdtype, &dsize);
block_size = dsize * scount;
block_size = dsize * (ptrdiff_t)scount;
} else {
ompi_datatype_type_size(rdtype, &dsize);
block_size = dsize * rcount;
block_size = dsize * (ptrdiff_t)rcount;
}
if ((communicator_size > small_comm_size) &&

View file

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2009 The University of Tennessee and The University
* Copyright (c) 2004-2012 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -86,7 +86,7 @@ ompi_coll_tuned_gather_intra_binomial(void *sbuf, int scount,
} else {
/* root is not on 0, allocate temp buffer for recv,
* rotate data at the end */
tempbuf = (char *) malloc(rtrue_extent + (rcount*size - 1) * rextent);
tempbuf = (char *) malloc(rtrue_extent + ((ptrdiff_t)rcount * (ptrdiff_t)size - 1) * rextent);
if (NULL == tempbuf) {
err= OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl;
}
@ -100,7 +100,7 @@ ompi_coll_tuned_gather_intra_binomial(void *sbuf, int scount,
} else {
/* copy from rbuf to temp buffer */
err = ompi_datatype_copy_content_same_ddt(rdtype, rcount, ptmp,
(char *) rbuf + rank*rextent*rcount);
(char *)rbuf + (ptrdiff_t)rank * rextent * (ptrdiff_t)rcount);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
}
}
@ -109,7 +109,7 @@ ompi_coll_tuned_gather_intra_binomial(void *sbuf, int scount,
/* other non-leaf nodes, allocate temp buffer for data received from
* children, the most we need is half of the total data elements due
* to the property of the binomial tree */
tempbuf = (char *) malloc(strue_extent + (scount*size - 1) * sextent);
tempbuf = (char *) malloc(strue_extent + ((ptrdiff_t)scount * (ptrdiff_t)size - 1) * sextent);
if (NULL == tempbuf) {
err= OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl;
}
@ -148,7 +148,7 @@ ompi_coll_tuned_gather_intra_binomial(void *sbuf, int scount,
"ompi_coll_tuned_gather_intra_binomial rank %d recv %d mycount = %d",
rank, bmtree->tree_next[i], mycount));
err = MCA_PML_CALL(recv(ptmp + total_recv*rextent, rcount*size-total_recv, rdtype,
err = MCA_PML_CALL(recv(ptmp + total_recv*rextent, (ptrdiff_t)rcount * size - total_recv, rdtype,
bmtree->tree_next[i], MCA_COLL_BASE_TAG_GATHER,
comm, &status));
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
@ -173,13 +173,13 @@ ompi_coll_tuned_gather_intra_binomial(void *sbuf, int scount,
if (rank == root) {
if (root != 0) {
/* rotate received data on root if root != 0 */
err = ompi_datatype_copy_content_same_ddt(rdtype, rcount*(size - root),
(char *) rbuf + rextent*root*rcount, ptmp);
err = ompi_datatype_copy_content_same_ddt(rdtype, (ptrdiff_t)rcount * (ptrdiff_t)(size - root),
(char *)rbuf + rextent * (ptrdiff_t)root * (ptrdiff_t)rcount, ptmp);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
err = ompi_datatype_copy_content_same_ddt(rdtype, rcount*root,
(char *) rbuf, ptmp + rextent*rcount*(size-root));
err = ompi_datatype_copy_content_same_ddt(rdtype, (ptrdiff_t)rcount * (ptrdiff_t)root,
(char *) rbuf, ptmp + rextent * (ptrdiff_t)rcount * (ptrdiff_t)(size-root));
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
free(tempbuf);
@ -291,7 +291,7 @@ ompi_coll_tuned_gather_intra_linear_sync(void *sbuf, int scount,
}
/* irecv for the first segment from i */
ptmp = (char*)rbuf + i * rcount * extent;
ptmp = (char*)rbuf + (ptrdiff_t)i * (ptrdiff_t)rcount * extent;
ret = MCA_PML_CALL(irecv(ptmp, first_segment_count, rdtype, i,
MCA_COLL_BASE_TAG_GATHER, comm,
&first_segment_req));
@ -304,7 +304,7 @@ ompi_coll_tuned_gather_intra_linear_sync(void *sbuf, int scount,
if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
/* irecv for the second segment */
ptmp = (char*)rbuf + (i * rcount + first_segment_count) * extent;
ptmp = (char*)rbuf + ((ptrdiff_t)i * (ptrdiff_t)rcount + first_segment_count) * extent;
ret = MCA_PML_CALL(irecv(ptmp, (rcount - first_segment_count),
rdtype, i, MCA_COLL_BASE_TAG_GATHER, comm,
&reqs[i]));
@ -318,7 +318,7 @@ ompi_coll_tuned_gather_intra_linear_sync(void *sbuf, int scount,
/* copy local data if necessary */
if (MPI_IN_PLACE != sbuf) {
ret = ompi_datatype_sndrcv(sbuf, scount, sdtype,
(char*)rbuf + rank * rcount * extent,
(char*)rbuf + (ptrdiff_t)rank * (ptrdiff_t)rcount * extent,
rcount, rdtype);
if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
}
@ -394,7 +394,7 @@ ompi_coll_tuned_gather_intra_basic_linear(void *sbuf, int scount,
/* I am the root, loop receiving the data. */
ompi_datatype_get_extent(rdtype, &lb, &extent);
incr = extent * rcount;
incr = extent * (ptrdiff_t)rcount;
for (i = 0, ptmp = (char *) rbuf; i < size; ++i, ptmp += incr) {
if (i == rank) {
if (MPI_IN_PLACE != sbuf) {

View file

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2009 The University of Tennessee and The University
* Copyright (c) 2004-2012 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -63,14 +63,16 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c
ompi_datatype_get_extent( datatype, &lower_bound, &extent );
ompi_datatype_type_size( datatype, &typelng );
num_segments = (original_count + count_by_segment - 1) / count_by_segment;
segment_increment = count_by_segment * extent;
segment_increment = (ptrdiff_t)count_by_segment * extent;
sendtmpbuf = (char*) sendbuf;
if( sendbuf == MPI_IN_PLACE ) {
sendtmpbuf = (char *)recvbuf;
}
OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:reduce_generic count %d, msg size %ld, segsize %ld, max_requests %d", original_count, (unsigned long)(num_segments * segment_increment), (unsigned long)segment_increment, max_outstanding_reqs));
OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:reduce_generic count %d, msg size %ld, segsize %ld, max_requests %d",
original_count, (unsigned long)((ptrdiff_t)num_segments * (ptrdiff_t)segment_increment),
(unsigned long)segment_increment, max_outstanding_reqs));
rank = ompi_comm_rank(comm);
@ -87,7 +89,7 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c
if( (NULL == accumbuf) || (root != rank) ) {
/* Allocate temporary accumulator buffer. */
accumbuf_free = (char*)malloc(true_extent +
(original_count - 1) * extent);
(ptrdiff_t)(original_count - 1) * extent);
if (accumbuf_free == NULL) {
line = __LINE__; ret = -1; goto error_hndl;
}
@ -102,7 +104,7 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c
(char*)sendtmpbuf);
}
/* Allocate two buffers for incoming segments */
real_segment_size = true_extent + (count_by_segment - 1) * extent;
real_segment_size = true_extent + (ptrdiff_t)(count_by_segment - 1) * extent;
inbuf_free[0] = (char*) malloc(real_segment_size);
if( inbuf_free[0] == NULL ) {
line = __LINE__; ret = -1; goto error_hndl;
@ -127,7 +129,7 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c
/* recvcount - number of elements in current segment */
recvcount = count_by_segment;
if( segindex == (num_segments-1) )
recvcount = original_count - count_by_segment * segindex;
recvcount = original_count - (ptrdiff_t)count_by_segment * (ptrdiff_t)segindex;
/* for each child */
for( i = 0; i < tree->tree_nextsize; i++ ) {
@ -151,7 +153,7 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c
*/
if( (ompi_op_is_commute(op)) &&
!((MPI_IN_PLACE == sendbuf) && (rank == tree->tree_root)) ) {
local_recvbuf = accumbuf + segindex * segment_increment;
local_recvbuf = accumbuf + (ptrdiff_t)segindex * (ptrdiff_t)segment_increment;
}
}
@ -178,19 +180,19 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c
if( 1 == i ) {
if( (ompi_op_is_commute(op)) &&
!((MPI_IN_PLACE == sendbuf) && (rank == tree->tree_root)) ) {
local_op_buffer = sendtmpbuf + segindex * segment_increment;
local_op_buffer = sendtmpbuf + (ptrdiff_t)segindex * (ptrdiff_t)segment_increment;
}
}
/* apply operation */
ompi_op_reduce(op, local_op_buffer,
accumbuf + segindex * segment_increment,
accumbuf + (ptrdiff_t)segindex * (ptrdiff_t)segment_increment,
recvcount, datatype );
} else if ( segindex > 0 ) {
void* accumulator = accumbuf + (segindex-1) * segment_increment;
void* accumulator = accumbuf + (ptrdiff_t)(segindex-1) * (ptrdiff_t)segment_increment;
if( tree->tree_nextsize <= 1 ) {
if( (ompi_op_is_commute(op)) &&
!((MPI_IN_PLACE == sendbuf) && (rank == tree->tree_root)) ) {
local_op_buffer = sendtmpbuf + (segindex-1) * segment_increment;
local_op_buffer = sendtmpbuf + (ptrdiff_t)(segindex-1) * (ptrdiff_t)segment_increment;
}
}
ompi_op_reduce(op, local_op_buffer, accumulator, prevcount,
@ -251,7 +253,7 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c
count_by_segment = original_count;
}
ret = MCA_PML_CALL( send((char*)sendbuf +
segindex * segment_increment,
(ptrdiff_t)segindex * (ptrdiff_t)segment_increment,
count_by_segment, datatype,
tree->tree_prev,
MCA_COLL_BASE_TAG_REDUCE,
@ -281,7 +283,7 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c
/* post first group of requests */
for (segindex = 0; segindex < max_outstanding_reqs; segindex++) {
ret = MCA_PML_CALL( isend((char*)sendbuf +
segindex * segment_increment,
(ptrdiff_t)segindex * (ptrdiff_t)segment_increment,
count_by_segment, datatype,
tree->tree_prev,
MCA_COLL_BASE_TAG_REDUCE,
@ -302,7 +304,7 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c
count_by_segment = original_count;
}
ret = MCA_PML_CALL( isend((char*)sendbuf +
segindex * segment_increment,
(ptrdiff_t)segindex * (ptrdiff_t)segment_increment,
count_by_segment, datatype,
tree->tree_prev,
MCA_COLL_BASE_TAG_REDUCE,
@ -521,7 +523,7 @@ int ompi_coll_tuned_reduce_intra_in_order_binary( void *sendbuf, void *recvbuf,
ompi_datatype_get_true_extent(datatype, &tlb, &text);
if ((root == rank) && (MPI_IN_PLACE == sendbuf)) {
tmpbuf = (char *) malloc(text + (count - 1) * ext);
tmpbuf = (char *) malloc(text + (ptrdiff_t)(count - 1) * ext);
if (NULL == tmpbuf) {
return MPI_ERR_INTERN;
}
@ -530,7 +532,7 @@ int ompi_coll_tuned_reduce_intra_in_order_binary( void *sendbuf, void *recvbuf,
(char*)recvbuf);
use_this_sendbuf = tmpbuf;
} else if (io_root == rank) {
tmpbuf = (char *) malloc(text + (count - 1) * ext);
tmpbuf = (char *) malloc(text + (ptrdiff_t)(count - 1) * ext);
if (NULL == tmpbuf) {
return MPI_ERR_INTERN;
}
@ -631,7 +633,7 @@ ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
if (MPI_IN_PLACE == sbuf) {
sbuf = rbuf;
inplace_temp = (char*)malloc(true_extent + (count - 1) * extent);
inplace_temp = (char*)malloc(true_extent + (ptrdiff_t)(count - 1) * extent);
if (NULL == inplace_temp) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
@ -639,7 +641,7 @@ ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
}
if (size > 1) {
free_buffer = (char*)malloc(true_extent + (count - 1) * extent);
free_buffer = (char*)malloc(true_extent + (ptrdiff_t)(count - 1) * extent);
if (NULL == free_buffer) {
return OMPI_ERR_OUT_OF_RESOURCE;
}

View file

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2009 The University of Tennessee and The University
* Copyright (c) 2004-2012 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -81,7 +81,7 @@ int ompi_coll_tuned_reduce_scatter_intra_nonoverlapping(void *sbuf, void *rbuf,
ompi_datatype_get_extent(dtype, &lb, &extent);
ompi_datatype_get_true_extent(dtype, &tlb, &textent);
tmprbuf_free = (char*) malloc(textent + (total_count - 1)*extent);
tmprbuf_free = (char*) malloc(textent + (ptrdiff_t)(total_count - 1) * extent);
tmprbuf = tmprbuf_free - lb;
}
err = comm->c_coll.coll_reduce (sbuf, tmprbuf, total_count,
@ -164,7 +164,7 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf,
/* get datatype information */
ompi_datatype_get_extent(dtype, &lb, &extent);
ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent);
buf_size = true_extent + (count - 1) * extent;
buf_size = true_extent + (ptrdiff_t)(count - 1) * extent;
/* Handle MPI_IN_PLACE */
if (MPI_IN_PLACE == sbuf) {
@ -296,7 +296,7 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf,
/* actual data transfer. Send from result_buf,
receive into recv_buf */
if (send_count > 0 && recv_count != 0) {
err = MCA_PML_CALL(irecv(recv_buf + tmp_disps[recv_index] * extent,
err = MCA_PML_CALL(irecv(recv_buf + (ptrdiff_t)tmp_disps[recv_index] * extent,
recv_count, dtype, peer,
MCA_COLL_BASE_TAG_REDUCE_SCATTER,
comm, &request));
@ -307,7 +307,7 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf,
}
}
if (recv_count > 0 && send_count != 0) {
err = MCA_PML_CALL(send(result_buf + tmp_disps[send_index] * extent,
err = MCA_PML_CALL(send(result_buf + (ptrdiff_t)tmp_disps[send_index] * extent,
send_count, dtype, peer,
MCA_COLL_BASE_TAG_REDUCE_SCATTER,
MCA_PML_BASE_SEND_STANDARD,
@ -331,8 +331,8 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf,
the results buffer */
if (recv_count > 0) {
ompi_op_reduce(op,
recv_buf + tmp_disps[recv_index] * extent,
result_buf + tmp_disps[recv_index] * extent,
recv_buf + (ptrdiff_t)tmp_disps[recv_index] * extent,
result_buf + (ptrdiff_t)tmp_disps[recv_index] * extent,
recv_count, dtype);
}
@ -360,7 +360,7 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf,
/* Now fix up the non-power of two case, by having the odd
procs send the even procs the proper results */
if (rank < 2 * remain) {
if (rank < (2 * remain)) {
if ((rank & 1) == 0) {
if (rcounts[rank]) {
err = MCA_PML_CALL(recv(rbuf, rcounts[rank], dtype, rank + 1,
@ -514,9 +514,9 @@ ompi_coll_tuned_reduce_scatter_intra_ring(void *sbuf, void *rbuf, int *rcounts,
ret = ompi_datatype_type_size( dtype, &typelng);
if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; }
max_real_segsize = true_extent + (max_block_count - 1) * extent;
max_real_segsize = true_extent + (ptrdiff_t)(max_block_count - 1) * extent;
accumbuf_free = (char*)malloc(true_extent + (total_count - 1) * extent);
accumbuf_free = (char*)malloc(true_extent + (ptrdiff_t)(total_count - 1) * extent);
if (NULL == accumbuf_free) { ret = -1; line = __LINE__; goto error_hndl; }
accumbuf = accumbuf_free - lb;
@ -564,7 +564,7 @@ ompi_coll_tuned_reduce_scatter_intra_ring(void *sbuf, void *rbuf, int *rcounts,
MCA_COLL_BASE_TAG_REDUCE_SCATTER, comm,
&reqs[inbi]));
if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; }
tmpsend = accumbuf + displs[recv_from] * extent;
tmpsend = accumbuf + (ptrdiff_t)displs[recv_from] * extent;
ret = MCA_PML_CALL(send(tmpsend, rcounts[recv_from], dtype, send_to,
MCA_COLL_BASE_TAG_REDUCE_SCATTER,
MCA_PML_BASE_SEND_STANDARD, comm));
@ -588,7 +588,7 @@ ompi_coll_tuned_reduce_scatter_intra_ring(void *sbuf, void *rbuf, int *rcounts,
/* Apply operation on previous block: result goes to rbuf
rbuf[prevblock] = inbuf[inbi ^ 0x1] (op) rbuf[prevblock]
*/
tmprecv = accumbuf + displs[prevblock] * extent;
tmprecv = accumbuf + (ptrdiff_t)displs[prevblock] * extent;
ompi_op_reduce(op, inbuf[inbi ^ 0x1], tmprecv, rcounts[prevblock], dtype);
/* send previous block to send_to */
@ -604,11 +604,11 @@ ompi_coll_tuned_reduce_scatter_intra_ring(void *sbuf, void *rbuf, int *rcounts,
/* Apply operation on the last block (my block)
rbuf[rank] = inbuf[inbi] (op) rbuf[rank] */
tmprecv = accumbuf + displs[rank] * extent;
tmprecv = accumbuf + (ptrdiff_t)displs[rank] * extent;
ompi_op_reduce(op, inbuf[inbi], tmprecv, rcounts[rank], dtype);
/* Copy result from tmprecv to rbuf */
ret = ompi_datatype_copy_content_same_ddt(dtype, rcounts[rank], (char *) rbuf, tmprecv);
ret = ompi_datatype_copy_content_same_ddt(dtype, rcounts[rank], (char *)rbuf, tmprecv);
if (ret < 0) { line = __LINE__; goto error_hndl; }
if (NULL != displs) free(displs);

View file

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* Copyright (c) 2004-2012 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -84,7 +84,7 @@ ompi_coll_tuned_scatter_intra_binomial(void *sbuf, int scount,
}
} else {
/* root is not on 0, allocate temp buffer for send */
tempbuf = (char *) malloc(strue_extent + (scount*size - 1) * sextent);
tempbuf = (char *) malloc(strue_extent + ((ptrdiff_t)scount * (ptrdiff_t)size - 1) * sextent);
if (NULL == tempbuf) {
err = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl;
}
@ -92,13 +92,13 @@ ompi_coll_tuned_scatter_intra_binomial(void *sbuf, int scount,
ptmp = tempbuf - slb;
/* and rotate data so they will eventually be in the right place */
err = ompi_datatype_copy_content_same_ddt(sdtype, scount*(size - root),
ptmp, (char *) sbuf + sextent*root*scount);
err = ompi_datatype_copy_content_same_ddt(sdtype, (ptrdiff_t)scount * (ptrdiff_t)(size - root),
ptmp, (char *) sbuf + sextent * (ptrdiff_t)root * (ptrdiff_t)scount);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
err = ompi_datatype_copy_content_same_ddt(sdtype, scount*root,
ptmp + sextent*scount*(size - root), (char *) sbuf);
err = ompi_datatype_copy_content_same_ddt(sdtype, (ptrdiff_t)scount * (ptrdiff_t)root,
ptmp + sextent * (ptrdiff_t)scount * (ptrdiff_t)(size - root), (char *)sbuf);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
if (rbuf != MPI_IN_PLACE) {
@ -112,7 +112,7 @@ ompi_coll_tuned_scatter_intra_binomial(void *sbuf, int scount,
} else if (!(vrank % 2)) {
/* non-root, non-leaf nodes, allocate temp buffer for recv
* the most we need is rcount*size/2 */
tempbuf = (char *) malloc(rtrue_extent + (rcount*size - 1) * rextent);
tempbuf = (char *) malloc(rtrue_extent + ((ptrdiff_t)rcount * (ptrdiff_t)size - 1) * rextent);
if (NULL == tempbuf) {
err= OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl;
}
@ -131,7 +131,7 @@ ompi_coll_tuned_scatter_intra_binomial(void *sbuf, int scount,
if (!(vrank % 2)) {
if (rank != root) {
/* recv from parent on non-root */
err = MCA_PML_CALL(recv(ptmp, rcount*size, rdtype, bmtree->tree_prev,
err = MCA_PML_CALL(recv(ptmp, (ptrdiff_t)rcount * (ptrdiff_t)size, rdtype, bmtree->tree_prev,
MCA_COLL_BASE_TAG_SCATTER, comm, &status));
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
/* local copy to rbuf */
@ -141,15 +141,16 @@ ompi_coll_tuned_scatter_intra_binomial(void *sbuf, int scount,
}
/* send to children on all non-leaf */
for (i = 0; i < bmtree->tree_nextsize; i++) {
int mycount = 0, vkid;
size_t mycount = 0;
int vkid;
/* figure out how much data I have to send to this child */
vkid = (bmtree->tree_next[i] - root + size) % size;
mycount = vkid - vrank;
if (mycount > (size - vkid))
if( (int)mycount > (size - vkid) )
mycount = size - vkid;
mycount *= scount;
err = MCA_PML_CALL(send(ptmp + total_send*sextent, mycount, sdtype,
err = MCA_PML_CALL(send(ptmp + (ptrdiff_t)total_send * sextent, mycount, sdtype,
bmtree->tree_next[i],
MCA_COLL_BASE_TAG_SCATTER,
MCA_PML_BASE_SEND_STANDARD, comm));

View file

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* Copyright (c) 2004-2012 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -25,37 +25,37 @@
BEGIN_C_DECLS
typedef struct ompi_coll_tree_t {
int32_t tree_root;
int32_t tree_fanout;
int32_t tree_bmtree;
int32_t tree_prev;
int32_t tree_next[MAXTREEFANOUT];
int32_t tree_nextsize;
} ompi_coll_tree_t;
typedef struct ompi_coll_tree_t {
int32_t tree_root;
int32_t tree_fanout;
int32_t tree_bmtree;
int32_t tree_prev;
int32_t tree_next[MAXTREEFANOUT];
int32_t tree_nextsize;
} ompi_coll_tree_t;
ompi_coll_tree_t*
ompi_coll_tuned_topo_build_tree( int fanout,
struct ompi_communicator_t* com,
int root );
ompi_coll_tree_t*
ompi_coll_tuned_topo_build_in_order_bintree( struct ompi_communicator_t* comm );
ompi_coll_tree_t*
ompi_coll_tuned_topo_build_tree( int fanout,
struct ompi_communicator_t* com,
int root );
ompi_coll_tree_t*
ompi_coll_tuned_topo_build_in_order_bintree( struct ompi_communicator_t* comm );
ompi_coll_tree_t*
ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
int root );
ompi_coll_tree_t*
ompi_coll_tuned_topo_build_in_order_bmtree( struct ompi_communicator_t* comm,
int root );
ompi_coll_tree_t*
ompi_coll_tuned_topo_build_chain( int fanout,
struct ompi_communicator_t* com,
int root );
ompi_coll_tree_t*
ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
int root );
ompi_coll_tree_t*
ompi_coll_tuned_topo_build_in_order_bmtree( struct ompi_communicator_t* comm,
int root );
ompi_coll_tree_t*
ompi_coll_tuned_topo_build_chain( int fanout,
struct ompi_communicator_t* com,
int root );
int ompi_coll_tuned_topo_destroy_tree( ompi_coll_tree_t** tree );
int ompi_coll_tuned_topo_destroy_tree( ompi_coll_tree_t** tree );
/* debugging stuff, will be removed later */
int ompi_coll_tuned_topo_dump_tree (ompi_coll_tree_t* tree, int rank);
/* debugging stuff, will be removed later */
int ompi_coll_tuned_topo_dump_tree (ompi_coll_tree_t* tree, int rank);
END_C_DECLS

View file

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* Copyright (c) 2004-2012 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2007 High Performance Computing Center Stuttgart,