- instead of coll_base_comm_get_reqs(2) for irecv/isend, use only
  one request allocated on the stack and do an irecv/send

 - instead of ompi_request_wait_all(2), simply ompi_request_wait

Signed-off-by: Gilles Gouaillardet <gilles@rist.or.jp>
Gilles Gouaillardet, 2016-11-13 22:31:37 -07:00
parent 99d30353af
commit fc776e3fa5
4 changed files, 27 additions and 52 deletions
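
The pattern, sketched below in plain MPI rather than the internal PML API (the rank, peer, and buffer names are illustrative, not from the commit): post the receive nonblocking, make the send a plain blocking call, and wait on the single receive request, which can live on the stack. Since the irecv is posted before the matching send on both sides, the blocking send cannot deadlock the exchange.

    #include <mpi.h>
    #include <stdio.h>

    int main(int argc, char **argv)
    {
        int rank, size, out, in;
        MPI_Request req;                 /* one request, on the stack */

        MPI_Init(&argc, &argv);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_size(MPI_COMM_WORLD, &size);

        if (size >= 2 && rank < 2) {
            int peer = 1 - rank;         /* ranks 0 and 1 exchange */
            out = 100 + rank;
            /* post the receive first, then send with a blocking call */
            MPI_Irecv(&in, 1, MPI_INT, peer, 0, MPI_COMM_WORLD, &req);
            MPI_Send(&out, 1, MPI_INT, peer, 0, MPI_COMM_WORLD);
            /* a single wait replaces a waitall on two requests */
            MPI_Wait(&req, MPI_STATUS_IGNORE);
            printf("rank %d received %d\n", rank, in);
        }

        MPI_Finalize();
        return 0;
    }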

ompi/mca/coll/base/coll_base_alltoall.c

@@ -41,10 +41,9 @@ mca_coll_base_alltoall_intra_basic_inplace(const void *rbuf, int rcount,
                                            struct ompi_communicator_t *comm,
                                            mca_coll_base_module_t *module)
 {
-    mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module;
     int i, j, size, rank, err = MPI_SUCCESS, line;
     OPAL_PTRDIFF_TYPE ext, gap;
-    ompi_request_t **preq, **reqs;
+    ompi_request_t *req;
     char *allocated_buffer = NULL, *tmp_buffer;
     size_t max_size;
@@ -63,8 +62,6 @@ mca_coll_base_alltoall_intra_basic_inplace(const void *rbuf, int rcount,
     max_size = opal_datatype_span(&rdtype->super, rcount, &gap);
 
     /* Initiate all send/recv to/from others. */
-    reqs = coll_base_comm_get_reqs(base_module->base_data, 2);
-    if( NULL == reqs ) { err = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto error_hndl; }
 
     /* Allocate a temporary buffer */
     allocated_buffer = calloc (max_size, 1);
@@ -75,8 +72,6 @@ mca_coll_base_alltoall_intra_basic_inplace(const void *rbuf, int rcount,
     /* in-place alltoall slow algorithm (but works) */
     for (i = 0 ; i < size ; ++i) {
         for (j = i+1 ; j < size ; ++j) {
-            preq = reqs;
             if (i == rank) {
                 /* Copy the data into the temporary buffer */
                 err = ompi_datatype_copy_content_same_ddt (rdtype, rcount, tmp_buffer,
@@ -85,12 +80,12 @@ mca_coll_base_alltoall_intra_basic_inplace(const void *rbuf, int rcount,
                 /* Exchange data with the peer */
                 err = MCA_PML_CALL(irecv ((char *) rbuf + max_size * j, rcount, rdtype,
-                                          j, MCA_COLL_BASE_TAG_ALLTOALL, comm, preq++));
+                                          j, MCA_COLL_BASE_TAG_ALLTOALL, comm, &req));
                 if (MPI_SUCCESS != err) { line = __LINE__; goto error_hndl; }
-                err = MCA_PML_CALL(isend ((char *) tmp_buffer, rcount, rdtype,
+                err = MCA_PML_CALL(send ((char *) tmp_buffer, rcount, rdtype,
                                           j, MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD,
-                                          comm, preq++));
+                                          comm));
                 if (MPI_SUCCESS != err) { line = __LINE__; goto error_hndl; }
             } else if (j == rank) {
                 /* Copy the data into the temporary buffer */
@@ -100,19 +95,19 @@ mca_coll_base_alltoall_intra_basic_inplace(const void *rbuf, int rcount,
                 /* Exchange data with the peer */
                 err = MCA_PML_CALL(irecv ((char *) rbuf + max_size * i, rcount, rdtype,
-                                          i, MCA_COLL_BASE_TAG_ALLTOALL, comm, preq++));
+                                          i, MCA_COLL_BASE_TAG_ALLTOALL, comm, &req));
                 if (MPI_SUCCESS != err) { line = __LINE__; goto error_hndl; }
-                err = MCA_PML_CALL(isend ((char *) tmp_buffer, rcount, rdtype,
+                err = MCA_PML_CALL(send ((char *) tmp_buffer, rcount, rdtype,
                                           i, MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD,
-                                          comm, preq++));
+                                          comm));
                 if (MPI_SUCCESS != err) { line = __LINE__; goto error_hndl; }
             } else {
                 continue;
             }
 
             /* Wait for the requests to complete */
-            err = ompi_request_wait_all (2, reqs, MPI_STATUSES_IGNORE);
+            err = ompi_request_wait (&req, MPI_STATUSES_IGNORE);
             if (MPI_SUCCESS != err) { line = __LINE__; goto error_hndl; }
         }
     }
@@ -127,7 +122,6 @@ mca_coll_base_alltoall_intra_basic_inplace(const void *rbuf, int rcount,
                      "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err,
                      rank));
         (void)line;  // silence compiler warning
-        ompi_coll_base_free_reqs(reqs, 2);
     }
 
     /* All done */
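
For context, the in-place alltoall above visits every pair (i, j); the two members swap slots by copying the outgoing slot to a temporary, posting irecv into the slot, blocking-sending the temporary, and waiting on the single request. A self-contained sketch of that schedule in plain MPI, one int per peer (all names are illustrative):

    #include <mpi.h>
    #include <stdio.h>

    int main(int argc, char **argv)
    {
        int i, j, rank, size, buf[64];   /* one int per peer, size <= 64 */

        MPI_Init(&argc, &argv);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_size(MPI_COMM_WORLD, &size);
        if (size > 64) MPI_Abort(MPI_COMM_WORLD, 1);

        for (i = 0; i < size; i++)
            buf[i] = rank * 1000 + i;    /* data destined for rank i */

        for (i = 0; i < size; i++) {
            for (j = i + 1; j < size; j++) {
                int tmp, peer;
                MPI_Request req;
                if (rank == i)      peer = j;
                else if (rank == j) peer = i;
                else continue;           /* this rank sits the pair out */
                tmp = buf[peer];         /* copy out before overwriting in place */
                MPI_Irecv(&buf[peer], 1, MPI_INT, peer, 0, MPI_COMM_WORLD, &req);
                MPI_Send(&tmp, 1, MPI_INT, peer, 0, MPI_COMM_WORLD);
                MPI_Wait(&req, MPI_STATUS_IGNORE);
            }
        }

        for (i = 0; i < size; i++)       /* verify: slot i holds rank i's data for us */
            if (buf[i] != i * 1000 + rank)
                printf("rank %d: mismatch at slot %d\n", rank, i);

        MPI_Finalize();
        return 0;
    }

Both members of a pair finish the exchange before moving on, so one stack-allocated request per iteration is all the algorithm ever has in flight.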

ompi/mca/coll/base/coll_base_alltoallv.c

@@ -42,9 +42,8 @@ mca_coll_base_alltoallv_intra_basic_inplace(const void *rbuf, const int *rcounts,
                                             struct ompi_communicator_t *comm,
                                             mca_coll_base_module_t *module)
 {
-    mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module;
     int i, j, size, rank, err=MPI_SUCCESS;
-    ompi_request_t **preq, **reqs;
+    ompi_request_t *req;
     char *allocated_buffer, *tmp_buffer;
     size_t max_size, rdtype_size;
     OPAL_PTRDIFF_TYPE ext, gap = 0;
@@ -76,14 +75,9 @@ mca_coll_base_alltoallv_intra_basic_inplace(const void *rbuf, const int *rcounts,
     tmp_buffer = allocated_buffer - gap;
 
-    /* Initiate all send/recv to/from others. */
-    reqs = preq = coll_base_comm_get_reqs(base_module->base_data, 2);
-    if( NULL == reqs ) { err = OMPI_ERR_OUT_OF_RESOURCE; goto error_hndl; }
-
     /* in-place alltoallv slow algorithm (but works) */
     for (i = 0 ; i < size ; ++i) {
         for (j = i+1 ; j < size ; ++j) {
-            preq = reqs;
             if (i == rank && rcounts[j]) {
                 /* Copy the data into the temporary buffer */
                 err = ompi_datatype_copy_content_same_ddt (rdtype, rcounts[j],
@@ -92,12 +86,12 @@ mca_coll_base_alltoallv_intra_basic_inplace(const void *rbuf, const int *rcounts,
                 /* Exchange data with the peer */
                 err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[j] * ext, rcounts[j], rdtype,
-                                          j, MCA_COLL_BASE_TAG_ALLTOALLV, comm, preq++));
+                                          j, MCA_COLL_BASE_TAG_ALLTOALLV, comm, &req));
                 if (MPI_SUCCESS != err) { goto error_hndl; }
-                err = MCA_PML_CALL(isend ((void *) tmp_buffer, rcounts[j], rdtype,
+                err = MCA_PML_CALL(send ((void *) tmp_buffer, rcounts[j], rdtype,
                                           j, MCA_COLL_BASE_TAG_ALLTOALLV, MCA_PML_BASE_SEND_STANDARD,
-                                          comm, preq++));
+                                          comm));
                 if (MPI_SUCCESS != err) { goto error_hndl; }
             } else if (j == rank && rcounts[i]) {
                 /* Copy the data into the temporary buffer */
@@ -107,19 +101,19 @@ mca_coll_base_alltoallv_intra_basic_inplace(const void *rbuf, const int *rcounts,
                 /* Exchange data with the peer */
                 err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[i] * ext, rcounts[i], rdtype,
-                                          i, MCA_COLL_BASE_TAG_ALLTOALLV, comm, preq++));
+                                          i, MCA_COLL_BASE_TAG_ALLTOALLV, comm, &req));
                 if (MPI_SUCCESS != err) { goto error_hndl; }
-                err = MCA_PML_CALL(isend ((void *) tmp_buffer, rcounts[i], rdtype,
+                err = MCA_PML_CALL(send ((void *) tmp_buffer, rcounts[i], rdtype,
                                           i, MCA_COLL_BASE_TAG_ALLTOALLV, MCA_PML_BASE_SEND_STANDARD,
-                                          comm, preq++));
+                                          comm));
                 if (MPI_SUCCESS != err) { goto error_hndl; }
             } else {
                 continue;
             }
 
             /* Wait for the requests to complete */
-            err = ompi_request_wait_all (2, reqs, MPI_STATUSES_IGNORE);
+            err = ompi_request_wait (&req, MPI_STATUSES_IGNORE);
             if (MPI_SUCCESS != err) { goto error_hndl; }
         }
     }
@@ -127,9 +121,6 @@ mca_coll_base_alltoallv_intra_basic_inplace(const void *rbuf, const int *rcounts,
  error_hndl:
     /* Free the temporary buffer */
     free (allocated_buffer);
-    if( MPI_SUCCESS != err ) {
-        ompi_coll_base_free_reqs(reqs, 2 );
-    }
 
     /* All done */
     return err;

ompi/mca/coll/base/coll_base_reduce.c

@@ -168,8 +168,8 @@ int ompi_coll_base_reduce_generic( const void* sendbuf, void* recvbuf, int original_count,
                        if there are no requests reqs[inbi ^ 1] will be
                        MPI_REQUEST_NULL. */
                     /* wait on data from last child for previous segment */
-                    ret = ompi_request_wait_all( 1, &reqs[inbi ^ 1],
-                                                 MPI_STATUSES_IGNORE );
+                    ret = ompi_request_wait( &reqs[inbi ^ 1],
+                                             MPI_STATUSES_IGNORE );
                     if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
                     local_op_buffer = inbuf[inbi ^ 1];
                     if( i > 0 ) {
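
The reduce change is the degenerate case: a wait_all with a count of one is just a single wait. As the comment above notes, the slot may hold MPI_REQUEST_NULL, and waiting on a null request is defined by MPI to return immediately, so the swap preserves behavior. A minimal illustration in plain MPI:

    #include <mpi.h>
    #include <stdio.h>

    int main(int argc, char **argv)
    {
        MPI_Request req = MPI_REQUEST_NULL;

        MPI_Init(&argc, &argv);
        /* waiting on a null request is a defined no-op: it returns
           at once with an empty status, so no special casing is needed */
        MPI_Wait(&req, MPI_STATUS_IGNORE);
        printf("MPI_Wait on MPI_REQUEST_NULL returned immediately\n");
        MPI_Finalize();
        return 0;
    }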

ompi/mca/coll/basic/coll_basic_alltoallw.c

@@ -14,7 +14,7 @@
  * Copyright (c) 2013      Los Alamos National Security, LLC. All rights
  *                         reserved.
  * Copyright (c) 2013      FUJITSU LIMITED.  All rights reserved.
- * Copyright (c) 2014-2015 Research Organization for Information Science
+ * Copyright (c) 2014-2016 Research Organization for Information Science
  *                         and Technology (RIST). All rights reserved.
  * Copyright (c) 2014      Cisco Systems, Inc.  All rights reserved.
  * $COPYRIGHT$
@@ -42,7 +42,7 @@ mca_coll_basic_alltoallw_intra_inplace(const void *rbuf, const int *rcounts, const int *rdisps,
                                        mca_coll_base_module_t *module)
 {
     int i, j, size, rank, err = MPI_SUCCESS, max_size;
-    ompi_request_t **preq, **reqs = NULL;
+    ompi_request_t *req;
     char *tmp_buffer, *save_buffer = NULL;
     ptrdiff_t ext, gap = 0;
@@ -70,9 +70,6 @@ mca_coll_basic_alltoallw_intra_inplace(const void *rbuf, const int *rcounts, const int *rdisps,
     }
     tmp_buffer -= gap;
 
-    reqs = coll_base_comm_get_reqs( module->base_data, 2);
-    if( NULL == reqs ) { err = OMPI_ERR_OUT_OF_RESOURCE; goto error_hndl; }
-
     /* in-place alltoallw slow algorithm (but works) */
     for (i = 0 ; i < size ; ++i) {
         size_t msg_size_i;
@@ -84,8 +81,6 @@ mca_coll_basic_alltoallw_intra_inplace(const void *rbuf, const int *rcounts, const int *rdisps,
             msg_size_j *= rcounts[j];
 
-            /* Initiate all send/recv to/from others. */
-            preq = reqs;
             if (i == rank && msg_size_j != 0) {
                 /* Copy the data into the temporary buffer */
                 err = ompi_datatype_copy_content_same_ddt (rdtypes[j], rcounts[j],
@@ -94,12 +89,12 @@ mca_coll_basic_alltoallw_intra_inplace(const void *rbuf, const int *rcounts, const int *rdisps,
                 /* Exchange data with the peer */
                 err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[j], rcounts[j], rdtypes[j],
-                                          j, MCA_COLL_BASE_TAG_ALLTOALLW, comm, preq++));
+                                          j, MCA_COLL_BASE_TAG_ALLTOALLW, comm, &req));
                 if (MPI_SUCCESS != err) { goto error_hndl; }
-                err = MCA_PML_CALL(isend ((void *) tmp_buffer, rcounts[j], rdtypes[j],
+                err = MCA_PML_CALL(send ((void *) tmp_buffer, rcounts[j], rdtypes[j],
                                           j, MCA_COLL_BASE_TAG_ALLTOALLW, MCA_PML_BASE_SEND_STANDARD,
-                                          comm, preq++));
+                                          comm));
                 if (MPI_SUCCESS != err) { goto error_hndl; }
             } else if (j == rank && msg_size_i != 0) {
                 /* Copy the data into the temporary buffer */
@@ -109,19 +104,19 @@ mca_coll_basic_alltoallw_intra_inplace(const void *rbuf, const int *rcounts, const int *rdisps,
                 /* Exchange data with the peer */
                 err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[i], rcounts[i], rdtypes[i],
-                                          i, MCA_COLL_BASE_TAG_ALLTOALLW, comm, preq++));
+                                          i, MCA_COLL_BASE_TAG_ALLTOALLW, comm, &req));
                 if (MPI_SUCCESS != err) { goto error_hndl; }
-                err = MCA_PML_CALL(isend ((void *) tmp_buffer, rcounts[i], rdtypes[i],
+                err = MCA_PML_CALL(send ((void *) tmp_buffer, rcounts[i], rdtypes[i],
                                           i, MCA_COLL_BASE_TAG_ALLTOALLW, MCA_PML_BASE_SEND_STANDARD,
-                                          comm, preq++));
+                                          comm));
                 if (MPI_SUCCESS != err) { goto error_hndl; }
             } else {
                 continue;
             }
 
             /* Wait for the requests to complete */
-            err = ompi_request_wait_all (2, reqs, MPI_STATUSES_IGNORE);
+            err = ompi_request_wait (&req, MPI_STATUSES_IGNORE);
             if (MPI_SUCCESS != err) { goto error_hndl; }
         }
     }
@@ -129,11 +124,6 @@ mca_coll_basic_alltoallw_intra_inplace(const void *rbuf, const int *rcounts, const int *rdisps,
  error_hndl:
     /* Free the temporary buffer */
     free (save_buffer);
-    if( MPI_SUCCESS != err ) { /* Free the requests. */
-        if( NULL != reqs ) {
-            ompi_coll_base_free_reqs(reqs, 2);
-        }
-    }
 
     /* All done */