
Never allocate a temporary array for the requests. Instead, rely on the
module_data to hold one of the largest size needed so far. The array is
only allocated when needed, and it is released upon communicator
destruction.
This commit is contained in:
George Bosilca 2015-09-19 21:14:40 -04:00
parent 8cc39f7192
commit a324602174
8 changed files with 82 additions and 99 deletions
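
Taken in isolation, the caching scheme works as sketched below. This is a minimal, self-contained illustration under stand-in names (request_t, REQUEST_NULL, comm_data_t and get_cached_reqs are hypothetical, not the Open MPI API); the real accessor is coll_base_comm_get_reqs, whose implementation appears in the diff further down.

/* Sketch of the request-array caching this commit introduces: the array is
 * attached to the communicator, grown on demand, and freed only when the
 * communicator is destroyed. Stand-ins: request_t ~ ompi_request_t,
 * REQUEST_NULL ~ MPI_REQUEST_NULL, comm_data_t ~ mca_coll_base_comm_t. */
#include <assert.h>
#include <stdlib.h>

typedef struct request request_t;      /* opaque request handle */
#define REQUEST_NULL ((request_t *)0)

typedef struct {
    request_t **reqs;                  /* cached array, owned by the communicator */
    int num_reqs;                      /* its current capacity */
} comm_data_t;

/* Grow-on-demand accessor: reuse the cached array when it is already large
 * enough, otherwise grow it; freshly exposed slots start as REQUEST_NULL. */
static request_t **get_cached_reqs(comm_data_t *data, int nreqs)
{
    if (data->num_reqs < nreqs) {
        data->reqs = (request_t **)realloc(data->reqs,
                                           sizeof(request_t *) * nreqs);
        if (NULL == data->reqs) {
            data->num_reqs = 0;        /* allocation failed: nothing to return */
            return NULL;
        }
        for (int i = data->num_reqs; i < nreqs; i++)
            data->reqs[i] = REQUEST_NULL;
        data->num_reqs = nreqs;
    }
    return data->reqs;
}

/* Runs once, from the communicator destructor. */
static void comm_data_destruct(comm_data_t *data)
{
    free(data->reqs);
    data->reqs = NULL;
    data->num_reqs = 0;
}

int main(void)
{
    comm_data_t data = { NULL, 0 };
    request_t **a = get_cached_reqs(&data, 4);  /* first collective: allocates */
    request_t **b = get_cached_reqs(&data, 2);  /* later, smaller: pure reuse */
    assert(NULL != a && a == b);
    comm_data_destruct(&data);                  /* released with the communicator */
    return 0;
}

Once a communicator has run its largest collective, every later call is allocation-free, which is the point of deleting the per-call malloc/calloc/free pairs in the hunks below.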

View file

@@ -42,7 +42,7 @@ mca_coll_base_alltoall_intra_basic_inplace(const void *rbuf, int rcount,
mca_coll_base_module_t *module)
{
mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module;
-int i, j, size, rank, err=MPI_SUCCESS;
+int i, j, size, rank, err = MPI_SUCCESS, line;
MPI_Request *preq;
char *tmp_buffer;
size_t max_size;
@@ -78,39 +78,39 @@ mca_coll_base_alltoall_intra_basic_inplace(const void *rbuf, int rcount,
/* Copy the data into the temporary buffer */
err = ompi_datatype_copy_content_same_ddt (rdtype, rcount, tmp_buffer,
(char *) rbuf + j * max_size);
-if (MPI_SUCCESS != err) { goto error_hndl; }
+if (MPI_SUCCESS != err) { line = __LINE__; goto error_hndl; }
/* Exchange data with the peer */
err = MCA_PML_CALL(irecv ((char *) rbuf + max_size * j, rcount, rdtype,
j, MCA_COLL_BASE_TAG_ALLTOALL, comm, preq++));
-if (MPI_SUCCESS != err) { goto error_hndl; }
+if (MPI_SUCCESS != err) { line = __LINE__; goto error_hndl; }
err = MCA_PML_CALL(isend ((char *) tmp_buffer, rcount, rdtype,
j, MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD,
comm, preq++));
-if (MPI_SUCCESS != err) { goto error_hndl; }
+if (MPI_SUCCESS != err) { line = __LINE__; goto error_hndl; }
} else if (j == rank) {
/* Copy the data into the temporary buffer */
err = ompi_datatype_copy_content_same_ddt (rdtype, rcount, tmp_buffer,
(char *) rbuf + i * max_size);
-if (MPI_SUCCESS != err) { goto error_hndl; }
+if (MPI_SUCCESS != err) { line = __LINE__; goto error_hndl; }
/* Exchange data with the peer */
err = MCA_PML_CALL(irecv ((char *) rbuf + max_size * i, rcount, rdtype,
i, MCA_COLL_BASE_TAG_ALLTOALL, comm, preq++));
-if (MPI_SUCCESS != err) { goto error_hndl; }
+if (MPI_SUCCESS != err) { line = __LINE__; goto error_hndl; }
err = MCA_PML_CALL(isend ((char *) tmp_buffer, rcount, rdtype,
i, MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD,
comm, preq++));
-if (MPI_SUCCESS != err) { goto error_hndl; }
+if (MPI_SUCCESS != err) { line = __LINE__; goto error_hndl; }
} else {
continue;
}
/* Wait for the requests to complete */
err = ompi_request_wait_all (2, base_module->base_data->mcct_reqs, MPI_STATUSES_IGNORE);
-if (MPI_SUCCESS != err) { goto error_hndl; }
+if (MPI_SUCCESS != err) { line = __LINE__; goto error_hndl; }
}
}
@@ -118,8 +118,14 @@ mca_coll_base_alltoall_intra_basic_inplace(const void *rbuf, int rcount,
/* Free the temporary buffer */
free (tmp_buffer);
-/* All done */
+if( MPI_SUCCESS != err ) {
+OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
+"%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err,
+rank));
+ompi_coll_base_free_reqs(base_module->base_data->mcct_reqs, 2);
+}
+/* All done */
return err;
}
@@ -383,8 +389,7 @@ int ompi_coll_base_alltoall_intra_linear_sync(const void *sbuf, int scount,
total_reqs = (((max_outstanding_reqs > (size - 1)) ||
(max_outstanding_reqs <= 0)) ?
(size - 1) : (max_outstanding_reqs));
-reqs = (ompi_request_t**) malloc( 2 * total_reqs *
-sizeof(ompi_request_t*));
+reqs = coll_base_comm_get_reqs(module->base_data, 2 * total_reqs);
if (NULL == reqs) { error = -1; line = __LINE__; goto error_hndl; }
prcv = (char *) rbuf;
@@ -456,9 +461,6 @@ int ompi_coll_base_alltoall_intra_linear_sync(const void *sbuf, int scount,
}
}
-/* Free the reqs */
-free(reqs);
/* All done */
return MPI_SUCCESS;
@@ -466,7 +468,7 @@ int ompi_coll_base_alltoall_intra_linear_sync(const void *sbuf, int scount,
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"%s:%4d\tError occurred %d, rank %2d", __FILE__, line, error,
rank));
-if (NULL != reqs) free(reqs);
+ompi_coll_base_free_reqs(reqs, 2 * total_reqs);
return error;
}
@@ -552,7 +554,7 @@ int ompi_coll_base_alltoall_intra_basic_linear(const void *sbuf, int scount,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
-int i, rank, size, err, nreqs;
+int i, rank, size, err, nreqs, line;
char *psnd, *prcv;
MPI_Aint lb, sndinc, rcvinc;
ompi_request_t **req, **sreq, **rreq;
@@ -614,10 +616,7 @@ int ompi_coll_base_alltoall_intra_basic_linear(const void *sbuf, int scount,
err = MCA_PML_CALL(irecv_init
(prcv + (ptrdiff_t)i * rcvinc, rcount, rdtype, i,
MCA_COLL_BASE_TAG_ALLTOALL, comm, rreq));
-if (MPI_SUCCESS != err) {
-ompi_coll_base_free_reqs(req, nreqs);
-return err;
-}
+if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
}
/* Now post all sends in reverse order
@@ -631,10 +630,7 @@ int ompi_coll_base_alltoall_intra_basic_linear(const void *sbuf, int scount,
(psnd + (ptrdiff_t)i * sndinc, scount, sdtype, i,
MCA_COLL_BASE_TAG_ALLTOALL,
MCA_PML_BASE_SEND_STANDARD, comm, sreq));
-if (MPI_SUCCESS != err) {
-ompi_coll_base_free_reqs(req, nreqs);
-return err;
-}
+if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
}
/* Start your engines. This will never return an error. */
@@ -650,8 +646,13 @@ int ompi_coll_base_alltoall_intra_basic_linear(const void *sbuf, int scount,
err = ompi_request_wait_all(nreqs, req, MPI_STATUSES_IGNORE);
-/* Free the reqs */
-ompi_coll_base_free_reqs(req, nreqs);
+err_hndl:
+if( MPI_SUCCESS != err ) {
+OPAL_OUTPUT( (ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d",
+__FILE__, line, err, rank) );
+/* Free the reqs */
+ompi_coll_base_free_reqs(req, nreqs);
+}
/* All done */
return err;

View file

@@ -123,9 +123,11 @@ mca_coll_base_alltoallv_intra_basic_inplace(const void *rbuf, const int *rcounts
error_hndl:
/* Free the temporary buffer */
free (tmp_buffer);
+if( MPI_SUCCESS != err ) {
+ompi_coll_base_free_reqs(base_module->base_data->mcct_reqs, 2 );
+}
/* All done */
return err;
}
@@ -253,8 +255,7 @@ ompi_coll_base_alltoallv_intra_basic_linear(const void *sbuf, const int *scounts
preq++));
++nreqs;
if (MPI_SUCCESS != err) {
-ompi_coll_base_free_reqs(data->mcct_reqs, nreqs);
-return err;
+goto err_hndl;
}
}
@@ -271,8 +272,7 @@ ompi_coll_base_alltoallv_intra_basic_linear(const void *sbuf, const int *scounts
preq++));
++nreqs;
if (MPI_SUCCESS != err) {
-ompi_coll_base_free_reqs(data->mcct_reqs, nreqs);
-return err;
+goto err_hndl;
}
}
@@ -287,9 +287,10 @@ ompi_coll_base_alltoallv_intra_basic_linear(const void *sbuf, const int *scounts
* error after we free everything. */
err = ompi_request_wait_all(nreqs, data->mcct_reqs,
MPI_STATUSES_IGNORE);
-/* Free the requests. */
-ompi_coll_base_free_reqs(data->mcct_reqs, nreqs);
+err_hndl:
+if( MPI_SUCCESS != err ) { /* Free the requests. */
+ompi_coll_base_free_reqs(data->mcct_reqs, nreqs);
+}
return err;
}

View file

@@ -324,7 +324,8 @@ int ompi_coll_base_barrier_intra_two_procs(struct ompi_communicator_t *comm,
int ompi_coll_base_barrier_intra_basic_linear(struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
-int i, err, rank, size;
+int i, err, rank, size, line;
+ompi_request_t** requests = NULL;
rank = ompi_comm_rank(comm);
size = ompi_comm_size(comm);
@@ -334,50 +335,43 @@ int ompi_coll_base_barrier_intra_basic_linear(struct ompi_communicator_t *comm,
err = MCA_PML_CALL(send (NULL, 0, MPI_BYTE, 0,
MCA_COLL_BASE_TAG_BARRIER,
MCA_PML_BASE_SEND_STANDARD, comm));
-if (MPI_SUCCESS != err) {
-return err;
-}
+if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
err = MCA_PML_CALL(recv (NULL, 0, MPI_BYTE, 0,
MCA_COLL_BASE_TAG_BARRIER,
comm, MPI_STATUS_IGNORE));
-if (MPI_SUCCESS != err) {
-return err;
-}
+if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
}
/* The root collects and broadcasts the messages. */
else {
-ompi_request_t** requests;
-requests = (ompi_request_t**)malloc( size * sizeof(ompi_request_t*) );
+requests = coll_base_comm_get_reqs(module->base_data, size);
for (i = 1; i < size; ++i) {
err = MCA_PML_CALL(irecv(NULL, 0, MPI_BYTE, MPI_ANY_SOURCE,
MCA_COLL_BASE_TAG_BARRIER, comm,
&(requests[i])));
-if (MPI_SUCCESS != err) {
-return err;
-}
+if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
}
ompi_request_wait_all( size-1, requests+1, MPI_STATUSES_IGNORE );
+requests = NULL; /* we're done; the requests array is clean */
for (i = 1; i < size; ++i) {
err = MCA_PML_CALL(send(NULL, 0, MPI_BYTE, i,
MCA_COLL_BASE_TAG_BARRIER,
MCA_PML_BASE_SEND_STANDARD, comm));
-if (MPI_SUCCESS != err) {
-return err;
-}
+if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
}
-free( requests );
}
/* All done */
return MPI_SUCCESS;
+err_hndl:
+OPAL_OUTPUT( (ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d",
+__FILE__, line, err, rank) );
+if( NULL != requests )
+ompi_coll_base_free_reqs(requests, size-1);
+return err;
}
/* copied function (with appropriate renaming) ends here */

View file

@@ -66,8 +66,7 @@ ompi_coll_base_bcast_intra_generic( void* buffer,
#if !defined(COLL_BASE_BCAST_USE_BLOCKING)
if( tree->tree_nextsize != 0 ) {
-send_reqs = (ompi_request_t**)malloc( (ptrdiff_t)tree->tree_nextsize *
-sizeof(ompi_request_t*) );
+send_reqs = coll_base_comm_get_reqs(module->base_data, tree->tree_nextsize);
}
#endif
@@ -236,19 +235,16 @@ ompi_coll_base_bcast_intra_generic( void* buffer,
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
}
-#if !defined(COLL_BASE_BCAST_USE_BLOCKING)
-if( NULL != send_reqs ) free(send_reqs);
-#endif
return (MPI_SUCCESS);
error_hndl:
OPAL_OUTPUT( (ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank) );
-#if !defined(COLL_BASE_BCAST_USE_BLOCKING)
-if( NULL != send_reqs ) free(send_reqs);
-#endif
-return (err);
+if( (MPI_SUCCESS != err) && (NULL != send_reqs) ) {
+ompi_coll_base_free_reqs( send_reqs, tree->tree_nextsize);
+}
+return err;
}
int
@@ -665,10 +661,7 @@ ompi_coll_base_bcast_intra_basic_linear(void *buff, int count,
MCA_COLL_BASE_TAG_BCAST,
MCA_PML_BASE_SEND_STANDARD,
comm, preq++));
-if (MPI_SUCCESS != err) {
-ompi_coll_base_free_reqs(data->mcct_reqs, i);
-return err;
-}
+if (MPI_SUCCESS != err) { goto err_hndl; }
}
--i;
@@ -684,9 +677,10 @@ ompi_coll_base_bcast_intra_basic_linear(void *buff, int count,
* the error after we free everything. */
err = ompi_request_wait_all(i, reqs, MPI_STATUSES_IGNORE);
-/* Free the reqs */
-ompi_coll_base_free_reqs(reqs, i);
+err_hndl:
+if( MPI_SUCCESS != err ) { /* Free the reqs */
+ompi_coll_base_free_reqs(reqs, i);
+}
/* All done */
return err;

View file

@@ -83,10 +83,7 @@ static void
coll_base_comm_destruct(mca_coll_base_comm_t *data)
{
if( NULL != data->mcct_reqs ) {
-for( int i = 0; i < data->mcct_num_reqs; ++i ) {
-if( MPI_REQUEST_NULL != data->mcct_reqs[i] )
-ompi_request_free(&data->mcct_reqs[i]);
-}
+ompi_coll_base_free_reqs( data->mcct_reqs, data->mcct_num_reqs );
free(data->mcct_reqs);
data->mcct_reqs = NULL;
data->mcct_num_reqs = 0;
@@ -122,18 +119,13 @@ OBJ_CLASS_INSTANCE(mca_coll_base_comm_t, opal_object_t,
ompi_request_t** coll_base_comm_get_reqs(mca_coll_base_comm_t* data, int nreqs)
{
-int startfrom = data->mcct_num_reqs;
-if( NULL == data->mcct_reqs ) {
-assert(0 == data->mcct_num_reqs);
-data->mcct_reqs = (ompi_request_t**)malloc(sizeof(ompi_request_t*) * nreqs);
-} else if( data->mcct_num_reqs <= nreqs ) {
+if( data->mcct_num_reqs <= nreqs ) {
data->mcct_reqs = (ompi_request_t**)realloc(data->mcct_reqs, sizeof(ompi_request_t*) * nreqs);
}
if( NULL != data->mcct_reqs ) {
-data->mcct_num_reqs = nreqs;
-for( int i = startfrom; i < data->mcct_num_reqs; i++ )
+for( int i = data->mcct_num_reqs; i < nreqs; i++ )
data->mcct_reqs[i] = MPI_REQUEST_NULL;
+data->mcct_num_reqs = nreqs;
} else
data->mcct_num_reqs = 0; /* nothing to return */
return data->mcct_reqs;

View file

@@ -343,11 +343,19 @@ struct mca_coll_base_comm_t {
typedef struct mca_coll_base_comm_t mca_coll_base_comm_t;
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_coll_base_comm_t);
+/**
+ * Free all requests in an array. As these requests are usually used during
+ * collective communications, and as on a successful collective they are
+ * expected to be released during the corresponding wait, the array should
+ * generally be empty. However, this function might be used on error conditions
+ * where it will allow a correct cleanup.
+ */
static inline void ompi_coll_base_free_reqs(ompi_request_t **reqs, int count)
{
int i;
for (i = 0; i < count; ++i)
-ompi_request_free(&reqs[i]);
+if( MPI_REQUEST_NULL != reqs[i] )
+ompi_request_free(&reqs[i]);
}
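
The MPI_REQUEST_NULL guard added here is what makes the new blanket cleanups safe: a completed request is left at MPI_REQUEST_NULL by the wait, so sweeping the whole cached array on an error path skips finished slots instead of releasing a request twice. A small self-contained illustration with stand-in types (req_t, REQ_NULL and req_free are hypothetical, standing in for ompi_request_t, MPI_REQUEST_NULL and ompi_request_free):

#include <stdio.h>

typedef struct { int live; } req_t;
#define REQ_NULL ((req_t *)0)

static void req_free(req_t **r) { (*r)->live = 0; *r = REQ_NULL; }

/* Mirrors the guarded loop above: only outstanding requests are released. */
static void free_reqs(req_t **reqs, int count)
{
    for (int i = 0; i < count; ++i)
        if (REQ_NULL != reqs[i])       /* the guard this commit adds */
            req_free(&reqs[i]);
}

int main(void)
{
    req_t a = {1}, b = {1};
    req_t *reqs[3] = { &a, REQ_NULL, &b };  /* middle slot already completed */
    free_reqs(reqs, 3);                     /* safe: no double free */
    printf("%d %d\n", a.live, b.live);      /* prints "0 0" */
    return 0;
}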

View file

@@ -266,7 +266,7 @@ ompi_coll_base_gather_intra_linear_sync(const void *sbuf, int scount,
*/
char *ptmp;
ompi_request_t *first_segment_req;
-reqs = (ompi_request_t**) calloc(size, sizeof(ompi_request_t*));
+reqs = coll_base_comm_get_reqs(module->base_data, size);
if (NULL == reqs) { ret = -1; line = __LINE__; goto error_hndl; }
ompi_datatype_type_size(rdtype, &typelng);
@@ -319,16 +319,13 @@ ompi_coll_base_gather_intra_linear_sync(const void *sbuf, int scount,
/* wait all second segments to complete */
ret = ompi_request_wait_all(size, reqs, MPI_STATUSES_IGNORE);
if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
-free(reqs);
}
/* All done */
return MPI_SUCCESS;
error_hndl:
if (NULL != reqs) {
-free(reqs);
+ompi_coll_base_free_reqs(reqs, size);
}
OPAL_OUTPUT (( ompi_coll_base_framework.framework_output,
"ERROR_HNDL: node %d file %s line %d error %d\n",
@@ -405,7 +402,6 @@ ompi_coll_base_gather_intra_basic_linear(const void *sbuf, int scount,
}
/* All done */
return MPI_SUCCESS;
}

View file

@@ -56,10 +56,10 @@ int ompi_coll_base_reduce_generic( const void* sendbuf, void* recvbuf, int origi
char *accumbuf = NULL, *accumbuf_free = NULL;
char *local_op_buffer = NULL, *sendtmpbuf = NULL;
ptrdiff_t extent, lower_bound, segment_increment;
-size_t typelng;
-ompi_request_t* reqs[2] = {MPI_REQUEST_NULL, MPI_REQUEST_NULL};
+ompi_request_t **sreq = NULL, *reqs[2] = {MPI_REQUEST_NULL, MPI_REQUEST_NULL};
int num_segments, line, ret, segindex, i, rank;
int recvcount, prevcount, inbi;
+size_t typelng;
/**
* Determine number of segments and number of elements
@@ -279,10 +279,8 @@ int ompi_coll_base_reduce_generic( const void* sendbuf, void* recvbuf, int origi
else {
int creq = 0;
-ompi_request_t **sreq = NULL;
-sreq = (ompi_request_t**) calloc( max_outstanding_reqs,
-sizeof(ompi_request_t*) );
+sreq = coll_base_comm_get_reqs(module->base_data, max_outstanding_reqs);
if (NULL == sreq) { line = __LINE__; ret = -1; goto error_hndl; }
/* post first group of requests */
@@ -303,7 +301,6 @@ int ompi_coll_base_reduce_generic( const void* sendbuf, void* recvbuf, int origi
/* wait on a posted request to complete */
ret = ompi_request_wait(&sreq[creq], MPI_STATUS_IGNORE);
if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
-sreq[creq] = MPI_REQUEST_NULL;
if( original_count < count_by_segment ) {
count_by_segment = original_count;
@@ -325,9 +322,6 @@ int ompi_coll_base_reduce_generic( const void* sendbuf, void* recvbuf, int origi
ret = ompi_request_wait_all( max_outstanding_reqs, sreq,
MPI_STATUSES_IGNORE );
if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
-/* free requests */
-free(sreq);
}
}
return OMPI_SUCCESS;
@@ -339,6 +333,9 @@ int ompi_coll_base_reduce_generic( const void* sendbuf, void* recvbuf, int origi
if( inbuf_free[0] != NULL ) free(inbuf_free[0]);
if( inbuf_free[1] != NULL ) free(inbuf_free[1]);
if( accumbuf_free != NULL ) free(accumbuf);
+if( NULL != sreq ) {
+ompi_coll_base_free_reqs(sreq, max_outstanding_reqs);
+}
return ret;
}