
Merge pull request #912 from bosilca/topic/coll_requests

This patch fixes the issues identified by @ggouaillardet in the IBM tests (collectives and topologies). It also improves the memory usage of OMPI: a communicator that never performs a collective communication will never allocate the array of requests needed to coordinate the basic collective algorithms. This PR replaces #790.
This commit is contained in:
bosilca 2015-10-09 11:27:07 -04:00
parent e019203499 e946c82847
Commit 1310acc83f
31 changed files with 489 additions and 604 deletions
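At its core, the change replaces per-call malloc/free of request arrays with a per-communicator cache that is allocated lazily, on first use. Below is a minimal self-contained model of that cache, with hypothetical request_t and comm_cache_t types standing in for ompi_request_t and mca_coll_base_comm_t (the real logic is coll_base_comm_get_reqs, shown later in this diff):

#include <stdlib.h>

typedef struct request request_t;   /* stand-in for ompi_request_t */

typedef struct {
    request_t **reqs;    /* cached array, NULL until a collective needs it */
    int         nreqs;   /* current capacity */
} comm_cache_t;          /* stand-in for mca_coll_base_comm_t */

/* Return an array with at least n slots, growing the cached one if
 * necessary. realloc(NULL, ...) acts as malloc, which is exactly what
 * makes the first call lazy: a communicator that never runs a basic
 * collective never allocates anything. */
static request_t **comm_get_reqs(comm_cache_t *c, int n)
{
    if (0 == n) return NULL;
    if (c->nreqs < n) {
        request_t **tmp = (request_t **) realloc(c->reqs, n * sizeof(*tmp));
        if (NULL == tmp) return NULL;      /* cache left untouched on failure */
        for (int i = c->nreqs; i < n; i++)
            tmp[i] = NULL;                 /* stand-in for MPI_REQUEST_NULL */
        c->reqs  = tmp;
        c->nreqs = n;
    }
    return c->reqs;
}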

View file

@ -23,7 +23,7 @@
* These functions are normally invoked by the back-ends of:
*
* - The back-ends of MPI_Init() and MPI_Finalize()
* - Communuicactor constructors (e.g., MPI_Comm_split()) and
* - Communicator constructors (e.g., MPI_Comm_split()) and
* destructors (e.g., MPI_Comm_free())
* - The laminfo command
*/

View file

@ -42,7 +42,7 @@ mca_coll_base_alltoall_intra_basic_inplace(const void *rbuf, int rcount,
mca_coll_base_module_t *module)
{
mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module;
int i, j, size, rank, err=MPI_SUCCESS;
int i, j, size, rank, err = MPI_SUCCESS, line;
MPI_Request *preq;
char *tmp_buffer;
size_t max_size;
@ -78,39 +78,39 @@ mca_coll_base_alltoall_intra_basic_inplace(const void *rbuf, int rcount,
/* Copy the data into the temporary buffer */
err = ompi_datatype_copy_content_same_ddt (rdtype, rcount, tmp_buffer,
(char *) rbuf + j * max_size);
if (MPI_SUCCESS != err) { goto error_hndl; }
if (MPI_SUCCESS != err) { line = __LINE__; goto error_hndl; }
/* Exchange data with the peer */
err = MCA_PML_CALL(irecv ((char *) rbuf + max_size * j, rcount, rdtype,
j, MCA_COLL_BASE_TAG_ALLTOALL, comm, preq++));
if (MPI_SUCCESS != err) { goto error_hndl; }
if (MPI_SUCCESS != err) { line = __LINE__; goto error_hndl; }
err = MCA_PML_CALL(isend ((char *) tmp_buffer, rcount, rdtype,
j, MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD,
comm, preq++));
if (MPI_SUCCESS != err) { goto error_hndl; }
if (MPI_SUCCESS != err) { line = __LINE__; goto error_hndl; }
} else if (j == rank) {
/* Copy the data into the temporary buffer */
err = ompi_datatype_copy_content_same_ddt (rdtype, rcount, tmp_buffer,
(char *) rbuf + i * max_size);
if (MPI_SUCCESS != err) { goto error_hndl; }
if (MPI_SUCCESS != err) { line = __LINE__; goto error_hndl; }
/* Exchange data with the peer */
err = MCA_PML_CALL(irecv ((char *) rbuf + max_size * i, rcount, rdtype,
i, MCA_COLL_BASE_TAG_ALLTOALL, comm, preq++));
if (MPI_SUCCESS != err) { goto error_hndl; }
if (MPI_SUCCESS != err) { line = __LINE__; goto error_hndl; }
err = MCA_PML_CALL(isend ((char *) tmp_buffer, rcount, rdtype,
i, MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD,
comm, preq++));
if (MPI_SUCCESS != err) { goto error_hndl; }
if (MPI_SUCCESS != err) { line = __LINE__; goto error_hndl; }
} else {
continue;
}
/* Wait for the requests to complete */
err = ompi_request_wait_all (2, base_module->base_data->mcct_reqs, MPI_STATUSES_IGNORE);
if (MPI_SUCCESS != err) { goto error_hndl; }
if (MPI_SUCCESS != err) { line = __LINE__; goto error_hndl; }
}
}
@ -118,8 +118,14 @@ mca_coll_base_alltoall_intra_basic_inplace(const void *rbuf, int rcount,
/* Free the temporary buffer */
free (tmp_buffer);
/* All done */
if( MPI_SUCCESS != err ) {
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err,
rank));
ompi_coll_base_free_reqs(base_module->base_data->mcct_reqs, 2);
}
/* All done */
return err;
}
@ -383,8 +389,7 @@ int ompi_coll_base_alltoall_intra_linear_sync(const void *sbuf, int scount,
total_reqs = (((max_outstanding_reqs > (size - 1)) ||
(max_outstanding_reqs <= 0)) ?
(size - 1) : (max_outstanding_reqs));
reqs = (ompi_request_t**) malloc( 2 * total_reqs *
sizeof(ompi_request_t*));
reqs = coll_base_comm_get_reqs(module->base_data, 2 * total_reqs);
if (NULL == reqs) { error = -1; line = __LINE__; goto error_hndl; }
prcv = (char *) rbuf;
@ -456,9 +461,6 @@ int ompi_coll_base_alltoall_intra_linear_sync(const void *sbuf, int scount,
}
}
/* Free the reqs */
free(reqs);
/* All done */
return MPI_SUCCESS;
@ -466,7 +468,7 @@ int ompi_coll_base_alltoall_intra_linear_sync(const void *sbuf, int scount,
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"%s:%4d\tError occurred %d, rank %2d", __FILE__, line, error,
rank));
if (NULL != reqs) free(reqs);
ompi_coll_base_free_reqs(reqs, 2 * total_reqs);
return error;
}
@ -552,7 +554,7 @@ int ompi_coll_base_alltoall_intra_basic_linear(const void *sbuf, int scount,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int i, rank, size, err, nreqs;
int i, rank, size, err, nreqs, line;
char *psnd, *prcv;
MPI_Aint lb, sndinc, rcvinc;
ompi_request_t **req, **sreq, **rreq;
@ -614,10 +616,7 @@ int ompi_coll_base_alltoall_intra_basic_linear(const void *sbuf, int scount,
err = MCA_PML_CALL(irecv_init
(prcv + (ptrdiff_t)i * rcvinc, rcount, rdtype, i,
MCA_COLL_BASE_TAG_ALLTOALL, comm, rreq));
if (MPI_SUCCESS != err) {
ompi_coll_base_free_reqs(req, nreqs);
return err;
}
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
}
/* Now post all sends in reverse order
@ -631,10 +630,7 @@ int ompi_coll_base_alltoall_intra_basic_linear(const void *sbuf, int scount,
(psnd + (ptrdiff_t)i * sndinc, scount, sdtype, i,
MCA_COLL_BASE_TAG_ALLTOALL,
MCA_PML_BASE_SEND_STANDARD, comm, sreq));
if (MPI_SUCCESS != err) {
ompi_coll_base_free_reqs(req, nreqs);
return err;
}
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
}
/* Start your engines. This will never return an error. */
@ -650,7 +646,12 @@ int ompi_coll_base_alltoall_intra_basic_linear(const void *sbuf, int scount,
err = ompi_request_wait_all(nreqs, req, MPI_STATUSES_IGNORE);
/* Free the reqs */
err_hndl:
if( MPI_SUCCESS != err ) {
OPAL_OUTPUT( (ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank) );
}
/* Free the reqs in all cases as they are persistent requests */
ompi_coll_base_free_reqs(req, nreqs);
/* All done */

View file

@ -123,9 +123,11 @@ mca_coll_base_alltoallv_intra_basic_inplace(const void *rbuf, const int *rcounts
error_hndl:
/* Free the temporary buffer */
free (tmp_buffer);
if( MPI_SUCCESS != err ) {
ompi_coll_base_free_reqs(base_module->base_data->mcct_reqs, 2 );
}
/* All done */
return err;
}
@ -253,8 +255,7 @@ ompi_coll_base_alltoallv_intra_basic_linear(const void *sbuf, const int *scounts
preq++));
++nreqs;
if (MPI_SUCCESS != err) {
ompi_coll_base_free_reqs(data->mcct_reqs, nreqs);
return err;
goto err_hndl;
}
}
@ -271,8 +272,7 @@ ompi_coll_base_alltoallv_intra_basic_linear(const void *sbuf, const int *scounts
preq++));
++nreqs;
if (MPI_SUCCESS != err) {
ompi_coll_base_free_reqs(data->mcct_reqs, nreqs);
return err;
goto err_hndl;
}
}
@ -287,8 +287,8 @@ ompi_coll_base_alltoallv_intra_basic_linear(const void *sbuf, const int *scounts
* error after we free everything. */
err = ompi_request_wait_all(nreqs, data->mcct_reqs,
MPI_STATUSES_IGNORE);
/* Free the requests. */
err_hndl:
/* Free the requests in all cases as they are persistent */
ompi_coll_base_free_reqs(data->mcct_reqs, nreqs);
return err;
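For readers less familiar with the distinction the comment above draws: persistent requests (created with the *_init calls) survive completion, so even a successful wait does not release them. A minimal illustration with the public MPI API rather than OMPI internals (buffer, peer, and tag are placeholders):

#include <mpi.h>

void persistent_roundtrip(void *buf, int count, int peer, MPI_Comm comm)
{
    MPI_Request req;

    /* Create once... */
    MPI_Send_init(buf, count, MPI_BYTE, peer, /*tag*/ 0, comm, &req);
    for (int iter = 0; iter < 4; iter++) {
        MPI_Start(&req);                    /* ...restart many times... */
        MPI_Wait(&req, MPI_STATUS_IGNORE);  /* completes, stays allocated */
    }
    MPI_Request_free(&req);                 /* ...and free explicitly at the end */
}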

View file

@ -324,7 +324,8 @@ int ompi_coll_base_barrier_intra_two_procs(struct ompi_communicator_t *comm,
int ompi_coll_base_barrier_intra_basic_linear(struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int i, err, rank, size;
int i, err, rank, size, line;
ompi_request_t** requests = NULL;
rank = ompi_comm_rank(comm);
size = ompi_comm_size(comm);
@ -334,50 +335,43 @@ int ompi_coll_base_barrier_intra_basic_linear(struct ompi_communicator_t *comm,
err = MCA_PML_CALL(send (NULL, 0, MPI_BYTE, 0,
MCA_COLL_BASE_TAG_BARRIER,
MCA_PML_BASE_SEND_STANDARD, comm));
if (MPI_SUCCESS != err) {
return err;
}
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
err = MCA_PML_CALL(recv (NULL, 0, MPI_BYTE, 0,
MCA_COLL_BASE_TAG_BARRIER,
comm, MPI_STATUS_IGNORE));
if (MPI_SUCCESS != err) {
return err;
}
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
}
/* The root collects and broadcasts the messages. */
else {
ompi_request_t** requests;
requests = (ompi_request_t**)malloc( size * sizeof(ompi_request_t*) );
requests = coll_base_comm_get_reqs(module->base_data, size);
for (i = 1; i < size; ++i) {
err = MCA_PML_CALL(irecv(NULL, 0, MPI_BYTE, MPI_ANY_SOURCE,
MCA_COLL_BASE_TAG_BARRIER, comm,
&(requests[i])));
if (MPI_SUCCESS != err) {
return err;
}
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
}
ompi_request_wait_all( size-1, requests+1, MPI_STATUSES_IGNORE );
requests = NULL; /* we're done, the requests array is clean */
for (i = 1; i < size; ++i) {
err = MCA_PML_CALL(send(NULL, 0, MPI_BYTE, i,
MCA_COLL_BASE_TAG_BARRIER,
MCA_PML_BASE_SEND_STANDARD, comm));
if (MPI_SUCCESS != err) {
return err;
}
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
}
free( requests );
}
/* All done */
return MPI_SUCCESS;
err_hndl:
OPAL_OUTPUT( (ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank) );
if( NULL != requests )
ompi_coll_base_free_reqs(requests, size-1);
return err;
}
/* copied function (with appropriate renaming) ends here */

View file

@ -66,8 +66,7 @@ ompi_coll_base_bcast_intra_generic( void* buffer,
#if !defined(COLL_BASE_BCAST_USE_BLOCKING)
if( tree->tree_nextsize != 0 ) {
send_reqs = (ompi_request_t**)malloc( (ptrdiff_t)tree->tree_nextsize *
sizeof(ompi_request_t*) );
send_reqs = coll_base_comm_get_reqs(module->base_data, tree->tree_nextsize);
}
#endif
@ -144,7 +143,7 @@ ompi_coll_base_bcast_intra_generic( void* buffer,
/* wait for and forward the previous segment to children */
err = ompi_request_wait( &recv_reqs[req_index ^ 0x1],
MPI_STATUSES_IGNORE );
MPI_STATUS_IGNORE );
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
for( i = 0; i < tree->tree_nextsize; i++ ) {
@ -176,7 +175,7 @@ ompi_coll_base_bcast_intra_generic( void* buffer,
}
/* Process the last segment */
err = ompi_request_wait( &recv_reqs[req_index], MPI_STATUSES_IGNORE );
err = ompi_request_wait( &recv_reqs[req_index], MPI_STATUS_IGNORE );
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
sendcount = original_count - (ptrdiff_t)(num_segments - 1) * count_by_segment;
for( i = 0; i < tree->tree_nextsize; i++ ) {
@ -236,19 +235,19 @@ ompi_coll_base_bcast_intra_generic( void* buffer,
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
}
#if !defined(COLL_BASE_BCAST_USE_BLOCKING)
if( NULL != send_reqs ) free(send_reqs);
#endif
return (MPI_SUCCESS);
error_hndl:
OPAL_OUTPUT( (ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank) );
#if !defined(COLL_BASE_BCAST_USE_BLOCKING)
if( NULL != send_reqs ) free(send_reqs);
#endif
return (err);
if( MPI_SUCCESS != err ) {
ompi_coll_base_free_reqs( recv_reqs, 2);
if( NULL != send_reqs ) {
ompi_coll_base_free_reqs( send_reqs, tree->tree_nextsize);
}
}
return err;
}
int
@ -382,7 +381,6 @@ ompi_coll_base_bcast_intra_split_bintree ( void* buffer,
ptrdiff_t type_extent, lb;
ompi_request_t *base_req, *new_req;
ompi_coll_tree_t *tree;
mca_coll_base_comm_t *data = module->base_data;
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
@ -395,7 +393,7 @@ ompi_coll_base_bcast_intra_split_bintree ( void* buffer,
/* setup the binary tree topology. */
COLL_BASE_UPDATE_BINTREE( comm, module, root );
tree = data->cached_bintree;
tree = module->base_data->cached_bintree;
err = ompi_datatype_type_size( datatype, &type_size );
@ -505,8 +503,8 @@ ompi_coll_base_bcast_intra_split_bintree ( void* buffer,
comm, &new_req));
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
/* wait for and forward current segment */
err = ompi_request_wait_all( 1, &base_req, MPI_STATUSES_IGNORE );
/* wait for and forward the previous segment */
err = ompi_request_wait( &base_req, MPI_STATUS_IGNORE );
for( i = 0; i < tree->tree_nextsize; i++ ) { /* send data to children (segcount[lr]) */
err = MCA_PML_CALL(send( tmpbuf[lr], segcount[lr], datatype,
tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST,
@ -521,7 +519,7 @@ ompi_coll_base_bcast_intra_split_bintree ( void* buffer,
} /* end of for segindex */
/* wait for the last segment and forward current segment */
err = ompi_request_wait_all( 1, &base_req, MPI_STATUSES_IGNORE );
err = ompi_request_wait( &base_req, MPI_STATUS_IGNORE );
for( i = 0; i < tree->tree_nextsize; i++ ) { /* send data to children */
err = MCA_PML_CALL(send(tmpbuf[lr], sendcount[lr], datatype,
tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST,
@ -637,10 +635,8 @@ ompi_coll_base_bcast_intra_basic_linear(void *buff, int count,
mca_coll_base_module_t *module)
{
int i, size, rank, err;
mca_coll_base_comm_t *data = module->base_data;
ompi_request_t **preq, **reqs;
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
@ -655,27 +651,20 @@ ompi_coll_base_bcast_intra_basic_linear(void *buff, int count,
}
/* Root sends data to all others. */
preq = reqs = coll_base_comm_get_reqs(data, size-1);
preq = reqs = coll_base_comm_get_reqs(module->base_data, size-1);
for (i = 0; i < size; ++i) {
if (i == rank) {
continue;
}
err = MCA_PML_CALL(isend_init(buff, count, datatype, i,
MCA_COLL_BASE_TAG_BCAST,
MCA_PML_BASE_SEND_STANDARD,
comm, preq++));
if (MPI_SUCCESS != err) {
ompi_coll_base_free_reqs(data->mcct_reqs, i);
return err;
}
err = MCA_PML_CALL(isend(buff, count, datatype, i,
MCA_COLL_BASE_TAG_BCAST,
MCA_PML_BASE_SEND_STANDARD,
comm, preq++));
if (MPI_SUCCESS != err) { goto err_hndl; }
}
--i;
/* Start your engines. This will never return an error. */
MCA_PML_CALL(start(i, reqs));
/* Wait for them all. If there's an error, note that we don't
* care what the error was -- just that there *was* an error. The
* PML will finish all requests, even if one or more of them fail.
@ -684,9 +673,10 @@ ompi_coll_base_bcast_intra_basic_linear(void *buff, int count,
* the error after we free everything. */
err = ompi_request_wait_all(i, reqs, MPI_STATUSES_IGNORE);
/* Free the reqs */
ompi_coll_base_free_reqs(reqs, i);
err_hndl:
if( MPI_SUCCESS != err ) { /* Free the reqs */
ompi_coll_base_free_reqs(reqs, i);
}
/* All done */
return err;

View file

@ -68,25 +68,14 @@ OBJ_CLASS_INSTANCE(mca_coll_base_module_t, opal_object_t,
static void
coll_base_comm_construct(mca_coll_base_comm_t *data)
{
data->mcct_reqs = NULL;
data->mcct_num_reqs = 0;
data->cached_ntree = NULL;
data->cached_bintree = NULL;
data->cached_bmtree = NULL;
data->cached_in_order_bmtree = NULL;
data->cached_chain = NULL;
data->cached_pipeline = NULL;
data->cached_in_order_bintree = NULL;
memset ((char *) data + sizeof (data->super), 0, sizeof (*data) - sizeof (data->super));
}
static void
coll_base_comm_destruct(mca_coll_base_comm_t *data)
{
if( NULL != data->mcct_reqs ) {
for( int i = 0; i < data->mcct_num_reqs; ++i ) {
if( MPI_REQUEST_NULL != data->mcct_reqs[i] )
ompi_request_free(&data->mcct_reqs[i]);
}
ompi_coll_base_free_reqs( data->mcct_reqs, data->mcct_num_reqs );
free(data->mcct_reqs);
data->mcct_reqs = NULL;
data->mcct_num_reqs = 0;
@ -122,18 +111,15 @@ OBJ_CLASS_INSTANCE(mca_coll_base_comm_t, opal_object_t,
ompi_request_t** coll_base_comm_get_reqs(mca_coll_base_comm_t* data, int nreqs)
{
int startfrom = data->mcct_num_reqs;
if( 0 == nreqs ) return NULL;
if( NULL == data->mcct_reqs ) {
assert(0 == data->mcct_num_reqs);
data->mcct_reqs = (ompi_request_t**)malloc(sizeof(ompi_request_t*) * nreqs);
} else if( data->mcct_num_reqs <= nreqs ) {
if( data->mcct_num_reqs <= nreqs )
data->mcct_reqs = (ompi_request_t**)realloc(data->mcct_reqs, sizeof(ompi_request_t*) * nreqs);
}
if( NULL != data->mcct_reqs ) {
data->mcct_num_reqs = nreqs;
for( int i = startfrom; i < data->mcct_num_reqs; i++ )
for( int i = data->mcct_num_reqs; i < nreqs; i++ )
data->mcct_reqs[i] = MPI_REQUEST_NULL;
data->mcct_num_reqs = nreqs;
} else
data->mcct_num_reqs = 0; /* nothing to return */
return data->mcct_reqs;
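Condensed from the call sites converted throughout this PR, here is the intended usage pattern for the cached array; the fan-in shape, MPI_BYTE payload, and tag are illustrative choices, not taken from any single file:

static int fan_in_sketch(char *buf, int count, int n,
                         struct ompi_communicator_t *comm,
                         mca_coll_base_module_t *module)
{
    ompi_request_t **reqs;
    int i, err = MPI_SUCCESS;

    /* Fetch (lazily allocating) the communicator's cached array */
    reqs = coll_base_comm_get_reqs(module->base_data, n);
    if (NULL == reqs) { return OMPI_ERR_OUT_OF_RESOURCE; }

    for (i = 0; i < n; ++i) {  /* post one receive per peer */
        err = MCA_PML_CALL(irecv(buf + (size_t)i * count, count, MPI_BYTE,
                                 i, MCA_COLL_BASE_TAG_ALLGATHER, comm, &reqs[i]));
        if (MPI_SUCCESS != err) { goto err_hndl; }
    }
    err = ompi_request_wait_all(n, reqs, MPI_STATUSES_IGNORE);
 err_hndl:
    if (MPI_SUCCESS != err) {
        /* Error path only: a successful wait_all already released these,
         * and any never-posted slot is MPI_REQUEST_NULL, which
         * ompi_coll_base_free_reqs skips. */
        ompi_coll_base_free_reqs(reqs, n);
    }
    return err;
}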

View file

@ -343,11 +343,21 @@ struct mca_coll_base_comm_t {
typedef struct mca_coll_base_comm_t mca_coll_base_comm_t;
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_coll_base_comm_t);
/**
* Free all requests in an array. As these requests are usually used during
* collective communications, and as on a successful collective they are
* expected to be released during the corresponding wait, the array should
* generally be empty. However, this function might be used on error conditions
* where it will allow a correct cleanup.
*/
static inline void ompi_coll_base_free_reqs(ompi_request_t **reqs, int count)
{
int i;
for (i = 0; i < count; ++i)
ompi_request_free(&reqs[i]);
for (i = 0; i < count; ++i) {
if( MPI_REQUEST_NULL != reqs[i] ) {
ompi_request_free(&reqs[i]);
}
}
}
/**

View file

@ -266,7 +266,7 @@ ompi_coll_base_gather_intra_linear_sync(const void *sbuf, int scount,
*/
char *ptmp;
ompi_request_t *first_segment_req;
reqs = (ompi_request_t**) calloc(size, sizeof(ompi_request_t*));
reqs = coll_base_comm_get_reqs(module->base_data, size);
if (NULL == reqs) { ret = -1; line = __LINE__; goto error_hndl; }
ompi_datatype_type_size(rdtype, &typelng);
@ -319,16 +319,13 @@ ompi_coll_base_gather_intra_linear_sync(const void *sbuf, int scount,
/* wait all second segments to complete */
ret = ompi_request_wait_all(size, reqs, MPI_STATUSES_IGNORE);
if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
free(reqs);
}
/* All done */
return MPI_SUCCESS;
error_hndl:
if (NULL != reqs) {
free(reqs);
ompi_coll_base_free_reqs(reqs, size);
}
OPAL_OUTPUT (( ompi_coll_base_framework.framework_output,
"ERROR_HNDL: node %d file %s line %d error %d\n",
@ -405,7 +402,6 @@ ompi_coll_base_gather_intra_basic_linear(const void *sbuf, int scount,
}
/* All done */
return MPI_SUCCESS;
}

View file

@ -56,10 +56,10 @@ int ompi_coll_base_reduce_generic( const void* sendbuf, void* recvbuf, int origi
char *accumbuf = NULL, *accumbuf_free = NULL;
char *local_op_buffer = NULL, *sendtmpbuf = NULL;
ptrdiff_t extent, lower_bound, segment_increment;
size_t typelng;
ompi_request_t* reqs[2] = {MPI_REQUEST_NULL, MPI_REQUEST_NULL};
ompi_request_t **sreq = NULL, *reqs[2] = {MPI_REQUEST_NULL, MPI_REQUEST_NULL};
int num_segments, line, ret, segindex, i, rank;
int recvcount, prevcount, inbi;
size_t typelng;
/**
* Determine number of segments and number of elements
@ -279,10 +279,8 @@ int ompi_coll_base_reduce_generic( const void* sendbuf, void* recvbuf, int origi
else {
int creq = 0;
ompi_request_t **sreq = NULL;
sreq = (ompi_request_t**) calloc( max_outstanding_reqs,
sizeof(ompi_request_t*) );
sreq = coll_base_comm_get_reqs(module->base_data, max_outstanding_reqs);
if (NULL == sreq) { line = __LINE__; ret = -1; goto error_hndl; }
/* post first group of requests */
@ -303,7 +301,6 @@ int ompi_coll_base_reduce_generic( const void* sendbuf, void* recvbuf, int origi
/* wait on a posted request to complete */
ret = ompi_request_wait(&sreq[creq], MPI_STATUS_IGNORE);
if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
sreq[creq] = MPI_REQUEST_NULL;
if( original_count < count_by_segment ) {
count_by_segment = original_count;
@ -325,9 +322,6 @@ int ompi_coll_base_reduce_generic( const void* sendbuf, void* recvbuf, int origi
ret = ompi_request_wait_all( max_outstanding_reqs, sreq,
MPI_STATUSES_IGNORE );
if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
/* free requests */
free(sreq);
}
}
return OMPI_SUCCESS;
@ -339,6 +333,9 @@ int ompi_coll_base_reduce_generic( const void* sendbuf, void* recvbuf, int origi
if( inbuf_free[0] != NULL ) free(inbuf_free[0]);
if( inbuf_free[1] != NULL ) free(inbuf_free[1]);
if( accumbuf_free != NULL ) free(accumbuf);
if( NULL != sreq ) {
ompi_coll_base_free_reqs(sreq, max_outstanding_reqs);
}
return ret;
}

View file

@ -283,25 +283,14 @@ BEGIN_C_DECLS
int mca_coll_basic_ft_event(int status);
/* Utility functions */
static inline void mca_coll_basic_free_reqs(ompi_request_t ** reqs,
int count)
{
int i;
for (i = 0; i < count; ++i)
ompi_request_free(&reqs[i]);
}
struct mca_coll_basic_module_t {
mca_coll_base_module_t super;
ompi_request_t **mccb_reqs;
int mccb_num_reqs;
};
typedef struct mca_coll_basic_module_t mca_coll_basic_module_t;
OBJ_CLASS_DECLARATION(mca_coll_basic_module_t);
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_coll_basic_module_t);
typedef mca_coll_base_comm_t mca_coll_basic_comm_t;
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_coll_basic_comm_t);
END_C_DECLS

View file

@ -47,12 +47,11 @@ mca_coll_basic_allgather_inter(const void *sbuf, int scount,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int rank, root = 0, size, rsize, err, i;
int rank, root = 0, size, rsize, err, i, line;
char *tmpbuf = NULL, *ptmp;
ptrdiff_t rlb, slb, rextent, sextent, incr;
ompi_request_t *req;
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module;
ompi_request_t **reqs = basic_module->mccb_reqs;
ompi_request_t **reqs = NULL;
rank = ompi_comm_rank(comm);
size = ompi_comm_size(comm);
@ -71,35 +70,29 @@ mca_coll_basic_allgather_inter(const void *sbuf, int scount,
err = MCA_PML_CALL(send(sbuf, scount, sdtype, root,
MCA_COLL_BASE_TAG_ALLGATHER,
MCA_PML_BASE_SEND_STANDARD, comm));
if (OMPI_SUCCESS != err) {
return err;
}
if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; }
} else {
/* receive a msg. from all other procs. */
err = ompi_datatype_get_extent(rdtype, &rlb, &rextent);
if (OMPI_SUCCESS != err) {
return err;
}
if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; }
err = ompi_datatype_get_extent(sdtype, &slb, &sextent);
if (OMPI_SUCCESS != err) {
return err;
}
if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; }
/* Get a request array of the right size */
reqs = coll_base_comm_get_reqs(module->base_data, rsize + 1);
if( NULL == reqs ) { line = __LINE__; goto exit; }
/* Do a send-recv between the two root procs. to avoid deadlock */
err = MCA_PML_CALL(isend(sbuf, scount, sdtype, 0,
MCA_COLL_BASE_TAG_ALLGATHER,
MCA_PML_BASE_SEND_STANDARD,
comm, &reqs[rsize]));
if (OMPI_SUCCESS != err) {
return err;
}
if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; }
err = MCA_PML_CALL(irecv(rbuf, rcount, rdtype, 0,
MCA_COLL_BASE_TAG_ALLGATHER, comm,
&reqs[0]));
if (OMPI_SUCCESS != err) {
return err;
}
if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; }
incr = rextent * rcount;
ptmp = (char *) rbuf + incr;
@ -107,45 +100,33 @@ mca_coll_basic_allgather_inter(const void *sbuf, int scount,
err = MCA_PML_CALL(irecv(ptmp, rcount, rdtype, i,
MCA_COLL_BASE_TAG_ALLGATHER,
comm, &reqs[i]));
if (MPI_SUCCESS != err) {
return err;
}
if (MPI_SUCCESS != err) { line = __LINE__; goto exit; }
}
err = ompi_request_wait_all(rsize + 1, reqs, MPI_STATUSES_IGNORE);
if (OMPI_SUCCESS != err) {
return err;
}
if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; }
/* Step 2: exchange the results between the root processes */
tmpbuf = (char *) malloc(scount * size * sextent);
if (NULL == tmpbuf) {
return err;
}
if (NULL == tmpbuf) { line = __LINE__; goto exit; }
err = MCA_PML_CALL(isend(rbuf, rsize * rcount, rdtype, 0,
MCA_COLL_BASE_TAG_ALLGATHER,
MCA_PML_BASE_SEND_STANDARD, comm, &req));
if (OMPI_SUCCESS != err) {
goto exit;
}
if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; }
err = MCA_PML_CALL(recv(tmpbuf, size * scount, sdtype, 0,
MCA_COLL_BASE_TAG_ALLGATHER, comm,
MPI_STATUS_IGNORE));
if (OMPI_SUCCESS != err) {
goto exit;
}
if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; }
err = ompi_request_wait( &req, MPI_STATUS_IGNORE);
if (OMPI_SUCCESS != err) {
goto exit;
}
if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; }
}
/* Step 3: bcast the data to the remote group. This
* happens in both groups simultaniously, thus we can
* happens in both groups simultaneously, thus we can
* not use coll_bcast (this would deadlock).
*/
if (rank != root) {
@ -153,9 +134,7 @@ mca_coll_basic_allgather_inter(const void *sbuf, int scount,
err = MCA_PML_CALL(recv(rbuf, rsize * rcount, rdtype, 0,
MCA_COLL_BASE_TAG_ALLGATHER, comm,
MPI_STATUS_IGNORE));
if (OMPI_SUCCESS != err) {
goto exit;
}
if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; }
} else {
/* Send the data to every other process in the remote group
@ -165,19 +144,19 @@ mca_coll_basic_allgather_inter(const void *sbuf, int scount,
MCA_COLL_BASE_TAG_ALLGATHER,
MCA_PML_BASE_SEND_STANDARD,
comm, &reqs[i - 1]));
if (OMPI_SUCCESS != err) {
goto exit;
}
if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; }
}
err = ompi_request_wait_all(rsize - 1, reqs, MPI_STATUSES_IGNORE);
if (OMPI_SUCCESS != err) {
goto exit;
}
if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; }
}
exit:
if( MPI_SUCCESS != err ) {
OPAL_OUTPUT( (ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank) );
if( NULL != reqs ) ompi_coll_base_free_reqs(reqs, rsize+1);
}
if (NULL != tmpbuf) {
free(tmpbuf);
}

View file

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2014 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -80,13 +80,12 @@ mca_coll_basic_allreduce_inter(const void *sbuf, void *rbuf, int count,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int err, i, rank, root = 0, rsize;
int err, i, rank, root = 0, rsize, line;
ptrdiff_t lb, extent;
ptrdiff_t true_lb, true_extent;
char *tmpbuf = NULL, *pml_buffer = NULL;
ompi_request_t *req[2];
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module;
ompi_request_t **reqs = basic_module->mccb_reqs;
ompi_request_t **reqs = NULL;
rank = ompi_comm_rank(comm);
rsize = ompi_comm_remote_size(comm);
@ -111,41 +110,33 @@ mca_coll_basic_allreduce_inter(const void *sbuf, void *rbuf, int count,
}
tmpbuf = (char *) malloc(true_extent + (count - 1) * extent);
if (NULL == tmpbuf) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
if (NULL == tmpbuf) { err = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto exit; }
pml_buffer = tmpbuf - true_lb;
reqs = coll_base_comm_get_reqs(module->base_data, rsize - 1);
if( NULL == reqs ) { err = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto exit; }
/* Do a send-recv between the two root procs. to avoid deadlock */
err = MCA_PML_CALL(irecv(rbuf, count, dtype, 0,
MCA_COLL_BASE_TAG_ALLREDUCE, comm,
&(req[0])));
if (OMPI_SUCCESS != err) {
goto exit;
}
if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; }
err = MCA_PML_CALL(isend(sbuf, count, dtype, 0,
MCA_COLL_BASE_TAG_ALLREDUCE,
MCA_PML_BASE_SEND_STANDARD,
comm, &(req[1])));
if (OMPI_SUCCESS != err) {
goto exit;
}
if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; }
err = ompi_request_wait_all(2, req, MPI_STATUSES_IGNORE);
if (OMPI_SUCCESS != err) {
goto exit;
}
if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; }
/* Loop receiving and calling reduction function (C or Fortran). */
for (i = 1; i < rsize; i++) {
err = MCA_PML_CALL(recv(pml_buffer, count, dtype, i,
MCA_COLL_BASE_TAG_ALLREDUCE, comm,
MPI_STATUS_IGNORE));
if (MPI_SUCCESS != err) {
goto exit;
}
if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; }
/* Perform the reduction */
ompi_op_reduce(op, pml_buffer, rbuf, count, dtype);
@ -155,9 +146,7 @@ mca_coll_basic_allreduce_inter(const void *sbuf, void *rbuf, int count,
err = MCA_PML_CALL(send(sbuf, count, dtype, root,
MCA_COLL_BASE_TAG_ALLREDUCE,
MCA_PML_BASE_SEND_STANDARD, comm));
if (OMPI_SUCCESS != err) {
goto exit;
}
if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; }
}
@ -171,21 +160,16 @@ mca_coll_basic_allreduce_inter(const void *sbuf, void *rbuf, int count,
err = MCA_PML_CALL(irecv(pml_buffer, count, dtype, 0,
MCA_COLL_BASE_TAG_ALLREDUCE,
comm, &(req[1])));
if (OMPI_SUCCESS != err) {
goto exit;
}
if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; }
err = MCA_PML_CALL(isend(rbuf, count, dtype, 0,
MCA_COLL_BASE_TAG_ALLREDUCE,
MCA_PML_BASE_SEND_STANDARD, comm,
&(req[0])));
if (OMPI_SUCCESS != err) {
goto exit;
}
if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; }
err = ompi_request_wait_all(2, req, MPI_STATUSES_IGNORE);
if (OMPI_SUCCESS != err) {
goto exit;
}
if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; }
/* distribute the data to other processes in remote group.
* Note that we start from 1 (not from zero), since zero
@ -198,17 +182,13 @@ mca_coll_basic_allreduce_inter(const void *sbuf, void *rbuf, int count,
MCA_COLL_BASE_TAG_ALLREDUCE,
MCA_PML_BASE_SEND_STANDARD, comm,
&reqs[i - 1]));
if (OMPI_SUCCESS != err) {
goto exit;
}
if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; }
}
err =
ompi_request_wait_all(rsize - 1, reqs,
MPI_STATUSES_IGNORE);
if (OMPI_SUCCESS != err) {
goto exit;
}
if (OMPI_SUCCESS != err) { line = __LINE__; goto exit; }
}
} else {
err = MCA_PML_CALL(recv(rbuf, count, dtype, root,
@ -217,10 +197,14 @@ mca_coll_basic_allreduce_inter(const void *sbuf, void *rbuf, int count,
}
exit:
if( MPI_SUCCESS != err ) {
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d", __FILE__,
line, err, rank));
ompi_coll_base_free_reqs(reqs, rsize - 1);
}
if (NULL != tmpbuf) {
free(tmpbuf);
}
return err;
}

View file

@ -57,11 +57,7 @@ mca_coll_basic_alltoall_inter(const void *sbuf, int scount,
MPI_Aint sndinc;
MPI_Aint rcvinc;
ompi_request_t **req;
ompi_request_t **sreq;
ompi_request_t **rreq;
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module;
ompi_request_t **req, **sreq, **rreq;
/* Initialize. */
@ -81,7 +77,7 @@ mca_coll_basic_alltoall_inter(const void *sbuf, int scount,
/* Initiate all send/recv to/from others. */
nreqs = size * 2;
req = rreq = basic_module->mccb_reqs;
req = rreq = coll_base_comm_get_reqs( module->base_data, nreqs);
sreq = rreq + size;
prcv = (char *) rbuf;
@ -92,6 +88,7 @@ mca_coll_basic_alltoall_inter(const void *sbuf, int scount,
err = MCA_PML_CALL(irecv(prcv + (i * rcvinc), rcount, rdtype, i,
MCA_COLL_BASE_TAG_ALLTOALL, comm, rreq));
if (OMPI_SUCCESS != err) {
ompi_coll_base_free_reqs(req, nreqs);
return err;
}
}
@ -102,6 +99,7 @@ mca_coll_basic_alltoall_inter(const void *sbuf, int scount,
MCA_COLL_BASE_TAG_ALLTOALL,
MCA_PML_BASE_SEND_STANDARD, comm, sreq));
if (OMPI_SUCCESS != err) {
ompi_coll_base_free_reqs(req, nreqs);
return err;
}
}
@ -113,6 +111,9 @@ mca_coll_basic_alltoall_inter(const void *sbuf, int scount,
* So free them anyway -- even if there was an error, and return
* the error after we free everything. */
err = ompi_request_wait_all(nreqs, req, MPI_STATUSES_IGNORE);
if (OMPI_SUCCESS != err) {
ompi_coll_base_free_reqs(req, nreqs);
}
/* All done */
return err;

View file

@ -57,8 +57,7 @@ mca_coll_basic_alltoallv_inter(const void *sbuf, const int *scounts, const int *
MPI_Aint sndextent;
MPI_Aint rcvextent;
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module;
ompi_request_t **preq = basic_module->mccb_reqs;
ompi_request_t **preq;
/* Initialize. */
@ -69,6 +68,7 @@ mca_coll_basic_alltoallv_inter(const void *sbuf, const int *scounts, const int *
/* Initiate all send/recv to/from others. */
nreqs = rsize * 2;
preq = coll_base_comm_get_reqs(module->base_data, nreqs);
/* Post all receives first */
/* A simple optimization: do not send and recv msgs of length zero */
@ -79,10 +79,9 @@ mca_coll_basic_alltoallv_inter(const void *sbuf, const int *scounts, const int *
i, MCA_COLL_BASE_TAG_ALLTOALLV, comm,
&preq[i]));
if (MPI_SUCCESS != err) {
ompi_coll_base_free_reqs(preq, i);
return err;
}
} else {
preq[i] = MPI_REQUEST_NULL;
}
}
@ -95,14 +94,16 @@ mca_coll_basic_alltoallv_inter(const void *sbuf, const int *scounts, const int *
MCA_PML_BASE_SEND_STANDARD, comm,
&preq[rsize + i]));
if (MPI_SUCCESS != err) {
ompi_coll_base_free_reqs(preq, rsize + i);
return err;
}
} else {
preq[rsize + i] = MPI_REQUEST_NULL;
}
}
err = ompi_request_wait_all(nreqs, preq, MPI_STATUSES_IGNORE);
if (MPI_SUCCESS != err) {
ompi_coll_base_free_reqs(preq, nreqs);
}
/* All done */
return err;

View file

@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -41,9 +41,8 @@ mca_coll_basic_alltoallw_intra_inplace(const void *rbuf, const int *rcounts, con
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module;
int i, j, size, rank, err=MPI_SUCCESS, max_size;
MPI_Request *preq;
MPI_Request *preq, *reqs = NULL;
char *tmp_buffer;
ptrdiff_t ext;
@ -71,6 +70,7 @@ mca_coll_basic_alltoallw_intra_inplace(const void *rbuf, const int *rcounts, con
return OMPI_ERR_OUT_OF_RESOURCE;
}
reqs = coll_base_comm_get_reqs( module->base_data, 2);
/* in-place alltoallw slow algorithm (but works) */
for (i = 0 ; i < size ; ++i) {
size_t msg_size_i;
@ -82,7 +82,7 @@ mca_coll_basic_alltoallw_intra_inplace(const void *rbuf, const int *rcounts, con
msg_size_j *= rcounts[j];
/* Initiate all send/recv to/from others. */
preq = basic_module->mccb_reqs;
preq = reqs;
if (i == rank && msg_size_j != 0) {
/* Copy the data into the temporary buffer */
@ -119,17 +119,19 @@ mca_coll_basic_alltoallw_intra_inplace(const void *rbuf, const int *rcounts, con
}
/* Wait for the requests to complete */
err = ompi_request_wait_all (2, basic_module->mccb_reqs, MPI_STATUSES_IGNORE);
err = ompi_request_wait_all (2, reqs, MPI_STATUSES_IGNORE);
if (MPI_SUCCESS != err) { goto error_hndl; }
/* Free the requests. */
mca_coll_basic_free_reqs(basic_module->mccb_reqs, 2);
}
}
error_hndl:
/* Free the temporary buffer */
free (tmp_buffer);
if( MPI_SUCCESS != err ) { /* Free the requests. */
if( NULL != reqs ) {
ompi_coll_base_free_reqs(reqs, 2);
}
}
/* All done */
@ -159,8 +161,7 @@ mca_coll_basic_alltoallw_intra(const void *sbuf, const int *scounts, const int *
char *psnd;
char *prcv;
int nreqs;
MPI_Request *preq;
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module;
MPI_Request *preq, *reqs;
/* Initialize. */
if (MPI_IN_PLACE == sbuf) {
@ -191,7 +192,7 @@ mca_coll_basic_alltoallw_intra(const void *sbuf, const int *scounts, const int *
/* Initiate all send/recv to/from others. */
nreqs = 0;
preq = basic_module->mccb_reqs;
reqs = preq = coll_base_comm_get_reqs(module->base_data, 2 * size);
/* Post all receives first -- a simple optimization */
@ -209,8 +210,7 @@ mca_coll_basic_alltoallw_intra(const void *sbuf, const int *scounts, const int *
preq++));
++nreqs;
if (MPI_SUCCESS != err) {
mca_coll_basic_free_reqs(basic_module->mccb_reqs,
nreqs);
ompi_coll_base_free_reqs(reqs, nreqs);
return err;
}
}
@ -232,15 +232,14 @@ mca_coll_basic_alltoallw_intra(const void *sbuf, const int *scounts, const int *
preq++));
++nreqs;
if (MPI_SUCCESS != err) {
mca_coll_basic_free_reqs(basic_module->mccb_reqs,
nreqs);
ompi_coll_base_free_reqs(reqs, nreqs);
return err;
}
}
/* Start your engines. This will never return an error. */
MCA_PML_CALL(start(nreqs, basic_module->mccb_reqs));
MCA_PML_CALL(start(nreqs, reqs));
/* Wait for them all. If there's an error, note that we don't care
* what the error was -- just that there *was* an error. The PML
@ -249,15 +248,11 @@ mca_coll_basic_alltoallw_intra(const void *sbuf, const int *scounts, const int *
* So free them anyway -- even if there was an error, and return the
* error after we free everything. */
err = ompi_request_wait_all(nreqs, basic_module->mccb_reqs,
MPI_STATUSES_IGNORE);
/* Free the requests. */
mca_coll_basic_free_reqs(basic_module->mccb_reqs, nreqs);
err = ompi_request_wait_all(nreqs, reqs, MPI_STATUSES_IGNORE);
/* Free the requests in all cases as they are persistent */
ompi_coll_base_free_reqs(reqs, nreqs);
/* All done */
return err;
}
@ -283,15 +278,14 @@ mca_coll_basic_alltoallw_inter(const void *sbuf, const int *scounts, const int *
char *psnd;
char *prcv;
int nreqs;
MPI_Request *preq;
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module;
MPI_Request *preq, *reqs;
/* Initialize. */
size = ompi_comm_remote_size(comm);
/* Initiate all send/recv to/from others. */
nreqs = 0;
preq = basic_module->mccb_reqs;
reqs = preq = coll_base_comm_get_reqs(module->base_data, 2 * size);
/* Post all receives first -- a simple optimization */
for (i = 0; i < size; ++i) {
@ -308,8 +302,7 @@ mca_coll_basic_alltoallw_inter(const void *sbuf, const int *scounts, const int *
comm, preq++));
++nreqs;
if (OMPI_SUCCESS != err) {
mca_coll_basic_free_reqs(basic_module->mccb_reqs,
nreqs);
ompi_coll_base_free_reqs(reqs, nreqs);
return err;
}
}
@ -330,14 +323,13 @@ mca_coll_basic_alltoallw_inter(const void *sbuf, const int *scounts, const int *
preq++));
++nreqs;
if (OMPI_SUCCESS != err) {
mca_coll_basic_free_reqs(basic_module->mccb_reqs,
nreqs);
ompi_coll_base_free_reqs(reqs, nreqs);
return err;
}
}
/* Start your engines. This will never return an error. */
MCA_PML_CALL(start(nreqs, basic_module->mccb_reqs));
MCA_PML_CALL(start(nreqs, reqs));
/* Wait for them all. If there's an error, note that we don't care
* what the error was -- just that there *was* an error. The PML
@ -345,11 +337,10 @@ mca_coll_basic_alltoallw_inter(const void *sbuf, const int *scounts, const int *
* i.e., by the end of this call, all the requests are free-able.
* So free them anyway -- even if there was an error, and return the
* error after we free everything. */
err = ompi_request_wait_all(nreqs, basic_module->mccb_reqs,
MPI_STATUSES_IGNORE);
err = ompi_request_wait_all(nreqs, reqs, MPI_STATUSES_IGNORE);
/* Free the requests. */
mca_coll_basic_free_reqs(basic_module->mccb_reqs, nreqs);
/* Free the requests in all cases as they are persistent */
ompi_coll_base_free_reqs(reqs, nreqs);
/* All done */
return err;

View file

@ -53,9 +53,7 @@ mca_coll_basic_bcast_log_intra(void *buff, int count,
int mask;
int err;
int nreqs;
ompi_request_t **preq;
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module;
ompi_request_t **reqs = basic_module->mccb_reqs;
ompi_request_t **preq, **reqs;
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
@ -83,6 +81,8 @@ mca_coll_basic_bcast_log_intra(void *buff, int count,
/* Send data to the children. */
reqs = coll_base_comm_get_reqs(module->base_data, size);
err = MPI_SUCCESS;
preq = reqs;
nreqs = 0;
@ -92,12 +92,12 @@ mca_coll_basic_bcast_log_intra(void *buff, int count,
peer = (peer + root) % size;
++nreqs;
err = MCA_PML_CALL(isend_init(buff, count, datatype, peer,
MCA_COLL_BASE_TAG_BCAST,
MCA_PML_BASE_SEND_STANDARD,
comm, preq++));
err = MCA_PML_CALL(isend(buff, count, datatype, peer,
MCA_COLL_BASE_TAG_BCAST,
MCA_PML_BASE_SEND_STANDARD,
comm, preq++));
if (MPI_SUCCESS != err) {
mca_coll_basic_free_reqs(reqs, nreqs);
ompi_coll_base_free_reqs(reqs, nreqs);
return err;
}
}
@ -107,10 +107,6 @@ mca_coll_basic_bcast_log_intra(void *buff, int count,
if (nreqs > 0) {
/* Start your engines. This will never return an error. */
MCA_PML_CALL(start(nreqs, reqs));
/* Wait for them all. If there's an error, note that we don't
* care what the error was -- just that there *was* an error.
* The PML will finish all requests, even if one or more of them
@ -119,10 +115,9 @@ mca_coll_basic_bcast_log_intra(void *buff, int count,
* error, and return the error after we free everything. */
err = ompi_request_wait_all(nreqs, reqs, MPI_STATUSES_IGNORE);
/* Free the reqs */
mca_coll_basic_free_reqs(reqs, nreqs);
if( MPI_SUCCESS != err ) {
ompi_coll_base_free_reqs(reqs, nreqs);
}
}
/* All done */
@ -147,8 +142,7 @@ mca_coll_basic_bcast_lin_inter(void *buff, int count,
int i;
int rsize;
int err;
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module;
ompi_request_t **reqs = basic_module->mccb_reqs;
ompi_request_t **reqs = NULL;
rsize = ompi_comm_remote_size(comm);
@ -161,6 +155,7 @@ mca_coll_basic_bcast_lin_inter(void *buff, int count,
MCA_COLL_BASE_TAG_BCAST, comm,
MPI_STATUS_IGNORE));
} else {
reqs = coll_base_comm_get_reqs(module->base_data, rsize);
/* root section */
for (i = 0; i < rsize; i++) {
err = MCA_PML_CALL(isend(buff, count, datatype, i,
@ -168,10 +163,14 @@ mca_coll_basic_bcast_lin_inter(void *buff, int count,
MCA_PML_BASE_SEND_STANDARD,
comm, &(reqs[i])));
if (OMPI_SUCCESS != err) {
ompi_coll_base_free_reqs(reqs, rsize);
return err;
}
}
err = ompi_request_wait_all(rsize, reqs, MPI_STATUSES_IGNORE);
if (OMPI_SUCCESS != err) {
ompi_coll_base_free_reqs(reqs, rsize);
}
}

View file

@ -106,22 +106,10 @@ basic_register(void)
return OMPI_SUCCESS;
}
static void
mca_coll_basic_module_construct(mca_coll_basic_module_t *module)
{
module->mccb_reqs = NULL;
module->mccb_num_reqs = 0;
}
static void
mca_coll_basic_module_destruct(mca_coll_basic_module_t *module)
{
if (NULL != module->mccb_reqs) free(module->mccb_reqs);
}
OBJ_CLASS_INSTANCE(mca_coll_basic_module_t,
mca_coll_base_module_t,
mca_coll_basic_module_construct,
mca_coll_basic_module_destruct);
NULL,
NULL);
OBJ_CLASS_INSTANCE(mca_coll_basic_comm_t, mca_coll_base_comm_t,
NULL, NULL);

View file

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -120,8 +120,7 @@ mca_coll_basic_gatherv_inter(const void *sbuf, int scount,
int i, size, err;
char *ptmp;
ptrdiff_t lb, extent;
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module;
ompi_request_t **reqs = basic_module->mccb_reqs;
ompi_request_t **reqs = NULL;
size = ompi_comm_remote_size(comm);
@ -143,17 +142,22 @@ mca_coll_basic_gatherv_inter(const void *sbuf, int scount,
return OMPI_ERROR;
}
reqs = coll_base_comm_get_reqs(module->base_data, size);
for (i = 0; i < size; ++i) {
ptmp = ((char *) rbuf) + (extent * disps[i]);
err = MCA_PML_CALL(irecv(ptmp, rcounts[i], rdtype, i,
MCA_COLL_BASE_TAG_GATHERV,
comm, &reqs[i]));
if (OMPI_SUCCESS != err) {
ompi_coll_base_free_reqs(reqs, size);
return err;
}
}
err = ompi_request_wait_all(size, reqs, MPI_STATUSES_IGNORE);
if (OMPI_SUCCESS != err) {
ompi_coll_base_free_reqs(reqs, size);
}
}
/* All done */

View file

@ -13,7 +13,7 @@
* Copyright (c) 2012 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Research Organization for Information Science
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
@ -30,8 +30,6 @@
#include "mpi.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/base.h"
#include "ompi/mca/topo/topo.h"
#include "ompi/mca/topo/base/base.h"
#include "coll_basic.h"
@ -59,7 +57,6 @@ mca_coll_base_module_t *
mca_coll_basic_comm_query(struct ompi_communicator_t *comm,
int *priority)
{
int size;
mca_coll_basic_module_t *basic_module;
basic_module = OBJ_NEW(mca_coll_basic_module_t);
@ -67,44 +64,6 @@ mca_coll_basic_comm_query(struct ompi_communicator_t *comm,
*priority = mca_coll_basic_priority;
/* Allocate the data that hangs off the communicator */
if (OMPI_COMM_IS_INTER(comm)) {
size = ompi_comm_remote_size(comm);
} else {
size = ompi_comm_size(comm);
}
size *= 2;
if (OMPI_COMM_IS_CART(comm)) {
int cart_size, ndims;
assert (NULL != comm->c_topo);
comm->c_topo->topo.cart.cartdim_get(comm, &ndims);
cart_size = ndims * 4;
if (cart_size > size) {
size = cart_size;
}
} else if (OMPI_COMM_IS_GRAPH(comm)) {
int rank, degree;
assert (NULL != comm->c_topo);
rank = ompi_comm_rank (comm);
comm->c_topo->topo.graph.graph_neighbors_count (comm, rank, &degree);
degree *= 2;
if (degree > size) {
size = degree;
}
} else if (OMPI_COMM_IS_DIST_GRAPH(comm)) {
int dist_graph_size, inneighbors, outneighbors, weighted;
assert (NULL != comm->c_topo);
comm->c_topo->topo.dist_graph.dist_graph_neighbors_count(comm, &inneighbors, &outneighbors, &weighted);
dist_graph_size = inneighbors + outneighbors;
if (dist_graph_size > size) {
size = dist_graph_size;
}
}
basic_module->mccb_num_reqs = size;
basic_module->mccb_reqs = (ompi_request_t**)
malloc(sizeof(ompi_request_t *) * basic_module->mccb_num_reqs);
/* Choose whether to use [intra|inter], and [linear|log]-based
* algorithms. */
basic_module->super.coll_module_enable = mca_coll_basic_module_enable;
@ -184,11 +143,16 @@ int
mca_coll_basic_module_enable(mca_coll_base_module_t *module,
struct ompi_communicator_t *comm)
{
/* prepare the placeholder for the array of request* */
module->base_data = OBJ_NEW(mca_coll_basic_comm_t);
if (NULL == module->base_data) {
return OMPI_ERROR;
}
/* All done */
return OMPI_SUCCESS;
}
int
mca_coll_basic_ft_event(int state) {
if(OPAL_CRS_CHECKPOINT == state) {

View file

@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -42,18 +42,18 @@ mca_coll_basic_neighbor_allgather_cart(const void *sbuf, int scount,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t *) module;
const mca_topo_base_comm_cart_2_2_0_t *cart = comm->c_topo->mtc.cart;
const int rank = ompi_comm_rank (comm);
ompi_request_t **reqs;
ompi_request_t **reqs, **preqs;
ptrdiff_t lb, extent;
int rc = MPI_SUCCESS, dim, nreqs;
ompi_datatype_get_extent(rdtype, &lb, &extent);
reqs = preqs = coll_base_comm_get_reqs( module->base_data, 4 * cart->ndims );
/* The ordering is defined as -1 then +1 in each dimension in
* order of dimension. */
for (dim = 0, reqs = basic_module->mccb_reqs, nreqs = 0 ; dim < cart->ndims ; ++dim) {
for (dim = 0, nreqs = 0 ; dim < cart->ndims ; ++dim) {
int srank = MPI_PROC_NULL, drank = MPI_PROC_NULL;
if (cart->dims[dim] > 1) {
@ -63,9 +63,10 @@ mca_coll_basic_neighbor_allgather_cart(const void *sbuf, int scount,
}
if (MPI_PROC_NULL != srank) {
nreqs += 2;
rc = MCA_PML_CALL(irecv(rbuf, rcount, rdtype, srank,
MCA_COLL_BASE_TAG_ALLGATHER,
comm, reqs++));
comm, preqs++));
if (OMPI_SUCCESS != rc) break;
/* remove cast from const when the pml layer is updated to take
@ -73,39 +74,40 @@ mca_coll_basic_neighbor_allgather_cart(const void *sbuf, int scount,
rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, srank,
MCA_COLL_BASE_TAG_ALLGATHER,
MCA_PML_BASE_SEND_STANDARD,
comm, reqs++));
comm, preqs++));
if (OMPI_SUCCESS != rc) break;
nreqs += 2;
}
rbuf = (char *) rbuf + extent * rcount;
if (MPI_PROC_NULL != drank) {
nreqs += 2;
rc = MCA_PML_CALL(irecv(rbuf, rcount, rdtype, drank,
MCA_COLL_BASE_TAG_ALLGATHER,
comm, reqs++));
comm, preqs++));
if (OMPI_SUCCESS != rc) break;
rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, drank,
MCA_COLL_BASE_TAG_ALLGATHER,
MCA_PML_BASE_SEND_STANDARD,
comm, reqs++));
comm, preqs++));
if (OMPI_SUCCESS != rc) break;
nreqs += 2;
}
rbuf = (char *) rbuf + extent * rcount;
}
if (OMPI_SUCCESS != rc) {
/* should probably try to clean up here */
ompi_coll_base_free_reqs(reqs, nreqs);
return rc;
}
return ompi_request_wait_all (nreqs, basic_module->mccb_reqs, MPI_STATUSES_IGNORE);
rc = ompi_request_wait_all (nreqs, reqs, MPI_STATUSES_IGNORE);
if (OMPI_SUCCESS != rc) {
ompi_coll_base_free_reqs(reqs, nreqs);
}
return rc;
}
static int
@ -115,12 +117,11 @@ mca_coll_basic_neighbor_allgather_graph(const void *sbuf, int scount,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t *) module;
const mca_topo_base_comm_graph_2_2_0_t *graph = comm->c_topo->mtc.graph;
const int rank = ompi_comm_rank (comm);
const int *edges;
int degree;
ompi_request_t **reqs;
ompi_request_t **reqs, **preqs;
ptrdiff_t lb, extent;
int rc = MPI_SUCCESS, neighbor;
@ -132,10 +133,11 @@ mca_coll_basic_neighbor_allgather_graph(const void *sbuf, int scount,
}
ompi_datatype_get_extent(rdtype, &lb, &extent);
reqs = preqs = coll_base_comm_get_reqs( module->base_data, 2 * degree);
for (neighbor = 0, reqs = basic_module->mccb_reqs ; neighbor < degree ; ++neighbor) {
for (neighbor = 0; neighbor < degree ; ++neighbor) {
rc = MCA_PML_CALL(irecv(rbuf, rcount, rdtype, edges[neighbor], MCA_COLL_BASE_TAG_ALLGATHER,
comm, reqs++));
comm, preqs++));
if (OMPI_SUCCESS != rc) break;
rbuf = (char *) rbuf + extent * rcount;
@ -143,16 +145,20 @@ mca_coll_basic_neighbor_allgather_graph(const void *sbuf, int scount,
* a const for the send buffer. */
rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, edges[neighbor],
MCA_COLL_BASE_TAG_ALLGATHER, MCA_PML_BASE_SEND_STANDARD,
comm, reqs++));
comm, preqs++));
if (OMPI_SUCCESS != rc) break;
}
if (OMPI_SUCCESS != rc) {
/* should probably try to clean up here */
ompi_coll_base_free_reqs( reqs, (2 * neighbor + 1));
return rc;
}
return ompi_request_wait_all (degree * 2, basic_module->mccb_reqs, MPI_STATUSES_IGNORE);
rc = ompi_request_wait_all (degree * 2, reqs, MPI_STATUSES_IGNORE);
if (OMPI_SUCCESS != rc) {
ompi_coll_base_free_reqs( reqs, degree * 2);
}
return rc;
}
static int
@ -162,11 +168,10 @@ mca_coll_basic_neighbor_allgather_dist_graph(const void *sbuf, int scount,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t *) module;
const mca_topo_base_comm_dist_graph_2_2_0_t *dist_graph = comm->c_topo->mtc.dist_graph;
const int *inedges, *outedges;
int indegree, outdegree;
ompi_request_t **reqs;
ompi_request_t **reqs, **preqs;
ptrdiff_t lb, extent;
int rc = MPI_SUCCESS, neighbor;
@ -177,17 +182,18 @@ mca_coll_basic_neighbor_allgather_dist_graph(const void *sbuf, int scount,
outedges = dist_graph->out;
ompi_datatype_get_extent(rdtype, &lb, &extent);
reqs = preqs = coll_base_comm_get_reqs( module->base_data, indegree + outdegree);
for (neighbor = 0, reqs = basic_module->mccb_reqs ; neighbor < indegree ; ++neighbor) {
for (neighbor = 0; neighbor < indegree ; ++neighbor) {
rc = MCA_PML_CALL(irecv(rbuf, rcount, rdtype, inedges[neighbor],
MCA_COLL_BASE_TAG_ALLGATHER,
comm, reqs++));
comm, preqs++));
if (OMPI_SUCCESS != rc) break;
rbuf = (char *) rbuf + extent * rcount;
}
if (OMPI_SUCCESS != rc) {
/* should probably try to clean up here */
ompi_coll_base_free_reqs(reqs, neighbor);
return rc;
}
@ -197,16 +203,20 @@ mca_coll_basic_neighbor_allgather_dist_graph(const void *sbuf, int scount,
rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, outedges[neighbor],
MCA_COLL_BASE_TAG_ALLGATHER,
MCA_PML_BASE_SEND_STANDARD,
comm, reqs++));
comm, preqs++));
if (OMPI_SUCCESS != rc) break;
}
if (OMPI_SUCCESS != rc) {
/* should probably try to clean up here */
ompi_coll_base_free_reqs(reqs, indegree + neighbor);
return rc;
}
return ompi_request_wait_all (indegree + outdegree, basic_module->mccb_reqs, MPI_STATUSES_IGNORE);
rc = ompi_request_wait_all (indegree + outdegree, reqs, MPI_STATUSES_IGNORE);
if (OMPI_SUCCESS != rc) {
ompi_coll_base_free_reqs(reqs, indegree + outdegree);
}
return rc;
}
int mca_coll_basic_neighbor_allgather(const void *sbuf, int scount,

View file

@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -41,16 +41,15 @@ mca_coll_basic_neighbor_allgatherv_cart(const void *sbuf, int scount, struct omp
struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t *) module;
const mca_topo_base_comm_cart_2_2_0_t *cart = comm->c_topo->mtc.cart;
const int rank = ompi_comm_rank (comm);
ompi_request_t **reqs;
ompi_request_t **reqs, **preqs;
ptrdiff_t lb, extent;
int rc = MPI_SUCCESS, dim, i, nreqs;
ompi_datatype_get_extent(rdtype, &lb, &extent);
reqs = basic_module->mccb_reqs;
reqs = preqs = coll_base_comm_get_reqs( module->base_data, 4 * cart->ndims);
/* The ordering is defined as -1 then +1 in each dimension in
* order of dimension. */
@ -64,36 +63,40 @@ mca_coll_basic_neighbor_allgatherv_cart(const void *sbuf, int scount, struct omp
}
if (MPI_PROC_NULL != srank) {
nreqs += 2;
rc = MCA_PML_CALL(irecv((char *) rbuf + disps[i] * extent, rcounts[i], rdtype, srank,
MCA_COLL_BASE_TAG_ALLGATHER, comm, reqs++));
MCA_COLL_BASE_TAG_ALLGATHER, comm, preqs++));
if (OMPI_SUCCESS != rc) break;
/* remove cast from const when the pml layer is updated to take
* a const for the send buffer. */
rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, srank, MCA_COLL_BASE_TAG_ALLGATHER,
MCA_PML_BASE_SEND_STANDARD, comm, reqs++));
MCA_PML_BASE_SEND_STANDARD, comm, preqs++));
if (OMPI_SUCCESS != rc) break;
nreqs += 2;
}
if (MPI_PROC_NULL != drank) {
nreqs += 2;
rc = MCA_PML_CALL(irecv((char *) rbuf + disps[i+1] * extent, rcounts[i+1], rdtype, drank,
MCA_COLL_BASE_TAG_ALLGATHER, comm, reqs++));
MCA_COLL_BASE_TAG_ALLGATHER, comm, preqs++));
if (OMPI_SUCCESS != rc) break;
rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, drank, MCA_COLL_BASE_TAG_ALLGATHER,
MCA_PML_BASE_SEND_STANDARD, comm, reqs++));
MCA_PML_BASE_SEND_STANDARD, comm, preqs++));
if (OMPI_SUCCESS != rc) break;
nreqs += 2;
}
}
if (OMPI_SUCCESS != rc) {
/* should probably try to clean up here */
ompi_coll_base_free_reqs( reqs, nreqs );
return rc;
}
return ompi_request_wait_all (nreqs, basic_module->mccb_reqs, MPI_STATUSES_IGNORE);
rc = ompi_request_wait_all (nreqs, reqs, MPI_STATUSES_IGNORE);
if (OMPI_SUCCESS != rc) {
ompi_coll_base_free_reqs( reqs, nreqs );
}
return rc;
}
static int
@ -102,14 +105,12 @@ mca_coll_basic_neighbor_allgatherv_graph(const void *sbuf, int scount, struct om
struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t *) module;
const mca_topo_base_comm_graph_2_2_0_t *graph = comm->c_topo->mtc.graph;
const int rank = ompi_comm_rank (comm);
const int *edges;
int degree;
ompi_request_t **reqs;
int rc = MPI_SUCCESS, neighbor, degree;
ompi_request_t **reqs, **preqs;
ptrdiff_t lb, extent;
int rc = MPI_SUCCESS, neighbor;
mca_topo_base_graph_neighbors_count (comm, rank, &degree);
@ -119,26 +120,31 @@ mca_coll_basic_neighbor_allgatherv_graph(const void *sbuf, int scount, struct om
}
ompi_datatype_get_extent(rdtype, &lb, &extent);
reqs = preqs = coll_base_comm_get_reqs( module->base_data, 2 * degree);
for (neighbor = 0, reqs = basic_module->mccb_reqs ; neighbor < degree ; ++neighbor) {
for (neighbor = 0; neighbor < degree ; ++neighbor) {
rc = MCA_PML_CALL(irecv((char *) rbuf + disps[neighbor] * extent, rcounts[neighbor],
rdtype, edges[neighbor], MCA_COLL_BASE_TAG_ALLGATHER, comm, reqs++));
rdtype, edges[neighbor], MCA_COLL_BASE_TAG_ALLGATHER, comm, preqs++));
if (OMPI_SUCCESS != rc) break;
/* remove cast from const when the pml layer is updated to take
* a const for the send buffer. */
rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, edges[neighbor],
MCA_COLL_BASE_TAG_ALLGATHER, MCA_PML_BASE_SEND_STANDARD,
comm, reqs++));
comm, preqs++));
if (OMPI_SUCCESS != rc) break;
}
if (OMPI_SUCCESS != rc) {
/* should probably try to clean up here */
ompi_coll_base_free_reqs( reqs, 2 * (neighbor + 1) );
return rc;
}
return ompi_request_wait_all (degree * 2, basic_module->mccb_reqs, MPI_STATUSES_IGNORE);
rc = ompi_request_wait_all (degree * 2, reqs, MPI_STATUSES_IGNORE);
if (OMPI_SUCCESS != rc) {
ompi_coll_base_free_reqs( reqs, 2 * degree );
}
return rc;
}
static int
@ -147,11 +153,10 @@ mca_coll_basic_neighbor_allgatherv_dist_graph(const void *sbuf, int scount, stru
struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t *) module;
const mca_topo_base_comm_dist_graph_2_2_0_t *dist_graph = comm->c_topo->mtc.dist_graph;
const int *inedges, *outedges;
int indegree, outdegree;
ompi_request_t **reqs;
ompi_request_t **reqs, **preqs;
ptrdiff_t lb, extent;
int rc = MPI_SUCCESS, neighbor;
@ -162,15 +167,16 @@ mca_coll_basic_neighbor_allgatherv_dist_graph(const void *sbuf, int scount, stru
outedges = dist_graph->out;
ompi_datatype_get_extent(rdtype, &lb, &extent);
reqs = preqs = coll_base_comm_get_reqs( module->base_data, indegree + outdegree);
for (neighbor = 0, reqs = basic_module->mccb_reqs ; neighbor < indegree ; ++neighbor) {
for (neighbor = 0; neighbor < indegree ; ++neighbor) {
rc = MCA_PML_CALL(irecv((char *) rbuf + disps[neighbor] * extent, rcounts[neighbor], rdtype,
inedges[neighbor], MCA_COLL_BASE_TAG_ALLGATHER, comm, reqs++));
inedges[neighbor], MCA_COLL_BASE_TAG_ALLGATHER, comm, preqs++));
if (OMPI_SUCCESS != rc) break;
}
if (OMPI_SUCCESS != rc) {
/* should probably try to clean up here */
ompi_coll_base_free_reqs(reqs, neighbor);
return rc;
}
@ -179,16 +185,20 @@ mca_coll_basic_neighbor_allgatherv_dist_graph(const void *sbuf, int scount, stru
* a const for the send buffer. */
rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, outedges[neighbor],
MCA_COLL_BASE_TAG_ALLGATHER, MCA_PML_BASE_SEND_STANDARD,
comm, reqs++));
comm, preqs++));
if (OMPI_SUCCESS != rc) break;
}
if (OMPI_SUCCESS != rc) {
/* should probably try to clean up here */
ompi_coll_base_free_reqs(reqs, indegree + neighbor);
return rc;
}
return ompi_request_wait_all (indegree + outdegree, basic_module->mccb_reqs, MPI_STATUSES_IGNORE);
rc = ompi_request_wait_all (indegree + outdegree, reqs, MPI_STATUSES_IGNORE);
if (OMPI_SUCCESS != rc) {
ompi_coll_base_free_reqs(reqs, indegree + outdegree);
}
return rc;
}
int mca_coll_basic_neighbor_allgatherv(const void *sbuf, int scount, struct ompi_datatype_t *sdtype,

View file

@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -40,18 +40,18 @@ mca_coll_basic_neighbor_alltoall_cart(const void *sbuf, int scount, struct ompi_
int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t *) module;
const mca_topo_base_comm_cart_2_2_0_t *cart = comm->c_topo->mtc.cart;
const int rank = ompi_comm_rank (comm);
ompi_request_t **reqs;
ompi_request_t **reqs, **preqs;
ptrdiff_t lb, rdextent, sdextent;
int rc = MPI_SUCCESS, dim, nreqs;
ompi_datatype_get_extent(rdtype, &lb, &rdextent);
ompi_datatype_get_extent(sdtype, &lb, &sdextent);
reqs = preqs = coll_base_comm_get_reqs( module->base_data, 4 * cart->ndims);
/* post receives first */
for (dim = 0, nreqs = 0, reqs = basic_module->mccb_reqs ; dim < cart->ndims ; ++dim) {
for (dim = 0, nreqs = 0; dim < cart->ndims ; ++dim) {
int srank = MPI_PROC_NULL, drank = MPI_PROC_NULL;
if (cart->dims[dim] > 1) {
@ -61,28 +61,28 @@ mca_coll_basic_neighbor_alltoall_cart(const void *sbuf, int scount, struct ompi_
}
if (MPI_PROC_NULL != srank) {
nreqs++;
rc = MCA_PML_CALL(irecv(rbuf, rcount, rdtype, srank,
MCA_COLL_BASE_TAG_ALLTOALL,
comm, reqs++));
comm, preqs++));
if (OMPI_SUCCESS != rc) break;
nreqs++;
}
rbuf = (char *) rbuf + rdextent * rcount;
if (MPI_PROC_NULL != drank) {
nreqs++;
rc = MCA_PML_CALL(irecv(rbuf, rcount, rdtype, drank,
MCA_COLL_BASE_TAG_ALLTOALL,
comm, reqs++));
comm, preqs++));
if (OMPI_SUCCESS != rc) break;
nreqs++;
}
rbuf = (char *) rbuf + rdextent * rcount;
}
if (OMPI_SUCCESS != rc) {
/* should probably try to clean up here */
ompi_coll_base_free_reqs( reqs, nreqs);
return rc;
}
@ -98,34 +98,38 @@ mca_coll_basic_neighbor_alltoall_cart(const void *sbuf, int scount, struct ompi_
if (MPI_PROC_NULL != srank) {
/* remove cast from const when the pml layer is updated to take
* a const for the send buffer. */
nreqs++;
rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, srank,
MCA_COLL_BASE_TAG_ALLTOALL,
MCA_PML_BASE_SEND_STANDARD,
comm, reqs++));
comm, preqs++));
if (OMPI_SUCCESS != rc) break;
nreqs++;
}
sbuf = (const char *) sbuf + sdextent * scount;
if (MPI_PROC_NULL != drank) {
nreqs++;
rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, drank,
MCA_COLL_BASE_TAG_ALLTOALL,
MCA_PML_BASE_SEND_STANDARD,
comm, reqs++));
comm, preqs++));
if (OMPI_SUCCESS != rc) break;
nreqs++;
}
sbuf = (const char *) sbuf + sdextent * scount;
}
if (OMPI_SUCCESS != rc) {
/* should probably try to clean up here */
ompi_coll_base_free_reqs( reqs, nreqs);
return rc;
}
return ompi_request_wait_all (nreqs, basic_module->mccb_reqs, MPI_STATUSES_IGNORE);
rc = ompi_request_wait_all (nreqs, reqs, MPI_STATUSES_IGNORE);
if (OMPI_SUCCESS != rc) {
ompi_coll_base_free_reqs( reqs, nreqs);
}
return rc;
}
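
In the cart variants above, the nreqs++ increment moves ahead of the PML call, so when a post fails the cleanup call ompi_coll_base_free_reqs(reqs, nreqs) still covers the slot that was being filled. A compilable sketch of that control flow; post and free_reqs are hypothetical stubs standing in for MCA_PML_CALL(...) and the base free routine:

typedef void *request_t;

static int post(request_t *slot) { *slot = (request_t) 1; return 0; }

static void free_reqs(request_t *reqs, int n)
{
    for (int i = 0; i < n; ++i) reqs[i] = (request_t) 0;
}

static int post_all(request_t *reqs, int ndims)
{
    int nreqs = 0, rc = 0;
    for (int dim = 0; dim < ndims; ++dim) {
        nreqs++;                      /* account for the slot first ...  */
        rc = post(&reqs[nreqs - 1]);  /* ... then try to post into it    */
        if (0 != rc) break;           /* nreqs still covers the bad slot */
    }
    if (0 != rc) {
        free_reqs(reqs, nreqs);       /* posted requests + the failed one */
    }
    return rc;
}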
static int
@ -133,12 +137,11 @@ mca_coll_basic_neighbor_alltoall_graph(const void *sbuf, int scount, struct ompi
int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t *) module;
const mca_topo_base_comm_graph_2_2_0_t *graph = comm->c_topo->mtc.graph;
const int rank = ompi_comm_rank (comm);
int rc = MPI_SUCCESS, neighbor, degree;
ptrdiff_t lb, rdextent, sdextent;
ompi_request_t **reqs;
ompi_request_t **reqs, **preqs;
const int *edges;
mca_topo_base_graph_neighbors_count (comm, rank, &degree);
@ -150,31 +153,40 @@ mca_coll_basic_neighbor_alltoall_graph(const void *sbuf, int scount, struct ompi
ompi_datatype_get_extent(rdtype, &lb, &rdextent);
ompi_datatype_get_extent(sdtype, &lb, &sdextent);
reqs = preqs = coll_base_comm_get_reqs( module->base_data, 2 * degree);
/* post receives first */
for (neighbor = 0, reqs = basic_module->mccb_reqs ; neighbor < degree ; ++neighbor) {
for (neighbor = 0; neighbor < degree ; ++neighbor) {
rc = MCA_PML_CALL(irecv(rbuf, rcount, rdtype, edges[neighbor], MCA_COLL_BASE_TAG_ALLTOALL,
comm, reqs++));
comm, preqs++));
if (OMPI_SUCCESS != rc) break;
rbuf = (char *) rbuf + rdextent * rcount;
}
if( MPI_SUCCESS != rc ) {
ompi_coll_base_free_reqs( reqs, neighbor );
return rc;
}
for (neighbor = 0 ; neighbor < degree ; ++neighbor) {
/* remove cast from const when the pml layer is updated to take
* a const for the send buffer. */
rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, edges[neighbor],
MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD,
comm, reqs++));
comm, preqs++));
if (OMPI_SUCCESS != rc) break;
sbuf = (const char *) sbuf + sdextent * scount;
}
if (OMPI_SUCCESS != rc) {
/* should probably try to clean up here */
if( MPI_SUCCESS != rc ) {
ompi_coll_base_free_reqs( reqs, degree + neighbor );
return rc;
}
return ompi_request_wait_all (degree * 2, basic_module->mccb_reqs, MPI_STATUSES_IGNORE);
rc = ompi_request_wait_all (degree * 2, reqs, MPI_STATUSES_IGNORE);
if( MPI_SUCCESS != rc ) {
ompi_coll_base_free_reqs( reqs, 2 * degree );
}
return rc;
}
static int
@ -182,13 +194,12 @@ mca_coll_basic_neighbor_alltoall_dist_graph(const void *sbuf, int scount,struct
int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t *) module;
const mca_topo_base_comm_dist_graph_2_2_0_t *dist_graph = comm->c_topo->mtc.dist_graph;
ptrdiff_t lb, rdextent, sdextent;
int rc = MPI_SUCCESS, neighbor;
const int *inedges, *outedges;
int indegree, outdegree;
ompi_request_t **reqs;
ompi_request_t **reqs, **preqs;
indegree = dist_graph->indegree;
outdegree = dist_graph->outdegree;
@ -198,36 +209,41 @@ mca_coll_basic_neighbor_alltoall_dist_graph(const void *sbuf, int scount,struct
ompi_datatype_get_extent(rdtype, &lb, &rdextent);
ompi_datatype_get_extent(sdtype, &lb, &sdextent);
reqs = preqs = coll_base_comm_get_reqs( module->base_data, indegree + outdegree);
/* post receives first */
for (neighbor = 0, reqs = basic_module->mccb_reqs ; neighbor < indegree ; ++neighbor, ++reqs) {
for (neighbor = 0; neighbor < indegree ; ++neighbor) {
rc = MCA_PML_CALL(irecv(rbuf, rcount, rdtype, inedges[neighbor],
MCA_COLL_BASE_TAG_ALLTOALL,
comm, reqs));
comm, preqs++));
if (OMPI_SUCCESS != rc) break;
rbuf = (char *) rbuf + rdextent * rcount;
}
if (OMPI_SUCCESS != rc) {
/* should probably try to clean up here */
ompi_coll_base_free_reqs(reqs, neighbor);
return rc;
}
for (neighbor = 0 ; neighbor < outdegree ; ++neighbor, ++reqs) {
for (neighbor = 0 ; neighbor < outdegree ; ++neighbor) {
/* remove cast from const when the pml layer is updated to take a const for the send buffer */
rc = MCA_PML_CALL(isend((void *) sbuf, scount, sdtype, outedges[neighbor],
MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD,
comm, reqs));
comm, preqs++));
if (OMPI_SUCCESS != rc) break;
sbuf = (char *) sbuf + sdextent * scount;
}
if (OMPI_SUCCESS != rc) {
/* should probably try to clean up here */
ompi_coll_base_free_reqs(reqs, indegree + neighbor);
return rc;
}
return ompi_request_wait_all (indegree + outdegree, basic_module->mccb_reqs, MPI_STATUSES_IGNORE);
rc = ompi_request_wait_all (indegree + outdegree, reqs, MPI_STATUSES_IGNORE);
if (OMPI_SUCCESS != rc) {
ompi_coll_base_free_reqs(reqs, indegree + outdegree);
}
return rc;
}
int mca_coll_basic_neighbor_alltoall(const void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf,

View file

@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -41,18 +41,18 @@ mca_coll_basic_neighbor_alltoallv_cart(const void *sbuf, const int scounts[], co
const int rdisps[], struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm, mca_coll_base_module_t *module)
{
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t *) module;
const mca_topo_base_comm_cart_2_2_0_t *cart = comm->c_topo->mtc.cart;
const int rank = ompi_comm_rank (comm);
int rc = MPI_SUCCESS, dim, i, nreqs;
ptrdiff_t lb, rdextent, sdextent;
ompi_request_t **reqs;
ompi_request_t **reqs, **preqs;
ompi_datatype_get_extent(rdtype, &lb, &rdextent);
ompi_datatype_get_extent(sdtype, &lb, &sdextent);
reqs = preqs = coll_base_comm_get_reqs( module->base_data, 4 * cart->ndims );
/* post receives first */
for (dim = 0, nreqs = 0, i = 0, reqs = basic_module->mccb_reqs ; dim < cart->ndims ; ++dim, i += 2) {
for (dim = 0, nreqs = 0, i = 0; dim < cart->ndims ; ++dim, i += 2) {
int srank = MPI_PROC_NULL, drank = MPI_PROC_NULL;
if (cart->dims[dim] > 1) {
@ -62,22 +62,22 @@ mca_coll_basic_neighbor_alltoallv_cart(const void *sbuf, const int scounts[], co
}
if (MPI_PROC_NULL != srank) {
rc = MCA_PML_CALL(irecv((char *) rbuf + rdisps[i] * rdextent, rcounts[i], rdtype, srank,
MCA_COLL_BASE_TAG_ALLTOALL, comm, reqs++));
if (OMPI_SUCCESS != rc) break;
nreqs++;
rc = MCA_PML_CALL(irecv((char *) rbuf + rdisps[i] * rdextent, rcounts[i], rdtype, srank,
MCA_COLL_BASE_TAG_ALLTOALL, comm, preqs++));
if (OMPI_SUCCESS != rc) break;
}
if (MPI_PROC_NULL != drank) {
rc = MCA_PML_CALL(irecv((char *) rbuf + rdisps[i+1] * rdextent, rcounts[i+1], rdtype, drank,
MCA_COLL_BASE_TAG_ALLTOALL, comm, reqs++));
if (OMPI_SUCCESS != rc) break;
nreqs++;
rc = MCA_PML_CALL(irecv((char *) rbuf + rdisps[i+1] * rdextent, rcounts[i+1], rdtype, drank,
MCA_COLL_BASE_TAG_ALLTOALL, comm, preqs++));
if (OMPI_SUCCESS != rc) break;
}
}
if (OMPI_SUCCESS != rc) {
/* should probably try to clean up here */
ompi_coll_base_free_reqs( reqs, nreqs );
return rc;
}
@ -91,27 +91,31 @@ mca_coll_basic_neighbor_alltoallv_cart(const void *sbuf, const int scounts[], co
}
if (MPI_PROC_NULL != srank) {
nreqs++;
/* remove cast from const when the pml layer is updated to take a const for the send buffer */
rc = MCA_PML_CALL(isend((char *) sbuf + sdisps[i] * sdextent, scounts[i], sdtype, srank,
MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, comm, reqs++));
MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, comm, preqs++));
if (OMPI_SUCCESS != rc) break;
nreqs++;
}
if (MPI_PROC_NULL != drank) {
rc = MCA_PML_CALL(isend((char *) sbuf + sdisps[i+1] * sdextent, scounts[i+1], sdtype, drank,
MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, comm, reqs++));
if (OMPI_SUCCESS != rc) break;
nreqs++;
rc = MCA_PML_CALL(isend((char *) sbuf + sdisps[i+1] * sdextent, scounts[i+1], sdtype, drank,
MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, comm, preqs++));
if (OMPI_SUCCESS != rc) break;
}
}
if (OMPI_SUCCESS != rc) {
/* should probably try to clean up here */
ompi_coll_base_free_reqs( reqs, nreqs );
return rc;
}
return ompi_request_wait_all (nreqs, basic_module->mccb_reqs, MPI_STATUSES_IGNORE);
rc = ompi_request_wait_all (nreqs, reqs, MPI_STATUSES_IGNORE);
if (OMPI_SUCCESS != rc) {
ompi_coll_base_free_reqs( reqs, nreqs );
}
return rc;
}
static int
@ -120,12 +124,11 @@ mca_coll_basic_neighbor_alltoallv_graph(const void *sbuf, const int scounts[], c
const int rdisps[], struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm, mca_coll_base_module_t *module)
{
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t *) module;
const mca_topo_base_comm_graph_2_2_0_t *graph = comm->c_topo->mtc.graph;
int rc = MPI_SUCCESS, neighbor, degree;
const int rank = ompi_comm_rank (comm);
ptrdiff_t lb, rdextent, sdextent;
ompi_request_t **reqs;
ompi_request_t **reqs, **preqs;
const int *edges;
mca_topo_base_graph_neighbors_count (comm, rank, &degree);
@ -137,16 +140,17 @@ mca_coll_basic_neighbor_alltoallv_graph(const void *sbuf, const int scounts[], c
ompi_datatype_get_extent(rdtype, &lb, &rdextent);
ompi_datatype_get_extent(sdtype, &lb, &sdextent);
reqs = preqs = coll_base_comm_get_reqs( module->base_data, 2 * degree );
/* post all receives first */
for (neighbor = 0, reqs = basic_module->mccb_reqs ; neighbor < degree ; ++neighbor) {
for (neighbor = 0; neighbor < degree ; ++neighbor) {
rc = MCA_PML_CALL(irecv((char *) rbuf + rdisps[neighbor] * rdextent, rcounts[neighbor], rdtype,
edges[neighbor], MCA_COLL_BASE_TAG_ALLTOALL, comm, reqs++));
edges[neighbor], MCA_COLL_BASE_TAG_ALLTOALL, comm, preqs++));
if (OMPI_SUCCESS != rc) break;
}
if (OMPI_SUCCESS != rc) {
/* should probably try to clean up here */
ompi_coll_base_free_reqs( reqs, neighbor );
return rc;
}
@ -154,16 +158,20 @@ mca_coll_basic_neighbor_alltoallv_graph(const void *sbuf, const int scounts[], c
/* remove cast from const when the pml layer is updated to take a const for the send buffer */
rc = MCA_PML_CALL(isend((char *) sbuf + sdisps[neighbor] * sdextent, scounts[neighbor], sdtype,
edges[neighbor], MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD,
comm, reqs++));
comm, preqs++));
if (OMPI_SUCCESS != rc) break;
}
if (OMPI_SUCCESS != rc) {
/* should probably try to clean up here */
ompi_coll_base_free_reqs( reqs, degree + neighbor );
return rc;
}
return ompi_request_wait_all (degree * 2, basic_module->mccb_reqs, MPI_STATUSES_IGNORE);
rc = ompi_request_wait_all (degree * 2, reqs, MPI_STATUSES_IGNORE);
if (OMPI_SUCCESS != rc) {
ompi_coll_base_free_reqs( reqs, degree * 2);
}
return rc;
}
static int
@ -172,13 +180,12 @@ mca_coll_basic_neighbor_alltoallv_dist_graph(const void *sbuf, const int scounts
const int rdisps[], struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm, mca_coll_base_module_t *module)
{
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t *) module;
const mca_topo_base_comm_dist_graph_2_2_0_t *dist_graph = comm->c_topo->mtc.dist_graph;
ptrdiff_t lb, rdextent, sdextent;
int rc = MPI_SUCCESS, neighbor;
const int *inedges, *outedges;
int indegree, outdegree;
ompi_request_t **reqs;
ompi_request_t **reqs, **preqs;
indegree = dist_graph->indegree;
outdegree = dist_graph->outdegree;
@ -188,16 +195,17 @@ mca_coll_basic_neighbor_alltoallv_dist_graph(const void *sbuf, const int scounts
ompi_datatype_get_extent(rdtype, &lb, &rdextent);
ompi_datatype_get_extent(sdtype, &lb, &sdextent);
reqs = preqs = coll_base_comm_get_reqs( module->base_data, indegree + outdegree);
/* post all receives first */
for (neighbor = 0, reqs = basic_module->mccb_reqs ; neighbor < indegree ; ++neighbor) {
for (neighbor = 0; neighbor < indegree ; ++neighbor) {
rc = MCA_PML_CALL(irecv((char *) rbuf + rdisps[neighbor] * rdextent, rcounts[neighbor], rdtype,
inedges[neighbor], MCA_COLL_BASE_TAG_ALLTOALL, comm, reqs++));
inedges[neighbor], MCA_COLL_BASE_TAG_ALLTOALL, comm, preqs++));
if (OMPI_SUCCESS != rc) break;
}
if (OMPI_SUCCESS != rc) {
/* should probably try to clean up here */
ompi_coll_base_free_reqs( reqs, neighbor );
return rc;
}
@ -205,16 +213,20 @@ mca_coll_basic_neighbor_alltoallv_dist_graph(const void *sbuf, const int scounts
/* remove cast from const when the pml layer is updated to take a const for the send buffer */
rc = MCA_PML_CALL(isend((char *) sbuf + sdisps[neighbor] * sdextent, scounts[neighbor], sdtype,
outedges[neighbor], MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD,
comm, reqs++));
comm, preqs++));
if (OMPI_SUCCESS != rc) break;
}
if (OMPI_SUCCESS != rc) {
/* should probably try to clean up here */
ompi_coll_base_free_reqs( reqs, indegree + neighbor );
return rc;
}
return ompi_request_wait_all (indegree + outdegree, basic_module->mccb_reqs, MPI_STATUSES_IGNORE);
rc = ompi_request_wait_all (indegree + outdegree, reqs, MPI_STATUSES_IGNORE);
if (OMPI_SUCCESS != rc) {
ompi_coll_base_free_reqs( reqs, indegree + outdegree );
}
return rc;
}
int mca_coll_basic_neighbor_alltoallv(const void *sbuf, const int scounts[], const int sdisps[],

View file

@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -41,15 +41,15 @@ mca_coll_basic_neighbor_alltoallw_cart(const void *sbuf, const int scounts[], co
const MPI_Aint rdisps[], struct ompi_datatype_t * const *rdtypes,
struct ompi_communicator_t *comm, mca_coll_base_module_t *module)
{
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t *) module;
const mca_topo_base_comm_cart_2_2_0_t *cart = comm->c_topo->mtc.cart;
const int rank = ompi_comm_rank (comm);
int rc = MPI_SUCCESS, dim, i, nreqs;
ompi_request_t **reqs;
ompi_request_t **reqs, **preqs;
reqs = preqs = coll_base_comm_get_reqs( module->base_data, 4 * cart->ndims );
/* post receives first */
for (dim = 0, i = 0, nreqs = 0, reqs = basic_module->mccb_reqs ; dim < cart->ndims ; ++dim, i += 2) {
for (dim = 0, i = 0, nreqs = 0; dim < cart->ndims ; ++dim, i += 2) {
int srank = MPI_PROC_NULL, drank = MPI_PROC_NULL;
if (cart->dims[dim] > 1) {
@ -59,22 +59,22 @@ mca_coll_basic_neighbor_alltoallw_cart(const void *sbuf, const int scounts[], co
}
if (MPI_PROC_NULL != srank) {
rc = MCA_PML_CALL(irecv((char *) rbuf + rdisps[i], rcounts[i], rdtypes[i], srank,
MCA_COLL_BASE_TAG_ALLTOALL, comm, reqs++));
if (OMPI_SUCCESS != rc) break;
nreqs++;
rc = MCA_PML_CALL(irecv((char *) rbuf + rdisps[i], rcounts[i], rdtypes[i], srank,
MCA_COLL_BASE_TAG_ALLTOALL, comm, preqs++));
if (OMPI_SUCCESS != rc) break;
}
if (MPI_PROC_NULL != drank) {
rc = MCA_PML_CALL(irecv((char *) rbuf + rdisps[i+1], rcounts[i+1], rdtypes[i+1], drank,
MCA_COLL_BASE_TAG_ALLTOALL, comm, reqs++));
if (OMPI_SUCCESS != rc) break;
nreqs++;
rc = MCA_PML_CALL(irecv((char *) rbuf + rdisps[i+1], rcounts[i+1], rdtypes[i+1], drank,
MCA_COLL_BASE_TAG_ALLTOALL, comm, preqs++));
if (OMPI_SUCCESS != rc) break;
}
}
if (OMPI_SUCCESS != rc) {
/* should probably try to clean up here */
ompi_coll_base_free_reqs( reqs, nreqs );
return rc;
}
@ -88,27 +88,31 @@ mca_coll_basic_neighbor_alltoallw_cart(const void *sbuf, const int scounts[], co
}
if (MPI_PROC_NULL != srank) {
nreqs++;
/* remove cast from const when the pml layer is updated to take a const for the send buffer */
rc = MCA_PML_CALL(isend((char *) sbuf + sdisps[i], scounts[i], sdtypes[i], srank,
MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, comm, reqs++));
MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, comm, preqs++));
if (OMPI_SUCCESS != rc) break;
nreqs++;
}
if (MPI_PROC_NULL != drank) {
rc = MCA_PML_CALL(isend((char *) sbuf + sdisps[i+1], scounts[i+1], sdtypes[i+1], drank,
MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, comm, reqs++));
if (OMPI_SUCCESS != rc) break;
nreqs++;
rc = MCA_PML_CALL(isend((char *) sbuf + sdisps[i+1], scounts[i+1], sdtypes[i+1], drank,
MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, comm, preqs++));
if (OMPI_SUCCESS != rc) break;
}
}
if (OMPI_SUCCESS != rc) {
/* should probably try to clean up here */
ompi_coll_base_free_reqs( reqs, nreqs );
return rc;
}
return ompi_request_wait_all (nreqs, basic_module->mccb_reqs, MPI_STATUSES_IGNORE);
rc = ompi_request_wait_all (nreqs, reqs, MPI_STATUSES_IGNORE);
if (OMPI_SUCCESS != rc) {
ompi_coll_base_free_reqs( reqs, nreqs );
}
return rc;
}
static int
@ -117,14 +121,14 @@ mca_coll_basic_neighbor_alltoallw_graph(const void *sbuf, const int scounts[], c
const MPI_Aint rdisps[], struct ompi_datatype_t * const rdtypes[],
struct ompi_communicator_t *comm, mca_coll_base_module_t *module)
{
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t *) module;
const mca_topo_base_comm_graph_2_2_0_t *graph = comm->c_topo->mtc.graph;
int rc = MPI_SUCCESS, neighbor, degree;
const int rank = ompi_comm_rank (comm);
ompi_request_t **reqs;
ompi_request_t **reqs, **preqs;
const int *edges;
mca_topo_base_graph_neighbors_count (comm, rank, &degree);
reqs = preqs = coll_base_comm_get_reqs( module->base_data, 2 * degree );
edges = graph->edges;
if (rank > 0) {
@ -132,14 +136,14 @@ mca_coll_basic_neighbor_alltoallw_graph(const void *sbuf, const int scounts[], c
}
/* post all receives first */
for (neighbor = 0, reqs = basic_module->mccb_reqs ; neighbor < degree ; ++neighbor) {
for (neighbor = 0; neighbor < degree ; ++neighbor) {
rc = MCA_PML_CALL(irecv((char *) rbuf + rdisps[neighbor], rcounts[neighbor], rdtypes[neighbor],
edges[neighbor], MCA_COLL_BASE_TAG_ALLTOALL, comm, reqs++));
edges[neighbor], MCA_COLL_BASE_TAG_ALLTOALL, comm, preqs++));
if (OMPI_SUCCESS != rc) break;
}
if (OMPI_SUCCESS != rc) {
/* should probably try to clean up here */
ompi_coll_base_free_reqs( reqs, neighbor );
return rc;
}
@ -147,16 +151,20 @@ mca_coll_basic_neighbor_alltoallw_graph(const void *sbuf, const int scounts[], c
/* remove cast from const when the pml layer is updated to take a const for the send buffer */
rc = MCA_PML_CALL(isend((char *) sbuf + sdisps[neighbor], scounts[neighbor], sdtypes[neighbor],
edges[neighbor], MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD,
comm, reqs++));
comm, preqs++));
if (OMPI_SUCCESS != rc) break;
}
if (OMPI_SUCCESS != rc) {
/* should probably try to clean up here */
ompi_coll_base_free_reqs( reqs, neighbor + degree );
return rc;
}
return ompi_request_wait_all (degree * 2, basic_module->mccb_reqs, MPI_STATUSES_IGNORE);
rc = ompi_request_wait_all (degree * 2, reqs, MPI_STATUSES_IGNORE);
if (OMPI_SUCCESS != rc) {
ompi_coll_base_free_reqs( reqs, degree * 2 );
}
return rc;
}
static int
@ -165,12 +173,11 @@ mca_coll_basic_neighbor_alltoallw_dist_graph(const void *sbuf, const int scounts
const MPI_Aint rdisps[], struct ompi_datatype_t * const *rdtypes,
struct ompi_communicator_t *comm, mca_coll_base_module_t *module)
{
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t *) module;
const mca_topo_base_comm_dist_graph_2_2_0_t *dist_graph = comm->c_topo->mtc.dist_graph;
int rc = MPI_SUCCESS, neighbor;
const int *inedges, *outedges;
int indegree, outdegree;
ompi_request_t **reqs;
ompi_request_t **reqs, **preqs;
indegree = dist_graph->indegree;
outdegree = dist_graph->outdegree;
@ -178,15 +185,16 @@ mca_coll_basic_neighbor_alltoallw_dist_graph(const void *sbuf, const int scounts
inedges = dist_graph->in;
outedges = dist_graph->out;
reqs = preqs = coll_base_comm_get_reqs( module->base_data, indegree + outdegree );
/* post all receives first */
for (neighbor = 0, reqs = basic_module->mccb_reqs ; neighbor < indegree ; ++neighbor) {
for (neighbor = 0; neighbor < indegree ; ++neighbor) {
rc = MCA_PML_CALL(irecv((char *) rbuf + rdisps[neighbor], rcounts[neighbor], rdtypes[neighbor],
inedges[neighbor], MCA_COLL_BASE_TAG_ALLTOALL, comm, reqs++));
inedges[neighbor], MCA_COLL_BASE_TAG_ALLTOALL, comm, preqs++));
if (OMPI_SUCCESS != rc) break;
}
if (OMPI_SUCCESS != rc) {
/* should probably try to clean up here */
ompi_coll_base_free_reqs( reqs, neighbor );
return rc;
}
@ -194,16 +202,20 @@ mca_coll_basic_neighbor_alltoallw_dist_graph(const void *sbuf, const int scounts
/* remove cast from const when the pml layer is updated to take a const for the send buffer */
rc = MCA_PML_CALL(isend((char *) sbuf + sdisps[neighbor], scounts[neighbor], sdtypes[neighbor],
outedges[neighbor], MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD,
comm, reqs++));
comm, preqs++));
if (OMPI_SUCCESS != rc) break;
}
if (OMPI_SUCCESS != rc) {
/* should probably try to clean up here */
ompi_coll_base_free_reqs( reqs, indegree + neighbor );
return rc;
}
return ompi_request_wait_all (indegree + outdegree, basic_module->mccb_reqs, MPI_STATUSES_IGNORE);
rc = ompi_request_wait_all (indegree + outdegree, reqs, MPI_STATUSES_IGNORE);
if (OMPI_SUCCESS != rc) {
ompi_coll_base_free_reqs( reqs, indegree + outdegree );
}
return rc;
}
int mca_coll_basic_neighbor_alltoallw(const void *sbuf, const int scounts[], const MPI_Aint sdisps[],

View file

@ -48,8 +48,7 @@ mca_coll_basic_scatter_inter(const void *sbuf, int scount,
int i, size, err;
char *ptmp;
ptrdiff_t lb, incr;
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module;
ompi_request_t **reqs = basic_module->mccb_reqs;
ompi_request_t **reqs;
/* Initialize */
size = ompi_comm_remote_size(comm);
@ -69,6 +68,8 @@ mca_coll_basic_scatter_inter(const void *sbuf, int scount,
return OMPI_ERROR;
}
reqs = coll_base_comm_get_reqs(module->base_data, size);
incr *= scount;
for (i = 0, ptmp = (char *) sbuf; i < size; ++i, ptmp += incr) {
err = MCA_PML_CALL(isend(ptmp, scount, sdtype, i,
@ -76,13 +77,15 @@ mca_coll_basic_scatter_inter(const void *sbuf, int scount,
MCA_PML_BASE_SEND_STANDARD, comm,
reqs++));
if (OMPI_SUCCESS != err) {
ompi_coll_base_free_reqs(reqs, i);
return err;
}
}
err =
ompi_request_wait_all(size, basic_module->mccb_reqs,
MPI_STATUSES_IGNORE);
err = ompi_request_wait_all(size, reqs, MPI_STATUSES_IGNORE);
if (OMPI_SUCCESS != err) {
ompi_coll_base_free_reqs(reqs, size);
}
}
return err;

View file

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -121,8 +121,7 @@ mca_coll_basic_scatterv_inter(const void *sbuf, const int *scounts,
int i, size, err;
char *ptmp;
ptrdiff_t lb, extent;
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module;
ompi_request_t **reqs = basic_module->mccb_reqs;
ompi_request_t **reqs;
/* Initialize */
size = ompi_comm_remote_size(comm);
@ -145,6 +144,7 @@ mca_coll_basic_scatterv_inter(const void *sbuf, const int *scounts,
return OMPI_ERROR;
}
reqs = coll_base_comm_get_reqs(module->base_data, size);
for (i = 0; i < size; ++i) {
ptmp = ((char *) sbuf) + (extent * disps[i]);
err = MCA_PML_CALL(isend(ptmp, scounts[i], sdtype, i,
@ -152,11 +152,15 @@ mca_coll_basic_scatterv_inter(const void *sbuf, const int *scounts,
MCA_PML_BASE_SEND_STANDARD, comm,
&(reqs[i])));
if (OMPI_SUCCESS != err) {
ompi_coll_base_free_reqs(reqs, i);
return err;
}
}
err = ompi_request_wait_all(size, reqs, MPI_STATUSES_IGNORE);
if (OMPI_SUCCESS != err) {
ompi_coll_base_free_reqs(reqs, size);
}
}
/* All done */

View file

@ -145,9 +145,6 @@ int mca_coll_self_ft_event(int state);
struct mca_coll_self_module_t {
mca_coll_base_module_t super;
ompi_request_t **mccb_reqs;
int mccb_num_reqs;
};
typedef struct mca_coll_self_module_t mca_coll_self_module_t;
OBJ_CLASS_DECLARATION(mca_coll_self_module_t);

View file

@ -200,7 +200,7 @@ tuned_module_enable( mca_coll_base_module_t *module,
* The default is set very high
*/
/* if we within the memory/size limit, allow preallocated data */
/* prepare the placeholder for the array of request* */
data = OBJ_NEW(mca_coll_base_comm_t);
if (NULL == data) {
return OMPI_ERROR;

View file

@ -160,12 +160,6 @@ int mca_topo_base_cart_create(mca_topo_base_module_t *topo,
return MPI_ERR_INTERN;
}
assert(NULL == new_comm->c_topo);
assert(!(new_comm->c_flags & OMPI_COMM_CART));
new_comm->c_topo = topo;
new_comm->c_topo->mtc.cart = cart;
new_comm->c_topo->reorder = reorder;
new_comm->c_flags |= OMPI_COMM_CART;
ret = ompi_comm_enable(old_comm, new_comm,
new_rank, num_procs, topo_procs);
if (OMPI_SUCCESS != ret) {
@ -180,6 +174,10 @@ int mca_topo_base_cart_create(mca_topo_base_module_t *topo,
return ret;
}
new_comm->c_topo = topo;
new_comm->c_topo->mtc.cart = cart;
new_comm->c_topo->reorder = reorder;
new_comm->c_flags |= OMPI_COMM_CART;
*comm_topo = new_comm;
if( MPI_UNDEFINED == new_rank ) {
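
The cart hunk above defers setting c_topo, mtc.cart, reorder, and the OMPI_COMM_CART flag until ompi_comm_enable() has succeeded, so the failure path tears down a communicator that never acquired topology state. A generic, compilable sketch of this commit-after-success ordering; every name here is a hypothetical stand-in:

typedef struct { void *topo; int flags; } comm_t;
enum { FLAG_CART = 1 };

static int  comm_enable(comm_t *c) { (void) c; return 0; }  /* stub */
static void comm_free(comm_t **c)  { (void) c; }            /* stub */

static int attach_cart_topology(comm_t *comm, void *topo)
{
    int ret = comm_enable(comm);   /* the fallible setup step */
    if (0 != ret) {
        comm_free(&comm);          /* nothing to unwind: no topo fields yet */
        return ret;
    }
    comm->topo   = topo;           /* commit only after enable succeeded */
    comm->flags |= FLAG_CART;
    return 0;
}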

View file

@ -287,79 +287,29 @@ int mca_topo_base_dist_graph_create(mca_topo_base_module_t* module,
ompi_communicator_t **newcomm)
{
int err;
ompi_proc_t **topo_procs = NULL;
int num_procs, ret, rank, i;
ompi_communicator_t *new_comm;
mca_topo_base_comm_dist_graph_2_2_0_t* topo;
num_procs = ompi_comm_size(comm_old);
topo_procs = (ompi_proc_t**)malloc(num_procs * sizeof(ompi_proc_t *));
if (NULL == topo_procs) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
new_comm = ompi_comm_allocate(num_procs, 0);
if (NULL == new_comm) {
free(topo_procs);
return OMPI_ERR_OUT_OF_RESOURCE;
}
err = mca_topo_base_dist_graph_distribute(module,
comm_old,
n, nodes,
degrees, targets,
weights,
&topo);
if( OMPI_SUCCESS != err ) {
free(topo_procs);
ompi_comm_free(&new_comm);
if( OMPI_SUCCESS != (err = ompi_comm_create(comm_old,
comm_old->c_local_group,
newcomm)) ) {
OBJ_RELEASE(module);
return err;
}
/* we cannot simply call ompi_comm_create because c_topo
must be set before invoking ompi_comm_enable */
rank = ompi_comm_rank(comm_old);
if(OMPI_GROUP_IS_DENSE(comm_old->c_local_group)) {
memcpy(topo_procs,
comm_old->c_local_group->grp_proc_pointers,
num_procs * sizeof(ompi_proc_t *));
} else {
for(i = 0 ; i < num_procs; i++) {
topo_procs[i] = ompi_group_peer_lookup(comm_old->c_local_group,i);
}
}
assert(NULL == new_comm->c_topo);
new_comm->c_topo = module;
new_comm->c_topo->reorder = reorder;
new_comm->c_flags |= OMPI_COMM_DIST_GRAPH;
new_comm->c_topo->mtc.dist_graph = topo;
assert(NULL == (*newcomm)->c_topo);
(*newcomm)->c_topo = module;
(*newcomm)->c_topo->reorder = reorder;
(*newcomm)->c_flags |= OMPI_COMM_DIST_GRAPH;
ret = ompi_comm_enable(comm_old, new_comm,
rank, num_procs, topo_procs);
if (OMPI_SUCCESS != ret) {
if ( NULL != topo->in ) {
free(topo->in);
}
if ( NULL != topo->out ) {
free(topo->out);
}
if ( NULL != topo->inw ) {
free(topo->inw);
}
if ( NULL != topo->outw ) {
free(topo->outw);
}
if (MPI_COMM_NULL != new_comm) {
new_comm->c_topo->mtc.dist_graph = NULL;
new_comm->c_topo = NULL;
new_comm->c_flags &= ~OMPI_COMM_DIST_GRAPH;
ompi_comm_free (&new_comm);
}
free(topo);
free(topo_procs);
return ret;
err = mca_topo_base_dist_graph_distribute(module,
*newcomm,
n, nodes,
degrees, targets,
weights,
&((*newcomm)->c_topo->mtc.dist_graph));
if( OMPI_SUCCESS != err ) {
ompi_comm_free(newcomm);
}
*newcomm = new_comm;
return OMPI_SUCCESS;
return err;
}
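
The rewrite above drops the hand-rolled communicator assembly (ompi_comm_allocate plus ompi_comm_enable over a copied process array) in favor of ompi_comm_create() on the old communicator's local group, after which the distributed graph is computed directly into the new communicator's c_topo->mtc.dist_graph. A small runnable user-level example that exercises this path through MPI_Dist_graph_create (illustration only, not from the patch); each rank declares one outgoing edge to its right neighbor in a ring:

#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int source = rank;                /* this rank contributes one edge */
    int degree = 1;                   /* a single outgoing edge */
    int dest   = (rank + 1) % size;   /* right neighbor in the ring */
    MPI_Comm ring;
    MPI_Dist_graph_create(MPI_COMM_WORLD, 1, &source, &degree, &dest,
                          MPI_UNWEIGHTED, MPI_INFO_NULL,
                          0 /* no reorder */, &ring);

    int indeg, outdeg, weighted;
    MPI_Dist_graph_neighbors_count(ring, &indeg, &outdeg, &weighted);
    printf("rank %d: indegree %d, outdegree %d\n", rank, indeg, outdeg);

    MPI_Comm_free(&ring);
    MPI_Finalize();
    return 0;
}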
static void mca_topo_base_comm_dist_graph_2_2_0_construct(mca_topo_base_comm_dist_graph_2_2_0_t * dist_graph) {

View file

@ -123,11 +123,6 @@ int mca_topo_base_graph_create(mca_topo_base_module_t *topo,
return OMPI_ERR_OUT_OF_RESOURCE;
}
new_comm->c_topo = topo;
new_comm->c_topo->mtc.graph = graph;
new_comm->c_flags |= OMPI_COMM_GRAPH;
new_comm->c_topo->reorder = reorder;
ret = ompi_comm_enable(old_comm, new_comm,
new_rank, num_procs, topo_procs);
if (OMPI_SUCCESS != ret) {
@ -140,7 +135,11 @@ int mca_topo_base_graph_create(mca_topo_base_module_t *topo,
}
return ret;
}
new_comm->c_topo = topo;
new_comm->c_topo->mtc.graph = graph;
new_comm->c_flags |= OMPI_COMM_GRAPH;
new_comm->c_topo->reorder = reorder;
*comm_topo = new_comm;
if( MPI_UNDEFINED == new_rank ) {