adding tuned allgatherv implementation using bruck, ring, and neighbor-exchange algorithms.

The implementations passed intel and imb tests up to 40 processes. This commit was SVN r15280.
2007-07-03 23:33:12 +00:00 · 2007-07-03 23:33:12 +00:00 · 3b0a52a104
--- a/ompi/mca/coll/tuned/Makefile.am
+++ b/ompi/mca/coll/tuned/Makefile.am
@ -33,6 +33,7 @@ sources = \
        coll_tuned_allreduce.c \
        coll_tuned_alltoall.c \
        coll_tuned_allgather.c \
+        coll_tuned_allgatherv.c \
        coll_tuned_barrier.c \
        coll_tuned_bcast.c \
        coll_tuned_reduce.c \
--- a/ompi/mca/coll/tuned/coll_tuned.h
+++ b/ompi/mca/coll/tuned/coll_tuned.h
@ -124,6 +124,14 @@ extern int ompi_coll_tuned_forced_max_algorithms[COLLCOUNT];
  /* All GatherV */
  int ompi_coll_tuned_allgatherv_intra_dec_fixed(ALLGATHERV_ARGS);
  int ompi_coll_tuned_allgatherv_intra_dec_dynamic(ALLGATHERV_ARGS);
+  int ompi_coll_tuned_allgatherv_intra_do_forced(ALLGATHERV_ARGS);
+  int ompi_coll_tuned_allgatherv_intra_do_this(ALLGATHERV_ARGS, int algorithm, int faninout, int segsize);
+  int ompi_coll_tuned_allgatherv_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
+  int ompi_coll_tuned_allgatherv_intra_bruck(ALLGATHERV_ARGS);
+  int ompi_coll_tuned_allgatherv_intra_ring(ALLGATHERV_ARGS);
+  int ompi_coll_tuned_allgatherv_intra_neighborexchange(ALLGATHERV_ARGS);
+  int ompi_coll_tuned_allgatherv_intra_basic_default(ALLGATHERV_ARGS);
+  int ompi_coll_tuned_allgatherv_intra_two_procs(ALLGATHERV_ARGS);
  int ompi_coll_tuned_allgatherv_inter_dec_fixed(ALLGATHERV_ARGS);
  int ompi_coll_tuned_allgatherv_inter_dec_dynamic(ALLGATHERV_ARGS);

--- a/ompi/mca/coll/tuned/coll_tuned_allgatherv.c
+++ b/ompi/mca/coll/tuned/coll_tuned_allgatherv.c
@ -0,0 +1,837 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2007 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+
+#include "mpi.h"
+#include "ompi/constants.h"
+#include "ompi/datatype/datatype.h"
+#include "ompi/communicator/communicator.h"
+#include "ompi/mca/coll/coll.h"
+#include "ompi/mca/coll/base/coll_tags.h"
+#include "ompi/mca/pml/pml.h"
+#include "ompi/op/op.h"
+#include "coll_tuned.h"
+#include "coll_tuned_topo.h"
+#include "coll_tuned_util.h"
+
+/*
+ * ompi_coll_tuned_allgatherv_intra_bruck
+ *
+ * Function:     allgather using O(log(N)) steps.
+ * Accepts:      Same arguments as MPI_Allgather
+ * Returns:      MPI_SUCCESS or error code
+ *
+ * Description:  Variation to All-to-all algorithm described by Bruck et al.in
+ *               "Efficient Algorithms for All-to-all Communications
+ *                in Multiport Message-Passing Systems"
+ * Note:         Unlike in case of allgather implementation, we relay on
+ *               indexed datatype to select buffers appropriately.
+ *               The only additional memory requirement is for creation of 
+ *               temporary datatypes.
+ * Example on 7 nodes (memory lay out need not be in-order)
+ *   Initial set up:
+ *    #     0      1      2      3      4      5      6
+ *         [0]    [ ]    [ ]    [ ]    [ ]    [ ]    [ ]
+ *         [ ]    [1]    [ ]    [ ]    [ ]    [ ]    [ ]
+ *         [ ]    [ ]    [2]    [ ]    [ ]    [ ]    [ ]
+ *         [ ]    [ ]    [ ]    [3]    [ ]    [ ]    [ ]
+ *         [ ]    [ ]    [ ]    [ ]    [4]    [ ]    [ ]
+ *         [ ]    [ ]    [ ]    [ ]    [ ]    [5]    [ ]
+ *         [ ]    [ ]    [ ]    [ ]    [ ]    [ ]    [6]
+ *   Step 0: send message to (rank - 2^0), receive message from (rank + 2^0)
+ *    #     0      1      2      3      4      5      6
+ *         [0]    [ ]    [ ]    [ ]    [ ]    [ ]    [0]
+ *         [1]    [1]    [ ]    [ ]    [ ]    [ ]    [ ]
+ *         [ ]    [2]    [2]    [ ]    [ ]    [ ]    [ ]
+ *         [ ]    [ ]    [3]    [3]    [ ]    [ ]    [ ]
+ *         [ ]    [ ]    [ ]    [4]    [4]    [ ]    [ ]
+ *         [ ]    [ ]    [ ]    [ ]    [5]    [5]    [ ]
+ *         [ ]    [ ]    [ ]    [ ]    [ ]    [6]    [6]
+ *   Step 1: send message to (rank - 2^1), receive message from (rank + 2^1).
+ *           message contains all blocks from (rank) .. (rank + 2^2) with 
+ *           wrap around.
+ *    #     0      1      2      3      4      5      6
+ *         [0]    [ ]    [ ]    [ ]    [0]    [0]    [0]
+ *         [1]    [1]    [ ]    [ ]    [ ]    [1]    [1]
+ *         [2]    [2]    [2]    [ ]    [ ]    [ ]    [2]
+ *         [3]    [3]    [3]    [3]    [ ]    [ ]    [ ]
+ *         [ ]    [4]    [4]    [4]    [4]    [ ]    [ ]
+ *         [ ]    [ ]    [5]    [5]    [5]    [5]    [ ]
+ *         [ ]    [ ]    [ ]    [6]    [6]    [6]    [6]
+ *   Step 2: send message to (rank - 2^2), receive message from (rank + 2^2).
+ *           message size is "all remaining blocks" 
+ *    #     0      1      2      3      4      5      6
+ *         [0]    [0]    [0]    [0]    [0]    [0]    [0]
+ *         [1]    [1]    [1]    [1]    [1]    [1]    [1]
+ *         [2]    [2]    [2]    [2]    [2]    [2]    [2]
+ *         [3]    [3]    [3]    [3]    [3]    [3]    [3]
+ *         [4]    [4]    [4]    [4]    [4]    [4]    [4]
+ *         [5]    [5]    [5]    [5]    [5]    [5]    [5]
+ *         [6]    [6]    [6]    [6]    [6]    [6]    [6]
+ */
+int ompi_coll_tuned_allgatherv_intra_bruck(void *sbuf, int scount,
+                                           struct ompi_datatype_t *sdtype,
+                                           void *rbuf, int *rcounts,
+                                           int *rdispls, 
+                                           struct ompi_datatype_t *rdtype,
+                                           struct ompi_communicator_t *comm)
+{
+   int line = -1, err = 0;
+   int rank, size;
+   int sendto, recvfrom, distance, blockcount, i;
+   int *new_rcounts, *new_rdispls, *new_scounts, *new_sdispls;
+   ptrdiff_t slb, rlb, sext, rext;
+   char *tmpsend = NULL, *tmprecv = NULL;
+   struct ompi_datatype_t *new_rdtype, *new_sdtype;
+   ompi_request_t *req;
+
+   size = ompi_comm_size(comm);
+   rank = ompi_comm_rank(comm);
+
+   OPAL_OUTPUT((ompi_coll_tuned_stream,
+                "coll:tuned:allgather_intra_bruck rank %d", rank));
+   
+   err = ompi_ddt_get_extent (sdtype, &slb, &sext);
+   if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+
+   err = ompi_ddt_get_extent (rdtype, &rlb, &rext);
+   if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+
+   /* Initialization step:
+      - if send buffer is not MPI_IN_PLACE, copy send buffer to block rank of 
+        the receive buffer.
+   */
+   tmprecv = (char*) rbuf + rdispls[rank] * rext;
+   if (MPI_IN_PLACE != sbuf) {
+      tmpsend = (char*) sbuf;
+      err = ompi_ddt_sndrcv(tmpsend, scount, sdtype, 
+                            tmprecv, rcounts[rank], rdtype);
+      if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl;  }
+
+   }
+   
+   /* Communication step:
+      At every step i, rank r:
+      - doubles the distance
+      - sends message with blockcount blocks, (rbuf[rank] .. rbuf[rank + 2^i])
+        to rank (r - distance)
+      - receives message of blockcount blocks, 
+        (rbuf[r + distance] ... rbuf[(r+distance) + 2^i]) from 
+        rank (r + distance)
+      - blockcount doubles until the last step when only the remaining data is 
+      exchanged.
+   */
+   blockcount = 1;
+   tmpsend = (char*) rbuf;
+
+   new_rcounts = (int*) calloc(size, sizeof(int));
+   if (NULL == new_rcounts) { err = -1; line = __LINE__; goto err_hndl; }
+   new_rdispls = (int*) calloc(size, sizeof(int));
+   if (NULL == new_rdispls) { err = -1; line = __LINE__; goto err_hndl; }
+   new_scounts = (int*) calloc(size, sizeof(int));
+   if (NULL == new_scounts) { err = -1; line = __LINE__; goto err_hndl; }
+   new_sdispls = (int*) calloc(size, sizeof(int));
+   if (NULL == new_sdispls) { err = -1; line = __LINE__; goto err_hndl; }
+
+   for (distance = 1; distance < size; distance<<=1) {
+
+      recvfrom = (rank + distance) % size;
+      sendto = (rank - distance + size) % size;
+
+      if (distance <= (size >> 1)) {
+         blockcount = distance;
+      } else { 
+         blockcount = size - distance;
+      }
+
+      /* create send and receive datatypes */
+      for (i = 0; i < blockcount; i++) {
+          const int tmp_srank = (rank + i) % size;
+          const int tmp_rrank = (recvfrom + i) % size;
+          new_scounts[i] = rcounts[tmp_srank];
+          new_sdispls[i] = rdispls[tmp_srank];
+          new_rcounts[i] = rcounts[tmp_rrank];
+          new_rdispls[i] = rdispls[tmp_rrank];
+      }
+      err = ompi_ddt_create_indexed(blockcount, new_scounts, new_sdispls, 
+                                    rdtype, &new_sdtype);
+      if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+      err = ompi_ddt_create_indexed(blockcount, new_rcounts, new_rdispls,
+                                    rdtype, &new_rdtype);
+
+      err = ompi_ddt_commit(&new_sdtype);
+      if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+      err = ompi_ddt_commit(&new_rdtype);
+      if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+
+      /* Sendreceive */
+      err = ompi_coll_tuned_sendrecv(rbuf, 1, new_sdtype, sendto,
+                                     MCA_COLL_BASE_TAG_ALLGATHERV,
+                                     rbuf, 1, new_rdtype, recvfrom,
+                                     MCA_COLL_BASE_TAG_ALLGATHERV,
+                                     comm, MPI_STATUS_IGNORE, rank);
+      ompi_ddt_destroy(&new_sdtype);
+      ompi_ddt_destroy(&new_rdtype);
+
+   }
+
+   free(new_rcounts);
+   free(new_rdispls);
+   free(new_scounts);
+   free(new_sdispls);
+
+   return OMPI_SUCCESS;
+
+ err_hndl:
+   OPAL_OUTPUT((ompi_coll_tuned_stream,  "%s:%4d\tError occurred %d, rank %2d",
+                __FILE__, line, err, rank));
+   return err;
+}
+
+
+/*
+ * ompi_coll_tuned_allgatherv_intra_ring
+ *
+ * Function:     allgatherv using O(N) steps.
+ * Accepts:      Same arguments as MPI_Allgatherv
+ * Returns:      MPI_SUCCESS or error code
+ *
+ * Description:  Ring algorithm for all gather.
+ *               At every step i, rank r receives message from rank (r - 1)
+ *               containing data from rank (r - i - 1) and sends message to rank
+ *               (r + 1) containing data from rank (r - i), with wrap arounds.
+ * Memory requirements:
+ *               No additional memory requirements.
+ * 
+ */
+int ompi_coll_tuned_allgatherv_intra_ring(void *sbuf, int scount,
+                                          struct ompi_datatype_t *sdtype,
+                                          void* rbuf, int *rcounts, int *rdisps,
+                                          struct ompi_datatype_t *rdtype,
+                                          struct ompi_communicator_t *comm)
+{
+    int line = -1;
+    int rank, size;
+    int sendto, recvfrom, i, recvdatafrom, senddatafrom;
+    int err = 0;
+    ptrdiff_t slb, rlb, sext, rext;
+    char *tmpsend = NULL, *tmprecv = NULL;
+
+    size = ompi_comm_size(comm);
+    rank = ompi_comm_rank(comm);
+
+    OPAL_OUTPUT((ompi_coll_tuned_stream,
+                 "coll:tuned:allgatherv_intra_ring rank %d", rank));
+
+    err = ompi_ddt_get_extent (sdtype, &slb, &sext);
+    if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+
+    err = ompi_ddt_get_extent (rdtype, &rlb, &rext);
+    if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+
+    /* Initialization step:
+       - if send buffer is not MPI_IN_PLACE, copy send buffer to 
+       the appropriate block of receive buffer
+    */
+    tmprecv = (char*) rbuf + rdisps[rank] * rext;
+    if (MPI_IN_PLACE != sbuf) {
+        tmpsend = (char*) sbuf;
+        err = ompi_ddt_sndrcv(tmpsend, scount, sdtype, 
+                              tmprecv, rcounts[rank], rdtype);
+        if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl;  }
+    } 
+   
+    /* Communication step:
+       At every step i: 0 .. (P-1), rank r:
+       - receives message from [(r - 1 + size) % size] containing data from rank
+       [(r - i - 1 + size) % size]
+       - sends message to rank [(r + 1) % size] containing data from rank
+       [(r - i + size) % size]
+       - sends message which starts at begining of rbuf and has size 
+    */
+    sendto = (rank + 1) % size;
+    recvfrom  = (rank - 1 + size) % size;
+
+    for (i = 0; i < size - 1; i++) {
+        recvdatafrom = (rank - i - 1 + size) % size;
+        senddatafrom = (rank - i + size) % size;
+
+        tmprecv = (char*)rbuf + rdisps[recvdatafrom] * rext;
+        tmpsend = (char*)rbuf + rdisps[senddatafrom] * rext;
+
+        /* Sendreceive */
+        err = ompi_coll_tuned_sendrecv(tmpsend, rcounts[senddatafrom], rdtype, 
+                                       sendto, MCA_COLL_BASE_TAG_ALLGATHERV,
+                                       tmprecv, rcounts[recvdatafrom], rdtype, 
+                                       recvfrom, MCA_COLL_BASE_TAG_ALLGATHERV,
+                                       comm, MPI_STATUS_IGNORE, rank);
+        if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+
+    }
+
+    return OMPI_SUCCESS;
+
+ err_hndl:
+    OPAL_OUTPUT((ompi_coll_tuned_stream,  "%s:%4d\tError occurred %d, rank %2d",
+                 __FILE__, line, err, rank));
+    return err;
+}
+
+/*
+ * ompi_coll_tuned_allgatherv_intra_neighborexchange
+ *
+ * Function:     allgatherv using N/2 steps (O(N))
+ * Accepts:      Same arguments as MPI_Allgatherv
+ * Returns:      MPI_SUCCESS or error code
+ *
+ * Description:  Neighbor Exchange algorithm for allgather adapted for 
+ *               allgatherv.
+ *               Described by Chen et.al. in 
+ *               "Performance Evaluation of Allgather Algorithms on 
+ *                Terascale Linux Cluster with Fast Ethernet",
+ *               Proceedings of the Eighth International Conference on 
+ *               High-Performance Computing inn Asia-Pacific Region
+ *               (HPCASIA'05), 2005
+ * 
+ *               Rank r exchanges message with one of its neighbors and
+ *               forwards the data further in the next step.
+ *
+ *               No additional memory requirements.
+ * 
+ * Limitations:  Algorithm works only on even number of processes.
+ *               For odd number of processes we switch to ring algorithm.
+ * 
+ * Example on 6 nodes:
+ *  Initial state
+ *    #     0      1      2      3      4      5
+ *         [0]    [ ]    [ ]    [ ]    [ ]    [ ]
+ *         [ ]    [1]    [ ]    [ ]    [ ]    [ ]
+ *         [ ]    [ ]    [2]    [ ]    [ ]    [ ]
+ *         [ ]    [ ]    [ ]    [3]    [ ]    [ ]
+ *         [ ]    [ ]    [ ]    [ ]    [4]    [ ]
+ *         [ ]    [ ]    [ ]    [ ]    [ ]    [5]
+ *   Step 0:
+ *    #     0      1      2      3      4      5
+ *         [0]    [0]    [ ]    [ ]    [ ]    [ ]
+ *         [1]    [1]    [ ]    [ ]    [ ]    [ ]
+ *         [ ]    [ ]    [2]    [2]    [ ]    [ ]
+ *         [ ]    [ ]    [3]    [3]    [ ]    [ ]
+ *         [ ]    [ ]    [ ]    [ ]    [4]    [4]
+ *         [ ]    [ ]    [ ]    [ ]    [5]    [5]
+ *   Step 1:
+ *    #     0      1      2      3      4      5
+ *         [0]    [0]    [0]    [ ]    [ ]    [0]
+ *         [1]    [1]    [1]    [ ]    [ ]    [1]
+ *         [ ]    [2]    [2]    [2]    [2]    [ ]
+ *         [ ]    [3]    [3]    [3]    [3]    [ ]
+ *         [4]    [ ]    [ ]    [4]    [4]    [4]
+ *         [5]    [ ]    [ ]    [5]    [5]    [5]
+ *   Step 2:
+ *    #     0      1      2      3      4      5
+ *         [0]    [0]    [0]    [0]    [0]    [0]
+ *         [1]    [1]    [1]    [1]    [1]    [1]
+ *         [2]    [2]    [2]    [2]    [2]    [2]
+ *         [3]    [3]    [3]    [3]    [3]    [3]
+ *         [4]    [4]    [4]    [4]    [4]    [4]
+ *         [5]    [5]    [5]    [5]    [5]    [5]
+ */
+int 
+ompi_coll_tuned_allgatherv_intra_neighborexchange(void *sbuf, int scount,
+                                                  struct ompi_datatype_t *sdtype,
+                                                  void* rbuf, int *rcounts, int *rdispls,
+                                                  struct ompi_datatype_t *rdtype,
+                                                  struct ompi_communicator_t *comm)
+{
+    int line = -1;
+    int rank, size;
+    int neighbor[2], offset_at_step[2], recv_data_from[2], send_data_from;
+    int new_scounts[2], new_sdispls[2], new_rcounts[2], new_rdispls[2];
+    int i, even_rank;
+    int err = 0;
+    ptrdiff_t slb, rlb, sext, rext;
+    char *tmpsend = NULL, *tmprecv = NULL;
+    struct ompi_datatype_t  *new_rdtype, *new_sdtype;
+
+    size = ompi_comm_size(comm);
+    rank = ompi_comm_rank(comm);
+
+    if (size % 2) {
+        OPAL_OUTPUT((ompi_coll_tuned_stream,
+                     "coll:tuned:allgatherv_intra_neighborexchange WARNING: odd size %d, switching to ring algorithm", 
+                     size));
+        return ompi_coll_tuned_allgatherv_intra_ring(sbuf, scount, sdtype,
+                                                     rbuf, rcounts, 
+                                                     rdispls, rdtype,
+                                                     comm);
+    }
+
+    OPAL_OUTPUT((ompi_coll_tuned_stream,
+                 "coll:tuned:allgatherv_intra_neighborexchange rank %d", rank));
+
+    err = ompi_ddt_get_extent (sdtype, &slb, &sext);
+    if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+
+    err = ompi_ddt_get_extent (rdtype, &rlb, &rext);
+    if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+
+    /* Initialization step:
+       - if send buffer is not MPI_IN_PLACE, copy send buffer to 
+       the appropriate block of receive buffer
+    */
+    tmprecv = (char*) rbuf + rdispls[rank] * rext;
+    if (MPI_IN_PLACE != sbuf) {
+        tmpsend = (char*) sbuf;
+        err = ompi_ddt_sndrcv(tmpsend, scount, sdtype, 
+                              tmprecv, rcounts[rank], rdtype);
+        if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl;  }
+    } 
+
+    /* Determine neighbors, order in which blocks will arrive, etc. */
+    even_rank = !(rank % 2);
+    if (even_rank) {
+        neighbor[0] = (rank + 1) % size;
+        neighbor[1] = (rank - 1 + size) % size;
+        recv_data_from[0] = rank;
+        recv_data_from[1] = rank;
+        offset_at_step[0] = (+2);
+        offset_at_step[1] = (-2);
+    } else {
+        neighbor[0] = (rank - 1 + size) % size;
+        neighbor[1] = (rank + 1) % size;
+        recv_data_from[0] = neighbor[0];
+        recv_data_from[1] = neighbor[0];
+        offset_at_step[0] = (-2);
+        offset_at_step[1] = (+2);
+    }
+
+    /* Communication loop:
+       - First step is special: exchange a single block with neighbor[0].
+       - Rest of the steps: 
+       update recv_data_from according to offset, and 
+       exchange two blocks with appropriate neighbor.
+       the send location becomes previous receve location.
+       Note, we need to create indexed datatype to send and receive these
+       blocks properly.
+    */
+    tmprecv = (char*)rbuf + rdispls[neighbor[0]] * rext;
+    tmpsend = (char*)rbuf + rdispls[rank] * rext;
+    err = ompi_coll_tuned_sendrecv(tmpsend, rcounts[rank], rdtype, 
+                                   neighbor[0], MCA_COLL_BASE_TAG_ALLGATHERV,
+                                   tmprecv, rcounts[neighbor[0]], rdtype, 
+                                   neighbor[0], MCA_COLL_BASE_TAG_ALLGATHERV,
+                                   comm, MPI_STATUS_IGNORE, rank);
+    if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+   
+    /* Determine initial sending counts and displacements*/
+    if (even_rank) {
+        send_data_from = rank;
+    } else {
+        send_data_from = recv_data_from[0];
+    }
+
+    for (i = 1; i < (size / 2); i++) {
+        const int i_parity = i % 2;
+        recv_data_from[i_parity] = 
+            (recv_data_from[i_parity] + offset_at_step[i_parity] + size) % size;
+
+        /* Create new indexed types for sending and receiving.
+           We are sending data from ranks (send_data_from) and (send_data_from+1)
+           We are receiving data from ranks (recv_data_from[i_parity]) and
+           (recv_data_from[i_parity]+1).
+        */
+        new_scounts[0] = rcounts[send_data_from];
+        new_scounts[1] = rcounts[(send_data_from + 1)];
+        new_sdispls[0] = rdispls[send_data_from];
+        new_sdispls[1] = rdispls[(send_data_from + 1)];
+        err = ompi_ddt_create_indexed(2, new_scounts, new_sdispls, rdtype, 
+                                      &new_sdtype);
+        if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+        err = ompi_ddt_commit(&new_sdtype);
+        if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+
+        new_rcounts[0] = rcounts[recv_data_from[i_parity]];
+        new_rcounts[1] = rcounts[(recv_data_from[i_parity] + 1)];
+        new_rdispls[0] = rdispls[recv_data_from[i_parity]];
+        new_rdispls[1] = rdispls[(recv_data_from[i_parity] + 1)];
+        err = ompi_ddt_create_indexed(2, new_rcounts, new_rdispls, rdtype, 
+                                      &new_rdtype);
+        if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+        err = ompi_ddt_commit(&new_rdtype);
+        if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+      
+        tmprecv = (char*)rbuf;
+        tmpsend = (char*)rbuf;
+      
+        /* Sendreceive */
+        err = ompi_coll_tuned_sendrecv(tmpsend, 1, new_sdtype, neighbor[i_parity],
+                                       MCA_COLL_BASE_TAG_ALLGATHERV,
+                                       tmprecv, 1, new_rdtype, neighbor[i_parity],
+                                       MCA_COLL_BASE_TAG_ALLGATHERV,
+                                       comm, MPI_STATUS_IGNORE, rank);
+        if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+
+        send_data_from = recv_data_from[i_parity];
+      
+        ompi_ddt_destroy(&new_sdtype);
+        ompi_ddt_destroy(&new_rdtype);
+    }
+
+    return OMPI_SUCCESS;
+
+ err_hndl:
+    OPAL_OUTPUT((ompi_coll_tuned_stream,  "%s:%4d\tError occurred %d, rank %2d",
+                 __FILE__, line, err, rank));
+    return err;
+}
+
+
+int ompi_coll_tuned_allgatherv_intra_two_procs(void *sbuf, int scount,
+                                               struct ompi_datatype_t *sdtype,
+                                               void* rbuf, int *rcounts,
+                                               int *rdispls,
+                                               struct ompi_datatype_t *rdtype,
+                                               struct ompi_communicator_t *comm)
+{
+    int line = -1, err = 0;
+    int rank;
+    int remote;
+    char *tmpsend = NULL, *tmprecv = NULL;
+    ptrdiff_t sext, rext, lb;
+
+    rank = ompi_comm_rank(comm);
+
+    OPAL_OUTPUT((ompi_coll_tuned_stream,
+                 "ompi_coll_tuned_allgatherv_intra_two_procs rank %d", rank));
+
+    err = ompi_ddt_get_extent (sdtype, &lb, &sext);
+    if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+
+    err = ompi_ddt_get_extent (rdtype, &lb, &rext);
+    if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+
+    /* Exchange data:
+       - compute source and destinations
+       - send receive data
+    */
+    remote  = rank ^ 0x1;
+
+    tmpsend = (char*)sbuf;
+    if (MPI_IN_PLACE == sbuf) {
+        tmpsend = (char*)rbuf + rdispls[rank] * rext;
+        scount = rcounts[rank];
+        sdtype = rdtype;
+    }
+    tmprecv = (char*)rbuf + rdispls[remote] * rext;
+
+    err = ompi_coll_tuned_sendrecv(tmpsend, scount, sdtype, remote,
+                                   MCA_COLL_BASE_TAG_ALLGATHERV,
+                                   tmprecv, rcounts[remote], rdtype, remote,
+                                   MCA_COLL_BASE_TAG_ALLGATHERV,
+                                   comm, MPI_STATUS_IGNORE, rank);
+    if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
+
+    /* Place your data in correct location if necessary */
+    if (MPI_IN_PLACE != sbuf) {
+        err = ompi_ddt_sndrcv((char*)sbuf, scount, sdtype, 
+                              (char*)rbuf + rdispls[rank] * rext, 
+                              rcounts[rank], rdtype); 
+        if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl;  }
+    }
+
+    return MPI_SUCCESS;
+
+ err_hndl:
+    OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tError occurred %d, rank %2d",
+                 __FILE__, line, err, rank));
+    return err;
+}
+
+
+/*
+ * Linear functions are copied from the BASIC coll module
+ * they do not segment the message and are simple implementations
+ * but for some small number of nodes and/or small data sizes they 
+ * are just as fast as tuned/tree based segmenting operations 
+ * and as such may be selected by the decision functions
+ * These are copied into this module due to the way we select modules
+ * in V1. i.e. in V2 we will handle this differently and so will not
+ * have to duplicate code.
+ * JPG following the examples from other coll_tuned implementations. Dec06.
+ */
+
+/* copied function (with appropriate renaming) starts here */
+
+/*
+ *	allgatherv_intra_basic
+ *
+ *	Function:	- allgatherv using other MPI collectives: 
+ *                        gatherv + bcast (from basic module).
+ *	Accepts:	- same as MPI_Allgatherv()
+ *	Returns:	- MPI_SUCCESS or error code
+ */
+int
+ompi_coll_tuned_allgatherv_intra_basic_default(void *sbuf, int scount,
+                                               struct ompi_datatype_t *sdtype,
+                                               void *rbuf, int *rcounts,
+                                               int *disps,
+                                               struct ompi_datatype_t *rdtype,
+                                               struct ompi_communicator_t *comm)
+{
+    int i, size, rank ;
+    int err;
+    MPI_Aint extent;
+    MPI_Aint lb;
+    char *send_buf = NULL;
+    struct ompi_datatype_t *newtype, *send_type;
+
+    size = ompi_comm_size(comm);
+    rank = ompi_comm_rank(comm);
+    /*
+     * We don't have a root process defined. Arbitrarily assign root
+     * to process with rank 0 (OMPI convention)
+     */
+
+    OPAL_OUTPUT((ompi_coll_tuned_stream,
+                 "ompi_coll_tuned_allgatherv_intra_basic_default rank %d", 
+                 rank));
+
+    if (MPI_IN_PLACE == sbuf) {
+        ompi_ddt_get_extent(rdtype, &lb, &extent);
+        send_type = rdtype;
+        send_buf = (char*)rbuf;
+        for (i = 0; i < rank; ++i) {
+            send_buf += (rcounts[i] * extent);
+        }
+    } else {
+        send_buf = (char*)sbuf;
+        send_type = sdtype;
+    }
+
+    err = comm->c_coll.coll_gatherv(send_buf,
+                                    rcounts[rank], send_type,rbuf,
+                                    rcounts, disps, rdtype, 0,
+                                    comm);
+    
+    if (MPI_SUCCESS != err) {
+        return err;
+    }
+    /*
+     * we now have all the data in the root's rbuf. Need to
+     * broadcast the data out to the other processes
+     *
+     * Need to define a datatype that captures the different vectors
+     * from each process. MPI_TYPE_INDEXED with params 
+     *                    size,rcount,displs,rdtype,newtype
+     * should do the trick.
+     * Use underlying ddt functions to create, and commit the
+     * new datatype on each process, then broadcast and destroy the
+     * datatype.
+     */
+
+    err = ompi_ddt_create_indexed(size,rcounts,disps,rdtype,&newtype);
+    if (MPI_SUCCESS != err) {
+        return err;
+    }
+    
+    err = ompi_ddt_commit(&newtype);
+    if(MPI_SUCCESS != err) {
+        return err;
+    }
+
+    comm->c_coll.coll_bcast( rbuf, 1 ,newtype,0,comm);
+
+    ompi_ddt_destroy (&newtype);
+
+    return MPI_SUCCESS;
+}
+
+/* copied function (with appropriate renaming) ends here */
+
+/* The following are used by dynamic and forced rules */
+
+/* publish details of each algorithm and if its forced/fixed/locked in */
+/* as you add methods/algorithms you must update this and the query/map 
+   routines */
+
+/* this routine is called by the component only */
+/* this makes sure that the mca parameters are set to their initial values 
+   and perms */
+/* module does not call this they call the forced_getvalues routine instead */
+
+int 
+ompi_coll_tuned_allgatherv_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
+{
+    int rc, max_alg = 5, requested_alg;
+    
+    ompi_coll_tuned_forced_max_algorithms[ALLGATHERV] = max_alg;
+    
+    rc = mca_base_param_reg_int (&mca_coll_tuned_component.super.collm_version,
+                                 "allgatherv_algorithm_count",
+                                 "Number of allgather algorithms available",
+                                 false, true, max_alg, NULL);
+    
+    mca_param_indices->algorithm_param_index
+        = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
+                                 "allgatherv_algorithm",
+                                 "Which allgather algorithm is used. Can be locked down to choice of: 0 ignore, 1 default (gatherv + bcast), 2 bruck, 3 ring, 4 neighbor exchange, 5: two proc only.",
+                                 false, false, 0, NULL);
+    mca_base_param_lookup_int(mca_param_indices->algorithm_param_index, 
+                              &(requested_alg));
+    if( requested_alg > max_alg ) {
+        if( 0 == ompi_comm_rank( MPI_COMM_WORLD ) ) {
+            opal_output( 0, "Allgather algorithm #%d is not available (range [0..%d]). Switching back to ignore(0)\n",
+                         requested_alg, max_alg );
+        }
+        mca_base_param_set_int( mca_param_indices->algorithm_param_index, 0);
+    }
+    
+    mca_param_indices->segsize_param_index
+        = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
+                                 "allgatherv_algorithm_segmentsize",
+                                 "Segment size in bytes used by default for allgather algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation. Currently, available algorithms do not support segmentation.",
+                                 false, false, 0, NULL);
+    
+    mca_param_indices->tree_fanout_param_index
+        = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
+                                 "allgatherv_algorithm_tree_fanout",
+                                 "Fanout for n-tree used for allgather algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation. Currently, available algorithms do not support n-tree topologies.",
+                                 false, false, 
+                                 ompi_coll_tuned_init_tree_fanout, /* get system wide default */
+                                 NULL);
+
+    mca_param_indices->chain_fanout_param_index
+        = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
+                                 "allgatherv_algorithm_chain_fanout",
+                                 "Fanout for chains used for allgather algorithms. Only has meaning if algorithm is forced and supports chain topo based operation. Currently, available algorithms do not support chain topologies.",
+                                 false, false, 
+                                 ompi_coll_tuned_init_chain_fanout, /* get system wide default */
+                                 NULL);
+
+    return (MPI_SUCCESS);
+}
+
+int ompi_coll_tuned_allgatherv_intra_do_forced(void *sbuf, int scount,
+                                               struct ompi_datatype_t *sdtype,
+                                               void *rbuf, int *rcounts, 
+                                               int *rdispls,
+                                               struct ompi_datatype_t *rdtype,
+                                               struct ompi_communicator_t *comm)
+{
+    OPAL_OUTPUT((ompi_coll_tuned_stream,
+                 "coll:tuned:allgatherv_intra_do_forced selected algorithm %d",
+                 comm->c_coll_selected_data->user_forced[ALLGATHERV].algorithm));
+   
+    switch (comm->c_coll_selected_data->user_forced[ALLGATHERV].algorithm) {
+    case (0):   
+        return ompi_coll_tuned_allgatherv_intra_dec_fixed (sbuf, scount, sdtype, 
+                                                           rbuf, rcounts, 
+                                                           rdispls, rdtype, 
+                                                           comm);
+    case (1):   
+        return ompi_coll_tuned_allgatherv_intra_basic_default (sbuf, scount, 
+                                                               sdtype,
+                                                               rbuf, rcounts, 
+                                                               rdispls, rdtype,
+                                                               comm);
+    case (2):   
+        return ompi_coll_tuned_allgatherv_intra_bruck (sbuf, scount, sdtype, 
+                                                       rbuf, rcounts, 
+                                                       rdispls, rdtype, 
+                                                       comm);
+    case (3):   
+        return ompi_coll_tuned_allgatherv_intra_ring (sbuf, scount, sdtype, 
+                                                      rbuf, rcounts, 
+                                                      rdispls, rdtype, 
+                                                      comm);
+    case (4):
+        return ompi_coll_tuned_allgatherv_intra_neighborexchange (sbuf, scount, 
+                                                                  sdtype, 
+                                                                  rbuf, rcounts, 
+                                                                  rdispls, 
+                                                                  rdtype,
+                                                                  comm);
+    case (5):
+        return ompi_coll_tuned_allgatherv_intra_two_procs (sbuf, scount, sdtype, 
+                                                           rbuf, rcounts, 
+                                                           rdispls, rdtype, 
+                                                           comm);
+    default:
+        OPAL_OUTPUT((ompi_coll_tuned_stream,
+                     "coll:tuned:allgatherv_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?", 
+                     comm->c_coll_selected_data->user_forced[ALLGATHERV].algorithm,
+                     ompi_coll_tuned_forced_max_algorithms[ALLGATHERV]));
+        return (MPI_ERR_ARG);
+    } /* switch */
+
+}
+
+
+int ompi_coll_tuned_allgatherv_intra_do_this(void *sbuf, int scount,
+                                             struct ompi_datatype_t *sdtype,
+                                             void *rbuf, int *rcounts, 
+                                             int *rdispls, 
+                                             struct ompi_datatype_t *rdtype,
+                                             struct ompi_communicator_t *comm,
+                                             int algorithm, int faninout, 
+                                             int segsize)
+{
+    OPAL_OUTPUT((ompi_coll_tuned_stream,
+                 "coll:tuned:allgatherv_intra_do_this selected algorithm %d topo faninout %d segsize %d", 
+                 algorithm, faninout, segsize));
+   
+    switch (algorithm) {
+    case (0):   
+        return ompi_coll_tuned_allgatherv_intra_dec_fixed(sbuf, scount, sdtype, 
+                                                          rbuf, rcounts, 
+                                                          rdispls, rdtype, 
+                                                          comm);
+    case (1):   
+        return ompi_coll_tuned_allgatherv_intra_basic_default(sbuf, scount, 
+                                                              sdtype,
+                                                              rbuf, rcounts, 
+                                                              rdispls, rdtype,
+                                                              comm);
+    case (2): 
+        return ompi_coll_tuned_allgatherv_intra_bruck(sbuf, scount, sdtype, 
+                                                      rbuf, rcounts, rdispls, 
+                                                      rdtype, comm);
+    case (3): 
+        return ompi_coll_tuned_allgatherv_intra_ring(sbuf, scount, sdtype, 
+                                                     rbuf, rcounts, rdispls, 
+                                                     rdtype, comm);
+    case (4): 
+        return ompi_coll_tuned_allgatherv_intra_neighborexchange(sbuf, scount, 
+                                                                 sdtype, 
+                                                                 rbuf, rcounts,
+                                                                 rdispls, 
+                                                                 rdtype,
+                                                                 comm);
+    case (5):
+        return ompi_coll_tuned_allgatherv_intra_two_procs (sbuf, scount, sdtype,
+                                                           rbuf, rcounts, 
+                                                           rdispls, rdtype, 
+                                                           comm);
+    default:
+        OPAL_OUTPUT((ompi_coll_tuned_stream,
+                     "coll:tuned:allgatherv_intra_do_this attempt to select algorithm %d when only 0-%d is valid?", 
+                     algorithm, 
+                     ompi_coll_tuned_forced_max_algorithms[ALLGATHERV]));
+        return (MPI_ERR_ARG);
+    } /* switch */
+}
--- a/ompi/mca/coll/tuned/coll_tuned_component.c
+++ b/ompi/mca/coll/tuned/coll_tuned_component.c
@ -185,6 +185,7 @@ static int tuned_open(void)
        ompi_coll_tuned_allreduce_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLREDUCE]);
        ompi_coll_tuned_alltoall_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLTOALL]);
        ompi_coll_tuned_allgather_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLGATHER]);
+        ompi_coll_tuned_allgatherv_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLGATHERV]);
        /*ompi_coll_tuned_alltoallv_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLTOALLV]); */
        ompi_coll_tuned_barrier_intra_check_forced_init(&ompi_coll_tuned_forced_params[BARRIER]);
        ompi_coll_tuned_bcast_intra_check_forced_init(&ompi_coll_tuned_forced_params[BCAST]);
--- a/ompi/mca/coll/tuned/coll_tuned_decision_dynamic.c
+++ b/ompi/mca/coll/tuned/coll_tuned_decision_dynamic.c
@ -360,6 +360,67 @@ int ompi_coll_tuned_allgather_intra_dec_dynamic(void *sbuf, int scount,
                                                     comm);
 }

+/*
+ *    allgatherv_intra_dec 
+ *
+ *    Function:    - seletects allgatherv algorithm to use
+ *    Accepts:    - same arguments as MPI_Allgatherv()
+ *    Returns:    - MPI_SUCCESS or error code (passed from the selected
+ *                        allgatherv function).
+ */
+
+int ompi_coll_tuned_allgatherv_intra_dec_dynamic(void *sbuf, int scount, 
+                                                 struct ompi_datatype_t *sdtype,
+                                                 void* rbuf, int *rcounts, 
+                                                 int *rdispls,
+                                                 struct ompi_datatype_t *rdtype, 
+                                                 struct ompi_communicator_t *comm)
+{
+   
+   OPAL_OUTPUT((ompi_coll_tuned_stream, 
+                "ompi_coll_tuned_allgatherv_intra_dec_dynamic"));
+   
+   if (comm->c_coll_selected_data->com_rules[ALLGATHERV]) {
+      /* We have file based rules:
+         - calculate message size and other necessary information */
+      int comsize, i;
+      int alg, faninout, segsize, ignoreme;
+      size_t dsize, total_size;
+
+      comsize = ompi_comm_size(comm);      
+      ompi_ddt_type_size (sdtype, &dsize);
+      total_size = 0;
+      for (i = 0; i < comsize; i++) { total_size += dsize * rcounts[i]; }
+      
+      alg = ompi_coll_tuned_get_target_method_params (comm->c_coll_selected_data->com_rules[ALLGATHERV], 
+                                                      total_size, &faninout, &segsize, &ignoreme);
+      if (alg) { 
+         /* we have found a valid choice from the file based rules for 
+            this message size */
+         return ompi_coll_tuned_allgatherv_intra_do_this (sbuf, scount, sdtype,
+                                                          rbuf, rcounts, 
+                                                          rdispls, rdtype,
+                                                          comm, alg, faninout, 
+                                                          segsize);
+      }
+   } 
+
+   /* We do not have file based rules */
+   if (comm->c_coll_selected_data->user_forced[ALLGATHERV].algorithm) {
+      /* User-forced algorithm */
+      return ompi_coll_tuned_allgatherv_intra_do_forced (sbuf, scount, sdtype, 
+                                                         rbuf, rcounts, 
+                                                         rdispls, rdtype, 
+                                                         comm);
+   }
+
+   /* Use default decision */
+   return ompi_coll_tuned_allgatherv_intra_dec_fixed (sbuf, scount, sdtype, 
+                                                      rbuf, rcounts, 
+                                                      rdispls, rdtype, 
+                                                      comm);
+}
+
 int ompi_coll_tuned_gather_intra_dec_dynamic(void *sbuf, int scount, 
 					     struct ompi_datatype_t *sdtype,
 					     void* rbuf, int rcount, 
--- a/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c
+++ b/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c
@ -563,6 +563,76 @@ int ompi_coll_tuned_allgather_intra_dec_fixed(void *sbuf, int scount,
 #endif  /* defined(USE_MPICH2_DECISION) */
 }

+/*
+ *	allgatherv_intra_dec 
+ *
+ *	Function:	- seletects allgatherv algorithm to use
+ *	Accepts:	- same arguments as MPI_Allgatherv()
+ *	Returns:	- MPI_SUCCESS or error code, passed from corresponding
+ *                        internal allgatherv function.
+ */
+
+int ompi_coll_tuned_allgatherv_intra_dec_fixed(void *sbuf, int scount, 
+                                               struct ompi_datatype_t *sdtype,
+                                               void* rbuf, int *rcounts, 
+                                               int *rdispls,
+                                               struct ompi_datatype_t *rdtype, 
+                                               struct ompi_communicator_t *comm)
+{
+    int i;
+    int communicator_size, rank, pow2_size;
+    size_t dsize, total_dsize;
+    
+    communicator_size = ompi_comm_size(comm);
+    rank = ompi_comm_rank(comm);
+    
+    /* Special case for 2 processes */
+    if (communicator_size == 2) {
+        return ompi_coll_tuned_allgatherv_intra_two_procs (sbuf, scount, sdtype,
+                                                           rbuf, rcounts, 
+                                                           rdispls, rdtype, 
+                                                           comm);
+    }
+    
+    /* Determine complete data size */
+    ompi_ddt_type_size(sdtype, &dsize);
+    total_dsize = 0;
+    for (i = 0; i < communicator_size; i++) {
+        total_dsize += dsize * rcounts[i];
+    }
+    
+    OPAL_OUTPUT((ompi_coll_tuned_stream, 
+                 "ompi_coll_tuned_allgatherv_intra_dec_fixed"
+                 " rank %d com_size %d msg_length %lu",
+                 rank, communicator_size, (unsigned long)total_dsize));
+    
+    for (pow2_size  = 1; pow2_size <= communicator_size; pow2_size <<=1); 
+    pow2_size >>=1;
+
+   /* Decision based on allgather decision.   */
+   if (total_dsize < 50000) {
+       return ompi_coll_tuned_allgatherv_intra_bruck(sbuf, scount, sdtype, 
+                                                     rbuf, rcounts, 
+                                                     rdispls, rdtype, 
+                                                     comm);
+   } else {
+       if (communicator_size % 2) {
+           return ompi_coll_tuned_allgatherv_intra_ring(sbuf, scount, sdtype, 
+                                                        rbuf, rcounts, 
+                                                        rdispls, rdtype, 
+                                                        comm);
+      } else {
+         return  ompi_coll_tuned_allgatherv_intra_neighborexchange(sbuf, scount,
+                                                                   sdtype,
+                                                                   rbuf, 
+                                                                   rcounts,
+                                                                   rdispls, 
+                                                                   rdtype, 
+                                                                   comm);
+      }
+   }
+}
+
 /*
 *	gather_intra_dec 
 *
--- a/ompi/mca/coll/tuned/coll_tuned_module.c
+++ b/ompi/mca/coll/tuned/coll_tuned_module.c
@ -54,8 +54,8 @@ static const mca_coll_base_module_1_0_0_t intra_fixed = {

    ompi_coll_tuned_allgather_intra_dec_fixed,
    /*     NULL, */
-    /*   ompi_coll_tuned_allgatherv_intra_dec_fixed, */
-    NULL,
+    ompi_coll_tuned_allgatherv_intra_dec_fixed, 
+    /*     NULL, */
    ompi_coll_tuned_allreduce_intra_dec_fixed,
    /*     NULL, */
    ompi_coll_tuned_alltoall_intra_dec_fixed,
@ -98,8 +98,8 @@ static const mca_coll_base_module_1_0_0_t intra_dynamic = {

    ompi_coll_tuned_allgather_intra_dec_dynamic,
    /*     NULL, */
-    /*   ompi_coll_tuned_allgatherv_intra_dec_dynamic, */
-    NULL,
+    ompi_coll_tuned_allgatherv_intra_dec_dynamic,
+    /*     NULL, */
    ompi_coll_tuned_allreduce_intra_dec_dynamic,
    /*     NULL, */
    ompi_coll_tuned_alltoall_intra_dec_dynamic,
@ -401,6 +401,7 @@ ompi_coll_tuned_module_init(struct ompi_communicator_t *comm)
        ompi_coll_tuned_forced_getvalues         (ompi_coll_tuned_forced_params[ALLREDUCE], &(data->user_forced[ALLREDUCE]));
        ompi_coll_tuned_forced_getvalues         (ompi_coll_tuned_forced_params[ALLTOALL],  &(data->user_forced[ALLTOALL]));
        ompi_coll_tuned_forced_getvalues         (ompi_coll_tuned_forced_params[ALLGATHER],  &(data->user_forced[ALLGATHER]));
+        ompi_coll_tuned_forced_getvalues         (ompi_coll_tuned_forced_params[ALLGATHERV],  &(data->user_forced[ALLGATHERV]));
        /*         ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[ALLTOALLV], &(data->user_forced[ALLTOALLV])); */
        ompi_coll_tuned_forced_getvalues_barrier (ompi_coll_tuned_forced_params[BARRIER],   &(data->user_forced[BARRIER]));
        ompi_coll_tuned_forced_getvalues         (ompi_coll_tuned_forced_params[BCAST],     &(data->user_forced[BCAST]));