openmpi/ompi/mca/coll/ml/coll_ml_progress.c

/*
 * Copyright (c) 2009-2012 Oak Ridge National Laboratory.  All rights reserved.
 * Copyright (c) 2009-2012 Mellanox Technologies.  All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

#include "ompi/mca/coll/ml/coll_ml.h"

/*
 * This routine is used to progress a series of communication
 * primitives.
 *
 *  Assumptions:
 *      - A message is described by a message descriptor
 *      - Each message has a setup function associated with it, which is
 *        algorithm specific.  When a fragment is being prepared, this
 *        setup function is used to set up the arguments that will be
 *        passed into each routine called to complete a given function.
 *        The idea here is that when the progress routine is called, the
 *        full communication pattern has already been described in the
 *        setup function, with the progress function being generic.
 *      - Each fragment is described by a fragment descriptor
 *      - Each message descriptor has a fragment descriptor permanently
 *        associated with it.
 *      - The message will be progressed as long as the individual
 *        functions complete.  When an individual function does not
 *        complete, the current state will be saved, for future
 *        restart.
 *      - return status
 *          OMPI_COMPLETE: function completed
 *          OMPI_INCOMPLETE: need to continue progressing the function
 *          any other return value - error condition
 */

/*
 * Progress one fragment by calling its remaining functions in order,
 * starting at frag_descriptor->current_fn_index.
 *
 * Each function is looked up in its bcol module's function table and is
 * called with the matching entry of frag_descriptor->fn_args and its
 * entry in the collective description.
 *
 * Return values, as produced by this routine:
 *   OMPI_SUCCESS    - every remaining function returned ML_OMPI_COMPLETE
 *                     (the message's frags_complete counter is then
 *                     incremented), or a function returned
 *                     ML_OMPI_INCOMPLETE and its index was stored in
 *                     current_fn_index so a later call resumes there.
 *   any other value - the value returned by the function that failed;
 *                     its index is stored in current_fn_index as well.
 */
int coll_ml_progress_individual_message(mca_coll_ml_fragment_t *frag_descriptor)
{
   /* local variables */
   int fn_index, ret = OMPI_SUCCESS;
   /* size_t, to match the counter updated by OPAL_THREAD_ADD_SIZE_T; a
    * narrower type would truncate the count before it is compared */
   size_t n_frags_complete;
   int starting_fn_index=frag_descriptor->current_fn_index;
   coll_ml_collective_description_t *local_comm_description=
       frag_descriptor->full_msg_descriptor->local_comm_description;

   /* loop over functions */
   for( fn_index=starting_fn_index ; fn_index < local_comm_description->n_functions;
        fn_index ++ ) {
       mca_bcol_base_module_t *bcol_module=
           local_comm_description->functions[fn_index].bcol_module;
       ret =(bcol_module->bcol_function_table[local_comm_description->functions[fn_index].fn_idx])
               (&(frag_descriptor->fn_args[fn_index]), &local_comm_description->functions[fn_index]);
       if( ML_OMPI_COMPLETE != ret ) {
           /* the function did not complete: in every such case remember
            * which function to restart from */
           frag_descriptor->current_fn_index=fn_index;

           if( ML_OMPI_INCOMPLETE == ret ) {
               /* need to return to this later */
               /* RLG - is this really best ?  Only advantage is that
                * if we exit the loop, we can assume message is
                * complete
                */
               return OMPI_SUCCESS;
           }

           /* some sort of error condition - hand it back unchanged */
           return ret;
       }

   }

   /* looks like we are done */
   /* increment counter for number of completed fragments */
   n_frags_complete = OPAL_THREAD_ADD_SIZE_T(
       &(frag_descriptor->full_msg_descriptor->frags_complete), 1);

   /*
    * release resources
    */

   /* fragment resources */

   /* full message resources */
   if ( n_frags_complete == frag_descriptor->full_msg_descriptor->n_fragments)
   {
       /* free any fragments that still need to be freed
        * NOTE: at this level we do not handle any resources
        * aside from the pre-registered buffers, all these
        * are handled in the bcol level */

       /* return the buffers to the ml free list */

       /* mark as complete - so MPI can complete
        * the message descriptor will be freed by a call
        * to mpi_test/mpi_wait/... as the message descriptor
        * also holds the mpi request object */

   }

   return OMPI_SUCCESS;
}