openmpi/ompi/mca/coll/ml/coll_ml_progress.c
Pavel Shamis b89f8fabc9 Adding Hierarchical Collectives project to the Open MPI trunk.
The project includes the following components and frameworks:
- ML Collective component
- NETPATTERNS and COMMPATTERNS common components
- BCOL framework
- SBGP framework

Note: By default, the ML collective component is disabled. To enable the
new collectives, the user should raise the priority of the ML component
(the coll_ml_priority MCA parameter).
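
For example, the priority can be raised at run time with the standard MCA
command-line syntax (the priority value 90 and the application name below
are only illustrative):

    mpirun --mca coll_ml_priority 90 -np 4 ./my_app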

=============================================

Primary Contributors (in alphabetical order):

Ishai Rabinovich (Mellanox)
Joshua S. Ladd (ORNL / Mellanox)
Manjunath Gorentla Venkata (ORNL)
Mike Dubman (Mellanox)
Noam Bloch (Mellanox)
Pavel (Pasha) Shamis (ORNL / Mellanox)
Richard Graham (ORNL / Mellanox)
Vasily Filipov (Mellanox)

This commit was SVN r27078.
2012-08-16 19:11:35 +00:00

/*
 * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
 * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */
#include "ompi/mca/coll/ml/coll_ml.h"
/*
 * This routine is used to progress a series of communication
 * primitives.
 *
 * Assumptions:
 * - A message is described by a message descriptor.
 * - Each message has a setup function associated with it, which is
 *   algorithm specific.  When a fragment is being prepared, this
 *   setup function fills in the arguments that will be passed to
 *   each routine called to complete a given function.  The idea is
 *   that by the time the progress routine is called, the full
 *   communication pattern has already been described by the setup
 *   function, so the progress function itself can stay generic.
 * - Each fragment is described by a fragment descriptor.
 * - Each message descriptor has a fragment descriptor permanently
 *   associated with it.
 * - The message will be progressed as long as the individual
 *   functions complete.  When an individual function does not
 *   complete, the current state is saved so the fragment can be
 *   restarted later.
 * - Return status of each individual function:
 *     ML_OMPI_COMPLETE:   the function completed
 *     ML_OMPI_INCOMPLETE: the function needs further progression
 *     any other return value indicates an error condition
 */
int coll_ml_progress_individual_message(mca_coll_ml_fragment_t *frag_descriptor)
{
    /* local variables */
    int fn_index, ret = OMPI_SUCCESS;
    uint32_t n_frags_complete;
    int starting_fn_index = frag_descriptor->current_fn_index;
    coll_ml_collective_description_t *local_comm_description =
        frag_descriptor->full_msg_descriptor->local_comm_description;

    /* loop over the list of functions, starting where the previous
     * progress call left off */
    for (fn_index = starting_fn_index; fn_index < local_comm_description->n_functions;
         fn_index++) {
        mca_bcol_base_module_t *bcol_module =
            local_comm_description->functions[fn_index].bcol_module;

        /* invoke the algorithm-specific primitive for this step through
         * the bcol module's function table, using the arguments prepared
         * by the setup function */
        ret = (bcol_module->bcol_function_table[local_comm_description->functions[fn_index].fn_idx])
            (&(frag_descriptor->fn_args[fn_index]), &local_comm_description->functions[fn_index]);

        if (ML_OMPI_COMPLETE != ret) {
            /* the function did not complete - decide what to do */
            if (ML_OMPI_INCOMPLETE == ret) {
                /* need to return to this later - mark where to continue */
                frag_descriptor->current_fn_index = fn_index;
                /* RLG - is this really best?  Only advantage is that
                 * if we exit the loop, we can assume the message is
                 * complete */
                return OMPI_SUCCESS;
            } else {
                /* some sort of error condition */
                frag_descriptor->current_fn_index = fn_index;
                return ret;
            }
        }
    }
    /* looks like we are done with this fragment -
     * increment the count of completed fragments */
    n_frags_complete = OPAL_THREAD_ADD64(
        &(frag_descriptor->full_msg_descriptor->frags_complete), 1);

    /*
     * release resources
     */
    /* fragment resources */
    /* full message resources */
    if (n_frags_complete == frag_descriptor->full_msg_descriptor->n_fragments) {
        /* free any fragments that still need to be freed.
         * NOTE: at this level we do not handle any resources
         * aside from the pre-registered buffers; everything else
         * is handled at the bcol level */
        /* return the buffers to the ml free list */
        /* mark as complete - so MPI can complete.  The message
         * descriptor will be freed by a call to mpi_test/mpi_wait/...,
         * since the message descriptor also holds the mpi request
         * object */
    }

    return OMPI_SUCCESS;
}
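
#if 0
/*
 * Illustrative only (not part of the original file): a minimal sketch of a
 * hypothetical caller that keeps progressing one fragment until the owning
 * message reports all of its fragments complete.  It only uses fields that
 * the progress routine above already touches (full_msg_descriptor,
 * frags_complete, n_fragments, current_fn_index); a real caller would sit
 * inside the component's progress engine and interleave this with progress
 * on other messages rather than spinning on a single fragment.
 */
static int example_drive_fragment(mca_coll_ml_fragment_t *frag_descriptor)
{
    int ret;

    while (frag_descriptor->full_msg_descriptor->frags_complete <
           frag_descriptor->full_msg_descriptor->n_fragments) {
        ret = coll_ml_progress_individual_message(frag_descriptor);
        if (OMPI_SUCCESS != ret) {
            /* one of the bcol functions reported an error */
            return ret;
        }
        /* OMPI_SUCCESS covers both "fragment finished" and "parked at
         * current_fn_index for a later pass", so the loop condition
         * above is what actually detects completion */
    }

    return OMPI_SUCCESS;
}
#endif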