105 строки
3.8 KiB
C
105 строки
3.8 KiB
C
|
/*
|
||
|
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
|
||
|
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
|
||
|
* $COPYRIGHT$
|
||
|
*
|
||
|
* Additional copyrights may follow
|
||
|
*
|
||
|
* $HEADER$
|
||
|
*/
|
||
|
|
||
|
#include "ompi/mca/coll/ml/coll_ml.h"
|
||
|
|
||
|
/*
|
||
|
* This routine is used to progress a series of communication
|
||
|
* primitives.
|
||
|
*
|
||
|
* Assumptions:
|
||
|
* - A message is described by a message descriptor
|
||
|
* - Each message has a setup function associated with it, which is
|
||
|
* algorithm specific. When a fragment is being prepared, this
|
||
|
* progress is used to setup the arguments that will be passed into
|
||
|
* each routine called to complete a given function. The idea here
|
||
|
* is that when the progress routine is called, the full communication
|
||
|
* pattern has already been described in the setup function, with
|
||
|
* progress function being generic.
|
||
|
* - Each fragment is described by a fragment descriptor
|
||
|
* - Each message descriptor has a fragment descriptor permanently
|
||
|
* associated with it.
|
||
|
* - The message will be progressed as long as the individual
|
||
|
* functions complete. When an individual function does not
|
||
|
* complete, the current state will be saved, for future
|
||
|
* restart.
|
||
|
* - return status
|
||
|
* OMPI_COMPLETE: function completed
|
||
|
* OMPI_INCOMPLETE: need to continue progressing the function
|
||
|
* any other return value - error condition
|
||
|
*/
|
||
|
|
||
|
/*
 * Run the functions of a fragment's collective description, starting at
 * frag_descriptor->current_fn_index and continuing while each one reports
 * ML_OMPI_COMPLETE.
 *
 * Returns:
 *   OMPI_SUCCESS - every remaining function completed, or one of them
 *                  reported ML_OMPI_INCOMPLETE (its index is stored in
 *                  current_fn_index so a later call resumes there)
 *   other value  - the status of the first function that reported anything
 *                  else; its index is also stored in current_fn_index
 */
int coll_ml_progress_individual_message(mca_coll_ml_fragment_t *frag_descriptor)
{
    int status = OMPI_SUCCESS;
    int idx;
    uint32_t completed_frags;
    int first_idx = frag_descriptor->current_fn_index;
    coll_ml_collective_description_t *schedule =
        frag_descriptor->full_msg_descriptor->local_comm_description;

    /* walk the remaining functions of the schedule */
    for (idx = first_idx; idx < schedule->n_functions; ++idx) {
        mca_bcol_base_module_t *module = schedule->functions[idx].bcol_module;

        /* dispatch through the module's function table */
        status = (module->bcol_function_table[schedule->functions[idx].fn_idx])(
                     &(frag_descriptor->fn_args[idx]),
                     &schedule->functions[idx]);

        if (ML_OMPI_COMPLETE == status) {
            continue;
        }

        /* Not complete: remember which function to resume (or inspect). */
        frag_descriptor->current_fn_index = idx;

        /* RLG - is this really best ?  Only advantage is that if we
         * exit the loop, we can assume the message is complete.
         * An incomplete function is reported as success so the caller
         * retries later; anything else is passed back as an error. */
        return (ML_OMPI_INCOMPLETE == status) ? OMPI_SUCCESS : status;
    }

    /* Every function completed: bump the count of finished fragments. */
    completed_frags = OPAL_THREAD_ADD64(
        &(frag_descriptor->full_msg_descriptor->frags_complete), 1);

    /*
     * Release resources.
     *
     * Fragment resources: nothing to do here.
     */

    /* Full message resources. */
    if (completed_frags == frag_descriptor->full_msg_descriptor->n_fragments) {
        /* Free any fragments that still need to be freed.
         * NOTE: at this level we do not handle any resources aside
         * from the pre-registered buffers; all these are handled at
         * the bcol level. */

        /* Return the buffers to the ml free list. */

        /* Mark as complete - so MPI can complete.  The message
         * descriptor will be freed by a call to
         * mpi_test/mpi_wait/... as the message descriptor also holds
         * the mpi request object. */
    }

    return OMPI_SUCCESS;
}
|