146 строки
4.7 KiB
C
146 строки
4.7 KiB
C
|
/*
 * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
 * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */
|
||
|
/** @file */
|
||
|
|
||
|
#include "ompi_config.h"
|
||
|
|
||
|
#include "ompi/constants.h"
|
||
|
#include "opal/threads/mutex.h"
|
||
|
#include "ompi/communicator/communicator.h"
|
||
|
#include "ompi/mca/coll/coll.h"
|
||
|
#include "ompi/mca/bcol/bcol.h"
|
||
|
#include "opal/sys/atomic.h"
|
||
|
#include "ompi/mca/coll/ml/coll_ml.h"
|
||
|
#include "ompi/mca/coll/ml/coll_ml_hier_algorithms.h"
|
||
|
|
||
|
coll_ml_collective_description_t *collective_op;
|
||
|
bcol_fn_args_t fn_arguments;
|
||
|
mca_coll_ml_descriptor_t *msg_desc;
|
||
|
|
||
|
static int coll_ml_setup_ibarrier_instance_recursive_doubling(
|
||
|
mca_coll_ml_descriptor_t *msg_desc,
|
||
|
coll_ml_collective_description_t *collective_op)
|
||
|
{
|
||
|
/* local variables */
|
||
|
ret=OMPI_SUCCESS;
|
||
|
int i_fn,cnt;
|
||
|
|
||
|
/* initialize function arguments */
|
||
|
|
||
|
/* mark all routines as not yet started - need this, so that
|
||
|
* when we try to progress the barrier, we know where to pickup
|
||
|
* when a function is called - MOVE this into the setup function.
|
||
|
*/
|
||
|
for(i_fn=0 ; i_fn < collective_op->n_functions ; i_fn++ ) {
|
||
|
msg_desc->fragment.fn_args[i_fn].function_status=FUNCTION_NOT_STARTED;
|
||
|
}
|
||
|
|
||
|
/* setup the fanin root */
|
||
|
for(i_fn=0 ; i_fn < collective_op->
|
||
|
alg_params.coll_fn.ibarrier_recursive_doubling.n_fanin_steps ;
|
||
|
i_fn++ ) {
|
||
|
mca_bcol_base_module_t *bcol_mod=
|
||
|
msg_desc->local_comm_description->functions[i_fn].bcol_module;
|
||
|
/* the lowest rank in the group will be the root */
|
||
|
msg_desc->fragment.fn_args[i_fn].root=0;
|
||
|
}
|
||
|
|
||
|
/* setup the fanout root */
|
||
|
cnt=alg_params.coll_fn.ibarrier_recursive_doubling.n_fanin_steps+
|
||
|
alg_params.coll_fn.ibarrier_recursive_doubling.n_recursive_doubling_steps;
|
||
|
for(i_fn=cnt ; i_fn < cnt + collective_op->
|
||
|
alg_params.coll_fn.ibarrier_recursive_doubling.n_fanin_steps ;
|
||
|
i_fn++ ) {
|
||
|
mca_bcol_base_module_t *bcol_mod=
|
||
|
msg_desc->local_comm_description->functions[i_fn].bcol_module;
|
||
|
/* the lowest rank in the group will be the root */
|
||
|
msg_desc->fragment.fn_args[i_fn].root=0;
|
||
|
}
|
||
|
|
||
|
/* successful completion */
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Hierarchical blocking barrier
|
||
|
*/
|
||
|
int mca_coll_ml_nb_barrier_intra( struct ompi_communicator_t *comm,
|
||
|
ompi_request_t ** request, mca_coll_base_module_t *module)
|
||
|
{
|
||
|
/* local variables */
|
||
|
int ret=OMPI_SUCCESS;
|
||
|
mca_coll_ml_module_t *ml_module;
|
||
|
uint64_t sequence_number;
|
||
|
int i_fn;
|
||
|
coll_ml_collective_description_t *collective_op;
|
||
|
bcol_fn_args_t fn_arguments;
|
||
|
mca_coll_ml_descriptor_t *msg_desc;
|
||
|
|
||
|
ml_module=(mca_coll_ml_module_t *) module;
|
||
|
/* debug */
|
||
|
fprintf(stderr," mca_coll_ml_nb_barrier_intra called \n");
|
||
|
fflush(stderr);
|
||
|
/* end debug */
|
||
|
|
||
|
/* grab full message descriptor - RLG: this is really not optimal,
|
||
|
* as we may be doing too much initialization if the collective
|
||
|
* routine completes on the first call to progress which is called
|
||
|
* within this routine. Need to see if we can be more efficient
|
||
|
* here. The current issue is that the only way that I can think
|
||
|
* to do this now is with two separate code paths, which I want to
|
||
|
* avoid at this stage.
|
||
|
*/
|
||
|
OMPI_FREE_LIST_GET(&(ml_module->message_descriptors),
|
||
|
msg_desc,ret);
|
||
|
if( OMPI_SUCCESS != ret) {
|
||
|
goto Error;
|
||
|
}
|
||
|
|
||
|
/* get message sequence number */
|
||
|
sequence_number=OPAL_THREAD_ADD64(
|
||
|
&(ml_module->no_data_collective_sequence_num),1);
|
||
|
fn_arguments.sequence_num=sequence_number;
|
||
|
|
||
|
|
||
|
/* get pointer to schedule - only one algorithm at this stage */
|
||
|
collective_op=&(ml_module->hierarchical_algorithms[BCOL_NB_BARRIER][0]);
|
||
|
|
||
|
/* call setup function - RLG: right now this is totally extra,
|
||
|
* but if we are going to have more than one algorithm,
|
||
|
* this is a better way to do this. */
|
||
|
coll_ml_setup_ibarrier_instance_recursive_doubling(
|
||
|
msg_desc,collective_op);
|
||
|
|
||
|
/* call the progress function to actually start the barrier */
|
||
|
|
||
|
/* recycle resources - RLG: need to think about this one */
|
||
|
|
||
|
#if 0
|
||
|
/* run barrier */
|
||
|
/* need to add bcol context for the call */
|
||
|
for( i_fn =0 ; i_fn < collective_op->n_functions ; i_fn++ ) {
|
||
|
mca_bcol_base_module_t *bcol_module=
|
||
|
collective_op->functions[i_fn].bcol_module;
|
||
|
/* for barrier, all we need is the group information that is
|
||
|
* captured in the bcol module
|
||
|
*/
|
||
|
ret=collective_op->functions[i_fn].fn(&fn_arguments,
|
||
|
NULL,NULL,bcol_module);
|
||
|
if( OMPI_SUCCESS != ret) {
|
||
|
} goto Error;
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
return OMPI_SUCCESS;
|
||
|
|
||
|
Error:
|
||
|
return ret;
|
||
|
}
|