
* First import of Bull specific modifications to HAN * Cleaning, renaming and compilation fixing Changed all future into han. * Import BULL specific modifications in coll/tuned and coll/base * Fixed compilation issues in Han * Changed han_output to directly point to coll framework output. * The verbosity MCA parameter was removed as a duplicate of coll verbosity * Add fallback in han reduce when op cannot commute and ppn are imbalanced * Added fallback for han bcast when nodes do not have the same number of processes * Add fallback in han scatter when ppn are imbalanced + fixed missing scatter_fn pointer in the module interface Signed-off-by: Brelle Emmanuel <emmanuel.brelle@atos.net> Co-authored-by: a700850 <pierre.lemarinier@atos.net> Co-authored-by: germainf <florent.germain@atos.net>
601 строка
23 KiB
C
601 строка
23 KiB
C
/*
|
|
* Copyright (c) 2018-2020 The University of Tennessee and The University
|
|
* of Tennessee Research Foundation. All rights
|
|
* reserved.
|
|
* Copyright (c) 2020 Bull S.A.S. All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
#ifndef MCA_COLL_HAN_EXPORT_H
|
|
#define MCA_COLL_HAN_EXPORT_H
|
|
|
|
#include "ompi_config.h"
|
|
|
|
#include "mpi.h"
|
|
#include "ompi/mca/mca.h"
|
|
#include "opal/util/output.h"
|
|
#include "ompi/mca/coll/base/coll_base_functions.h"
|
|
#include "coll_han_trigger.h"
|
|
#include "ompi/mca/coll/han/coll_han_dynamic.h"
|
|
|
|
BEGIN_C_DECLS
|
|
|
|
/*
 * Number of modules available at each hierarchy level. Today:
 *  . only 2 modules available for intranode (low) level
 *  . only 2 modules available for internode (up) level
 */
#define COLL_HAN_LOW_MODULES 2
#define COLL_HAN_UP_MODULES 2
|
|
|
|
/*
 * One auto-tuning selection record; the component struct refers to these
 * through its han_auto_tuned pointer.
 *
 * NOTE(review): the per-field meanings below are inferred from the
 * abbreviated names only -- confirm against the code that fills the table.
 */
typedef struct {
    uint32_t umod;  /* presumably the up-level module index */
    uint32_t lmod;  /* presumably the low-level module index */
    uint32_t fs;    /* presumably a segment size -- TODO confirm */
    uint32_t ualg;  /* presumably the up-level algorithm id -- TODO confirm */
    uint32_t us;    /* presumably the up-level segment size -- TODO confirm */
} selection;
|
|
|
|
struct mca_bcast_argu_s {
|
|
mca_coll_task_t *cur_task;
|
|
void *buff;
|
|
int seg_count;
|
|
struct ompi_datatype_t *dtype;
|
|
int root_low_rank;
|
|
int root_up_rank;
|
|
struct ompi_communicator_t *up_comm;
|
|
struct ompi_communicator_t *low_comm;
|
|
int num_segments;
|
|
int cur_seg;
|
|
int w_rank;
|
|
int last_seg_count;
|
|
bool noop;
|
|
};
|
|
typedef struct mca_bcast_argu_s mca_bcast_argu_t;
|
|
|
|
struct mca_reduce_argu_s {
|
|
mca_coll_task_t *cur_task;
|
|
void *sbuf;
|
|
void *rbuf;
|
|
int seg_count;
|
|
struct ompi_datatype_t *dtype;
|
|
struct ompi_op_t *op;
|
|
int root_low_rank;
|
|
int root_up_rank;
|
|
struct ompi_communicator_t *up_comm;
|
|
struct ompi_communicator_t *low_comm;
|
|
int num_segments;
|
|
int cur_seg;
|
|
int w_rank;
|
|
int last_seg_count;
|
|
bool noop;
|
|
};
|
|
typedef struct mca_reduce_argu_s mca_reduce_argu_t;
|
|
|
|
struct mca_allreduce_argu_s {
|
|
mca_coll_task_t *cur_task;
|
|
void *sbuf;
|
|
void *rbuf;
|
|
int seg_count;
|
|
struct ompi_datatype_t *dtype;
|
|
struct ompi_op_t *op;
|
|
int root_up_rank;
|
|
int root_low_rank;
|
|
struct ompi_communicator_t *up_comm;
|
|
struct ompi_communicator_t *low_comm;
|
|
int num_segments;
|
|
int cur_seg;
|
|
int w_rank;
|
|
int last_seg_count;
|
|
bool noop;
|
|
ompi_request_t *req;
|
|
int *completed;
|
|
};
|
|
typedef struct mca_allreduce_argu_s mca_allreduce_argu_t;
|
|
|
|
struct mca_scatter_argu_s {
|
|
mca_coll_task_t *cur_task;
|
|
void *sbuf;
|
|
void *sbuf_inter_free;
|
|
void *sbuf_reorder_free;
|
|
int scount;
|
|
struct ompi_datatype_t *sdtype;
|
|
void *rbuf;
|
|
int rcount;
|
|
struct ompi_datatype_t *rdtype;
|
|
int root;
|
|
int root_up_rank;
|
|
int root_low_rank;
|
|
struct ompi_communicator_t *up_comm;
|
|
struct ompi_communicator_t *low_comm;
|
|
int w_rank;
|
|
bool noop;
|
|
ompi_request_t *req;
|
|
};
|
|
typedef struct mca_scatter_argu_s mca_scatter_argu_t;
|
|
|
|
struct mca_gather_argu_s {
|
|
mca_coll_task_t *cur_task;
|
|
void *sbuf;
|
|
void *sbuf_inter_free;
|
|
int scount;
|
|
struct ompi_datatype_t *sdtype;
|
|
void *rbuf;
|
|
int rcount;
|
|
struct ompi_datatype_t *rdtype;
|
|
int root;
|
|
int root_up_rank;
|
|
int root_low_rank;
|
|
struct ompi_communicator_t *up_comm;
|
|
struct ompi_communicator_t *low_comm;
|
|
int w_rank;
|
|
bool noop;
|
|
ompi_request_t *req;
|
|
};
|
|
typedef struct mca_gather_argu_s mca_gather_argu_t;
|
|
|
|
struct mca_allgather_argu_s {
|
|
mca_coll_task_t *cur_task;
|
|
void *sbuf;
|
|
void *sbuf_inter_free;
|
|
int scount;
|
|
struct ompi_datatype_t *sdtype;
|
|
void *rbuf;
|
|
int rcount;
|
|
struct ompi_datatype_t *rdtype;
|
|
int root_low_rank;
|
|
struct ompi_communicator_t *up_comm;
|
|
struct ompi_communicator_t *low_comm;
|
|
int w_rank;
|
|
bool noop;
|
|
bool is_mapbycore;
|
|
int *topo;
|
|
ompi_request_t *req;
|
|
};
|
|
typedef struct mca_allgather_argu_s mca_allgather_argu_t;
|
|
|
|
/**
|
|
* Structure to hold the han coll component. First it holds the
|
|
* base coll component, and then holds a bunch of
|
|
* han-coll-component-specific stuff (e.g., current MCA param
|
|
* values).
|
|
*/
|
|
typedef struct mca_coll_han_component_t {
|
|
/** Base coll component */
|
|
mca_coll_base_component_2_0_0_t super;
|
|
|
|
/** MCA parameter: Priority of this component */
|
|
int han_priority;
|
|
/* whether output the log message */
|
|
int han_output;
|
|
/* segment size for bcast */
|
|
uint32_t han_bcast_segsize;
|
|
/* up level module for bcast */
|
|
uint32_t han_bcast_up_module;
|
|
/* low level module for bcast */
|
|
uint32_t han_bcast_low_module;
|
|
/* segment size for reduce */
|
|
uint32_t han_reduce_segsize;
|
|
/* up level module for reduce */
|
|
uint32_t han_reduce_up_module;
|
|
/* low level module for reduce */
|
|
uint32_t han_reduce_low_module;
|
|
/* segment size for allreduce */
|
|
uint32_t han_allreduce_segsize;
|
|
/* up level module for allreduce */
|
|
uint32_t han_allreduce_up_module;
|
|
/* low level module for allreduce */
|
|
uint32_t han_allreduce_low_module;
|
|
/* up level module for allgather */
|
|
uint32_t han_allgather_up_module;
|
|
/* low level module for allgather */
|
|
uint32_t han_allgather_low_module;
|
|
/* up level module for gather */
|
|
uint32_t han_gather_up_module;
|
|
/* low level module for gather */
|
|
uint32_t han_gather_low_module;
|
|
/* up level module for scatter */
|
|
uint32_t han_scatter_up_module;
|
|
/* low level module for scatter */
|
|
uint32_t han_scatter_low_module;
|
|
/* whether enable auto tune */
|
|
uint32_t han_auto_tune;
|
|
/* whether we need reproducible results
|
|
* (but disables topological optimisations)
|
|
*/
|
|
uint32_t han_reproducible;
|
|
/* create a 3D array
|
|
* num_processes (n): 2 4 8 16 32 64 (6)
|
|
* num_core (c): 2 4 8 12 (4)
|
|
* message size (m): 1 - 4194304 (23)
|
|
*/
|
|
uint32_t han_auto_tune_n;
|
|
uint32_t han_auto_tune_c;
|
|
uint32_t han_auto_tune_m;
|
|
selection *han_auto_tuned;
|
|
bool use_simple_algorithm[COLLCOUNT];
|
|
|
|
/* Dynamic configuration rules */
|
|
bool use_dynamic_file_rules;
|
|
bool dump_dynamic_rules;
|
|
char* dynamic_rules_filename;
|
|
/* Dynamic rules from file */
|
|
mca_coll_han_dynamic_rules_t dynamic_rules;
|
|
/* Dynamic rules from mca parameter */
|
|
COMPONENT_T mca_rules[COLLCOUNT][NB_TOPO_LVL];
|
|
int topo_level;
|
|
|
|
/* Define maximum dynamic errors printed by rank 0 with a 0 verbosity level */
|
|
int max_dynamic_errors;
|
|
} mca_coll_han_component_t;
|
|
|
|
/* Generic function-pointer type; it is the type of the `dummy` member of the
 * previous_routine union in collective_fallback_t. */
typedef void (*previous_dummy_fn_t) (void);
|
|
|
|
/*
|
|
* Structure used to store what is necessary for the collective operations
|
|
* routines in case of fallback.
|
|
*/
|
|
typedef struct collective_fallback_t {
|
|
union {
|
|
mca_coll_base_module_allgather_fn_t allgather;
|
|
mca_coll_base_module_allgatherv_fn_t allgatherv;
|
|
mca_coll_base_module_allreduce_fn_t allreduce;
|
|
mca_coll_base_module_bcast_fn_t bcast;
|
|
mca_coll_base_module_gather_fn_t gather;
|
|
mca_coll_base_module_reduce_fn_t reduce;
|
|
mca_coll_base_module_scatter_fn_t scatter;
|
|
previous_dummy_fn_t dummy;
|
|
} previous_routine;
|
|
mca_coll_base_module_t *previous_module;
|
|
} collective_fallback_t;
|
|
|
|
/** Coll han module */
|
|
typedef struct mca_coll_han_module_t {
|
|
/** Base module */
|
|
mca_coll_base_module_t super;
|
|
|
|
/* Whether this module has been lazily initialized or not yet */
|
|
bool enabled;
|
|
|
|
struct ompi_communicator_t *cached_comm;
|
|
struct ompi_communicator_t **cached_low_comms;
|
|
struct ompi_communicator_t **cached_up_comms;
|
|
int *cached_vranks;
|
|
int *cached_topo;
|
|
bool is_mapbycore;
|
|
bool are_ppn_imbalanced;
|
|
|
|
/* To be able to fallback when the cases are not supported */
|
|
struct collective_fallback_t previous_routines[COLLCOUNT];
|
|
|
|
/* To be able to fallback on reproducible algorithm */
|
|
mca_coll_base_module_reduce_fn_t reproducible_reduce;
|
|
mca_coll_base_module_t *reproducible_reduce_module;
|
|
mca_coll_base_module_allreduce_fn_t reproducible_allreduce;
|
|
mca_coll_base_module_t *reproducible_allreduce_module;
|
|
|
|
/* Topological level of this communicator */
|
|
int topologic_level;
|
|
|
|
/* Collective module storage for module choice */
|
|
mca_coll_han_collective_modules_storage_t modules_storage;
|
|
bool storage_initialized;
|
|
|
|
/*
|
|
* Number of dynamic errors encountered
|
|
* The first mca_coll_han_component.max_dynamic_errors
|
|
* of rank 0 are printed with verbosity = 0
|
|
*/
|
|
int dynamic_errors;
|
|
|
|
/* Sub-communicator */
|
|
struct ompi_communicator_t *sub_comm[NB_TOPO_LVL];
|
|
} mca_coll_han_module_t;
|
|
OBJ_CLASS_DECLARATION(mca_coll_han_module_t);
|
|
|
|
/*
 * Some defines to stick to the naming used in the other components in terms of
 * fallback routines.
 *
 * Each macro expands to a member path, so it must be applied to a module
 * object, e.g. han_module->previous_bcast or han_module->previous_bcast_module.
 */
#define previous_allgather          previous_routines[ALLGATHER].previous_routine.allgather
#define previous_allgatherv         previous_routines[ALLGATHERV].previous_routine.allgatherv
#define previous_allreduce          previous_routines[ALLREDUCE].previous_routine.allreduce
#define previous_bcast              previous_routines[BCAST].previous_routine.bcast
#define previous_gather             previous_routines[GATHER].previous_routine.gather
#define previous_reduce             previous_routines[REDUCE].previous_routine.reduce
#define previous_scatter            previous_routines[SCATTER].previous_routine.scatter

#define previous_allgather_module   previous_routines[ALLGATHER].previous_module
#define previous_allgatherv_module  previous_routines[ALLGATHERV].previous_module
#define previous_allreduce_module   previous_routines[ALLREDUCE].previous_module
#define previous_bcast_module       previous_routines[BCAST].previous_module
#define previous_gather_module      previous_routines[GATHER].previous_module
#define previous_reduce_module      previous_routines[REDUCE].previous_module
#define previous_scatter_module     previous_routines[SCATTER].previous_module
|
|
|
|
/**
|
|
* Global component instance
|
|
*/
|
|
OMPI_MODULE_DECLSPEC extern mca_coll_han_component_t mca_coll_han_component;
|
|
|
|
/*
|
|
* coll module functions
|
|
*/
|
|
int mca_coll_han_init_query(bool enable_progress_threads, bool enable_mpi_threads);
|
|
|
|
mca_coll_base_module_t *mca_coll_han_comm_query(struct ompi_communicator_t *comm, int *priority);
|
|
|
|
int han_request_free(ompi_request_t ** request);
|
|
|
|
/* Subcommunicator creation */
|
|
void mca_coll_han_comm_create(struct ompi_communicator_t *comm, mca_coll_han_module_t * han_module);
|
|
void mca_coll_han_comm_create_new(struct ompi_communicator_t *comm, mca_coll_han_module_t *han_module);
|
|
/* Gather topology information */
|
|
int *mca_coll_han_topo_init(struct ompi_communicator_t *comm, mca_coll_han_module_t * han_module,
|
|
int num_topo_level);
|
|
|
|
/* Utils */

/* Reports two ranks for `root` through the root_low_rank / root_up_rank
 * out-pointers, based on vranks and low_size.
 * NOTE(review): exact mapping is not visible from this header. */
void mca_coll_han_get_ranks(int *vranks, int root, int low_size,
                            int *root_low_rank, int *root_up_rank);

/* Helpers for the auto-tuning table dimensions n, c and m.
 * NOTE(review): presumably map a raw value to a table index -- confirm. */
uint32_t han_auto_tuned_get_n(uint32_t n);
uint32_t han_auto_tuned_get_c(uint32_t c);
uint32_t han_auto_tuned_get_m(uint32_t m);
|
|
|
|
const char* mca_coll_han_colltype_to_str(COLLTYPE_T coll);
|
|
const char* mca_coll_han_topo_lvl_to_str(TOPO_LVL_T topo_lvl);
|
|
|
|
/** Dynamic component choice */
|
|
/*
|
|
* Get all the collective modules initialized on this communicator
|
|
* This function must be call at the start of every selector implementation
|
|
*/
|
|
int
|
|
mca_coll_han_get_all_coll_modules(struct ompi_communicator_t *comm,
|
|
mca_coll_han_module_t *han_module);
|
|
|
|
int
|
|
mca_coll_han_allgather_intra_dynamic(ALLGATHER_BASE_ARGS,
|
|
mca_coll_base_module_t *module);
|
|
int
|
|
mca_coll_han_allgatherv_intra_dynamic(ALLGATHERV_BASE_ARGS,
|
|
mca_coll_base_module_t *module);
|
|
int
|
|
mca_coll_han_allreduce_intra_dynamic(ALLREDUCE_BASE_ARGS,
|
|
mca_coll_base_module_t *module);
|
|
int
|
|
mca_coll_han_bcast_intra_dynamic(BCAST_BASE_ARGS,
|
|
mca_coll_base_module_t *module);
|
|
int
|
|
mca_coll_han_gather_intra_dynamic(GATHER_BASE_ARGS,
|
|
mca_coll_base_module_t *module);
|
|
int
|
|
mca_coll_han_reduce_intra_dynamic(REDUCE_BASE_ARGS,
|
|
mca_coll_base_module_t *module);
|
|
int
|
|
mca_coll_han_scatter_intra_dynamic(SCATTER_BASE_ARGS,
|
|
mca_coll_base_module_t *module);
|
|
|
|
/* Bcast */
|
|
int mca_coll_han_bcast_intra_simple(void *buff,
|
|
int count,
|
|
struct ompi_datatype_t *dtype,
|
|
int root,
|
|
struct ompi_communicator_t *comm,
|
|
mca_coll_base_module_t *module);
|
|
void mac_coll_han_set_bcast_argu(mca_bcast_argu_t * argu, mca_coll_task_t * cur_task, void *buff,
|
|
int seg_count, struct ompi_datatype_t *dtype,
|
|
int root_up_rank, int root_low_rank,
|
|
struct ompi_communicator_t *up_comm,
|
|
struct ompi_communicator_t *low_comm,
|
|
int num_segments, int cur_seg, int w_rank, int last_seg_count,
|
|
bool noop);
|
|
int mca_coll_han_bcast_intra(void *buff, int count, struct ompi_datatype_t *dtype, int root,
|
|
struct ompi_communicator_t *comm, mca_coll_base_module_t * module);
|
|
int mca_coll_han_bcast_t0_task(void *task_argu);
|
|
int mca_coll_han_bcast_t1_task(void *task_argu);
|
|
|
|
/* Reduce */
|
|
int
|
|
mca_coll_han_reduce_intra_simple(const void *sbuf,
|
|
void* rbuf,
|
|
int count,
|
|
struct ompi_datatype_t *dtype,
|
|
ompi_op_t *op,
|
|
int root,
|
|
struct ompi_communicator_t *comm,
|
|
mca_coll_base_module_t *module);
|
|
int
|
|
mca_coll_han_reduce_reproducible_decision(struct ompi_communicator_t *comm,
|
|
mca_coll_base_module_t *module);
|
|
int
|
|
mca_coll_han_reduce_reproducible(const void *sbuf,
|
|
void *rbuf,
|
|
int count,
|
|
struct ompi_datatype_t *dtype,
|
|
struct ompi_op_t *op,
|
|
int root,
|
|
struct ompi_communicator_t *comm,
|
|
mca_coll_base_module_t *module);
|
|
|
|
|
|
|
|
void mac_coll_han_set_reduce_argu(mca_reduce_argu_t * argu, mca_coll_task_t * cur_task,
|
|
void *sbuf,
|
|
void *rbuf, int seg_count, struct ompi_datatype_t *dtype,
|
|
struct ompi_op_t *op,
|
|
int root_up_rank, int root_low_rank,
|
|
struct ompi_communicator_t *up_comm,
|
|
struct ompi_communicator_t *low_comm,
|
|
int num_segments, int cur_seg, int w_rank, int last_seg_count,
|
|
bool noop);
|
|
|
|
int mca_coll_han_reduce_intra(const void *sbuf,
|
|
void *rbuf,
|
|
int count,
|
|
struct ompi_datatype_t *dtype,
|
|
ompi_op_t* op,
|
|
int root,
|
|
struct ompi_communicator_t *comm,
|
|
mca_coll_base_module_t * module);
|
|
|
|
int mca_coll_han_reduce_t0_task(void *task_argu);
|
|
int mca_coll_han_reduce_t1_task(void *task_argu);
|
|
|
|
/* Allreduce */
|
|
int
|
|
mca_coll_han_allreduce_intra_simple(const void *sbuf,
|
|
void *rbuf,
|
|
int count,
|
|
struct ompi_datatype_t *dtype,
|
|
struct ompi_op_t *op,
|
|
struct ompi_communicator_t *comm,
|
|
mca_coll_base_module_t *module);
|
|
int
|
|
mca_coll_han_allreduce_reproducible_decision(struct ompi_communicator_t *comm,
|
|
mca_coll_base_module_t *module);
|
|
int
|
|
mca_coll_han_allreduce_reproducible(const void *sbuf,
|
|
void *rbuf,
|
|
int count,
|
|
struct ompi_datatype_t *dtype,
|
|
struct ompi_op_t *op,
|
|
struct ompi_communicator_t *comm,
|
|
mca_coll_base_module_t *module);
|
|
|
|
void mac_coll_han_set_allreduce_argu(mca_allreduce_argu_t * argu,
|
|
mca_coll_task_t * cur_task,
|
|
void *sbuf,
|
|
void *rbuf,
|
|
int seg_count,
|
|
struct ompi_datatype_t *dtype,
|
|
struct ompi_op_t *op,
|
|
int root_up_rank,
|
|
int root_low_rank,
|
|
struct ompi_communicator_t *up_comm,
|
|
struct ompi_communicator_t *low_comm,
|
|
int num_segments,
|
|
int cur_seg,
|
|
int w_rank,
|
|
int last_seg_count,
|
|
bool noop, ompi_request_t * req, int *completed);
|
|
int mca_coll_han_allreduce_intra(const void *sbuf,
|
|
void *rbuf,
|
|
int count,
|
|
struct ompi_datatype_t *dtype,
|
|
struct ompi_op_t *op,
|
|
struct ompi_communicator_t *comm, mca_coll_base_module_t * module);
|
|
int mca_coll_han_allreduce_t0_task(void *task_argu);
|
|
int mca_coll_han_allreduce_t1_task(void *task_argu);
|
|
int mca_coll_han_allreduce_t2_task(void *task_argu);
|
|
int mca_coll_han_allreduce_t3_task(void *task_argu);
|
|
|
|
/* Scatter */
|
|
int
|
|
mca_coll_han_scatter_intra(const void *sbuf, int scount,
|
|
struct ompi_datatype_t *sdtype,
|
|
void *rbuf, int rcount,
|
|
struct ompi_datatype_t *rdtype,
|
|
int root,
|
|
struct ompi_communicator_t *comm, mca_coll_base_module_t * module);
|
|
int mca_coll_han_scatter_us_task(void *task_argu);
|
|
int mca_coll_han_scatter_ls_task(void *task_argu);
|
|
void mac_coll_han_set_scatter_argu(mca_scatter_argu_t * argu,
|
|
mca_coll_task_t * cur_task,
|
|
void *sbuf,
|
|
void *sbuf_inter_free,
|
|
void *sbuf_reorder_free,
|
|
int scount,
|
|
struct ompi_datatype_t *sdtype,
|
|
void *rbuf,
|
|
int rcount,
|
|
struct ompi_datatype_t *rdtype,
|
|
int root,
|
|
int root_up_rank,
|
|
int root_low_rank,
|
|
struct ompi_communicator_t *up_comm,
|
|
struct ompi_communicator_t *low_comm,
|
|
int w_rank, bool noop, ompi_request_t * req);
|
|
|
|
/* Gather */
|
|
int
|
|
mca_coll_han_gather_intra(const void *sbuf, int scount,
|
|
struct ompi_datatype_t *sdtype,
|
|
void *rbuf, int rcount,
|
|
struct ompi_datatype_t *rdtype,
|
|
int root,
|
|
struct ompi_communicator_t *comm, mca_coll_base_module_t * module);
|
|
int mca_coll_han_gather_lg_task(void *task_argu);
|
|
int mca_coll_han_gather_ug_task(void *task_argu);
|
|
void mac_coll_han_set_gather_argu(mca_gather_argu_t * argu,
|
|
mca_coll_task_t * cur_task,
|
|
void *sbuf,
|
|
void *sbuf_inter_free,
|
|
int scount,
|
|
struct ompi_datatype_t *sdtype,
|
|
void *rbuf,
|
|
int rcount,
|
|
struct ompi_datatype_t *rdtype,
|
|
int root,
|
|
int root_up_rank,
|
|
int root_low_rank,
|
|
struct ompi_communicator_t *up_comm,
|
|
struct ompi_communicator_t *low_comm,
|
|
int w_rank, bool noop, ompi_request_t * req);
|
|
int
|
|
mca_coll_han_gather_intra_simple(const void *sbuf, int scount,
|
|
struct ompi_datatype_t *sdtype,
|
|
void *rbuf, int rcount,
|
|
struct ompi_datatype_t *rdtype,
|
|
int root,
|
|
struct ompi_communicator_t *comm,
|
|
mca_coll_base_module_t *module);
|
|
/* reordering after gather, for unordered ranks */
|
|
void
|
|
ompi_coll_han_reorder_gather(const void *sbuf,
|
|
void *rbuf, int rcount,
|
|
struct ompi_datatype_t *rdtype,
|
|
struct ompi_communicator_t *comm,
|
|
int * topo);
|
|
|
|
|
|
|
|
/* Allgather */
|
|
int
|
|
mca_coll_han_allgather_intra(const void *sbuf, int scount,
|
|
struct ompi_datatype_t *sdtype,
|
|
void *rbuf, int rcount,
|
|
struct ompi_datatype_t *rdtype,
|
|
struct ompi_communicator_t *comm, mca_coll_base_module_t * module);
|
|
int mca_coll_han_allgather_lg_task(void *task_argu);
|
|
int mca_coll_han_allgather_uag_task(void *task_argu);
|
|
int mca_coll_han_allgather_lb_task(void *task_argu);
|
|
void mac_coll_han_set_allgather_argu(mca_allgather_argu_t * argu,
|
|
mca_coll_task_t * cur_task,
|
|
void *sbuf,
|
|
void *sbuf_inter_free,
|
|
int scount,
|
|
struct ompi_datatype_t *sdtype,
|
|
void *rbuf,
|
|
int rcount,
|
|
struct ompi_datatype_t *rdtype,
|
|
int root_low_rank,
|
|
struct ompi_communicator_t *up_comm,
|
|
struct ompi_communicator_t *low_comm,
|
|
int w_rank,
|
|
bool noop, bool is_mapbycore, int *topo, ompi_request_t * req);
|
|
int
|
|
mca_coll_han_allgather_intra_simple(const void *sbuf, int scount,
|
|
struct ompi_datatype_t *sdtype,
|
|
void* rbuf, int rcount,
|
|
struct ompi_datatype_t *rdtype,
|
|
struct ompi_communicator_t *comm,
|
|
mca_coll_base_module_t *module);
|
|
|
|
END_C_DECLS
|
|
#endif /* MCA_COLL_HAN_EXPORT_H */
|