cf377db823
Features: - Support for an override parameter file (openmpi-mca-param-override.conf). Variable values in this file can not be overridden by any file or environment value. - Support for boolean, unsigned, and unsigned long long variables. - Support for true/false values. - Support for enumerations on integer variables. - Support for MPIT scope, verbosity, and binding. - Support for command line source. - Support for setting variable source via the environment using OMPI_MCA_SOURCE_<var name>=source (either command or file:filename) - Cleaner API. - Support for variable groups (equivalent to MPIT categories). Notes: - Variables must be created with a backing store (char **, int *, or bool *) that must live at least as long as the variable. - Creating a variable with the MCA_BASE_VAR_FLAG_SETTABLE enables the use of mca_base_var_set_value() to change the value. - String values are duplicated when the variable is registered. It is up to the caller to free the original value if necessary. The new value will be freed by the mca_base_var system and must not be freed by the user. - Variables with constant scope may not be settable. - Variable groups (and all associated variables) are deregistered when the component is closed or the component repository item is freed. This prevents a segmentation fault from accessing a variable after its component is unloaded. - After some discussion we decided we should remove the automatic registration of component priority variables. Few component actually made use of this feature. - The enumerator interface was updated to be general enough to handle future uses of the interface. - The code to generate ompi_info output has been moved into the MCA variable system. See mca_base_var_dump(). opal: update core and components to mca_base_var system orte: update core and components to mca_base_var system ompi: update core and components to mca_base_var system This commit also modifies the rmaps framework. 
The following variables were moved from ppr and lama: rmaps_base_pernode, rmaps_base_n_pernode, rmaps_base_n_persocket. Both lama and ppr create synonyms for these variables. This commit was SVN r28236.
682 строки
22 KiB
C
682 строки
22 KiB
C
/*
|
|
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
|
|
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
#ifndef MCA_BCOL_H
|
|
#define MCA_BCOL_H
|
|
|
|
#include "ompi_config.h"
|
|
#include "opal/class/opal_list.h"
|
|
#include "opal/mca/mca.h"
|
|
#include "ompi/mca/coll/coll.h"
|
|
#include "ompi/mca/mpool/mpool.h"
|
|
#include "ompi/mca/sbgp/sbgp.h"
|
|
#include "ompi/datatype/ompi_datatype.h"
|
|
#include "ompi/op/op.h"
|
|
#include "ompi/include/ompi/constants.h"
|
|
#include "ompi/patterns/net/netpatterns_knomial_tree.h"
|
|
|
|
#include "opal/util/show_help.h"
|
|
|
|
#include <limits.h>
|
|
|
|
#if defined(c_plusplus) || defined(__cplusplus)
|
|
extern "C" {
|
|
#endif
|
|
|
|
/* Forward declaration - please do not remove it */
|
|
struct ml_memory_block_desc_t;
|
|
struct mca_coll_ml_module_t;
|
|
struct ml_buffers_t;
|
|
|
|
struct mca_bcol_base_coll_fn_comm_attributes_t;
|
|
struct mca_bcol_base_coll_fn_invoke_attributes_t;
|
|
struct mca_bcol_base_coll_fn_desc_t;
|
|
|
|
/*
 * Message-range constants and generic bcol limits.
 *
 * MSG_RANGE_INITIAL is parenthesized as a whole so that it keeps its value
 * when embedded in a larger expression (e.g. "size / MSG_RANGE_INITIAL");
 * without the outer parentheses the trailing "* 12" would bind to the
 * surrounding expression instead.
 */
#define NUM_MSG_RANGES           5
#define MSG_RANGE_INITIAL        ((1024) * 12)
#define MSG_RANGE_INC            10
#define BCOL_THRESHOLD_UNLIMITED (INT_MAX)

#define BCOL_HEAD_ALIGN          32 /* will turn into an MCA parameter after debug */
|
|
|
|
/*
 * Collective functions a bcol may provide.
 *
 * BCOL_NUM_OF_FUNCTIONS is the enumerator count and is used in this header
 * to size per-function tables, so the order of the entries must be kept.
 */
enum bcol_coll {
    /* blocking functions */
    BCOL_ALLGATHER = 0, BCOL_ALLGATHERV,
    BCOL_ALLREDUCE,
    BCOL_ALLTOALL, BCOL_ALLTOALLV, BCOL_ALLTOALLW,
    BCOL_BARRIER,
    BCOL_BCAST,
    BCOL_EXSCAN,
    BCOL_GATHER, BCOL_GATHERV,
    BCOL_REDUCE, BCOL_REDUCE_SCATTER,
    BCOL_SCAN,
    BCOL_SCATTER, BCOL_SCATTERV,
    BCOL_FANIN, BCOL_FANOUT,

    /* nonblocking functions */
    BCOL_IALLGATHER, BCOL_IALLGATHERV,
    BCOL_IALLREDUCE,
    BCOL_IALLTOALL, BCOL_IALLTOALLV, BCOL_IALLTOALLW,
    BCOL_IBARRIER,
    BCOL_IBCAST,
    BCOL_IEXSCAN,
    BCOL_IGATHER, BCOL_IGATHERV,
    BCOL_IREDUCE, BCOL_IREDUCE_SCATTER,
    BCOL_ISCAN,
    BCOL_ISCATTER, BCOL_ISCATTERV,
    BCOL_IFANIN, BCOL_IFANOUT,

    BCOL_SYNC,

    /* New function - needed for intermediate steps */
    BCOL_REDUCE_TO_LEADER,

    /* number of entries above - keep last */
    BCOL_NUM_OF_FUNCTIONS
};

typedef enum bcol_coll bcol_coll;
|
|
|
|
/*
 * Element-count class passed to the coll_support query callback.
 * BCOL_NUM_OF_ELEM_TYPES is the enumerator count and must stay last.
 */
enum bcol_elem_type {
    BCOL_SINGLE_ELEM_TYPE = 0,
    BCOL_MULTI_ELEM_TYPE,
    BCOL_NUM_OF_ELEM_TYPES
};

typedef enum bcol_elem_type bcol_elem_type;
|
|
|
|
typedef int (*mca_bcol_base_module_coll_support_all_types_fn_t)(bcol_coll coll_name);
|
|
typedef int (*mca_bcol_base_module_coll_support_fn_t)(int op, int dtype, bcol_elem_type elem_num);
|
|
|
|
/*
|
|
* Collective function status
|
|
*/
|
|
enum {
|
|
BCOL_FN_NOT_STARTED = (OMPI_ERR_MAX - 1),
|
|
BCOL_FN_STARTED = (OMPI_ERR_MAX - 2),
|
|
BCOL_FN_COMPLETE = (OMPI_ERR_MAX - 3)
|
|
};
|
|
|
|
/*
 * Reduction operation kinds.
 *
 * Originally this enum was placed in the ompi/op/op.h file.  It should be
 * moved back when we are ready to lobby for its inclusion.  Since we are
 * releasing only the bcast and barrier initially and this enum supports the
 * allreduce, we are not going to worry about it now.  Note that in the same
 * h-file, op.h, the struct "ompi_op_t" also has a field that we introduced
 * called "enum ompi_op_type op_type"; that needs to be resolved as well.
 */
enum ompi_op_type {
    OMPI_OP_NULL = 0,
    OMPI_OP_MAX,  OMPI_OP_MIN,
    OMPI_OP_SUM,  OMPI_OP_PROD,
    OMPI_OP_LAND, OMPI_OP_BAND,
    OMPI_OP_LOR,  OMPI_OP_BOR,
    OMPI_OP_LXOR, OMPI_OP_BXOR,
    OMPI_OP_MAXLOC, OMPI_OP_MINLOC,
    OMPI_OP_REPLACE,
    /* number of entries above - keep last */
    OMPI_OP_NUM_OF_TYPES
};
|
|
|
|
|
|
/**
 * Collective component initialization
 *
 * Initialize the given collective component.  This function should
 * initialize any component-level data.  It will be called exactly
 * once during MPI_INIT.
 *
 * @note The component framework is not lazily opened, so attempts
 * should be made to minimize the amount of memory allocated during
 * this function.
 *
 * @param[in] enable_progress_threads True if the component needs to
 *                                    support progress threads
 * @param[in] enable_mpi_threads      True if the component needs to
 *                                    support MPI_THREAD_MULTIPLE
 *
 * @retval OMPI_SUCCESS Component successfully initialized
 * @retval ORTE_ERROR   An unspecified error occurred
 */
typedef int (*mca_bcol_base_component_init_query_fn_t)(bool enable_progress_threads,
                                                       bool enable_mpi_threads);
|
|
|
|
/**
|
|
* Query whether a component is available for the given sub-group
|
|
*
|
|
* Query whether the component is available for the given
|
|
* sub-group. If the component is available, an array of pointers should be
|
|
* allocated and returned (with refcount at 1). The module will not
|
|
* be used for collective operations until module_enable() is called
|
|
* on the module, but may be destroyed (via OBJ_RELEASE) either before
|
|
* or after module_enable() is called. If the module needs to release
|
|
* resources obtained during query(), it should do so in the module
|
|
* destructor.
|
|
*
|
|
* A component may provide NULL to this function to indicate it does
|
|
* not wish to run or return an error during module_enable().
|
|
*
|
|
* @note The communicator is available for point-to-point
|
|
* communication, but other functionality is not available during this
|
|
* phase of initialization.
|
|
*
|
|
* @param[in] sbgp Pointer to sub-group module.
|
|
* @param[out] priority Priority setting for component on
|
|
* this communicator
|
|
* @param[out] num_modules Number of modules that where generated
|
|
* for the sub-group module.
|
|
*
|
|
* @returns An array of pointer to an initialized modules structures if the component can
|
|
* provide a modules with the requested functionality or NULL if the
|
|
* component should not be used on the given communicator.
|
|
*/
|
|
typedef struct mca_bcol_base_module_t **(*mca_bcol_base_component_comm_query_fn_t)
|
|
(mca_sbgp_base_module_t *sbgp, int *num_modules);
|
|
|
|
|
|
typedef int (*mca_bcol_barrier_init_fn_t)(struct mca_bcol_base_module_t *bcol_module,
|
|
mca_sbgp_base_module_t *sbgp_module);
|
|
|
|
|
|
|
|
/*
 * Version initializer for components of the "bcol" framework, v2.0.0:
 * the generic MCA base version followed by the framework name and its
 * three version numbers.
 */
#define MCA_BCOL_BASE_VERSION_2_0_0                                      \
    MCA_BASE_VERSION_2_0_0,                                              \
    "bcol", 2, 0, 0
|
|
|
|
|
|
/* This is really an abstraction violation, but is the easiest way to get
|
|
* started. For memory management we need to know what bcol components
|
|
* have compatible memory management schemes. Such compatibility can
|
|
* be used to eliminate memory copies between levels in the collective
|
|
* operation hierarchy, by having the output buffer of one level be the
|
|
* input buffer to the next level
|
|
*/
|
|
|
|
/*
 * Identifiers of bcol memory-management schemes.  BCOL_SIZE is the number
 * of schemes and is used as the dimension of the compatibility tables.
 */
enum {
    BCOL_SHARED_MEMORY_UMA = 0,
    BCOL_SHARED_MEMORY_SOCKET,
    BCOL_POINT_TO_POINT,
    BCOL_IB_OFFLOAD,
    BCOL_SIZE /* keep last */
};
|
|
|
|
/*
 * BCOL_SIZE x BCOL_SIZE tables, defined elsewhere.
 * Presumably indexed by a pair of memory-scheme identifiers; confirm
 * against the defining translation unit.
 */
OMPI_DECLSPEC extern int bcol_mpool_compatibility[BCOL_SIZE][BCOL_SIZE];
OMPI_DECLSPEC extern int bcol_mpool_index[BCOL_SIZE][BCOL_SIZE];
|
|
|
|
/*
 * Memory registration callback.
 * Open question from the original author: what are the input parameters?
 * Too many void * pointers here.
 */
typedef int (*bcol_register_mem_fn_t)(void *context_data,
                                      void *base,
                                      size_t size,
                                      void **reg_desc);

/* Memory deregistration callback. */
typedef int (*bcol_deregister_mem_fn_t)(void *context_data,
                                        void *reg_desc);
|
|
|
|
/* Bcol network context definition */
|
|
struct bcol_base_network_context_t {
|
|
opal_object_t super;
|
|
/* Context id - defined by upper layer, ML */
|
|
int context_id;
|
|
/* Any context information that bcol what to use */
|
|
void *context_data;
|
|
|
|
/* registration function */
|
|
bcol_register_mem_fn_t register_memory_fn;
|
|
/* deregistration function */
|
|
bcol_deregister_mem_fn_t deregister_memory_fn;
|
|
};
|
|
typedef struct bcol_base_network_context_t bcol_base_network_context_t;
|
|
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(bcol_base_network_context_t);
|
|
|
|
/*
 * Primitive function types
 */

/* bcast variants; BCOL_NUM_BCAST_FUNCTIONS is the count and stays last */
enum {
    BCOL_BCAST_SMALL_DATA = 0,      /* small data function */
    BCOL_BCAST_SMALL_DATA_DYNAMIC,  /* small data - dynamic decision making supported */
    BCOL_NUM_BCAST_FUNCTIONS        /* number of functions */
};
|
|
|
|
|
|
/**
|
|
* BCOL instance.
|
|
*/
|
|
|
|
/*
 * No limit on fragment size - this supports using user buffers rather
 * than library buffers.
 *
 * The value is parenthesized so the unary minus cannot combine with
 * neighbouring tokens at the point of use.
 */
#define FRAG_SIZE_NO_LIMIT (-1)
|
|
|
|
/* forward declaration */
|
|
struct coll_bcol_collective_description_t;
|
|
|
|
struct mca_bcol_base_component_2_0_0_t {
|
|
|
|
/** Base component description */
|
|
mca_base_component_t bcol_version;
|
|
|
|
/** Component initialization function */
|
|
mca_bcol_base_component_init_query_fn_t collm_init_query;
|
|
|
|
/** Query whether component is useable for given communicator */
|
|
mca_bcol_base_component_comm_query_fn_t collm_comm_query;
|
|
|
|
/** If bcol supports all possible data types */
|
|
mca_bcol_base_module_coll_support_fn_t coll_support;
|
|
|
|
/** If bcol supports all possible data types for given collective operation */
|
|
mca_bcol_base_module_coll_support_all_types_fn_t coll_support_all_types;
|
|
|
|
/** Use this flag to prevent init_query multiple calls
|
|
in case we have the same bcol more than on a single level */
|
|
bool init_done;
|
|
|
|
/** If collective calls with bcols of this type need to be ordered */
|
|
bool need_ordering;
|
|
|
|
/** MCA parameter: Priority of this component */
|
|
int priority;
|
|
|
|
/** Bcast function pointers */
|
|
struct coll_bcol_collective_description_t *
|
|
bcast_functions[BCOL_NUM_BCAST_FUNCTIONS];
|
|
|
|
/** Number of network contexts - need this for resource management */
|
|
int n_net_contexts;
|
|
|
|
/** List of network contexts */
|
|
bcol_base_network_context_t **network_contexts;
|
|
|
|
/*
|
|
* Fragmentation support
|
|
*/
|
|
|
|
/** Minimum fragement size */
|
|
int min_frag_size;
|
|
|
|
/** Maximum fragment size */
|
|
int max_frag_size;
|
|
|
|
/** Supports direct use of user-buffers */
|
|
bool can_use_user_buffers;
|
|
};
|
|
typedef struct mca_bcol_base_component_2_0_0_t mca_bcol_base_component_2_0_0_t;
|
|
typedef struct mca_bcol_base_component_2_0_0_t mca_bcol_base_component_t;
|
|
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_bcol_base_component_t);
|
|
|
|
/* forward declaration */
|
|
struct mca_coll_ml_descriptor_t;
|
|
struct ml_payload_buffer_desc_t;
|
|
struct mca_coll_ml_route_info_t;
|
|
|
|
/*
 * Ordering state carried with a collective fragment; read and updated by
 * the MCA_BCOL_CHECK_ORDER / MCA_BCOL_UPDATE_ORDER_COUNTER macros.
 */
typedef struct {
    /* Sequence number of the collective fragment */
    int order_num;
    /* How many bcols that need ordering have been started */
    int bcols_started;
    /* Number of functions called for bcols that need ordering */
    int n_fns_need_ordering;
} mca_bcol_base_order_info_t;
|
|
|
|
struct bcol_function_args_t {
|
|
/* full message sequence number */
|
|
int64_t sequence_num;
|
|
/* full message descriptor - single copy of fragment invariant
|
|
* parameters */
|
|
/* Pasha: We don need this one for new flow - remove it */
|
|
struct mca_coll_ml_descriptor_t *full_message_descriptor;
|
|
struct mca_coll_ml_route_info_t *root_route;
|
|
/* function status */
|
|
int function_status;
|
|
/* root, for rooted operations */
|
|
int root;
|
|
/* input buffer */
|
|
void *sbuf;
|
|
void *rbuf;
|
|
void *userbuf;
|
|
struct ml_payload_buffer_desc_t *src_desc;
|
|
struct ml_payload_buffer_desc_t *dst_desc;
|
|
/* ml buffer size */
|
|
uint32_t buffer_size;
|
|
/* index of buffer in ml payload cache */
|
|
int buffer_index;
|
|
int count;
|
|
struct ompi_datatype_t *dtype;
|
|
struct ompi_op_t *op;
|
|
int sbuf_offset;
|
|
int rbuf_offset;
|
|
/* for bcol opaque data */
|
|
void *bcol_opaque_data;
|
|
/* An output argument that will be used by BCOL funstion to tell ML that the result of the BCOL is in rbuf */
|
|
bool result_in_rbuf;
|
|
bool root_flag; /* True if the rank is root of operation */
|
|
int status; /* Used for non-blocking collective completion */
|
|
uint32_t frag_size; /* fragment size for large messages */
|
|
int hier_factor; /* factor used when bcast is invoked as a service function back down
|
|
* the tree in allgather for example, the pacl_len is not the actual
|
|
* len of the data needing bcasting
|
|
*/
|
|
mca_bcol_base_order_info_t order_info;
|
|
};
|
|
|
|
typedef struct bcol_function_args_t bcol_function_args_t;
|
|
|
|
|
|
/* The collective operation is defined by a series of collective operations
|
|
* invoked through a function pointer. Each function may be different,
|
|
* so will store the arguments in a struct and pass a pointer to the struct,
|
|
* and use this as a way to hide the different function signatures.
|
|
*
|
|
* @param[in] input_args Structure with function arguments
|
|
* @param[in] bcol_desc Component specific parameters
|
|
* @param[out] status return status of the function
|
|
* MCA_BCOL_COMPLETE - function completed
|
|
* MCA_BCOL_IN_PROGRESS - function incomplete
|
|
*
|
|
* @retval OMPI_SUCCESS successful completion
|
|
* @retval OMPI_ERROR function returned error
|
|
*/
|
|
/* forward declaration */
|
|
struct mca_bcol_base_module_t;
|
|
|
|
/* collective function prototype - all functions have the same interface
|
|
* so that we can call them via a function pointer */
|
|
struct coll_ml_function_t;
|
|
typedef int (*mca_bcol_base_module_collective_fn_primitives_t)
|
|
(bcol_function_args_t *input_args, struct coll_ml_function_t *const_args);
|
|
|
|
/* Per-collective initialization callback; takes only the bcol module. */
typedef int (*mca_bcol_base_module_collective_init_fn_primitives_t)(
    struct mca_bcol_base_module_t *bcol_module);
|
|
|
|
/**
 * Function to query for collective function attributes
 *
 * @param attribute            (IN)  the attribute of interest
 * @param algorithm_parameters (OUT) the value of attribute for this
 *                                   function.  If this attribute is not
 *                                   supported, OMPI_ERR_NOT_FOUND is
 *                                   returned.
 */
typedef int (*mca_bcol_get_collective_attributes)(
    int attribute,
    void *algorithm_parameters);
|
|
|
|
/* data structure for tracking the relevant data needed for ml level
|
|
* algorithm construction (e.g., function selection), initialization, and
|
|
* usage.
|
|
*/
|
|
struct coll_bcol_collective_description_t {
|
|
/* collective initiation function - first functin called */
|
|
mca_bcol_base_module_collective_fn_primitives_t coll_fn;
|
|
|
|
/* collective progress function - first functin called */
|
|
mca_bcol_base_module_collective_fn_primitives_t progress_fn;
|
|
|
|
/* collective progress function - first functin called */
|
|
mca_bcol_get_collective_attributes get_attributes;
|
|
|
|
/* attributes supported - bit map */
|
|
uint64_t attribute;
|
|
|
|
};
|
|
typedef struct coll_bcol_collective_description_t
|
|
coll_bcol_collective_description_t;
|
|
|
|
/* Collective operation attributes */
enum {
    /* Supports dynamic decisions - e.g., the collective operation does not
     * need to be fully defined before it can be started. */
    BCOL_ATTRIBUTE_DYNAMIC = 0,

    /* number of attributes - keep last */
    BCOL_NUM_ATTRIBUTES
};
|
|
|
|
/*
 * For rooted collectives: does the algorithm know its data source?
 * DATA_SRC_TYPES is the number of choices and stays last.
 */
enum {
    DATA_SRC_KNOWN = 0,
    DATA_SRC_UNKNOWN,
    DATA_SRC_TYPES
};
|
|
|
|
/* Blocking vs. non-blocking selector */
enum {
    BLOCKING = 0,
    NON_BLOCKING
};
|
|
/*
 * gvm: For selection logic.
 *
 * Communicator-level attributes of a collective function.
 */
struct mca_bcol_base_coll_fn_comm_attributes_t {
    int bcoll_type;
    int comm_size_min;
    int comm_size_max;
    int data_src;
    int waiting_semantics;
};

typedef struct mca_bcol_base_coll_fn_comm_attributes_t mca_bcol_base_coll_fn_comm_attributes_t;
|
|
|
|
/* Invocation-level attributes of a collective function. */
struct mca_bcol_base_coll_fn_invoke_attributes_t {
    int bcol_msg_min;
    int bcol_msg_max;

    /* Max is OMPI_DATATYPE_MAX_PREDEFINED defined to be 45 */
    uint64_t datatype_bitmap;

    /* bit map of optypes supported */
    uint32_t op_types_bitmap;
};

typedef struct mca_bcol_base_coll_fn_invoke_attributes_t mca_bcol_base_coll_fn_invoke_attributes_t;
|
|
|
|
struct mca_bcol_base_coll_fn_desc_t {
|
|
opal_list_item_t super;
|
|
struct mca_bcol_base_coll_fn_comm_attributes_t *comm_attr;
|
|
struct mca_bcol_base_coll_fn_invoke_attributes_t *inv_attr;
|
|
mca_bcol_base_module_collective_fn_primitives_t coll_fn;
|
|
mca_bcol_base_module_collective_fn_primitives_t progress_fn;
|
|
};
|
|
|
|
typedef struct mca_bcol_base_coll_fn_desc_t mca_bcol_base_coll_fn_desc_t;
|
|
OBJ_CLASS_DECLARATION(mca_bcol_base_coll_fn_desc_t);
|
|
|
|
/* end selection logic */
|
|
|
|
typedef int (*mca_bcol_base_module_collective_init_fn_t)
|
|
(struct mca_bcol_base_module_t *bcol_module,
|
|
mca_sbgp_base_module_t *sbgp_module);
|
|
|
|
/* per communicator memory initialization function */
|
|
typedef int (*mca_bcol_module_mem_init)(struct ml_buffers_t *registered_buffers,
|
|
mca_bcol_base_component_t *module);
|
|
|
|
/* Initialize memory block - ml_memory_block initialization interface function
 *
 * Invoked at the ml level, used to pass bcol specific registration information
 * for the "ml_memory_block".
 *
 * NOTE(review): an earlier form of this callback took the ml_memory_block
 * descriptor directly; the parameter descriptions below follow the current
 * signature and are inferred from the parameter names.
 *
 * @param[in] ml_module   The ml module.
 * @param[in] bcol_module The bcol module.
 * @param[in] reg_data    bcol specific registration data.
 *
 * @returns On Success: OMPI_SUCCESS
 *          On Failure: OMPI_ERROR
 */
typedef int (*mca_bcol_base_init_memory_fn_t)(
    struct mca_coll_ml_module_t *ml_module,
    struct mca_bcol_base_module_t *bcol_module,
    void *reg_data);
|
|
|
|
/* Initialization callback type used for the module's k_nomial_tree hook. */
typedef int (*mca_common_allgather_init_fn_t)(
    struct mca_bcol_base_module_t *bcol_module);
|
|
|
|
/* Callback type used to set a module's small_message_thresholds array. */
typedef void (*mca_bcol_base_set_thresholds_fn_t)(
    struct mca_bcol_base_module_t *bcol_module);
|
|
|
|
/* Single-bit feature flags; distinct bits so they can be OR-ed together. */
enum {
    MCA_BCOL_BASE_ZERO_COPY                  = 1 << 0,
    MCA_BCOL_BASE_NO_ML_BUFFER_FOR_LARGE_MSG = 1 << 1,
    MCA_BCOL_BASE_NO_ML_BUFFER_FOR_BARRIER   = 1 << 2
};
|
|
|
|
/* base module */
|
|
struct mca_bcol_base_module_t {
|
|
/* base coll component */
|
|
opal_object_t super;
|
|
|
|
/* bcol component (Pasha: Do we really need cache the component?)*/
|
|
mca_bcol_base_component_t *bcol_component;
|
|
|
|
/* network context that is used by this bcol
|
|
only one context per bcol is allowed */
|
|
bcol_base_network_context_t *network_context;
|
|
|
|
/* We are going to use the context index a lot,
|
|
int order to decrease number of dereferences
|
|
bcol->network_context->index
|
|
we are caching the value on bcol */
|
|
int context_index;
|
|
|
|
/* Set of flags that describe features supported by bcol */
|
|
uint64_t supported_mode;
|
|
|
|
/* per communicator memory initialization function */
|
|
mca_bcol_module_mem_init init_module;
|
|
|
|
/* sub-grouping module partner */
|
|
mca_sbgp_base_module_t *sbgp_partner_module;
|
|
|
|
/* size of subgroup - cache this, so can have access when
|
|
* sbgp_partner_module no longer existes */
|
|
int size_of_subgroup;
|
|
|
|
/* sequence number offset - want to make sure that we start
|
|
* id'ing collectives with id 0, so we can have simple
|
|
* resource management.
|
|
*/
|
|
int64_t squence_number_offset;
|
|
|
|
|
|
/* number of times to poll for operation completion before
|
|
* breaking out of a non-blocking collective operation
|
|
*/
|
|
int n_poll_loops;
|
|
|
|
/* size of header that will go in data buff, should not include
|
|
* any info regarding alignment, let the ml level handle this
|
|
*/
|
|
uint32_t header_size;
|
|
|
|
|
|
/* Each bcol is assigned a unique value
|
|
* see if we can get away with 16-bit id
|
|
*/
|
|
int16_t bcol_id;
|
|
|
|
/*FIXME:
|
|
* Since mca_bcol_base_module_t is the only parameter which will be passed
|
|
* into the bcol_basesmuma_bcast_init(), add the flag to indicate whether
|
|
* the hdl-based algorithms will get enabled.
|
|
*/
|
|
bool use_hdl;
|
|
/*
|
|
* Collective function pointers
|
|
*/
|
|
/* changing function signature - will replace bcol_functions */
|
|
mca_bcol_base_module_collective_fn_primitives_t bcol_function_table[BCOL_NUM_OF_FUNCTIONS];
|
|
|
|
/* Tables hold pointers to functions */
|
|
mca_bcol_base_module_collective_init_fn_primitives_t bcol_function_init_table[BCOL_NUM_OF_FUNCTIONS];
|
|
opal_list_t bcol_fns_table[BCOL_NUM_OF_FUNCTIONS];
|
|
struct mca_bcol_base_coll_fn_desc_t*
|
|
filtered_fns_table[DATA_SRC_TYPES][2][BCOL_NUM_OF_FUNCTIONS][NUM_MSG_RANGES+1][OMPI_OP_NUM_OF_TYPES][OMPI_DATATYPE_MAX_PREDEFINED];
|
|
|
|
/*
|
|
* Bcol interface function to pass bcol specific
|
|
* info and memory recycling call back
|
|
*/
|
|
mca_bcol_base_init_memory_fn_t bcol_memory_init;
|
|
|
|
/*
|
|
* netpatterns interface function, would like to invoke this on
|
|
* on the ml level
|
|
*/
|
|
mca_common_allgather_init_fn_t k_nomial_tree;
|
|
/* Each bcol caches a list which describes how many ranks
|
|
* are "below" each rank in this bcol
|
|
*/
|
|
int *list_n_connected;
|
|
|
|
/* offsets for scatter/gather */
|
|
int hier_scather_offset;
|
|
|
|
/* Small message threshold for each collective */
|
|
int small_message_thresholds[BCOL_NUM_OF_FUNCTIONS];
|
|
|
|
/* Set small_message_thresholds array */
|
|
mca_bcol_base_set_thresholds_fn_t set_small_msg_thresholds;
|
|
|
|
/* Pointer to the order counter on the upper layer,
|
|
used if the bcol needs to be ordered */
|
|
int *next_inorder;
|
|
};
|
|
typedef struct mca_bcol_base_module_t mca_bcol_base_module_t;
|
|
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_bcol_base_module_t);
|
|
|
|
struct mca_bcol_base_descriptor_t {
|
|
ompi_free_list_item_t super;
|
|
/* Vasily: will be described in the future */
|
|
};
|
|
typedef struct mca_bcol_base_descriptor_t mca_bcol_base_descriptor_t;
|
|
|
|
/*
 * Ordering gate: if the counter that module->next_inorder points to differs
 * from bcol_function_args->order_info.order_num, make the *calling function*
 * return BCOL_FN_NOT_STARTED; otherwise fall through.
 *
 * Must be used inside a function whose return type can hold
 * BCOL_FN_NOT_STARTED.  Invoke it as a normal statement, followed by ';'.
 * The expansion deliberately has no trailing semicolon so the macro behaves
 * as a single statement, including in an unbraced if/else.
 */
#define MCA_BCOL_CHECK_ORDER(module, bcol_function_args)                  \
    do {                                                                  \
        if (*((module)->next_inorder) !=                                  \
                (bcol_function_args)->order_info.order_num) {             \
            return BCOL_FN_NOT_STARTED;                                   \
        }                                                                 \
    } while (0)
|
|
|
|
/*
 * Record that one more ordered bcol function has started: increment
 * order_info->bcols_started and, once it equals
 * order_info->n_fns_need_ordering, advance the counter that
 * module->next_inorder points to.
 *
 * order_info is evaluated more than once, so pass an expression without
 * side effects.  Invoke it as a normal statement, followed by ';'.  The
 * expansion deliberately has no trailing semicolon so the macro behaves as
 * a single statement, including in an unbraced if/else.
 */
#define MCA_BCOL_UPDATE_ORDER_COUNTER(module, order_info)                 \
    do {                                                                  \
        (order_info)->bcols_started++;                                    \
        if ((order_info)->n_fns_need_ordering ==                          \
                (order_info)->bcols_started) {                            \
            ++(*((module)->next_inorder));                                \
        }                                                                 \
    } while (0)
|
|
|
|
#if defined(c_plusplus) || defined(__cplusplus)
|
|
}
|
|
#endif
|
|
#endif /* MCA_BCOL_H */
|