1
1

MCA/base: Add new MCA variable system

Features:
 - Support for an override parameter file (openmpi-mca-param-override.conf).
   Variable values in this file cannot be overridden by any file or environment
   value.
 - Support for boolean, unsigned, and unsigned long long variables.
 - Support for true/false values.
 - Support for enumerations on integer variables.
 - Support for MPIT scope, verbosity, and binding.
 - Support for command line source.
 - Support for setting variable source via the environment using
   OMPI_MCA_SOURCE_<var name>=source (either command or file:filename)
 - Cleaner API.
 - Support for variable groups (equivalent to MPIT categories).

Notes:
 - Variables must be created with a backing store (char **, int *, or bool *)
   that must live at least as long as the variable.
 - Creating a variable with the MCA_BASE_VAR_FLAG_SETTABLE flag enables the use of
   mca_base_var_set_value() to change the value.
 - String values are duplicated when the variable is registered. It is up to
   the caller to free the original value if necessary. The new value will be
   freed by the mca_base_var system and must not be freed by the user.
 - Variables with constant scope may not be settable.
 - Variable groups (and all associated variables) are deregistered when the
   component is closed or the component repository item is freed. This
   prevents a segmentation fault from accessing a variable after its component
   is unloaded.
 - After some discussion we decided we should remove the automatic registration
   of component priority variables. Few components actually made use of this
   feature.
 - The enumerator interface was updated to be general enough to handle
   future uses of the interface.
 - The code to generate ompi_info output has been moved into the MCA variable
   system. See mca_base_var_dump().

opal: update core and components to mca_base_var system
orte: update core and components to mca_base_var system
ompi: update core and components to mca_base_var system

This commit also modifies the rmaps framework. The following variables were
moved from ppr and lama: rmaps_base_pernode, rmaps_base_n_pernode,
rmaps_base_n_persocket. Both lama and ppr create synonyms for these variables.

This commit was SVN r28236.
This commit is contained in:
Nathan Hjelm 2013-03-27 21:09:41 +00:00
parent 9d1041b058
commit cf377db823
443 changed files with 13669 additions and 9918 deletions

View File

@ -61,7 +61,7 @@
#
# Basic behavior to smooth startup
mca_component_show_load_errors = 0
mca_base_component_show_load_errors = 0
orte_abort_timeout = 10
opal_set_max_sys_limits = 1
orte_report_launch_progress = 1

View File

@ -128,6 +128,8 @@ OMPI_DECLSPEC volatile int MPIR_being_debugged = 0;
OMPI_DECLSPEC volatile int MPIR_debug_state = 0;
OMPI_DECLSPEC char *MPIR_debug_abort_string = "";
static char *ompi_debugger_dll_path = NULL;
/* Check for a file in few direct ways for portability */
static void check(char *dir, char *file, char **locations)
{
@ -164,18 +166,19 @@ extern void
ompi_debugger_setup_dlls(void)
{
int i;
char *a, *b, **dirs, **tmp1 = NULL, **tmp2 = NULL;
char **dirs, **tmp1 = NULL, **tmp2 = NULL;
a = strdup(opal_install_dirs.pkglibdir);
mca_base_param_reg_string_name("ompi",
"debugger_dll_path",
"List of directories where MPI_INIT should search for debugger plugins",
false, false, a, &b);
free(a);
ompi_debugger_dll_path = opal_install_dirs.pkglibdir;
(void) mca_base_var_register("ompi", "ompi", "debugger", "dll_path",
"List of directories where MPI_INIT should search for debugger plugins",
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&ompi_debugger_dll_path);
/* Search the directory for MPI debugger DLLs */
if (NULL != b) {
dirs = opal_argv_split(b, ':');
if (NULL != ompi_debugger_dll_path) {
dirs = opal_argv_split(ompi_debugger_dll_path, ':');
for (i = 0; dirs[i] != NULL; ++i) {
check(dirs[i], OMPI_MPIHANDLES_DLL_PREFIX, tmp1);
check(dirs[i], OMPI_MSGQ_DLL_PREFIX, tmp2);

View File

@ -20,7 +20,6 @@
#include "ompi_config.h"
#include "ompi/mca/allocator/allocator.h"
#include "ompi/constants.h"
#include "opal/mca/base/mca_base_param.h"
#include "ompi/mca/allocator/bucket/allocator_bucket_alloc.h"
#include "ompi/mca/mpool/mpool.h"
@ -80,9 +79,16 @@ struct mca_allocator_base_module_t* mca_allocator_bucket_module_init(
return((mca_allocator_base_module_t *) allocator);
}
/*
 * Register the bucket allocator's MCA variable.
 *
 * Writes the default bucket count (30) into mca_allocator_num_buckets and
 * then registers that integer as the backing store of the "num_buckets"
 * variable, flagged MCA_BASE_VAR_FLAG_SETTABLE with local scope.  The result
 * of the registration call is deliberately discarded, so OMPI_SUCCESS is
 * returned unconditionally.
 */
static int mca_allocator_bucket_module_register(void)
{
    /* Seed the backing store with the default ahead of registration. */
    mca_allocator_num_buckets = 30;

    (void) mca_base_component_var_register(
        &mca_allocator_bucket_component.allocator_version,
        "num_buckets",
        NULL,
        MCA_BASE_VAR_TYPE_INT,
        NULL,
        0,
        MCA_BASE_VAR_FLAG_SETTABLE,
        OPAL_INFO_LVL_9,
        MCA_BASE_VAR_SCOPE_LOCAL,
        &mca_allocator_num_buckets);

    return OMPI_SUCCESS;
}
int mca_allocator_bucket_module_open(void) {
(void) mca_base_param_reg_int_name ("allocator", "bucket_num_buckets", NULL, false, false,
30, &mca_allocator_num_buckets);
return(OMPI_SUCCESS);
}
@ -116,7 +122,9 @@ mca_allocator_base_component_t mca_allocator_bucket_component = {
OMPI_MINOR_VERSION,
OMPI_RELEASE_VERSION,
mca_allocator_bucket_module_open, /* module open */
mca_allocator_bucket_module_close /* module close */
mca_allocator_bucket_module_close, /* module close */
NULL,
mca_allocator_bucket_module_register
},
{
/* The component is checkpoint ready */

View File

@ -40,6 +40,8 @@
int mca_bcol_base_output = 0;
opal_list_t mca_bcol_base_components_opened;
static int mca_bcol_base_verbose = 0;
OMPI_DECLSPEC opal_list_t mca_bcol_base_components_in_use;
OMPI_DECLSPEC char *ompi_bcol_bcols_string;
OMPI_DECLSPEC int bcol_mpool_compatibility[BCOL_SIZE][BCOL_SIZE];
@ -200,27 +202,46 @@ static int mca_bcol_base_set_components_to_use(opal_list_t *bcol_components_avai
return OMPI_SUCCESS;
}
/*
 * Register the BCOL framework-level MCA variables.
 *
 * Two variables are registered, in this order:
 *   - "verbose": integer verbosity, backed by mca_bcol_base_verbose.
 *   - "string":  list of basic collective components, backed by
 *                ompi_bcol_bcols_string, which is first pointed at the
 *                built-in default list.
 *
 * The flags argument is not consulted.  Registration results are discarded
 * and OMPI_SUCCESS is always returned.
 */
static int mca_bcol_base_register(int flags)
{
    /* Verbosity of the framework's debugging output. */
    (void) mca_base_var_register("ompi", "bcol", "base", "verbose",
                                 "Verbosity level of BCOL framework",
                                 MCA_BASE_VAR_TYPE_INT,
                                 NULL,
                                 0,
                                 MCA_BASE_VAR_FLAG_SETTABLE,
                                 OPAL_INFO_LVL_9,
                                 MCA_BASE_VAR_SCOPE_LOCAL,
                                 &mca_bcol_base_verbose);

    /*
     * Selects which bcol components will actually be used; the default
     * list is installed in the backing store before registration.
     */
    ompi_bcol_bcols_string = "basesmuma,basesmuma,iboffload,ptpcoll,ugni";
    (void) mca_base_var_register("ompi", "bcol", "base", "string",
                                 "Default set of basic collective components to use",
                                 MCA_BASE_VAR_TYPE_STRING,
                                 NULL,
                                 0,
                                 0,
                                 OPAL_INFO_LVL_9,
                                 MCA_BASE_VAR_SCOPE_READONLY,
                                 &ompi_bcol_bcols_string);

    return OMPI_SUCCESS;
}
/**
* Function for finding and opening either all MCA components, or the one
* that was specifically requested via a MCA parameter.
*/
int mca_bcol_base_open(void)
{
int value, ret;
int ret;
/*_bcol_base_components_available
* Register some MCA parameters
*/
/* Debugging/Verbose output */
mca_base_param_reg_int_name("bcol",
"base_verbose",
"Verbosity level of BCOL framework",
false, false,
0, &value);
(void) mca_bcol_base_register(0);
/* get fraemwork id */
/* get framework id */
mca_bcol_base_output = opal_output_open(NULL);
opal_output_set_verbosity(mca_bcol_base_output, value);
opal_output_set_verbosity(mca_bcol_base_output, mca_bcol_base_verbose);
/* Open up all available components */
if (OMPI_SUCCESS !=
@ -230,12 +251,6 @@ int mca_bcol_base_open(void)
return OMPI_ERROR;
}
/* figure out which bcol and sbgp components will actually be used */
/* get list of sub-grouping functions to use */
mca_base_param_reg_string_name("bcol","base_string",
"Default set of basic collective components to use ",
false, false, "basesmuma,basesmuma,iboffload,ptpcoll,ugni", &ompi_bcol_bcols_string);
ret = mca_bcol_base_set_components_to_use(&mca_bcol_base_components_opened,
&mca_bcol_base_components_in_use);

View File

@ -105,14 +105,8 @@ struct mca_bcol_basesmuma_component_t {
/* management data for collectives with no user data */
/** MCA parameter: control region size (bytes), per proc */
size_t basesmuma_ctl_size_per_proc;
/** MCA parameter: control region alignment */
size_t basesmuma_ctl_alignment;
/** MCA parameter: number of memory banks */
size_t basesmuma_num_mem_banks;
int basesmuma_num_mem_banks;
/** MCA parameter: number of regions per memory bank */
int basesmuma_num_regions_per_bank;

View File

@ -33,6 +33,7 @@ const char *mca_bcol_basesmuma_component_version_string =
* Local functions
*/
static int basesmuma_register(void);
static int basesmuma_open(void);
static int basesmuma_close(void);
static int mca_bcol_basesmuma_deregister_ctl_sm(
@ -40,13 +41,23 @@ static int mca_bcol_basesmuma_deregister_ctl_sm(
static inline int mca_bcol_basesmuma_param_register_int(
const char* param_name, int default_value)
const char* param_name, int default_value, int *storage)
{
int param_value;
*storage = default_value;
return mca_base_component_var_register(&mca_bcol_basesmuma_component.super.bcol_version, param_name,
NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, storage);
}
(void) mca_base_param_reg_int (&mca_bcol_basesmuma_component.super.bcol_version, param_name,
NULL, false, false, default_value, &param_value);
return param_value;
/*
 * Register a boolean MCA variable for the basesmuma component.
 *
 * param_name    - name of the variable within the component
 * default_value - value written to *storage before registration
 * storage       - backing store handed to the variable system
 *
 * Returns the value produced by mca_base_component_var_register().
 */
static inline int mca_bcol_basesmuma_param_register_bool(
    const char* param_name, bool default_value, bool *storage)
{
    int rc;

    /* Install the default first so the backing store is never unset. */
    *storage = default_value;

    rc = mca_base_component_var_register(&mca_bcol_basesmuma_component.super.bcol_version,
                                         param_name, NULL,
                                         MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                         OPAL_INFO_LVL_9,
                                         MCA_BASE_VAR_SCOPE_READONLY, storage);
    return rc;
}
/*
@ -76,6 +87,8 @@ mca_bcol_basesmuma_component_t mca_bcol_basesmuma_component = {
basesmuma_open,
basesmuma_close,
NULL,
basesmuma_register
},
/* Initialization / querying functions */
@ -90,6 +103,76 @@ mca_bcol_basesmuma_component_t mca_bcol_basesmuma_component = {
},
};
/*
 * Register the basesmuma component's MCA variables.
 *
 * Every variable is backed by a field of mca_bcol_basesmuma_component; the
 * register helpers write the default into that field and then register it.
 * The helpers' return values are not inspected, so OMPI_SUCCESS is always
 * returned.
 */
static int basesmuma_register(void)
{
    mca_bcol_basesmuma_component_t *comp = &mca_bcol_basesmuma_component;
    size_t idx;

    /* Integer variables, listed in the order they are registered. */
    const struct {
        const char *name;
        int default_value;
        int *storage;
    } int_vars[] = {
        /* component priority */
        { "priority", 90, &comp->super.priority },
        /* number of memory banks */
        { "basesmuma_num_ctl_banks", 2, &comp->basesmuma_num_mem_banks },
        /* number of regions per memory bank */
        { "basesmuma_num_buffs_per_bank", 16, &comp->basesmuma_num_regions_per_bank },
        /* polling loops allowed for pending resources to complete */
        { "n_poll_loops", 4, &comp->n_poll_loops },
        /* number of groups supported */
        { "n_groups_supported", 100, &comp->n_groups_supported },
        /* order of the fan-in tree */
        { "radix_fanin", 2, &comp->radix_fanin },
        /* order of the fan-out tree */
        { "radix_fanout", 2, &comp->radix_fanout },
        /* order of the read tree */
        { "radix_read_tree", 3, &comp->radix_read_tree },
        /* order of the reduction fan-out tree */
        { "order_reduction_tree", 2, &comp->order_reduction_tree },
        /* k-nomial radix */
        { "k_nomial_radix", 3, &comp->k_nomial_radix },
        /* polling loops for non-blocking algorithms */
        { "num_to_probe", 10, &comp->num_to_probe },
        /* radix of the k-ary scatter tree */
        { "scatter_kary_radix", 4, &comp->scatter_kary_radix },
        /* message fragmentation limits */
        { "min_frag_size", getpagesize(), &comp->super.min_frag_size },
        { "max_frag_size", FRAG_SIZE_NO_LIMIT, &comp->super.max_frag_size },
    };

    for (idx = 0; idx < sizeof(int_vars) / sizeof(int_vars[0]); ++idx) {
        mca_bcol_basesmuma_param_register_int(int_vars[idx].name,
                                              int_vars[idx].default_value,
                                              int_vars[idx].storage);
    }

    /* By default use pre-registered shared memory segments. */
    /* RLG NOTE: When we have a systematic way to handle single memory
     * copy semantics, we need to update this logic
     */
    mca_bcol_basesmuma_param_register_bool("can_use_user_buffers", false,
                                           &comp->super.can_use_user_buffers);

    /* Registered last, after the boolean above. */
    mca_bcol_basesmuma_param_register_int("verbose", 0, &comp->verbose);

    return OMPI_SUCCESS;
}
/*
* Open the component
*/
@ -102,37 +185,6 @@ static int basesmuma_open(void)
opal_mutex_t *mutex_ptr;
int dummy;
/* set component priority */
cs->super.priority=
mca_bcol_basesmuma_param_register_int("priority",90);
/* set control region size (bytes), per proc */
cs->basesmuma_ctl_size_per_proc=
mca_bcol_basesmuma_param_register_int("basesmuma_ctl_size_per_proc",
CACHE_LINE_SIZE);
/* set control region alignment (bytes) */
cs->basesmuma_ctl_alignment=
mca_bcol_basesmuma_param_register_int("basesmuma_ctl_alignment",
getpagesize());
/* Number of memory banks */
cs->basesmuma_num_mem_banks=
mca_bcol_basesmuma_param_register_int("basesmuma_num_ctl_banks",
2);
/* Number of regions per memory bank */
cs->basesmuma_num_regions_per_bank=
mca_bcol_basesmuma_param_register_int("basesmuma_num_buffs_per_bank",
16);
/* number of polling loops to allow pending resources to
* complete their work
*/
cs->n_poll_loops=
mca_bcol_basesmuma_param_register_int("n_poll_loops",4);
/*
* Make sure that the number of banks is a power of 2
*/
@ -153,59 +205,10 @@ static int basesmuma_open(void)
goto ERROR;
}
/* Number of groups supported */
cs->n_groups_supported=
mca_bcol_basesmuma_param_register_int("n_groups_supported",100);
/* order of fanin tree */
cs->radix_fanin=
mca_bcol_basesmuma_param_register_int("radix_fanin",2);
/* order of fanout tree */
cs->radix_fanout=
mca_bcol_basesmuma_param_register_int("radix_fanout",2);
/* order of read tree */
cs->radix_read_tree =
mca_bcol_basesmuma_param_register_int("radix_read_tree",3);
/* order of reduction fanout tree */
cs->order_reduction_tree=
mca_bcol_basesmuma_param_register_int("order_reduction_tree",2);
/* k-nomial radix */
cs->k_nomial_radix=
mca_bcol_basesmuma_param_register_int("k_nomial_radix",3);
/* number of polling loops for non-blocking algorithms */
cs->num_to_probe =
mca_bcol_basesmuma_param_register_int("num_to_probe",10);
/* radix of the k-ary scatter tree */
cs->scatter_kary_radix =
mca_bcol_basesmuma_param_register_int("scatter_kary_radix",4);
/* Portals initialization */
cs->portals_init = false;
cs->portals_info = NULL;
cs->verbose =
mca_bcol_basesmuma_param_register_int("verbose",0);
/* register parmeters controlling message fragementation */
cs->super.min_frag_size=
mca_bcol_basesmuma_param_register_int("min_frag_size",getpagesize());
cs->super.max_frag_size=
mca_bcol_basesmuma_param_register_int("max_frag_size",FRAG_SIZE_NO_LIMIT);
/* by default use pre-registered shared memory segments */
/* RLG NOTE: When we have a systematic way to handle single memory
* copy semantics, we need to update this logic
*/
cs->super.can_use_user_buffers=
mca_bcol_basesmuma_param_register_int("can_use_user_buffers",0);
cs->super.use_pipeline=
mca_bcol_basesmuma_param_register_int("use_pipeline",1);
/*
* initialization
*/

View File

@ -321,16 +321,13 @@ struct mca_bcol_base_component_2_0_0_t {
*/
/** Minimum fragement size */
size_t min_frag_size;
int min_frag_size;
/** Maximum fragment size */
int32_t max_frag_size;
int max_frag_size;
/** Supports direct use of user-buffers */
int can_use_user_buffers;
/** Support pipelining */
int use_pipeline;
bool can_use_user_buffers;
};
typedef struct mca_bcol_base_component_2_0_0_t mca_bcol_base_component_2_0_0_t;
typedef struct mca_bcol_base_component_2_0_0_t mca_bcol_base_component_t;

View File

@ -178,43 +178,33 @@ struct mca_bcol_iboffload_component_t {
/** name of ib memory pool */
char* mpool_name;
/** max outstanding CQE on the CQ */
uint32_t cq_size;
int cq_size;
/** Max size of inline data */
uint32_t max_inline_data;
int max_inline_data;
/** IB partition definition */
uint32_t pkey_val;
/** Outstanding atomic reads */
uint32_t qp_ous_rd_atom;
/** IB MTU */
uint32_t mtu;
int mtu;
/** Recv not ready timer */
uint32_t min_rnr_timer;
int min_rnr_timer;
/** IB timeout */
uint32_t timeout;
int timeout;
/** IB retry count */
uint32_t retry_count;
int retry_count;
/** Recv not ready retry count */
uint32_t rnr_retry;
int rnr_retry;
/** IB maximum pending RDMA */
uint32_t max_rdma_dst_ops;
int max_rdma_dst_ops;
/** IB Service level (QOS) */
uint32_t service_level;
/** number of iboffload modules that we want to open per single lid */
uint32_t bcols_per_lid;
/** Max LMCs that we want to support */
uint32_t max_lmc;
/** Max number of bcols */
uint32_t max_bcols;
/** Use the async event handler */
uint32_t use_async_event_thread;
int service_level;
/** Preferred communication buffer alignment in Bytes (must be power of two) */
uint32_t buffer_alignment;
int buffer_alignment;
/** Max tasks number for MQ */
uint32_t max_mqe_tasks;
int max_mqe_tasks;
/** Max MQ size */
uint32_t max_mq_size;
/** Memory fragment size */
uint32_t frag_size;
int max_mq_size;
/** HCA/Port include exclude list */
char *if_include;
char **if_include_list;
@ -257,10 +247,6 @@ struct mca_bcol_iboffload_component_t {
enum ibv_m_wr_calc_op map_ompi_to_ib_calcs[OMPI_OP_NUM_OF_TYPES];
/** array mapping Open MPI data types to MVerbs data types */
enum ibv_m_wr_data_type map_ompi_to_ib_dt[OMPI_DATATYPE_MPI_MAX_PREDEFINED];
/** The last ib offload calculation will be done by the cpu */
bool last_calc_in_cpu;
/** The last ib offload calculation will be done by the cpu */
bool enable_rdma_calc;
/** The order of the exchange tree */
int exchange_tree_order;
/** Knomial tree order */

View File

@ -98,7 +98,8 @@ mca_bcol_iboffload_component_t mca_bcol_iboffload_component = {
iboffload_open,
iboffload_close,
NULL, /* mca_register_component_params() */
NULL, /* query */
mca_bcol_iboffload_register_params
NULL, /* reserved */
},
@ -110,41 +111,36 @@ mca_bcol_iboffload_component_t mca_bcol_iboffload_component = {
true, /* collective calls with iboffload should to be ordered */
},
/* iboffload-component specifc information */
0, /* verbose */
0, /* number of qps to use */
false, /* warn_default_gid_prefix */
false, /* warn_nonexistent_if */
0, /* free_list_num */
0, /* free_list_max */
0, /* free_list_inc */
NULL, /* mpool_name */
0, /* cq_size */
0, /* max_inline_data */
0, /* pkey_val */
0, /* qp_ous_rd_atom */
0, /* mtu */
0, /* min_rnr_timer */
0, /* timeout */
0, /* retry_count */
0, /* rnr_retry */
0, /* max_rdma_dst_ops */
0, /* service_level */
0, /* bcols_per_lid */
0, /* max_lmc */
0, /* max_bcols */
0, /* use_async_event_thread */
0, /* buffer_alignment */
0, /* max_mqe_tasks */
0, /* max_mq_size */
0, /* frag_size */
NULL, /* if_include */
NULL, /* if_include_list */
NULL, /* if_exclude */
NULL, /* if_exclude_list */
NULL, /* if_list */
NULL, /* ib_devs */
0, /* num_devs */
NULL, /* receive_queues */
.verbose = 0, /* verbose */
.num_qps = 0, /* number of qps to use */
.warn_default_gid_prefix = false, /* warn_default_gid_prefix */
.warn_nonexistent_if = false, /* warn_nonexistent_if */
.free_list_num = 0, /* free_list_num */
.free_list_max = 0, /* free_list_max */
.free_list_inc = 0, /* free_list_inc */
.mpool_name = NULL, /* mpool_name */
.cq_size = 0, /* cq_size */
.max_inline_data = 0, /* max_inline_data */
.pkey_val = 0, /* pkey_val */
.qp_ous_rd_atom = 0, /* qp_ous_rd_atom */
.mtu = 0, /* mtu */
.min_rnr_timer = 0, /* min_rnr_timer */
.timeout = 0, /* timeout */
.retry_count = 0, /* retry_count */
.rnr_retry = 0, /* rnr_retry */
.max_rdma_dst_ops = 0, /* max_rdma_dst_ops */
.service_level = 0, /* service_level */
.buffer_alignment = 0, /* buffer_alignment */
.max_mqe_tasks = 0, /* max_mqe_tasks */
.max_mq_size = 0, /* max_mq_size */
.if_include = NULL, /* if_include */
.if_include_list = NULL, /* if_include_list */
.if_exclude = NULL, /* if_exclude */
.if_exclude_list = NULL, /* if_exclude_list */
.if_list = NULL, /* if_list */
.ib_devs = NULL, /* ib_devs */
.num_devs = 0, /* num_devs */
.receive_queues = NULL, /* receive_queues */
};
static int mca_bcol_iboffload_dummy_init_query(
@ -403,6 +399,8 @@ static int iboffload_open(void)
IBOFFLOAD_VERBOSE(10, ("Open Iboffload component.\n"));
(void) mca_bcol_iboffload_verify_params();
cm->super.priority = 100;
cm->super.n_net_contexts = 0;
cm->super.network_contexts = NULL;
@ -416,9 +414,20 @@ static int iboffload_open(void)
goto close_device;
}
/* load mca parametres */
rc = mca_bcol_iboffload_register_params();
if (OMPI_SUCCESS != rc) {
/* Check MCA parameters */
if (0 == (ival & (ival - 1))) {
mca_bcol_iboffload_component.exchange_tree_order = ival;
} else {
IBOFFLOAD_ERROR(("Warning: ibcol_iboffload_exchange_tree_order is %d which is not a power of 2, setting it to 2", ival));
mca_bcol_iboffload_component.exchange_tree_order = 2;
}
/* Pasha: Since we do not have max inline check like in openib,
I will put some dummy check here. All mlnx devices support at least 512b */
if (mca_bcol_iboffload_component.max_inline_data > 512) {
IBOFFLOAD_ERROR(("Warning the inline %d, is to big and unsupported",
mca_bcol_iboffload_component.max_inline_data));
rc = OMPI_ERROR;
goto close_device;
}

View File

@ -22,6 +22,8 @@
#include "ompi/mca/common/ofacm/base.h"
#include "ompi/communicator/communicator.h"
#include "opal/util/show_help.h"
/*
* Local flags
*/
@ -38,37 +40,41 @@ enum {
REGSTR_MAX = 0x88
};
mca_base_var_enum_value_t mtu_values[] = {
{IBV_MTU_256, "256B"},
{IBV_MTU_512, "512B"},
{IBV_MTU_1024, "1k"},
{IBV_MTU_4096, "4k"},
{0, NULL}
};
/*
* utility routine for string parameter registration
*/
static int reg_string(const char* param_name,
const char* deprecated_param_name,
const char* param_desc,
const char* default_value, char **out_value,
const char* default_value, char **storage,
int flags)
{
int index;
char *value;
index = mca_base_param_reg_string(&mca_bcol_iboffload_component.super.bcol_version,
param_name, param_desc, false, false,
default_value, &value);
*storage = default_value;
index = mca_base_component_var_register(&mca_bcol_iboffload_component.super.bcol_version,
param_name, param_desc, MCA_BASE_VAR_TYPE_STRING,
NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, storage);
if (NULL != deprecated_param_name) {
mca_base_param_reg_syn(index,
&mca_bcol_iboffload_component.super.bcol_version,
deprecated_param_name, true);
(void) mca_base_var_register_synonym(index, "ompi", "bcol", "iboffload", deprecated_param_name,
MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
}
mca_base_param_lookup_string(index, &value);
if (0 != (flags & REGSTR_EMPTY_OK) && 0 == strlen(value)) {
if (0 != (flags & REGSTR_EMPTY_OK) && 0 == strlen(*storage)) {
opal_output(0, "Bad parameter value for parameter \"%s\"",
param_name);
return OMPI_ERR_BAD_PARAM;
}
*out_value = value;
return OMPI_SUCCESS;
}
@ -78,43 +84,111 @@ static int reg_string(const char* param_name,
static int reg_int(const char* param_name,
const char* deprecated_param_name,
const char* param_desc,
int default_value, int *out_value, int flags)
int default_value, int *storage, int flags)
{
int index, value;
int index;
index = mca_base_param_reg_int(&mca_bcol_iboffload_component.super.bcol_version,
param_name, param_desc, false, false,
default_value, NULL);
*storage = default_value;
index = mca_base_component_var_register(&mca_bcol_iboffload_component.super.bcol_version,
param_name, param_desc, MCA_BASE_VAR_TYPE_INT,
NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, storage);
if (NULL != deprecated_param_name) {
mca_base_param_reg_syn(index,
&mca_bcol_iboffload_component.super.bcol_version,
deprecated_param_name, true);
(void) mca_base_var_register_synonym(index, "ompi", "bcol", "iboffload", deprecated_param_name,
MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
}
mca_base_param_lookup_int(index, &value);
if (0 != (flags & REGINT_NEG_ONE_OK) && -1 == value) {
*out_value = value;
if (0 != (flags & REGINT_NEG_ONE_OK) && -1 == *storage) {
return OMPI_SUCCESS;
}
if ((0 != (flags & REGINT_GE_ZERO) && value < 0) ||
(0 != (flags & REGINT_GE_ONE) && value < 1) ||
(0 != (flags & REGINT_NONZERO) && 0 == value)) {
if ((0 != (flags & REGINT_GE_ZERO) && *storage < 0) ||
(0 != (flags & REGINT_GE_ONE) && *storage < 1) ||
(0 != (flags & REGINT_NONZERO) && 0 == *storage)) {
opal_output(0, "Bad parameter value for parameter \"%s\"",
param_name);
return OMPI_ERR_BAD_PARAM;
}
*out_value = value;
return OMPI_SUCCESS;
}
int mca_bcol_iboffload_verify_params(void)
{
if (mca_bcol_iboffload_component.min_rnr_timer > 31) {
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_min_rnr_timer > 31",
"bcol_iboffload_ib_min_rnr_timer reset to 31");
mca_bcol_iboffload_component.min_rnr_timer = 31;
} else if (ival < 0){
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_min_rnr_timer < 0",
"bcol_iboffload_ib_min_rnr_timer reset to 0");
mca_bcol_iboffload_component.min_rnr_timer = 0;
}
if (mca_bcol_iboffload_component.timeout > 31) {
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_timeout > 31",
"bcol_iboffload_ib_timeout reset to 31");
mca_bcol_iboffload_component.timeout = 31;
} else if (ival < 0) {
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_timeout < 0",
"bcol_iboffload_ib_timeout reset to 0");
mca_bcol_iboffload_component.timeout = 0;
}
if (mca_bcol_iboffload_component.retry_count > 7) {
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_retry_count > 7",
"bcol_iboffload_ib_retry_count reset to 7");
mca_bcol_iboffload_component.retry_count = 7;
} else if (mca_bcol_iboffload_component.retry_count < 0) {
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_retry_count < 0",
"bcol_iboffload_ib_retry_count reset to 0");
mca_bcol_iboffload_component.retry_count = 0;
}
if (mca_bcol_iboffload_component.max_rdma_dst_ops > 7) {
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_rnr_retry > 7",
"bcol_iboffload_ib_rnr_retry reset to 7");
mca_bcol_iboffload_component.max_rdma_dst_ops = 7;
} else if (mca_bcol_iboffload_component.max_rdma_dst_ops < 0) {
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_rnr_retry < 0",
"bcol_iboffload_ib_rnr_retry reset to 0");
mca_bcol_iboffload_component.max_rdma_dst_ops = 0;
}
if (mca_bcol_iboffload_component.service_level > 15) {
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_service_level > 15",
"bcol_iboffload_ib_service_level reset to 15");
mca_bcol_iboffload_component.service_level = 15;
} else if (mca_bcol_iboffload_component.service_level < 0) {
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_service_level < 0",
"bcol_iboffload_ib_service_level reset to 0");
mca_bcol_iboffload_component.service_level = 0;
}
if(mca_bcol_iboffload_component.buffer_alignment <= 1 ||
(mca_bcol_iboffload_component.buffer_alignment & (mca_bcol_iboffload_component.buffer_alignment - 1))) {
opal_show_help("help-mpi-bcol-iboffload.txt", "wrong buffer alignment",
true, ival, ompi_process_info.nodename, 64);
mca_bcol_iboffload_component.buffer_alignment = 64;
}
return OMPI_SUCCESS;
}
int mca_bcol_iboffload_register_params(void)
{
char *msg, *pkey;
int ival, ret = OMPI_SUCCESS, tmp;
char *msg;
int ret = OMPI_SUCCESS, tmp;
#define CHECK(expr) do { \
tmp = (expr); \
@ -123,43 +197,38 @@ int mca_bcol_iboffload_register_params(void)
/* register openib component parameters */
CHECK(reg_int("k_nomial_radix", NULL,
"The radix of the K-nomial tree for scatther-gather type algorithms"
"(starts from 2)", 2, &ival, REGINT_GE_ONE));
mca_bcol_iboffload_component.k_nomial_radix= ival;
"The radix of the K-nomial tree for scatther-gather type algorithms"
"(starts from 2)", 2, &mca_bcol_iboffload_component.k_nomial_radix,
REGINT_GE_ONE));
CHECK(reg_int("priority", NULL,
"IB offload component priority"
"(from 0(low) to 90 (high))", 90, &ival, 0));
mca_bcol_iboffload_component.super.priority = ival;
"(from 0(low) to 90 (high))", 90,
&mca_bcol_iboffload_component.super.priority, 0));
CHECK(reg_int("verbose", NULL,
"Output some verbose IB offload BTL information "
"(0 = no output, nonzero = output)", 0, &ival, 0));
mca_bcol_iboffload_component.verbose = ival;
"(0 = no output, nonzero = output)", 0,
&mca_bcol_iboffload_component.verbose, 0));
CHECK(reg_int("warn_default_gid_prefix", NULL,
"Warn when there is more than one active ports and at least one of them connected to the network with only default GID prefix configured (0 = do not warn; any other value = warn)",
1, &ival, 0));
mca_bcol_iboffload_component.warn_default_gid_prefix = (0 != ival);
CHECK(reg_int("warn_nonexistent_if", NULL,
"Warn if non-existent devices and/or ports are specified in the bcol_iboffla_if_[in|ex]clude MCA parameters (0 = do not warn; any other value = warn)",
1, &ival, 0));
mca_bcol_iboffload_component.warn_nonexistent_if = (0 != ival);
CHECK(reg_bool("warn_default_gid_prefix", NULL,
"Warn when there is more than one active ports and at least one of them connected to the network with only default GID prefix configured (0 = do not warn; any other value = warn)",
true, &mca_bcol_iboffload_component.warn_default_gid_prefix, 0));
CHECK(reg_bool("warn_nonexistent_if", NULL,
"Warn if non-existent devices and/or ports are specified in the bcol_iboffla_if_[in|ex]clude MCA parameters (0 = do not warn; any other value = warn)",
true, &mca_bcol_iboffload_component.warn_nonexistent_if, 0));
CHECK(reg_int("max_pipeline_depth", NULL,
"The maximal number of fragments of the same collective request that can be transferred in parallel", 3, &ival, 0));
mca_bcol_iboffload_component.max_pipeline_depth = ival;
"The maximal number of fragments of the same collective request that can be transferred in parallel", 3,
&mca_bcol_iboffload_component.max_pipeline_depth, 0));
CHECK(reg_int("max_bcols", NULL,
"Maximum number of device ports to use (-1 = use all available, otherwise must be >= 1)",
-1, (int *)&mca_bcol_iboffload_component.max_bcols,
REGINT_NEG_ONE_OK | REGINT_GE_ONE));
CHECK(reg_int("max_mqe_tasks", NULL,
"Maximum number of MQEs for each iboffload module",
1024, (int *)&mca_bcol_iboffload_component.max_mqe_tasks, 0));
1024, &mca_bcol_iboffload_component.max_mqe_tasks, 0));
CHECK(reg_int("max_mq_size", NULL,
"Maximum size of each MQ for each iboffload module",
1024, (int *)&mca_bcol_iboffload_component.max_mq_size, 0));
1024, &mca_bcol_iboffload_component.max_mq_size, 0));
CHECK(reg_int("free_list_num", NULL,
"Intial size of free lists (must be >= 1)",
256, &mca_bcol_iboffload_component.free_list_num,
@ -181,24 +250,16 @@ int mca_bcol_iboffload_register_params(void)
"Size of the OpenFabrics completion "
"queue (will automatically be set to a minimum of "
"(2 * number_of_peers * bcol_iboffload_rd_num))",
1024, &ival, REGINT_GE_ONE));
mca_bcol_iboffload_component.cq_size = (uint32_t) ival;
1024, &mca_bcol_iboffload_component.cq_size, REGINT_GE_ONE));
CHECK(reg_int("exchange_tree_order", NULL,
"The order of the exchange tree. "
"Must be power of two.",
2, &ival, REGINT_GE_ONE));
if (0 == (ival & (ival - 1))) {
mca_bcol_iboffload_component.exchange_tree_order = ival;
} else {
IBOFFLOAD_ERROR(("Warning: ibcol_iboffload_exchange_tree_order is %d which is not a power of 2, setting it to 2", ival));
mca_bcol_iboffload_component.exchange_tree_order = 2;
}
2, &mca_bcol_iboffload_component.exchange_tree_order, REGINT_GE_ONE));
CHECK(reg_int("knomial_tree_order", NULL,
"The order of the knomial exchange tree. ",
3, &ival, REGINT_GE_ONE));
mca_bcol_iboffload_component.knomial_tree_order = ival;
3, &mca_bcol_iboffload_component.knomial_tree_order, REGINT_GE_ONE));
CHECK(reg_int("max_inline_data", "max_inline_data",
@ -207,16 +268,10 @@ int mca_bcol_iboffload_register_params(void)
"otherwise must be >= 0). "
"If not explicitly set, use max_inline_data from "
"the INI file containing device-specific parameters",
128, &ival, REGINT_NEG_ONE_OK | REGINT_GE_ZERO));
mca_bcol_iboffload_component.max_inline_data = (int32_t) ival;
/* Pasha: Since we do not have max inline check like in openib,
I will put some dummy check here. All mlnx devices support at least 512b */
if (mca_bcol_iboffload_component.max_inline_data > 512) {
IBOFFLOAD_ERROR(("Warning the inline %d, is to big and unsupported",
mca_bcol_iboffload_component.max_inline_data));
ret = OMPI_ERROR;
}
128, &mca_bcol_iboffload_component.max_inline_data,
REGINT_NEG_ONE_OK | REGINT_GE_ZERO));
#if 0
CHECK(reg_string("pkey", "ib_pkey_val",
"OpenFabrics partition key (pkey) value. "
"Unsigned integer decimal or hex values are allowed (e.g., \"3\" or \"0x3f\") and will be masked against the maximum allowable IB paritition key value (0x7fff)",
@ -226,6 +281,7 @@ int mca_bcol_iboffload_register_params(void)
ompi_btl_openib_ini_intify(pkey) & MCA_BTL_IB_PKEY_MASK;
free(pkey);
*/
#endif
CHECK(reg_string("receive_queues", NULL,
"Colon-delimited, comma delimited list of receive queues: P,4096,8,6,4:P,32768,8,6,4",
@ -248,209 +304,96 @@ int mca_bcol_iboffload_register_params(void)
/* Don't try to recover from this */
return OMPI_ERR_OUT_OF_RESOURCE;
}
CHECK(reg_int("mtu", "ib_mtu", msg, IBV_MTU_1024, &ival, 0));
CHECK(mca_base_var_enum_create("infiniband mtu", mtu_values, &new_enum));
mca_bcol_iboffload_component.mtu = IBV_MTU_1024;
tmp = mca_base_component_var_register(&mca_bcol_iboffload_component.super.bcol_version,
"mtu", MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_bcol_iboffload_component.mtu);
OBJ_RELEASE(new_enum);
free(msg);
if (ival < IBV_MTU_1024 || ival > IBV_MTU_4096) {
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "invalid value for bcol_iboffload_ib_mtu",
"bcol_iboffload_ib_mtu reset to 1024");
mca_bcol_iboffload_component.mtu = IBV_MTU_1024;
} else {
mca_bcol_iboffload_component.mtu = (uint32_t) ival;
}
if (0 > tmp) ret = tmp;
tmp = mca_base_var_register_synonym(tmp, "ompi", "bcol", "iboffload", "ib_mtu",
MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
if (0 > tmp) ret = tmp;
CHECK(reg_int("ib_min_rnr_timer", NULL, "InfiniBand minimum "
"\"receiver not ready\" timer, in seconds "
"(must be >= 0 and <= 31)",
1 , &ival, 0));
1 , &mca_bcol_iboffload_component.min_rnr_timer, 0));
if (ival > 31) {
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_min_rnr_timer > 31",
"bcol_iboffload_ib_min_rnr_timer reset to 31");
ival = 31;
} else if (ival < 0){
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_min_rnr_timer < 0",
"bcol_iboffload_ib_min_rnr_timer reset to 0");
ival = 0;
}
mca_bcol_iboffload_component.min_rnr_timer = (uint32_t) ival;
CHECK(reg_int("ib_timeout", NULL, "InfiniBand transmit timeout, plugged into formula: 4.096 microseconds * (2^bcol_iboffload_ib_timeout)"
"(must be >= 0 and <= 31)",
20, &ival, 0));
if (ival > 31) {
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_timeout > 31",
"bcol_iboffload_ib_timeout reset to 31");
ival = 31;
} else if (ival < 0) {
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_timeout < 0",
"bcol_iboffload_ib_timeout reset to 0");
ival = 0;
}
mca_bcol_iboffload_component.timeout = (uint32_t) ival;
CHECK(reg_int("ib_timeout", NULL, "InfiniBand transmit timeout, plugged into formula: 4.096 microseconds * "
"(2^bcol_iboffload_ib_timeout) (must be >= 0 and <= 31)",
20, &mca_bcol_iboffload_component.timeout, 0));
CHECK(reg_int("ib_retry_count", NULL, "InfiniBand transmit retry count "
"(must be >= 0 and <= 7)",
7, &ival, 0));
if (ival > 7) {
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_retry_count > 7",
"bcol_iboffload_ib_retry_count reset to 7");
ival = 7;
} else if (ival < 0) {
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_retry_count < 0",
"bcol_iboffload_ib_retry_count reset to 0");
ival = 0;
}
mca_bcol_iboffload_component.retry_count = (uint32_t) ival;
7, &mca_bcol_iboffload_component.retry_count, 0));
CHECK(reg_int("ib_rnr_retry", NULL, "InfiniBand \"receiver not ready\" "
"retry count; applies *only* to SRQ/XRC queues. PP queues "
"use RNR retry values of 0 because Open MPI performs "
"software flow control to guarantee that RNRs never occur "
"(must be >= 0 and <= 7; 7 = \"infinite\")",
7, &ival, 0));
if (ival > 7) {
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_rnr_retry > 7",
"bcol_iboffload_ib_rnr_retry reset to 7");
ival = 7;
} else if (ival < 0) {
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_rnr_retry < 0",
"bcol_iboffload_ib_rnr_retry reset to 0");
ival = 0;
}
mca_bcol_iboffload_component.rnr_retry = (uint32_t) ival;
7, &mca_bcol_iboffload_component.rnr_retry, 0));
CHECK(reg_int("ib_max_rdma_dst_ops", NULL, "InfiniBand maximum pending RDMA "
"destination operations "
"(must be >= 0)",
4, &ival, REGINT_GE_ZERO));
mca_bcol_iboffload_component.max_rdma_dst_ops = (uint32_t) ival;
4, &mca_bcol_iboffload_component.max_rdma_dst_ops, REGINT_GE_ZERO));
CHECK(reg_int("ib_service_level", NULL, "InfiniBand service level "
"(must be >= 0 and <= 15)",
0, &ival, 0));
if (ival > 15) {
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_service_level > 15",
"bcol_iboffload_ib_service_level reset to 15");
ival = 15;
} else if (ival < 0) {
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_service_level < 0",
"bcol_iboffload_ib_service_level reset to 0");
ival = 0;
}
mca_bcol_iboffload_component.service_level = (uint32_t) ival;
CHECK(reg_int("btls_per_lid", NULL, "Number of BTLs to create for each "
"InfiniBand LID "
"(must be >= 1)",
1, &ival, REGINT_GE_ONE));
mca_bcol_iboffload_component.bcols_per_lid = (uint32_t) ival;
CHECK(reg_int("max_lmc", NULL, "Maximum number of LIDs to use for each device port "
"(must be >= 0, where 0 = use all available)",
0, &ival, REGINT_GE_ZERO));
mca_bcol_iboffload_component.max_lmc = (uint32_t) ival;
#if OPAL_HAVE_THREADS
CHECK(reg_int("use_async_event_thread", NULL,
"If nonzero, use the thread that will handle InfiniBand asyncihronous events ",
1, &ival, 0));
mca_bcol_iboffload_component.use_async_event_thread = (0 != ival);
#endif
0, &mca_bcol_iboffload_component.service_level, 0));
CHECK(reg_int("buffer_alignment", NULL,
"Prefered communication buffer alignment, in bytes "
"(must be > 0 and power of two)",
64, &ival, REGINT_GE_ZERO));
if(ival <= 1 || (ival & (ival - 1))) {
opal_show_help("help-mpi-bcol-iboffload.txt", "wrong buffer alignment",
true, ival, ompi_process_info.nodename, 64);
mca_bcol_iboffload_component.buffer_alignment = 64;
} else {
mca_bcol_iboffload_component.buffer_alignment = (uint32_t) ival;
}
CHECK(reg_int("last_calc_in_cpu", NULL,
"If set, the last ib offload calculation will "
"be done in the cpu (default: yes)",
1, &ival, REGINT_GE_ZERO));
mca_bcol_iboffload_component.last_calc_in_cpu = (0 != ival);
CHECK(reg_int("enable_rdma_calc", NULL,
"Enable RDMA Calc"
"(default: yes)",
1, &ival, REGINT_GE_ZERO));
mca_bcol_iboffload_component.enable_rdma_calc = (0 != ival);
64, &mca_bcol_iboffload_component.buffer_alignment, REGINT_GE_ZERO));
/* register parameters controlling message fragmentation */
CHECK(reg_int("min_frag_size", NULL,
"Minimum fragment size",
getpagesize(), &ival, REGINT_GE_ONE));
mca_bcol_iboffload_component.super.min_frag_size = ival;
"Minimum fragment size",
getpagesize(), &mca_bcol_iboffload_component.super.min_frag_size,
REGINT_GE_ONE));
CHECK(reg_int("max_frag_size", NULL,
"Maximum fragment size",
FRAG_SIZE_NO_LIMIT, &ival, REGINT_NONZERO));
mca_bcol_iboffload_component.super.max_frag_size = ival;
"Maximum fragment size",
FRAG_SIZE_NO_LIMIT, &mca_bcol_iboffload_component.super.max_frag_size,
REGINT_NONZERO));
CHECK(reg_int("can_use_user_buffers", NULL,
"User memory can be used by the collective algorithms",
1, &ival, REGINT_GE_ZERO));
mca_bcol_iboffload_component.super.can_use_user_buffers = ival;
CHECK(reg_int("use_pipeline", NULL,
"Pipeline the algorithm",
1, &ival, REGINT_GE_ZERO));
mca_bcol_iboffload_component.super.use_pipeline = ival;
CHECK(reg_bool("can_use_user_buffers", NULL,
"User memory can be used by the collective algorithms",
true, &mca_bcol_iboffload_component.super.can_use_user_buffers));
CHECK(reg_int("barrier_mode", NULL,
"Barrier mode: 0 - Recursive doubling; 1 - Recursive K-ing",
0, &ival, REGINT_GE_ZERO));
mca_bcol_iboffload_component.barrier_mode = ival;
0, &mca_bcol_iboffload_component.barrier_mode, REGINT_GE_ZERO));
CHECK(reg_int("max_progress_pull", NULL,
"Max number of progress pull checks",
8, &ival, REGINT_GE_ZERO));
mca_bcol_iboffload_component.max_progress_pull = ival;
8, &mca_bcol_iboffload_component.max_progress_pull, REGINT_GE_ZERO));
CHECK(reg_int("use_brucks_smsg_alltoall_rdma", NULL,
"Use brucks algorithm for smsg alltoall and RDMA semantics 1 = No Temp buffer recycling"
"1 = Alg with no Temp Buffer Recycling (faster), 2 = Alg with temp Buffer Recycling (slower)",
0, &ival, 0));
mca_bcol_iboffload_component.use_brucks_smsg_alltoall_rdma = ival;
0, &mca_bcol_iboffload_component.use_brucks_smsg_alltoall_rdma, 0));
CHECK(reg_int("use_brucks_smsg_alltoall_sr", NULL,
"Use brucks algorithm for smsg alltoall and Send/Recv semantics "
"1 = Alg with RTR (faster), 2 = Alg with RNR (slower)",
0, &ival, 0));
mca_bcol_iboffload_component.use_brucks_smsg_alltoall_sr = ival;
0, &mca_bcol_iboffload_component.use_brucks_smsg_alltoall_sr, 0));
CHECK(reg_int("alltoall_bruck_radix", NULL,
"Radix for Bruck algorithm for smsg alltoall",
3, &ival, 0));
mca_bcol_iboffload_component.k_alltoall_bruck_radix = ival;
3, &mca_bcol_iboffload_component.k_alltoall_bruck_radix, 0));
CHECK(reg_int("k_alltoall_bruck_radix", NULL,
"Temp Buffer alignment for Bruck algorithm for smsg alltoall",
64, &ival, 0));
mca_bcol_iboffload_component.tmp_buf_alignment = ival;
64, &mca_bcol_iboffload_component.tmp_buf_alignment, 0));
/*
CHECK(reg_string("if_include", NULL,
@ -464,6 +407,8 @@ int mca_bcol_iboffload_register_params(void)
0));
*/
CHECK(mca_bcol_iboffload_verify_params());
/* Register any MCA params for the connect pseudo-components */
if (OMPI_SUCCESS == ret) {
ret = ompi_common_ofacm_base_register(&mca_bcol_iboffload_component.super.bcol_version);

View File

@ -15,5 +15,6 @@
#include "ompi_config.h"
int mca_bcol_iboffload_register_params(void);
int mca_bcol_iboffload_verify_params(void);
#endif

View File

@ -768,7 +768,7 @@ static inline struct ibv_cq *ibv_create_cq_compat(struct ibv_context *context,
int mca_bcol_iboffload_adjust_cq(mca_bcol_iboffload_device_t *device,
struct ibv_cq **ib_cq)
{
uint32_t cq_size = mca_bcol_iboffload_component.cq_size;
uint32_t cq_size = (uint32_t) mca_bcol_iboffload_component.cq_size;
if (NULL == *ib_cq) {
*ib_cq = ibv_create_cq_compat(device->dev.ib_dev_context, cq_size,
@ -1070,8 +1070,8 @@ mca_bcol_iboffload_comm_query(mca_sbgp_base_module_t *sbgp, int *num_modules)
}
memset(&mqe_attr, 0, sizeof(mqe_attr));
mqe_attr.max_mqe_tasks = mca_bcol_iboffload_component.max_mqe_tasks;
mqe_attr.max_mq_size = mca_bcol_iboffload_component.max_mq_size;
mqe_attr.max_mqe_tasks = (uint32_t)mca_bcol_iboffload_component.max_mqe_tasks;
mqe_attr.max_mq_size = (uint32_t)mca_bcol_iboffload_component.max_mq_size;
mqe_attr.cq = iboffload_module->device->ib_mq_cq;
/* ALL MQs have the same configuration */

View File

@ -232,7 +232,7 @@ static void mca_bcol_iboffload_fillin_qp_attr(int qp_index,
Todo: copy max_inline_size() from ofacm to
common area.
*/
init_attr->cap.max_inline_data = cm->max_inline_data;
init_attr->cap.max_inline_data = (int32_t) cm->max_inline_data;
/* We allocate SG list for some algorithms (Bruck's alltoall) */
max_sge = ep->iboffload_module->group_size / 2 +
@ -248,8 +248,8 @@ static void mca_bcol_iboffload_fillin_qp_attr(int qp_index,
init_attr->cap.max_recv_sge = max_sge;
/* Vasily: the value will be changed later */
/* TODO Pasha: this is real crap */
init_attr->cap.max_recv_wr = cm->cq_size;
init_attr->cap.max_send_wr = cm->cq_size;
init_attr->cap.max_recv_wr = (uint32_t) cm->cq_size;
init_attr->cap.max_send_wr = (uint32_t) cm->cq_size;
/* Set attributes */
@ -257,13 +257,13 @@ static void mca_bcol_iboffload_fillin_qp_attr(int qp_index,
attr->port_num = ep->iboffload_module->port;
/* Vasily: the value will be changed later */
attr->path_mtu = cm->mtu;
attr->path_mtu = (uint32_t)cm->mtu;
attr->max_dest_rd_atomic = cm->max_rdma_dst_ops;
attr->min_rnr_timer = cm->min_rnr_timer;
attr->min_rnr_timer = (uint32_t)cm->min_rnr_timer;
attr->ah_attr.is_global = 0;
attr->ah_attr.sl = cm->service_level;
attr->ah_attr.sl = (uint32_t)cm->service_level;
/* Vasily: from struct mca_bcol_iboffload_port_t ????? */
/*
attr->ah_attr.src_path_bits = iboffload_module->src_path_bits;
@ -272,10 +272,10 @@ static void mca_bcol_iboffload_fillin_qp_attr(int qp_index,
/* JMS to be filled in later dynamically */
attr->ah_attr.static_rate = 0;
/* RTS params */
attr->timeout = cm->timeout;
attr->retry_cnt = cm->retry_count;
attr->rnr_retry = cm->rnr_retry;
attr->max_rd_atomic = cm->max_rdma_dst_ops;
attr->timeout = (uint32_t)cm->timeout;
attr->retry_cnt = (uint32_t)cm->retry_count;
attr->rnr_retry = (uint32_t)cm->rnr_retry;
attr->max_rd_atomic = (uint32_t)cm->max_rdma_dst_ops;
/* Init for local mca_bcol_iboffload_endpoint_qp_t qps structure
* that caches the qp information on endpoint */

View File

@ -41,27 +41,28 @@ enum {
static int reg_string(const char* param_name,
const char* deprecated_param_name,
const char* param_desc,
const char* default_value, char **out_value,
const char* default_value, char **storage,
int flags)
{
int index;
char *value;
index = mca_base_param_reg_string(&mca_bcol_ptpcoll_component.super.bcol_version,
param_name, param_desc, false, false,
default_value, &value);
if (NULL != deprecated_param_name) {
mca_base_param_reg_syn(index,
&mca_bcol_ptpcoll_component.super.bcol_version,
deprecated_param_name, true);
}
mca_base_param_lookup_string(index, &value);
if (0 != (flags & REGSTR_EMPTY_OK) && 0 == strlen(value)) {
*storage = default_value;
index = mca_base_component_var_register(&mca_bcol_ptpcoll_component.super.bcol_version,
param_name, param_desc, MCA_BASE_VAR_TYPE_STRING,
NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, storage);
if (NULL != deprecated_param_name) {
(void) mca_base_var_register_synonym(index, "ompi", "bcol", "ptpcoll",
deprecated_param_name,
MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
}
if (0 != (flags & REGSTR_EMPTY_OK) && (NULL == *storage || 0 == strlen(*storage))) {
opal_output(0, "Bad parameter value for parameter \"%s\"",
param_name);
return OMPI_ERR_BAD_PARAM;