1
1

MCA/base: Add new MCA variable system

Features:
 - Support for an override parameter file (openmpi-mca-param-override.conf).
   Variable values in this file can not be overridden by any file or environment
   value.
 - Support for boolean, unsigned, and unsigned long long variables.
 - Support for true/false values.
 - Support for enumerations on integer variables.
 - Support for MPIT scope, verbosity, and binding.
 - Support for command line source.
 - Support for setting variable source via the environment using
   OMPI_MCA_SOURCE_<var name>=source (either command or file:filename)
 - Cleaner API.
 - Support for variable groups (equivalent to MPIT categories).

Notes:
 - Variables must be created with a backing store (char **, int *, or bool *)
   that must live at least as long as the variable.
 - Creating a variable with the MCA_BASE_VAR_FLAG_SETTABLE flag enables the use
   of mca_base_var_set_value() to change the value.
 - String values are duplicated when the variable is registered. It is up to
   the caller to free the original value if necessary. The new value will be
   freed by the mca_base_var system and must not be freed by the user.
 - Variables with constant scope may not be settable.
 - Variable groups (and all associated variables) are deregistered when the
   component is closed or the component repository item is freed. This
   prevents a segmentation fault from accessing a variable after its component
   is unloaded.
 - After some discussion we decided we should remove the automatic registration
   of component priority variables. Few components actually made use of this
   feature.
 - The enumerator interface was updated to be general enough to handle
   future uses of the interface.
 - The code to generate ompi_info output has been moved into the MCA variable
   system. See mca_base_var_dump().

opal: update core and components to mca_base_var system
orte: update core and components to mca_base_var system
ompi: update core and components to mca_base_var system

This commit also modifies the rmaps framework. The following variables were
moved from ppr and lama: rmaps_base_pernode, rmaps_base_n_pernode,
rmaps_base_n_persocket. Both lama and ppr create synonyms for these variables.

This commit was SVN r28236.
Этот коммит содержится в:
Nathan Hjelm 2013-03-27 21:09:41 +00:00
родитель 9d1041b058
Коммит cf377db823
443 изменённых файлов: 13669 добавлений и 9918 удалений

Просмотреть файл

@ -61,7 +61,7 @@
#
# Basic behavior to smooth startup
mca_component_show_load_errors = 0
mca_base_component_show_load_errors = 0
orte_abort_timeout = 10
opal_set_max_sys_limits = 1
orte_report_launch_progress = 1

Просмотреть файл

@ -128,6 +128,8 @@ OMPI_DECLSPEC volatile int MPIR_being_debugged = 0;
OMPI_DECLSPEC volatile int MPIR_debug_state = 0;
OMPI_DECLSPEC char *MPIR_debug_abort_string = "";
static char *ompi_debugger_dll_path = NULL;
/* Check for a file in few direct ways for portability */
static void check(char *dir, char *file, char **locations)
{
@ -164,18 +166,19 @@ extern void
ompi_debugger_setup_dlls(void)
{
int i;
char *a, *b, **dirs, **tmp1 = NULL, **tmp2 = NULL;
char **dirs, **tmp1 = NULL, **tmp2 = NULL;
a = strdup(opal_install_dirs.pkglibdir);
mca_base_param_reg_string_name("ompi",
"debugger_dll_path",
"List of directories where MPI_INIT should search for debugger plugins",
false, false, a, &b);
free(a);
ompi_debugger_dll_path = opal_install_dirs.pkglibdir;
(void) mca_base_var_register("ompi", "ompi", "debugger", "dll_path",
"List of directories where MPI_INIT should search for debugger plugins",
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&ompi_debugger_dll_path);
/* Search the directory for MPI debugger DLLs */
if (NULL != b) {
dirs = opal_argv_split(b, ':');
if (NULL != ompi_debugger_dll_path) {
dirs = opal_argv_split(ompi_debugger_dll_path, ':');
for (i = 0; dirs[i] != NULL; ++i) {
check(dirs[i], OMPI_MPIHANDLES_DLL_PREFIX, tmp1);
check(dirs[i], OMPI_MSGQ_DLL_PREFIX, tmp2);

Просмотреть файл

@ -20,7 +20,6 @@
#include "ompi_config.h"
#include "ompi/mca/allocator/allocator.h"
#include "ompi/constants.h"
#include "opal/mca/base/mca_base_param.h"
#include "ompi/mca/allocator/bucket/allocator_bucket_alloc.h"
#include "ompi/mca/mpool/mpool.h"
@ -80,9 +79,16 @@ struct mca_allocator_base_module_t* mca_allocator_bucket_module_init(
return((mca_allocator_base_module_t *) allocator);
}
/*
 * Register the MCA variables of the bucket allocator component.
 *
 * Stores 30 in mca_allocator_num_buckets and registers that integer as the
 * settable "num_buckets" variable of this component.  The result of the
 * registration call is intentionally discarded, so this function always
 * returns OMPI_SUCCESS.
 */
static int mca_allocator_bucket_module_register(void)
{
    /* Seed the backing store with its default before it is registered. */
    mca_allocator_num_buckets = 30;

    (void) mca_base_component_var_register(
        &mca_allocator_bucket_component.allocator_version,
        "num_buckets",
        NULL,
        MCA_BASE_VAR_TYPE_INT,
        NULL,
        0,
        MCA_BASE_VAR_FLAG_SETTABLE,
        OPAL_INFO_LVL_9,
        MCA_BASE_VAR_SCOPE_LOCAL,
        &mca_allocator_num_buckets);

    return OMPI_SUCCESS;
}
int mca_allocator_bucket_module_open(void) {
(void) mca_base_param_reg_int_name ("allocator", "bucket_num_buckets", NULL, false, false,
30, &mca_allocator_num_buckets);
return(OMPI_SUCCESS);
}
@ -116,7 +122,9 @@ mca_allocator_base_component_t mca_allocator_bucket_component = {
OMPI_MINOR_VERSION,
OMPI_RELEASE_VERSION,
mca_allocator_bucket_module_open, /* module open */
mca_allocator_bucket_module_close /* module close */
mca_allocator_bucket_module_close, /* module close */
NULL,
mca_allocator_bucket_module_register
},
{
/* The component is checkpoint ready */

Просмотреть файл

@ -40,6 +40,8 @@
int mca_bcol_base_output = 0;
opal_list_t mca_bcol_base_components_opened;
static int mca_bcol_base_verbose = 0;
OMPI_DECLSPEC opal_list_t mca_bcol_base_components_in_use;
OMPI_DECLSPEC char *ompi_bcol_bcols_string;
OMPI_DECLSPEC int bcol_mpool_compatibility[BCOL_SIZE][BCOL_SIZE];
@ -200,27 +202,46 @@ static int mca_bcol_base_set_components_to_use(opal_list_t *bcol_components_avai
return OMPI_SUCCESS;
}
/*
 * Register the MCA variables owned by the bcol base framework.
 *
 * Two variables are registered, in this order:
 *   - "verbose" (int, settable), backed by mca_bcol_base_verbose;
 *   - "string"  (string, read-only scope), backed by ompi_bcol_bcols_string,
 *     which is first pointed at the built-in default component list.
 *
 * The flags argument is not used.  Registration results are discarded and
 * OMPI_SUCCESS is always returned.
 */
static int mca_bcol_base_register(int flags)
{
    /* Default list of basic collective components to use. */
    ompi_bcol_bcols_string = "basesmuma,basesmuma,iboffload,ptpcoll,ugni";

    /* Verbosity level of the framework's debugging output. */
    (void) mca_base_var_register("ompi", "bcol", "base", "verbose",
                                 "Verbosity level of BCOL framework",
                                 MCA_BASE_VAR_TYPE_INT, NULL, 0,
                                 MCA_BASE_VAR_FLAG_SETTABLE,
                                 OPAL_INFO_LVL_9,
                                 MCA_BASE_VAR_SCOPE_LOCAL,
                                 &mca_bcol_base_verbose);

    /* Comma-separated list of bcol components to use. */
    (void) mca_base_var_register("ompi", "bcol", "base", "string",
                                 "Default set of basic collective components to use",
                                 MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                 OPAL_INFO_LVL_9,
                                 MCA_BASE_VAR_SCOPE_READONLY,
                                 &ompi_bcol_bcols_string);

    return OMPI_SUCCESS;
}
/**
* Function for finding and opening either all MCA components, or the one
* that was specifically requested via a MCA parameter.
*/
int mca_bcol_base_open(void)
{
int value, ret;
int ret;
/*_bcol_base_components_available
* Register some MCA parameters
*/
/* Debugging/Verbose output */
mca_base_param_reg_int_name("bcol",
"base_verbose",
"Verbosity level of BCOL framework",
false, false,
0, &value);
(void) mca_bcol_base_register(0);
/* get fraemwork id */
/* get framework id */
mca_bcol_base_output = opal_output_open(NULL);
opal_output_set_verbosity(mca_bcol_base_output, value);
opal_output_set_verbosity(mca_bcol_base_output, mca_bcol_base_verbose);
/* Open up all available components */
if (OMPI_SUCCESS !=
@ -230,12 +251,6 @@ int mca_bcol_base_open(void)
return OMPI_ERROR;
}
/* figure out which bcol and sbgp components will actually be used */
/* get list of sub-grouping functions to use */
mca_base_param_reg_string_name("bcol","base_string",
"Default set of basic collective components to use ",
false, false, "basesmuma,basesmuma,iboffload,ptpcoll,ugni", &ompi_bcol_bcols_string);
ret = mca_bcol_base_set_components_to_use(&mca_bcol_base_components_opened,
&mca_bcol_base_components_in_use);

Просмотреть файл

@ -105,14 +105,8 @@ struct mca_bcol_basesmuma_component_t {
/* management data for collectives with no user data */
/** MCA parameter: control region size (bytes), per proc */
size_t basesmuma_ctl_size_per_proc;
/** MCA parameter: control region alignment */
size_t basesmuma_ctl_alignment;
/** MCA parameter: number of memory banks */
size_t basesmuma_num_mem_banks;
int basesmuma_num_mem_banks;
/** MCA parameter: number of regions per memory bank */
int basesmuma_num_regions_per_bank;

Просмотреть файл

@ -33,6 +33,7 @@ const char *mca_bcol_basesmuma_component_version_string =
* Local functions
*/
static int basesmuma_register(void);
static int basesmuma_open(void);
static int basesmuma_close(void);
static int mca_bcol_basesmuma_deregister_ctl_sm(
@ -40,13 +41,23 @@ static int mca_bcol_basesmuma_deregister_ctl_sm(
static inline int mca_bcol_basesmuma_param_register_int(
const char* param_name, int default_value)
const char* param_name, int default_value, int *storage)
{
int param_value;
*storage = default_value;
return mca_base_component_var_register(&mca_bcol_basesmuma_component.super.bcol_version, param_name,
NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, storage);
}
(void) mca_base_param_reg_int (&mca_bcol_basesmuma_component.super.bcol_version, param_name,
NULL, false, false, default_value, &param_value);
return param_value;
/*
 * Register a boolean MCA variable for the basesmuma component.
 *
 * param_name    - name of the variable within this component
 * default_value - value written to *storage before registration
 * storage       - backing store for the variable
 *
 * Returns the result of mca_base_component_var_register() unchanged.
 */
static inline int mca_bcol_basesmuma_param_register_bool(
    const char* param_name, bool default_value, bool *storage)
{
    int rc;

    /* The backing store carries the default into the registration call. */
    *storage = default_value;

    rc = mca_base_component_var_register(
        &mca_bcol_basesmuma_component.super.bcol_version,
        param_name,
        NULL,
        MCA_BASE_VAR_TYPE_BOOL,
        NULL,
        0,
        0,
        OPAL_INFO_LVL_9,
        MCA_BASE_VAR_SCOPE_READONLY,
        storage);

    return rc;
}
/*
@ -76,6 +87,8 @@ mca_bcol_basesmuma_component_t mca_bcol_basesmuma_component = {
basesmuma_open,
basesmuma_close,
NULL,
basesmuma_register
},
/* Initialization / querying functions */
@ -90,6 +103,76 @@ mca_bcol_basesmuma_component_t mca_bcol_basesmuma_component = {
},
};
/*
 * Register the MCA variables of the basesmuma component.
 *
 * Every variable is registered through the local
 * mca_bcol_basesmuma_param_register_int()/_bool() helpers, which write the
 * default into the given field of mca_bcol_basesmuma_component and then
 * register that field as the variable's backing store.  The helpers' return
 * values are ignored; this function always returns OMPI_SUCCESS.
 */
static int basesmuma_register(void)
{
    mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component;

    /* Integer variables, listed in registration order. */
    const struct {
        const char *name;
        int default_value;
        int *storage;
    } int_params[] = {
        /* component priority */
        { "priority", 90, &cs->super.priority },
        /* number of memory banks */
        { "basesmuma_num_ctl_banks", 2, &cs->basesmuma_num_mem_banks },
        /* number of regions per memory bank */
        { "basesmuma_num_buffs_per_bank", 16, &cs->basesmuma_num_regions_per_bank },
        /* polling loops allowed for pending resources to complete their work */
        { "n_poll_loops", 4, &cs->n_poll_loops },
        /* number of groups supported */
        { "n_groups_supported", 100, &cs->n_groups_supported },
        /* order of the fan-in tree */
        { "radix_fanin", 2, &cs->radix_fanin },
        /* order of the fan-out tree */
        { "radix_fanout", 2, &cs->radix_fanout },
        /* order of the read tree */
        { "radix_read_tree", 3, &cs->radix_read_tree },
        /* order of the reduction fan-out tree */
        { "order_reduction_tree", 2, &cs->order_reduction_tree },
        /* k-nomial radix */
        { "k_nomial_radix", 3, &cs->k_nomial_radix },
        /* polling loops for non-blocking algorithms */
        { "num_to_probe", 10, &cs->num_to_probe },
        /* radix of the k-ary scatter tree */
        { "scatter_kary_radix", 4, &cs->scatter_kary_radix },
        /* parameters controlling message fragmentation */
        { "min_frag_size", getpagesize(), &cs->super.min_frag_size },
        { "max_frag_size", FRAG_SIZE_NO_LIMIT, &cs->super.max_frag_size },
    };
    size_t i;

    for (i = 0; i < sizeof(int_params) / sizeof(int_params[0]); ++i) {
        (void) mca_bcol_basesmuma_param_register_int(int_params[i].name,
                                                     int_params[i].default_value,
                                                     int_params[i].storage);
    }

    /* By default use pre-registered shared memory segments. */
    /* RLG NOTE: When we have a systematic way to handle single memory
     * copy semantics, we need to update this logic
     */
    (void) mca_bcol_basesmuma_param_register_bool("can_use_user_buffers", false,
                                                  &cs->super.can_use_user_buffers);

    /* Kept after the bool so the registration order is unchanged. */
    (void) mca_bcol_basesmuma_param_register_int("verbose", 0, &cs->verbose);

    return OMPI_SUCCESS;
}
/*
* Open the component
*/
@ -102,37 +185,6 @@ static int basesmuma_open(void)
opal_mutex_t *mutex_ptr;
int dummy;
/* set component priority */
cs->super.priority=
mca_bcol_basesmuma_param_register_int("priority",90);
/* set control region size (bytes), per proc */
cs->basesmuma_ctl_size_per_proc=
mca_bcol_basesmuma_param_register_int("basesmuma_ctl_size_per_proc",
CACHE_LINE_SIZE);
/* set control region alignment (bytes) */
cs->basesmuma_ctl_alignment=
mca_bcol_basesmuma_param_register_int("basesmuma_ctl_alignment",
getpagesize());
/* Number of memory banks */
cs->basesmuma_num_mem_banks=
mca_bcol_basesmuma_param_register_int("basesmuma_num_ctl_banks",
2);
/* Number of regions per memory bank */
cs->basesmuma_num_regions_per_bank=
mca_bcol_basesmuma_param_register_int("basesmuma_num_buffs_per_bank",
16);
/* number of polling loops to allow pending resources to
* complete their work
*/
cs->n_poll_loops=
mca_bcol_basesmuma_param_register_int("n_poll_loops",4);
/*
* Make sure that the number of banks is a power of 2
*/
@ -153,59 +205,10 @@ static int basesmuma_open(void)
goto ERROR;
}
/* Number of groups supported */
cs->n_groups_supported=
mca_bcol_basesmuma_param_register_int("n_groups_supported",100);
/* order of fanin tree */
cs->radix_fanin=
mca_bcol_basesmuma_param_register_int("radix_fanin",2);
/* order of fanout tree */
cs->radix_fanout=
mca_bcol_basesmuma_param_register_int("radix_fanout",2);
/* order of read tree */
cs->radix_read_tree =
mca_bcol_basesmuma_param_register_int("radix_read_tree",3);
/* order of reduction fanout tree */
cs->order_reduction_tree=
mca_bcol_basesmuma_param_register_int("order_reduction_tree",2);
/* k-nomial radix */
cs->k_nomial_radix=
mca_bcol_basesmuma_param_register_int("k_nomial_radix",3);
/* number of polling loops for non-blocking algorithms */
cs->num_to_probe =
mca_bcol_basesmuma_param_register_int("num_to_probe",10);
/* radix of the k-ary scatter tree */
cs->scatter_kary_radix =
mca_bcol_basesmuma_param_register_int("scatter_kary_radix",4);
/* Portals initialization */
cs->portals_init = false;
cs->portals_info = NULL;
cs->verbose =
mca_bcol_basesmuma_param_register_int("verbose",0);
/* register parmeters controlling message fragementation */
cs->super.min_frag_size=
mca_bcol_basesmuma_param_register_int("min_frag_size",getpagesize());
cs->super.max_frag_size=
mca_bcol_basesmuma_param_register_int("max_frag_size",FRAG_SIZE_NO_LIMIT);
/* by default use pre-registered shared memory segments */
/* RLG NOTE: When we have a systematic way to handle single memory
* copy semantics, we need to update this logic
*/
cs->super.can_use_user_buffers=
mca_bcol_basesmuma_param_register_int("can_use_user_buffers",0);
cs->super.use_pipeline=
mca_bcol_basesmuma_param_register_int("use_pipeline",1);
/*
* initialization
*/

Просмотреть файл

@ -321,16 +321,13 @@ struct mca_bcol_base_component_2_0_0_t {
*/
/** Minimum fragement size */
size_t min_frag_size;
int min_frag_size;
/** Maximum fragment size */
int32_t max_frag_size;
int max_frag_size;
/** Supports direct use of user-buffers */
int can_use_user_buffers;
/** Support pipelining */
int use_pipeline;
bool can_use_user_buffers;
};
typedef struct mca_bcol_base_component_2_0_0_t mca_bcol_base_component_2_0_0_t;
typedef struct mca_bcol_base_component_2_0_0_t mca_bcol_base_component_t;

Просмотреть файл

@ -178,43 +178,33 @@ struct mca_bcol_iboffload_component_t {
/** name of ib memory pool */
char* mpool_name;
/** max outstanding CQE on the CQ */
uint32_t cq_size;
int cq_size;
/** Max size of inline data */
uint32_t max_inline_data;
int max_inline_data;
/** IB partition definition */
uint32_t pkey_val;
/** Outstanding atomic reads */
uint32_t qp_ous_rd_atom;
/** IB MTU */
uint32_t mtu;
int mtu;
/** Recv not ready timer */
uint32_t min_rnr_timer;
int min_rnr_timer;
/** IB timeout */
uint32_t timeout;
int timeout;
/** IB retry count */
uint32_t retry_count;
int retry_count;
/** Recv not ready retry count */
uint32_t rnr_retry;
int rnr_retry;
/** IB maximum pending RDMA */
uint32_t max_rdma_dst_ops;
int max_rdma_dst_ops;
/** IB Service level (QOS) */
uint32_t service_level;
/** number of iboffload modules that we want to open per single lid */
uint32_t bcols_per_lid;
/** Max LMCs that we want to support */
uint32_t max_lmc;
/** Max number of bcols */
uint32_t max_bcols;
/** Use the async event handler */
uint32_t use_async_event_thread;
int service_level;
/** Preferred communication buffer alignment in Bytes (must be power of two) */
uint32_t buffer_alignment;
int buffer_alignment;
/** Max tasks number for MQ */
uint32_t max_mqe_tasks;
int max_mqe_tasks;
/** Max MQ size */
uint32_t max_mq_size;
/** Memory fragment size */
uint32_t frag_size;
int max_mq_size;
/** HCA/Port include exclude list */
char *if_include;
char **if_include_list;
@ -257,10 +247,6 @@ struct mca_bcol_iboffload_component_t {
enum ibv_m_wr_calc_op map_ompi_to_ib_calcs[OMPI_OP_NUM_OF_TYPES];
/** array mapping Open MPI data types to MVerbs data types */
enum ibv_m_wr_data_type map_ompi_to_ib_dt[OMPI_DATATYPE_MPI_MAX_PREDEFINED];
/** The last ib offload calculation will be done by the cpu */
bool last_calc_in_cpu;
/** The last ib offload calculation will be done by the cpu */
bool enable_rdma_calc;
/** The order of the exchange tree */
int exchange_tree_order;
/** Knomial tree order */

Просмотреть файл

@ -98,7 +98,8 @@ mca_bcol_iboffload_component_t mca_bcol_iboffload_component = {
iboffload_open,
iboffload_close,
NULL, /* mca_register_component_params() */
NULL, /* query */
mca_bcol_iboffload_register_params
NULL, /* reserved */
},
@ -110,41 +111,36 @@ mca_bcol_iboffload_component_t mca_bcol_iboffload_component = {
true, /* collective calls with iboffload should to be ordered */
},
/* iboffload-component specifc information */
0, /* verbose */
0, /* number of qps to use */
false, /* warn_default_gid_prefix */
false, /* warn_nonexistent_if */
0, /* free_list_num */
0, /* free_list_max */
0, /* free_list_inc */
NULL, /* mpool_name */
0, /* cq_size */
0, /* max_inline_data */
0, /* pkey_val */
0, /* qp_ous_rd_atom */
0, /* mtu */
0, /* min_rnr_timer */
0, /* timeout */
0, /* retry_count */
0, /* rnr_retry */
0, /* max_rdma_dst_ops */
0, /* service_level */
0, /* bcols_per_lid */
0, /* max_lmc */
0, /* max_bcols */
0, /* use_async_event_thread */
0, /* buffer_alignment */
0, /* max_mqe_tasks */
0, /* max_mq_size */
0, /* frag_size */
NULL, /* if_include */
NULL, /* if_include_list */
NULL, /* if_exclude */
NULL, /* if_exclude_list */
NULL, /* if_list */
NULL, /* ib_devs */
0, /* num_devs */
NULL, /* receive_queues */
.verbose = 0, /* verbose */
.num_qps = 0, /* number of qps to use */
.warn_default_gid_prefix = false, /* warn_default_gid_prefix */
.warn_nonexistent_if = false, /* warn_nonexistent_if */
.free_list_num = 0, /* free_list_num */
.free_list_max = 0, /* free_list_max */
.free_list_inc = 0, /* free_list_inc */
.mpool_name = NULL, /* mpool_name */
.cq_size = 0, /* cq_size */
.max_inline_data = 0, /* max_inline_data */
.pkey_val = 0, /* pkey_val */
.qp_ous_rd_atom = 0, /* qp_ous_rd_atom */
.mtu = 0, /* mtu */
.min_rnr_timer = 0, /* min_rnr_timer */
.timeout = 0, /* timeout */
.retry_count = 0, /* retry_count */
.rnr_retry = 0, /* rnr_retry */
.max_rdma_dst_ops = 0, /* max_rdma_dst_ops */
.service_level = 0, /* service_level */
.buffer_alignment = 0, /* buffer_alignment */
.max_mqe_tasks = 0, /* max_mqe_tasks */
.max_mq_size = 0, /* max_mq_size */
.if_include = NULL, /* if_include */
.if_include_list = NULL, /* if_include_list */
.if_exclude = NULL, /* if_exclude */
.if_exclude_list = NULL, /* if_exclude_list */
.if_list = NULL, /* if_list */
.ib_devs = NULL, /* ib_devs */
.num_devs = 0, /* num_devs */
.receive_queues = NULL, /* receive_queues */
};
static int mca_bcol_iboffload_dummy_init_query(
@ -403,6 +399,8 @@ static int iboffload_open(void)
IBOFFLOAD_VERBOSE(10, ("Open Iboffload component.\n"));
(void) mca_bcol_iboffload_verify_params();
cm->super.priority = 100;
cm->super.n_net_contexts = 0;
cm->super.network_contexts = NULL;
@ -416,9 +414,20 @@ static int iboffload_open(void)
goto close_device;
}
/* load mca parametres */
rc = mca_bcol_iboffload_register_params();
if (OMPI_SUCCESS != rc) {
/* Check MCA parameters */
if (0 == (ival & (ival - 1))) {
mca_bcol_iboffload_component.exchange_tree_order = ival;
} else {
IBOFFLOAD_ERROR(("Warning: ibcol_iboffload_exchange_tree_order is %d which is not a power of 2, setting it to 2", ival));
mca_bcol_iboffload_component.exchange_tree_order = 2;
}
/* Pasha: Since we do not have max inline check like in openib,
I will put some dummy check here. All mlnx devices support at least 512b */
if (mca_bcol_iboffload_component.max_inline_data > 512) {
IBOFFLOAD_ERROR(("Warning the inline %d, is to big and unsupported",
mca_bcol_iboffload_component.max_inline_data));
rc = OMPI_ERROR;
goto close_device;
}

Просмотреть файл

@ -22,6 +22,8 @@
#include "ompi/mca/common/ofacm/base.h"
#include "ompi/communicator/communicator.h"
#include "opal/util/show_help.h"
/*
* Local flags
*/
@ -38,37 +40,41 @@ enum {
REGSTR_MAX = 0x88
};
mca_base_var_enum_value_t mtu_values[] = {
{IBV_MTU_256, "256B"},
{IBV_MTU_512, "512B"},
{IBV_MTU_1024, "1k"},
{IBV_MTU_4096, "4k"},
{0, NULL}
};
/*
* utility routine for string parameter registration
*/
static int reg_string(const char* param_name,
const char* deprecated_param_name,
const char* param_desc,
const char* default_value, char **out_value,
const char* default_value, char **storage,
int flags)
{
int index;
char *value;
index = mca_base_param_reg_string(&mca_bcol_iboffload_component.super.bcol_version,
param_name, param_desc, false, false,
default_value, &value);
*storage = default_value;
index = mca_base_component_var_register(&mca_bcol_iboffload_component.super.bcol_version,
param_name, param_desc, MCA_BASE_VAR_TYPE_STRING,
NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, storage);
if (NULL != deprecated_param_name) {
mca_base_param_reg_syn(index,
&mca_bcol_iboffload_component.super.bcol_version,
deprecated_param_name, true);
(void) mca_base_var_register_synonym(index, "ompi", "bcol", "iboffload", deprecated_param_name,
MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
}
mca_base_param_lookup_string(index, &value);
if (0 != (flags & REGSTR_EMPTY_OK) && 0 == strlen(value)) {
if (0 != (flags & REGSTR_EMPTY_OK) && 0 == strlen(*storage)) {
opal_output(0, "Bad parameter value for parameter \"%s\"",
param_name);
return OMPI_ERR_BAD_PARAM;
}
*out_value = value;
return OMPI_SUCCESS;
}
@ -78,43 +84,111 @@ static int reg_string(const char* param_name,
static int reg_int(const char* param_name,
const char* deprecated_param_name,
const char* param_desc,
int default_value, int *out_value, int flags)
int default_value, int *storage, int flags)
{
int index, value;
int index;
index = mca_base_param_reg_int(&mca_bcol_iboffload_component.super.bcol_version,
param_name, param_desc, false, false,
default_value, NULL);
*storage = default_value;
index = mca_base_component_var_register(&mca_bcol_iboffload_component.super.bcol_version,
param_name, param_desc, MCA_BASE_VAR_TYPE_INT,
NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, storage);
if (NULL != deprecated_param_name) {
mca_base_param_reg_syn(index,
&mca_bcol_iboffload_component.super.bcol_version,
deprecated_param_name, true);
(void) mca_base_var_register_synonym(index, "ompi", "bcol", "iboffload", deprecated_param_name,
MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
}
mca_base_param_lookup_int(index, &value);
if (0 != (flags & REGINT_NEG_ONE_OK) && -1 == value) {
*out_value = value;
if (0 != (flags & REGINT_NEG_ONE_OK) && -1 == *storage) {
return OMPI_SUCCESS;
}
if ((0 != (flags & REGINT_GE_ZERO) && value < 0) ||
(0 != (flags & REGINT_GE_ONE) && value < 1) ||
(0 != (flags & REGINT_NONZERO) && 0 == value)) {
if ((0 != (flags & REGINT_GE_ZERO) && *storage < 0) ||
(0 != (flags & REGINT_GE_ONE) && *storage < 1) ||
(0 != (flags & REGINT_NONZERO) && 0 == *storage)) {
opal_output(0, "Bad parameter value for parameter \"%s\"",
param_name);
return OMPI_ERR_BAD_PARAM;
}
*out_value = value;
return OMPI_SUCCESS;
}
int mca_bcol_iboffload_verify_params(void)
{
if (mca_bcol_iboffload_component.min_rnr_timer > 31) {
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_min_rnr_timer > 31",
"bcol_iboffload_ib_min_rnr_timer reset to 31");
mca_bcol_iboffload_component.min_rnr_timer = 31;
} else if (ival < 0){
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_min_rnr_timer < 0",
"bcol_iboffload_ib_min_rnr_timer reset to 0");
mca_bcol_iboffload_component.min_rnr_timer = 0;
}
if (mca_bcol_iboffload_component.timeout > 31) {
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_timeout > 31",
"bcol_iboffload_ib_timeout reset to 31");
mca_bcol_iboffload_component.timeout = 31;
} else if (ival < 0) {
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_timeout < 0",
"bcol_iboffload_ib_timeout reset to 0");
mca_bcol_iboffload_component.timeout = 0;
}
if (mca_bcol_iboffload_component.retry_count > 7) {
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_retry_count > 7",
"bcol_iboffload_ib_retry_count reset to 7");
mca_bcol_iboffload_component.retry_count = 7;
} else if (mca_bcol_iboffload_component.retry_count < 0) {
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_retry_count < 0",
"bcol_iboffload_ib_retry_count reset to 0");
mca_bcol_iboffload_component.retry_count = 0;
}
if (mca_bcol_iboffload_component.max_rdma_dst_ops > 7) {
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_rnr_retry > 7",
"bcol_iboffload_ib_rnr_retry reset to 7");
mca_bcol_iboffload_component.max_rdma_dst_ops = 7;
} else if (mca_bcol_iboffload_component.max_rdma_dst_ops < 0) {
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_rnr_retry < 0",
"bcol_iboffload_ib_rnr_retry reset to 0");
mca_bcol_iboffload_component.max_rdma_dst_ops = 0;
}
if (mca_bcol_iboffload_component.service_level > 15) {
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_service_level > 15",
"bcol_iboffload_ib_service_level reset to 15");
mca_bcol_iboffload_component.service_level = 15;
} else if (mca_bcol_iboffload_component.service_level < 0) {
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_service_level < 0",
"bcol_iboffload_ib_service_level reset to 0");
mca_bcol_iboffload_component.service_level = 0;
}
if(mca_bcol_iboffload_component.buffer_alignment <= 1 ||
(mca_bcol_iboffload_component.buffer_alignment & (mca_bcol_iboffload_component.buffer_alignment - 1))) {
opal_show_help("help-mpi-bcol-iboffload.txt", "wrong buffer alignment",
true, ival, ompi_process_info.nodename, 64);
mca_bcol_iboffload_component.buffer_alignment = 64;
}
return OMPI_SUCCESS;
}
int mca_bcol_iboffload_register_params(void)
{
char *msg, *pkey;
int ival, ret = OMPI_SUCCESS, tmp;
char *msg;
int ret = OMPI_SUCCESS, tmp;
#define CHECK(expr) do { \
tmp = (expr); \
@ -123,43 +197,38 @@ int mca_bcol_iboffload_register_params(void)
/* register openib component parameters */
CHECK(reg_int("k_nomial_radix", NULL,
"The radix of the K-nomial tree for scatther-gather type algorithms"
"(starts from 2)", 2, &ival, REGINT_GE_ONE));
mca_bcol_iboffload_component.k_nomial_radix= ival;
"The radix of the K-nomial tree for scatther-gather type algorithms"
"(starts from 2)", 2, &mca_bcol_iboffload_component.k_nomial_radix,
REGINT_GE_ONE));
CHECK(reg_int("priority", NULL,
"IB offload component priority"
"(from 0(low) to 90 (high))", 90, &ival, 0));
mca_bcol_iboffload_component.super.priority = ival;
"(from 0(low) to 90 (high))", 90,
&mca_bcol_iboffload_component.super.priority, 0));
CHECK(reg_int("verbose", NULL,
"Output some verbose IB offload BTL information "
"(0 = no output, nonzero = output)", 0, &ival, 0));
mca_bcol_iboffload_component.verbose = ival;
"(0 = no output, nonzero = output)", 0,
&mca_bcol_iboffload_component.verbose, 0));
CHECK(reg_int("warn_default_gid_prefix", NULL,
"Warn when there is more than one active ports and at least one of them connected to the network with only default GID prefix configured (0 = do not warn; any other value = warn)",
1, &ival, 0));
mca_bcol_iboffload_component.warn_default_gid_prefix = (0 != ival);
CHECK(reg_int("warn_nonexistent_if", NULL,
"Warn if non-existent devices and/or ports are specified in the bcol_iboffla_if_[in|ex]clude MCA parameters (0 = do not warn; any other value = warn)",
1, &ival, 0));
mca_bcol_iboffload_component.warn_nonexistent_if = (0 != ival);
CHECK(reg_bool("warn_default_gid_prefix", NULL,
"Warn when there is more than one active ports and at least one of them connected to the network with only default GID prefix configured (0 = do not warn; any other value = warn)",
true, &mca_bcol_iboffload_component.warn_default_gid_prefix, 0));
CHECK(reg_bool("warn_nonexistent_if", NULL,
"Warn if non-existent devices and/or ports are specified in the bcol_iboffla_if_[in|ex]clude MCA parameters (0 = do not warn; any other value = warn)",
true, &mca_bcol_iboffload_component.warn_nonexistent_if, 0));
CHECK(reg_int("max_pipeline_depth", NULL,
"The maximal number of fragments of the same collective request that can be transferred in parallel", 3, &ival, 0));
mca_bcol_iboffload_component.max_pipeline_depth = ival;
"The maximal number of fragments of the same collective request that can be transferred in parallel", 3,
&mca_bcol_iboffload_component.max_pipeline_depth, 0));
CHECK(reg_int("max_bcols", NULL,
"Maximum number of device ports to use (-1 = use all available, otherwise must be >= 1)",
-1, (int *)&mca_bcol_iboffload_component.max_bcols,
REGINT_NEG_ONE_OK | REGINT_GE_ONE));
CHECK(reg_int("max_mqe_tasks", NULL,
"Maximum number of MQEs for each iboffload module",
1024, (int *)&mca_bcol_iboffload_component.max_mqe_tasks, 0));
1024, &mca_bcol_iboffload_component.max_mqe_tasks, 0));
CHECK(reg_int("max_mq_size", NULL,
"Maximum size of each MQ for each iboffload module",
1024, (int *)&mca_bcol_iboffload_component.max_mq_size, 0));
1024, &mca_bcol_iboffload_component.max_mq_size, 0));
CHECK(reg_int("free_list_num", NULL,
"Intial size of free lists (must be >= 1)",
256, &mca_bcol_iboffload_component.free_list_num,
@ -181,24 +250,16 @@ int mca_bcol_iboffload_register_params(void)
"Size of the OpenFabrics completion "
"queue (will automatically be set to a minimum of "
"(2 * number_of_peers * bcol_iboffload_rd_num))",
1024, &ival, REGINT_GE_ONE));
mca_bcol_iboffload_component.cq_size = (uint32_t) ival;
1024, &mca_bcol_iboffload_component.cq_size, REGINT_GE_ONE));
CHECK(reg_int("exchange_tree_order", NULL,
"The order of the exchange tree. "
"Must be power of two.",
2, &ival, REGINT_GE_ONE));
if (0 == (ival & (ival - 1))) {
mca_bcol_iboffload_component.exchange_tree_order = ival;
} else {
IBOFFLOAD_ERROR(("Warning: ibcol_iboffload_exchange_tree_order is %d which is not a power of 2, setting it to 2", ival));
mca_bcol_iboffload_component.exchange_tree_order = 2;
}
2, &mca_bcol_iboffload_component.exchange_tree_order, REGINT_GE_ONE));
CHECK(reg_int("knomial_tree_order", NULL,
"The order of the knomial exchange tree. ",
3, &ival, REGINT_GE_ONE));
mca_bcol_iboffload_component.knomial_tree_order = ival;
3, &mca_bcol_iboffload_component.knomial_tree_order, REGINT_GE_ONE));
CHECK(reg_int("max_inline_data", "max_inline_data",
@ -207,16 +268,10 @@ int mca_bcol_iboffload_register_params(void)
"otherwise must be >= 0). "
"If not explicitly set, use max_inline_data from "
"the INI file containing device-specific parameters",
128, &ival, REGINT_NEG_ONE_OK | REGINT_GE_ZERO));
mca_bcol_iboffload_component.max_inline_data = (int32_t) ival;
/* Pasha: Since we do not have max inline check like in openib,
I will put some dummy check here. All mlnx devices support at least 512b */
if (mca_bcol_iboffload_component.max_inline_data > 512) {
IBOFFLOAD_ERROR(("Warning the inline %d, is to big and unsupported",
mca_bcol_iboffload_component.max_inline_data));
ret = OMPI_ERROR;
}
128, &mca_bcol_iboffload_component.max_inline_data,
REGINT_NEG_ONE_OK | REGINT_GE_ZERO));
#if 0
CHECK(reg_string("pkey", "ib_pkey_val",
"OpenFabrics partition key (pkey) value. "
"Unsigned integer decimal or hex values are allowed (e.g., \"3\" or \"0x3f\") and will be masked against the maximum allowable IB paritition key value (0x7fff)",
@ -226,6 +281,7 @@ int mca_bcol_iboffload_register_params(void)
ompi_btl_openib_ini_intify(pkey) & MCA_BTL_IB_PKEY_MASK;
free(pkey);
*/
#endif
CHECK(reg_string("receive_queues", NULL,
"Colon-delimited, comma delimited list of receive queues: P,4096,8,6,4:P,32768,8,6,4",
@ -248,209 +304,96 @@ int mca_bcol_iboffload_register_params(void)
/* Don't try to recover from this */
return OMPI_ERR_OUT_OF_RESOURCE;
}
CHECK(reg_int("mtu", "ib_mtu", msg, IBV_MTU_1024, &ival, 0));
CHECK(mca_base_var_enum_create("infiniband mtu", mtu_values, &new_enum));
mca_bcol_iboffload_component.mtu = IBV_MTU_1024;
tmp = mca_base_component_var_register(&mca_bcol_iboffload_component.super.bcol_version,
"mtu", MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_bcol_iboffload_component.mtu);
OBJ_RELEASE(new_enum);
free(msg);
if (ival < IBV_MTU_1024 || ival > IBV_MTU_4096) {
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "invalid value for bcol_iboffload_ib_mtu",
"bcol_iboffload_ib_mtu reset to 1024");
mca_bcol_iboffload_component.mtu = IBV_MTU_1024;
} else {
mca_bcol_iboffload_component.mtu = (uint32_t) ival;
}
if (0 > tmp) ret = tmp;
tmp = mca_base_var_register_synonym(tmp, "ompi", "bcol", "iboffload", "ib_mtu",
MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
if (0 > tmp) ret = tmp;
CHECK(reg_int("ib_min_rnr_timer", NULL, "InfiniBand minimum "
"\"receiver not ready\" timer, in seconds "
"(must be >= 0 and <= 31)",
1 , &ival, 0));
1 , &mca_bcol_iboffload_component.min_rnr_timer, 0));
if (ival > 31) {
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_min_rnr_timer > 31",
"bcol_iboffload_ib_min_rnr_timer reset to 31");
ival = 31;
} else if (ival < 0){
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_min_rnr_timer < 0",
"bcol_iboffload_ib_min_rnr_timer reset to 0");
ival = 0;
}
mca_bcol_iboffload_component.min_rnr_timer = (uint32_t) ival;
CHECK(reg_int("ib_timeout", NULL, "InfiniBand transmit timeout, plugged into formula: 4.096 microseconds * (2^bcol_iboffload_ib_timeout)"
"(must be >= 0 and <= 31)",
20, &ival, 0));
if (ival > 31) {
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_timeout > 31",
"bcol_iboffload_ib_timeout reset to 31");
ival = 31;
} else if (ival < 0) {
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_timeout < 0",
"bcol_iboffload_ib_timeout reset to 0");
ival = 0;
}
mca_bcol_iboffload_component.timeout = (uint32_t) ival;
CHECK(reg_int("ib_timeout", NULL, "InfiniBand transmit timeout, plugged into formula: 4.096 microseconds * "
"(2^bcol_iboffload_ib_timeout) (must be >= 0 and <= 31)",
20, &mca_bcol_iboffload_component.timeout, 0));
CHECK(reg_int("ib_retry_count", NULL, "InfiniBand transmit retry count "
"(must be >= 0 and <= 7)",
7, &ival, 0));
if (ival > 7) {
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_retry_count > 7",
"bcol_iboffload_ib_retry_count reset to 7");
ival = 7;
} else if (ival < 0) {
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_retry_count < 0",
"bcol_iboffload_ib_retry_count reset to 0");
ival = 0;
}
mca_bcol_iboffload_component.retry_count = (uint32_t) ival;
7, &mca_bcol_iboffload_component.retry_count, 0));
CHECK(reg_int("ib_rnr_retry", NULL, "InfiniBand \"receiver not ready\" "
"retry count; applies *only* to SRQ/XRC queues. PP queues "
"use RNR retry values of 0 because Open MPI performs "
"software flow control to guarantee that RNRs never occur "
"(must be >= 0 and <= 7; 7 = \"infinite\")",
7, &ival, 0));
if (ival > 7) {
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_rnr_retry > 7",
"bcol_iboffload_ib_rnr_retry reset to 7");
ival = 7;
} else if (ival < 0) {
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_rnr_retry < 0",
"bcol_iboffload_ib_rnr_retry reset to 0");
ival = 0;
}
mca_bcol_iboffload_component.rnr_retry = (uint32_t) ival;
7, &mca_bcol_iboffload_component.rnr_retry, 0));
CHECK(reg_int("ib_max_rdma_dst_ops", NULL, "InfiniBand maximum pending RDMA "
"destination operations "
"(must be >= 0)",
4, &ival, REGINT_GE_ZERO));
mca_bcol_iboffload_component.max_rdma_dst_ops = (uint32_t) ival;
4, &mca_bcol_iboffload_component.max_rdma_dst_ops, REGINT_GE_ZERO));
CHECK(reg_int("ib_service_level", NULL, "InfiniBand service level "
"(must be >= 0 and <= 15)",
0, &ival, 0));
if (ival > 15) {
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_service_level > 15",
"bcol_iboffload_ib_service_level reset to 15");
ival = 15;
} else if (ival < 0) {
opal_show_help("help-mpi-bcol-iboffload.txt", "invalid mca param value",
true, "bcol_iboffload_ib_service_level < 0",
"bcol_iboffload_ib_service_level reset to 0");
ival = 0;
}
mca_bcol_iboffload_component.service_level = (uint32_t) ival;
CHECK(reg_int("btls_per_lid", NULL, "Number of BTLs to create for each "
"InfiniBand LID "
"(must be >= 1)",
1, &ival, REGINT_GE_ONE));
mca_bcol_iboffload_component.bcols_per_lid = (uint32_t) ival;
CHECK(reg_int("max_lmc", NULL, "Maximum number of LIDs to use for each device port "
"(must be >= 0, where 0 = use all available)",
0, &ival, REGINT_GE_ZERO));
mca_bcol_iboffload_component.max_lmc = (uint32_t) ival;
#if OPAL_HAVE_THREADS
CHECK(reg_int("use_async_event_thread", NULL,
"If nonzero, use the thread that will handle InfiniBand asyncihronous events ",
1, &ival, 0));
mca_bcol_iboffload_component.use_async_event_thread = (0 != ival);
#endif
0, &mca_bcol_iboffload_component.service_level, 0));
CHECK(reg_int("buffer_alignment", NULL,
"Prefered communication buffer alignment, in bytes "
"(must be > 0 and power of two)",
64, &ival, REGINT_GE_ZERO));
if(ival <= 1 || (ival & (ival - 1))) {
opal_show_help("help-mpi-bcol-iboffload.txt", "wrong buffer alignment",
true, ival, ompi_process_info.nodename, 64);
mca_bcol_iboffload_component.buffer_alignment = 64;
} else {
mca_bcol_iboffload_component.buffer_alignment = (uint32_t) ival;
}
CHECK(reg_int("last_calc_in_cpu", NULL,
"If set, the last ib offload calculation will "
"be done in the cpu (default: yes)",
1, &ival, REGINT_GE_ZERO));
mca_bcol_iboffload_component.last_calc_in_cpu = (0 != ival);
CHECK(reg_int("enable_rdma_calc", NULL,
"Enable RDMA Calc"
"(default: yes)",
1, &ival, REGINT_GE_ZERO));
mca_bcol_iboffload_component.enable_rdma_calc = (0 != ival);
64, &mca_bcol_iboffload_component.buffer_alignment, REGINT_GE_ZERO));
/* register parameters controlling message fragmentation */
CHECK(reg_int("min_frag_size", NULL,
"Minimum fragment size",
getpagesize(), &ival, REGINT_GE_ONE));
mca_bcol_iboffload_component.super.min_frag_size = ival;
"Minimum fragment size",
getpagesize(), &mca_bcol_iboffload_component.super.min_frag_size,
REGINT_GE_ONE));
CHECK(reg_int("max_frag_size", NULL,
"Maximum fragment size",
FRAG_SIZE_NO_LIMIT, &ival, REGINT_NONZERO));
mca_bcol_iboffload_component.super.max_frag_size = ival;
"Maximum fragment size",
FRAG_SIZE_NO_LIMIT, &mca_bcol_iboffload_component.super.max_frag_size,
REGINT_NONZERO));
CHECK(reg_int("can_use_user_buffers", NULL,
"User memory can be used by the collective algorithms",
1, &ival, REGINT_GE_ZERO));
mca_bcol_iboffload_component.super.can_use_user_buffers = ival;
CHECK(reg_int("use_pipeline", NULL,
"Pipeline the algorithm",
1, &ival, REGINT_GE_ZERO));
mca_bcol_iboffload_component.super.use_pipeline = ival;
CHECK(reg_bool("can_use_user_buffers", NULL,
"User memory can be used by the collective algorithms",
true, &mca_bcol_iboffload_component.super.can_use_user_buffers));
CHECK(reg_int("barrier_mode", NULL,
"Barrier mode: 0 - Recursive doubling; 1 - Recursive K-ing",
0, &ival, REGINT_GE_ZERO));
mca_bcol_iboffload_component.barrier_mode = ival;
0, &mca_bcol_iboffload_component.barrier_mode, REGINT_GE_ZERO));
CHECK(reg_int("max_progress_pull", NULL,
"Max number of progress pull checks",
8, &ival, REGINT_GE_ZERO));
mca_bcol_iboffload_component.max_progress_pull = ival;
8, &mca_bcol_iboffload_component.max_progress_pull, REGINT_GE_ZERO));
CHECK(reg_int("use_brucks_smsg_alltoall_rdma", NULL,
"Use brucks algorithm for smsg alltoall and RDMA semantics 1 = No Temp buffer recycling"
"1 = Alg with no Temp Buffer Recycling (faster), 2 = Alg with temp Buffer Recycling (slower)",
0, &ival, 0));
mca_bcol_iboffload_component.use_brucks_smsg_alltoall_rdma = ival;
0, &mca_bcol_iboffload_component.use_brucks_smsg_alltoall_rdma, 0));
CHECK(reg_int("use_brucks_smsg_alltoall_sr", NULL,
"Use brucks algorithm for smsg alltoall and Send/Recv semantics "
"1 = Alg with RTR (faster), 2 = Alg with RNR (slower)",
0, &ival, 0));
mca_bcol_iboffload_component.use_brucks_smsg_alltoall_sr = ival;
0, &mca_bcol_iboffload_component.use_brucks_smsg_alltoall_sr, 0));
CHECK(reg_int("alltoall_bruck_radix", NULL,
"Radix for Bruck algorithm for smsg alltoall",
3, &ival, 0));
mca_bcol_iboffload_component.k_alltoall_bruck_radix = ival;
3, &mca_bcol_iboffload_component.k_alltoall_bruck_radix, 0));
CHECK(reg_int("k_alltoall_bruck_radix", NULL,
"Temp Buffer alignment for Bruck algorithm for smsg alltoall",
64, &ival, 0));
mca_bcol_iboffload_component.tmp_buf_alignment = ival;
64, &mca_bcol_iboffload_component.tmp_buf_alignment, 0));
/*
CHECK(reg_string("if_include", NULL,
@ -464,6 +407,8 @@ int mca_bcol_iboffload_register_params(void)
0));
*/
CHECK(mca_bcol_iboffload_verify_params());
/* Register any MCA params for the connect pseudo-components */
if (OMPI_SUCCESS == ret) {
ret = ompi_common_ofacm_base_register(&mca_bcol_iboffload_component.super.bcol_version);

Просмотреть файл

@ -15,5 +15,6 @@
#include "ompi_config.h"
int mca_bcol_iboffload_register_params(void);
int mca_bcol_iboffload_verify_params(void);
#endif

Просмотреть файл

@ -768,7 +768,7 @@ static inline struct ibv_cq *ibv_create_cq_compat(struct ibv_context *context,
int mca_bcol_iboffload_adjust_cq(mca_bcol_iboffload_device_t *device,
struct ibv_cq **ib_cq)
{
uint32_t cq_size = mca_bcol_iboffload_component.cq_size;
uint32_t cq_size = (uint32_t) mca_bcol_iboffload_component.cq_size;
if (NULL == *ib_cq) {
*ib_cq = ibv_create_cq_compat(device->dev.ib_dev_context, cq_size,
@ -1070,8 +1070,8 @@ mca_bcol_iboffload_comm_query(mca_sbgp_base_module_t *sbgp, int *num_modules)
}
memset(&mqe_attr, 0, sizeof(mqe_attr));
mqe_attr.max_mqe_tasks = mca_bcol_iboffload_component.max_mqe_tasks;
mqe_attr.max_mq_size = mca_bcol_iboffload_component.max_mq_size;
mqe_attr.max_mqe_tasks = (uint32_t)mca_bcol_iboffload_component.max_mqe_tasks;
mqe_attr.max_mq_size = (uint32_t)mca_bcol_iboffload_component.max_mq_size;
mqe_attr.cq = iboffload_module->device->ib_mq_cq;
/* ALL MQs have the same configuration */

Просмотреть файл

@ -232,7 +232,7 @@ static void mca_bcol_iboffload_fillin_qp_attr(int qp_index,
Todo: copy max_inline_size() from ofacm to
common area.
*/
init_attr->cap.max_inline_data = cm->max_inline_data;
init_attr->cap.max_inline_data = (int32_t) cm->max_inline_data;
/* We allocate SG list for some algorithms (Bruck's alltoall) */
max_sge = ep->iboffload_module->group_size / 2 +
@ -248,8 +248,8 @@ static void mca_bcol_iboffload_fillin_qp_attr(int qp_index,
init_attr->cap.max_recv_sge = max_sge;
/* Vasily: the value will be changed later */
/* TODO Pasha: this is real crap */
init_attr->cap.max_recv_wr = cm->cq_size;
init_attr->cap.max_send_wr = cm->cq_size;
init_attr->cap.max_recv_wr = (uint32_t) cm->cq_size;
init_attr->cap.max_send_wr = (uint32_t) cm->cq_size;
/* Set attributes */
@ -257,13 +257,13 @@ static void mca_bcol_iboffload_fillin_qp_attr(int qp_index,
attr->port_num = ep->iboffload_module->port;
/* Vasily: the value will be changed later */
attr->path_mtu = cm->mtu;
attr->path_mtu = (uint32_t)cm->mtu;
attr->max_dest_rd_atomic = cm->max_rdma_dst_ops;
attr->min_rnr_timer = cm->min_rnr_timer;
attr->min_rnr_timer = (uint32_t)cm->min_rnr_timer;
attr->ah_attr.is_global = 0;
attr->ah_attr.sl = cm->service_level;
attr->ah_attr.sl = (uint32_t)cm->service_level;
/* Vasily: from struct mca_bcol_iboffload_port_t ????? */
/*
attr->ah_attr.src_path_bits = iboffload_module->src_path_bits;
@ -272,10 +272,10 @@ static void mca_bcol_iboffload_fillin_qp_attr(int qp_index,
/* JMS to be filled in later dynamically */
attr->ah_attr.static_rate = 0;
/* RTS params */
attr->timeout = cm->timeout;
attr->retry_cnt = cm->retry_count;
attr->rnr_retry = cm->rnr_retry;
attr->max_rd_atomic = cm->max_rdma_dst_ops;
attr->timeout = (uint32_t)cm->timeout;
attr->retry_cnt = (uint32_t)cm->retry_count;
attr->rnr_retry = (uint32_t)cm->rnr_retry;
attr->max_rd_atomic = (uint32_t)cm->max_rdma_dst_ops;
/* Init for local mca_bcol_iboffload_endpoint_qp_t qps structure
* that caches the qp information on endpoint */

Просмотреть файл

@ -41,27 +41,28 @@ enum {
/*
 * Register a string MCA variable for the ptpcoll bcol component and
 * validate its value.
 *
 * NOTE(review): this listing carries two versions of the fifth parameter
 * (out_value / storage) and both mca_base_param_* and mca_base_var_* calls;
 * it reads like the removed and added lines of a change shown together.
 * Confirm which lines are live before relying on the details below.
 *
 * In the mca_base_var path visible here: *storage is seeded with
 * default_value, the variable is registered as MCA_BASE_VAR_TYPE_STRING with
 * MCA_BASE_VAR_SCOPE_READONLY against the component version, and, when
 * deprecated_param_name is non-NULL, a deprecated synonym is registered
 * under "ompi", "bcol", "ptpcoll".
 *
 * Returns OMPI_ERR_BAD_PARAM (after an opal_output message) when the
 * REGSTR_EMPTY_OK check below fires, OMPI_SUCCESS otherwise.
 */
static int reg_string(const char* param_name,
const char* deprecated_param_name,
const char* param_desc,
const char* default_value, char **out_value,
const char* default_value, char **storage,
int flags)
{
int index;
char *value;
index = mca_base_param_reg_string(&mca_bcol_ptpcoll_component.super.bcol_version,
param_name, param_desc, false, false,
default_value, &value);
if (NULL != deprecated_param_name) {
mca_base_param_reg_syn(index,
&mca_bcol_ptpcoll_component.super.bcol_version,
deprecated_param_name, true);
}
mca_base_param_lookup_string(index, &value);
if (0 != (flags & REGSTR_EMPTY_OK) && 0 == strlen(value)) {
/* Seed the backing store; default_value is const-qualified while *storage is not */
*storage = default_value;
/* NOTE(review): the returned index is not checked for < 0 before use,
 * unlike reg_bool() in this file -- confirm that is intended. */
index = mca_base_component_var_register(&mca_bcol_ptpcoll_component.super.bcol_version,
param_name, param_desc, MCA_BASE_VAR_TYPE_STRING,
NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, storage);
if (NULL != deprecated_param_name) {
(void) mca_base_var_register_synonym(index, "ompi", "bcol", "ptpcoll",
deprecated_param_name,
MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
}
/* NOTE(review): this rejects a NULL/empty value when REGSTR_EMPTY_OK *is*
 * set; the flag name suggests the test may be inverted -- confirm. */
if (0 != (flags & REGSTR_EMPTY_OK) && (NULL == *storage || 0 == strlen(*storage))) {
opal_output(0, "Bad parameter value for parameter \"%s\"",
param_name);
return OMPI_ERR_BAD_PARAM;
}
*out_value = value;
return OMPI_SUCCESS;
}
#endif
@ -72,37 +73,63 @@ static int reg_string(const char* param_name,
/*
 * Register an integer MCA variable for the ptpcoll bcol component and
 * range-check its value according to the REGINT_* bits in flags.
 *
 * NOTE(review): this listing carries two versions of the parameter line
 * (out_value / storage), two declarations of index, and both
 * mca_base_param_* and mca_base_var_* calls; it reads like the removed and
 * added lines of a change shown together. Confirm which lines are live.
 *
 * In the mca_base_var path visible here: *storage is seeded with
 * default_value, the variable is registered as MCA_BASE_VAR_TYPE_INT with
 * MCA_BASE_VAR_SCOPE_READONLY, and a deprecated synonym is registered under
 * "ompi", "bcol", "ptpcoll" when deprecated_param_name is non-NULL.
 *
 * Returns OMPI_SUCCESS when the value is -1 and REGINT_NEG_ONE_OK is set, or
 * when no requested range check fails; OMPI_ERR_BAD_PARAM (after an
 * opal_output message) when a REGINT_GE_ZERO / REGINT_GE_ONE /
 * REGINT_NONZERO check fails.
 */
static int reg_int(const char* param_name,
const char* deprecated_param_name,
const char* param_desc,
int default_value, int *out_value, int flags)
int default_value, int *storage, int flags)
{
int index, value;
index = mca_base_param_reg_int(&mca_bcol_ptpcoll_component.super.bcol_version,
param_name, param_desc, false, false,
default_value, NULL);
if (NULL != deprecated_param_name) {
mca_base_param_reg_syn(index,
&mca_bcol_ptpcoll_component.super.bcol_version,
deprecated_param_name, true);
}
mca_base_param_lookup_int(index, &value);
int index;
if (0 != (flags & REGINT_NEG_ONE_OK) && -1 == value) {
*out_value = value;
/* Seed the backing store with the default before registering */
*storage = default_value;
/* NOTE(review): the returned index is not checked for < 0 before use,
 * unlike reg_bool() in this file -- confirm that is intended. */
index = mca_base_component_var_register(&mca_bcol_ptpcoll_component.super.bcol_version,
param_name, param_desc, MCA_BASE_VAR_TYPE_INT,
NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, storage);
if (NULL != deprecated_param_name) {
(void) mca_base_var_register_synonym(index, "ompi", "bcol", "ptpcoll",
deprecated_param_name,
MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
}
/* -1 is accepted without further range checks when the caller allows it */
if (0 != (flags & REGINT_NEG_ONE_OK) && -1 == *storage) {
return OMPI_SUCCESS;
}
if ((0 != (flags & REGINT_GE_ZERO) && value < 0) ||
(0 != (flags & REGINT_GE_ONE) && value < 1) ||
(0 != (flags & REGINT_NONZERO) && 0 == value)) {
/* Each range check applies only when its flag bit is set */
if ((0 != (flags & REGINT_GE_ZERO) && *storage < 0) ||
(0 != (flags & REGINT_GE_ONE) && *storage < 1) ||
(0 != (flags & REGINT_NONZERO) && 0 == *storage)) {
opal_output(0, "Bad parameter value for parameter \"%s\"",
param_name);
return OMPI_ERR_BAD_PARAM;
}
*out_value = value;
return OMPI_SUCCESS;
}
/*
 * Register a boolean MCA variable for the ptpcoll bcol component.
 *
 * param_name            - variable name passed to the registration call
 * deprecated_param_name - optional deprecated synonym (NULL for none)
 * param_desc            - help string passed to the registration call
 * default_value         - value written to *storage before registering
 * storage               - backing store handed to the variable system
 *
 * Returns the negative value from mca_base_component_var_register() if
 * registration fails, OMPI_SUCCESS otherwise. The result of registering the
 * synonym is deliberately ignored.
 */
static int reg_bool(const char* param_name,
                    const char* deprecated_param_name,
                    const char* param_desc,
                    bool default_value, bool *storage)
{
    int var_id;

    /* The backing store must hold the default before registration */
    *storage = default_value;

    var_id = mca_base_component_var_register(&mca_bcol_ptpcoll_component.super.bcol_version,
                                             param_name, param_desc,
                                             MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                             OPAL_INFO_LVL_9,
                                             MCA_BASE_VAR_SCOPE_READONLY,
                                             storage);

    /* Only attach the deprecated name to a successfully registered variable */
    if (var_id >= 0 && NULL != deprecated_param_name) {
        (void) mca_base_var_register_synonym(var_id, "ompi", "bcol", "ptpcoll",
                                             deprecated_param_name,
                                             MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
    }

    return (var_id < 0) ? var_id : OMPI_SUCCESS;
}
int mca_bcol_ptpcoll_register_mca_params(void)
{
int ival, ret, tmp;
int ret, tmp;
mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component;
ret = OMPI_SUCCESS;
@ -113,74 +140,57 @@ int mca_bcol_ptpcoll_register_mca_params(void)
CHECK(reg_int("priority", NULL,
"PTPCOLL component priority"
"(from 0(low) to 90 (high))", 90, &ival, 0));
cm->super.priority = ival;
"(from 0(low) to 90 (high))", 90, &cm->super.priority, 0));
CHECK(reg_int("verbose", NULL,
"Output some verbose PTPCOLL information "
"(0 = no output, nonzero = output)", 0, &ival, REGINT_GE_ZERO));
cm->verbose = ival;
"(0 = no output, nonzero = output)", 0, &cm->verbose, REGINT_GE_ZERO));
CHECK(reg_int("k_nomial_radix", NULL,
"The radix of K-Nomial Tree "
"(starts from 2)", 2, &ival, REGINT_GE_ONE));
cm->k_nomial_radix = ival;
"(starts from 2)", 2, &cm->k_nomial_radix, REGINT_GE_ONE));
CHECK(reg_int("narray_radix", NULL,
"The radix of Narray Tree "
"(starts from 2)", 2, &ival, REGINT_GE_ONE));
cm->narray_radix = ival;
"(starts from 2)", 2, &cm->narray_radix, REGINT_GE_ONE));
CHECK(reg_int("narray_knomial_radix", NULL,
"The radix of Narray/Knomial Tree for scatther-gather type algorithms"
"(starts from 2)", 2, &ival, REGINT_GE_ONE));
cm->narray_knomial_radix = ival;
"(starts from 2)", 2, &cm->narray_knomial_radix, REGINT_GE_ONE));
CHECK(reg_int("num_to_probe", NULL,
"Number of probe operation in single source data check"
"(starts from 8)", 8, &ival, REGINT_GE_ONE));
cm->num_to_probe = ival;
"(starts from 8)", 8, &cm->num_to_probe, REGINT_GE_ONE));
CHECK(reg_int("bcast_small_msg_known_root_alg", NULL,
"Algoritm selection for bcast small messages known root"
"(1 - K-nomial, 2 - N-array)", 1, &ival, REGINT_GE_ZERO));
cm->bcast_small_messages_known_root_alg = ival;
"(1 - K-nomial, 2 - N-array)", 1, &cm->bcast_small_messages_known_root_alg,
REGINT_GE_ZERO));
CHECK(reg_int("bcast_large_msg_known_root_alg", NULL,
"Algoritm selection for bcast large messages known root"
"(1 - Binomial scatther-gather, 2 - N-array scather, K-nomial gather)",
1, &ival, REGINT_GE_ZERO));
cm->bcast_large_messages_known_root_alg = ival;
1, &cm->bcast_large_messages_known_root_alg, REGINT_GE_ZERO));
CHECK(reg_int("barrier_alg", NULL,
"Algoritm selection for Barrier"
"(1 - Recursive doubling, 2 - Recursive K-ing)",
1, &ival, REGINT_GE_ZERO));
cm->barrier_alg = ival;
1, &cm->barrier_alg, REGINT_GE_ZERO));
/* register parameters controlling message fragmentation */
CHECK(reg_int("min_frag_size", NULL,
"Minimum fragment size",
getpagesize(), &ival, REGINT_GE_ONE));
cm->super.min_frag_size=ival;
getpagesize(), &cm->super.min_frag_size, REGINT_GE_ONE));
CHECK(reg_int("max_frag_size", NULL,
"Maximum fragment size",
FRAG_SIZE_NO_LIMIT, &ival, REGINT_NONZERO));
cm->super.max_frag_size=ival;
FRAG_SIZE_NO_LIMIT, &cm->super.max_frag_size, REGINT_NONZERO));
CHECK(reg_int("can_use_user_buffers", NULL,
CHECK(reg_bool("can_use_user_buffers", NULL,
"User memory can be used by the collective algorithms",
1, &ival, REGINT_GE_ZERO));
cm->super.can_use_user_buffers=ival;
CHECK(reg_int("use_pipeline", NULL,
"Pipeline the algorithm",
1, &ival, REGINT_GE_ZERO));
cm->super.use_pipeline=ival;
1, &cm->super.can_use_user_buffers));
CHECK(reg_int("use_brucks_smsg_alltoall_rdma", NULL,
"Use brucks algorithm for smsg alltoall and RDMA semantics 1 = No Temp buffer recycling"
"1 = Alg with no Temp Buffer Recycling (faster), 2 = Alg with temp Buffer Recycling (slower)",
0, &ival, 0));
cm->use_brucks_smsg_alltoall_rdma = ival;
0, &cm->use_brucks_smsg_alltoall_rdma, 0));
return ret;
}

Просмотреть файл

@ -69,9 +69,9 @@ int mca_bml_base_btl_array_reserve(mca_bml_base_btl_array_t* array, size_t size)
#if OPAL_ENABLE_DEBUG_RELIABILITY
extern double mca_bml_base_error_rate_floor;
extern double mca_bml_base_error_rate_ceiling;
extern int mca_bml_base_error_count;
extern int mca_bml_base_error_rate_floor;
extern int mca_bml_base_error_rate_ceiling;
extern int mca_bml_base_error_count;
struct mca_bml_base_context_t {
size_t index;
@ -102,9 +102,9 @@ int mca_bml_base_send( mca_bml_base_btl_t* bml_btl,
{
des->des_context = (void*)bml_btl;
if(mca_bml_base_error_count <= 0 && mca_bml_base_error_rate_ceiling > 0) {
mca_bml_base_error_count = (int) ((mca_bml_base_error_rate_ceiling * rand())/(RAND_MAX+1.0));
if(mca_bml_base_error_count < mca_bml_base_error_rate_floor) {
mca_bml_base_error_count = mca_bml_base_error_rate_floor;
mca_bml_base_error_count = (int) (((double) mca_bml_base_error_rate_ceiling * rand())/(RAND_MAX+1.0));
if(mca_bml_base_error_count < (double) mca_bml_base_error_rate_floor) {
mca_bml_base_error_count = (double) mca_bml_base_error_rate_floor;
}
if(mca_bml_base_error_count % 2) {
/* local completion - network "drops" packet */

Просмотреть файл

@ -32,11 +32,51 @@ int mca_bml_base_already_opened = 0;
opal_list_t mca_bml_base_components_available = {{0}};
#if OPAL_ENABLE_DEBUG_RELIABILITY
double mca_bml_base_error_rate_floor;
double mca_bml_base_error_rate_ceiling;
int mca_bml_base_error_count;
int mca_bml_base_error_rate_floor;
int mca_bml_base_error_rate_ceiling;
int mca_bml_base_error_count;
static bool mca_bml_base_srand;
#endif
/*
 * Register the BML base framework's MCA variables.
 *
 * When OPAL_ENABLE_DEBUG_RELIABILITY is enabled this registers
 * error_rate_floor (int, default 0), error_rate_ceiling (int, default 0) and
 * srand (bool, default true) under "ompi", "bml", "base", each with a
 * deprecated synonym that omits the "base" component name. Registration
 * results are not checked.
 *
 * The flags argument is not used. Always returns OMPI_SUCCESS.
 */
int mca_bml_base_register(int flags)
{
#if OPAL_ENABLE_DEBUG_RELIABILITY
    {
        int floor_id, ceiling_id, srand_id;

        /* Defaults must be in the backing stores before registration */
        mca_bml_base_error_rate_floor = 0;
        mca_bml_base_error_rate_ceiling = 0;
        mca_bml_base_srand = true;

        floor_id = mca_base_var_register("ompi", "bml", "base", "error_rate_floor", NULL,
                                         MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                         OPAL_INFO_LVL_9,
                                         MCA_BASE_VAR_SCOPE_READONLY,
                                         &mca_bml_base_error_rate_floor);
        (void) mca_base_var_register_synonym(floor_id, "ompi", "bml", NULL,
                                             "error_rate_floor",
                                             MCA_BASE_VAR_SYN_FLAG_DEPRECATED);

        ceiling_id = mca_base_var_register("ompi", "bml", "base", "error_rate_ceiling", NULL,
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &mca_bml_base_error_rate_ceiling);
        (void) mca_base_var_register_synonym(ceiling_id, "ompi", "bml", NULL,
                                             "error_rate_ceiling",
                                             MCA_BASE_VAR_SYN_FLAG_DEPRECATED);

        srand_id = mca_base_var_register("ompi", "bml", "base", "srand", NULL,
                                         MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                         OPAL_INFO_LVL_9,
                                         MCA_BASE_VAR_SCOPE_READONLY,
                                         &mca_bml_base_srand);
        (void) mca_base_var_register_synonym(srand_id, "ompi", "bml", NULL,
                                             "srand",
                                             MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
    }
#endif

    return OMPI_SUCCESS;
}
int mca_bml_base_open(void)
{
/* See if we've already been here */
@ -44,6 +84,8 @@ int mca_bml_base_open(void)
return OMPI_SUCCESS;
}
(void) mca_bml_base_register(0);
if(OMPI_SUCCESS !=
mca_base_components_open("bml", 0, mca_bml_base_static_components,
&mca_bml_base_components_available,
@ -52,34 +94,20 @@ int mca_bml_base_open(void)
}
#if OPAL_ENABLE_DEBUG_RELIABILITY
do {
int param, value;
/* seed random number generator */
if(mca_bml_base_srand) {
struct timeval tv;
gettimeofday(&tv, NULL);
srand(getpid() * tv.tv_usec);
}
(void) mca_base_param_reg_int_name ("bml", "error_rate_floor", NULL, false, false,
0, &value);
mca_bml_base_error_rate_floor = (double) value;
(void) mca_base_param_reg_int_name ("bml", "error_rate_ceiling", NULL, false, false,
0, &value);
mca_bml_base_error_rate_ceiling = (double) value;
(void) mca_base_param_reg_int_name ("bml", "srand", NULL, false, false,
1, &value);
/* seed random number generator */
if(value) {
struct timeval tv;
gettimeofday(&tv, NULL);
srand(getpid() * tv.tv_usec);
}
/* initialize count */
if(mca_bml_base_error_rate_ceiling > 0
&& mca_bml_base_error_rate_floor <= mca_bml_base_error_rate_ceiling) {
mca_bml_base_error_count = (int) ((mca_bml_base_error_rate_ceiling * rand())/(RAND_MAX+1.0));
}
} while (0);
/* initialize count */
if(mca_bml_base_error_rate_ceiling > 0
&& mca_bml_base_error_rate_floor <= mca_bml_base_error_rate_ceiling) {
mca_bml_base_error_count = (int) (((double) mca_bml_base_error_rate_ceiling * rand())/(RAND_MAX+1.0));
}
#endif
return mca_btl_base_open();
}

Просмотреть файл

@ -22,9 +22,9 @@
#include "mpi.h"
#include "ompi/mca/bml/bml.h"
#include "ompi/mca/btl/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "bml_r2.h"
static int mca_bml_r2_component_register(void);
mca_bml_base_component_2_0_0_t mca_bml_r2_component = {
@ -39,7 +39,9 @@ mca_bml_base_component_2_0_0_t mca_bml_r2_component = {
OMPI_MINOR_VERSION, /* MCA component minor version */
OMPI_RELEASE_VERSION, /* MCA component release version */
mca_bml_r2_component_open, /* component open */
mca_bml_r2_component_close /* component close */
mca_bml_r2_component_close, /* component close */
NULL,
mca_bml_r2_component_register
},
{
/* The component is checkpoint ready */
@ -47,22 +49,23 @@ mca_bml_base_component_2_0_0_t mca_bml_r2_component = {
},
mca_bml_r2_component_init
};
/*
 * Register the r2 BML component's MCA variables.
 *
 * Sets mca_bml_r2.show_unreach_errors to true and registers it as the
 * boolean variable "show_unreach_errors" against the component version.
 * The registration result is ignored; always returns OMPI_SUCCESS.
 */
static int mca_bml_r2_component_register(void)
{
    static const char var_name[] = "show_unreach_errors";
    static const char var_help[] = "Show error message when procs are unreachable";

    /* Default value lives in the backing store handed to the variable system */
    mca_bml_r2.show_unreach_errors = true;

    (void) mca_base_component_var_register(&mca_bml_r2_component.bml_version,
                                           var_name, var_help,
                                           MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &mca_bml_r2.show_unreach_errors);

    return OMPI_SUCCESS;
}
/*
 * Component open hook for the r2 BML; always returns OMPI_SUCCESS.
 *
 * As listed, the body registers "show_unreach_errors" through
 * mca_base_param_reg_int() with a default of 1 and stores the result,
 * converted with OPAL_INT_TO_BOOL, in mca_bml_r2.show_unreach_errors.
 *
 * NOTE(review): mca_bml_r2_component_register() in this file registers the
 * same variable through mca_base_component_var_register(); these
 * mca_base_param_* lines look like the removed side of that change --
 * confirm which lines are live.
 */
int mca_bml_r2_component_open(void)
{
int tmp;
mca_base_param_reg_int(&mca_bml_r2_component.bml_version,
"show_unreach_errors",
"Show error message when procs are unreachable",
false,
false,
1,
&tmp);
mca_bml_r2.show_unreach_errors = OPAL_INT_TO_BOOL(tmp);
return OMPI_SUCCESS;
}

Просмотреть файл

@ -47,7 +47,7 @@ int mca_bml_r2_ft_event(int state)
int ret, p;
int loc_state;
int param_type = -1;
char *param_list = NULL;
const char **btl_list;
ompi_rte_collective_t coll;
if(OPAL_CRS_CHECKPOINT == state) {
@ -247,28 +247,22 @@ int mca_bml_r2_ft_event(int state)
* Re-open the BTL framework to get the full list of components.
* - but first clear the MCA value that was there
*/
param_type = mca_base_param_find("btl", NULL, NULL);
mca_base_param_lookup_string(param_type, &param_list);
param_type = mca_base_var_find("ompi", "btl", NULL, NULL);
btl_list = NULL;
mca_base_var_get_value(param_type, &btl_list, NULL, NULL);
opal_output_verbose(11, ompi_cr_output,
"Restart (Previous BTL MCA): <%s>\n", param_list);
if( NULL != param_list ) {
free(param_list);
param_list = NULL;
}
/* Deregister the old value, and refresh the file cache to grab any updates */
mca_base_param_deregister(param_type);
mca_base_param_recache_files(false);
"Restart (Previous BTL MCA): <%s>\n", btl_list ? btl_list[0] : "");
if( OMPI_SUCCESS != (ret = mca_btl_base_open()) ) {
opal_output(0, "bml:r2: ft_event(Restart): Failed to open BTL framework\n");
return ret;
}
param_type = mca_base_param_find("btl", NULL, NULL);
mca_base_param_lookup_string(param_type, &param_list);
/* The reregistered parameter is guaranteed to have the same index */
btl_list = NULL;
mca_base_var_get_value(param_type, &btl_list, NULL, NULL);
opal_output_verbose(11, ompi_cr_output,
"Restart (New BTL MCA): <%s>\n", param_list);
"Restart (New BTL MCA): <%s>\n", btl_list ? btl_list[0] : "");
if( NULL != param_list ) {
free(param_list);
param_list = NULL;

Просмотреть файл

@ -63,6 +63,7 @@ OMPI_DECLSPEC void mca_btl_base_dump(
int verbose);
OMPI_DECLSPEC int mca_btl_base_param_register(mca_base_component_t *version,
mca_btl_base_module_t *module);
OMPI_DECLSPEC int mca_btl_base_param_verify(mca_btl_base_module_t *module);
/*
* Globals

Просмотреть файл

@ -24,7 +24,6 @@
#include <stdio.h>
#include "opal/mca/base/mca_base_param.h"
#include "opal/util/output.h"
@ -35,18 +34,17 @@
/*
 * Register the generic per-BTL MCA variables for one module.
 *
 * Each variable is registered against the component `version` and is backed
 * directly by a field of *module (exclusivity, flags, rndv_eager_limit,
 * eager_limit, max_send_size, the RDMA pipeline settings and bandwidth), so
 * the module fields must already hold their defaults when this is called.
 * Registration results are ignored; the function returns whatever
 * mca_btl_base_param_verify(module) returns.
 *
 * NOTE(review): this listing also carries REG_INT / mca_base_param_reg_int
 * lines and an elided-hunk marker; it reads like the removed and added lines
 * of a change shown together. Those lines are left untouched here -- confirm
 * which ones are live.
 */
int mca_btl_base_param_register(mca_base_component_t *version,
mca_btl_base_module_t *module)
{
int value, err = 0;
char *msg;
#define REG_INT(N, H, D, L, T) \
mca_base_param_reg_int(version, N, H, false, false, D, &value); \
if(value < (L)) \
err = -1; \
else \
D = (T)value;
/* If this is ever triggered change the uint32_ts in mca_btl_base_module_t to unsigned ints */
assert(sizeof(unsigned int) == sizeof(uint32_t));
REG_INT("exclusivity", "BTL exclusivity (must be >= 0)",
module->btl_exclusivity, 0, uint32_t);
(void) mca_base_component_var_register(version, "exclusivity",
"BTL exclusivity (must be >= 0)",
MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&module->btl_exclusivity);
asprintf(&msg, "BTL bit flags (general flags: SEND=%d, PUT=%d, GET=%d, SEND_INPLACE=%d, RDMA_MATCHED=%d, HETEROGENEOUS_RDMA=%d; flags only used by the \"dr\" PML (ignored by others): ACK=%d, CHECKSUM=%d, RDMA_COMPLETION=%d; flags only used by the \"bfo\" PML (ignored by others): FAILOVER_SUPPORT=%d)",
MCA_BTL_FLAGS_SEND,
@ -59,60 +57,76 @@ int mca_btl_base_param_register(mca_base_component_t *version,
MCA_BTL_FLAGS_NEED_CSUM,
MCA_BTL_FLAGS_RDMA_COMPLETION,
MCA_BTL_FLAGS_FAILOVER_SUPPORT);
REG_INT("flags", msg,
module->btl_flags,
0, uint32_t);
(void) mca_base_component_var_register(version, "flags", msg,
MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&module->btl_flags);
free(msg);
REG_INT("rndv_eager_limit", "Size (in bytes, including header) of \"phase 1\" fragment sent for all large messages (must be >= 0 and <= eager_limit)",
module->btl_rndv_eager_limit, 0, size_t);
(void) mca_base_component_var_register(version, "rndv_eager_limit", "Size (in bytes, including header) of \"phase 1\" fragment sent for all large messages (must be >= 0 and <= eager_limit)",
MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&module->btl_rndv_eager_limit);
REG_INT("eager_limit", "Maximum size (in bytes, including header) of \"short\" messages (must be >= 1).",
module->btl_eager_limit, 1, size_t);
(void) mca_base_component_var_register(version, "eager_limit", "Maximum size (in bytes, including header) of \"short\" messages (must be >= 1).",
MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&module->btl_eager_limit);
REG_INT("max_send_size", "Maximum size (in bytes) of a single \"phase 2\" fragment of a long message when using the pipeline protocol (must be >= 1)",
module->btl_max_send_size, 1, size_t);
(void) mca_base_component_var_register(version, "max_send_size", "Maximum size (in bytes) of a single \"phase 2\" fragment of a long message when using the pipeline protocol (must be >= 1)",
MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&module->btl_max_send_size);
if(module->btl_flags & MCA_BTL_FLAGS_PUT) {
/* Obsolete synonym for rdma_pipeline_send_length -- no help
message needed because it's a "hidden" parameter. */
mca_base_param_reg_int(version, "min_rdma_size", "", true, false,
0, &value);
if(value != 0) {
opal_output(0, "min_rdma_size BTL parameter is deprecated. Please "
"use the rdma_pipeline_send_length BTL parameter instead\n");
module->btl_rdma_pipeline_send_length = (size_t)value;
}
if (NULL != module->btl_put) {
(void) mca_base_component_var_register(version, "rdma_pipeline_send_length", "Length of the \"phase 2\" portion of a large message (in bytes) when using the pipeline protocol. This part of the message will be split into fragments of size max_send_size and sent using send/receive semantics (must be >= 0; only relevant when the PUT flag is set)",
MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&module->btl_rdma_pipeline_send_length);
REG_INT("rdma_pipeline_send_length", "Length of the \"phase 2\" portion of a large message (in bytes) when using the pipeline protocol. This part of the message will be split into fragments of size max_send_size and sent using send/receive semantics (must be >= 0; only relevant when the PUT flag is set)",
module->btl_rdma_pipeline_send_length, 0, size_t);
(void) mca_base_component_var_register(version, "rdma_pipeline_frag_size", "Maximum size (in bytes) of a single \"phase 3\" fragment from a long message when using the pipeline protocol. These fragments will be sent using RDMA semantics (must be >= 1; only relevant when the PUT flag is set)",
MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&module->btl_rdma_pipeline_frag_size);
/* Obsolete synonym for rdma_pipeline_frag_size -- no help
message needed because it's a "hidden" parameter. */
mca_base_param_reg_int(version, "max_rdma_size", "", true, false,
0, &value);
if (0 != value) {
opal_output(0, "The max_rdma_size BTL parameter is deprecated. Please use the rdma_pipeline_frag_size BTL parameter instead");
module->btl_rdma_pipeline_frag_size = (size_t)value;
}
(void) mca_base_component_var_register(version, "min_rdma_pipeline_size", "Messages smaller than this size (in bytes) will not use the RDMA pipeline protocol. Instead, they will be split into fragments of max_send_size and sent using send/receive semantics (must be >=0, and is automatically adjusted up to at least (eager_limit+btl_rdma_pipeline_send_length); only relevant when the PUT flag is set)",
MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&module->btl_min_rdma_pipeline_size);
REG_INT("rdma_pipeline_frag_size", "Maximum size (in bytes) of a single \"phase 3\" fragment from a long message when using the pipeline protocol. These fragments will be sent using RDMA semantics (must be >= 1; only relevant when the PUT flag is set)",
module->btl_rdma_pipeline_frag_size, 1, size_t);
REG_INT("min_rdma_pipeline_size", "Messages smaller than this size (in bytes) will not use the RDMA pipeline protocol. Instead, they will be split into fragments of max_send_size and sent using send/receive semantics (must be >=0, and is automatically adjusted up to at least (eager_limit+btl_rdma_pipeline_send_length); only relevant when the PUT flag is set)",
module->btl_min_rdma_pipeline_size, 0, size_t);
if (module->btl_min_rdma_pipeline_size <
(module->btl_eager_limit + module->btl_rdma_pipeline_send_length)) {
module->btl_min_rdma_pipeline_size =
module->btl_eager_limit + module->btl_rdma_pipeline_send_length;
}
/* The "bandwidth" variable must be backed by btl_bandwidth. It was wired to
 * btl_latency, so setting the bandwidth variable overwrote the module's
 * latency and never reached btl_bandwidth.
 * NOTE(review): this registration sits inside the btl_put block and no
 * "latency" variable is registered through mca_base_component_var_register()
 * in the visible lines -- confirm whether both should be registered
 * unconditionally. */
(void) mca_base_component_var_register(version, "bandwidth", "Approximate maximum bandwidth of interconnect (0 = auto-detect value at run-time [not supported in all BTL modules], >= 1 = bandwidth in Mbps)",
MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&module->btl_bandwidth);
}
REG_INT("bandwidth", "Approximate maximum bandwidth of interconnect"
"(0 = auto-detect value at run-time [not supported in all BTL modules], >= 1 = bandwidth in Mbps)", module->btl_bandwidth, 0, uint32_t);
REG_INT("latency", "Approximate latency of interconnect (must be >= 0)",
module->btl_latency, 0, uint32_t);
return err;
return mca_btl_base_param_verify(module);
}
/*
 * Bring a BTL module's parameters into a consistent state.
 *
 *  - Clears MCA_BTL_FLAGS_GET / MCA_BTL_FLAGS_PUT from btl_flags when the
 *    module has no btl_get / btl_put function pointer.
 *  - Raises btl_min_rdma_pipeline_size to at least
 *    btl_eager_limit + btl_rdma_pipeline_send_length.
 *
 * Always returns OMPI_SUCCESS.
 */
int mca_btl_base_param_verify(mca_btl_base_module_t *module)
{
    /* A flag may only stay set when the matching entry point exists */
    if (NULL == module->btl_get) {
        module->btl_flags &= ~MCA_BTL_FLAGS_GET;
    }

    if (NULL == module->btl_put) {
        module->btl_flags &= ~MCA_BTL_FLAGS_PUT;
    }

    /* Enforce the lower bound on the RDMA pipeline threshold */
    if ((module->btl_eager_limit + module->btl_rdma_pipeline_send_length) >
        module->btl_min_rdma_pipeline_size) {
        module->btl_min_rdma_pipeline_size =
            module->btl_eager_limit + module->btl_rdma_pipeline_send_length;
    }

    return OMPI_SUCCESS;
}

Просмотреть файл

@ -27,7 +27,6 @@
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "ompi/mca/btl/btl.h"
#include "ompi/mca/btl/base/base.h"
@ -80,6 +79,48 @@ opal_list_t mca_btl_base_modules_initialized;
int mca_btl_base_already_opened = 0;
bool mca_btl_base_thread_multiple_override = false;
/**
 * Register the framework-level MCA variables of the BTL framework.
 *
 * Every backing global is assigned its default immediately before it is
 * registered, so the default does not depend on an initializer elsewhere
 * or on a value left over from an earlier registration.
 *
 * @param flags  Not used by this function.
 * @return OMPI_SUCCESS always; the return codes of the individual
 *         registrations are deliberately discarded.
 */
static int mca_btl_base_register(int flags)
{
    /* Verbosity of the framework output stream (registered as settable). */
    mca_btl_base_verbose = 0;
    (void) mca_base_var_register("ompi", "btl", "base", "verbose",
                                 "Verbosity level of the BTL framework",
                                 MCA_BASE_VAR_TYPE_INT, NULL, 0,
                                 MCA_BASE_VAR_FLAG_SETTABLE,
                                 OPAL_INFO_LVL_9,
                                 MCA_BASE_VAR_SCOPE_LOCAL,
                                 &mca_btl_base_verbose);

    /* Override the per-BTL "don't run if THREAD_MULTIPLE selected"
       embargo? */
    mca_btl_base_thread_multiple_override = false;
    (void) mca_base_var_register("ompi", "btl", "base", "thread_multiple_override",
                                 "Enable BTLs that are not normally enabled when MPI_THREAD_MULTIPLE is enabled (THIS IS FOR DEVELOPERS ONLY! SHOULD NOT BE USED BY END USERS!)",
                                 MCA_BASE_VAR_TYPE_BOOL, NULL, 0,
                                 MCA_BASE_VAR_FLAG_INTERNAL,
                                 OPAL_INFO_LVL_9,
                                 MCA_BASE_VAR_SCOPE_READONLY,
                                 &mca_btl_base_thread_multiple_override);

    /* Include / exclude lists: no default value, no description. */
    mca_btl_base_include = NULL;
    (void) mca_base_var_register("ompi", "btl", "base", "include", NULL,
                                 MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                 OPAL_INFO_LVL_9,
                                 MCA_BASE_VAR_SCOPE_READONLY,
                                 &mca_btl_base_include);

    mca_btl_base_exclude = NULL;
    (void) mca_base_var_register("ompi", "btl", "base", "exclude", NULL,
                                 MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                 OPAL_INFO_LVL_9,
                                 MCA_BASE_VAR_SCOPE_READONLY,
                                 &mca_btl_base_exclude);

    /* 1 is the default this parameter had under the previous
       mca_base_param registration. */
    mca_btl_base_warn_component_unused = 1;
    (void) mca_base_var_register("ompi", "btl", "base", "warn_component_unused",
                                 "This parameter is used to turn on warning messages when certain NICs are not used",
                                 MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                 OPAL_INFO_LVL_9,
                                 MCA_BASE_VAR_SCOPE_READONLY,
                                 &mca_btl_base_warn_component_unused);

    return OMPI_SUCCESS;
}
/**
* Function for finding and opening either all MCA components, or the one
* that was specifically requested via a MCA parameter.
@ -89,33 +130,19 @@ int mca_btl_base_open(void)
int i;
if( ++mca_btl_base_already_opened > 1 ) return OMPI_SUCCESS;
/* Verbose output */
mca_base_param_reg_int_name("btl",
"base_verbose",
"Verbosity level of the BTL framework",
false, false,
0,
&mca_btl_base_verbose);
(void) mca_btl_base_register(0);
/* Verbose output */
mca_btl_base_output = opal_output_open(NULL);
opal_output_set_verbosity(mca_btl_base_output, mca_btl_base_verbose);
/* Override the per-BTL "don't run if THREAD_MULTIPLE selected"
embargo? */
mca_base_param_reg_int_name("btl",
"base_thread_multiple_override",
"Enable BTLs that are not normally enabled when MPI_THREAD_MULTIPLE is enabled (THIS IS FOR DEVELOPERS ONLY! SHOULD NOT BE USED BY END USERS!)",
true, false,
0, &i);
mca_btl_base_thread_multiple_override = OPAL_INT_TO_BOOL(i);
/* Open up all available components */
/* Open up all available components */
if (OMPI_SUCCESS !=
mca_base_components_open("btl", mca_btl_base_output, mca_btl_base_static_components,
&mca_btl_base_components_opened, true)) {
return OMPI_ERROR;
}
if (OMPI_SUCCESS !=
mca_base_components_open("btl", mca_btl_base_output, mca_btl_base_static_components,
&mca_btl_base_components_opened, true)) {
return OMPI_ERROR;
}
/* Initialize the list so that in mca_btl_base_close(), we can
iterate over it (even if it's empty, as in the case of
@ -123,15 +150,6 @@ int mca_btl_base_open(void)
OBJ_CONSTRUCT(&mca_btl_base_modules_initialized, opal_list_t);
/* register parameters */
(void) mca_base_param_reg_string_name ("btl", "base_include", NULL, false, false, NULL,
&mca_btl_base_include);
(void) mca_base_param_reg_string_name ("btl", "base_exclude", NULL, false, false, NULL,
&mca_btl_base_exclude);
(void) mca_base_param_reg_int_name ("btl", "base_warn_component_unused",
"This parameter is used to turn on warning messages when certain NICs are not used",
false, false, 1, &mca_btl_base_warn_component_unused);
/* All done */
return OMPI_SUCCESS;
}

Просмотреть файл

@ -10,7 +10,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights
* Copyright (c) 2006-2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 NVIDIA Corporation. All rights reserved.
@ -31,8 +31,8 @@
* During library initialization, all available BTL components are
* loaded and opened via their mca_base_open_component_fn_t
* function. The BTL open function should register any mca parameters
* used to tune/adjust the behaviour of the BTL (mca_base_param_reg_int(),
* mca_base_param_reg_string()). Note that the open function may fail
* used to tune/adjust the behaviour of the BTL (mca_base_var_register() or
* mca_base_component_var_register()). Note that the open function may fail
* if the resources (e.g. shared libraries, etc) required by the network
* transport are not available.
*

Просмотреть файл

@ -25,7 +25,6 @@
#include "opal/util/opal_environ.h"
#include "ompi/constants.h"
#include "opal/mca/base/mca_base_param.h"
#include "ompi/runtime/ompi_module_exchange.h"
#include "ompi/mca/btl/base/btl_base_error.h"
#include "ompi/mca/common/mx/common_mx.h"
@ -72,30 +71,8 @@ mca_btl_mx_component_t mca_btl_mx_component = {
}
};
static int mca_btl_mx_component_register(void)
static int mca_btl_mx_component_verify(void)
{
mca_base_param_reg_int( (mca_base_component_t*)&mca_btl_mx_component, "max_btls",
"Maximum number of accepted Myrinet cards",
false, false, 8, &mca_btl_mx_component.mx_max_btls );
mca_base_param_reg_int( (mca_base_component_t*)&mca_btl_mx_component, "timeout",
"Timeout for connections",
false, false, MX_INFINITE, &mca_btl_mx_component.mx_timeout );
mca_base_param_reg_int( (mca_base_component_t*)&mca_btl_mx_component, "retries",
"Number of retries for each new connection before considering the peer as unreacheable",
false, false, 20, &mca_btl_mx_component.mx_connection_retries );
mca_base_param_reg_int( (mca_base_component_t*)&mca_btl_mx_component, "filter",
"Unique ID for the application (used to connect to the peers)",
false, false, 0xdeadbeef, &mca_btl_mx_component.mx_filter );
mca_base_param_reg_int( (mca_base_component_t*)&mca_btl_mx_component, "self",
"Enable the MX support for self communications",
false, false, 0, &mca_btl_mx_component.mx_support_self );
mca_base_param_reg_int( (mca_base_component_t*)&mca_btl_mx_component, "shared_mem",
"Enable the MX support for shared memory",
false, false, 0, &mca_btl_mx_component.mx_support_sharedmem );
mca_base_param_reg_int( (mca_base_component_t*)&mca_btl_mx_component, "bonding",
"Integrate MX library bonding. Less than 0 is system default, everything else will set the MX_BONDING to the value.",
false, false, 1, &mca_btl_mx_component.mx_bonding );
if( 0 >= mca_btl_mx_component.mx_bonding ) {
char* value = getenv("MX_BONDING");
if( NULL == value ) {
@ -110,34 +87,117 @@ static int mca_btl_mx_component_register(void)
snprintf( value, 8, "%d\n", mca_btl_mx_component.mx_bonding );
opal_setenv( "MX_BONDING", value, true, &environ );
}
#ifdef HAVE_MX_REGISTER_UNEXP_HANDLER
mca_base_param_reg_int( (mca_base_component_t*)&mca_btl_mx_component, "register_unexp",
"Enable the MX support for the unexpected request handler (Open MPI matching)",
false, false, 0, &mca_btl_mx_component.mx_use_unexpected );
#endif /* HAVE_MX_REGISTER_UNEXP_HANDLER */
mca_base_param_reg_int( (mca_base_component_t*)&mca_btl_mx_component, "free_list_num",
"Number of allocated default request",
false, false, 8, &mca_btl_mx_component.mx_free_list_num );
mca_base_param_reg_int( (mca_base_component_t*)&mca_btl_mx_component, "free_list_inc",
"Number of request we allocate each time we miss some",
false, false, 32, &mca_btl_mx_component.mx_free_list_inc );
mca_base_param_reg_int( (mca_base_component_t*)&mca_btl_mx_component, "free_list_max",
"Maximum number of request this device is allowed to allocate",
false, false, 1024, &mca_btl_mx_component.mx_free_list_max );
mca_base_param_reg_int( (mca_base_component_t*)&mca_btl_mx_component, "max_posted_recv",
"Number of received posted in advance. Increasing this number for"
" communication bound application can lead to visible improvement"
" in performances",
false, false, 16, &mca_btl_mx_component.mx_max_posted_recv );
return mca_btl_base_param_verify(&mca_btl_mx_module.super);
}
static int mca_btl_mx_component_register(void)
{
mca_btl_mx_component.mx_max_btls = 8;
(void) mca_base_component_var_register(&mca_btl_mx_component.btl_version, "max_btls",
"Maximum number of accepted Myrinet cards",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_btl_mx_component.mx_max_btls);
mca_btl_mx_component.mx_timeout = MX_INFINITE;
(void) mca_base_component_var_register(&mca_btl_mx_component.btl_version, "timeout",
"Timeout for connections",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_btl_mx_component.mx_timeout);
mca_btl_mx_component.mx_connection_retries = 20;
(void) mca_base_component_var_register(&mca_btl_mx_component.btl_version, "retries",
"Number of retries for each new connection before considering the peer as unreacheable",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_btl_mx_component.mx_connection_retries);
mca_btl_mx_component.mx_filter = 0xdeadbeef;
(void) mca_base_component_var_register(&mca_btl_mx_component.btl_version, "filter",
"Unique ID for the application (used to connect to the peers)",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_btl_mx_component.mx_filter);
mca_btl_mx_component.mx_support_self = 0;
(void) mca_base_component_var_register(&mca_btl_mx_component.btl_version, "self",
"Enable the MX support for self communications",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_btl_mx_component.mx_support_self);
mca_btl_mx_component.mx_support_sharedmem = 0;
(void) mca_base_component_var_register(&mca_btl_mx_component.btl_version, "shared_mem",
"Enable the MX support for shared memory",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_btl_mx_component.mx_support_sharedmem);
mca_btl_mx_component.mx_bonding = 1;
(void) mca_base_component_var_register(&mca_btl_mx_component.btl_version, "bonding",
"Integrate MX library bonding. Less than 0 is system default, everything else will set the MX_BONDING to the value.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_btl_mx_component.mx_bonding);
#ifdef HAVE_MX_REGISTER_UNEXP_HANDLER
mca_btl_mx_component.mx_use_unexpected = 0;
(void) mca_base_component_var_register(&mca_btl_mx_component.btl_version, "register_unexp",
"Enable the MX support for the unexpected request handler (Open MPI matching)",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_btl_mx_component.mx_use_unexpected);
#endif /* HAVE_MX_REGISTER_UNEXP_HANDLER */
mca_btl_mx_component.mx_free_list_num = 8;
(void) mca_base_component_var_register(&mca_btl_mx_component.btl_version, "free_list_num",
"Number of allocated default request",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_btl_mx_component.mx_free_list_num);
mca_btl_mx_component.mx_free_list_inc = 32;
(void) mca_base_component_var_register(&mca_btl_mx_component.btl_version, "free_list_inc",
"Number of request we allocate each time we miss some",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_btl_mx_component.mx_free_list_inc);
mca_btl_mx_component.mx_free_list_max = 1024;
(void) mca_base_component_var_register(&mca_btl_mx_component.btl_version, "free_list_max",
"Maximum number of request this device is allowed to allocate",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_btl_mx_component.mx_free_list_max);
mca_btl_mx_component.mx_max_posted_recv = 16;
(void) mca_base_component_var_register(&mca_btl_mx_component.btl_version, "max_posted_recv",
"Number of received posted in advance. Increasing this number for communication "
"bound application can lead to visible improvement in performances",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_btl_mx_component.mx_max_posted_recv);
#if MX_HAVE_MAPPER_STATE
mca_base_param_reg_string( (mca_base_component_t*)&mca_btl_mx_component, "if_include",
"Myrinet card to use (last 6 digits from the mapper MAC)",
false, false, NULL, &mca_btl_mx_component.mx_if_include );
mca_base_param_reg_string( (mca_base_component_t*)&mca_btl_mx_component, "if_exclude",
"Myrinet card to avoid (last 6 digits from the mapper MAC)",
false, false, NULL, &mca_btl_mx_component.mx_if_exclude );
mca_btl_mx_component.mx_if_include = NULL;
(void) mca_base_component_var_register(&mca_btl_mx_component.btl_version, "if_include",
"Myrinet card to use (last 6 digits from the mapper MAC)",
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_btl_mx_component.mx_if_include);
mca_btl_mx_component.mx_if_exclude = NULL;
(void) mca_base_component_var_register(&mca_btl_mx_component.btl_version, "if_exclude",
"Myrinet card to avoid (last 6 digits from the mapper MAC)",
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_btl_mx_component.mx_if_exclude);
#endif /* MX_HAVE_MAPPER_STATE */
mca_btl_mx_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_DEFAULT;
@ -159,10 +219,11 @@ static int mca_btl_mx_component_register(void)
mca_btl_mx_module.super.btl_seg_size = sizeof (mca_btl_mx_segment_t);
mca_btl_mx_module.super.btl_bandwidth = 2000;
mca_btl_mx_module.super.btl_latency = 5;
mca_btl_base_param_register(&mca_btl_mx_component.super.btl_version,
&mca_btl_mx_module.super);
return OMPI_SUCCESS;
return mca_btl_mx_component_verify();
}
@ -173,11 +234,18 @@ static int mca_btl_mx_component_register(void)
static int mca_btl_mx_component_open(void)
{
int ret;
/* initialize state */
mca_btl_mx_component.mx_num_btls = 0;
mca_btl_mx_component.mx_btls = NULL;
mca_btl_mx_component.mx_use_unexpected = 0;
ret = mca_btl_mx_component_verify();
if (OMPI_SUCCESS != ret) {
return ret;
}
/* initialize objects */
OBJ_CONSTRUCT(&mca_btl_mx_component.mx_procs, opal_list_t);
if( 0 == mca_btl_mx_component.mx_support_sharedmem )

Просмотреть файл

@ -206,41 +206,44 @@ struct mca_btl_openib_component_t {
uint32_t ib_cq_size[2]; /**< Max outstanding CQE on the CQ */
int32_t ib_max_inline_data; /**< Max size of inline data */
uint32_t ib_pkey_val;
uint32_t ib_psn;
uint32_t ib_qp_ous_rd_atom;
int ib_max_inline_data; /**< Max size of inline data */
unsigned int ib_pkey_val;
unsigned int ib_psn;
unsigned int ib_qp_ous_rd_atom;
uint32_t ib_mtu;
uint32_t ib_min_rnr_timer;
uint32_t ib_timeout;
uint32_t ib_retry_count;
uint32_t ib_rnr_retry;
uint32_t ib_max_rdma_dst_ops;
uint32_t ib_service_level;
unsigned int ib_min_rnr_timer;
unsigned int ib_timeout;
unsigned int ib_retry_count;
unsigned int ib_rnr_retry;
unsigned int ib_max_rdma_dst_ops;
unsigned int ib_service_level;
#if (ENABLE_DYNAMIC_SL)
uint32_t ib_path_record_service_level;
unsigned int ib_path_record_service_level;
#endif
int32_t use_eager_rdma;
int32_t eager_rdma_threshold; /**< After this number of msg, use RDMA for short messages, always */
int32_t eager_rdma_num;
int use_eager_rdma;
int eager_rdma_threshold; /**< After this number of msg, use RDMA for short messages, always */
int eager_rdma_num;
int32_t max_eager_rdma;
uint32_t btls_per_lid;
uint32_t max_lmc;
int32_t apm_lmc;
int32_t apm_ports;
uint32_t buffer_alignment; /**< Preferred communication buffer alignment in Bytes (must be power of two) */
unsigned int btls_per_lid;
unsigned int max_lmc;
int apm_lmc;
int apm_ports;
unsigned int buffer_alignment; /**< Preferred communication buffer alignment in Bytes (must be power of two) */
#if OPAL_HAVE_THREADS
int32_t error_counter; /**< Counts number on error events that we got on all devices */
int async_pipe[2]; /**< Pipe for comunication with async event thread */
int async_comp_pipe[2]; /**< Pipe for async thread comunication with main thread */
pthread_t async_thread; /**< Async thread that will handle fatal errors */
uint32_t use_async_event_thread; /**< Use the async event handler */
bool use_async_event_thread; /**< Use the async event handler */
mca_btl_openib_srq_manager_t srq_manager; /**< Hash table for all BTL SRQs */
#if BTL_OPENIB_FAILOVER_ENABLED
uint32_t port_error_failover; /**< Report port errors to speed up failover */
bool port_error_failover; /**< Report port errors to speed up failover */
#endif
#endif
btl_openib_device_type_t device_type;
/* declare as an int instead of btl_openib_device_type_t since there is no
guarantee about the size of an enum. this value will be registered as an
integer with the MCA variable system */
int device_type;
char *if_include;
char **if_include_list;
char *if_exclude;
@ -277,10 +280,10 @@ struct mca_btl_openib_component_t {
that they all exist) */
char **if_list;
bool use_message_coalescing;
uint32_t cq_poll_ratio;
uint32_t cq_poll_progress;
uint32_t cq_poll_batch;
uint32_t eager_rdma_poll_ratio;
unsigned int cq_poll_ratio;
unsigned int cq_poll_progress;
unsigned int cq_poll_batch;
unsigned int eager_rdma_poll_ratio;
#ifdef HAVE_IBV_FORK_INIT
/** Whether we want fork support or not */
int want_fork_support;
@ -309,7 +312,7 @@ struct mca_btl_openib_component_t {
void* (*previous_malloc_hook)(size_t __size, const void*);
#endif
#if OMPI_CUDA_SUPPORT /* CUDA_ASYNC_SEND */
int cuda_async_send;
bool cuda_async_send;
int cuda_async_recv;
#endif /* OMPI_CUDA_SUPPORT */
}; typedef struct mca_btl_openib_component_t mca_btl_openib_component_t;

Просмотреть файл

@ -49,7 +49,6 @@
#include "opal/sys/atomic.h"
#include "opal/util/argv.h"
#include "opal/memoryhooks/memory.h"
#include "opal/mca/base/mca_base_param.h"
/* Define this before including hwloc.h so that we also get the hwloc
verbs helper header file, too. We have to do this level of
indirection because the hwloc subsystem is a component -- we don't
@ -173,7 +172,7 @@ static int btl_openib_component_register(void)
/* if_include and if_exclude need to be mutually exclusive */
if (OPAL_SUCCESS !=
mca_base_param_check_exclusive_string(
mca_base_var_check_exclusive("ompi",
mca_btl_openib_component.super.btl_version.mca_type_name,
mca_btl_openib_component.super.btl_version.mca_component_name,
"if_include",
@ -620,15 +619,15 @@ static int openib_dereg_mr(void *reg_data, mca_mpool_base_registration_t *reg)
openib_reg->mr = NULL;
return OMPI_SUCCESS;
}
static inline int param_register_int(const char* param_name, int default_value)
static inline int param_register_uint(const char* param_name, unsigned int default_value, unsigned int *storage)
{
int param_value = default_value;
(void) mca_base_param_reg_int (&mca_btl_openib_component.super.btl_version,
param_name, NULL, false, false, default_value,
&param_value);
return param_value;
*storage = default_value;
(void) mca_base_component_var_register(&mca_btl_openib_component.super.btl_version,
param_name, NULL, MCA_BASE_VAR_TYPE_UNSIGNED_INT,
NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, storage);
return *storage;
}
#if OPAL_HAVE_THREADS
@ -809,37 +808,31 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device,
/* Check bandwidth configured for this device */
sprintf(param, "bandwidth_%s", ibv_get_device_name(device->ib_dev));
openib_btl->super.btl_bandwidth =
param_register_int(param, openib_btl->super.btl_bandwidth);
param_register_uint(param, openib_btl->super.btl_bandwidth, &openib_btl->super.btl_bandwidth);
/* Check bandwidth configured for this device/port */
sprintf(param, "bandwidth_%s:%d", ibv_get_device_name(device->ib_dev),
port_num);
openib_btl->super.btl_bandwidth =
param_register_int(param, openib_btl->super.btl_bandwidth);
param_register_uint(param, openib_btl->super.btl_bandwidth, &openib_btl->super.btl_bandwidth);
/* Check bandwidth configured for this device/port/LID */
sprintf(param, "bandwidth_%s:%d:%d",
ibv_get_device_name(device->ib_dev), port_num, lid);
openib_btl->super.btl_bandwidth =
param_register_int(param, openib_btl->super.btl_bandwidth);
param_register_uint(param, openib_btl->super.btl_bandwidth, &openib_btl->super.btl_bandwidth);
/* Check latency configured for this device */
sprintf(param, "latency_%s", ibv_get_device_name(device->ib_dev));
openib_btl->super.btl_latency =
param_register_int(param, openib_btl->super.btl_latency);
param_register_uint(param, openib_btl->super.btl_latency, &openib_btl->super.btl_latency);
/* Check latency configured for this device/port */
sprintf(param, "latency_%s:%d", ibv_get_device_name(device->ib_dev),
port_num);
openib_btl->super.btl_latency =
param_register_int(param, openib_btl->super.btl_latency);
param_register_uint(param, openib_btl->super.btl_latency, &openib_btl->super.btl_latency);
/* Check latency configured for this device/port/LID */
sprintf(param, "latency_%s:%d:%d", ibv_get_device_name(device->ib_dev),
port_num, lid);
openib_btl->super.btl_latency =
param_register_int(param, openib_btl->super.btl_latency);
param_register_uint(param, openib_btl->super.btl_latency, &openib_btl->super.btl_latency);
/* Auto-detect the port bandwidth */
if (0 == openib_btl->super.btl_bandwidth) {
@ -2443,7 +2436,7 @@ btl_openib_component_init(int *num_btl_modules,
int distance;
int index, value;
bool found;
mca_base_param_source_t source;
mca_base_var_source_t source;
int list_count = 0;
/* initialization */
@ -2520,36 +2513,20 @@ btl_openib_component_init(int *num_btl_modules,
support */
if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) ==
((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & value)) {
ret = 0;
index = mca_base_param_find("mpi", NULL, "leave_pinned");
if (index >= 0) {
if (OPAL_SUCCESS == mca_base_param_lookup_int(index, &value) &&
-1 == value) {
++ret;
}
}
index = mca_base_param_find("mpi", NULL, "leave_pinned_pipeline");
if (index >= 0) {
if (OPAL_SUCCESS == mca_base_param_lookup_int(index, &value) &&
OPAL_SUCCESS == mca_base_param_lookup_source(index, &source,
NULL)) {
if (0 == value && MCA_BASE_PARAM_SOURCE_DEFAULT == source) {
++ret;
}
}
}
/* If we were good on both parameters, then set leave_pinned=1 */
if (2 == ret) {
if (0 == ompi_mpi_leave_pinned_pipeline &&
-1 == ompi_mpi_leave_pinned) {
ompi_mpi_leave_pinned = 1;
ompi_mpi_leave_pinned_pipeline = 0;
}
} else {
ompi_mpi_leave_pinned = 0;
ompi_mpi_leave_pinned_pipeline = 0;
}
index = mca_base_param_find("btl", "openib", "max_inline_data");
index = mca_base_var_find("ompi", "btl", "openib", "max_inline_data");
if (index >= 0) {
if (OPAL_SUCCESS == mca_base_param_lookup_source(index, &source,
NULL)) {
if (OPAL_SUCCESS == mca_base_var_get_value(index, NULL, &source, NULL)) {
if (-1 == mca_btl_openib_component.ib_max_inline_data &&
MCA_BASE_PARAM_SOURCE_DEFAULT == source) {
MCA_BASE_VAR_SOURCE_DEFAULT == source) {
/* If the user has not explicitly set this MCA parameter
use max_inline_data value specified in the
device-specific parameters INI file */

Просмотреть файл

@ -30,7 +30,6 @@
#include <unistd.h>
#endif
#include "opal/mca/base/mca_base_param.h"
#include "opal/util/show_help.h"
#include "btl_openib.h"

Просмотреть файл

@ -29,7 +29,6 @@
#include "opal/util/bit_ops.h"
#include "opal/mca/installdirs/installdirs.h"
#include "opal/util/output.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/util/show_help.h"
#include "btl_openib.h"
#include "btl_openib_mca.h"
@ -61,6 +60,31 @@ enum {
REGSTR_MAX = 0x88
};
/* Value/name pairs for the IBV_MTU_* constants. The final {0, NULL} entry
   appears to act as the list terminator. NOTE(review): presumably used to
   build an enumerator for an MTU variable -- the registration is not
   visible here, confirm. */
static mca_base_var_enum_value_t ib_mtu_values[] = {
{IBV_MTU_256, "256B"},
{IBV_MTU_512, "512B"},
{IBV_MTU_1024, "1k"},
{IBV_MTU_2048, "2k"},
{IBV_MTU_4096, "4k"},
{0, NULL}
};
/* Value/name pairs handed to mca_base_var_enum_create() for the
   "device_type" variable. InfiniBand and iWARP each have a long and a
   short name mapping to the same value. */
static mca_base_var_enum_value_t device_type_values[] = {
{BTL_OPENIB_DT_IB, "infiniband"},
{BTL_OPENIB_DT_IB, "ib"},
{BTL_OPENIB_DT_IWARP, "iwarp"},
{BTL_OPENIB_DT_IWARP, "iw"},
{BTL_OPENIB_DT_ALL, "all"},
{0, NULL}
};
/* File-scope storage. NOTE(review): presumably backing stores for MCA
   variables registered further down in this file -- confirm. */
static int btl_openib_cq_size;
/* Initialized from the build-time OMPI_HAVE_IBV_FORK_INIT setting. */
static bool btl_openib_have_fork_support = OMPI_HAVE_IBV_FORK_INIT;
/* Only present when failover support is compiled in. */
#if BTL_OPENIB_FAILOVER_ENABLED
static int btl_openib_verbose_failover;
static bool btl_openib_failover_enabled = true;
#endif
/*
* utility routine for string parameter registration
@ -68,26 +92,29 @@ enum {
/**
 * Register a string MCA variable on the openib component.
 *
 * @param param_name             Name of the variable.
 * @param deprecated_param_name  Optional old name; when non-NULL it is
 *                               registered as a deprecated synonym.
 * @param param_desc             Description of the variable.
 * @param default_value          Default string; may be NULL.
 * @param storage                Backing store for the variable; it must
 *                               live at least as long as the variable.
 * @param flags                  REGSTR_* flags controlling validation.
 * @return OMPI_SUCCESS, or OMPI_ERR_BAD_PARAM when validation rejects the
 *         value (a message is printed with opal_output()).
 */
static int reg_string(const char* param_name,
                      const char* deprecated_param_name,
                      const char* param_desc,
                      const char* default_value, char **storage,
                      int flags)
{
    int index;

    /* Seed the backing store with the default. The MCA variable system
       will not change this pointer's target: string values are duplicated
       when the variable is registered. */
    *storage = (char *) default_value;
    index = mca_base_component_var_register(&mca_btl_openib_component.super.btl_version,
                                            param_name, param_desc, MCA_BASE_VAR_TYPE_STRING,
                                            NULL, 0, 0, OPAL_INFO_LVL_9,
                                            MCA_BASE_VAR_SCOPE_READONLY, storage);
    if (NULL != deprecated_param_name) {
        (void) mca_base_var_register_synonym(index, "ompi", "btl", "openib",
                                             deprecated_param_name,
                                             MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
    }

    /* Test the registered string (*storage), not the out-pointer itself,
       so that a NULL value never reaches strlen().
       NOTE(review): the check fires when REGSTR_EMPTY_OK is SET, which
       reads inverted relative to the flag's name. It is kept as-is because
       the callers visible here pass 0 and rely on no check being made --
       confirm the intended meaning before changing it. */
    if (0 != (flags & REGSTR_EMPTY_OK) && (NULL == *storage || 0 == strlen(*storage))) {
        opal_output(0, "Bad parameter value for parameter \"%s\"",
                    param_name);
        return OMPI_ERR_BAD_PARAM;
    }

    return OMPI_SUCCESS;
}
@ -98,31 +125,89 @@ static int reg_string(const char* param_name,
static int reg_int(const char* param_name,
const char* deprecated_param_name,
const char* param_desc,
int default_value, int *out_value, int flags)
int default_value, int *storage, int flags)
{
int index, value;
index = mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version,
param_name, param_desc, false, false,
default_value, NULL);
if (NULL != deprecated_param_name) {
mca_base_param_reg_syn(index,
&mca_btl_openib_component.super.btl_version,
deprecated_param_name, true);
}
mca_base_param_lookup_int(index, &value);
int index;
if (0 != (flags & REGINT_NEG_ONE_OK) && -1 == value) {
*out_value = value;
*storage = default_value;
index = mca_base_component_var_register(&mca_btl_openib_component.super.btl_version,
param_name, param_desc, MCA_BASE_VAR_TYPE_INT,
NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, storage);
if (NULL != deprecated_param_name) {
(void) mca_base_var_register_synonym(index, "ompi", "btl", "openib",
deprecated_param_name,
MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
}
if (0 != (flags & REGINT_NEG_ONE_OK) && -1 == *storage) {
return OMPI_SUCCESS;
}
if ((0 != (flags & REGINT_GE_ZERO) && value < 0) ||
(0 != (flags & REGINT_GE_ONE) && value < 1) ||
(0 != (flags & REGINT_NONZERO) && 0 == value)) {
if ((0 != (flags & REGINT_GE_ZERO) && *storage < 0) ||
(0 != (flags & REGINT_GE_ONE) && *storage < 1) ||
(0 != (flags & REGINT_NONZERO) && 0 == *storage)) {
opal_output(0, "Bad parameter value for parameter \"%s\"",
param_name);
return OMPI_ERR_BAD_PARAM;
}
*out_value = value;
return OMPI_SUCCESS;
}
/*
 * utility routine for unsigned integer parameter registration
 *
 * Seeds *storage with default_value, registers it as an unsigned int
 * variable of the openib component (plus a deprecated synonym when
 * deprecated_param_name is non-NULL) and then validates the result.
 * Only REGINT_GE_ONE and REGINT_NONZERO are examined; for an unsigned
 * value both amount to "must not be 0".  Returns OMPI_ERR_BAD_PARAM
 * (after printing a message) when the check fails, OMPI_SUCCESS otherwise.
 */
static int reg_uint(const char* param_name,
                    const char* deprecated_param_name,
                    const char* param_desc,
                    unsigned int default_value, unsigned int *storage,
                    int flags)
{
    int var_index;

    *storage = default_value;
    var_index = mca_base_component_var_register(
        &mca_btl_openib_component.super.btl_version,
        param_name, param_desc,
        MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, 0,
        OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
        storage);

    if (deprecated_param_name != NULL) {
        (void) mca_base_var_register_synonym(var_index, "ompi", "btl", "openib",
                                             deprecated_param_name,
                                             MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
    }

    /* Either flag forbids zero, which is the only value an unsigned
       quantity can take below 1. */
    if ((flags & (REGINT_GE_ONE | REGINT_NONZERO)) != 0 && *storage == 0) {
        opal_output(0, "Bad parameter value for parameter \"%s\"",
                    param_name);
        return OMPI_ERR_BAD_PARAM;
    }

    return OMPI_SUCCESS;
}
/*
 * utility routine for boolean parameter registration
 *
 * Seeds *storage with default_value and registers it as a bool variable
 * of the openib component; when deprecated_param_name is non-NULL that
 * name is added as a deprecated synonym.  No validation is performed and
 * OMPI_SUCCESS is always returned.
 */
static int reg_bool(const char* param_name,
                    const char* deprecated_param_name,
                    const char* param_desc,
                    bool default_value, bool *storage)
{
    int var_index;

    *storage = default_value;
    var_index = mca_base_component_var_register(
        &mca_btl_openib_component.super.btl_version,
        param_name, param_desc,
        MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
        OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
        storage);

    if (deprecated_param_name != NULL) {
        (void) mca_base_var_register_synonym(var_index, "ompi", "btl", "openib",
                                             deprecated_param_name,
                                             MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
    }

    return OMPI_SUCCESS;
}
@ -131,10 +216,11 @@ static int reg_int(const char* param_name,
*/
int btl_openib_register_mca_params(void)
{
mca_base_var_enum_t *new_enum;
char default_qps[100];
uint32_t mid_qp_size;
char *msg, *str, *pkey;
int ival, ival2, ret, tmp;
char *msg, *str;
int ret, tmp;
ret = OMPI_SUCCESS;
#define CHECK(expr) do {\
@ -143,61 +229,43 @@ int btl_openib_register_mca_params(void)
} while (0)
/* register openib component parameters */
CHECK(reg_int("verbose", NULL,
CHECK(reg_bool("verbose", NULL,
"Output some verbose OpenIB BTL information "
"(0 = no output, nonzero = output)", 0, &ival, 0));
mca_btl_openib_component.verbose = (0 != ival);
"(0 = no output, nonzero = output)", false,
&mca_btl_openib_component.verbose));
CHECK(reg_int("warn_no_device_params_found",
CHECK(reg_bool("warn_no_device_params_found",
"warn_no_hca_params_found",
"Warn when no device-specific parameters are found in the INI file specified by the btl_openib_device_param_files MCA parameter "
"(0 = do not warn; any other value = warn)",
1, &ival, 0));
mca_btl_openib_component.warn_no_device_params_found = (0 != ival);
CHECK(reg_int("warn_default_gid_prefix", NULL,
true, &mca_btl_openib_component.warn_no_device_params_found));
CHECK(reg_bool("warn_default_gid_prefix", NULL,
"Warn when there is more than one active ports and at least one of them connected to the network with only default GID prefix configured "
"(0 = do not warn; any other value = warn)",
1, &ival, 0));
mca_btl_openib_component.warn_default_gid_prefix = (0 != ival);
CHECK(reg_int("warn_nonexistent_if", NULL,
true, &mca_btl_openib_component.warn_default_gid_prefix));
CHECK(reg_bool("warn_nonexistent_if", NULL,
"Warn if non-existent devices and/or ports are specified in the btl_openib_if_[in|ex]clude MCA parameters "
"(0 = do not warn; any other value = warn)",
1, &ival, 0));
mca_btl_openib_component.warn_nonexistent_if = (0 != ival);
true, &mca_btl_openib_component.warn_nonexistent_if));
/* If we print a warning about not having enough registered memory
available, do we want to abort? */
CHECK(reg_int("abort_not_enough_reg_mem", NULL,
CHECK(reg_bool("abort_not_enough_reg_mem", NULL,
"If there is not enough registered memory available on the system for Open MPI to function properly, Open MPI will issue a warning. If this MCA parameter is set to true, then Open MPI will also abort all MPI jobs "
"(0 = warn, but do not abort; any other value = warn and abort)",
0, &ival, 0));
mca_btl_openib_component.abort_not_enough_reg_mem = (0 != ival);
false, &mca_btl_openib_component.abort_not_enough_reg_mem));
CHECK(reg_int("poll_cq_batch", NULL,
"Retrieve up to poll_cq_batch completions from CQ",
MCA_BTL_OPENIB_CQ_POLL_BATCH_DEFAULT, &ival, REGINT_GE_ONE));
CHECK(reg_uint("poll_cq_batch", NULL,
"Retrieve up to poll_cq_batch completions from CQ",
MCA_BTL_OPENIB_CQ_POLL_BATCH_DEFAULT, &mca_btl_openib_component.cq_poll_batch,
REGINT_GE_ONE));
mca_btl_openib_component.cq_poll_batch = (ival > MCA_BTL_OPENIB_CQ_POLL_BATCH_DEFAULT)? MCA_BTL_OPENIB_CQ_POLL_BATCH_DEFAULT : ival;
if (OMPI_HAVE_IBV_FORK_INIT) {
ival2 = -1;
} else {
ival2 = 0;
}
CHECK(reg_int("want_fork_support", NULL,
"Whether fork support is desired or not "
"(negative = try to enable fork support, but continue even if it is not available, 0 = do not enable fork support, positive = try to enable fork support and fail if it is not available)",
ival2, &ival, 0));
#ifdef HAVE_IBV_FORK_INIT
mca_btl_openib_component.want_fork_support = ival;
#else
if (0 != ival) {
opal_show_help("help-mpi-btl-openib.txt",
"ibv_fork requested but not supported", true,
ompi_process_info.nodename);
return OMPI_ERR_BAD_PARAM;
}
#endif
OMPI_HAVE_IBV_FORK_INIT ? -1 : 0, &mca_btl_openib_component.want_fork_support, 0));
asprintf(&str, "%s/mca-btl-openib-device-params.ini",
opal_install_dirs.pkgdatadir);
@ -210,25 +278,18 @@ int btl_openib_register_mca_params(void)
0));
free(str);
CHECK(reg_string("device_type", NULL,
"Specify to only use IB or iWARP network adapters "
"(infiniband = only use InfiniBand HCAs; iwarp = only use iWARP NICs; all = use any available adapters)",
"all", &str, 0));
if (0 == strcasecmp(str, "ib") ||
0 == strcasecmp(str, "infiniband")) {
mca_btl_openib_component.device_type = BTL_OPENIB_DT_IB;
} else if (0 == strcasecmp(str, "iw") ||
0 == strcasecmp(str, "iwarp")) {
mca_btl_openib_component.device_type = BTL_OPENIB_DT_IWARP;
} else if (0 == strcasecmp(str, "all")) {
mca_btl_openib_component.device_type = BTL_OPENIB_DT_ALL;
} else {
opal_show_help("help-mpi-btl-openib.txt",
"ibv_fork requested but not supported", true,
ompi_process_info.nodename);
return OMPI_ERR_BAD_PARAM;
}
free(str);
(void)mca_base_var_enum_create("btl_openib_device_types", device_type_values, &new_enum);
mca_btl_openib_component.device_type = BTL_OPENIB_DT_ALL;
tmp = mca_base_component_var_register(&mca_btl_openib_component.super.btl_version,
"device_type", "Specify to only use IB or iWARP "
"network adapters (infiniband = only use InfiniBand "
"HCAs; iwarp = only use iWARP NICs; all = use any "
"available adapters)", MCA_BASE_VAR_TYPE_INT, new_enum,
0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_btl_openib_component.device_type);
if (0 > tmp) ret = tmp;
OBJ_RELEASE(new_enum);
CHECK(reg_int("max_btls", NULL,
"Maximum number of device ports to use "
@ -265,38 +326,33 @@ int btl_openib_register_mca_params(void)
"(CQs are automatically sized based on the number "
"of peer MPI processes; this value determines the "
"*minimum* size of all CQs)",
8192, &ival, REGINT_GE_ONE));
8192, &btl_openib_cq_size, REGINT_GE_ONE));
mca_btl_openib_component.ib_cq_size[BTL_OPENIB_LP_CQ] =
mca_btl_openib_component.ib_cq_size[BTL_OPENIB_HP_CQ] = (uint32_t) ival;
mca_btl_openib_component.ib_cq_size[BTL_OPENIB_HP_CQ] = (uint32_t) btl_openib_cq_size;
CHECK(reg_int("max_inline_data", "ib_max_inline_data",
"Maximum size of inline data segment "
"(-1 = run-time probe to discover max value, otherwise must be >= 0). "
"If not explicitly set, use max_inline_data from "
"the INI file containing device-specific parameters",
-1, &ival, REGINT_NEG_ONE_OK | REGINT_GE_ZERO));
mca_btl_openib_component.ib_max_inline_data = (int32_t) ival;
-1, &mca_btl_openib_component.ib_max_inline_data,
REGINT_NEG_ONE_OK | REGINT_GE_ZERO));
CHECK(reg_string("pkey", "ib_pkey_val",
"OpenFabrics partition key (pkey) value. "
"Unsigned integer decimal or hex values are allowed (e.g., \"3\" or \"0x3f\") and will be masked against the maximum allowable IB partition key value (0x7fff)",
"0", &pkey, 0));
mca_btl_openib_component.ib_pkey_val =
ompi_btl_openib_ini_intify(pkey) & MCA_BTL_IB_PKEY_MASK;
free(pkey);
CHECK(reg_uint("pkey", "ib_pkey_val",
"OpenFabrics partition key (pkey) value. "
"Unsigned integer decimal or hex values are allowed (e.g., \"3\" or \"0x3f\") and will be masked against the maximum allowable IB partition key value (0x7fff)",
0, &mca_btl_openib_component.ib_pkey_val, 0));
CHECK(reg_int("psn", "ib_psn",
CHECK(reg_uint("psn", "ib_psn",
"OpenFabrics packet sequence starting number "
"(must be >= 0)",
0, &ival, REGINT_GE_ZERO));
mca_btl_openib_component.ib_psn = (uint32_t) ival;
CHECK(reg_int("ib_qp_ous_rd_atom", NULL,
"InfiniBand outstanding atomic reads "
"(must be >= 0)",
4, &ival, REGINT_GE_ZERO));
mca_btl_openib_component.ib_qp_ous_rd_atom = (uint32_t) ival;
0, &mca_btl_openib_component.ib_psn, 0));
CHECK(reg_uint("ib_qp_ous_rd_atom", NULL,
"InfiniBand outstanding atomic reads "
"(must be >= 0)",
4, &mca_btl_openib_component.ib_qp_ous_rd_atom, 0));
asprintf(&msg, "OpenFabrics MTU, in bytes (if not specified in INI files). Valid values are: %d=256 bytes, %d=512 bytes, %d=1024 bytes, %d=2048 bytes, %d=4096 bytes",
IBV_MTU_256,
IBV_MTU_512,
@ -307,244 +363,175 @@ int btl_openib_register_mca_params(void)
/* Don't try to recover from this */
return OMPI_ERR_OUT_OF_RESOURCE;
}
CHECK(reg_int("mtu", "ib_mtu", msg, IBV_MTU_1024, &ival, 0));
free(msg);
if (ival < IBV_MTU_1024 || ival > IBV_MTU_4096) {
opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
true, "invalid value for btl_openib_ib_mtu",
"btl_openib_ib_mtu reset to 1024");
mca_btl_openib_component.ib_mtu = IBV_MTU_1024;
mca_btl_openib_component.ib_mtu = IBV_MTU_1024;
(void) mca_base_var_enum_create("btl_openib_mtus", ib_mtu_values, &new_enum);
tmp = mca_base_component_var_register(&mca_btl_openib_component.super.btl_version,
"mtu", msg, MCA_BASE_VAR_TYPE_INT, new_enum,
0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_btl_openib_component.ib_mtu);
if (0 <= tmp) {
(void) mca_base_var_register_synonym(tmp, "ompi", "btl", "openib", "ib_mtu",
MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
} else {
mca_btl_openib_component.ib_mtu = (uint32_t) ival;
ret = tmp;
}
CHECK(reg_int("ib_min_rnr_timer", NULL, "InfiniBand minimum "
"\"receiver not ready\" timer, in seconds "
"(must be >= 0 and <= 31)",
25, &ival, 0));
if (ival > 31) {
opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
true, "btl_openib_ib_min_rnr_timer > 31",
"btl_openib_ib_min_rnr_timer reset to 31");
ival = 31;
} else if (ival < 0){
opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
true, "btl_openib_ib_min_rnr_timer < 0",
"btl_openib_ib_min_rnr_timer reset to 0");
ival = 0;
}
mca_btl_openib_component.ib_min_rnr_timer = (uint32_t) ival;
OBJ_RELEASE(new_enum);
free(msg);
CHECK(reg_int("ib_timeout", NULL,
CHECK(reg_uint("ib_min_rnr_timer", NULL, "InfiniBand minimum "
"\"receiver not ready\" timer, in seconds "
"(must be >= 0 and <= 31)",
25, &mca_btl_openib_component.ib_min_rnr_timer, 0));
CHECK(reg_uint("ib_timeout", NULL,
"InfiniBand transmit timeout, plugged into formula: 4.096 microseconds * (2^btl_openib_ib_timeout) "
"(must be >= 0 and <= 31)",
20, &ival, 0));
if (ival > 31) {
opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
true, "btl_openib_ib_timeout > 31",
"btl_openib_ib_timeout reset to 31");
ival = 31;
} else if (ival < 0) {
opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
true, "btl_openib_ib_timeout < 0",
"btl_openib_ib_timeout reset to 0");
ival = 0;
}
mca_btl_openib_component.ib_timeout = (uint32_t) ival;
CHECK(reg_int("ib_retry_count", NULL,
20, &mca_btl_openib_component.ib_timeout, 0));
CHECK(reg_uint("ib_retry_count", NULL,
"InfiniBand transmit retry count "
"(must be >= 0 and <= 7)",
7, &ival, 0));
if (ival > 7) {
opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
true, "btl_openib_ib_retry_count > 7",
"btl_openib_ib_retry_count reset to 7");
ival = 7;
} else if (ival < 0) {
opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
true, "btl_openib_ib_retry_count < 0",
"btl_openib_ib_retry_count reset to 0");
ival = 0;
}
mca_btl_openib_component.ib_retry_count = (uint32_t) ival;
7, &mca_btl_openib_component.ib_retry_count, 0));
CHECK(reg_int("ib_rnr_retry", NULL,
"InfiniBand \"receiver not ready\" "
"retry count; applies *only* to SRQ/XRC queues. PP queues "
"use RNR retry values of 0 because Open MPI performs "
"software flow control to guarantee that RNRs never occur "
"(must be >= 0 and <= 7; 7 = \"infinite\")",
7, &ival, 0));
if (ival > 7) {
opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
true, "btl_openib_ib_rnr_retry > 7",
"btl_openib_ib_rnr_retry reset to 7");
ival = 7;
} else if (ival < 0) {
opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
true, "btl_openib_ib_rnr_retry < 0",
"btl_openib_ib_rnr_retry reset to 0");
ival = 0;
}
mca_btl_openib_component.ib_rnr_retry = (uint32_t) ival;
CHECK(reg_uint("ib_rnr_retry", NULL,
"InfiniBand \"receiver not ready\" "
"retry count; applies *only* to SRQ/XRC queues. PP queues "
"use RNR retry values of 0 because Open MPI performs "
"software flow control to guarantee that RNRs never occur "
"(must be >= 0 and <= 7; 7 = \"infinite\")",
7, &mca_btl_openib_component.ib_rnr_retry, 0));
CHECK(reg_int("ib_max_rdma_dst_ops", NULL, "InfiniBand maximum pending RDMA "
CHECK(reg_uint("ib_max_rdma_dst_ops", NULL, "InfiniBand maximum pending RDMA "
"destination operations "
"(must be >= 0)",
4, &ival, REGINT_GE_ZERO));
mca_btl_openib_component.ib_max_rdma_dst_ops = (uint32_t) ival;
4, &mca_btl_openib_component.ib_max_rdma_dst_ops, 0));
CHECK(reg_int("ib_service_level", NULL, "InfiniBand service level "
"(must be >= 0 and <= 15)",
0, &ival, 0));
if (ival > 15) {
opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
true, "btl_openib_ib_service_level > 15",
"btl_openib_ib_service_level reset to 15");
ival = 15;
} else if (ival < 0) {
opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
true, "btl_openib_ib_service_level < 0",
"btl_openib_ib_service_level reset to 0");
ival = 0;
}
mca_btl_openib_component.ib_service_level = (uint32_t) ival;
CHECK(reg_uint("ib_service_level", NULL, "InfiniBand service level "
"(must be >= 0 and <= 15)",
0, &mca_btl_openib_component.ib_service_level, 0));
#if (ENABLE_DYNAMIC_SL)
CHECK(reg_int("ib_path_record_service_level", NULL,
"Enable getting InfiniBand service level from PathRecord "
"(must be >= 0, 0 = disabled, positive = try to get the "
"service level from PathRecord)",
0, &ival, REGINT_GE_ZERO));
mca_btl_openib_component.ib_path_record_service_level = (uint32_t) ival;
CHECK(reg_uint("ib_path_record_service_level", NULL,
"Enable getting InfiniBand service level from PathRecord "
"(must be >= 0, 0 = disabled, positive = try to get the "
"service level from PathRecord)",
0, &mca_btl_openib_component.ib_path_record_service_level, 0));
#endif
CHECK(reg_int("use_eager_rdma", NULL, "Use RDMA for eager messages "
"(-1 = use device default, 0 = do not use eager RDMA, "
"1 = use eager RDMA)",
-1, &ival, 0));
mca_btl_openib_component.use_eager_rdma = (int32_t) ival;
-1, &mca_btl_openib_component.use_eager_rdma, 0));
CHECK(reg_int("eager_rdma_threshold", NULL,
"Use RDMA for short messages after this number of "
"messages are received from a given peer "
"(must be >= 1)",
16, &ival, REGINT_GE_ONE));
mca_btl_openib_component.eager_rdma_threshold = (int32_t) ival;
16, &mca_btl_openib_component.eager_rdma_threshold, REGINT_GE_ONE));
CHECK(reg_int("max_eager_rdma", NULL, "Maximum number of peers allowed to use "
"RDMA for short messages (RDMA is used for all long "
"messages, except if explicitly disabled, such as "
"with the \"dr\" pml) "
"(must be >= 0)",
16, &ival, REGINT_GE_ZERO));
mca_btl_openib_component.max_eager_rdma = (int32_t) ival;
16, &mca_btl_openib_component.max_eager_rdma, REGINT_GE_ZERO));
CHECK(reg_int("eager_rdma_num", NULL, "Number of RDMA buffers to allocate "
"for small messages "
"(must be >= 1)",
16, &ival, REGINT_GE_ONE));
mca_btl_openib_component.eager_rdma_num = (int32_t) (ival + 1);
16, &mca_btl_openib_component.eager_rdma_num, REGINT_GE_ONE));
mca_btl_openib_component.eager_rdma_num++;
CHECK(reg_int("btls_per_lid", NULL, "Number of BTLs to create for each "
CHECK(reg_uint("btls_per_lid", NULL, "Number of BTLs to create for each "
"InfiniBand LID "
"(must be >= 1)",
1, &ival, REGINT_GE_ONE));
mca_btl_openib_component.btls_per_lid = (uint32_t) ival;
1, &mca_btl_openib_component.btls_per_lid, REGINT_GE_ONE));
CHECK(reg_int("max_lmc", NULL, "Maximum number of LIDs to use for each device port "
"(must be >= 0, where 0 = use all available)",
0, &ival, REGINT_GE_ZERO));
mca_btl_openib_component.max_lmc = (uint32_t) ival;
CHECK(reg_uint("max_lmc", NULL, "Maximum number of LIDs to use for each device port "
"(must be >= 0, where 0 = use all available)",
0, &mca_btl_openib_component.max_lmc, 0));
#if OPAL_HAVE_THREADS
CHECK(reg_int("enable_apm_over_lmc", NULL, "Maximum number of alternative paths for each device port "
"(must be >= -1, where 0 = disable apm, -1 = all available alternative paths )",
0, &ival, REGINT_NEG_ONE_OK|REGINT_GE_ZERO));
mca_btl_openib_component.apm_lmc = (uint32_t) ival;
0, &mca_btl_openib_component.apm_lmc, REGINT_NEG_ONE_OK|REGINT_GE_ZERO));
CHECK(reg_int("enable_apm_over_ports", NULL, "Enable alternative path migration (APM) over different ports of the same device "
"(must be >= 0, where 0 = disable APM over ports, 1 = enable APM over ports of the same device)",
0, &ival, REGINT_GE_ZERO));
mca_btl_openib_component.apm_ports = (uint32_t) ival;
0, &mca_btl_openib_component.apm_ports, REGINT_GE_ZERO));
CHECK(reg_int("use_async_event_thread", NULL,
"If nonzero, use the thread that will handle InfiniBand asynchronous events",
1, &ival, 0));
mca_btl_openib_component.use_async_event_thread = (0 != ival);
CHECK(reg_bool("use_async_event_thread", NULL,
"If nonzero, use the thread that will handle InfiniBand asynchronous events",
true, &mca_btl_openib_component.use_async_event_thread));
#if BTL_OPENIB_FAILOVER_ENABLED
/* failover specific output */
CHECK(reg_int("verbose_failover", NULL,
"Output some verbose OpenIB BTL failover information "
"(0 = no output, nonzero = output)", 0, &ival, 0));
"(0 = no output, nonzero = output)", 0, &btl_openib_verbose_failover, 0));
mca_btl_openib_component.verbose_failover = opal_output_open(NULL);
opal_output_set_verbosity(mca_btl_openib_component.verbose_failover, ival);
/* Apply the verbosity level held in btl_openib_verbose_failover (the backing
 * store of the "verbose_failover" variable registered above) to the
 * failover output stream. */
opal_output_set_verbosity(mca_btl_openib_component.verbose_failover,
                          btl_openib_verbose_failover);
CHECK(reg_int("port_error_failover", NULL,
"If nonzero, asynchronous port errors will trigger failover",
0, &ival, 0));
mca_btl_openib_component.port_error_failover = (0 != ival);
CHECK(reg_bool("port_error_failover", NULL,
"If nonzero, asynchronous port errors will trigger failover",
0, &mca_btl_openib_component.port_error_failover));
/* Make non writeable parameter that indicates failover is configured in. */
tmp = mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version,
"failover_enabled",
"openib failover is configured: run with bfo PML to support failover between openib BTLs",
false, true,
1, NULL);
tmp = mca_base_component_var_register(&mca_btl_openib_component.super.btl_version,
"failover_enabled",
"openib failover is configured: run with bfo PML to support failover between openib BTLs",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_CONSTANT,
&btl_openib_failover_enabled);
if (0 > tmp) ret = tmp;
#endif
CHECK(reg_int("enable_srq_resize", NULL,
"Enable/Disable on demand SRQ resize. "
"(0 = without resizing, nonzero = with resizing)", 1, &ival, 0));
mca_btl_openib_component.enable_srq_resize = (0 != ival);
CHECK(reg_bool("enable_srq_resize", NULL,
"Enable/Disable on demand SRQ resize. "
"(0 = without resizing, nonzero = with resizing)", 1,
&mca_btl_openib_component.enable_srq_resize));
#else
mca_btl_openib_component.enable_srq_resize = 0;
mca_btl_openib_component.enable_srq_resize = false;
#endif
CHECK(reg_int("buffer_alignment", NULL,
"Preferred communication buffer alignment, in bytes "
"(must be > 0 and power of two)",
64, &ival, REGINT_GE_ZERO));
if(ival <= 1 || (ival & (ival - 1))) {
opal_show_help("help-mpi-btl-openib.txt", "wrong buffer alignment",
true, ival, ompi_process_info.nodename, 64);
mca_btl_openib_component.buffer_alignment = 64;
} else {
mca_btl_openib_component.buffer_alignment = (uint32_t) ival;
}
CHECK(reg_uint("buffer_alignment", NULL,
"Preferred communication buffer alignment, in bytes "
"(must be > 0 and power of two)",
64, &mca_btl_openib_component.buffer_alignment, 0));
CHECK(reg_int("use_message_coalescing", NULL,
"If nonzero, use message coalescing", 1, &ival, 0));
mca_btl_openib_component.use_message_coalescing = (0 != ival);
CHECK(reg_bool("use_message_coalescing", NULL,
"If nonzero, use message coalescing", true,
&mca_btl_openib_component.use_message_coalescing));
CHECK(reg_int("cq_poll_ratio", NULL,
"How often to poll high priority CQ versus low priority CQ",
100, &ival, REGINT_GE_ONE));
mca_btl_openib_component.cq_poll_ratio = (uint32_t)ival;
CHECK(reg_int("eager_rdma_poll_ratio", NULL,
"How often to poll eager RDMA channel versus CQ",
100, &ival, REGINT_GE_ONE));
mca_btl_openib_component.eager_rdma_poll_ratio = (uint32_t)ival;
CHECK(reg_int("hp_cq_poll_per_progress", NULL,
CHECK(reg_uint("cq_poll_ratio", NULL,
"How often to poll high priority CQ versus low priority CQ",
100, &mca_btl_openib_component.cq_poll_ratio, REGINT_GE_ONE));
CHECK(reg_uint("eager_rdma_poll_ratio", NULL,
"How often to poll eager RDMA channel versus CQ",
100, &mca_btl_openib_component.eager_rdma_poll_ratio, REGINT_GE_ONE));
CHECK(reg_uint("hp_cq_poll_per_progress", NULL,
"Max number of completion events to process for each call "
"of BTL progress engine",
10, &ival, REGINT_GE_ONE));
mca_btl_openib_component.cq_poll_progress = (uint32_t)ival;
10, &mca_btl_openib_component.cq_poll_progress, REGINT_GE_ONE));
CHECK(reg_int("max_hw_msg_size", NULL,
"Maximum size (in bytes) of a single fragment of a long message when using the RDMA protocols (must be > 0 and <= hw capabilities).",
0, &ival, REGINT_GE_ZERO));
mca_btl_openib_component.max_hw_msg_size = (uint32_t)ival;
CHECK(reg_uint("max_hw_msg_size", NULL,
"Maximum size (in bytes) of a single fragment of a long message when using the RDMA protocols (must be > 0 and <= hw capabilities).",
0, &mca_btl_openib_component.max_hw_msg_size, 0));
/* Info only */
mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version,
"have_fork_support",
"Whether the OpenFabrics stack supports applications that invoke the \"fork()\" system call or not (0 = no, 1 = yes). "
"Note that this value does NOT indicate whether the system being run on supports \"fork()\" with OpenFabrics applications or not.",
false, true,
OMPI_HAVE_IBV_FORK_INIT ? 1 : 0,
NULL);
tmp = mca_base_component_var_register(&mca_btl_openib_component.super.btl_version,
"have_fork_support",
"Whether the OpenFabrics stack supports applications that invoke the \"fork()\" system call or not (0 = no, 1 = yes). "
"Note that this value does NOT indicate whether the system being run on supports \"fork()\" with OpenFabrics applications or not.",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_CONSTANT,
&btl_openib_have_fork_support);
mca_btl_openib_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_DEFAULT;
@ -568,23 +555,16 @@ int btl_openib_register_mca_params(void)
#if OMPI_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
/* Default is enabling CUDA asynchronous send copies */
CHECK(reg_int("cuda_async_send", NULL,
"Enable or disable CUDA async send copies "
"(1 = async; 0 = sync)",
1, &ival, 0));
mca_btl_openib_component.cuda_async_send = (0 != ival);
if (mca_btl_openib_component.cuda_async_send) {
mca_btl_openib_module.super.btl_flags |= MCA_BTL_FLAGS_CUDA_COPY_ASYNC_SEND;
}
CHECK(reg_bool("cuda_async_send", NULL,
"Enable or disable CUDA async send copies "
"(true = async; false = sync)",
true, &mca_btl_openib_component.cuda_async_send));
/* Default is enabling CUDA asynchronous receive copies */
CHECK(reg_int("cuda_async_recv", NULL,
"Enable or disable CUDA async recv copies "
"(1 = async; 0 = sync)",
1, &ival, 0));
mca_btl_openib_component.cuda_async_recv = (0 != ival);
if (mca_btl_openib_component.cuda_async_recv) {
mca_btl_openib_module.super.btl_flags |= MCA_BTL_FLAGS_CUDA_COPY_ASYNC_RECV;
}
CHECK(reg_bool("cuda_async_recv", NULL,
"Enable or disable CUDA async recv copies "
"(true = async; false = sync)",
true, &mca_btl_openib_component.cuda_async_recv));
/* Also make the max send size larger for better GPU buffer performance */
mca_btl_openib_module.super.btl_max_send_size = 128 * 1024;
/* Turn off message coalescing - not sure if it works with GPU buffers */
@ -652,7 +632,101 @@ int btl_openib_register_mca_params(void)
"alignment for all malloc calls if btl openib is used.",
32, &mca_btl_openib_component.use_memalign,
REGINT_GE_ZERO));
mca_btl_openib_component.memalign_threshold = mca_btl_openib_component.eager_limit;
tmp = mca_base_component_var_register(&mca_btl_openib_component.super.btl_version,
"memalign_threshold",
"Allocating memory more than btl_openib_memalign_threshhold"
"bytes will automatically be algined to the value of btl_openib_memalign bytes."
"memalign_threshhold defaults to the same value as mca_btl_openib_eager_limit.",
MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_btl_openib_component.memalign_threshold);
if (0 > tmp) ret = tmp;
#endif
/* Register any MCA params for the connect pseudo-components */
if (OMPI_SUCCESS == ret) {
    ret = ompi_btl_openib_connect_base_register();
}
/* Always run the verification pass so values are still clamped, but do not
 * let its result overwrite an earlier registration failure held in ret. */
if (OMPI_SUCCESS != ret) {
    (void) btl_openib_verify_mca_params();
    return ret;
}
return btl_openib_verify_mca_params();
}
/*
 * Sanity-check the values stored in mca_btl_openib_component after the MCA
 * variables have been registered.
 *
 * Out-of-range values are clamped to their limit and, for most of them, the
 * user is told through opal_show_help().  The only failure visible in this
 * part of the function is OMPI_ERR_BAD_PARAM, returned when fork support is
 * requested while HAVE_IBV_FORK_INIT is false.
 */
int btl_openib_verify_mca_params (void)
{
/* Silently cap the CQ poll batch at MCA_BTL_OPENIB_CQ_POLL_BATCH_DEFAULT. */
if (mca_btl_openib_component.cq_poll_batch > MCA_BTL_OPENIB_CQ_POLL_BATCH_DEFAULT) {
mca_btl_openib_component.cq_poll_batch = MCA_BTL_OPENIB_CQ_POLL_BATCH_DEFAULT;
}
#if !HAVE_IBV_FORK_INIT
/* Any non-zero want_fork_support value is an error in this configuration. */
if (0 != mca_btl_openib_component.want_fork_support) {
opal_show_help("help-mpi-btl-openib.txt",
"ibv_fork requested but not supported", true,
ompi_process_info.nodename);
return OMPI_ERR_BAD_PARAM;
}
#endif
/* Keep only the bits of the pkey selected by MCA_BTL_IB_PKEY_MASK. */
mca_btl_openib_component.ib_pkey_val &= MCA_BTL_IB_PKEY_MASK;
/* Upper-bound checks only: each value below is compared against its maximum
 * and reset to that maximum, with a help message, when it exceeds it. */
if (mca_btl_openib_component.ib_min_rnr_timer > 31) {
opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
true, "btl_openib_ib_min_rnr_timer > 31",
"btl_openib_ib_min_rnr_timer reset to 31");
mca_btl_openib_component.ib_min_rnr_timer = 31;
}
if (mca_btl_openib_component.ib_timeout > 31) {
opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
true, "btl_openib_ib_timeout > 31",
"btl_openib_ib_timeout reset to 31");
mca_btl_openib_component.ib_timeout = 31;
}
if (mca_btl_openib_component.ib_retry_count > 7) {
opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
true, "btl_openib_ib_retry_count > 7",
"btl_openib_ib_retry_count reset to 7");
mca_btl_openib_component.ib_retry_count = 7;
}
if (mca_btl_openib_component.ib_rnr_retry > 7) {
opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
true, "btl_openib_ib_rnr_retry > 7",
"btl_openib_ib_rnr_retry reset to 7");
mca_btl_openib_component.ib_rnr_retry = 7;
}
if (mca_btl_openib_component.ib_service_level > 15) {
opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
true, "btl_openib_ib_service_level > 15",
"btl_openib_ib_service_level reset to 15");
mca_btl_openib_component.ib_service_level = 15;
}
/* Reject alignments of 0 or 1 and any value that is not a power of two
 * (x & (x - 1) is non-zero for those); fall back to 64. */
if(mca_btl_openib_component.buffer_alignment <= 1 ||
(mca_btl_openib_component.buffer_alignment & (mca_btl_openib_component.buffer_alignment - 1))) {
opal_show_help("help-mpi-btl-openib.txt", "wrong buffer alignment",
true, mca_btl_openib_component.buffer_alignment, ompi_process_info.nodename, 64);
mca_btl_openib_component.buffer_alignment = 64;
}
#if OMPI_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
/* Make the module's btl_flags mirror the cuda_async_send/recv settings:
 * set the flag when the variable is true, clear it otherwise. */
if (mca_btl_openib_component.cuda_async_send) {
mca_btl_openib_module.super.btl_flags |= MCA_BTL_FLAGS_CUDA_COPY_ASYNC_SEND;
} else {
mca_btl_openib_module.super.btl_flags &= ~MCA_BTL_FLAGS_CUDA_COPY_ASYNC_SEND;
}
if (mca_btl_openib_component.cuda_async_recv) {
mca_btl_openib_module.super.btl_flags |= MCA_BTL_FLAGS_CUDA_COPY_ASYNC_RECV;
} else {
mca_btl_openib_module.super.btl_flags &= ~MCA_BTL_FLAGS_CUDA_COPY_ASYNC_RECV;
}
#endif
if (mca_btl_openib_component.use_memalign != 32
&& mca_btl_openib_component.use_memalign != 64
&& mca_btl_openib_component.use_memalign != 0){
@ -661,27 +735,6 @@ int btl_openib_register_mca_params(void)
"btl_openib_memalign is reset to 32");
mca_btl_openib_component.use_memalign = 32;
}
reg_int("memalign_threshold", NULL,
"Allocating memory more than btl_openib_memalign_threshhold"
"bytes will automatically be algined to the value of btl_openib_memalign bytes."
"memalign_threshhold defaults to the same value as mca_btl_openib_eager_limit.",
mca_btl_openib_component.eager_limit,
&ival,
REGINT_GE_ZERO);
if (ival < 0){
opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
true, "btl_openib_memalign_threshold must be positive",
"btl_openib_memalign_threshold is reset to btl_openib_eager_limit");
ival = mca_btl_openib_component.eager_limit;
}
mca_btl_openib_component.memalign_threshold = (size_t)ival;
#endif
/* Register any MCA params for the connect pseudo-components */
if (OMPI_SUCCESS == ret) {
ret = ompi_btl_openib_connect_base_register();
}
return ret;
return OMPI_SUCCESS;
}

Просмотреть файл

@ -16,6 +16,7 @@ BEGIN_C_DECLS
* Function to register MCA params and check for sane values
*/
int btl_openib_register_mca_params(void);
/*
 * Function to re-check the registered MCA param values: clamps out-of-range
 * values and returns an error code for values that cannot be honored
 */
int btl_openib_verify_mca_params (void);
END_C_DECLS
#endif

Просмотреть файл

@ -66,6 +66,9 @@ static ompi_btl_openib_connect_base_component_t *all[] = {
static ompi_btl_openib_connect_base_component_t **available = NULL;
static int num_available = 0;
static char *btl_openib_cpc_include;
static char *btl_openib_cpc_exclude;
/*
* Register MCA parameters
*/
@ -73,7 +76,6 @@ int ompi_btl_openib_connect_base_register(void)
{
int i, j, save;
char **temp = NULL, *string = NULL, *all_cpc_names = NULL;
char *cpc_include = NULL, *cpc_exclude = NULL;
/* Make an MCA parameter to select which connect module to use */
for (i = 0; NULL != all[i]; ++i) {
@ -88,18 +90,24 @@ int ompi_btl_openib_connect_base_register(void)
"Method used to select OpenFabrics connections (valid values: %s)",
all_cpc_names);
mca_base_param_reg_string(&mca_btl_openib_component.super.btl_version,
"cpc_include", string, false, false,
NULL, &cpc_include);
btl_openib_cpc_include = NULL;
(void) mca_base_component_var_register(&mca_btl_openib_component.super.btl_version,
"cpc_include", string, MCA_BASE_VAR_TYPE_STRING,
NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&btl_openib_cpc_include);
free(string);
asprintf(&string,
"Method used to exclude OpenFabrics connections (valid values: %s)",
all_cpc_names);
mca_base_param_reg_string(&mca_btl_openib_component.super.btl_version,
"cpc_exclude", string, false, false,
NULL, &cpc_exclude);
/* Reset the exclude list's own backing store before registering it.  The
 * include list must not be touched here: it was registered just above, and
 * clearing it would make the include branch below unreachable. */
btl_openib_cpc_exclude = NULL;
(void) mca_base_component_var_register(&mca_btl_openib_component.super.btl_version,
                                       "cpc_exclude", string, MCA_BASE_VAR_TYPE_STRING,
                                       NULL, 0, 0, OPAL_INFO_LVL_9,
                                       MCA_BASE_VAR_SCOPE_READONLY,
                                       &btl_openib_cpc_exclude);
free(string);
/* Parse the if_[in|ex]clude parameters to come up with a list of
@ -108,9 +116,9 @@ int ompi_btl_openib_connect_base_register(void)
/* If we have an "include" list, then find all those CPCs and put
them in available[] */
if (NULL != cpc_include) {
if (NULL != btl_openib_cpc_include) {
mca_btl_openib_component.cpc_explicitly_defined = true;
temp = opal_argv_split(cpc_include, ',');
temp = opal_argv_split(btl_openib_cpc_include, ',');
for (save = j = 0; NULL != temp[j]; ++j) {
for (i = 0; NULL != all[i]; ++i) {
if (0 == strcmp(temp[j], all[i]->cbc_name)) {
@ -124,7 +132,7 @@ int ompi_btl_openib_connect_base_register(void)
opal_show_help("help-mpi-btl-openib-cpc-base.txt",
"cpc name not found", true,
"include", ompi_process_info.nodename,
"include", cpc_include, temp[j],
"include", btl_openib_cpc_include, temp[j],
all_cpc_names);
opal_argv_free(temp);
free(all_cpc_names);
@ -136,9 +144,9 @@ int ompi_btl_openib_connect_base_register(void)
/* Otherwise, if we have an "exclude" list, take all the CPCs that
are not in that list and put them in available[] */
else if (NULL != cpc_exclude) {
else if (NULL != btl_openib_cpc_exclude) {
mca_btl_openib_component.cpc_explicitly_defined = true;
temp = opal_argv_split(cpc_exclude, ',');
temp = opal_argv_split(btl_openib_cpc_exclude, ',');
/* First: error check -- ensure that all the names are valid */
for (j = 0; NULL != temp[j]; ++j) {
for (i = 0; NULL != all[i]; ++i) {
@ -150,7 +158,7 @@ int ompi_btl_openib_connect_base_register(void)
opal_show_help("help-mpi-btl-openib-cpc-base.txt",
"cpc name not found", true,
"exclude", ompi_process_info.nodename,
"exclude", cpc_exclude, temp[j],
"exclude", btl_openib_cpc_exclude, temp[j],
all_cpc_names);
opal_argv_free(temp);
free(all_cpc_names);

Просмотреть файл

@ -98,10 +98,14 @@ ompi_btl_openib_connect_base_component_t ompi_btl_openib_connect_oob = {
/* Open - this function sets up any oob specific commandline params */
static void oob_component_register(void)
{
mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version,
"connect_oob_priority",
"The selection method priority for oob",
false, false, oob_priority, &oob_priority);
oob_priority = 50;
(void) mca_base_component_var_register(&mca_btl_openib_component.super.btl_version,
"connect_oob_priority",
"The selection method priority for oob",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&oob_priority);
if (oob_priority > 100) {
oob_priority = 100;
@ -166,6 +170,13 @@ static int oob_component_query(mca_btl_openib_module_t *btl,
"openib BTL: oob CPC system error (malloc failed)");
return OMPI_ERR_OUT_OF_RESOURCE;
}
if (oob_priority > 100) {
oob_priority = 100;
} else if (oob_priority < -1) {
oob_priority = -1;
}
(*cpc)->data.cbm_component = &ompi_btl_openib_connect_oob;
(*cpc)->data.cbm_priority = oob_priority;
(*cpc)->data.cbm_modex_message = NULL;

Просмотреть файл

@ -164,7 +164,7 @@ static opal_list_t client_list;
static opal_mutex_t client_list_lock;
static struct rdma_event_channel *event_channel = NULL;
static int rdmacm_priority = 30;
static uint16_t rdmacm_port = 0;
static unsigned int rdmacm_port = 0;
static uint32_t rdmacm_addr = 0;
static int rdmacm_resolve_timeout = 30000;
static int rdmacm_resolve_max_retry_count = 20;
@ -229,55 +229,70 @@ static void rdmacm_component_register(void)
{
int value;
mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version,
"connect_rdmacm_priority",
"The selection method priority for rdma_cm",
false, false, rdmacm_priority, &rdmacm_priority);
rdmacm_priority = 30;
(void) mca_base_component_var_register(&mca_btl_openib_component.super.btl_version,
"connect_rdmacm_priority",
"The selection method priority for rdma_cm",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&rdmacm_priority);
if (rdmacm_priority > 100) {
rdmacm_priority = 100;
} else if (rdmacm_priority < 0) {
rdmacm_priority = 0;
}
mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version,
"connect_rdmacm_port",
"The selection method port for rdma_cm",
false, false, rdmacm_port, &value);
if (value >= 0 && value < 65536) {
rdmacm_port = (uint16_t) value;
} else {
rdmacm_port = 0;
(void) mca_base_component_var_register(&mca_btl_openib_component.super.btl_version,
"connect_rdmacm_port",
"The selection method port for rdma_cm",
MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&rdmacm_port);
if (rdmacm_port & ~0xfffful) {
opal_show_help("help-mpi-btl-openib-cpc-rdmacm.txt",
"illegal tcp port", true, value);
"illegal tcp port", true, (int) rdmacm_port);
rdmacm_port = 0;
}
mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version,
"connect_rdmacm_resolve_timeout",
"The timeout (in miliseconds) for address and route resolution",
false, false, rdmacm_resolve_timeout, &value);
if (value > 0) {
rdmacm_resolve_timeout = value;
} else {
rdmacm_resolve_timeout = 30000;
(void) mca_base_component_var_register(&mca_btl_openib_component.super.btl_version,
"connect_rdmacm_resolve_timeout",
"The timeout (in miliseconds) for address and route resolution",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&rdmacm_resolve_timeout);
if (0 > rdmacm_resolve_timeout) {
opal_show_help("help-mpi-btl-openib-cpc-rdmacm.txt",
"illegal timeout", true, value);
"illegal timeout", true, rdmacm_resolve_timeout);
rdmacm_resolve_timeout = 30000;
}
mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version,
"connect_rdmacm_retry_count",
"Maximum number of times rdmacm will retry route resolution",
false, false, rdmacm_resolve_max_retry_count, &value);
if (value > 0) {
rdmacm_resolve_max_retry_count = value;
} else {
rdmacm_resolve_max_retry_count = 20;
(void) mca_base_component_var_register(&mca_btl_openib_component.super.btl_version,
"connect_rdmacm_retry_count",
"Maximum number of times rdmacm will retry route resolution",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&rdmacm_resolve_max_retry_count);
if (0 > rdmacm_resolve_max_retry_count) {
opal_show_help("help-mpi-btl-openib-cpc-rdmacm.txt",
"illegal retry count", true, value);
"illegal retry count", true, rdmacm_resolve_max_retry_count);
rdmacm_resolve_max_retry_count = 20;
}
mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version,
"connect_rdmacm_reject_causes_connect_error",
"The drivers for some devices are buggy such that an RDMA REJECT action may result in a CONNECT_ERROR event instead of a REJECTED event. Setting this MCA parameter to true tells Open MPI to treat CONNECT_ERROR events on connections where a REJECT is expected as a REJECT (default: false)",
false, false, 0, &value);
rdmacm_reject_causes_connect_error = (bool) (value != 0);
rdmacm_reject_causes_connect_error = false;
(void) mca_base_component_var_register(&mca_btl_openib_component.super.btl_version,
"connect_rdmacm_reject_causes_connect_error",
"The drivers for some devices are buggy such that an RDMA REJECT action may result in a CONNECT_ERROR event instead of a REJECTED event. Setting this MCA parameter to true tells Open MPI to treat CONNECT_ERROR events on connections where a REJECT is expected as a REJECT (default: false)",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&rdmacm_reject_causes_connect_error);
}
/*
@ -1864,7 +1879,7 @@ static int rdmacm_component_query(mca_btl_openib_module_t *openib_btl, ompi_btl_
memset(&sin, 0, sizeof(sin));
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = rdmacm_addr;
sin.sin_port = rdmacm_port;
sin.sin_port = (uint16_t) rdmacm_port;
/* Bind the rdmacm server to the local IP address and an ephemeral
* port or one specified by a command arg.

Просмотреть файл

@ -354,38 +354,41 @@ ompi_btl_openib_connect_base_component_t ompi_btl_openib_connect_udcm = {
/*
 * Register the MCA variables of the UD connection manager (udcm) CPC.
 *
 * Each backing variable is set to its default and then passed to
 * mca_base_component_var_register() as the variable's backing store:
 *   connect_udcm_priority   -> udcm_priority   (default 0)
 *   connect_udcm_recv_count -> udcm_recv_count (default UDCM_MIN_RECV_COUNT)
 *   connect_udcm_timeout    -> udcm_timeout    (default UDCM_MIN_TIMEOUT)
 *   connect_udcm_max_retry  -> udcm_max_retry  (default 10)
 * All four are registered as MCA_BASE_VAR_TYPE_INT at OPAL_INFO_LVL_9 with
 * read-only scope.  The registration return codes are deliberately ignored.
 *
 * No range checking is done here; udcm_component_query() clamps the
 * priority, receive count and timeout before they are used.
 */
static void udcm_component_register(void)
{
    /* selection priority of this connection method */
    udcm_priority = 0;
    (void) mca_base_component_var_register(&mca_btl_openib_component.super.btl_version,
                                           "connect_udcm_priority",
                                           "The selection method priority for ud",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &udcm_priority);

    /* number of connection buffers to post */
    udcm_recv_count = UDCM_MIN_RECV_COUNT;
    (void) mca_base_component_var_register(&mca_btl_openib_component.super.btl_version,
                                           "connect_udcm_recv_count",
                                           "Number of connection buffers to post",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &udcm_recv_count);

    /* how long to wait for a connection response */
    udcm_timeout = UDCM_MIN_TIMEOUT;
    (void) mca_base_component_var_register(&mca_btl_openib_component.super.btl_version,
                                           "connect_udcm_timeout",
                                           "Microseconds to wait for ud connection response",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &udcm_timeout);

    /* how many times a connection message is re-sent */
    udcm_max_retry = 10;
    (void) mca_base_component_var_register(&mca_btl_openib_component.super.btl_version,
                                           "connect_udcm_max_retry",
                                           "Maximum number of times to retry sending a connection message",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &udcm_max_retry);
}
static int udcm_component_query(mca_btl_openib_module_t *btl,
@ -423,7 +426,19 @@ static int udcm_component_query(mca_btl_openib_module_t *btl,
break;
}
signal (SIGSEGV, SIG_DFL);
if (udcm_priority > 100) {
udcm_priority = 100;
} else if (udcm_priority < 0) {
udcm_priority = 0;
}
if (UDCM_MIN_RECV_COUNT > udcm_recv_count) {
udcm_recv_count = UDCM_MIN_RECV_COUNT;
}
if (UDCM_MIN_TIMEOUT > udcm_timeout) {
udcm_timeout = UDCM_MIN_TIMEOUT;
}
rc = udcm_module_init (m, btl);
if (OMPI_SUCCESS != rc) {

Просмотреть файл

@ -1038,6 +1038,12 @@ static int xoob_component_query(mca_btl_openib_module_t *openib_btl,
rml_recv_posted = true;
}
if (xoob_priority > 100) {
xoob_priority = 100;
} else if (xoob_priority < -1) {
xoob_priority = -1;
}
(*cpc)->data.cbm_component = &ompi_btl_openib_connect_xoob;
(*cpc)->data.cbm_priority = xoob_priority;
(*cpc)->data.cbm_modex_message = NULL;
@ -1059,10 +1065,14 @@ static int xoob_component_query(mca_btl_openib_module_t *openib_btl,
/* Open - this functions sets up any xoob specific commandline params */
static void xoob_component_register(void)
{
mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version,
"connect_xoob_priority",
"The selection method priority for xoob",
false, false, xoob_priority, &xoob_priority);
xoob_priority = 60;
(void) mca_base_component_var_register(&mca_btl_openib_component.super.btl_version,
"connect_xoob_priority",
"The selection method priority for xoob",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&xoob_priority);
if (xoob_priority > 100) {
xoob_priority = 100;

Просмотреть файл

@ -46,7 +46,6 @@
#include "opal/util/argv.h"
#include "ompi/mca/btl/btl.h"
#include "opal/mca/base/mca_base_param.h"
#include "ompi/runtime/ompi_module_exchange.h"
#include "ompi/runtime/mpiruntime.h"
#include "ompi/mca/mpool/base/base.h"
@ -104,27 +103,29 @@ struct mca_btl_sctp_proc_table_node *sender_proc_table;
*/
static inline char* mca_btl_sctp_param_register_string(
const char* param_name,
const char* default_value)
const char* param_name,
const char* default_value,
char **storage)
{
char *param_value;
char *help_string = NULL;
mca_base_param_reg_string(&mca_btl_sctp_component.super.btl_version,
param_name, help_string, false, false,
default_value, &param_value);
return param_value;
*storage = default_value;
(void) mca_base_component_var_register(&mca_btl_sctp_component.super.btl_version,
param_name, NULL, MCA_BASE_VAR_TYPE_STRING,
NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, storage);
return *storage;
}
static inline int mca_btl_sctp_param_register_int(
const char* param_name,
int default_value)
int default_value,
int *storage)
{
int param_value;
char *help_string = NULL;
mca_base_param_reg_int(&mca_btl_sctp_component.super.btl_version,
param_name, help_string, false, false,
default_value, &param_value);
return param_value;
*storage = default_value;
(void) mca_base_component_var_register(&mca_btl_sctp_component.super.btl_version,
param_name, NULL, MCA_BASE_VAR_TYPE_INT,
NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, storage);
return *storage;
}
@ -166,39 +167,38 @@ OBJ_CLASS_INSTANCE(
static void mca_btl_sctp_component_recv_handler(int, short, void*); /* for 1-1 */
/* mca_btl_sctp_recv_handler(int, short, void*) for 1-many is in btl_sctp_recv_handler.h */
static bool use_nagle = false;
static int mca_btl_sctp_component_register(void)
{
/* register SCTP component parameters */
/* num links */
mca_btl_sctp_component.sctp_if_include =
mca_btl_sctp_param_register_string("if_include", "");
mca_btl_sctp_component.sctp_if_exclude =
mca_btl_sctp_param_register_string("if_exclude", "lo");
mca_btl_sctp_component.sctp_free_list_num =
mca_btl_sctp_param_register_int ("free_list_num", 8);
mca_btl_sctp_component.sctp_free_list_max =
mca_btl_sctp_param_register_int ("free_list_max", -1);
mca_btl_sctp_component.sctp_free_list_inc =
mca_btl_sctp_param_register_int ("free_list_inc", 32);
mca_btl_sctp_component.sctp_sndbuf =
mca_btl_sctp_param_register_int ("sndbuf", 128*1024);
mca_btl_sctp_component.sctp_rcvbuf =
mca_btl_sctp_param_register_int ("rcvbuf", 128*1024);
mca_btl_sctp_component.sctp_endpoint_cache =
mca_btl_sctp_param_register_int ("endpoint_cache", 30*1024);
mca_btl_sctp_component.sctp_use_nodelay =
!mca_btl_sctp_param_register_int ("use_nagle", 0);
mca_btl_sctp_param_register_string("if_include", "", &mca_btl_sctp_component.sctp_if_include);
mca_btl_sctp_param_register_string("if_exclude", "lo", &mca_btl_sctp_component.sctp_if_exclude);
mca_btl_sctp_param_register_int("free_list_num", 8, &mca_btl_sctp_component.sctp_free_list_num);
mca_btl_sctp_param_register_int("free_list_max", -1, &mca_btl_sctp_component.sctp_free_list_max);
mca_btl_sctp_param_register_int("free_list_inc", 32, &mca_btl_sctp_component.sctp_free_list_inc);
mca_btl_sctp_param_register_int("sndbuf", 128*1024, &mca_btl_sctp_component.sctp_sndbuf);
mca_btl_sctp_param_register_int("rcvbuf", 128*1024, &mca_btl_sctp_component.sctp_rcvbuf);
mca_btl_sctp_param_register_int("endpoint_cache", 30*1024, &mca_btl_sctp_component.sctp_endpoint_cache);
(void) mca_base_component_var_register(&mca_btl_sctp_component.super.btl_version,
"use_nagle", NULL, MCA_BASE_VAR_TYPE_BOOL,
NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &use_nagle);
/* port_min */
/* port_range */
/* use a single one-to-many socket by default except in Solaris (see
* the configure.m4 file)
*/
mca_base_param_reg_int(&mca_btl_sctp_component.super.btl_version,
"if_11", "If 0, have one SCTP BTL module and let SCTP do multilink scheduling. If non-zero, have an SCTP BTL module per link and let the PML do the scheduling.",
false, false,
OMPI_MCA_BTL_SCTP_USE_ONE_TO_ONE_SOCKET,
&mca_btl_sctp_component.sctp_if_11);
mca_btl_sctp_component.sctp_if_11 = OMPI_MCA_BTL_SCTP_USE_ONE_TO_ONE_SOCKET;
(void) mca_base_component_var_register(&mca_btl_sctp_component.super.btl_version,
"if_11", "If 0, have one SCTP BTL module and let SCTP do multilink scheduling. If non-zero, have an SCTP BTL module per link and let the PML do the scheduling.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_btl_sctp_component.sctp_if_11);
/* have lower exclusivity than tcp */
mca_btl_sctp_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_LOW;
@ -234,6 +234,8 @@ static int mca_btl_sctp_component_open(void)
mca_btl_sctp_component.sctp_num_btls=0;
/* addr_count */
mca_btl_sctp_component.sctp_btls=NULL;
mca_btl_sctp_component.sctp_use_nodelay = !use_nagle;
/* initialize objects */
OBJ_CONSTRUCT(&mca_btl_sctp_component.sctp_lock, opal_mutex_t);
@ -257,7 +259,7 @@ static int mca_btl_sctp_component_open(void)
/* if_include and if_exclude need to be mutually exclusive */
if (OPAL_SUCCESS !=
mca_base_param_check_exclusive_string(
mca_base_var_check_exclusive("ompi",
mca_btl_sctp_component.super.btl_version.mca_type_name,
mca_btl_sctp_component.super.btl_version.mca_component_name,
"if_include",
@ -374,11 +376,11 @@ static int mca_btl_sctp_create(int if_index, const char* if_name)
/* allow user to specify interface bandwidth */
sprintf(param, "bandwidth_%s", if_name);
btl->super.btl_bandwidth = mca_btl_sctp_param_register_int(param, 0);
mca_btl_sctp_param_register_int(param, 0, &btl->super.btl_bandwidth);
/* allow user to override/specify latency ranking */
sprintf(param, "latency_%s", if_name);
btl->super.btl_latency = mca_btl_sctp_param_register_int(param, 0);
mca_btl_sctp_param_register_int(param, 0, &btl->super.btl_latency);
#if 0 && OPAL_ENABLE_DEBUG
BTL_OUTPUT(("interface: %s bandwidth %d latency %d",
@ -420,11 +422,11 @@ static int mca_btl_sctp_create(int if_index, const char* if_name)
/* allow user to specify interface bandwidth */
sprintf(param, "bandwidth_%s", if_name);
btl->super.btl_bandwidth = mca_btl_sctp_param_register_int(param, 0);
mca_btl_sctp_param_register_int(param, 0, &btl->super.btl_bandwidth);
/* allow user to override/specify latency ranking */
sprintf(param, "latency_%s", if_name);
btl->super.btl_latency = mca_btl_sctp_param_register_int(param, 0);
mca_btl_sctp_param_register_int(param, 0, &btl->super.btl_latency);
#if 0 && OPAL_ENABLE_DEBUG
BTL_OUTPUT(("interface: %s bandwidth %d latency %d",

Просмотреть файл

@ -39,7 +39,6 @@
#include "ompi/constants.h"
#include "opal/mca/event/event.h"
#include "opal/mca/base/mca_base_param.h"
#include "ompi/mca/mpool/base/base.h"
#include "ompi/mca/btl/base/btl_base_error.h"
#include "btl_sctp.h"

Просмотреть файл

@ -30,11 +30,10 @@
#include "ompi/constants.h"
#include "opal/runtime/opal.h"
#include "opal/mca/event/event.h"
#include "opal/mca/base/mca_base_param.h"
#include "btl_self.h"
#include "btl_self_frag.h"
static int mca_btl_self_component_register(void);
/*
* Shared Memory (SELF) component instance.
@ -52,7 +51,9 @@ mca_btl_self_component_t mca_btl_self_component = {
OMPI_MINOR_VERSION, /* MCA component minor version */
OMPI_RELEASE_VERSION, /* MCA component release version */
mca_btl_self_component_open, /* component open */
mca_btl_self_component_close /* component close */
mca_btl_self_component_close, /* component close */
NULL,
mca_btl_self_component_register
},
{
/* The component is checkpoint ready */
@ -69,18 +70,33 @@ mca_btl_self_component_t mca_btl_self_component = {
* component parameters.
*/
int mca_btl_self_component_open(void)
static int mca_btl_self_component_register(void)
{
mca_base_var_group_component_register(&mca_btl_self_component.super.btl_version,
"BTL for self communication");
/* register SELF component parameters */
mca_base_param_reg_int( (mca_base_component_t*)&mca_btl_self_component, "free_list_num",
"Number of fragments by default", false, false,
0, &mca_btl_self_component.free_list_num );
mca_base_param_reg_int( (mca_base_component_t*)&mca_btl_self_component, "free_list_max",
"Maximum number of fragments", false, false,
-1, &mca_btl_self_component.free_list_max );
mca_base_param_reg_int( (mca_base_component_t*)&mca_btl_self_component, "free_list_inc",
"Increment by this number of fragments", false, false,
32, &mca_btl_self_component.free_list_inc );
mca_btl_self_component.free_list_num = 0;
(void) mca_base_component_var_register(&mca_btl_self_component.super.btl_version, "free_list_num",
"Number of fragments by default",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_btl_self_component.free_list_num);
mca_btl_self_component.free_list_max = -1;
(void) mca_base_component_var_register(&mca_btl_self_component.super.btl_version, "free_list_max",
"Maximum number of fragments",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_btl_self_component.free_list_max);
mca_btl_self_component.free_list_inc = 32;
(void) mca_base_component_var_register(&mca_btl_self_component.super.btl_version, "free_list_inc",
"Increment by this number of fragments",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_btl_self_component.free_list_inc);
mca_btl_self.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH;
mca_btl_self.btl_eager_limit = 128 * 1024;
@ -96,11 +112,17 @@ int mca_btl_self_component_open(void)
mca_btl_base_param_register(&mca_btl_self_component.super.btl_version,
&mca_btl_self);
return OMPI_SUCCESS;
}
int mca_btl_self_component_open(void)
{
/* initialize objects */
OBJ_CONSTRUCT(&mca_btl_self_component.self_lock, opal_mutex_t);
OBJ_CONSTRUCT(&mca_btl_self_component.self_frags_eager, ompi_free_list_t);
OBJ_CONSTRUCT(&mca_btl_self_component.self_frags_send, ompi_free_list_t);
OBJ_CONSTRUCT(&mca_btl_self_component.self_frags_rdma, ompi_free_list_t);
return OMPI_SUCCESS;
}

Просмотреть файл

@ -130,7 +130,7 @@ struct mca_btl_sm_component_t {
int sm_free_list_num; /**< initial size of free lists */
int sm_free_list_max; /**< maximum size of free lists */
int sm_free_list_inc; /**< number of elements to alloc when growing free lists */
int32_t sm_max_procs; /**< upper limit on the number of processes using the shared memory pool */
int sm_max_procs; /**< upper limit on the number of processes using the shared memory pool */
int sm_extra_procs; /**< number of extra procs to allow */
char* sm_mpool_name; /**< name of shared memory pool module */
mca_mpool_base_module_t **sm_mpools; /**< shared memory pools (one for each memory node) */
@ -149,8 +149,8 @@ struct mca_btl_sm_component_t {
but this one, in process private memory, is
a real virtual address */
uint16_t *mem_nodes; /**< cached copy of mem nodes of each local rank */
size_t fifo_size; /**< number of FIFO queue entries */
size_t fifo_lazy_free; /**< number of reads before lazy fifo free is triggered */
unsigned int fifo_size; /**< number of FIFO queue entries */
unsigned int fifo_lazy_free; /**< number of reads before lazy fifo free is triggered */
int nfifos; /**< number of FIFOs per receiver */
int32_t num_smp_procs; /**< current number of smp procs on this host */
int32_t my_smp_rank; /**< My SMP process rank. Used for accessing
@ -190,7 +190,7 @@ struct mca_btl_sm_component_t {
/** MCA: minimal message size (bytes) to offload on DMA engine
when using knem */
uint32_t knem_dma_min;
unsigned int knem_dma_min;
/** MCA: how many simultaneous ongoing knem operations to
support */

Просмотреть файл

@ -42,7 +42,6 @@
#include <sys/stat.h> /* for mkfifo */
#endif /* HAVE_SYS_STAT_H */
#include "opal/mca/base/mca_base_param.h"
#include "opal/mca/shmem/base/base.h"
#include "opal/mca/shmem/shmem.h"
#include "opal/util/bit_ops.h"
@ -78,6 +77,8 @@ typedef enum {
MCA_BTL_SM_RNDV_MOD_MPOOL
} mca_btl_sm_rndv_module_type_t;
static bool btl_sm_have_knem = OMPI_BTL_SM_HAVE_KNEM;
/*
* Shared Memory (SM) component instance.
*/
@ -112,116 +113,33 @@ mca_btl_sm_component_t mca_btl_sm_component = {
* utility routines for parameter registration
*/
/*
 * Register a string parameter of the sm BTL component through
 * mca_base_param_reg_string() and return the value it reports.
 *
 * param_name     name of the parameter
 * default_value  default handed to the registration call
 *
 * The registration return code is ignored.
 */
static inline char* mca_btl_sm_param_register_string(
    const char* param_name,
    const char* default_value)
{
    char *registered;

    (void) mca_base_param_reg_string(&mca_btl_sm_component.super.btl_version,
                                     param_name,
                                     NULL,
                                     false, false,
                                     default_value,
                                     &registered);
    return registered;
}
static inline int mca_btl_sm_param_register_int(
const char* param_name,
int default_value)
int default_value,
int *storage)
{
int param_value = default_value;
(void) mca_base_param_reg_int (&mca_btl_sm_component.super.btl_version,
param_name, NULL, false, false, default_value,
&param_value);
return param_value;
*storage = default_value;
(void) mca_base_component_var_register (&mca_btl_sm_component.super.btl_version,
param_name, NULL, MCA_BASE_VAR_TYPE_INT,
NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, storage);
return *storage;
}
static int sm_register(void)
/*
 * Register an unsigned-int-valued MCA variable for the sm BTL component.
 *
 * param_name     name of the variable
 * default_value  value placed in *storage before registration
 * storage        backing store handed to the variable system
 *
 * The variable is registered without help text, as
 * MCA_BASE_VAR_TYPE_UNSIGNED_INT, at OPAL_INFO_LVL_9 with read-only scope.
 * Returns the content of *storage after registration; the registration
 * return code itself is ignored.
 */
static inline unsigned int mca_btl_sm_param_register_uint(
    const char* param_name,
    unsigned int default_value,
    unsigned int *storage)
{
    *storage = default_value;
    (void) mca_base_component_var_register (&mca_btl_sm_component.super.btl_version,
                                            param_name, NULL, MCA_BASE_VAR_TYPE_UNSIGNED_INT,
                                            NULL, 0, 0, OPAL_INFO_LVL_9,
                                            MCA_BASE_VAR_SCOPE_READONLY, storage);
    return *storage;
}
/* Register an MCA param to indicate whether we have knem support
or not */
mca_base_param_reg_int(&mca_btl_sm_component.super.btl_version,
"have_knem_support", "Whether this component supports the knem Linux kernel module or not",
false, true, OMPI_BTL_SM_HAVE_KNEM, NULL);
if (OMPI_BTL_SM_HAVE_KNEM) {
i = -1;
} else {
i = 0;
}
mca_base_param_reg_int(&mca_btl_sm_component.super.btl_version,
"use_knem",
"Whether knem support is desired or not "
"(negative = try to enable knem support, but continue even if it is not available, 0 = do not enable knem support, positive = try to enable knem support and fail if it is not available)",
false, false, i, &i);
if (OMPI_BTL_SM_HAVE_KNEM) {
mca_btl_sm_component.use_knem = i;
} else {
if (i > 0) {
opal_show_help("help-mpi-btl-sm.txt",
"knem requested but not supported", true,
ompi_process_info.nodename);
return OMPI_ERROR;
}
mca_btl_sm_component.use_knem = 0;
}
/* Currently disabling DMA mode by default; it's not clear that
this is useful in all applications and architectures. */
mca_base_param_reg_int(&mca_btl_sm_component.super.btl_version,
"knem_dma_min",
"Minimum message size (in bytes) to use the knem DMA mode; ignored if knem does not support DMA mode (0 = do not use the knem DMA mode)",
false, false, 0, &i);
mca_btl_sm_component.knem_dma_min = (uint32_t) i;
mca_base_param_reg_int(&mca_btl_sm_component.super.btl_version,
"knem_max_simultaneous",
"Max number of simultaneous ongoing knem operations to support (0 = do everything synchronously, which probably gives the best large message latency; >0 means to do all operations asynchronously, which supports better overlap for simultaneous large message sends)",
false, false, 0,
&mca_btl_sm_component.knem_max_simultaneous);
/* CMA parameters */
mca_base_param_reg_int(&mca_btl_sm_component.super.btl_version,
"use_cma",
"Whether or not to enable CMA",
false, false, 0, &mca_btl_sm_component.use_cma);
/* register SM component parameters */
mca_btl_sm_component.sm_free_list_num =
mca_btl_sm_param_register_int("free_list_num", 8);
mca_btl_sm_component.sm_free_list_max =
mca_btl_sm_param_register_int("free_list_max", -1);
mca_btl_sm_component.sm_free_list_inc =
mca_btl_sm_param_register_int("free_list_inc", 64);
mca_btl_sm_component.sm_max_procs =
mca_btl_sm_param_register_int("max_procs", -1);
mca_btl_sm_component.sm_mpool_name =
mca_btl_sm_param_register_string("mpool", "sm");
mca_btl_sm_component.fifo_size =
mca_btl_sm_param_register_int("fifo_size", 4096);
mca_btl_sm_component.nfifos =
mca_btl_sm_param_register_int("num_fifos", 1);
mca_btl_sm_component.fifo_lazy_free =
mca_btl_sm_param_register_int("fifo_lazy_free", 120);
/* default number of extra procs to allow for future growth */
mca_btl_sm_component.sm_extra_procs =
mca_btl_sm_param_register_int("sm_extra_procs", 0);
mca_btl_sm.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH-1;
mca_btl_sm.super.btl_eager_limit = 4*1024;
mca_btl_sm.super.btl_rndv_eager_limit = 4*1024;
mca_btl_sm.super.btl_max_send_size = 32*1024;
mca_btl_sm.super.btl_rdma_pipeline_send_length = 64*1024;
mca_btl_sm.super.btl_rdma_pipeline_frag_size = 64*1024;
mca_btl_sm.super.btl_min_rdma_pipeline_size = 64*1024;
mca_btl_sm.super.btl_flags = MCA_BTL_FLAGS_SEND;
static int mca_btl_sm_component_verify(void) {
#if OMPI_BTL_SM_HAVE_KNEM || OMPI_BTL_SM_HAVE_CMA
if (mca_btl_sm_component.use_knem || mca_btl_sm_component.use_cma) {
mca_btl_sm.super.btl_flags |= MCA_BTL_FLAGS_GET;
@ -234,6 +152,93 @@ static int sm_register(void)
}
#endif /* OMPI_BTL_SM_HAVE_KNEM || OMPI_BTL_SM_HAVE_CMA */
return mca_btl_base_param_verify(&mca_btl_sm.super);
}
static int sm_register(void)
{
mca_base_var_flag_t var_flags;
/* Register an MCA param to indicate whether we have knem support
or not */
(void) mca_base_component_var_register(&mca_btl_sm_component.super.btl_version,
"have_knem_support", "Whether this component supports the knem Linux kernel module or not",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&btl_sm_have_knem);
if (OMPI_BTL_SM_HAVE_KNEM) {
var_flags = 0;
mca_btl_sm_component.use_knem = -1;
} else {
var_flags = MCA_BASE_VAR_FLAG_DEFAULT_ONLY;
mca_btl_sm_component.use_knem = 0;
}
(void) mca_base_component_var_register(&mca_btl_sm_component.super.btl_version,
"use_knem", "Whether knem support is desired or not "
"(negative = try to enable knem support, but continue "
"even if it is not available, 0 = do not enable knem "
"support, positive = try to enable knem support and "
"fail if it is not available)", MCA_BASE_VAR_TYPE_INT,
NULL, 0, var_flags, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &mca_btl_sm_component.use_knem);
/* Currently disabling DMA mode by default; it's not clear that
this is useful in all applications and architectures. */
mca_btl_sm_component.knem_dma_min = 0;
(void) mca_base_component_var_register(&mca_btl_sm_component.super.btl_version,
"knem_dma_min",
"Minimum message size (in bytes) to use the knem DMA mode; "
"ignored if knem does not support DMA mode (0 = do not use the "
"knem DMA mode)", MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0,
0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
&mca_btl_sm_component.knem_dma_min);
mca_btl_sm_component.knem_max_simultaneous = 0;
(void) mca_base_component_var_register(&mca_btl_sm_component.super.btl_version,
"knem_max_simultaneous",
"Max number of simultaneous ongoing knem operations to support "
"(0 = do everything synchronously, which probably gives the "
"best large message latency; >0 means to do all operations "
"asynchronously, which supports better overlap for simultaneous "
"large message sends)", MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0,
0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
&mca_btl_sm_component.knem_max_simultaneous);
/* CMA parameters */
mca_btl_sm_component.use_cma = 0;
(void) mca_base_component_var_register(&mca_btl_sm_component.super.btl_version,
"use_cma", "Whether or not to enable CMA",
MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, 0,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
&mca_btl_sm_component.use_cma);
/* register SM component parameters */
mca_btl_sm_param_register_int("free_list_num", 8, &mca_btl_sm_component.sm_free_list_num);
mca_btl_sm_param_register_int("free_list_max", -1, &mca_btl_sm_component.sm_free_list_max);
mca_btl_sm_param_register_int("free_list_inc", 64, &mca_btl_sm_component.sm_free_list_inc);
mca_btl_sm_param_register_int("max_procs", -1, &mca_btl_sm_component.sm_max_procs);
/* there is no practical use for the mpool name parameter since mpool resources differ
between components */
mca_btl_sm_component.sm_mpool_name = "sm";
mca_btl_sm_param_register_uint("fifo_size", 4096, &mca_btl_sm_component.fifo_size);
mca_btl_sm_param_register_int("num_fifos", 1, &mca_btl_sm_component.nfifos);
mca_btl_sm_param_register_uint("fifo_lazy_free", 120, &mca_btl_sm_component.fifo_lazy_free);
/* default number of extra procs to allow for future growth */
mca_btl_sm_param_register_int("sm_extra_procs", 0, &mca_btl_sm_component.sm_extra_procs);
mca_btl_sm.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH-1;
mca_btl_sm.super.btl_eager_limit = 4*1024;
mca_btl_sm.super.btl_rndv_eager_limit = 4*1024;
mca_btl_sm.super.btl_max_send_size = 32*1024;
mca_btl_sm.super.btl_rdma_pipeline_send_length = 64*1024;
mca_btl_sm.super.btl_rdma_pipeline_frag_size = 64*1024;
mca_btl_sm.super.btl_min_rdma_pipeline_size = 64*1024;
mca_btl_sm.super.btl_flags = MCA_BTL_FLAGS_SEND;
mca_btl_sm.super.btl_seg_size = sizeof (mca_btl_sm_segment_t);
mca_btl_sm.super.btl_bandwidth = 9000; /* Mbs */
mca_btl_sm.super.btl_latency = 1; /* Microsecs */
@ -242,7 +247,7 @@ static int sm_register(void)
mca_btl_base_param_register(&mca_btl_sm_component.super.btl_version,
&mca_btl_sm.super);
return OMPI_SUCCESS;
return mca_btl_sm_component_verify();
}
/*
@ -252,6 +257,10 @@ static int sm_register(void)
static int mca_btl_sm_component_open(void)
{
if (OMPI_SUCCESS != mca_btl_sm_component_verify()) {
return OMPI_ERROR;
}
mca_btl_sm_component.sm_max_btls = 1;
/* make sure the number of fifos is a power of 2 */
@ -352,10 +361,6 @@ static int mca_btl_sm_component_close(void)
}
#endif
if (NULL != mca_btl_sm_component.sm_mpool_name) {
free(mca_btl_sm_component.sm_mpool_name);
}
CLEANUP:
/* return */
@ -423,40 +428,26 @@ static int
get_min_mpool_size(mca_btl_sm_component_t *comp_ptr,
                   size_t *out_size)
{
    /*
     * Fetch the "min_size" variable of the mpool component named by
     * comp_ptr->sm_mpool_name and store it in *out_size.
     *
     * Returns OMPI_SUCCESS on success, OMPI_ERR_NOT_FOUND when the variable
     * cannot be found, and OMPI_ERROR when its value cannot be read.
     * *out_size is only written on success.
     */
    const char *type_name = "mpool";
    const char *param_name = "min_size";
    const mca_base_var_storage_t *min_size;
    int id;

    id = mca_base_var_find("ompi", type_name, comp_ptr->sm_mpool_name,
                           param_name);
    if (0 > id) {
        opal_output(0, "mca_base_var_find: failure looking for %s_%s_%s\n",
                    type_name, comp_ptr->sm_mpool_name, param_name);
        return OMPI_ERR_NOT_FOUND;
    }

    /* min_size is a const pointer filled in by the variable system; it is
     * only read here and must not be freed by this function */
    if (OPAL_SUCCESS != mca_base_var_get_value(id, &min_size, NULL, NULL)) {
        opal_output(0, "mca_base_var_get_value failure\n");
        return OMPI_ERROR;
    }

    /* the min_size variable is an unsigned long long */
    *out_size = (size_t) min_size->ullval;

    return OMPI_SUCCESS;
}

Просмотреть файл

@ -127,7 +127,7 @@ struct mca_btl_smcuda_component_t {
int sm_free_list_num; /**< initial size of free lists */
int sm_free_list_max; /**< maximum size of free lists */
int sm_free_list_inc; /**< number of elements to alloc when growing free lists */
int32_t sm_max_procs; /**< upper limit on the number of processes using the shared memory pool */
int sm_max_procs; /**< upper limit on the number of processes using the shared memory pool */
int sm_extra_procs; /**< number of extra procs to allow */
char* sm_mpool_name; /**< name of shared memory pool module */
mca_mpool_base_module_t **sm_mpools; /**< shared memory pools (one for each memory node) */

Просмотреть файл

@ -41,7 +41,6 @@
#include <sys/stat.h> /* for mkfifo */
#endif /* HAVE_SYS_STAT_H */
#include "opal/mca/base/mca_base_param.h"
#include "opal/mca/shmem/base/base.h"
#include "opal/mca/shmem/shmem.h"
#include "opal/util/bit_ops.h"
@ -116,55 +115,47 @@ mca_btl_smcuda_component_t mca_btl_smcuda_component = {
static inline char* mca_btl_smcuda_param_register_string(
const char* param_name,
const char* default_value)
const char* default_value,
char **storage)
{
char *param_value;
(void) mca_base_param_reg_string (&mca_btl_smcuda_component.super.btl_version,
param_name, NULL, false, false, default_value,
&param_value);
return param_value;
*storage = default_value;
(void) mca_base_component_var_register(&mca_btl_smcuda_component.super.btl_version,
param_name, NULL, MCA_BASE_VAR_TYPE_STRING,
NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, storage);
return *storage;
}
static inline int mca_btl_smcuda_param_register_int(
const char* param_name,
int default_value)
int default_value,
int *storage)
{
int param_value = default_value;
(void) mca_base_param_reg_int (&mca_btl_smcuda_component.super.btl_version,
param_name, NULL, false, false, default_value,
&param_value);
return param_value;
*storage = default_value;
(void) mca_base_component_var_register(&mca_btl_smcuda_component.super.btl_version,
param_name, NULL, MCA_BASE_VAR_TYPE_INT,
NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, storage);
return *storage;
}
static int smcuda_register(void)
{
/* register SM component parameters */
mca_btl_smcuda_component.sm_free_list_num =
mca_btl_smcuda_param_register_int("free_list_num", 8);
mca_btl_smcuda_component.sm_free_list_max =
mca_btl_smcuda_param_register_int("free_list_max", -1);
mca_btl_smcuda_component.sm_free_list_inc =
mca_btl_smcuda_param_register_int("free_list_inc", 64);
mca_btl_smcuda_component.sm_max_procs =
mca_btl_smcuda_param_register_int("max_procs", -1);
mca_btl_smcuda_component.sm_mpool_name =
mca_btl_smcuda_param_register_string("mpool", "sm");
mca_btl_smcuda_component.fifo_size =
mca_btl_smcuda_param_register_int("fifo_size", 4096);
mca_btl_smcuda_component.nfifos =
mca_btl_smcuda_param_register_int("num_fifos", 1);
mca_btl_smcuda_param_register_int("free_list_num", 8, &mca_btl_smcuda_component.sm_free_list_num);
mca_btl_smcuda_param_register_int("free_list_max", -1, &mca_btl_smcuda_component.sm_free_list_max);
mca_btl_smcuda_param_register_int("free_list_inc", 64, &mca_btl_smcuda_component.sm_free_list_inc);
mca_btl_smcuda_param_register_int("max_procs", -1, &mca_btl_smcuda_component.sm_max_procs);
/* NTH: selection variables for mpool names don't really work so hard-code the mpool name */
mca_btl_smcuda_component.sm_mpool_name = "sm";
mca_btl_smcuda_param_register_int("fifo_size", 4096, &mca_btl_smcuda_component.fifo_size);
mca_btl_smcuda_param_register_int("num_fifos", 1, &mca_btl_smcuda_component.nfifos);
mca_btl_smcuda_component.fifo_lazy_free =
mca_btl_smcuda_param_register_int("fifo_lazy_free", 120);
mca_btl_smcuda_param_register_int("fifo_lazy_free", 120, &mca_btl_smcuda_component.fifo_lazy_free);
/* default number of extra procs to allow for future growth */
mca_btl_smcuda_component.sm_extra_procs =
mca_btl_smcuda_param_register_int("sm_extra_procs", 0);
mca_btl_smcuda_param_register_int("sm_extra_procs", 0, &mca_btl_smcuda_component.sm_extra_procs);
#if OMPI_CUDA_SUPPORT
/* Lower priority when CUDA support is not requested */
@ -204,6 +195,10 @@ static int smcuda_register(void)
static int mca_btl_smcuda_component_open(void)
{
if (OMPI_SUCCESS != mca_btl_base_param_verify(&mca_btl_smcuda.super)) {
return OMPI_ERROR;
}
mca_btl_smcuda_component.sm_max_btls = 1;
/* make sure the number of fifos is a power of 2 */
@ -289,10 +284,6 @@ static int mca_btl_smcuda_component_close(void)
}
#endif
if (NULL != mca_btl_smcuda_component.sm_mpool_name) {
free(mca_btl_smcuda_component.sm_mpool_name);
}
CLEANUP:
/* return */
@ -362,38 +353,24 @@ get_min_mpool_size(mca_btl_smcuda_component_t *comp_ptr,
{
char *type_name = "mpool";
char *param_name = "min_size";
char *min_size = NULL;
const mca_base_var_storage_t *min_size;
int id = 0;
size_t default_min = 67108864;
size_t size = 0;
long tmp_size = 0;
if (0 > (id = mca_base_param_find(type_name, comp_ptr->sm_mpool_name,
param_name))) {
opal_output(0, "mca_base_param_find: failure looking for %s_%s_%s\n",
if (0 > (id = mca_base_var_find("ompi", type_name, comp_ptr->sm_mpool_name,
param_name))) {
opal_output(0, "mca_base_var_find: failure looking for %s_%s_%s\n",
type_name, comp_ptr->sm_mpool_name, param_name);
return OMPI_ERR_NOT_FOUND;
}
if (OPAL_ERROR == mca_base_param_lookup_string(id, &min_size)) {
opal_output(0, "mca_base_param_lookup_string failure\n");
return OMPI_ERROR;
}
errno = 0;
tmp_size = strtol(min_size, (char **)NULL, 10);
if (ERANGE == errno || EINVAL == errno || tmp_size <= 0) {
opal_output(0, "mca_btl_sm::get_min_mpool_size: "
"Unusable %s_%s_min_size provided. "
"Continuing with %lu.", type_name,
comp_ptr->sm_mpool_name,
(unsigned long)default_min);
size = default_min;
if (OPAL_SUCCESS != mca_base_var_get_value(id, &min_size, NULL, NULL)) {
opal_output(0, "mca_base_var_get_value failure!");
return OMPI_ERROR;
}
else {
size = (size_t)tmp_size;
}
free(min_size);
*out_size = size;
/* min_size is a unsigned long long */
*out_size = (size_t) min_size->ullval;
return OMPI_SUCCESS;
}

Просмотреть файл

@ -54,7 +54,7 @@ struct mca_btl_tcp_component_t {
mca_btl_base_component_2_0_0_t super; /**< base BTL component */
uint32_t tcp_addr_count; /**< total number of addresses */
uint32_t tcp_num_btls; /**< number of interfaces available to the TCP component */
uint32_t tcp_num_links; /**< number of logical links per physical device */
unsigned int tcp_num_links; /**< number of logical links per physical device */
struct mca_btl_tcp_module_t **tcp_btls; /**< array of available BTL modules */
struct mca_btl_tcp_proc_t* tcp_local; /**< local proc struct */
int tcp_free_list_num; /**< initial size of free lists */
@ -68,14 +68,14 @@ struct mca_btl_tcp_component_t {
opal_event_t tcp_recv_event; /**< recv event for IPv4 listen socket */
int tcp_listen_sd; /**< IPv4 listen socket for incoming connection requests */
unsigned short tcp_listen_port; /**< IPv4 listen port */
int32_t tcp_port_min; /**< IPv4 minimum port */
int32_t tcp_port_range; /**< IPv4 port range */
int tcp_port_min; /**< IPv4 minimum port */
int tcp_port_range; /**< IPv4 port range */
#if OPAL_WANT_IPV6
opal_event_t tcp6_recv_event; /**< recv event for IPv6 listen socket */
int tcp6_listen_sd; /**< IPv6 listen socket for incoming connection requests */
unsigned short tcp6_listen_port; /**< IPv6 listen port */
int32_t tcp6_port_min; /**< IPv4 minimum port */
int32_t tcp6_port_range; /**< IPv4 port range */
int tcp6_port_min; /**< IPv4 minimum port */
int tcp6_port_range; /**< IPv4 port range */
#endif
/* Port range restriction */
@ -91,7 +91,7 @@ struct mca_btl_tcp_component_t {
ompi_free_list_t tcp_frag_user;
/* Do we want to use TCP_NODELAY? */
int tcp_use_nodelay;
int tcp_not_use_nodelay;
/* If btl_tcp_if_seq was specified, this is the one interface
(name) that we're supposed to use. */

Просмотреть файл

@ -56,7 +56,6 @@
#include "opal/util/argv.h"
#include "opal/util/net.h"
#include "opal/util/show_help.h"
#include "opal/mca/base/mca_base_param.h"
#include "ompi/constants.h"
#include "ompi/mca/btl/btl.h"
@ -78,6 +77,7 @@ static int mca_btl_tcp_component_register(void);
static int mca_btl_tcp_component_open(void);
static int mca_btl_tcp_component_close(void);
static char *mca_btl_tcp_if_seq_string;
mca_btl_tcp_component_t mca_btl_tcp_component = {
{
@ -113,25 +113,43 @@ mca_btl_tcp_component_t mca_btl_tcp_component = {
/*
 * Register a string MCA variable for the TCP BTL component.
 *
 * NOTE(review): this span is a diff rendering without +/- markers, so the
 * removed mca_base_param_reg_string() body and the replacement
 * mca_base_component_var_register() body appear interleaved below.
 *
 * New form: seeds *storage with default_value, registers the variable with
 * storage as its backing store, and returns whatever *storage holds after
 * registration. The registration return code is deliberately discarded.
 * Per the commit notes, string values are duplicated at registration time and
 * the duplicate is owned by the mca_base_var system (callers must not free it).
 */
static inline char* mca_btl_tcp_param_register_string(
const char* param_name,
const char* help_string,
const char* default_value)
const char* default_value,
char **storage)
{
char *value;
mca_base_param_reg_string(&mca_btl_tcp_component.super.btl_version,
param_name, help_string, false, false,
default_value, &value);
return value;
/* the cast drops const: storage is char ** while the default is a const string */
*storage = (char *) default_value;
(void) mca_base_component_var_register(&mca_btl_tcp_component.super.btl_version,
param_name, help_string, MCA_BASE_VAR_TYPE_STRING,
NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, storage);
return *storage;
}
/*
 * Register an int MCA variable for the TCP BTL component.
 *
 * NOTE(review): this span is a diff rendering without +/- markers, so the
 * removed mca_base_param_reg_int() body and the replacement
 * mca_base_component_var_register() body appear interleaved below.
 *
 * New form: writes default_value into *storage, registers the variable with
 * storage as its backing store (type MCA_BASE_VAR_TYPE_INT, read-only scope),
 * and returns the value found in *storage afterwards. The registration return
 * code is deliberately discarded.
 */
static inline int mca_btl_tcp_param_register_int(
const char* param_name,
const char* help_string,
int default_value)
int default_value,
int *storage)
{
int value;
mca_base_param_reg_int(&mca_btl_tcp_component.super.btl_version,
param_name, help_string, false, false,
default_value, &value);
return value;
/* the backing store must hold the default before the variable is registered */
*storage = default_value;
(void) mca_base_component_var_register(&mca_btl_tcp_component.super.btl_version,
param_name, help_string, MCA_BASE_VAR_TYPE_INT,
NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, storage);
return *storage;
}
/*
 * Register an unsigned int MCA variable for the TCP BTL component.
 *
 * param_name    - variable name (registered under this component)
 * help_string   - description passed to the registration call (may be NULL)
 * default_value - value placed in *storage before registration
 * storage       - backing store handed to the variable system
 *
 * Returns the content of *storage once registration has been attempted; the
 * registration return code itself is intentionally ignored.
 */
static inline unsigned int mca_btl_tcp_param_register_uint(
        const char* param_name,
        const char* help_string,
        unsigned int default_value,
        unsigned int *storage)
{
    /* seed the backing store first: it is what the variable is registered with */
    *storage = default_value;

    (void) mca_base_component_var_register(
        &mca_btl_tcp_component.super.btl_version,
        param_name,
        help_string,
        MCA_BASE_VAR_TYPE_UNSIGNED_INT,
        NULL, 0, 0,
        OPAL_INFO_LVL_9,
        MCA_BASE_VAR_SCOPE_READONLY,
        storage);

    return *storage;
}
@ -172,6 +190,25 @@ OBJ_CLASS_INSTANCE(
static void mca_btl_tcp_component_recv_handler(int, short, void*);
static void mca_btl_tcp_component_accept_handler(int, short, void*);
/*
 * Reset one "minimum port" setting to 1024 when it exceeds USHRT_MAX,
 * reporting the rejected value through the "invalid minimum port" help topic.
 * family is the address-family tag shown in the message ("v4" or "v6").
 */
static void mca_btl_tcp_verify_port_min(const char *family, int *port_min)
{
    if (*port_min <= USHRT_MAX) {
        return;
    }

    opal_show_help("help-mpi-btl-tcp.txt", "invalid minimum port",
                   true, family, ompi_process_info.nodename,
                   *port_min);
    *port_min = 1024;
}

/*
 * Sanity-check the registered TCP BTL port settings.
 *
 * An out-of-range minimum port (IPv4, plus IPv6 when OPAL_WANT_IPV6 is set)
 * is reported and replaced by 1024. Always returns OMPI_SUCCESS.
 */
static int mca_btl_tcp_component_verify(void)
{
    mca_btl_tcp_verify_port_min("v4", &mca_btl_tcp_component.tcp_port_min);
#if OPAL_WANT_IPV6
    mca_btl_tcp_verify_port_min("v6", &mca_btl_tcp_component.tcp6_port_min);
#endif

    return OMPI_SUCCESS;
}
/*
* Called by MCA framework to open the component, registers
@ -183,69 +220,48 @@ static int mca_btl_tcp_component_register(void)
char* message;
/* register TCP component parameters */
mca_btl_tcp_component.tcp_num_links =
mca_btl_tcp_param_register_int("links", NULL, 1);
mca_btl_tcp_component.tcp_if_include =
mca_btl_tcp_param_register_string("if_include", "Comma-delimited list of devices and/or CIDR notation of networks to use for MPI communication (e.g., \"eth0,192.168.0.0/16\"). Mutually exclusive with btl_tcp_if_exclude.", "");
mca_btl_tcp_component.tcp_if_exclude =
mca_btl_tcp_param_register_string("if_exclude", "Comma-delimited list of devices and/or CIDR notation of networks to NOT use for MPI communication -- all devices not matching these specifications will be used (e.g., \"eth0,192.168.0.0/16\"). If set to a non-default value, it is mutually exclusive with btl_tcp_if_include.",
"127.0.0.1/8,sppp"
);
mca_btl_tcp_param_register_uint("links", NULL, 1, &mca_btl_tcp_component.tcp_num_links);
mca_btl_tcp_param_register_string("if_include", "Comma-delimited list of devices and/or CIDR notation of networks to use for MPI communication (e.g., \"eth0,192.168.0.0/16\"). Mutually exclusive with btl_tcp_if_exclude.", "", &mca_btl_tcp_component.tcp_if_include);
mca_btl_tcp_param_register_string("if_exclude", "Comma-delimited list of devices and/or CIDR notation of networks to NOT use for MPI communication -- all devices not matching these specifications will be used (e.g., \"eth0,192.168.0.0/16\"). If set to a non-default value, it is mutually exclusive with btl_tcp_if_include.",
"127.0.0.1/8,sppp",
&mca_btl_tcp_component.tcp_if_exclude);
mca_btl_tcp_param_register_int ("free_list_num", NULL, 8, &mca_btl_tcp_component.tcp_free_list_num);
mca_btl_tcp_param_register_int ("free_list_max", NULL, -1, &mca_btl_tcp_component.tcp_free_list_max);
mca_btl_tcp_param_register_int ("free_list_inc", NULL, 32, &mca_btl_tcp_component.tcp_free_list_inc);
mca_btl_tcp_param_register_int ("sndbuf", NULL, 128*1024, &mca_btl_tcp_component.tcp_sndbuf);
mca_btl_tcp_param_register_int ("rcvbuf", NULL, 128*1024, &mca_btl_tcp_component.tcp_rcvbuf);
mca_btl_tcp_param_register_int ("endpoint_cache",
"The size of the internal cache for each TCP connection. This cache is"
" used to reduce the number of syscalls, by replacing them with memcpy."
" Every read will read the expected data plus the amount of the"
" endpoint_cache", 30*1024, &mca_btl_tcp_component.tcp_endpoint_cache);
mca_btl_tcp_param_register_int ("use_nagle", "Whether to use Nagle's algorithm or not (using Nagle's algorithm may increase short message latency)", 0, &mca_btl_tcp_component.tcp_not_use_nodelay);
mca_btl_tcp_param_register_int( "port_min_v4",
"The minimum port where the TCP BTL will try to bind (default 1024)",
1024, &mca_btl_tcp_component.tcp_port_min);
mca_btl_tcp_component.tcp_free_list_num =
mca_btl_tcp_param_register_int ("free_list_num", NULL, 8);
mca_btl_tcp_component.tcp_free_list_max =
mca_btl_tcp_param_register_int ("free_list_max", NULL, -1);
mca_btl_tcp_component.tcp_free_list_inc =
mca_btl_tcp_param_register_int ("free_list_inc", NULL, 32);
mca_btl_tcp_component.tcp_sndbuf =
mca_btl_tcp_param_register_int ("sndbuf", NULL, 128*1024);
mca_btl_tcp_component.tcp_rcvbuf =
mca_btl_tcp_param_register_int ("rcvbuf", NULL, 128*1024);
mca_btl_tcp_component.tcp_endpoint_cache =
mca_btl_tcp_param_register_int ("endpoint_cache",
"The size of the internal cache for each TCP connection. This cache is"
" used to reduce the number of syscalls, by replacing them with memcpy."
" Every read will read the expected data plus the amount of the"
" endpoint_cache", 30*1024);
mca_btl_tcp_component.tcp_use_nodelay =
!mca_btl_tcp_param_register_int ("use_nagle", "Whether to use Nagle's algorithm or not (using Nagle's algorithm may increase short message latency)", 0);
mca_btl_tcp_component.tcp_port_min =
mca_btl_tcp_param_register_int( "port_min_v4",
"The minimum port where the TCP BTL will try to bind (default 1024)", 1024 );
if( mca_btl_tcp_component.tcp_port_min > USHRT_MAX ) {
opal_show_help("help-mpi-btl-tcp.txt", "invalid minimum port",
true, "v4", ompi_process_info.nodename,
mca_btl_tcp_component.tcp_port_min );
mca_btl_tcp_component.tcp_port_min = 1024;
}
asprintf( &message,
"The number of ports where the TCP BTL will try to bind (default %d)."
" This parameter together with the port min, define a range of ports"
" where Open MPI will open sockets.",
(0x1 << 16) - mca_btl_tcp_component.tcp_port_min - 1 );
mca_btl_tcp_component.tcp_port_range =
mca_btl_tcp_param_register_int( "port_range_v4", message,
(0x1 << 16) - mca_btl_tcp_component.tcp_port_min - 1);
mca_btl_tcp_param_register_int( "port_range_v4", message,
(0x1 << 16) - mca_btl_tcp_component.tcp_port_min - 1,
&mca_btl_tcp_component.tcp_port_range);
free(message);
#if OPAL_WANT_IPV6
mca_btl_tcp_component.tcp6_port_min =
mca_btl_tcp_param_register_int( "port_min_v6",
"The minimum port where the TCP BTL will try to bind (default 1024)", 1024 );
if( mca_btl_tcp_component.tcp6_port_min > USHRT_MAX ) {
opal_show_help("help-mpi-btl-tcp.txt", "invalid minimum port",
true, "v6", ompi_process_info.nodename,
mca_btl_tcp_component.tcp6_port_min );
mca_btl_tcp_component.tcp6_port_min = 1024;
}
mca_btl_tcp_param_register_int( "port_min_v6",
"The minimum port where the TCP BTL will try to bind (default 1024)", 1024,
& mca_btl_tcp_component.tcp6_port_min );
asprintf( &message,
"The number of ports where the TCP BTL will try to bind (default %d)."
" This parameter together with the port min, define a range of ports"
" where Open MPI will open sockets.",
(0x1 << 16) - mca_btl_tcp_component.tcp6_port_min - 1 );
mca_btl_tcp_component.tcp6_port_range =
mca_btl_tcp_param_register_int( "port_range_v6", message,
(0x1 << 16) - mca_btl_tcp_component.tcp6_port_min - 1);
mca_btl_tcp_param_register_int( "port_range_v6", message,
(0x1 << 16) - mca_btl_tcp_component.tcp6_port_min - 1,
&mca_btl_tcp_component.tcp6_port_range );
free(message);
#endif
@ -268,15 +284,15 @@ static int mca_btl_tcp_component_register(void)
mca_btl_base_param_register(&mca_btl_tcp_component.super.btl_version,
&mca_btl_tcp_module.super);
mca_btl_tcp_component.tcp_disable_family =
mca_btl_tcp_param_register_int ("disable_family", NULL, 0);
mca_btl_tcp_param_register_int ("disable_family", NULL, 0, &mca_btl_tcp_component.tcp_disable_family);
/* Register a list of interfaces to use in sequence */
message = mca_btl_tcp_param_register_string("if_seq",
"If specified, a comma-delimited list of TCP interfaces. Interfaces will be assigned, one to each MPI process, in a round-robin fashion on each server. For example, if the list is \"eth0,eth1\" and four MPI processes are run on a single server, then local ranks 0 and 2 will use eth0 and local ranks 1 and 3 will use eth1.", NULL);
mca_btl_tcp_param_register_string("if_seq",
"If specified, a comma-delimited list of TCP interfaces. Interfaces will be assigned, one to each MPI process, in a round-robin fashion on each server. For example, if the list is \"eth0,eth1\" and four MPI processes are run on a single server, then local ranks 0 and 2 will use eth0 and local ranks 1 and 3 will use eth1.", NULL, &mca_btl_tcp_if_seq_string);
mca_btl_tcp_component.tcp_if_seq = NULL;
if (NULL != message && '\0' != *message) {
char **argv = opal_argv_split(message, ',');
if (NULL != mca_btl_tcp_if_seq_string && '\0' != *mca_btl_tcp_if_seq_string) {
char **argv = opal_argv_split(mca_btl_tcp_if_seq_string, ',');
if (NULL != argv && '\0' != *(argv[0])) {
int if_index, rc, count;
@ -320,11 +336,15 @@ static int mca_btl_tcp_component_register(void)
}
}
return OMPI_SUCCESS;
return mca_btl_tcp_component_verify();
}
static int mca_btl_tcp_component_open(void)
{
if (OMPI_SUCCESS != mca_btl_tcp_component_verify()) {
return OMPI_ERROR;
}
/* initialize state */
mca_btl_tcp_component.tcp_listen_sd = -1;
#if OPAL_WANT_IPV6
@ -345,7 +365,7 @@ static int mca_btl_tcp_component_open(void)
/* if_include and if_exclude need to be mutually exclusive */
if (OPAL_SUCCESS !=
mca_base_param_check_exclusive_string(
mca_base_var_check_exclusive("ompi",
mca_btl_tcp_component.super.btl_version.mca_type_name,
mca_btl_tcp_component.super.btl_version.mca_component_name,
"if_include",
@ -370,21 +390,13 @@ static int mca_btl_tcp_component_close(void)
opal_list_item_t* item;
opal_list_item_t* next;
if(NULL != mca_btl_tcp_component.tcp_if_include) {
free(mca_btl_tcp_component.tcp_if_include);
mca_btl_tcp_component.tcp_if_include = NULL;
}
if(NULL != mca_btl_tcp_component.tcp_if_exclude) {
free(mca_btl_tcp_component.tcp_if_exclude);
mca_btl_tcp_component.tcp_if_exclude = NULL;
}
if (NULL != mca_btl_tcp_component.tcp_if_seq) {
free(mca_btl_tcp_component.tcp_if_seq);
}
if (NULL != mca_btl_tcp_component.tcp_btls)
free(mca_btl_tcp_component.tcp_btls);
if (mca_btl_tcp_component.tcp_listen_sd >= 0) {
opal_event_del(&mca_btl_tcp_component.tcp_recv_event);
CLOSE_THE_SOCKET(mca_btl_tcp_component.tcp_listen_sd);
@ -450,11 +462,11 @@ static int mca_btl_tcp_create(int if_kindex, const char* if_name)
/* allow user to specify interface bandwidth */
sprintf(param, "bandwidth_%s", if_name);
btl->super.btl_bandwidth = mca_btl_tcp_param_register_int(param, NULL, btl->super.btl_bandwidth);
mca_btl_tcp_param_register_uint(param, NULL, btl->super.btl_bandwidth, &btl->super.btl_bandwidth);
/* allow user to override/specify latency ranking */
sprintf(param, "latency_%s", if_name);
btl->super.btl_latency = mca_btl_tcp_param_register_int(param, NULL, btl->super.btl_latency);
mca_btl_tcp_param_register_uint(param, NULL, btl->super.btl_latency, &btl->super.btl_latency);
if( i > 0 ) {
btl->super.btl_bandwidth >>= 1;
btl->super.btl_latency <<= 1;
@ -462,11 +474,11 @@ static int mca_btl_tcp_create(int if_kindex, const char* if_name)
/* allow user to specify interface bandwidth */
sprintf(param, "bandwidth_%s:%d", if_name, i);
btl->super.btl_bandwidth = mca_btl_tcp_param_register_int(param, NULL, btl->super.btl_bandwidth);
mca_btl_tcp_param_register_uint(param, NULL, btl->super.btl_bandwidth, &btl->super.btl_bandwidth);
/* allow user to override/specify latency ranking */
sprintf(param, "latency_%s:%d", if_name, i);
btl->super.btl_latency = mca_btl_tcp_param_register_int(param, NULL, btl->super.btl_latency);
mca_btl_tcp_param_register_uint(param, NULL, btl->super.btl_latency, &btl->super.btl_latency);
#if 0 && OPAL_ENABLE_DEBUG
BTL_OUTPUT(("interface %s instance %i: bandwidth %d latency %d\n", if_name, i,
btl->super.btl_bandwidth, btl->super.btl_latency));

Просмотреть файл

@ -533,7 +533,7 @@ void mca_btl_tcp_set_socket_options(int sd)
{
int optval;
#if defined(TCP_NODELAY)
optval = mca_btl_tcp_component.tcp_use_nodelay;
/* tcp_not_use_nodelay is the backing store of the "use_nagle" variable
 * (default 0 == do not use Nagle).  TCP_NODELAY has the opposite sense, so
 * the value must be inverted here; the pre-migration code stored the
 * already-negated value in tcp_use_nodelay. */
optval = !mca_btl_tcp_component.tcp_not_use_nodelay;
if(setsockopt(sd, IPPROTO_TCP, TCP_NODELAY, (char *)&optval, sizeof(optval)) < 0) {
BTL_ERROR(("setsockopt(TCP_NODELAY) failed: %s (%d)",
strerror(opal_socket_errno), opal_socket_errno));

Просмотреть файл

@ -22,7 +22,6 @@
#include "opal/mca/event/event.h"
#include "ompi/mca/btl/btl.h"
#include "opal/mca/base/mca_base_param.h"
#include "ompi/mca/mpool/base/base.h"
#include "btl_template.h"
#include "btl_template_frag.h"
@ -72,37 +71,6 @@ mca_btl_template_component_t mca_btl_template_component = {
}
};
/*
* utility routines for parameter registration
*/
/*
 * Register a string parameter of the template BTL component (no help text)
 * and hand back the value reported by mca_base_param_reg_string().
 */
static inline char* mca_btl_template_param_register_string(
        const char* param_name,
        const char* default_value)
{
    char *registered;

    (void) mca_base_param_reg_string(
        &mca_btl_template_component.super.btl_version,
        param_name,
        NULL,           /* no help string */
        false, false,
        default_value,
        &registered);

    return registered;
}
/*
 * Register an integer parameter of the template BTL component (no help text).
 * The result starts out as default_value and is returned after the
 * mca_base_param_reg_int() call has had the chance to overwrite it.
 */
static inline int mca_btl_template_param_register_int(
        const char* param_name,
        int default_value)
{
    int registered = default_value;

    (void) mca_base_param_reg_int(
        &mca_btl_template_component.super.btl_version,
        param_name,
        NULL,           /* no help string */
        false, false,
        default_value,
        &registered);

    return registered;
}
static int mca_btl_template_component_open(void)
{
return OMPI_SUCCESS;
@ -118,31 +86,41 @@ static int mca_btl_template_component_register(void)
OBJ_CONSTRUCT(&mca_btl_template_component.template_procs, opal_list_t);
/* register TEMPLATE component parameters */
mca_btl_template_component.template_free_list_num =
mca_btl_template_param_register_int ("free_list_num", 8);
mca_btl_template_component.template_free_list_max =
mca_btl_template_param_register_int ("free_list_max", 1024);
mca_btl_template_component.template_free_list_inc =
mca_btl_template_param_register_int ("free_list_inc", 32);
mca_btl_template_component.template_mpool_name =
mca_btl_template_param_register_string("mpool", "ib");
mca_btl_template_module.super.btl_exclusivity =
mca_btl_template_param_register_int ("exclusivity", 0);
mca_btl_template_module.super.btl_eager_limit =
mca_btl_template_param_register_int ("first_frag_size", 64*1024) - sizeof(mca_btl_base_header_t);
mca_btl_template_module.super.btl_rndv_eager_limit =
mca_btl_template_param_register_int ("min_send_size", 64*1024) - sizeof(mca_btl_base_header_t);
mca_btl_template_module.super.btl_max_send_size =
mca_btl_template_param_register_int ("max_send_size", 128*1024) - sizeof(mca_btl_base_header_t);
mca_btl_template_module.super.btl_min_rdma_pipeline_size =
mca_btl_template_param_register_int("min_rdma_pipeline_size", 1024*1024);
mca_btl_template_module.super.btl_rdma_pipeline_frag_size =
mca_btl_template_param_register_int("rdma_pipeline_frag_size", 1024*1024);
mca_btl_template_module.super.btl_rdma_pipeline_send_length =
mca_btl_template_param_register_int("rdma_pipeline_send_length", 1024*1024);
mca_btl_template_module.super.btl_flags =
mca_btl_template_param_register_int("flags", MCA_BTL_FLAGS_PUT);
return OMPI_SUCCESS;
mca_btl_template_component.template_free_list_num = 8;
(void) mca_base_component_var_register(&mca_btl_template_component.super.btl_version,
"free_list_num", NULL, MCA_BASE_VAR_TYPE_INT,
NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_btl_template_component.template_free_list_num);
(void) mca_base_component_var_register(&mca_btl_template_component.super.btl_version,
"free_list_max", NULL, MCA_BASE_VAR_TYPE_INT,
NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_btl_template_component.template_free_list_max);
(void) mca_base_component_var_register(&mca_btl_template_component.super.btl_version,
"free_list_inc", NULL, MCA_BASE_VAR_TYPE_INT,
NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_btl_template_component.template_free_list_inc);
mca_btl_template_component.template_mpool_name = "grdma";
(void) mca_base_component_var_register(&mca_btl_template_component.super.btl_version,
"mpool", NULL, MCA_BASE_VAR_TYPE_STRING,
NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_btl_template_component.template_mpool_name);
mca_btl_template_module.super.btl_exclusivity = 0;
mca_btl_template_module.super.btl_eager_limit = 64*1024;
mca_btl_template_module.super.btl_rndv_eager_limit = 64*1024;
mca_btl_template_module.super.btl_max_send_size = 128*1024;
mca_btl_template_module.super.btl_min_rdma_pipeline_size = 1024*1024;
mca_btl_template_module.super.btl_rdma_pipeline_frag_size = 1024*1024;
mca_btl_template_module.super.btl_rdma_pipeline_send_length = 1024*1024;
mca_btl_template_module.super.btl_flags = MCA_BTL_FLAGS_PUT;
return mca_btl_base_param_register(&mca_btl_template_component.super.btl_version,
&mca_btl_template_module.super);
}
/*

Просмотреть файл

@ -54,13 +54,13 @@ struct mca_btl_udapl_component_t {
int32_t udapl_verbosity; /**< report out level, see
"Report Out from uDAPL BTL" below for details. */
size_t udapl_num_btls; /**< number of hcas available to the uDAPL component */
size_t udapl_max_btls; /**< maximum number of supported hcas */
unsigned int udapl_max_btls; /**< maximum number of supported hcas */
struct mca_btl_udapl_module_t **udapl_btls; /**< array of available BTL modules */
int32_t udapl_num_recvs; /**< number of recv buffers to keep posted */
int32_t udapl_num_sends; /**< number of sends to post on endpoint */
int32_t udapl_sr_win; /**< number of fragments recieved before
returning credits to sender */
uint32_t udapl_timeout; /**< connection timeout, in microseconds */
unsigned int udapl_timeout; /**< connection timeout, in microseconds */
size_t udapl_eager_frag_size;
size_t udapl_max_frag_size;
size_t udapl_eager_rdma_frag_size; /* size of the rdma fragement including data

Просмотреть файл

@ -29,7 +29,6 @@
#include "opal/util/argv.h"
#include "ompi/mca/btl/btl.h"
#include "opal/mca/base/mca_base_param.h"
#include "ompi/mca/mpool/base/base.h"
#include "btl_udapl.h"
#include "btl_udapl_frag.h"
@ -55,7 +54,6 @@ static int mca_btl_udapl_modify_ia_list(DAT_COUNT *num_info_entries,
static const char*
mca_btl_udapl_dat_event_to_string(DAT_EVENT_NUMBER event_number);
mca_btl_udapl_component_t mca_btl_udapl_component = {
{
/* First, the mca_base_component_t struct containing meta information
@ -68,7 +66,9 @@ mca_btl_udapl_component_t mca_btl_udapl_component = {
OMPI_MINOR_VERSION, /* MCA component minor version */
OMPI_RELEASE_VERSION, /* MCA component release version */
mca_btl_udapl_component_open, /* component open */
mca_btl_udapl_component_close /* component close */
mca_btl_udapl_component_close, /* component close */
NULL,
mca_btl_udapl_register_mca_params
},
{
/* The component is not checkpoint ready */
@ -179,7 +179,6 @@ mca_btl_udapl_error(DAT_RETURN ret, char* str)
* Called by MCA framework to open the component, registers
* component parameters.
*/
int mca_btl_udapl_component_open(void)
{
int rc = OMPI_SUCCESS;
@ -193,9 +192,6 @@ int mca_btl_udapl_component_open(void)
OBJ_CONSTRUCT(&mca_btl_udapl_component.udapl_procs, opal_list_t);
OBJ_CONSTRUCT(&mca_btl_udapl_component.udapl_lock, opal_mutex_t);
/* register uDAPL MCA parameters */
rc = mca_btl_udapl_register_mca_params();
/* compute udapl_eager_frag_size and udapl_max_frag_size */
mca_btl_udapl_component.udapl_eager_frag_size =
mca_btl_udapl_module.super.btl_eager_limit;

Просмотреть файл

@ -19,7 +19,7 @@
*/
#include "ompi_config.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/mca/base/base.h"
#include "btl_udapl.h"
#include "btl_udapl_mca.h"
#include <string.h>
@ -39,28 +39,28 @@
/*
 * NOTE(review): diff rendering without +/- markers -- the removed
 * mca_base_param_reg_string()/out_value lines and the replacement
 * mca_base_component_var_register()/storage lines are interleaved below.
 *
 * New form: seeds *storage with default_value, registers the string variable
 * with storage as its backing store, then validates *storage against flags.
 * Returns OMPI_ERR_BAD_PARAM when a check fails, OMPI_SUCCESS otherwise.
 */
static inline int mca_btl_udapl_reg_string(const char* param_name,
const char* param_desc,
const char* default_value,
char **out_value, int flags)
char **storage, int flags)
{
char *value;
mca_base_param_reg_string(&mca_btl_udapl_component.super.btl_version,
param_name, param_desc, false, false, default_value, &value);
/* NOTE(review): assigns a const char * to a char * without a cast; the TCP
 * BTL helper mca_btl_tcp_param_register_string() casts here -- consider
 * matching it to avoid a discarded-qualifier warning. */
*storage = default_value;
(void) mca_base_component_var_register(&mca_btl_udapl_component.super.btl_version, param_name,
param_desc, MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, storage);
/* a NULL value is rejected unless the caller passed REGSTR_EMPTY_OK */
if (NULL == value && !((flags & REGSTR_EMPTY_OK) == REGSTR_EMPTY_OK)) {
if (NULL == *storage && !((flags & REGSTR_EMPTY_OK) == REGSTR_EMPTY_OK)) {
BTL_ERROR(("ERROR: MCA Parameter %s : Value (NULL) out of range : "
"Default value (%s)\n \t Parameter Description : %s",
param_name, default_value, param_desc));
return OMPI_ERR_BAD_PARAM;
}
/* NOTE(review): strlen() is reached with a NULL value if flags carries both
 * REGSTR_EMPTY_OK and REGSTR_EMPTY_NOT_OK -- confirm no caller does that. */
if ((flags & REGSTR_EMPTY_NOT_OK) && 0 == strlen(value)) {
if ((flags & REGSTR_EMPTY_NOT_OK) && 0 == strlen(*storage)) {
BTL_ERROR(("ERROR: MCA Parameter %s : Value (%s) out of range : "
"Default value (%s)\n \t Parameter Description : %s",
param_name, value, default_value, param_desc));
param_name, *storage, default_value, param_desc));
return OMPI_ERR_BAD_PARAM;
}
*out_value = value;
return OMPI_SUCCESS;
}
@ -79,27 +79,28 @@ static inline int mca_btl_udapl_reg_string(const char* param_name,
*/
static inline int mca_btl_udapl_reg_int(const char* param_name,
const char* param_desc,
int default_value, int *out_value,
int default_value, int *storage,
int flags)
{
/* NOTE(review): diff rendering without +/- markers -- the removed
 * mca_base_param_reg_int()/value/out_value lines and the replacement
 * mca_base_component_var_register()/storage lines are interleaved below. */
int value;
/* New form: the caller's storage is the variable's backing store, so it is
 * seeded with the default before registration and validated in place after.
 * NOTE(review): the "max_modules" and "timeout" call sites pass the address
 * of struct fields declared `unsigned int` into this `int *` parameter while
 * the variable is registered as MCA_BASE_VAR_TYPE_INT -- confirm intended. */
*storage = default_value;
(void) mca_base_component_var_register(&mca_btl_udapl_component.super.btl_version, param_name,
param_desc, MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, storage);
mca_base_param_reg_int(&mca_btl_udapl_component.super.btl_version,
param_name, param_desc, false, false, default_value, &value);
/* -1 is accepted outright when REGINT_NEG_ONE_OK is set, bypassing the
 * range checks below */
if ((flags & REGINT_NEG_ONE_OK) && -1 == value) {
*out_value = value;
if ((flags & REGINT_NEG_ONE_OK) && -1 == *storage) {
return OMPI_SUCCESS;
}
/* range checks selected by flags: >= 0, >= 1, and/or non-zero */
if (((flags & REGINT_GE_ZERO) && value < 0) ||
((flags & REGINT_GE_ONE) && value < 1) ||
((flags & REGINT_NONZERO) && 0 == value)) {
if (((flags & REGINT_GE_ZERO) && *storage < 0) ||
((flags & REGINT_GE_ONE) && *storage < 1) ||
((flags & REGINT_NONZERO) && 0 == *storage)) {
BTL_ERROR(("ERROR: MCA Parameter %s : Value (%d) out of range : "
"Default value (%d)\n \t Parameter Description : %s\n",
param_name, value, default_value, param_desc));
param_name, *storage, default_value, param_desc));
return OMPI_ERR_BAD_PARAM;
}
*out_value = value;
return OMPI_SUCCESS;
}
@ -111,7 +112,7 @@ static inline int mca_btl_udapl_reg_int(const char* param_name,
*/
int mca_btl_udapl_register_mca_params(void)
{
int ival, rc, tmp_rc;
int rc, tmp_rc;
rc = OMPI_SUCCESS;
@ -144,9 +145,8 @@ int mca_btl_udapl_register_mca_params(void)
CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("max_modules",
"Maximum number of supported HCAs.",
8,
&ival,
&mca_btl_udapl_component.udapl_max_btls,
REGINT_GE_ONE), tmp_rc, rc);
mca_btl_udapl_component.udapl_max_btls = (uint32_t) ival;
CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("num_recvs",
"Total number of receive buffers to keep posted "
@ -203,9 +203,8 @@ int mca_btl_udapl_register_mca_params(void)
CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("timeout",
"Connection timeout, in microseconds.",
MCA_BTL_UDAPL_CONN_TIMEOUT_DEFAULT,
&ival,
&mca_btl_udapl_component.udapl_timeout,
REGINT_GE_ONE), tmp_rc, rc);
mca_btl_udapl_component.udapl_timeout = (uint32_t) ival;
CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("conn_priv_data",
"Use connect private data to establish connections "

Просмотреть файл

@ -16,7 +16,6 @@
#include "btl_ugni_smsg.h"
#include "opal/include/opal/align.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/memoryhooks/memory.h"
#include "ompi/runtime/params.h"
@ -51,81 +50,141 @@ mca_btl_ugni_component_t mca_btl_ugni_component = {
}
};
/*
 * Register an integer parameter of the ugni BTL component.
 *
 * value serves as the default; the returned number is whatever
 * mca_base_param_reg_int() leaves in the output slot (initially the default).
 */
static inline int
mca_btl_ugni_param_register_int (const char *param_name, const char *help,
                                 int value)
{
    int result = value;

    mca_base_param_reg_int(&mca_btl_ugni_component.super.btl_version,
                           param_name, help,
                           false, false,
                           value, &result);

    return result;
}
/*
 * Register the MCA variables of the ugni BTL component.
 *
 * Two registration styles appear below for the same variables:
 *  - mca_btl_ugni_param_register_int(name, help, default), whose return value
 *    is assigned to the component field, and
 *  - mca_base_component_var_register(), where the default is first stored in
 *    the component field and the address of that field is passed as the
 *    variable's storage.
 *
 * NOTE(review): the two styles look like the old and new sides of the
 * mca_base_param -> mca_base_var conversion interleaved in one listing;
 * confirm which statements belong to the live code before relying on order.
 */
static int
btl_ugni_component_register(void)
{
mca_btl_ugni_component.ugni_free_list_num =
mca_btl_ugni_param_register_int("free_list_num", NULL, 8);
mca_btl_ugni_component.ugni_free_list_max =
mca_btl_ugni_param_register_int("free_list_max", NULL, 16384);
mca_btl_ugni_component.ugni_free_list_inc =
mca_btl_ugni_param_register_int("free_list_inc", NULL, 64);
/* register the component's variable group together with its description */
(void) mca_base_var_group_component_register(&mca_btl_ugni_component.super.btl_version,
"Gemini byte transport layer");
mca_btl_ugni_component.ugni_eager_num =
mca_btl_ugni_param_register_int("eager_num", NULL, 16);
mca_btl_ugni_component.ugni_eager_max =
mca_btl_ugni_param_register_int("eager_max", NULL, 128);
mca_btl_ugni_component.ugni_eager_inc =
mca_btl_ugni_param_register_int("eager_inc", NULL, 16);
/* free list sizing: defaults 8 / 16384 / 64, registered at OPAL_INFO_LVL_9 */
mca_btl_ugni_component.ugni_free_list_num = 8;
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
"free_list_num", NULL, MCA_BASE_VAR_TYPE_INT,
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_ugni_component.ugni_free_list_num);
mca_btl_ugni_component.ugni_free_list_max = 16384;
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
"free_list_max", NULL, MCA_BASE_VAR_TYPE_INT,
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_ugni_component.ugni_free_list_max);
mca_btl_ugni_component.ugni_free_list_inc = 64;
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
"free_list_inc", NULL, MCA_BASE_VAR_TYPE_INT,
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_ugni_component.ugni_free_list_inc);
mca_btl_ugni_component.remote_cq_size =
mca_btl_ugni_param_register_int("remote_cq_size", "Remote SMSG completion queue "
"size (default 40000)", 40000);
/* eager sizing: defaults 16 / 128 / 16, registered at OPAL_INFO_LVL_5 */
mca_btl_ugni_component.ugni_eager_num = 16;
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
"eager_num", NULL, MCA_BASE_VAR_TYPE_INT,
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_ugni_component.ugni_eager_num);
mca_btl_ugni_component.ugni_eager_max = 128;
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
"eager_max", NULL, MCA_BASE_VAR_TYPE_INT,
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_ugni_component.ugni_eager_max);
mca_btl_ugni_component.ugni_eager_inc = 16;
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
"eager_inc", NULL, MCA_BASE_VAR_TYPE_INT,
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_ugni_component.ugni_eager_inc);
mca_btl_ugni_component.local_cq_size =
mca_btl_ugni_param_register_int("local_cq_size", "Local completion queue size "
"(default 8192)", 8192);
/* completion queue sizes: remote default 40000, local default 8192 */
mca_btl_ugni_component.remote_cq_size = 40000;
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
"remote_cq_size", "Remote SMSG completion queue "
"size (default 40000)", MCA_BASE_VAR_TYPE_INT,
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_ugni_component.remote_cq_size);
mca_btl_ugni_component.local_cq_size = 8192;
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
"local_cq_size", "Local completion queue size "
"(default 8192)", MCA_BASE_VAR_TYPE_INT,
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_ugni_component.local_cq_size);
mca_btl_ugni_component.ugni_smsg_limit =
mca_btl_ugni_param_register_int("smsg_limit", "Maximum size message that "
"will be sent using the SMSG/MSGQ protocol "
"(0 - autoselect(default), 16k max)", 0);
mca_btl_ugni_component.ugni_smsg_limit = 0;
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
"smsg_limit", "Maximum size message that "
"will be sent using the SMSG/MSGQ protocol "
"(0 - autoselect(default), 16k max)",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_ugni_component.ugni_smsg_limit);
/* cap smsg_limit at 16384, matching the "16k max" in the help text */
if (16384 < mca_btl_ugni_component.ugni_smsg_limit) {
mca_btl_ugni_component.ugni_smsg_limit = 16384;
}
mca_btl_ugni_component.smsg_max_credits = 32;
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
"smsg_max_credits", "Maximum number of "
"outstanding SMSG/MSGQ message (default 32)",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_ugni_component.smsg_max_credits);
mca_btl_ugni_component.smsg_max_credits =
mca_btl_ugni_param_register_int("smsg_max_credits", "Maximum number of "
"outstanding SMSG/MSGQ message (default 32)",
32);
mca_btl_ugni_component.ugni_fma_limit = 1024;
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
"fma_limit", "Maximum size message that "
"will be sent using the FMA (Fast Memory "
"Access) protocol (default 1024, 64k max)",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_ugni_component.ugni_fma_limit);
mca_btl_ugni_component.ugni_fma_limit =
mca_btl_ugni_param_register_int("fma_limit", "Maximum size message that "
"will be sent using the FMA (Fast Memory "
"Access) protocol (default 1024, 64k max)",
1024);
/* get_limit default is 1 MiB (1 * 1024 * 1024) */
mca_btl_ugni_component.ugni_get_limit = 1 * 1024 * 1024;
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
"get_limit", "Maximum size message that "
"will be sent using a get protocol "
"(default 1M)", MCA_BASE_VAR_TYPE_INT,
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_ugni_component.ugni_get_limit);
/* cap fma_limit at 65536, matching the "64k max" in the help text */
if (65536 < mca_btl_ugni_component.ugni_fma_limit) {
mca_btl_ugni_component.ugni_fma_limit = 65536;
}
mca_btl_ugni_component.rdma_max_retries = 16;
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
"rdma_max_retries", NULL, MCA_BASE_VAR_TYPE_INT,
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_ugni_component.rdma_max_retries);
mca_btl_ugni_component.ugni_get_limit =
mca_btl_ugni_param_register_int("get_limit", "Maximum size message that "
"will be sent using a get protocol "
"(default 1M)", 1 * 1024 * 1024);
mca_btl_ugni_component.smsg_max_retries = 16;
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
"smsg_max_retries", NULL, MCA_BASE_VAR_TYPE_INT,
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_ugni_component.smsg_max_retries);
mca_btl_ugni_component.rdma_max_retries =
mca_btl_ugni_param_register_int("rdma_max_retries", NULL, 16);
mca_btl_ugni_component.smsg_max_retries =
mca_btl_ugni_param_register_int("smsg_max_retries", NULL, 16);
mca_btl_ugni_component.max_mem_reg =
mca_btl_ugni_param_register_int("max_mem_reg", "Maximum number of "
"memory registrations a process can "
"hold (0 - autoselect, -1 - unlimited)"
" (default 0)", 0);
/* max_mem_reg: default 0; registered at OPAL_INFO_LVL_3 */
mca_btl_ugni_component.max_mem_reg = 0;
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
"max_mem_reg", "Maximum number of "
"memory registrations a process can "
"hold (0 - autoselect, -1 - unlimited)"
" (default 0)", MCA_BASE_VAR_TYPE_INT,
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_3,
MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_ugni_component.max_mem_reg);
mca_btl_ugni_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH;
@ -175,40 +234,16 @@ btl_ugni_component_close(void)
}
/*
 * Adjust ompi_mpi_leave_pinned and ompi_mpi_leave_pinned_pipeline according
 * to the support level reported by opal_mem_hooks_support_level().
 *
 * NOTE(review): two variants are interleaved below. One looks the settings
 * up through mca_base_param_find()/mca_base_param_lookup_*() and counts
 * matches in rc; the other reads the ompi_mpi_leave_pinned* globals directly.
 * Presumably these are the old and new sides of one change - confirm which
 * statements are live.
 */
static void mca_btl_ugni_autoset_leave_pinned (void) {
mca_base_param_source_t source;
int index, rc, value;
/* If we have a memory manager available, and
mpi_leave_pinned==-1, then unless the user explicitly set
mpi_leave_pinned_pipeline==0, then set mpi_leave_pinned to 1.
We have a memory manager if we have both FREE and MUNMAP
support */
value = opal_mem_hooks_support_level();
int value = opal_mem_hooks_support_level();
/* true only when both the FREE and MUNMAP support bits are set in value */
if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) ==
((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & value)) {
/* rc counts how many of the two parameter checks below succeed */
rc = 0;
index = mca_base_param_find("mpi", NULL, "leave_pinned");
if (index >= 0) {
if (OPAL_SUCCESS == mca_base_param_lookup_int(index, &value) &&
-1 == value) {
++rc;
}
}
index = mca_base_param_find("mpi", NULL, "leave_pinned_pipeline");
if (index >= 0) {
if (OPAL_SUCCESS == mca_base_param_lookup_int(index, &value) &&
OPAL_SUCCESS == mca_base_param_lookup_source(index, &source,
NULL)) {
/* counted only when the value is 0 and its source is the default */
if (0 == value && MCA_BASE_PARAM_SOURCE_DEFAULT == source) {
++rc;
}
}
}
/* If we were good on both parameters, then set leave_pinned=1 */
if (2 == rc) {
ompi_mpi_leave_pinned = 1;
ompi_mpi_leave_pinned_pipeline = 0;
/* Set leave pinned to 1 if leave pinned pipeline is not set */
if (-1 == ompi_mpi_leave_pinned) {
ompi_mpi_leave_pinned = !ompi_mpi_leave_pinned_pipeline;
}
} else {
/* without both hooks, both settings are forced to 0 */
ompi_mpi_leave_pinned = 0;
ompi_mpi_leave_pinned_pipeline = 0;
}
}
@ -264,6 +299,14 @@ mca_btl_ugni_component_init (int *num_btl_modules,
unsigned int i;
int rc;
if (16384 < mca_btl_ugni_component.ugni_smsg_limit) {
mca_btl_ugni_component.ugni_smsg_limit = 16384;
}
if (65536 < mca_btl_ugni_component.ugni_fma_limit) {
mca_btl_ugni_component.ugni_fma_limit = 65536;
}
/* Initialize ugni library and create communication domain */
rc = ompi_common_ugni_init();
if (OMPI_SUCCESS != rc) {

Просмотреть файл

@ -92,7 +92,6 @@ struct mca_btl_vader_component_t {
int vader_free_list_max; /**< maximum size of free lists */
int vader_free_list_inc; /**< number of elements to alloc
* when growing free lists */
char *vader_mpool_name; /**< name of shared memory pool module */
mca_mpool_base_module_t *vader_mpool; /**< mpool on local node */
void *vader_mpool_base; /**< base address of shared memory pool */
size_t eager_limit; /**< send fragment size */

Просмотреть файл

@ -26,7 +26,6 @@
#include "ompi/constants.h"
#include "opal/util/output.h"
#include "opal/mca/base/mca_base_param.h"
#include "ompi/mca/btl/base/btl_base_error.h"
#include "btl_vader.h"
@ -77,56 +76,56 @@ mca_btl_vader_component_t mca_btl_vader_component = {
} /* end super */
};
/*
* utility routines for parameter registration
*/
/*
 * Register a string MCA parameter for the vader BTL component and return the
 * value stored by the registration call.
 *
 * param_name     name of the parameter
 * default_value  default handed to the registration call
 *
 * Returns the pointer written into the storage slot by
 * mca_base_param_reg_string(), or NULL if the call left the slot untouched.
 * The registration return code is deliberately ignored, so the slot is
 * initialized up front rather than returned indeterminate.
 */
static inline char *mca_btl_vader_param_register_string(const char *param_name,
                                                        const char *default_value)
{
    char *param_value = NULL;

    (void) mca_base_param_reg_string (&mca_btl_vader_component.super.btl_version,
                                      param_name, NULL, false, false, default_value,
                                      &param_value);

    return param_value;
}
/*
 * Register an integer MCA parameter for the vader BTL component (no help
 * text) and return the value left in its storage after registration.
 *
 * param_name  name of the parameter
 * value       default value handed to the registration call
 */
static inline int mca_btl_vader_param_register_int(const char *param_name,
                                                   int value)
{
    /* separate storage slot, seeded with the default */
    int resolved = value;

    (void) mca_base_param_reg_int (&mca_btl_vader_component.super.btl_version,
                                   param_name, NULL, false, false, value, &resolved);

    return resolved;
}
/*
 * Register the MCA variables of the vader BTL component.
 *
 * Calls to mca_base_component_var_register() store the default in the backing
 * variable first and then pass that variable's address as storage; calls to
 * mca_btl_vader_param_register_*() assign the helper's return value instead.
 *
 * NOTE(review): both styles appear for the same variables, presumably the old
 * and new sides of the mca_base_param -> mca_base_var conversion interleaved
 * in one listing - confirm which statements are live.
 */
static int mca_btl_vader_component_register (void)
{
/* register VADER component parameters */
mca_btl_vader_component.vader_free_list_num =
mca_btl_vader_param_register_int("free_list_num", 8);
mca_btl_vader_component.vader_free_list_max =
mca_btl_vader_param_register_int("free_list_max", -1);
mca_btl_vader_component.vader_free_list_inc =
mca_btl_vader_param_register_int("free_list_inc", 64);
mca_btl_vader_component.vader_mpool_name =
mca_btl_vader_param_register_string("mpool", "sm");
mca_btl_vader_memcpy_limit =
mca_btl_vader_param_register_int("memcpy_limit", mca_btl_vader_memcpy_limit);
mca_btl_vader_log_align =
mca_btl_vader_param_register_int("log_align", mca_btl_vader_log_align);
/* register the component's variable group together with its description */
(void) mca_base_var_group_component_register(&mca_btl_vader_component.super.btl_version,
"XPMEM shared memory byte transport layer");
/* limit segment alignment to be between 4k and 16M */
if (mca_btl_vader_log_align < 12) {
mca_btl_vader_log_align = 12;
} else if (mca_btl_vader_log_align > 25) {
mca_btl_vader_log_align = 25;
}
/* register VADER component variables */
/* free list sizing: defaults 8 / 8192 / 64, registered at OPAL_INFO_LVL_9 */
mca_btl_vader_component.vader_free_list_num = 8;
(void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version,
"free_list_num", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_vader_component.vader_free_list_num);
mca_btl_vader_component.vader_free_list_max = 8192;
(void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version,
"free_list_max", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_vader_component.vader_free_list_max);
mca_btl_vader_component.vader_free_list_inc = 64;
(void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version,
"free_list_inc", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_vader_component.vader_free_list_inc);
mca_btl_vader_max_inline_send =
mca_btl_vader_param_register_int("max_inline_send", mca_btl_vader_max_inline_send);
/* memcpy_limit default: 524288 */
mca_btl_vader_memcpy_limit = 524288;
(void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version,
"memcpy_limit", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_vader_memcpy_limit);
/* log_align default: 21 */
mca_btl_vader_log_align = 21;
(void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version,
"log_align", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_vader_log_align);
/* max_inline_send default: 256; the only variable here using SCOPE_ALL_EQ */
mca_btl_vader_max_inline_send = 256;
(void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version,
"max_inline_send", NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_ALL_EQ,
&mca_btl_vader_max_inline_send);
mca_btl_vader.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH;
/* eager limit: 64 KiB */
mca_btl_vader.super.btl_eager_limit = 64 * 1024;
@ -156,6 +155,14 @@ static int mca_btl_vader_component_register (void)
static int mca_btl_vader_component_open(void)
{
/* limit segment alignment to be between 4k and 16M */
if (mca_btl_vader_log_align < 12) {
mca_btl_vader_log_align = 12;
} else if (mca_btl_vader_log_align > 25) {
mca_btl_vader_log_align = 25;
}
mca_btl_vader_component.eager_limit = mca_btl_vader.super.btl_eager_limit;
/* initialize objects */
@ -200,10 +207,6 @@ static int mca_btl_vader_component_close(void)
OBJ_RELEASE(mca_btl_vader_component.vader_seg);
}
if (NULL != mca_btl_vader_component.vader_mpool_name) {
free(mca_btl_vader_component.vader_mpool_name);
}
OBJ_DESTRUCT(&mca_btl_vader_component.active_sends);
CLEANUP:

Просмотреть файл

@ -133,8 +133,7 @@ static inline int vader_init_mpool (mca_btl_vader_t *vader_btl, int n)
/* now, create it */
component->vader_mpool =
mca_mpool_base_module_create(component->vader_mpool_name,
vader_btl, &res);
mca_mpool_base_module_create("sm", vader_btl, &res);
/* Sanity check to ensure that we found it */
if(NULL == component->vader_mpool) {
return OMPI_ERR_OUT_OF_RESOURCE;

Просмотреть файл

@ -24,7 +24,6 @@
#include "opal/mca/mca.h"
#include "opal/util/output.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "ompi/mca/coll/coll.h"

Просмотреть файл

@ -89,16 +89,20 @@ basic_register(void)
{
/* Use a low priority, but allow other components to be lower */
mca_base_param_reg_int(&mca_coll_basic_component.collm_version,
"priority",
"Priority of the basic coll component",
false, false, mca_coll_basic_priority,
&mca_coll_basic_priority);
mca_base_param_reg_int(&mca_coll_basic_component.collm_version,
"crossover",
"Minimum number of processes in a communicator before using the logarithmic algorithms",
false, false, mca_coll_basic_crossover,
&mca_coll_basic_crossover);
mca_coll_basic_priority = 10;
(void) mca_base_component_var_register(&mca_coll_basic_component.collm_version, "priority",
"Priority of the basic coll component",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_basic_priority);
mca_coll_basic_crossover = 4;
(void) mca_base_component_var_register(&mca_coll_basic_component.collm_version, "crossover",
"Minimum number of processes in a communicator before using the logarithmic algorithms",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_basic_crossover);
return OMPI_SUCCESS;
}

Просмотреть файл

@ -23,7 +23,6 @@
#include <stdio.h>
#include "mpi.h"
#include "opal/mca/base/mca_base_param.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/base.h"
#include "coll_basic.h"

Просмотреть файл

@ -31,8 +31,7 @@ BEGIN_C_DECLS
/* Globally exported variables */
OMPI_MODULE_DECLSPEC extern const mca_coll_base_component_2_0_0_t mca_coll_demo_component;
extern int mca_coll_demo_priority_param;
extern int mca_coll_demo_verbose_param;
extern int mca_coll_demo_priority;
extern int mca_coll_demo_verbose;

Просмотреть файл

@ -38,14 +38,13 @@ const char *mca_coll_demo_component_version_string =
/*
* Global variable
*/
int mca_coll_demo_priority_param = -1;
int mca_coll_demo_verbose_param = -1;
int mca_coll_demo_priority = -1;
int mca_coll_demo_verbose = 0;
/*
* Local function
*/
static int demo_open(void);
static int demo_register(void);
/*
@ -68,8 +67,10 @@ const mca_coll_base_component_2_0_0_t mca_coll_demo_component = {
OMPI_RELEASE_VERSION,
/* Component open and close functions */
demo_open,
NULL
NULL,
NULL,
NULL,
demo_register
},
{
/* The component is checkpoint ready */
@ -83,15 +84,20 @@ const mca_coll_base_component_2_0_0_t mca_coll_demo_component = {
};
/*
 * Register the "priority" and "verbose" MCA variables of the demo coll
 * component and return OMPI_SUCCESS.
 *
 * NOTE(review): two function headers (demo_open / demo_register) and two
 * registration styles (mca_base_param_reg_int storing a parameter index vs.
 * mca_base_component_var_register writing into mca_coll_demo_priority /
 * mca_coll_demo_verbose) share this body; presumably the old and new sides
 * of one change - confirm which statements are live.
 */
static int demo_open(void)
static int demo_register(void)
{
mca_coll_demo_priority_param =
mca_base_param_reg_int(&mca_coll_demo_component.collm_version,
"priority", NULL, false, false, 20, NULL);
mca_coll_demo_verbose_param =
mca_base_param_reg_int(&mca_coll_demo_component.collm_version,
"verbose", NULL, false, false,
mca_coll_demo_verbose, NULL);
/* priority default: 20 */
mca_coll_demo_priority = 20;
(void) mca_base_component_var_register(&mca_coll_demo_component.collm_version, "priority",
NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_demo_priority);
/* verbose default: 0 */
mca_coll_demo_verbose = 0;
(void) mca_base_component_var_register(&mca_coll_demo_component.collm_version, "verbose",
NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_demo_verbose);
return OMPI_SUCCESS;
}

Просмотреть файл

@ -120,10 +120,7 @@ mca_coll_demo_comm_query(struct ompi_communicator_t *comm, int *priority)
demo_module = OBJ_NEW(mca_coll_demo_module_t);
if (NULL == demo_module) return NULL;
if (OMPI_SUCCESS !=
mca_base_param_lookup_int(mca_coll_demo_priority_param, priority)) {
return NULL;
}
*priority = mca_coll_demo_priority;
demo_module->super.coll_module_enable = mca_coll_demo_module_enable;
demo_module->super.ft_event = mca_coll_demo_ft_event;
@ -182,8 +179,6 @@ mca_coll_demo_module_enable(mca_coll_base_module_t *module,
{
mca_coll_demo_module_t *demo_module = (mca_coll_demo_module_t*) module;
mca_base_param_lookup_int(mca_coll_demo_verbose_param,
&mca_coll_demo_verbose);
if (mca_coll_demo_verbose > 0) {
printf("Hello! This is the \"demo\" coll component. I'll be your coll component\ntoday. Please tip your waitresses well.\n");
}

Просмотреть файл

@ -176,109 +176,142 @@ static int fca_register(void)
c = &mca_coll_fca_component.super.collm_version;
mca_base_param_reg_int(c, "priority",
"Priority of the fca coll component",
false, false,
80,
&mca_coll_fca_component.fca_priority);
mca_coll_fca_component.fca_priority = 80;
(void) mca_base_component_var_register(c, "priority",
"Priority of the fca coll component",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_fca_component.fca_priority);
mca_base_param_reg_int(c, "verbose",
"Verbose level of the fca coll component",
false, false,
0,
&mca_coll_fca_component.fca_verbose);
mca_coll_fca_component.fca_verbose = 0;
(void) mca_base_component_var_register(c, "verbose",
"Verbose level of the fca coll component",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_fca_component.fca_verbose);
mca_base_param_reg_int(c, "enable",
"[1|0|] Enable/Disable Fabric Collective Accelerator",
false, false,
1,
&mca_coll_fca_component.fca_enable);
mca_coll_fca_component.fca_enable = 1;
(void) mca_base_component_var_register(c, "enable",
"[1|0|] Enable/Disable Fabric Collective Accelerator",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_fca_component.fca_enable);
mca_base_param_reg_string(c, "spec_file",
"Path to the FCA configuration file fca_mpi_spec.ini",
false, false,
""COLL_FCA_HOME"/etc/fca_mpi_spec.ini",
&mca_coll_fca_component.fca_spec_file);
mca_coll_fca_component.fca_spec_file = ""COLL_FCA_HOME"/etc/fca_mpi_spec.ini";
(void) mca_base_component_var_register(c, "spec_file",
"Path to the FCA configuration file fca_mpi_spec.ini",
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_fca_component.fca_spec_file);
mca_base_param_reg_int(c, "np",
"[integer] Minimal allowed job's NP to activate FCA",
false, false,
64,
&mca_coll_fca_component.fca_np);
mca_coll_fca_component.fca_np = 64;
(void) mca_base_component_var_register(c, "np",
"[integer] Minimal allowed job's NP to activate FCA",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_fca_component.fca_np);
mca_base_param_reg_int(c, "enable_barrier",
"[1|0|] Enable/Disable FCA Barrier support",
false, false,
OMPI_FCA_BCAST,
&mca_coll_fca_component.fca_enable_barrier);
mca_coll_fca_component.fca_enable_barrier = OMPI_FCA_BCAST;
(void) mca_base_component_var_register(c, "enable_barrier",
"[1|0|] Enable/Disable FCA Barrier support",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_fca_component.fca_enable_barrier);
mca_base_param_reg_int(c, "enable_bcast",
"[1|0|] Enable/Disable FCA Bcast support",
false, false,
OMPI_FCA_BCAST,
&mca_coll_fca_component.fca_enable_bcast);
mca_coll_fca_component.fca_enable_bcast = OMPI_FCA_BCAST;
(void) mca_base_component_var_register(c, "enable_bcast",
"[1|0|] Enable/Disable FCA Bcast support",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_fca_component.fca_enable_bcast);
mca_base_param_reg_int(c, "enable_reduce",
"[1|0|] Enable/Disable FCA Reduce support",
false, false,
OMPI_FCA_REDUCE,
&mca_coll_fca_component.fca_enable_reduce);
mca_coll_fca_component.fca_enable_reduce = OMPI_FCA_REDUCE;
(void) mca_base_component_var_register(c, "enable_reduce",
"[1|0|] Enable/Disable FCA Reduce support",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_fca_component.fca_enable_reduce);
mca_base_param_reg_int(c, "enable_reduce_scatter",
"[1|0|] Enable/Disable FCA Reduce support",
false, false,
OMPI_FCA_REDUCE_SCATTER,
&mca_coll_fca_component.fca_enable_reduce_scatter);
mca_coll_fca_component.fca_enable_reduce_scatter = OMPI_FCA_REDUCE_SCATTER;
(void) mca_base_component_var_register(c, "enable_reduce_scatter",
"[1|0|] Enable/Disable FCA Reduce support",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_fca_component.fca_enable_reduce_scatter);
mca_base_param_reg_int(c, "enable_allreduce",
"[1|0|] Enable/Disable FCA Allreduce support",
false, false,
OMPI_FCA_ALLREDUCE,
&mca_coll_fca_component.fca_enable_allreduce);
mca_coll_fca_component.fca_enable_allreduce = OMPI_FCA_ALLREDUCE;
(void) mca_base_component_var_register(c, "enable_allreduce",
"[1|0|] Enable/Disable FCA Allreduce support",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_fca_component.fca_enable_allreduce);
mca_base_param_reg_int(c, "enable_allgather",
"[1|0|] Enable/Disable FCA Allgather support",
false, false,
OMPI_FCA_ALLGATHER,
&mca_coll_fca_component.fca_enable_allgather);
mca_coll_fca_component.fca_enable_allgather = OMPI_FCA_ALLGATHER;
(void) mca_base_component_var_register(c, "enable_allgather",
"[1|0|] Enable/Disable FCA Allgather support",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_fca_component.fca_enable_allgather);
mca_base_param_reg_int(c, "enable_allgatherv",
"[1|0|] Enable/Disable FCA Allgatherv support",
false, false,
OMPI_FCA_ALLGATHERV,
&mca_coll_fca_component.fca_enable_allgatherv);
mca_coll_fca_component.fca_enable_allgatherv = OMPI_FCA_ALLGATHERV;
(void) mca_base_component_var_register(c, "enable_allgatherv",
"[1|0|] Enable/Disable FCA Allgatherv support",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_fca_component.fca_enable_allgatherv);
mca_base_param_reg_int(c, "enable_gather",
"[1|0|] Enable/Disable FCA Gather support",
false, false,
OMPI_FCA_GATHER,
&mca_coll_fca_component.fca_enable_gather);
mca_coll_fca_component.fca_enable_gather = OMPI_FCA_GATHER;
(void) mca_base_component_var_register(c, "enable_gather",
"[1|0|] Enable/Disable FCA Gather support",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_fca_component.fca_enable_gather);
mca_base_param_reg_int(c, "enable_gatherv",
"[1|0|] Enable/Disable FCA Gatherv support",
false, false,
OMPI_FCA_GATHER,
&mca_coll_fca_component.fca_enable_gatherv);
mca_coll_fca_component.fca_enable_gatherv = OMPI_FCA_GATHER;
(void) mca_base_component_var_register(c, "enable_gatherv",
"[1|0|] Enable/Disable FCA Gatherv support",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_fca_component.fca_enable_gatherv);
mca_base_param_reg_int(c, "enable_alltoall",
"[1|0|] Enable/Disable FCA AlltoAll support",
false, false,
OMPI_FCA_ALLTOALL,
&mca_coll_fca_component.fca_enable_alltoall);
mca_coll_fca_component.fca_enable_alltoall = OMPI_FCA_ALLTOALL;
(void) mca_base_component_var_register(c, "enable_alltoall",
"[1|0|] Enable/Disable FCA AlltoAll support",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_fca_component.fca_enable_alltoall);
mca_base_param_reg_int(c, "enable_alltoallv",
"[1|0|] Enable/Disable FCA AlltoAllv support",
false, false,
OMPI_FCA_ALLTOALLV,
&mca_coll_fca_component.fca_enable_alltoallv);
mca_base_param_reg_int(c, "enable_alltoallw",
"[1|0|] Enable/Disable FCA AlltoAllw support",
false, false,
OMPI_FCA_ALLTOALLW,
&mca_coll_fca_component.fca_enable_alltoallw);
mca_coll_fca_component.fca_enable_alltoallv = OMPI_FCA_ALLTOALLV;
(void) mca_base_component_var_register(c, "enable_alltoallv",
"[1|0|] Enable/Disable FCA AlltoAllv support",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_fca_component.fca_enable_alltoallv);
mca_coll_fca_component.fca_enable_alltoallw = OMPI_FCA_ALLTOALLW;
(void) mca_base_component_var_register(c, "enable_alltoallw",
"[1|0|] Enable/Disable FCA AlltoAllw support",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_fca_component.fca_enable_alltoallw);
return OMPI_SUCCESS;
}

Просмотреть файл

@ -52,7 +52,7 @@ int mca_coll_hierarch_segsize_param=32768;
/*
* Local function
*/
static int hierarch_open(void);
static int hierarch_register(void);
/*
* Instantiate the public struct with all of our public information
@ -74,8 +74,10 @@ const mca_coll_base_component_2_0_0_t mca_coll_hierarch_component = {
OMPI_RELEASE_VERSION,
/* Component open and close functions */
hierarch_open,
NULL
NULL,
NULL,
NULL,
hierarch_register
},
{
/* The component is checkpoint ready */
@ -88,58 +90,75 @@ const mca_coll_base_component_2_0_0_t mca_coll_hierarch_component = {
};
/*
 * Register the MCA variables of the hierarch coll component (priority,
 * verbose, use_rdma, ignore_sm, detection_alg, bcast_alg, segment_size) and
 * return OMPI_SUCCESS.
 *
 * The mca_base_component_var_register() calls store the default in the
 * backing global first and pass its address as storage; every variable is an
 * int registered at OPAL_INFO_LVL_9 with MCA_BASE_VAR_SCOPE_READONLY.
 *
 * NOTE(review): two function headers (hierarch_open / hierarch_register) and
 * two registration styles (mca_base_param_reg_int vs.
 * mca_base_component_var_register) share this body; presumably the old and
 * new sides of one change - confirm which statements are live.
 */
static int hierarch_open(void)
static int hierarch_register(void)
{
/* Use a high priority, but allow other components to be higher */
mca_base_param_reg_int(&mca_coll_hierarch_component.collm_version,
"priority",
"Priority of the hierarchical coll component",
false, false, mca_coll_hierarch_priority_param,
&mca_coll_hierarch_priority_param);
mca_coll_hierarch_priority_param = 0;
(void) mca_base_component_var_register(&mca_coll_hierarch_component.collm_version,
"priority", "Priority of the hierarchical coll component",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_hierarch_priority_param);
mca_coll_hierarch_verbose_param = 0;
(void) mca_base_component_var_register(&mca_coll_hierarch_component.collm_version,
"verbose",
"Turn verbose message of the hierarchical coll component on/off",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_hierarch_verbose_param);
mca_coll_hierarch_use_rdma_param = 0;
(void) mca_base_component_var_register(&mca_coll_hierarch_component.collm_version,
"use_rdma",
"Switch from the send btl list used to detect hierarchies to "
"the rdma btl list",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_hierarch_use_rdma_param);
mca_coll_hierarch_ignore_sm_param = 0;
(void) mca_base_component_var_register(&mca_coll_hierarch_component.collm_version,
"ignore_sm",
"Ignore sm protocol when detecting hierarchies. "
"Required to enable the usage of protocol"
" specific collective operations",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_hierarch_ignore_sm_param);
/* detection_alg default: 2; the help text is two adjacent literals, so the
first one must end in a space to keep the sentences apart */
mca_coll_hierarch_detection_alg_param = 2;
(void) mca_base_component_var_register(&mca_coll_hierarch_component.collm_version,
"detection_alg",
"Used to specify the algorithm for detecting Hierarchy. "
"Choose between all or two levels of hierarchy",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_hierarch_detection_alg_param);
mca_base_param_reg_int(&mca_coll_hierarch_component.collm_version,
"verbose",
"Turn verbose message of the hierarchical coll component on/off",
false, false, mca_coll_hierarch_verbose_param,
&mca_coll_hierarch_verbose_param);
mca_coll_hierarch_bcast_alg_param = COLL_HIERARCH_BASIC_BCAST_ALG;
(void) mca_base_component_var_register(&mca_coll_hierarch_component.collm_version,
"bcast_alg",
"Used to specify the algorithm used for bcast operations.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_hierarch_bcast_alg_param);
mca_base_param_reg_int(&mca_coll_hierarch_component.collm_version,
"use_rdma",
"Switch from the send btl list used to detect hierarchies to "
"the rdma btl list",
false, false, mca_coll_hierarch_use_rdma_param,
&mca_coll_hierarch_use_rdma_param);
mca_base_param_reg_int(&mca_coll_hierarch_component.collm_version,
"ignore_sm",
"Ignore sm protocol when detecting hierarchies. "
"Required to enable the usage of protocol"
" specific collective operations",
false, false, mca_coll_hierarch_ignore_sm_param,
&mca_coll_hierarch_ignore_sm_param);
mca_base_param_reg_int(&mca_coll_hierarch_component.collm_version,
"detection_alg",
"Used to specify the algorithm for detecting Hierarchy. "
"Choose between all or two levels of hierarchy",
false, false, mca_coll_hierarch_detection_alg_param,
&mca_coll_hierarch_detection_alg_param);
mca_base_param_reg_int(&mca_coll_hierarch_component.collm_version,
"bcast_alg",
"Used to specify the algorithm used for bcast operations.",
false, false, mca_coll_hierarch_bcast_alg_param,
&mca_coll_hierarch_bcast_alg_param);
mca_base_param_reg_int(&mca_coll_hierarch_component.collm_version,
"segment_size",
"Used to specify the segment size for segmented algorithms.",
false, false, mca_coll_hierarch_segsize_param,
&mca_coll_hierarch_segsize_param);
/* segment_size default: 32768 */
mca_coll_hierarch_segsize_param = 32768;
(void) mca_base_component_var_register(&mca_coll_hierarch_component.collm_version,
"segment_size",
"Used to specify the segment size for segmented algorithms.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_hierarch_segsize_param);
return OMPI_SUCCESS;
}

Просмотреть файл

@ -46,7 +46,7 @@ int mca_coll_inter_verbose_param = 0;
/*
* Local function
*/
static int inter_open(void);
static int inter_register(void);
/*
* Instantiate the public struct with all of our public information
@ -68,8 +68,10 @@ const mca_coll_base_component_2_0_0_t mca_coll_inter_component = {
OMPI_RELEASE_VERSION,
/* Component open and close functions */
inter_open,
NULL
NULL,
NULL,
NULL,
inter_register
},
{
/* This component is checkpointable */
@ -82,23 +84,25 @@ const mca_coll_base_component_2_0_0_t mca_coll_inter_component = {
};
static int inter_open(void)
static int inter_register(void)
{
/* Use a high priority, but allow other components to be higher */
mca_base_param_reg_int(&mca_coll_inter_component.collm_version,
"priority",
"Priority of the inter coll component",
false, false, mca_coll_inter_priority_param,
&mca_coll_inter_priority_param);
mca_coll_inter_priority_param = 40;
(void) mca_base_component_var_register(&mca_coll_inter_component.collm_version,
"priority", "Priority of the inter coll component",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_inter_priority_param);
mca_base_param_reg_int(&mca_coll_inter_component.collm_version,
"verbose",
"Turn verbose message of the inter coll component on/off",
false, false, mca_coll_inter_verbose_param,
&mca_coll_inter_verbose_param);
mca_coll_inter_verbose_param = 0;
(void) mca_base_component_var_register(&mca_coll_inter_component.collm_version,
"verbose",
"Turn verbose message of the inter coll component on/off",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_inter_verbose_param);
return OMPI_SUCCESS;
}

Просмотреть файл

@ -122,12 +122,13 @@ static int
libnbc_register(void)
{
/* Use a low priority, but allow other components to be lower */
mca_base_param_reg_int(&mca_coll_libnbc_component.super.collm_version,
"priority",
"Priority of the libnbc coll component",
false, false, libnbc_priority,
&libnbc_priority);
libnbc_priority = 10;
(void) mca_base_component_var_register(&mca_coll_libnbc_component.super.collm_version,
"priority", "Priority of the libnbc coll component",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&libnbc_priority);
return OMPI_SUCCESS;
}

Просмотреть файл

@ -499,7 +499,7 @@ struct mca_coll_ml_component_t {
int verbose;
/** max of communicators available to run ML */
uint32_t max_comm;
unsigned int max_comm;
/** min size of comm to be available to run ML */
int min_comm_size;
@ -547,7 +547,7 @@ struct mca_coll_ml_component_t {
int n_payload_buffs_per_bank;
/* size of payload buffer */
size_t payload_buffer_size;
unsigned long long payload_buffer_size;
/* pipeline depth for msg fragmentation */
int pipeline_depth;
@ -579,10 +579,10 @@ struct mca_coll_ml_component_t {
bool progress_is_busy;
/* Temporary hack for IMB test - not all bcols have allgather */
int disable_allgather;
bool disable_allgather;
/* Temporary hack for IMB test - not all bcols have alltoall */
int disable_alltoall;
bool disable_alltoall;
/* Brucks alltoall mca and other params */
int use_brucks_smsg_alltoall;

Просмотреть файл

@ -8,8 +8,6 @@
#include <unistd.h>
#endif
#include "opal/mca/base/mca_base_param.h"
#include "coll_ml.h"
#include "coll_ml_inlines.h"
#include "coll_ml_config.h"

Просмотреть файл

@ -124,21 +124,24 @@ int mca_coll_ml_lmngr_tune(mca_coll_ml_lmngr_t *lmngr,
int mca_coll_ml_lmngr_reg(void)
{
int ival, tmp;
int ret = OMPI_SUCCESS;
int tmp, ret = OMPI_SUCCESS;
mca_coll_ml_component_t *cm = &mca_coll_ml_component;
#define CHECK(expr) do {\
tmp = (expr); \
if (OMPI_SUCCESS != tmp) ret = tmp; \
if (0 > tmp) ret = tmp; \
} while (0)
ML_VERBOSE(7, ("Setting parameters for list manager"));
CHECK(reg_int("memory_manager_list_size", NULL,
"Memory manager list size", 8, &ival, 0));
cm->lmngr_size = ival;
cm->lmngr_size = 8;
CHECK(mca_base_component_var_register(&mca_coll_ml_component.super.collm_version,
"memory_manager_list_size", "Memory manager list size",
MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&cm->lmngr_size));
/* The size list couldn't be less than possible max of ML modules,
it = max supported communicators by ML */
@ -146,17 +149,25 @@ int mca_coll_ml_lmngr_reg(void)
cm->lmngr_size = cm->max_comm;
}
CHECK(reg_int("memory_manager_block_size", NULL,
"Memory manager block size",
cm->payload_buffer_size *
cm->n_payload_buffs_per_bank *
cm->n_payload_mem_banks *
cm->lmngr_size, &ival, 0));
mca_coll_ml_component.lmngr_block_size = ival;
mca_coll_ml_component.lmngr_block_size = cm->payload_buffer_size *
cm->n_payload_buffs_per_bank *
cm->n_payload_mem_banks *
cm->lmngr_size;
CHECK(reg_int("memory_manager_alignment", NULL,
"Memory manager alignment", 4 * 1024, &ival, 0));
cm->lmngr_alignment = ival;
CHECK(mca_base_component_var_register(&mca_coll_ml_component.super.collm_version,
"memory_manager_block_size", "Memory manager block size",
MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_ml_component.lmngr_block_size));
/* Default alignment is 4 * 1024.  The variable must be backed by
   lmngr_alignment itself: backing it with lmngr_block_size would let a
   user-supplied alignment overwrite the block size and never reach the
   alignment field. */
cm->lmngr_alignment = 4 * 1024;
CHECK(mca_base_component_var_register(&mca_coll_ml_component.super.collm_version,
                                      "memory_manager_alignment", "Memory manager alignment",
                                      MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0,
                                      OPAL_INFO_LVL_9,
                                      MCA_BASE_VAR_SCOPE_READONLY,
                                      &cm->lmngr_alignment));
return ret;
}

Просмотреть файл

@ -47,70 +47,132 @@ enum {
/*
 * Utility routine for string parameter registration.
 *
 * Seeds *storage with default_value, registers param_name for the ML
 * component with *storage as the backing store and, when
 * deprecated_param_name is non-NULL, registers that name as a deprecated
 * synonym of the new variable.
 *
 * Returns OMPI_SUCCESS, or OMPI_ERR_BAD_PARAM when the resulting value is
 * NULL or empty and flags does not contain REGSTR_EMPTY_OK.
 */
static int reg_string(const char* param_name,
                      const char* deprecated_param_name,
                      const char* param_desc,
                      const char* default_value, char **storage,
                      int flags)
{
    int index;

    /* The cast only drops const so the default can seed the backing store */
    *storage = (char *) default_value;
    index = mca_base_component_var_register(&mca_coll_ml_component.super.collm_version,
                                            param_name, param_desc, MCA_BASE_VAR_TYPE_STRING,
                                            NULL, 0, 0, OPAL_INFO_LVL_9,
                                            MCA_BASE_VAR_SCOPE_READONLY, storage);
    if (NULL != deprecated_param_name) {
        (void) mca_base_var_register_synonym(index, "ompi", "coll", "ml", deprecated_param_name,
                                             MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
    }

    /* An empty value is an error only when the caller did NOT allow it */
    if (0 == (flags & REGSTR_EMPTY_OK) && (NULL == *storage || 0 == strlen(*storage))) {
        opal_output(0, "Bad parameter value for parameter \"%s\"",
                    param_name);
        return OMPI_ERR_BAD_PARAM;
    }

    return OMPI_SUCCESS;
}
/*
* utility routine for integer parameter registration
*/
int reg_int(const char* param_name,
static int reg_int(const char* param_name,
const char* deprecated_param_name,
const char* param_desc,
int default_value, int *out_value, int flags)
int default_value, int *storage, int flags)
{
int index, value;
index = mca_base_param_reg_int(&mca_coll_ml_component.super.collm_version,
param_name, param_desc, false, false,
default_value, NULL);
int index;
*storage = default_value;
index = mca_base_component_var_register(&mca_coll_ml_component.super.collm_version,
param_name, param_desc, MCA_BASE_VAR_TYPE_INT,
NULL, 0, 0,OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, storage);
if (NULL != deprecated_param_name) {
mca_base_param_reg_syn(index,
&mca_coll_ml_component.super.collm_version,
deprecated_param_name, true);
(void) mca_base_var_register_synonym(index, "ompi", "coll", "ml", deprecated_param_name,
MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
}
mca_base_param_lookup_int(index, &value);
if (0 != (flags & REGINT_NEG_ONE_OK) && -1 == value) {
*out_value = value;
if (0 != (flags & REGINT_NEG_ONE_OK) && -1 == *storage) {
return OMPI_SUCCESS;
}
if ((0 != (flags & REGINT_GE_ZERO) && value < 0) ||
(0 != (flags & REGINT_GE_ONE) && value < 1) ||
(0 != (flags & REGINT_NONZERO) && 0 == value)) {
if ((0 != (flags & REGINT_GE_ZERO) && *storage < 0) ||
(0 != (flags & REGINT_GE_ONE) && *storage < 1) ||
(0 != (flags & REGINT_NONZERO) && 0 == *storage)) {
opal_output(0, "Bad parameter value for parameter \"%s\"",
param_name);
return OMPI_ERR_BAD_PARAM;
}
*out_value = value;
return OMPI_SUCCESS;
}
/*
 * Utility routine for boolean parameter registration.
 *
 * Seeds *storage with default_value, registers param_name for the ML
 * component with *storage as the backing store and, when
 * deprecated_param_name is non-NULL, registers that name as a deprecated
 * synonym.  Always returns OMPI_SUCCESS.
 */
static int reg_bool(const char* param_name,
                    const char* deprecated_param_name,
                    const char* param_desc,
                    bool default_value, bool *storage)
{
    int var_index;

    *storage = default_value;
    var_index = mca_base_component_var_register(&mca_coll_ml_component.super.collm_version,
                                                param_name, param_desc, MCA_BASE_VAR_TYPE_BOOL,
                                                NULL, 0, 0, OPAL_INFO_LVL_9,
                                                MCA_BASE_VAR_SCOPE_READONLY, storage);

    /* Nothing more to do unless an old name has to stay usable */
    if (NULL == deprecated_param_name) {
        return OMPI_SUCCESS;
    }

    (void) mca_base_var_register_synonym(var_index, "ompi", "coll", "ml", deprecated_param_name,
                                         MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
    return OMPI_SUCCESS;
}
/*
 * Utility routine for unsigned long long parameter registration.
 *
 * Seeds *storage with default_value, registers param_name for the ML
 * component with *storage as the backing store and, when
 * deprecated_param_name is non-NULL, registers that name as a deprecated
 * synonym.
 *
 * Returns OMPI_SUCCESS, or OMPI_ERR_BAD_PARAM when the resulting value is 0
 * and flags contains REGINT_GE_ONE or REGINT_NONZERO.
 */
static int reg_ullint(const char* param_name,
                      const char* deprecated_param_name,
                      const char* param_desc,
                      unsigned long long default_value, unsigned long long *storage, int flags)
{
    int var_index;

    *storage = default_value;
    var_index = mca_base_component_var_register(&mca_coll_ml_component.super.collm_version,
                                                param_name, param_desc, MCA_BASE_VAR_TYPE_UNSIGNED_LONG_LONG,
                                                NULL, 0, 0, OPAL_INFO_LVL_9,
                                                MCA_BASE_VAR_SCOPE_READONLY, storage);
    if (NULL != deprecated_param_name) {
        (void) mca_base_var_register_synonym(var_index, "ompi", "coll", "ml", deprecated_param_name,
                                             MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
    }

    /* For an unsigned value "< 1" and "== 0" are the same test, so both
       flags reject exactly the value zero */
    if (0 == *storage && 0 != (flags & (REGINT_GE_ONE | REGINT_NONZERO))) {
        opal_output(0, "Bad parameter value for parameter \"%s\"",
                    param_name);
        return OMPI_ERR_BAD_PARAM;
    }

    return OMPI_SUCCESS;
}
static int mca_coll_ml_verify_params(void)
{
int dummy;
/* Make sure that the the number of memory banks is a power of 2 */
mca_coll_ml_component.n_payload_mem_banks =
roundup_to_power_radix(2, mca_coll_ml_component.n_payload_mem_banks,
&dummy);
/* Make sure that the the number of buffers is a power of 2 */
mca_coll_ml_component.n_payload_buffs_per_bank =
roundup_to_power_radix(2, mca_coll_ml_component.n_payload_buffs_per_bank,
&dummy);
return OMPI_SUCCESS;
}
int mca_coll_ml_register_params(void)
{
int ival, ret, tmp, dummy;
int ret, tmp;
char *str = NULL;
ret = OMPI_SUCCESS;
@ -123,99 +185,70 @@ int mca_coll_ml_register_params(void)
CHECK(reg_int("priority", NULL,
"ML component priority"
"(from 0(low) to 90 (high))", 0, &ival, 0));
mca_coll_ml_component.ml_priority = ival;
"(from 0(low) to 90 (high))", 0, &mca_coll_ml_component.ml_priority, 0));
CHECK(reg_int("verbose", NULL,
"Output some verbose ML information "
"(0 = no output, nonzero = output)", 0, &ival, 0));
mca_coll_ml_component.verbose = ival;
"(0 = no output, nonzero = output)", 0, &mca_coll_ml_component.verbose, 0));
CHECK(reg_int("n_levels", NULL,
"number of levels in the hierarchy ", 1, &ival, 0));
mca_coll_ml_component.ml_n_levels = ival;
"number of levels in the hierarchy ", 1, &mca_coll_ml_component.ml_n_levels, 0));
CHECK(reg_int("max_comm", NULL,
"max of communicators available to run ML", 12, &ival, 0));
mca_coll_ml_component.max_comm = ival;
"max of communicators available to run ML", 12, (int *) &mca_coll_ml_component.max_comm, 0));
CHECK(reg_int("min_comm_size", NULL,
" min size of comm to be available to run ML", 0, &ival, 0));
mca_coll_ml_component.min_comm_size = ival;
" min size of comm to be available to run ML", 0, &mca_coll_ml_component.min_comm_size, 0));
CHECK(reg_int("n_payload_mem_banks", NULL,
" number of payload memory banks", 2, &ival, 0));
mca_coll_ml_component.n_payload_mem_banks = ival;
/* Make sure that the the number of memory banks is a power of 2 */
mca_coll_ml_component.n_payload_mem_banks =
roundup_to_power_radix(2, mca_coll_ml_component.n_payload_mem_banks,
&dummy);
"number of payload memory banks", 2, &mca_coll_ml_component.n_payload_mem_banks, 0));
CHECK(reg_int("n_payload_buffs_per_bank", NULL,
" number of payload buffers per bank", 16, &ival, 0));
mca_coll_ml_component.n_payload_buffs_per_bank = ival;
/* Make sure that the the number of buffers is a power of 2 */
mca_coll_ml_component.n_payload_buffs_per_bank =
roundup_to_power_radix(2, mca_coll_ml_component.n_payload_buffs_per_bank,
&dummy);
"number of payload buffers per bank", 16, &mca_coll_ml_component.n_payload_buffs_per_bank, 0));
/* RLG: need to handle alignment and size */
CHECK(reg_int("payload_buffer_size", NULL,
" size of payload buffer", 4*1024, &ival, 0));
mca_coll_ml_component.payload_buffer_size = (size_t) ival;
CHECK(reg_ullint("payload_buffer_size", NULL,
"size of payload buffer", 4*1024, &mca_coll_ml_component.payload_buffer_size, 0));
/* get the pipeline depth, default is 2 */
CHECK(reg_int("pipeline_depth", NULL,
"size of fragmentation pipeline", 2, &ival, 0));
mca_coll_ml_component.pipeline_depth = (int) ival;
"size of fragmentation pipeline", 2, &mca_coll_ml_component.pipeline_depth, 0));
CHECK(reg_int("free_list_init_size", NULL,
" Initial size for free lists in ML", 128, &ival, 0));
mca_coll_ml_component.free_list_init_size = (size_t) ival;
" Initial size for free lists in ML", 128, &mca_coll_ml_component.free_list_init_size, 0));
CHECK(reg_int("free_list_grow_size", NULL,
" Initial size for free lists in ML", 64, &ival, 0));
mca_coll_ml_component.free_list_grow_size = (size_t) ival;
" Initial size for free lists in ML", 64, &mca_coll_ml_component.free_list_grow_size, 0));
CHECK(reg_int("free_list_max_size", NULL,
" Initial size for free lists in ML", -1, &ival, 0));
mca_coll_ml_component.free_list_max_size = (size_t) ival;
" Initial size for free lists in ML", -1, &mca_coll_ml_component.free_list_max_size, 0));
CHECK(reg_int("use_knomial_allreduce", NULL,
"Use k-nomial Allreduce supports only p2p currently"
, 1, &ival, 0));
mca_coll_ml_component.use_knomial_allreduce = ival;
, 1, &mca_coll_ml_component.use_knomial_allreduce, 0));
CHECK(reg_int("use_static_bcast", NULL,
"Use new bcast static algorithm", 1, &ival, 0));
mca_coll_ml_component.use_static_bcast = (0 != ival);
CHECK(reg_bool("use_static_bcast", NULL,
"Use new bcast static algorithm", true, &mca_coll_ml_component.use_static_bcast));
CHECK(reg_int("use_sequential_bcast", NULL,
"Use new bcast static algorithm", 0, &ival, 0));
mca_coll_ml_component.use_sequential_bcast = (0 != ival);
CHECK(reg_bool("use_sequential_bcast", NULL,
"Use new bcast static algorithm", false, &mca_coll_ml_component.use_sequential_bcast));
CHECK(reg_int("disable_allgather", NULL,
CHECK(reg_bool("disable_allgather", NULL,
"Allgather disabling",
0, &ival, 0));
mca_coll_ml_component.disable_allgather = (0 != ival);
false, &mca_coll_ml_component.disable_allgather));
CHECK(reg_int("disable_alltoall", NULL,
CHECK(reg_bool("disable_alltoall", NULL,
"Alltoall disabling",
0, &ival, 0));
mca_coll_ml_component.disable_alltoall = (0 != ival);
false, &mca_coll_ml_component.disable_alltoall));
CHECK(reg_int("enable_fragmentation", NULL,
"Disable/Enable fragmentation for large messages"
, 0, &ival, 0));
mca_coll_ml_component.enable_fragmentation = (0 != ival);
CHECK(reg_bool("enable_fragmentation", NULL,
"Disable/Enable fragmentation for large messages",
false, &mca_coll_ml_component.enable_fragmentation));
CHECK(reg_int("use_brucks_smsg_alltoall", NULL,
"Use Bruck's Algo for Small Msg Alltoall"
"1 - Bruck's Algo with RDMA; 2 - Bruck's with Send Recv"
, 0, &ival, 0));
mca_coll_ml_component.use_brucks_smsg_alltoall = ival;
, 0, &mca_coll_ml_component.use_brucks_smsg_alltoall, 0));
asprintf(&str, "%s/mca-coll-ml.config",
opal_install_dirs.pkgdatadir);
@ -232,5 +265,8 @@ int mca_coll_ml_register_params(void)
/* Reading parameters for list manager */
CHECK(mca_coll_ml_lmngr_reg());
/* Verify the parameters */
CHECK(mca_coll_ml_verify_params());
return ret;
}

Просмотреть файл

@ -15,11 +15,6 @@
#include<ctype.h>
#include "ompi_config.h"
int reg_int(const char* param_name,
const char* deprecated_param_name,
const char* param_desc,
int default_value, int *out_value, int flags);
int mca_coll_ml_register_params(void);
#endif

Просмотреть файл

@ -34,8 +34,7 @@ BEGIN_C_DECLS
*/
OMPI_MODULE_DECLSPEC extern const mca_coll_base_component_2_0_0_t mca_coll_self_component;
extern int mca_coll_self_priority_param;
extern int ompi_coll_self_priority;
/*
* coll API functions

Просмотреть файл

@ -39,12 +39,12 @@ const char *mca_coll_self_component_version_string =
/*
* Global variable
*/
int mca_coll_self_priority_param = -1;
int ompi_coll_self_priority = 0;
/*
* Local function
*/
static int self_open(void);
static int self_register(void);
/*
@ -67,8 +67,10 @@ const mca_coll_base_component_2_0_0_t mca_coll_self_component = {
OMPI_RELEASE_VERSION,
/* Component open and close functions */
self_open,
NULL
NULL,
NULL,
NULL,
self_register
},
{
/* The component is checkpoint ready */
@ -81,15 +83,16 @@ const mca_coll_base_component_2_0_0_t mca_coll_self_component = {
mca_coll_self_comm_query
};
static int self_open(void)
static int self_register(void)
{
/* We'll always be picked if there's only one process in the
communicator */
mca_coll_self_priority_param =
mca_base_param_reg_int (&mca_coll_self_component.collm_version,
"priority", NULL, false, false, 75, NULL);
ompi_coll_self_priority = 75;
(void) mca_base_component_var_register(&mca_coll_self_component.collm_version,
"priority", NULL, MCA_BASE_VAR_TYPE_INT,
NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&ompi_coll_self_priority);
return OMPI_SUCCESS;
}

Просмотреть файл

@ -23,7 +23,6 @@
#include "mpi.h"
#include "ompi/communicator/communicator.h"
#include "opal/mca/base/mca_base_param.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/base.h"
#include "coll_self.h"
@ -56,11 +55,7 @@ mca_coll_self_comm_query(struct ompi_communicator_t *comm,
/* We only work on intracommunicators of size 1 */
if (!OMPI_COMM_IS_INTER(comm) && 1 == ompi_comm_size(comm)) {
if (OMPI_SUCCESS !=
mca_base_param_lookup_int(mca_coll_self_priority_param,
priority)) {
return NULL;
}
*priority = ompi_coll_self_priority;
module = OBJ_NEW(mca_coll_self_module_t);
if (NULL == module) return NULL;

Просмотреть файл

@ -46,6 +46,7 @@ const char *mca_coll_sm_component_version_string =
static int sm_close(void);
static int sm_register(void);
static int coll_sm_shared_mem_used_data;
/*
* Instantiate the public struct with all of our public information
@ -127,54 +128,19 @@ static int sm_close(void)
return OMPI_SUCCESS;
}
/*
* Register MCA params
*/
static int sm_register(void)
static int sm_verify_mca_variables(void)
{
size_t size;
mca_base_component_t *c = &mca_coll_sm_component.super.collm_version;
mca_coll_sm_component_t *cs = &mca_coll_sm_component;
/* If we want to be selected (i.e., all procs on one node), then
we should have a high priority */
mca_base_param_reg_int(c, "priority",
"Priority of the sm coll component",
false, false,
cs->sm_priority,
&cs->sm_priority);
mca_base_param_reg_int(c, "control_size",
"Length of the control data -- should usually be either the length of a cache line on most SMPs, or the size of a page on machines that support direct memory affinity page placement (in bytes)",
false, false,
cs->sm_control_size,
&cs->sm_control_size);
mca_base_param_reg_int(c, "fragment_size",
"Fragment size (in bytes) used for passing data through shared memory (will be rounded up to the nearest control_size size)",
false, false,
cs->sm_fragment_size,
&cs->sm_fragment_size);
if (0 != (cs->sm_fragment_size % cs->sm_control_size)) {
cs->sm_fragment_size += cs->sm_control_size -
(cs->sm_fragment_size % cs->sm_control_size);
}
mca_base_param_reg_int(c, "comm_in_use_flags",
"Number of \"in use\" flags, used to mark a message passing area segment as currently being used or not (must be >= 2 and <= comm_num_segments)",
false, false,
cs->sm_comm_num_in_use_flags,
&cs->sm_comm_num_in_use_flags);
if (cs->sm_comm_num_in_use_flags < 2) {
cs->sm_comm_num_in_use_flags = 2;
}
mca_base_param_reg_int(c, "comm_num_segments",
"Number of segments in each communicator's shared memory message passing area (must be >= 2, and must be a multiple of comm_in_use_flags)",
false, false,
cs->sm_comm_num_segments,
&cs->sm_comm_num_segments);
if (cs->sm_comm_num_segments < cs->sm_comm_num_in_use_flags) {
cs->sm_comm_num_segments = cs->sm_comm_num_in_use_flags;
}
@ -185,11 +151,6 @@ static int sm_register(void)
cs->sm_segs_per_inuse_flag =
cs->sm_comm_num_segments / cs->sm_comm_num_in_use_flags;
mca_base_param_reg_int(c, "tree_degree",
"Degree of the tree for tree-based operations (must be => 1 and <= min(control_size, 255))",
false, false,
cs->sm_tree_degree,
&cs->sm_tree_degree);
if (cs->sm_tree_degree > cs->sm_control_size) {
opal_show_help("help-mpi-coll-sm.txt",
"tree-degree-larger-than-control", true,
@ -203,23 +164,95 @@ static int sm_register(void)
cs->sm_tree_degree = 255;
}
/* INFO: Calculate how much space we need in the per-communicator
shmem data segment. This formula taken directly from
coll_sm_module.c. */
mca_base_param_reg_int(c, "info_num_procs",
"Number of processes to use for the calculation of the shared_mem_size MCA information parameter (must be => 2)",
false, false,
cs->sm_info_comm_size,
&cs->sm_info_comm_size);
size = 4 * cs->sm_control_size +
coll_sm_shared_mem_used_data = (int)(4 * cs->sm_control_size +
(cs->sm_comm_num_in_use_flags * cs->sm_control_size) +
(cs->sm_comm_num_segments * (cs->sm_info_comm_size * cs->sm_control_size * 2)) +
(cs->sm_comm_num_segments * (cs->sm_info_comm_size * cs->sm_fragment_size));
mca_base_param_reg_int(c, "shared_mem_used_data",
"Amount of shared memory used, per communicator, in the shared memory data area for info_num_procs processes (in bytes)",
false, true,
(int)size, NULL);
(cs->sm_comm_num_segments * (cs->sm_info_comm_size * cs->sm_fragment_size)));
return OMPI_SUCCESS;
}
/*
 * Register MCA params
 *
 * Every tunable is first reset to its built-in default and then registered
 * with that field as its backing store.  The informational
 * shared_mem_used_data variable is computed from the tunables and registered
 * last; the final result comes from sm_verify_mca_variables().
 */
static int sm_register(void)
{
    mca_coll_sm_component_t *cs = &mca_coll_sm_component;
    mca_base_component_t *c = &mca_coll_sm_component.super.collm_version;
    size_t i;

    /* One row per integer tunable, in registration order */
    struct {
        const char *name;
        const char *help;
        int *storage;
        int default_value;
    } tunables[] = {
        /* If we want to be selected (i.e., all procs on one node), then
           we should have a high priority */
        { "priority",
          "Priority of the sm coll component",
          &cs->sm_priority, 0 },
        { "control_size",
          "Length of the control data -- should usually be either the length of a cache line on most SMPs, or the size of a page on machines that support direct memory affinity page placement (in bytes)",
          &cs->sm_control_size, 4096 },
        { "fragment_size",
          "Fragment size (in bytes) used for passing data through shared memory (will be rounded up to the nearest control_size size)",
          &cs->sm_fragment_size, 8192 },
        { "comm_in_use_flags",
          "Number of \"in use\" flags, used to mark a message passing area segment as currently being used or not (must be >= 2 and <= comm_num_segments)",
          &cs->sm_comm_num_in_use_flags, 2 },
        { "comm_num_segments",
          "Number of segments in each communicator's shared memory message passing area (must be >= 2, and must be a multiple of comm_in_use_flags)",
          &cs->sm_comm_num_segments, 8 },
        { "tree_degree",
          "Degree of the tree for tree-based operations (must be => 1 and <= min(control_size, 255))",
          &cs->sm_tree_degree, 4 },
        /* Feeds the informational size calculation below */
        { "info_num_procs",
          "Number of processes to use for the calculation of the shared_mem_size MCA information parameter (must be => 2)",
          &cs->sm_info_comm_size, 4 },
    };

    for (i = 0; i < sizeof(tunables) / sizeof(tunables[0]); ++i) {
        *tunables[i].storage = tunables[i].default_value;
        (void) mca_base_component_var_register(c, tunables[i].name, tunables[i].help,
                                               MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                               OPAL_INFO_LVL_9,
                                               MCA_BASE_VAR_SCOPE_READONLY,
                                               tunables[i].storage);
    }

    /* INFO: Calculate how much space we need in the per-communicator
       shmem data segment.  This formula taken directly from
       coll_sm_module.c. */
    coll_sm_shared_mem_used_data = (int)(4 * cs->sm_control_size +
        (cs->sm_comm_num_in_use_flags * cs->sm_control_size) +
        (cs->sm_comm_num_segments * (cs->sm_info_comm_size * cs->sm_control_size * 2)) +
        (cs->sm_comm_num_segments * (cs->sm_info_comm_size * cs->sm_fragment_size)));

    (void) mca_base_component_var_register(c, "shared_mem_used_data",
                                           "Amount of shared memory used, per communicator, in the shared memory data area for info_num_procs processes (in bytes)",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0,
                                           MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &coll_sm_shared_mem_used_data);

    return sm_verify_mca_variables();
}

Просмотреть файл

@ -80,7 +80,7 @@ BEGIN_C_DECLS
extern int ompi_coll_tuned_stream;
extern int ompi_coll_tuned_priority;
extern int ompi_coll_tuned_preallocate_memory_comm_size_limit;
extern int ompi_coll_tuned_use_dynamic_rules;
extern bool ompi_coll_tuned_use_dynamic_rules;
extern char* ompi_coll_tuned_dynamic_rules_filename;
extern int ompi_coll_tuned_init_tree_fanout;
extern int ompi_coll_tuned_init_chain_fanout;

Просмотреть файл

@ -10,6 +10,8 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2009 University of Houston. All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All Rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -30,6 +32,24 @@
#include "coll_tuned_topo.h"
#include "coll_tuned_util.h"
/* allgather algorithm variables */
/* Backing stores for the MCA variables registered in
 * ompi_coll_tuned_allgather_intra_check_forced_init(). */
/* Published as "allgather_algorithm_count"; equals the highest value in the
 * table below. */
static int coll_tuned_allgather_algorithm_count = 6;
/* Backs "allgather_algorithm"; 0 is the "ignore" entry of the table. */
static int coll_tuned_allgather_forced_algorithm = 0;
/* Backs "allgather_algorithm_segmentsize". */
static int coll_tuned_allgather_segment_size = 0;
/* Back "allgather_algorithm_tree_fanout" / "allgather_algorithm_chain_fanout";
 * both are assigned at registration time. */
static int coll_tuned_allgather_tree_fanout;
static int coll_tuned_allgather_chain_fanout;
/* valid values for coll_tuned_allgather_forced_algorithm */
/* The last row, {0, NULL}, presumably terminates the table -- confirm
 * against mca_base_var_enum_create(). */
static mca_base_var_enum_value_t allgather_algorithms[] = {
{0, "ignore"},
{1, "linear"},
{2, "bruck"},
{3, "recursive_doubling"},
{4, "ring"},
{5, "neighbor"},
{6, "two_proc"},
{0, NULL}
};
/*
* ompi_coll_tuned_allgather_intra_bruck
@ -748,54 +768,64 @@ ompi_coll_tuned_allgather_intra_basic_linear(void *sbuf, int scount,
/*
 * Register the MCA variables that control the forced allgather algorithm.
 *
 * Publishes the number of available algorithms, then registers the forced
 * algorithm selector (restricted to the allgather_algorithms enumeration),
 * the segment size and the tree/chain fanouts.  The variable indices of the
 * last four are stored in *mca_param_indices.
 *
 * Returns MPI_SUCCESS on success.  Returns the error from enumerator
 * creation, or the negative index reported when registering
 * "allgather_algorithm", on failure.
 */
int
ompi_coll_tuned_allgather_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
    mca_base_var_enum_t *new_enum = NULL;
    int rc;

    ompi_coll_tuned_forced_max_algorithms[ALLGATHER] = coll_tuned_allgather_algorithm_count;

    (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
                                           "allgather_algorithm_count",
                                           "Number of allgather algorithms available",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0,
                                           MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
                                           OPAL_INFO_LVL_5,
                                           MCA_BASE_VAR_SCOPE_CONSTANT,
                                           &coll_tuned_allgather_algorithm_count);

    /* MPI_T: This variable should eventually be bound to a communicator */
    coll_tuned_allgather_forced_algorithm = 0;

    /* Do not register against (or release) an enumerator that was never
       created */
    rc = mca_base_var_enum_create("coll_tuned_allgather_algorithms", allgather_algorithms, &new_enum);
    if (OMPI_SUCCESS != rc) {
        return rc;
    }

    mca_param_indices->algorithm_param_index =
        mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
                                        "allgather_algorithm",
                                        "Which allgather algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 bruck, 3 recursive doubling, 4 ring, 5 neighbor exchange, 6: two proc only.",
                                        MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
                                        OPAL_INFO_LVL_5,
                                        MCA_BASE_VAR_SCOPE_READONLY,
                                        &coll_tuned_allgather_forced_algorithm);
    /* Drop the local reference now that registration has been attempted */
    OBJ_RELEASE(new_enum);
    if (mca_param_indices->algorithm_param_index < 0) {
        return mca_param_indices->algorithm_param_index;
    }

    coll_tuned_allgather_segment_size = 0;
    mca_param_indices->segsize_param_index =
        mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
                                        "allgather_algorithm_segmentsize",
                                        "Segment size in bytes used by default for allgather algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation. Currently, available algorithms do not support segmentation.",
                                        MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                        OPAL_INFO_LVL_5,
                                        MCA_BASE_VAR_SCOPE_READONLY,
                                        &coll_tuned_allgather_segment_size);

    coll_tuned_allgather_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */
    mca_param_indices->tree_fanout_param_index =
        mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
                                        "allgather_algorithm_tree_fanout",
                                        "Fanout for n-tree used for allgather algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation. Currently, available algorithms do not support n-tree topologies.",
                                        MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                        OPAL_INFO_LVL_5,
                                        MCA_BASE_VAR_SCOPE_READONLY,
                                        &coll_tuned_allgather_tree_fanout);

    coll_tuned_allgather_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */
    mca_param_indices->chain_fanout_param_index =
        mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
                                        "allgather_algorithm_chain_fanout",
                                        "Fanout for chains used for allgather algorithms. Only has meaning if algorithm is forced and supports chain topo based operation. Currently, available algorithms do not support chain topologies.",
                                        MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                        OPAL_INFO_LVL_5,
                                        MCA_BASE_VAR_SCOPE_READONLY,
                                        &coll_tuned_allgather_chain_fanout);

    return (MPI_SUCCESS);
}

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -10,6 +11,8 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2009 University of Houston. All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All Rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -29,6 +32,24 @@
#include "coll_tuned_topo.h"
#include "coll_tuned_util.h"
/*
 * Backing storage for the allgatherv MCA variables registered by
 * ompi_coll_tuned_allgatherv_intra_check_forced_init().
 */
static int coll_tuned_allgatherv_tree_fanout;           /* assigned just before registration */
static int coll_tuned_allgatherv_chain_fanout;          /* assigned just before registration */
static int coll_tuned_allgatherv_segment_size     = 0;
static int coll_tuned_allgatherv_forced_algorithm = 0;  /* 0 is the "ignore" entry */
static int coll_tuned_allgatherv_algorithm_count  = 5;  /* selectable ids are 1..5 */
/*
 * Id/name pairs accepted for coll_tuned_allgatherv_forced_algorithm; handed
 * to mca_base_var_enum_create() when the variable is registered.
 */
static mca_base_var_enum_value_t allgatherv_algorithms[] = {
    { 0, "ignore"   },
    { 1, "default"  },
    { 2, "bruck"    },
    { 3, "ring"     },
    { 4, "neighbor" },
    { 5, "two_proc" },
    { 0, NULL       }   /* closing entry: NULL name */
};
/*
* ompi_coll_tuned_allgatherv_intra_bruck
*
@ -665,54 +686,64 @@ ompi_coll_tuned_allgatherv_intra_basic_default(void *sbuf, int scount,
/*
 * Register the MCA variables that control forced allgatherv algorithm
 * selection and store the returned variable indices in mca_param_indices.
 *
 * Visible steps: publish the algorithm count in
 * ompi_coll_tuned_forced_max_algorithms[ALLGATHERV]; register the constant
 * "allgatherv_algorithm_count"; build an enumerator from
 * allgatherv_algorithms and register "allgatherv_algorithm" with it; then
 * register the segment size, tree fanout and chain fanout variables.
 *
 * Returns the negative index from registering "allgatherv_algorithm" if that
 * registration fails, otherwise MPI_SUCCESS.
 *
 * The "allgatherv_algorithm" help text previously read "allallgatherv" and
 * "allgathervv + bcast"; it now reads "allgatherv" and "gatherv + bcast".
 *
 * NOTE(review): this listing is a rendered diff. The mca_base_param_* calls
 * and the max_alg/requested_alg range check appear to be the removed
 * pre-change lines shown next to their mca_base_component_var_register()
 * replacements -- confirm against the patched file.
 */
int
ompi_coll_tuned_allgatherv_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
int max_alg = 5, requested_alg;
ompi_coll_tuned_forced_max_algorithms[ALLGATHERV] = max_alg;
mca_base_param_reg_int (&mca_coll_tuned_component.super.collm_version,
"allgatherv_algorithm_count",
"Number of allgather algorithms available",
false, true, max_alg, NULL);
mca_param_indices->algorithm_param_index
= mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"allgatherv_algorithm",
"Which allgather algorithm is used. Can be locked down to choice of: 0 ignore, 1 default (gatherv + bcast), 2 bruck, 3 ring, 4 neighbor exchange, 5: two proc only.",
false, false, 0, NULL);
mca_base_var_enum_t *new_enum;
ompi_coll_tuned_forced_max_algorithms[ALLGATHERV] = coll_tuned_allgatherv_algorithm_count;
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allgatherv_algorithm_count",
"Number of allgatherv algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&coll_tuned_allgatherv_algorithm_count);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_allgatherv_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_allgatherv_algorithms", allgatherv_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allgatherv_algorithm",
"Which allgatherv algorithm is used. Can be locked down to choice of: 0 ignore, 1 default (gatherv + bcast), 2 bruck, 3 ring, 4 neighbor exchange, 5: two proc only.",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allgatherv_forced_algorithm);
/* Drop the local reference to the enumerator once registration is done. */
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
mca_base_param_lookup_int(mca_param_indices->algorithm_param_index,
&(requested_alg));
if( 0 > requested_alg || requested_alg > max_alg ) {
if( 0 == ompi_comm_rank( MPI_COMM_WORLD ) ) {
opal_output( 0, "Allgather algorithm #%d is not available (range [0..%d]). Switching back to ignore(0)\n",
requested_alg, max_alg );
}
mca_base_param_set_int( mca_param_indices->algorithm_param_index, 0);
}
mca_param_indices->segsize_param_index
= mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"allgatherv_algorithm_segmentsize",
"Segment size in bytes used by default for allgather algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation. Currently, available algorithms do not support segmentation.",
false, false, 0, NULL);
mca_param_indices->tree_fanout_param_index
= mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"allgatherv_algorithm_tree_fanout",
"Fanout for n-tree used for allgather algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation. Currently, available algorithms do not support n-tree topologies.",
false, false,
ompi_coll_tuned_init_tree_fanout, /* get system wide default */
NULL);
mca_param_indices->chain_fanout_param_index
= mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"allgatherv_algorithm_chain_fanout",
"Fanout for chains used for allgather algorithms. Only has meaning if algorithm is forced and supports chain topo based operation. Currently, available algorithms do not support chain topologies.",
false, false,
ompi_coll_tuned_init_chain_fanout, /* get system wide default */
NULL);
coll_tuned_allgatherv_segment_size = 0;
mca_param_indices->segsize_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allgatherv_algorithm_segmentsize",
"Segment size in bytes used by default for allgatherv algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation. Currently, available algorithms do not support segmentation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allgatherv_segment_size);
coll_tuned_allgatherv_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */
mca_param_indices->tree_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allgatherv_algorithm_tree_fanout",
"Fanout for n-tree used for allgatherv algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation. Currently, available algorithms do not support n-tree topologies.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allgatherv_tree_fanout);
coll_tuned_allgatherv_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */
mca_param_indices->chain_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allgatherv_algorithm_chain_fanout",
"Fanout for chains used for allgatherv algorithms. Only has meaning if algorithm is forced and supports chain topo based operation. Currently, available algorithms do not support chain topologies.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allgatherv_chain_fanout);
return (MPI_SUCCESS);
}

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -10,6 +11,8 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2009 University of Houston. All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All Rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -32,6 +35,24 @@
#include "coll_tuned_topo.h"
#include "coll_tuned_util.h"
/*
 * Backing storage for the allreduce MCA variables registered by
 * ompi_coll_tuned_allreduce_intra_check_forced_init().
 */
static int coll_tuned_allreduce_tree_fanout;           /* assigned just before registration */
static int coll_tuned_allreduce_chain_fanout;          /* assigned just before registration */
static int coll_tuned_allreduce_segment_size     = 0;
static int coll_tuned_allreduce_forced_algorithm = 0;  /* 0 is the "ignore" entry */
static int coll_tuned_allreduce_algorithm_count  = 5;  /* selectable ids are 1..5 */
/*
 * Id/name pairs accepted for coll_tuned_allreduce_forced_algorithm; handed
 * to mca_base_var_enum_create() when the variable is registered.
 */
static mca_base_var_enum_value_t allreduce_algorithms[] = {
    { 0, "ignore"             },
    { 1, "basic_linear"       },
    { 2, "nonoverlapping"     },
    { 3, "recursive_doubling" },
    { 4, "ring"               },
    { 5, "segmented_ring"     },
    { 0, NULL                 }   /* closing entry: NULL name */
};
/*
* ompi_coll_tuned_allreduce_intra_nonoverlapping
*
@ -921,52 +942,64 @@ ompi_coll_tuned_allreduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
/*
 * Register the MCA variables that control forced allreduce algorithm
 * selection and store the returned variable indices in mca_param_indices.
 *
 * Visible steps: publish the algorithm count in
 * ompi_coll_tuned_forced_max_algorithms[ALLREDUCE]; register the constant
 * "allreduce_algorithm_count"; build an enumerator from allreduce_algorithms
 * and register "allreduce_algorithm" with it; then register the segment
 * size, tree fanout and chain fanout variables.
 *
 * Returns the negative index from registering "allreduce_algorithm" if that
 * registration fails, otherwise MPI_SUCCESS.
 *
 * NOTE(review): this listing is a rendered diff. The mca_base_param_* calls
 * and the max_alg/requested_alg range check appear to be the removed
 * pre-change lines shown next to their mca_base_component_var_register()
 * replacements -- confirm against the patched file.
 */
int ompi_coll_tuned_allreduce_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
int max_alg = 5, requested_alg;
mca_base_var_enum_t *new_enum;
ompi_coll_tuned_forced_max_algorithms[ALLREDUCE] = max_alg;
ompi_coll_tuned_forced_max_algorithms[ALLREDUCE] = coll_tuned_allreduce_algorithm_count;
mca_base_param_reg_int (&mca_coll_tuned_component.super.collm_version,
"allreduce_algorithm_count",
"Number of allreduce algorithms available",
false, true, max_alg, NULL);
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allreduce_algorithm_count",
"Number of allreduce algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&coll_tuned_allreduce_algorithm_count);
mca_param_indices->algorithm_param_index
= mca_base_param_reg_int( &mca_coll_tuned_component.super.collm_version,
"allreduce_algorithm",
"Which allreduce algorithm is used. Can be locked down to any of: 0 ignore, 1 basic linear, 2 nonoverlapping (tuned reduce + tuned bcast), 3 recursive doubling, 4 ring, 5 segmented ring",
false, false, 0, NULL);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_allreduce_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_allreduce_algorithms", allreduce_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allreduce_algorithm",
"Which allreduce algorithm is used. Can be locked down to any of: 0 ignore, 1 basic linear, 2 nonoverlapping (tuned reduce + tuned bcast), 3 recursive doubling, 4 ring, 5 segmented ring",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allreduce_forced_algorithm);
/* Drop the local reference to the enumerator once registration is done. */
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
mca_base_param_lookup_int( mca_param_indices->algorithm_param_index, &(requested_alg));
if( 0 > requested_alg || requested_alg > max_alg ) {
if( 0 == ompi_comm_rank( MPI_COMM_WORLD ) ) {
opal_output( 0, "Allreduce algorithm #%d is not available (range [0..%d]). Switching back to ignore(0)\n",
requested_alg, max_alg );
}
mca_base_param_set_int( mca_param_indices->algorithm_param_index, 0);
}
mca_param_indices->segsize_param_index
= mca_base_param_reg_int( &mca_coll_tuned_component.super.collm_version,
"allreduce_algorithm_segmentsize",
"Segment size in bytes used by default for allreduce algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
false, false, 0, NULL);
mca_param_indices->tree_fanout_param_index
= mca_base_param_reg_int( &mca_coll_tuned_component.super.collm_version,
"allreduce_algorithm_tree_fanout",
"Fanout for n-tree used for allreduce algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
false, false, ompi_coll_tuned_init_tree_fanout, /* get system wide default */
NULL);
coll_tuned_allreduce_segment_size = 0;
mca_param_indices->segsize_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allreduce_algorithm_segmentsize",
"Segment size in bytes used by default for allreduce algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allreduce_segment_size);
mca_param_indices->chain_fanout_param_index
= mca_base_param_reg_int( &mca_coll_tuned_component.super.collm_version,
"allreduce_algorithm_chain_fanout",
"Fanout for chains used for allreduce algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
false, false,
ompi_coll_tuned_init_chain_fanout, /* get system wide default */
NULL);
coll_tuned_allreduce_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */
mca_param_indices->tree_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allreduce_algorithm_tree_fanout",
"Fanout for n-tree used for allreduce algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allreduce_tree_fanout);
coll_tuned_allreduce_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */
mca_param_indices->chain_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allreduce_algorithm_chain_fanout",
"Fanout for chains used for allreduce algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allreduce_chain_fanout);
return (MPI_SUCCESS);
}

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -9,6 +10,8 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All Rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -29,6 +32,25 @@
#include "coll_tuned_topo.h"
#include "coll_tuned_util.h"
/*
 * Backing storage for the alltoall MCA variables registered by
 * ompi_coll_tuned_alltoall_intra_check_forced_init().
 */
static int coll_tuned_alltoall_max_requests;          /* assigned just before registration */
static int coll_tuned_alltoall_tree_fanout;           /* assigned just before registration */
static int coll_tuned_alltoall_chain_fanout;          /* assigned just before registration */
static int coll_tuned_alltoall_segment_size     = 0;
static int coll_tuned_alltoall_forced_algorithm = 0;  /* 0 is the "ignore" entry */
static int coll_tuned_alltoall_algorithm_count  = 5;  /* selectable ids are 1..5 */
/*
 * Id/name pairs accepted for coll_tuned_alltoall_forced_algorithm; handed
 * to mca_base_var_enum_create() when the variable is registered.
 */
static mca_base_var_enum_value_t alltoall_algorithms[] = {
    { 0, "ignore"         },
    { 1, "linear"         },
    { 2, "pairwise"       },
    { 3, "modified_bruck" },
    { 4, "linear_sync"    },
    { 5, "two_proc"       },
    { 0, NULL             }   /* closing entry: NULL name */
};
int ompi_coll_tuned_alltoall_intra_pairwise(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
@ -580,72 +602,84 @@ int ompi_coll_tuned_alltoall_intra_basic_linear(void *sbuf, int scount,
/*
 * Register the MCA variables that control forced alltoall algorithm
 * selection and store the returned variable indices in mca_param_indices.
 *
 * Visible steps: publish the algorithm count in
 * ompi_coll_tuned_forced_max_algorithms[ALLTOALL]; register the constant
 * "alltoall_algorithm_count"; build an enumerator from alltoall_algorithms
 * and register "alltoall_algorithm" with it; then register the segment size,
 * tree fanout, chain fanout and max-requests variables.
 *
 * Returns a negative registration index when registering
 * "alltoall_algorithm" or "alltoall_algorithm_max_requests" fails,
 * otherwise MPI_SUCCESS.
 *
 * NOTE(review): this listing is a rendered diff. The mca_base_param_* calls
 * and the max_alg/requested_alg/max_requests checks appear to be the removed
 * pre-change lines shown next to their mca_base_component_var_register()
 * replacements, which is why braces do not balance here; the function's
 * closing brace lies outside the displayed hunk. Confirm against the
 * patched file.
 */
int ompi_coll_tuned_alltoall_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
int max_alg = 5, requested_alg, max_requests;
mca_base_var_enum_t*new_enum;
ompi_coll_tuned_forced_max_algorithms[ALLTOALL] = max_alg;
ompi_coll_tuned_forced_max_algorithms[ALLTOALL] = coll_tuned_alltoall_algorithm_count;
mca_base_param_reg_int (&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm_count",
"Number of alltoall algorithms available",
false, true, max_alg, NULL);
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm_count",
"Number of alltoall algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&coll_tuned_alltoall_algorithm_count);
mca_param_indices->algorithm_param_index
= mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm",
"Which alltoall algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 pairwise, 3: modified bruck, 4: linear with sync, 5:two proc only.",
false, false, 0, NULL);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_alltoall_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_alltoall_algorithms", alltoall_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm",
"Which alltoall algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 pairwise, 3: modified bruck, 4: linear with sync, 5:two proc only.",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_alltoall_forced_algorithm);
/* Drop the local reference to the enumerator once registration is done. */
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
mca_base_param_lookup_int(mca_param_indices->algorithm_param_index, &(requested_alg));
if( 0 > requested_alg || requested_alg > max_alg ) {
if( 0 == ompi_comm_rank( MPI_COMM_WORLD ) ) {
opal_output( 0, "Alltoall algorithm #%d is not available (range [0..%d]). Switching back to ignore(0)\n",
requested_alg, max_alg );
}
mca_base_param_set_int( mca_param_indices->algorithm_param_index, 0);
}
mca_param_indices->segsize_param_index
= mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm_segmentsize",
"Segment size in bytes used by default for alltoall algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
false, false, 0, NULL);
mca_param_indices->tree_fanout_param_index
= mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm_tree_fanout",
"Fanout for n-tree used for alltoall algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
false, false,
ompi_coll_tuned_init_tree_fanout, /* get system wide default */
NULL);
mca_param_indices->chain_fanout_param_index
= mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm_chain_fanout",
"Fanout for chains used for alltoall algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
false, false,
ompi_coll_tuned_init_chain_fanout, /* get system wide default */
NULL);
coll_tuned_alltoall_segment_size = 0;
mca_param_indices->segsize_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm_segmentsize",
"Segment size in bytes used by default for alltoall algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_alltoall_segment_size);
mca_param_indices->max_requests_param_index
= mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm_max_requests",
"Maximum number of outstanding send or recv requests. Only has meaning for synchronized algorithms.",
false, false,
ompi_coll_tuned_init_max_requests, /* get system wide default */
NULL);
coll_tuned_alltoall_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */
mca_param_indices->tree_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm_tree_fanout",
"Fanout for n-tree used for alltoall algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_alltoall_tree_fanout);
coll_tuned_alltoall_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */
mca_param_indices->chain_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm_chain_fanout",
"Fanout for chains used for alltoall algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_alltoall_chain_fanout);
coll_tuned_alltoall_max_requests = 0; /* no limit for alltoall by default */
mca_param_indices->max_requests_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm_max_requests",
"Maximum number of outstanding send or recv requests. Only has meaning for synchronized algorithms.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_alltoall_max_requests);
/* NOTE(review): two returns are listed back to back; presumably the first
 * (algorithm_param_index) is the removed line and the second
 * (max_requests_param_index) is its replacement -- confirm. */
if (mca_param_indices->max_requests_param_index < 0) {
return mca_param_indices->algorithm_param_index;
return mca_param_indices->max_requests_param_index;
}
mca_base_param_lookup_int(mca_param_indices->max_requests_param_index, &(max_requests));
if( max_requests <= 1 ) {
/* NOTE(review): the warning below announces a fallback to
 * ompi_coll_tuned_init_max_requests and says the value must be greater
 * than 1, while this condition only rejects negative values and the
 * assignment further down resets the variable to 0 -- confirm which
 * behavior is intended and align the message with it. */
if (coll_tuned_alltoall_max_requests < 0) {
if( 0 == ompi_comm_rank( MPI_COMM_WORLD ) ) {
opal_output( 0, "Maximum outstanding requests must be positive number greater than 1. Switching to system level default %d \n",
ompi_coll_tuned_init_max_requests );
}
mca_base_param_set_int( mca_param_indices->max_requests_param_index,
ompi_coll_tuned_init_max_requests);
coll_tuned_alltoall_max_requests = 0;
}
return (MPI_SUCCESS);

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -10,6 +11,8 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All Rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -30,6 +33,18 @@
#include "coll_tuned_topo.h"
#include "coll_tuned_util.h"
/*
 * Backing storage for the alltoallv MCA variables registered by
 * ompi_coll_tuned_alltoallv_intra_check_forced_init().
 */
static int coll_tuned_alltoallv_forced_algorithm = 0;  /* 0 is the "ignore" entry */
static int coll_tuned_alltoallv_algorithm_count  = 2;  /* selectable ids are 1..2 */
/*
 * Id/name pairs accepted for coll_tuned_alltoallv_forced_algorithm; handed
 * to mca_base_var_enum_create() when the variable is registered.
 */
static mca_base_var_enum_value_t alltoallv_algorithms[] = {
    { 0, "ignore"       },
    { 1, "basic_linear" },
    { 2, "pairwise"     },
    { 0, NULL           }   /* closing entry: NULL name */
};
int
ompi_coll_tuned_alltoallv_intra_pairwise(void *sbuf, int *scounts, int *sdisps,
struct ompi_datatype_t *sdtype,
@ -198,34 +213,36 @@ ompi_coll_tuned_alltoallv_intra_basic_linear(void *sbuf, int *scounts, int *sdis
/*
 * Register the MCA variables that control forced alltoallv algorithm
 * selection and store the algorithm variable index in mca_param_indices.
 *
 * Visible steps: publish the algorithm count in
 * ompi_coll_tuned_forced_max_algorithms[ALLTOALLV]; register the constant
 * "alltoallv_algorithm_count"; build an enumerator from alltoallv_algorithms
 * and register "alltoallv_algorithm" with it.
 *
 * Returns the negative index from registering "alltoallv_algorithm" if that
 * registration fails, otherwise MPI_SUCCESS.
 *
 * NOTE(review): this listing is a rendered diff. The mca_base_param_* calls
 * and the max_alg/requested_alg range check appear to be the removed
 * pre-change lines shown next to their mca_base_component_var_register()
 * replacements -- confirm against the patched file.
 */
int ompi_coll_tuned_alltoallv_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t
*mca_param_indices)
{
int max_alg = 2, requested_alg;
mca_base_var_enum_t *new_enum;
ompi_coll_tuned_forced_max_algorithms[ALLTOALLV] = max_alg;
ompi_coll_tuned_forced_max_algorithms[ALLTOALLV] = coll_tuned_alltoallv_algorithm_count;
mca_base_param_reg_int (&mca_coll_tuned_component.super.collm_version,
"alltoallv_algorithm_count",
"Number of alltoallv algorithms available",
false, true, max_alg, NULL);
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"alltoallv_algorithm_count",
"Number of alltoallv algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&coll_tuned_alltoallv_algorithm_count);
mca_param_indices->algorithm_param_index
= mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"alltoallv_algorithm",
"Which alltoallv algorithm is used. "
"Can be locked down to choice of: 0 ignore, "
"1 basic linear, 2 pairwise.",
false, false, 0, NULL);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_alltoallv_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_alltoallv_algorithms", alltoallv_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"alltoallv_algorithm",
"Which alltoallv algorithm is used. "
"Can be locked down to choice of: 0 ignore, "
"1 basic linear, 2 pairwise.",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_alltoallv_forced_algorithm);
/* Drop the local reference to the enumerator once registration is done. */
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
mca_base_param_lookup_int(mca_param_indices->algorithm_param_index, &(requested_alg));
if (0 > requested_alg || requested_alg > max_alg) {
if (0 == ompi_comm_rank( MPI_COMM_WORLD)) {
opal_output(0, "Alltoallv algorithm #%d is not available (range [0..%d]). "
"Switching back to ignore(0)\n",
requested_alg, max_alg );
}
mca_base_param_set_int(mca_param_indices->algorithm_param_index, 0);
}
return (MPI_SUCCESS);
}

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -10,6 +11,8 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All Rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -30,6 +33,22 @@
#include "coll_tuned_topo.h"
#include "coll_tuned_util.h"
/*
 * Backing storage for the barrier MCA variables registered by
 * ompi_coll_tuned_barrier_intra_check_forced_init().
 */
static int coll_tuned_barrier_forced_algorithm = 0;  /* 0 is the "ignore" entry */
static int coll_tuned_barrier_algorithm_count  = 6;  /* selectable ids are 1..6 */
/*
 * Id/name pairs accepted for coll_tuned_barrier_forced_algorithm; handed
 * to mca_base_var_enum_create() when the variable is registered.
 */
static mca_base_var_enum_value_t barrier_algorithms[] = {
    { 0, "ignore"             },
    { 1, "linear"             },
    { 2, "double_ring"        },
    { 3, "recursive_doubling" },
    { 4, "bruck"              },
    { 5, "two_proc"           },
    { 6, "tree"               },
    { 0, NULL                 }   /* closing entry: NULL name */
};
/*
* Barrier is meant to be a synchronous operation, as some BTLs can mark
* a request done before it is passed to the NIC and progress might not be made
@ -405,32 +424,34 @@ int ompi_coll_tuned_barrier_intra_tree(struct ompi_communicator_t *comm,
/*
 * Register the MCA variables that control forced barrier algorithm
 * selection and store the algorithm variable index in mca_param_indices.
 *
 * Visible steps: publish the algorithm count in
 * ompi_coll_tuned_forced_max_algorithms[BARRIER]; register the constant
 * "barrier_algorithm_count"; build an enumerator from barrier_algorithms
 * and register "barrier_algorithm" with it.
 *
 * Returns the negative index from registering "barrier_algorithm" if that
 * registration fails, otherwise MPI_SUCCESS.
 *
 * NOTE(review): this listing is a rendered diff. The mca_base_param_* calls
 * and the max_alg/requested_alg range check appear to be the removed
 * pre-change lines shown next to their mca_base_component_var_register()
 * replacements -- confirm against the patched file.
 */
int ompi_coll_tuned_barrier_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
int max_alg = 6, requested_alg;
mca_base_var_enum_t *new_enum;
ompi_coll_tuned_forced_max_algorithms[BARRIER] = max_alg;
ompi_coll_tuned_forced_max_algorithms[BARRIER] = coll_tuned_barrier_algorithm_count;
mca_base_param_reg_int (&mca_coll_tuned_component.super.collm_version,
"barrier_algorithm_count",
"Number of barrier algorithms available",
false, true, max_alg, NULL);
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"barrier_algorithm_count",
"Number of barrier algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&coll_tuned_barrier_algorithm_count);
mca_param_indices->algorithm_param_index =
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"barrier_algorithm",
"Which barrier algorithm is used. Can be locked down to choice of: 0 ignore, 1 linear, 2 double ring, 3: recursive doubling 4: bruck, 5: two proc only, 6: tree",
false, false, 0, NULL);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_barrier_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_barrier_algorithms", barrier_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"barrier_algorithm",
"Which barrier algorithm is used. Can be locked down to choice of: 0 ignore, 1 linear, 2 double ring, 3: recursive doubling 4: bruck, 5: two proc only, 6: tree",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_barrier_forced_algorithm);
/* Drop the local reference to the enumerator once registration is done. */
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
mca_base_param_lookup_int(mca_param_indices->algorithm_param_index,
&(requested_alg));
if( 0 > requested_alg || requested_alg > max_alg ) {
if( 0 == ompi_comm_rank( MPI_COMM_WORLD ) ) {
opal_output( 0, "Barrier algorithm #%d is not available (range [0..%d]). Switching back to ignore(0)\n",
requested_alg, max_alg );
}
mca_base_param_set_int( mca_param_indices->algorithm_param_index, 0);
}
return (MPI_SUCCESS);
}

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -30,6 +31,25 @@
#include "coll_tuned_topo.h"
#include "coll_tuned_util.h"
/*
 * Backing storage for the bcast MCA variables registered by
 * ompi_coll_tuned_bcast_intra_check_forced_init().
 */
static int coll_tuned_bcast_tree_fanout;           /* assigned just before registration */
static int coll_tuned_bcast_chain_fanout;          /* assigned just before registration */
static int coll_tuned_bcast_segment_size     = 0;
static int coll_tuned_bcast_forced_algorithm = 0;  /* 0 is the "ignore" entry */
static int coll_tuned_bcast_algorithm_count  = 6;  /* selectable ids are 1..6 */
/*
 * Id/name pairs accepted for coll_tuned_bcast_forced_algorithm; handed
 * to mca_base_var_enum_create() when the variable is registered.
 */
static mca_base_var_enum_value_t bcast_algorithms[] = {
    { 0, "ignore"            },
    { 1, "basic_linear"      },
    { 2, "chain"             },
    { 3, "pipeline"          },
    { 4, "split_binary_tree" },
    { 5, "binary_tree"       },
    { 6, "binomial"          },
    { 0, NULL                }   /* closing entry: NULL name */
};
int
ompi_coll_tuned_bcast_intra_generic( void* buffer,
int original_count,
@ -712,54 +732,64 @@ ompi_coll_tuned_bcast_intra_basic_linear (void *buff, int count,
/*
 * Register the MCA variables that control forced bcast algorithm selection
 * and store the returned variable indices in mca_param_indices.
 *
 * Visible steps: publish the algorithm count in
 * ompi_coll_tuned_forced_max_algorithms[BCAST]; register the constant
 * "bcast_algorithm_count"; build an enumerator from bcast_algorithms and
 * register "bcast_algorithm" with it; then register the segment size, tree
 * fanout and chain fanout variables.
 *
 * Returns the negative index from registering "bcast_algorithm" if that
 * registration fails, otherwise MPI_SUCCESS.
 *
 * NOTE(review): this listing is a rendered diff. The mca_base_param_* calls,
 * the rc assignment and the max_alg/requested_alg range check appear to be
 * the removed pre-change lines shown next to their
 * mca_base_component_var_register() replacements -- confirm against the
 * patched file.
 */
int ompi_coll_tuned_bcast_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
int rc, max_alg = 6, requested_alg;
mca_base_var_enum_t *new_enum;
ompi_coll_tuned_forced_max_algorithms[BCAST] = max_alg;
ompi_coll_tuned_forced_max_algorithms[BCAST] = coll_tuned_bcast_algorithm_count;
rc = mca_base_param_reg_int (&mca_coll_tuned_component.super.collm_version,
"bcast_algorithm_count",
"Number of bcast algorithms available",
false, true, max_alg, NULL);
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"bcast_algorithm_count",
"Number of bcast algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&coll_tuned_bcast_algorithm_count);
mca_param_indices->algorithm_param_index
= mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"bcast_algorithm",
"Which bcast algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 chain, 3: pipeline, 4: split binary tree, 5: binary tree, 6: binomial tree.",
false, false, 0, NULL);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_bcast_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_bcast_algorithms", bcast_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"bcast_algorithm",
"Which bcast algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 chain, 3: pipeline, 4: split binary tree, 5: binary tree, 6: binomial tree.",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_bcast_forced_algorithm);
/* Drop the local reference to the enumerator once registration is done. */
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
mca_base_param_lookup_int(mca_param_indices->algorithm_param_index, &(requested_alg));
if( 0 > requested_alg || requested_alg > max_alg ) {
if( 0 == ompi_comm_rank( MPI_COMM_WORLD ) ) {
opal_output( 0, "Broadcast algorithm #%d is not available (range [0..%d]). Switching back to ignore(0)\n",
requested_alg, max_alg );
}
mca_base_param_set_int( mca_param_indices->algorithm_param_index, 0);
}
mca_param_indices->segsize_param_index
= mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"bcast_algorithm_segmentsize",
"Segment size in bytes used by default for bcast algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
false, false, 0, NULL);
coll_tuned_bcast_segment_size = 0;
mca_param_indices->segsize_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"bcast_algorithm_segmentsize",
"Segment size in bytes used by default for bcast algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_bcast_segment_size);
mca_param_indices->tree_fanout_param_index
= mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"bcast_algorithm_tree_fanout",
"Fanout for n-tree used for bcast algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
false, false,
ompi_coll_tuned_init_tree_fanout, /* get system wide default */
NULL);
coll_tuned_bcast_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */
mca_param_indices->tree_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"bcast_algorithm_tree_fanout",
"Fanout for n-tree used for bcast algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_bcast_tree_fanout);
mca_param_indices->chain_fanout_param_index
= mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"bcast_algorithm_chain_fanout",
"Fanout for chains used for bcast algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
false, false,
ompi_coll_tuned_init_chain_fanout, /* get system wide default */
NULL);
coll_tuned_bcast_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */
mca_param_indices->chain_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"bcast_algorithm_chain_fanout",
"Fanout for chains used for bcast algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_bcast_chain_fanout);
return (MPI_SUCCESS);
}

Просмотреть файл

@ -45,7 +45,7 @@ const char *ompi_coll_tuned_component_version_string =
/* Output stream for OPAL_OUTPUT diagnostics; -1 until tuned_open() opens one. */
int ompi_coll_tuned_stream = -1;
/* Backing store of the "priority" MCA variable. */
int ompi_coll_tuned_priority = 30;
/* Backing store of the "pre_allocate_memory_comm_size_limit" MCA variable. */
int ompi_coll_tuned_preallocate_memory_comm_size_limit = (32 * 1024);
/* NOTE(review): the next two lines define the same symbol with different
 * types (int, then bool); this looks like the old and new side of a change
 * shown together -- confirm that only one of them is live. */
int ompi_coll_tuned_use_dynamic_rules = 0;
bool ompi_coll_tuned_use_dynamic_rules = false;
/* Backing store of the "dynamic_rules_filename" MCA variable (no file by default). */
char* ompi_coll_tuned_dynamic_rules_filename = (char*) NULL;
/* Backing stores of "init_tree_fanout" / "init_chain_fanout"; also used as the
 * defaults for the per-collective fanout variables. */
int ompi_coll_tuned_init_tree_fanout = 4;
int ompi_coll_tuned_init_chain_fanout = 4;
@ -60,6 +60,7 @@ int ompi_coll_tuned_forced_max_algorithms[COLLCOUNT];
/*
* Local functions: the register/open/close entry points referenced by the
* mca_coll_tuned_component initializer.
*/
static int tuned_register(void);
static int tuned_open(void);
static int tuned_close(void);
@ -84,7 +85,9 @@ mca_coll_tuned_component_t mca_coll_tuned_component = {
/* Component open and close functions */
tuned_open,
tuned_close
tuned_close,
NULL,
tuned_register
},
{
/* The component is checkpoint ready */
@ -104,6 +107,80 @@ mca_coll_tuned_component_t mca_coll_tuned_component = {
NULL /* ompi_coll_alg_rule_t ptr */
};
/*
 * Register the component-level MCA variables of the tuned coll component and
 * then let each collective register its "forced algorithm" variables.
 *
 * Every variable is backed by one of the ompi_coll_tuned_* globals; each
 * global is reset to its built-in default before it is handed to the
 * variable system.  The results of the individual registrations and of the
 * per-collective init helpers are deliberately not inspected.
 *
 * Returns OMPI_SUCCESS unconditionally.
 */
static int tuned_register(void)
{
    /* Built-in defaults for all backing stores registered below. */
    ompi_coll_tuned_priority = 30;      /* low, but others may go lower */
    ompi_coll_tuned_preallocate_memory_comm_size_limit = (32 * 1024);
    ompi_coll_tuned_init_tree_fanout = 4;   /* initial topology guesses */
    ompi_coll_tuned_init_chain_fanout = 4;
    ompi_coll_tuned_use_dynamic_rules = false;
    ompi_coll_tuned_dynamic_rules_filename = NULL;

    /* Component priority. */
    (void) mca_base_component_var_register(
        &mca_coll_tuned_component.super.collm_version,
        "priority", "Priority of the tuned coll component",
        MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
        OPAL_INFO_LVL_6, MCA_BASE_VAR_SCOPE_READONLY,
        &ompi_coll_tuned_priority);

    /* Communicator size limit for pre-allocated request storage. */
    (void) mca_base_component_var_register(
        &mca_coll_tuned_component.super.collm_version,
        "pre_allocate_memory_comm_size_limit",
        "Size of communicator were we stop pre-allocating memory for the fixed internal buffer used for message requests etc that is hung off the communicator data segment. I.e. if you have a 100'000 nodes you might not want to pre-allocate 200'000 request handle slots per communicator instance!",
        MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
        OPAL_INFO_LVL_6, MCA_BASE_VAR_SCOPE_READONLY,
        &ompi_coll_tuned_preallocate_memory_comm_size_limit);

    /* Initial fanout for tree topologies. */
    (void) mca_base_component_var_register(
        &mca_coll_tuned_component.super.collm_version,
        "init_tree_fanout",
        "Inital fanout used in the tree topologies for each communicator. This is only an initial guess, if a tuned collective needs a different fanout for an operation, it build it dynamically. This parameter is only for the first guess and might save a little time",
        MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
        OPAL_INFO_LVL_6, MCA_BASE_VAR_SCOPE_READONLY,
        &ompi_coll_tuned_init_tree_fanout);

    /* Initial fanout for chain topologies. */
    (void) mca_base_component_var_register(
        &mca_coll_tuned_component.super.collm_version,
        "init_chain_fanout",
        "Inital fanout used in the chain (fanout followed by pipeline) topologies for each communicator. This is only an initial guess, if a tuned collective needs a different fanout for an operation, it build it dynamically. This parameter is only for the first guess and might save a little time",
        MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
        OPAL_INFO_LVL_6, MCA_BASE_VAR_SCOPE_READONLY,
        &ompi_coll_tuned_init_chain_fanout);

    /* Static versus dynamic decision rules. */
    (void) mca_base_component_var_register(
        &mca_coll_tuned_component.super.collm_version,
        "use_dynamic_rules",
        "Switch used to decide if we use static (compiled/if statements) or dynamic (built at runtime) decision function rules",
        MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
        OPAL_INFO_LVL_6, MCA_BASE_VAR_SCOPE_READONLY,
        &ompi_coll_tuned_use_dynamic_rules);

    /* Optional file holding the dynamic rules. */
    (void) mca_base_component_var_register(
        &mca_coll_tuned_component.super.collm_version,
        "dynamic_rules_filename",
        "Filename of configuration file that contains the dynamic (@runtime) decision function rules",
        MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
        OPAL_INFO_LVL_6, MCA_BASE_VAR_SCOPE_READONLY,
        &ompi_coll_tuned_dynamic_rules_filename);

    /* Per-collective forced-algorithm variables; each helper fills in the
     * index record of its own collective. */
    ompi_coll_tuned_allreduce_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLREDUCE]);
    ompi_coll_tuned_alltoall_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLTOALL]);
    ompi_coll_tuned_allgather_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLGATHER]);
    ompi_coll_tuned_allgatherv_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLGATHERV]);
    ompi_coll_tuned_alltoallv_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLTOALLV]);
    ompi_coll_tuned_barrier_intra_check_forced_init(&ompi_coll_tuned_forced_params[BARRIER]);
    ompi_coll_tuned_bcast_intra_check_forced_init(&ompi_coll_tuned_forced_params[BCAST]);
    ompi_coll_tuned_reduce_intra_check_forced_init(&ompi_coll_tuned_forced_params[REDUCE]);
    ompi_coll_tuned_reduce_scatter_intra_check_forced_init(&ompi_coll_tuned_forced_params[REDUCESCATTER]);
    ompi_coll_tuned_gather_intra_check_forced_init(&ompi_coll_tuned_forced_params[GATHER]);
    ompi_coll_tuned_scatter_intra_check_forced_init(&ompi_coll_tuned_forced_params[SCATTER]);

    return OMPI_SUCCESS;
}
static int tuned_open(void)
{
@ -113,44 +190,17 @@ static int tuned_open(void)
{
int param;
param = mca_base_param_find("coll", NULL, "base_verbose");
param = mca_base_var_find("ompi", "coll", "base", "verbose");
if (param >= 0) {
int verbose;
mca_base_param_lookup_int(param, &verbose);
if (verbose > 0) {
const int *verbose = NULL;
mca_base_var_get_value(param, &verbose, NULL, NULL);
if (verbose && verbose[0] > 0) {
ompi_coll_tuned_stream = opal_output_open(NULL);
}
}
}
#endif /* OPAL_ENABLE_DEBUG */
/* Use a low priority, but allow other components to be lower */
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"priority",
"Priority of the tuned coll component",
false, false, ompi_coll_tuned_priority,
&ompi_coll_tuned_priority);
/* parameter for pre-allocated memory requests etc */
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"pre_allocate_memory_comm_size_limit",
"Size of communicator were we stop pre-allocating memory for the fixed internal buffer used for message requests etc that is hung off the communicator data segment. I.e. if you have a 100'000 nodes you might not want to pre-allocate 200'000 request handle slots per communicator instance!",
false, false, ompi_coll_tuned_preallocate_memory_comm_size_limit,
&ompi_coll_tuned_preallocate_memory_comm_size_limit);
/* some initial guesses at topology parameters */
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"init_tree_fanout",
"Inital fanout used in the tree topologies for each communicator. This is only an initial guess, if a tuned collective needs a different fanout for an operation, it build it dynamically. This parameter is only for the first guess and might save a little time",
false, false, ompi_coll_tuned_init_tree_fanout,
&ompi_coll_tuned_init_tree_fanout);
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"init_chain_fanout",
"Inital fanout used in the chain (fanout followed by pipeline) topologies for each communicator. This is only an initial guess, if a tuned collective needs a different fanout for an operation, it build it dynamically. This parameter is only for the first guess and might save a little time",
false, false, ompi_coll_tuned_init_chain_fanout,
&ompi_coll_tuned_init_chain_fanout);
/* now check that the user hasn't overridden any of the decision functions if dynamic rules are enabled */
/* the user can redo this before every comm dup/create if they like */
/* this is useful for benchmarking and user knows best tuning */
@ -160,18 +210,7 @@ static int tuned_open(void)
/* intra functions first */
/* if dynamic rules allowed then look up dynamic rules config filename, else we leave it an empty filename (NULL) */
/* by default DISABLE dynamic rules and instead use fixed [if based] rules */
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"use_dynamic_rules",
"Switch used to decide if we use static (compiled/if statements) or dynamic (built at runtime) decision function rules",
false, false, ompi_coll_tuned_use_dynamic_rules,
&ompi_coll_tuned_use_dynamic_rules);
if (ompi_coll_tuned_use_dynamic_rules) {
mca_base_param_reg_string(&mca_coll_tuned_component.super.collm_version,
"dynamic_rules_filename",
"Filename of configuration file that contains the dynamic (@runtime) decision function rules",
false, false, ompi_coll_tuned_dynamic_rules_filename,
&ompi_coll_tuned_dynamic_rules_filename);
if( ompi_coll_tuned_dynamic_rules_filename ) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:component_open Reading collective rules file [%s]",
ompi_coll_tuned_dynamic_rules_filename));
@ -184,17 +223,6 @@ static int tuned_open(void)
mca_coll_tuned_component.all_base_rules = NULL;
}
}
ompi_coll_tuned_allreduce_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLREDUCE]);
ompi_coll_tuned_alltoall_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLTOALL]);
ompi_coll_tuned_allgather_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLGATHER]);
ompi_coll_tuned_allgatherv_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLGATHERV]);
ompi_coll_tuned_alltoallv_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLTOALLV]);
ompi_coll_tuned_barrier_intra_check_forced_init(&ompi_coll_tuned_forced_params[BARRIER]);
ompi_coll_tuned_bcast_intra_check_forced_init(&ompi_coll_tuned_forced_params[BCAST]);
ompi_coll_tuned_reduce_intra_check_forced_init(&ompi_coll_tuned_forced_params[REDUCE]);
ompi_coll_tuned_reduce_scatter_intra_check_forced_init(&ompi_coll_tuned_forced_params[REDUCESCATTER]);
ompi_coll_tuned_gather_intra_check_forced_init(&ompi_coll_tuned_forced_params[GATHER]);
ompi_coll_tuned_scatter_intra_check_forced_init(&ompi_coll_tuned_forced_params[SCATTER]);
}
OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:component_open: done!"));

Просмотреть файл

@ -23,7 +23,6 @@
#include "ompi/constants.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/communicator/communicator.h"
#include "opal/mca/base/mca_base_param.h"
#include "ompi/mca/coll/base/base.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/coll_tags.h"

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -9,6 +10,8 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All Rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -29,6 +32,21 @@
#include "coll_tuned_topo.h"
#include "coll_tuned_util.h"
/* gather algorithm variables: backing stores for the MCA variables registered
 * by ompi_coll_tuned_gather_intra_check_forced_init() */
/* number of gather algorithms; also copied into
 * ompi_coll_tuned_forced_max_algorithms[GATHER] */
static int coll_tuned_gather_algorithm_count = 3;
/* forced algorithm selection; 0 ("ignore") by default */
static int coll_tuned_gather_forced_algorithm = 0;
/* forced segment size in bytes; 0 means no segmentation */
static int coll_tuned_gather_segment_size = 0;
/* forced fanouts; assigned from the ompi_coll_tuned_init_*_fanout defaults
 * right before they are registered */
static int coll_tuned_gather_tree_fanout;
static int coll_tuned_gather_chain_fanout;
/* valid values for coll_tuned_gather_forced_algorithm */
/* (the trailing {0, NULL} entry presumably terminates the list -- confirm
 * against mca_base_var_enum_create()) */
static mca_base_var_enum_value_t gather_algorithms[] = {
{0, "ignore"},
{1, "basic_linear"},
{2, "binomial"},
{3, "linear_sync"},
{0, NULL}
};
/* Todo: gather_intra_generic, gather_intra_binary, gather_intra_chain,
* gather_intra_pipeline, segmentation? */
@ -418,54 +436,64 @@ ompi_coll_tuned_gather_intra_basic_linear(void *sbuf, int scount,
/*
 * Register the MCA variables that let a user force a gather algorithm and
 * its tuning knobs (segment size, tree fanout, chain fanout), and store the
 * resulting variable indices in *mca_param_indices.
 *
 * Returns MPI_SUCCESS, or the negative index returned when registering
 * "gather_algorithm" fails.
 *
 * NOTE(review): this span contains both the legacy mca_base_param_* calls and
 * their mca_base_component_var_register() counterparts for the same
 * variables; it reads like both sides of a change shown together. Confirm
 * which statements are live before building.
 */
int
ompi_coll_tuned_gather_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
int rc, max_alg = 3, requested_alg;
mca_base_var_enum_t *new_enum;
/* publish the number of gather algorithms to the shared per-collective table */
ompi_coll_tuned_forced_max_algorithms[GATHER] = max_alg;
ompi_coll_tuned_forced_max_algorithms[GATHER] = coll_tuned_gather_algorithm_count;
rc = mca_base_param_reg_int (&mca_coll_tuned_component.super.collm_version,
"gather_algorithm_count",
"Number of gather algorithms available",
false, true, max_alg, NULL);
/* informational variable: default-only flag, constant scope */
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"gather_algorithm_count",
"Number of gather algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&coll_tuned_gather_algorithm_count);
mca_param_indices->algorithm_param_index
= mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"gather_algorithm",
"Which gather algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 binomial, 3 linear with synchronization.",
false, false, 0, NULL);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_gather_forced_algorithm = 0;
/* build an enumerator from gather_algorithms[] and attach it to the variable */
/* NOTE(review): the result of mca_base_var_enum_create() is not checked
 * before new_enum is used and released -- confirm it cannot fail here */
(void) mca_base_var_enum_create("coll_tuned_gather_algorithms", gather_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"gather_algorithm",
"Which gather algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 binomial, 3 linear with synchronization.",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_gather_forced_algorithm);
/* drop this function's reference to the enumerator */
OBJ_RELEASE(new_enum);
/* a negative index is handed back to the caller as the error code */
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
/* legacy range check: out-of-range requests are reported by rank 0 of
 * MPI_COMM_WORLD and the parameter is reset to 0 */
mca_base_param_lookup_int(mca_param_indices->algorithm_param_index,
&(requested_alg));
if( 0 > requested_alg || requested_alg > max_alg ) {
if( 0 == ompi_comm_rank( MPI_COMM_WORLD ) ) {
opal_output( 0, "Gather algorithm #%d is not available (range [0..%d]). Switching back to default(0)\n",
requested_alg, max_alg );
}
mca_base_param_set_int( mca_param_indices->algorithm_param_index, 0);
}
mca_param_indices->segsize_param_index
= mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"gather_algorithm_segmentsize",
"Segment size in bytes used by default for gather algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation. Currently, available algorithms do not support segmentation.",
false, false, 0, NULL);
/* segment size: defaults to 0 (no segmentation) */
coll_tuned_gather_segment_size = 0;
mca_param_indices->segsize_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"gather_algorithm_segmentsize",
"Segment size in bytes used by default for gather algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation. Currently, available algorithms do not support segmentation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_gather_segment_size);
mca_param_indices->tree_fanout_param_index
= mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"gather_algorithm_tree_fanout",
"Fanout for n-tree used for gather algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation. Currently, available algorithms do not support n-tree topologies.",
false, false,
ompi_coll_tuned_init_tree_fanout, /* get system wide default */
NULL);
/* tree fanout: default taken from the component-wide initial tree fanout */
coll_tuned_gather_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */
mca_param_indices->tree_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"gather_algorithm_tree_fanout",
"Fanout for n-tree used for gather algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation. Currently, available algorithms do not support n-tree topologies.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_gather_tree_fanout);
mca_param_indices->chain_fanout_param_index
= mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"gather_algorithm_chain_fanout",
"Fanout for chains used for gather algorithms. Only has meaning if algorithm is forced and supports chain topo based operation. Currently, available algorithms do not support chain topologies.",
false, false,
ompi_coll_tuned_init_chain_fanout, /* get system wide default */
NULL);
/* chain fanout: default taken from the component-wide initial chain fanout */
coll_tuned_gather_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */
mca_param_indices->chain_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"gather_algorithm_chain_fanout",
"Fanout for chains used for gather algorithms. Only has meaning if algorithm is forced and supports chain topo based operation. Currently, available algorithms do not support chain topologies.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_gather_chain_fanout);
return (MPI_SUCCESS);
}

Просмотреть файл

@ -24,7 +24,6 @@
#include "mpi.h"
#include "ompi/communicator/communicator.h"
#include "opal/mca/base/mca_base_param.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/base.h"
#include "coll_tuned.h"
@ -121,6 +120,7 @@ ompi_coll_tuned_forced_getvalues( enum COLLTYPE type,
coll_tuned_force_algorithm_params_t *forced_values )
{
coll_tuned_force_algorithm_mca_param_indices_t* mca_params;
const int *tmp;
mca_params = &(ompi_coll_tuned_forced_params[type]);
@ -129,13 +129,18 @@ ompi_coll_tuned_forced_getvalues( enum COLLTYPE type,
* to see if it was set explicitly (if we suppose that setting it to 0 enables the
* default behavior) or not.
*/
forced_values->algorithm = 0;
mca_base_param_lookup_int (mca_params->algorithm_param_index, &(forced_values->algorithm));
mca_base_var_get_value(mca_params->algorithm_param_index, &tmp, NULL, NULL);
forced_values->algorithm = tmp ? tmp[0] : 0;
if( BARRIER != type ) {
mca_base_param_lookup_int (mca_params->segsize_param_index, &(forced_values->segsize));
mca_base_param_lookup_int (mca_params->tree_fanout_param_index, &(forced_values->tree_fanout));
mca_base_param_lookup_int (mca_params->chain_fanout_param_index, &(forced_values->chain_fanout));
mca_base_param_lookup_int (mca_params->max_requests_param_index, &(forced_values->max_requests));
mca_base_var_get_value(mca_params->segsize_param_index, &tmp, NULL, NULL);
if (tmp) forced_values->segsize = tmp[0];
mca_base_var_get_value(mca_params->tree_fanout_param_index, &tmp, NULL, NULL);
if (tmp) forced_values->tree_fanout = tmp[0];
mca_base_var_get_value(mca_params->chain_fanout_param_index, &tmp, NULL, NULL);
if (tmp) forced_values->chain_fanout = tmp[0];
mca_base_var_get_value(mca_params->max_requests_param_index, &tmp, NULL, NULL);
if (tmp) forced_values->max_requests = tmp[0];
}
return (MPI_SUCCESS);
}
@ -159,7 +164,7 @@ ompi_coll_tuned_forced_getvalues( enum COLLTYPE type,
} \
if( 1 == need_dynamic_decision ) { \
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned: enable dynamic selection for "#TYPE)); \
ompi_coll_tuned_use_dynamic_rules = 1; \
ompi_coll_tuned_use_dynamic_rules = true; \
EXECUTE; \
} \
}
@ -219,7 +224,7 @@ tuned_module_enable( mca_coll_base_module_t *module,
/**
* Reset it to 0, it will be enabled again if we discover any need for dynamic decisions.
*/
ompi_coll_tuned_use_dynamic_rules = 0;
ompi_coll_tuned_use_dynamic_rules = false;
/**
* next dynamic state, recheck all forced rules as well
@ -258,7 +263,7 @@ tuned_module_enable( mca_coll_base_module_t *module,
COLL_TUNED_EXECUTE_IF_DYNAMIC(data, SCATTERV,
tuned_module->super.coll_scatterv = NULL);
if( 0 == ompi_coll_tuned_use_dynamic_rules ) {
if( false == ompi_coll_tuned_use_dynamic_rules ) {
/* no real need for dynamic decisions */
OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:module_enable switch back to fixed"
" decision by lack of dynamic rules"));

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -9,6 +10,8 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All Rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -29,6 +32,26 @@
#include "coll_tuned.h"
#include "coll_tuned_topo.h"
/* reduce algorithm variables: backing stores for the MCA variables registered
 * by ompi_coll_tuned_reduce_intra_check_forced_init() */
/* number of reduce algorithms; also copied into
 * ompi_coll_tuned_forced_max_algorithms[REDUCE] */
static int coll_tuned_reduce_algorithm_count = 6;
/* forced algorithm selection; 0 ("ignore") by default */
static int coll_tuned_reduce_forced_algorithm = 0;
/* forced segment size in bytes; 0 means no segmentation */
static int coll_tuned_reduce_segment_size = 0;
/* maximum outstanding send requests on leaf nodes; set to 0 (no limit)
 * before registration */
static int coll_tuned_reduce_max_requests;
/* forced fanouts; assigned from the ompi_coll_tuned_init_*_fanout defaults
 * right before they are registered */
static int coll_tuned_reduce_tree_fanout;
static int coll_tuned_reduce_chain_fanout;
/* valid values for coll_tuned_reduce_forced_algorithm */
/* (the trailing {0, NULL} entry presumably terminates the list -- confirm
 * against mca_base_var_enum_create()) */
static mca_base_var_enum_value_t reduce_algorithms[] = {
{0, "ignore"},
{1, "linear"},
{2, "chain"},
{3, "pipeline"},
{4, "binary"},
{5, "binomial"},
{6, "in-order_binary"},
{0, NULL}
};
/**
* This is a generic implementation of the reduce protocol. It used the tree
* provided as an argument and execute all operations using a segment of
@ -711,72 +734,86 @@ ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
* perms module does not call this they call the forced_getvalues routine
* instead.
*/
int ompi_coll_tuned_reduce_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
int rc, requested_alg, max_alg = 6, max_requests;
mca_base_var_enum_t*new_enum;
ompi_coll_tuned_forced_max_algorithms[REDUCE] = max_alg;
ompi_coll_tuned_forced_max_algorithms[REDUCE] = coll_tuned_reduce_algorithm_count;
rc = mca_base_param_reg_int (&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm_count",
"Number of reduce algorithms available",
false, true, max_alg, NULL);
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm_count",
"Number of reduce algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&coll_tuned_reduce_algorithm_count);
mca_param_indices->algorithm_param_index
= mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm",
"Which reduce algorithm is used. Can be locked down to choice of: 0 ignore, 1 linear, 2 chain, 3 pipeline, 4 binary, 5 binomial, 6 in-order binary",
false, false, 0, NULL);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_reduce_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_reduce_algorithms", reduce_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm",
"Which reduce algorithm is used. Can be locked down to choice of: 0 ignore, 1 linear, 2 chain, 3 pipeline, 4 binary, 5 binomial, 6 in-order binary",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_reduce_forced_algorithm);
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
mca_base_param_lookup_int(mca_param_indices->algorithm_param_index, &(requested_alg));
if( 0 > requested_alg || requested_alg > max_alg ) {
if( 0 == ompi_comm_rank( MPI_COMM_WORLD ) ) {
opal_output( 0, "Reduce algorithm #%d is not available (range [0..%d]). Switching back to ignore(0)\n",
requested_alg, max_alg );
}
mca_base_param_set_int( mca_param_indices->algorithm_param_index, 0);
}
mca_param_indices->segsize_param_index
= mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm_segmentsize",
"Segment size in bytes used by default for reduce algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
false, false, 0, NULL);
coll_tuned_reduce_segment_size = 0;
mca_param_indices->segsize_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm_segmentsize",
"Segment size in bytes used by default for reduce algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_reduce_segment_size);
mca_param_indices->tree_fanout_param_index
= mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm_tree_fanout",
"Fanout for n-tree used for reduce algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
false, false,
ompi_coll_tuned_init_tree_fanout, /* get system wide default */
NULL);
coll_tuned_reduce_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */
mca_param_indices->tree_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm_tree_fanout",
"Fanout for n-tree used for reduce algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_reduce_tree_fanout);
mca_param_indices->chain_fanout_param_index
= mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm_chain_fanout",
"Fanout for chains used for reduce algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
false, false,
ompi_coll_tuned_init_chain_fanout, /* get system wide default */
NULL);
mca_param_indices->max_requests_param_index
= mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm_max_requests",
"Maximum number of outstanding send requests on leaf nodes. 0 means no limit.",
false, false, 0, /* no limit for reduce by default */
NULL);
coll_tuned_reduce_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */
mca_param_indices->chain_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm_chain_fanout",
"Fanout for chains used for reduce algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_reduce_chain_fanout);
coll_tuned_reduce_max_requests = 0; /* no limit for reduce by default */
mca_param_indices->max_requests_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm_max_requests",
"Maximum number of outstanding send requests on leaf nodes. 0 means no limit.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_reduce_max_requests);
if (mca_param_indices->max_requests_param_index < 0) {
return mca_param_indices->max_requests_param_index;
}
mca_base_param_lookup_int(mca_param_indices->max_requests_param_index, &(max_requests));
if( max_requests < 0 ) {
if (coll_tuned_reduce_max_requests < 0) {
if( 0 == ompi_comm_rank( MPI_COMM_WORLD ) ) {
opal_output( 0, "Maximum outstanding requests must be positive number or 0. Initializing to 0 (no limit).\n" );
}
mca_base_param_set_int( mca_param_indices->max_requests_param_index, 0);
coll_tuned_reduce_max_requests = 0;
}
return (MPI_SUCCESS);

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -32,6 +33,22 @@
#include "coll_tuned.h"
#include "coll_tuned_topo.h"
/* reduce_scatter algorithm variables: backing stores for the MCA variables
 * registered by ompi_coll_tuned_reduce_scatter_intra_check_forced_init() */
/* Number of reduce_scatter algorithms. This value is copied into
 * ompi_coll_tuned_forced_max_algorithms[REDUCESCATTER], so it must equal the
 * highest algorithm id listed in reduce_scatter_algorithms[] (3, "ring").
 * It was previously 2, which left the ring algorithm outside the advertised
 * range. */
static int coll_tuned_reduce_scatter_algorithm_count = 3;
/* forced algorithm selection; 0 ("ignore") by default */
static int coll_tuned_reduce_scatter_forced_algorithm = 0;
/* forced segment size in bytes; 0 means no segmentation */
static int coll_tuned_reduce_scatter_segment_size = 0;
/* forced fanouts; assigned from the ompi_coll_tuned_init_*_fanout defaults
 * right before they are registered */
static int coll_tuned_reduce_scatter_tree_fanout;
static int coll_tuned_reduce_scatter_chain_fanout;
/* valid values for coll_tuned_reduce_scatter_forced_algorithm */
/* NOTE(review): "recursive_halfing" is kept verbatim because it is the name
 * users pass at runtime; the variable's help text spells it "recursive
 * halving" -- decide whether to rename. */
static mca_base_var_enum_value_t reduce_scatter_algorithms[] = {
{0, "ignore"},
{1, "non-overlapping"},
{2, "recursive_halfing"},
{3, "ring"},
{0, NULL}
};
/*******************************************************************************
* ompi_coll_tuned_reduce_scatter_intra_nonoverlapping
*
@ -629,55 +646,68 @@ ompi_coll_tuned_reduce_scatter_intra_ring(void *sbuf, void *rbuf, int *rcounts,
* perms module does not call this they call the forced_getvalues routine
* instead
*/
/*
 * Register the MCA variables that let a user force a reduce_scatter
 * algorithm and its tuning knobs (segment size, tree fanout, chain fanout),
 * and store the resulting variable indices in *mca_param_indices.
 *
 * Returns MPI_SUCCESS, or the negative index returned when registering
 * "reduce_scatter_algorithm" fails.
 *
 * NOTE(review): this span contains both the legacy mca_base_param_* calls and
 * their mca_base_component_var_register() counterparts for the same
 * variables; it reads like both sides of a change shown together. Confirm
 * which statements are live before building.
 */
int ompi_coll_tuned_reduce_scatter_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
int rc, requested_alg, max_alg = 3;
mca_base_var_enum_t *new_enum;
/* publish the number of reduce_scatter algorithms to the shared table */
/* NOTE(review): the legacy line stores max_alg (3) while the newer line
 * stores coll_tuned_reduce_scatter_algorithm_count -- make sure the two
 * agree */
ompi_coll_tuned_forced_max_algorithms[REDUCESCATTER] = max_alg;
ompi_coll_tuned_forced_max_algorithms[REDUCESCATTER] = coll_tuned_reduce_scatter_algorithm_count;
rc = mca_base_param_reg_int (&mca_coll_tuned_component.super.collm_version,
"reduce_scatter_algorithm_count",
"Number of reduce_scatter algorithms available",
false, true, max_alg, NULL);
mca_param_indices->algorithm_param_index
= mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"reduce_scatter_algorithm",
"Which reduce reduce_scatter algorithm is used. Can be locked down to choice of: 0 ignore, 1 non-overlapping (Reduce + Scatterv), 2 recursive halving, 3 ring",
false, false, 0, NULL);
/* informational variable: default-only flag, constant scope */
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_scatter_algorithm_count",
"Number of reduce_scatter algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&coll_tuned_reduce_scatter_algorithm_count);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_reduce_scatter_forced_algorithm = 0;
/* build an enumerator from reduce_scatter_algorithms[] and attach it to the variable */
/* NOTE(review): the result of mca_base_var_enum_create() is not checked
 * before new_enum is used and released -- confirm it cannot fail here */
(void) mca_base_var_enum_create("coll_tuned_reduce_scatter_algorithms", reduce_scatter_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_scatter_algorithm",
"Which reduce reduce_scatter algorithm is used. Can be locked down to choice of: 0 ignore, 1 non-overlapping (Reduce + Scatterv), 2 recursive halving, 3 ring",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_reduce_scatter_forced_algorithm);
/* drop this function's reference to the enumerator */
OBJ_RELEASE(new_enum);
/* a negative index is handed back to the caller as the error code */
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
/* legacy range check: out-of-range requests are reported by rank 0 of
 * MPI_COMM_WORLD and the parameter is reset to 0 */
mca_base_param_lookup_int(mca_param_indices->algorithm_param_index, &(requested_alg));
if( 0 > requested_alg || requested_alg > max_alg ) {
if( 0 == ompi_comm_rank( MPI_COMM_WORLD ) ) {
opal_output( 0, "Reduce_scatter algorithm #%d is not available (range [0..%d]). Switching back to ignore(0)\n",
requested_alg, max_alg );
}
mca_base_param_set_int( mca_param_indices->algorithm_param_index, 0);
}
mca_param_indices->segsize_param_index
= mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"reduce_scatter_algorithm_segmentsize",
"Segment size in bytes used by default for reduce_scatter algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
false, false, 0, NULL);
/* segment size: defaults to 0 (no segmentation) */
coll_tuned_reduce_scatter_segment_size = 0;
mca_param_indices->segsize_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_scatter_algorithm_segmentsize",
"Segment size in bytes used by default for reduce_scatter algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_reduce_scatter_segment_size);
mca_param_indices->tree_fanout_param_index
= mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"reduce_scatter_algorithm_tree_fanout",
"Fanout for n-tree used for reduce_scatter algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
false, false,
ompi_coll_tuned_init_tree_fanout, /* get system wide default */
NULL);
/* tree fanout: default taken from the component-wide initial tree fanout */
coll_tuned_reduce_scatter_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */
mca_param_indices->tree_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_scatter_algorithm_tree_fanout",
"Fanout for n-tree used for reduce_scatter algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_reduce_scatter_tree_fanout);
/* chain fanout: default taken from the component-wide initial chain fanout */
coll_tuned_reduce_scatter_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */
mca_param_indices->chain_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_scatter_algorithm_chain_fanout",
"Fanout for chains used for reduce_scatter algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_reduce_scatter_chain_fanout);
mca_param_indices->chain_fanout_param_index
= mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"reduce_scatter_algorithm_chain_fanout",
"Fanout for chains used for reduce_scatter algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
false, false,
ompi_coll_tuned_init_chain_fanout, /* get system wide default */
NULL);
return (MPI_SUCCESS);
}

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -9,6 +10,8 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -29,6 +32,20 @@
#include "coll_tuned_topo.h"
#include "coll_tuned_util.h"
/*
 * scatter algorithm variables
 *
 * Backing storage for the scatter MCA variables that
 * ompi_coll_tuned_scatter_intra_check_forced_init() registers by address.
 */
/* number of scatter algorithms; also copied into
 * ompi_coll_tuned_forced_max_algorithms[SCATTER] */
static int coll_tuned_scatter_algorithm_count = 2;
/* value of the "scatter_algorithm" variable (0 is named "ignore" below) */
static int coll_tuned_scatter_forced_algorithm = 0;
/* value of the "scatter_algorithm_segmentsize" variable */
static int coll_tuned_scatter_segment_size = 0;
/* the two fanout values are assigned from ompi_coll_tuned_init_tree_fanout /
 * ompi_coll_tuned_init_chain_fanout right before they are registered */
static int coll_tuned_scatter_tree_fanout;
static int coll_tuned_scatter_chain_fanout;
/*
 * valid values for coll_tuned_scatter_forced_algorithm
 *
 * Handed to mca_base_var_enum_create() without an element count, so the
 * trailing {0, NULL} entry presumably acts as the terminator -- confirm
 * against the enumerator API.
 */
static mca_base_var_enum_value_t scatter_algorithms[] = {
{0, "ignore"},
{1, "basic_linear"},
{2, "binomial"},
{0, NULL}
};
int
ompi_coll_tuned_scatter_intra_binomial(void *sbuf, int scount,
@ -267,54 +284,64 @@ ompi_coll_tuned_scatter_intra_basic_linear(void *sbuf, int scount,
/*
 * Register the MCA variables that control forced selection of the scatter
 * algorithm and record the resulting indices in mca_param_indices.
 *
 * Registered names: scatter_algorithm_count, scatter_algorithm,
 * scatter_algorithm_segmentsize, scatter_algorithm_tree_fanout and
 * scatter_algorithm_chain_fanout.
 *
 * Returns MPI_SUCCESS, or the negative index obtained when registering
 * "scatter_algorithm" through mca_base_component_var_register().
 *
 * NOTE(review): this listing appears to interleave the older
 * mca_base_param_* calls with their mca_base_component_var_register()
 * counterparts, so every name is registered twice and every index field is
 * assigned twice -- confirm which set of calls is meant to remain.
 */
int
ompi_coll_tuned_scatter_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
int rc, max_alg = 2, requested_alg;
mca_base_var_enum_t *new_enum;
/* publish the number of scatter algorithms; the second assignment
 * overwrites the first */
ompi_coll_tuned_forced_max_algorithms[SCATTER] = max_alg;
ompi_coll_tuned_forced_max_algorithms[SCATTER] = coll_tuned_scatter_algorithm_count;
rc = mca_base_param_reg_int (&mca_coll_tuned_component.super.collm_version,
"scatter_algorithm_count",
"Number of scatter algorithms available",
false, true, max_alg, NULL);
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"scatter_algorithm_count",
"Number of scatter algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&coll_tuned_scatter_algorithm_count);
mca_param_indices->algorithm_param_index
= mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"scatter_algorithm",
"Which scatter algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 binomial.",
false, false, 0, NULL);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_scatter_forced_algorithm = 0;
/* build an enumerator from scatter_algorithms and attach it to the
 * "scatter_algorithm" variable registered below */
(void) mca_base_var_enum_create("coll_tuned_scatter_algorithms", scatter_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"scatter_algorithm",
"Which scatter algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 binomial.",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_scatter_forced_algorithm);
/* release this function's reference to the enumerator; presumably the
 * variable system holds its own reference -- TODO confirm */
OBJ_RELEASE(new_enum);
/* a negative index is handed back to the caller as the return value */
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
/* range-check the requested algorithm: a value outside [0..max_alg] is
 * reported by rank 0 of MPI_COMM_WORLD and the parameter is reset to 0 */
mca_base_param_lookup_int(mca_param_indices->algorithm_param_index,
&(requested_alg));
if( 0 > requested_alg || requested_alg > max_alg ) {
if( 0 == ompi_comm_rank( MPI_COMM_WORLD ) ) {
opal_output( 0, "Scatter algorithm #%d is not available (range [0..%d]). Switching back to ignore(0)\n",
requested_alg, max_alg );
}
mca_base_param_set_int( mca_param_indices->algorithm_param_index, 0);
}
mca_param_indices->segsize_param_index
= mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"scatter_algorithm_segmentsize",
"Segment size in bytes used by default for scatter algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation. Currently, available algorithms do not support segmentation.",
false, false, 0, NULL);
/* the backing variable is given its default before it is registered */
coll_tuned_scatter_segment_size = 0;
mca_param_indices->segsize_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"scatter_algorithm_segmentsize",
"Segment size in bytes used by default for scatter algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation. Currently, available algorithms do not support segmentation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_scatter_segment_size);
mca_param_indices->tree_fanout_param_index
= mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"scatter_algorithm_tree_fanout",
"Fanout for n-tree used for scatter algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation. Currently, available algorithms do not support n-tree topologies.",
false, false,
ompi_coll_tuned_init_tree_fanout, /* get system wide default */
NULL);
coll_tuned_scatter_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */
mca_param_indices->tree_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"scatter_algorithm_tree_fanout",
"Fanout for n-tree used for scatter algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation. Currently, available algorithms do not support n-tree topologies.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_scatter_tree_fanout);
mca_param_indices->chain_fanout_param_index
= mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"scatter_algorithm_chain_fanout",
"Fanout for chains used for scatter algorithms. Only has meaning if algorithm is forced and supports chain topo based operation. Currently, available algorithms do not support chain topologies.",
false, false,
ompi_coll_tuned_init_chain_fanout, /* get system wide default */
NULL);
coll_tuned_scatter_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */
mca_param_indices->chain_fanout_param_index=
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"scatter_algorithm_chain_fanout",
"Fanout for chains used for scatter algorithms. Only has meaning if algorithm is forced and supports chain topo based operation. Currently, available algorithms do not support chain topologies.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_scatter_chain_fanout);
return (MPI_SUCCESS);
}

Просмотреть файл

@ -30,7 +30,6 @@
#include <cuda.h>
#include "opal/align.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/datatype/opal_convertor.h"
#include "opal/datatype/opal_datatype_cuda.h"
#include "opal/util/output.h"
@ -182,6 +181,69 @@ static void cuda_dump_memhandle(int, void *, char *) __opal_attribute_unused__ ;
#endif /* OMPI_CUDA_SUPPORT_41 */
/**
 * Register the MCA variables used by the common CUDA support code.
 *
 * Only the first call does any work; every later call falls straight
 * through to the return.  Each backing variable is assigned its default
 * immediately before it is registered.
 *
 * @return OMPI_SUCCESS in all cases (registration results are ignored).
 */
int mca_common_cuda_register_mca_variables(void)
{
    static bool already_done = false;

    if (!already_done) {
        already_done = true;

        /* Verbosity level for the CUDA-related code. */
        mca_common_cuda_verbose = 0;
        (void) mca_base_var_register("ompi", "mpi", "common_cuda", "verbose",
                                     "Set level of common cuda verbosity",
                                     MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                     OPAL_INFO_LVL_9,
                                     MCA_BASE_VAR_SCOPE_READONLY,
                                     &mca_common_cuda_verbose);

        /* Whether system buffers get CUDA pinned; switching it off is
         * useful for performance analysis. */
        mca_common_cuda_register_memory = true;
        (void) mca_base_var_register("ompi", "mpi", "common_cuda", "register_memory",
                                     "Whether to cuMemHostRegister preallocated BTL buffers",
                                     MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                     OPAL_INFO_LVL_9,
                                     MCA_BASE_VAR_SCOPE_READONLY,
                                     &mca_common_cuda_register_memory);

        /* Whether a failed CUDA memory registration produces a warning.
         * Handy when CUDA support is configured in but the application
         * being run is a regular MPI program without CUDA. */
        mca_common_cuda_warning = true;
        (void) mca_base_var_register("ompi", "mpi", "common_cuda", "warning",
                                     "Whether to print warnings when CUDA registration fails",
                                     MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                     OPAL_INFO_LVL_9,
                                     MCA_BASE_VAR_SCOPE_READONLY,
                                     &mca_common_cuda_warning);

#if OMPI_CUDA_SUPPORT_41
        /* Flag for testing asynchronous versus synchronous copies. */
        mca_common_cuda_async = 1;
        (void) mca_base_var_register("ompi", "mpi", "common_cuda", "memcpy_async",
                                     "Set to 0 to force CUDA sync copy instead of async",
                                     MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                     OPAL_INFO_LVL_9,
                                     MCA_BASE_VAR_SCOPE_READONLY,
                                     &mca_common_cuda_async);

        /* Number of outstanding events allowed; raise it through this
         * parameter. */
        cuda_event_max = 200;
        (void) mca_base_var_register("ompi", "mpi", "common_cuda", "event_max",
                                     "Set number of oustanding CUDA events",
                                     MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                     OPAL_INFO_LVL_9,
                                     MCA_BASE_VAR_SCOPE_READONLY,
                                     &cuda_event_max);
#endif /* OMPI_CUDA_SUPPORT_41 */
    }

    return OMPI_SUCCESS;
}
/**
* This function is registered with the OPAL CUDA support. In that way,
* we will complete initialization when OPAL detects the first GPU memory
@ -203,35 +265,17 @@ static int mca_common_cuda_init(opal_common_cuda_function_table_t *ftable)
return OMPI_SUCCESS;
}
/* Make sure this component's variables are registered */
mca_common_cuda_register_mca_variables();
ftable->gpu_is_gpu_buffer = &mca_common_cuda_is_gpu_buffer;
ftable->gpu_cu_memcpy_async = &mca_common_cuda_cu_memcpy_async;
ftable->gpu_cu_memcpy = &mca_common_cuda_cu_memcpy;
ftable->gpu_memmove = &mca_common_cuda_memmove;
/* Set different levels of verbosity in the cuda related code. */
id = mca_base_param_reg_int_name("mpi", "common_cuda_verbose",
"Set level of common cuda verbosity",
false, false, 0, &mca_common_cuda_verbose);
mca_common_cuda_output = opal_output_open(NULL);
opal_output_set_verbosity(mca_common_cuda_output, mca_common_cuda_verbose);
/* Control whether system buffers get CUDA pinned or not. Allows for
* performance analysis. */
id = mca_base_param_reg_int_name("mpi", "common_cuda_register_memory",
"Whether to cuMemHostRegister preallocated BTL buffers",
false, false,
(int) mca_common_cuda_register_memory, &value);
mca_common_cuda_register_memory = OPAL_INT_TO_BOOL(value);
/* Control whether we see warnings when CUDA memory registration fails. This is
* useful when CUDA support is configured in, but we are running a regular MPI
* application without CUDA. */
id = mca_base_param_reg_int_name("mpi", "common_cuda_warning",
"Whether to print warnings when CUDA registration fails",
false, false,
(int) mca_common_cuda_warning, &value);
mca_common_cuda_warning = OPAL_INT_TO_BOOL(value);
/* If we cannot load the library, then disable support */
if (0 != mca_common_cuda_load_libcuda()) {
common_cuda_initialized = true;
@ -239,20 +283,6 @@ static int mca_common_cuda_init(opal_common_cuda_function_table_t *ftable)
return OMPI_ERROR;
}
#if OMPI_CUDA_SUPPORT_41
/* Use this flag to test async vs sync copies */
id = mca_base_param_reg_int_name("mpi", "common_cuda_memcpy_async",
"Set to 0 to force CUDA sync copy instead of async",
false, false, mca_common_cuda_async, &i);
mca_common_cuda_async = i;
/* Use this parameter to increase the number of outstanding events allows */
id = mca_base_param_reg_int_name("mpi", "common_cuda_event_max",
"Set number of oustanding CUDA events",
false, false, cuda_event_max, &i);
cuda_event_max = i;
#endif /* OMPI_CUDA_SUPPORT_41 */
/* Check to see if this process is running in a CUDA context. If
* so, all is good. If not, then disable registration of memory. */
res = cuFunc.cuCtxGetCurrent(&cuContext);

Просмотреть файл

@ -31,6 +31,8 @@ struct mca_mpool_common_cuda_reg_t {
};
typedef struct mca_mpool_common_cuda_reg_t mca_mpool_common_cuda_reg_t;
OMPI_DECLSPEC int mca_common_cuda_register_mca_variables(void);
OMPI_DECLSPEC void mca_common_cuda_register(void *ptr, size_t amount, char *msg);
OMPI_DECLSPEC void mca_common_cuda_unregister(void *ptr, char *msg);

Просмотреть файл

@ -27,7 +27,6 @@
#include <errno.h>
#include "opal/memoryhooks/memory.h"
#include "opal/util/output.h"
#include "opal/mca/base/mca_base_param.h"
#include "ompi/runtime/params.h"

Просмотреть файл

@ -26,9 +26,12 @@
#include "opal/util/show_help.h"
#include "infiniband/verbs.h"
/* Global variables */
static ompi_common_ofacm_base_component_t **available = NULL;
static int num_available = 0;
static char *ompi_common_ofacm_cpc_include;
static char *ompi_common_ofacm_cpc_exclude;
/* Global variables */
int ompi_common_ofacm_base_verbose = 0; /* disabled by default */
char* ompi_common_ofacm_three_dim_torus = NULL;
bool cpc_explicitly_defined = false;
@ -196,6 +199,7 @@ ompi_common_ofacm_base_proc_t* ompi_common_ofacm_base_find_proc
}
return ret;
}
/*
* Register MCA parameters
*/
@ -203,7 +207,6 @@ int ompi_common_ofacm_base_register(mca_base_component_t *base)
{
int i, j, save;
char **temp = NULL, *string = NULL, *all_cpc_names = NULL;
char *cpc_include = NULL, *cpc_exclude = NULL;
if (ompi_common_ofacm_base_register_was_called) {
return OMPI_SUCCESS;
@ -224,30 +227,41 @@ int ompi_common_ofacm_base_register(mca_base_component_t *base)
"Method used to select OpenFabrics connections (valid values: %s)",
all_cpc_names);
mca_base_param_reg_string(base, "ofacm_cpc_include", string, false, false,
NULL, &cpc_include);
ompi_common_ofacm_cpc_include = NULL;
(void) mca_base_component_var_register(base, "ofacm_cpc_include", string,
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&ompi_common_ofacm_cpc_include);
free(string);
asprintf(&string,
"Method used to exclude OpenFabrics connections (valid values: %s)",
all_cpc_names);
mca_base_param_reg_string(base, "ofacm_cpc_exclude", string, false, false,
NULL, &cpc_exclude);
ompi_common_ofacm_cpc_exclude = NULL;
(void) mca_base_component_var_register(base, "ofacm_cpc_exclude", string,
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&ompi_common_ofacm_cpc_exclude);
free(string);
/* Register the name of the file containing the fabric's Service Levels (SL) */
mca_base_param_reg_string_name("common", "ofacm_three_dim_torus",
"The name of the file contating Service Level (SL) data for 3D-Torus cluster",
false, false, NULL, &ompi_common_ofacm_three_dim_torus);
ompi_common_ofacm_three_dim_torus = NULL;
(void) mca_base_var_register("ompi", "common", "ofacm", "three_dim_torus",
"The name of the file contating Service Level (SL) data for 3D-Torus cluster",
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&ompi_common_ofacm_three_dim_torus);
mca_base_param_reg_int_name("common",
"ofacm_base_verbose",
"Verbosity level of the OFACM framework",
false, false,
0,
&ompi_common_ofacm_base_verbose);
ompi_common_ofacm_base_verbose = 0;
(void) mca_base_var_register("ompi", "common", "ofacm", "base_verbose",
"Verbosity level of the OFACM framework",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&ompi_common_ofacm_base_verbose);
/* Parse the if_[in|ex]clude parameters to come up with a list of
@ -256,9 +270,9 @@ int ompi_common_ofacm_base_register(mca_base_component_t *base)
/* If we have an "include" list, then find all those CPCs and put
them in available[] */
if (NULL != cpc_include) {
if (NULL != ompi_common_ofacm_cpc_include) {
cpc_explicitly_defined = true;
temp = opal_argv_split(cpc_include, ',');
temp = opal_argv_split(ompi_common_ofacm_cpc_include, ',');
for (save = j = 0; NULL != temp[j]; ++j) {
for (i = 0; NULL != all[i]; ++i) {
if (0 == strcmp(temp[j], all[i]->cbc_name)) {
@ -272,7 +286,7 @@ int ompi_common_ofacm_base_register(mca_base_component_t *base)
opal_show_help("help-mpi-common-ofacm-cpc-base.txt",
"cpc name not found", true,
"include", ompi_process_info.nodename,
"include", cpc_include, temp[j],
"include", ompi_common_ofacm_cpc_include, temp[j],
all_cpc_names);
opal_argv_free(temp);
free(all_cpc_names);
@ -284,9 +298,9 @@ int ompi_common_ofacm_base_register(mca_base_component_t *base)
/* Otherwise, if we have an "exclude" list, take all the CPCs that
are not in that list and put them in available[] */
else if (NULL != cpc_exclude) {
else if (NULL != ompi_common_ofacm_cpc_exclude) {
cpc_explicitly_defined = true;
temp = opal_argv_split(cpc_exclude, ',');
temp = opal_argv_split(ompi_common_ofacm_cpc_exclude, ',');
/* First: error check -- ensure that all the names are valid */
for (j = 0; NULL != temp[j]; ++j) {
for (i = 0; NULL != all[i]; ++i) {
@ -298,7 +312,7 @@ int ompi_common_ofacm_base_register(mca_base_component_t *base)
opal_show_help("help-mpi-common-ofacm-cpc-base.txt",
"cpc name not found", true,
"exclude", ompi_process_info.nodename,
"exclude", cpc_exclude, temp[j],
"exclude", ompi_common_ofacm_cpc_exclude, temp[j],
all_cpc_names);
opal_argv_free(temp);
free(all_cpc_names);

Просмотреть файл

@ -130,16 +130,13 @@ ompi_common_ofacm_base_component_t ompi_common_ofacm_oob = {
/*
 * Register - sets up the oob-specific MCA priority parameter, which is
 * backed by oob_priority.
 *
 * NOTE(review): this listing appears to interleave the older
 * mca_base_param_reg_int_name() call and its range clamp with the newer
 * mca_base_var_register() call.  As written, the clamp to [-1, 100] is
 * immediately overwritten by the assignment of 50 -- confirm which lines
 * are meant to remain.
 */
static void oob_component_register(void)
{
mca_base_param_reg_int_name("common",
"ofacm_connect_oob_priority",
"The selection method priority for oob",
false, false, oob_priority, &oob_priority);
/* keep the priority within [-1, 100] */
if (oob_priority > 100) {
oob_priority = 100;
} else if (oob_priority < -1) {
oob_priority = -1;
}
/* default assigned right before the variable is registered */
oob_priority = 50;
(void) mca_base_var_register("ompi", "common", "ofacm", "connect_oob_priority",
"The selection method priority for oob",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&oob_priority);
}
/*
@ -151,6 +148,12 @@ static int oob_component_query(ompi_common_ofacm_base_dev_desc_t *dev,
{
int rc;
if (oob_priority > 100) {
oob_priority = 100;
} else if (oob_priority < -1) {
oob_priority = -1;
}
/* If we have the transport_type member, check to ensure we're on
IB (this CPC will not work with iWarp). If we do not have the
transport_type member, then we must be < OFED v1.2, and

Просмотреть файл

@ -1378,6 +1378,12 @@ static int xoob_component_query(ompi_common_ofacm_base_dev_desc_t *dev,
ompi_common_ofacm_xoob_module_t *xcpc; /* xoob cpc module */
ompi_common_ofacm_base_module_t *bcpc; /* base cpc module */
if (xoob_priority > 100) {
xoob_priority = 100;
} else if (xoob_priority < -1) {
xoob_priority = -1;
}
if (!(dev->capabilities & OMPI_COMMON_OFACM_XRC_ONLY)) {
OFACM_VERBOSE(("openib BTL: xoob CPC only supported with XRC receive queues; skipped on device %s",
ibv_get_device_name(dev->ib_dev)));
@ -1444,16 +1450,12 @@ static int xoob_component_query(ompi_common_ofacm_base_dev_desc_t *dev,
/*
 * Register - sets up the xoob-specific MCA priority parameter, which is
 * backed by xoob_priority.
 *
 * NOTE(review): this listing appears to interleave the older
 * mca_base_param_reg_int_name() call and its range clamp with the newer
 * mca_base_var_register() call.  As written, the clamp to [-1, 100] is
 * immediately overwritten by the assignment of 60 -- confirm which lines
 * are meant to remain.
 */
static void xoob_component_register(void)
{
mca_base_param_reg_int_name("common",
"ofacm_connect_xoob_priority",
"The selection method priority for xoob",
false, false, xoob_priority, &xoob_priority);
/* keep the priority within [-1, 100] */
if (xoob_priority > 100) {
xoob_priority = 100;
} else if (xoob_priority < -1) {
xoob_priority = -1;
}
/* default assigned right before the variable is registered */
xoob_priority = 60;
(void) mca_base_var_register("ompi", "common", "ofacm", "connect_xoob_priority",
"The selection method priority for xoob",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &xoob_priority);
}
/*

Просмотреть файл

@ -10,10 +10,10 @@
#include "ompi_config.h"
#include "opal/mca/base/mca_base_param.h"
#include "common_verbs.h"
#include "opal/mca/base/mca_base_var.h"
/***********************************************************************/
static bool registered = false;
@ -23,34 +23,25 @@ bool ompi_common_verbs_warn_nonexistent_if = true;
/*
 * Register the "warn_nonexistent_if" MCA parameter, backed by
 * ompi_common_verbs_warn_nonexistent_if, and set the file-level
 * `registered` flag.
 *
 * NOTE(review): this listing appears to interleave the older
 * mca_base_param_reg_int_name() registration (which goes through the int
 * temporary `ival`) with the newer mca_base_var_register() call.  As
 * written, warn_nonexistent_if_index receives the result of the older call
 * only -- confirm which lines are meant to remain.
 */
static void register_internal(void)
{
int ival;
/* default: warn */
ompi_common_verbs_warn_nonexistent_if = true;
warn_nonexistent_if_index =
mca_base_param_reg_int_name("ompi_common_verbs",
"warn_nonexistent_if",
"Warn if non-existent devices and/or ports are specified in device include/exclude MCA parameters "
"(0 = do not warn; any other value = warn)",
false, false,
(int) ompi_common_verbs_warn_nonexistent_if,
&ival);
ompi_common_verbs_warn_nonexistent_if = (bool) ival;
mca_base_var_register("ompi", "ompi_common", "verbs", "warn_nonexistent_if",
"Warn if non-existent devices and/or ports are specified in device include/exclude MCA parameters "
"(0 = do not warn; any other value = warn)",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
&ompi_common_verbs_warn_nonexistent_if);
/* makes ompi_common_verbs_mca_register() skip this function afterwards */
registered = true;
}
/*
 * Make sure the common verbs parameter is registered, then register a
 * "warn_nonexistent_if" synonym for it under the given component.
 *
 * component: supplies the type and component names used for the synonym.
 *
 * NOTE(review): this listing appears to interleave the older
 * mca_base_param_reg_syn() / mca_base_param_lookup_int() sequence with the
 * newer mca_base_var_register_synonym() call -- confirm which lines are
 * meant to remain.
 */
void ompi_common_verbs_mca_register(mca_base_component_t *component)
{
int ival;
/* register the underlying parameter once */
if (!registered) {
register_internal();
}
/* Make synonyms for the common_verbs MCA params. Need to look up
the value again, because a new/different value may have been
set by the new synonym name. */
mca_base_param_reg_syn(warn_nonexistent_if_index, component,
"warn_nonexistent_if", false);
mca_base_param_lookup_int(warn_nonexistent_if_index, &ival);
ompi_common_verbs_warn_nonexistent_if = (bool) ival;
/* Make synonym for the common_verbs MCA params. */
mca_base_var_register_synonym(warn_nonexistent_if_index, "ompi", component->mca_type_name,
component->mca_component_name, "warn_nonexistent_if", 0);
}

Просмотреть файл

@ -19,8 +19,6 @@
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "ompi/mca/crcp/crcp.h"
#include "ompi/mca/crcp/base/base.h"

Просмотреть файл

@ -29,7 +29,6 @@
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/mca/crs/crs.h"
#include "opal/mca/crs/base/base.h"

Просмотреть файл

@ -20,9 +20,6 @@
#include "opal/util/output.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "ompi/mca/crcp/crcp.h"
#include "ompi/mca/crcp/base/base.h"
@ -45,8 +42,6 @@ ompi_crcp_base_component_t ompi_crcp_base_selected_component;
*/
int ompi_crcp_base_open(void)
{
char *str_value = NULL;
ompi_crcp_base_output = opal_output_open(NULL);
/*
@ -57,13 +52,6 @@ int ompi_crcp_base_open(void)
* Note: Set the default to NULL here so ompi_info will work correctly,
* The 'real' default is set in base_select.c
*/
mca_base_param_reg_string_name("crcp", NULL,
"Which CRCP component to use (empty = auto-select)",
false, false,
NULL, &str_value);
if( NULL != str_value ) {
free(str_value);
}
/* Open up all available components */
if (OPAL_SUCCESS !=

Просмотреть файл

@ -20,8 +20,6 @@
#include "opal/util/output.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "ompi/mca/crcp/crcp.h"
#include "ompi/mca/crcp/base/base.h"
@ -122,17 +120,27 @@ int ompi_crcp_base_select(void)
int ret, exit_status = OMPI_SUCCESS;
ompi_crcp_base_component_t *best_component = NULL;
ompi_crcp_base_module_t *best_module = NULL;
char *include_list = NULL;
const char *include_list = NULL;
const char **selection_value;
int var_id;
/*
* Register the framework MCA param and look up include list
*/
mca_base_param_reg_string_name("crcp", NULL,
"Which CRCP component to use (empty = auto-select)",
false, false,
strdup("none"), &include_list);
var_id = mca_base_var_find("ompi", "crcp", NULL, NULL);
if(NULL != include_list && 0 == strncmp(include_list, "none", strlen("none")) ){
/* NTH: The old parameter code here set the selection to none if no file value
or environment value was set. This effectively means include_list is never NULL. */
selection_value = NULL;
(void) mca_base_var_get_value(var_id, &selection_value, NULL, NULL);
if (NULL == selection_value || NULL == selection_value[0]) {
(void) mca_base_var_set_value(var_id, "none", 5, MCA_BASE_VAR_SOURCE_DEFAULT, NULL);
include_list = "none";
} else {
include_list = selection_value[0];
}
if(0 == strncmp(include_list, "none", strlen("none")) ){
opal_output_verbose(10, ompi_crcp_base_output,
"crcp:select: Using %s component",
include_list);
@ -172,10 +180,6 @@ int ompi_crcp_base_select(void)
}
cleanup:
if( NULL != include_list ) {
free(include_list);
include_list = NULL;
}
return exit_status;
}

Просмотреть файл

@ -45,7 +45,7 @@ BEGIN_C_DECLS
/*
* Local variables
*/
extern int timing_enabled;
extern bool timing_enabled;
/*
* Module functions

Просмотреть файл

@ -24,7 +24,6 @@
#include "opal/class/opal_bitmap.h"
#include "opal/mca/event/event.h"
#include "opal/util/opal_environ.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"

Просмотреть файл

@ -28,11 +28,12 @@
const char *ompi_crcp_bkmrk_component_version_string =
"OMPI CRCP bkmrk MCA component version " OMPI_VERSION;
int timing_enabled = 0;
bool timing_enabled;
/*
* Local functionality
*/
static int crcp_bkmrk_register(void);
static int crcp_bkmrk_open(void);
static int crcp_bkmrk_close(void);
@ -57,7 +58,8 @@ ompi_crcp_bkmrk_component_t mca_crcp_bkmrk_component = {
/* Component open and close functions */
crcp_bkmrk_open,
crcp_bkmrk_close,
ompi_crcp_bkmrk_component_query
ompi_crcp_bkmrk_component_query,
crcp_bkmrk_register
},
{
/* The component is checkpoint ready */
@ -73,27 +75,43 @@ ompi_crcp_bkmrk_component_t mca_crcp_bkmrk_component = {
}
};
static int crcp_bkmrk_open(void)
/*
 * Register the MCA parameters of the CRCP bkmrk component: "priority",
 * "verbose" and "timing".
 *
 * Returns OMPI_SUCCESS; the results of the individual registrations are
 * not checked.
 *
 * NOTE(review): this listing appears to interleave the older
 * mca_base_param_reg_int() calls with the newer
 * mca_base_component_var_register() calls, so "priority" and "verbose" are
 * registered twice and `val` is not used in this function -- confirm which
 * lines are meant to remain.
 */
static int crcp_bkmrk_register(void)
{
int val;
/*
* This should be the last component to ever get used since
* it doesn't do anything.
*/
mca_base_param_reg_int(&mca_crcp_bkmrk_component.super.base_version,
"priority",
"Priority of the CRCP bkmrk component",
false, false,
mca_crcp_bkmrk_component.super.priority,
&mca_crcp_bkmrk_component.super.priority);
mca_base_param_reg_int(&mca_crcp_bkmrk_component.super.base_version,
"verbose",
"Verbose level for the CRCP bkmrk component",
false, false,
mca_crcp_bkmrk_component.super.verbose,
&mca_crcp_bkmrk_component.super.verbose);
/* each backing field is given its default before it is registered */
mca_crcp_bkmrk_component.super.priority = 20;
(void) mca_base_component_var_register(&mca_crcp_bkmrk_component.super.base_version,
"priority",
"Priority of the CRCP bkmrk component",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_crcp_bkmrk_component.super.priority);
mca_crcp_bkmrk_component.super.verbose = 0;
(void) mca_base_component_var_register(&mca_crcp_bkmrk_component.super.base_version,
"verbose",
"Verbose level for the CRCP bkmrk component",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_crcp_bkmrk_component.super.verbose);
/* performance timing is off by default */
timing_enabled = false;
(void) mca_base_component_var_register(&mca_crcp_bkmrk_component.super.base_version,
"timing", "Enable Performance timing",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&timing_enabled);
return OMPI_SUCCESS;
}
static int crcp_bkmrk_open(void)
{
/* If there is a custom verbose level for this component then use it;
* otherwise take our parent's level and output channel
*/
@ -105,14 +123,6 @@ static int crcp_bkmrk_open(void)
mca_crcp_bkmrk_component.super.output_handle = ompi_crcp_base_output;
}
mca_base_param_reg_int(&mca_crcp_bkmrk_component.super.base_version,
"timing",
"Enable Performance timing",
false, false,
0,
&val);
timing_enabled = val;
/*
* Debug Output
*/

Просмотреть файл

@ -25,7 +25,6 @@
#include "opal/mca/base/base.h"
#include "opal/util/output.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/util/opal_environ.h"

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше