1
1

coll/ml: update the coll_ml_enable_fragmentation variable to support the

option to autodetect whether fragmentation should be enabled

cmr=v1.7.3:ticket=trac:3717

This commit was SVN r29065.

The following Trac tickets were found above:
  Ticket 3717 --> https://svn.open-mpi.org/trac/ompi/ticket/3717
Этот коммит содержится в:
Nathan Hjelm 2013-08-27 16:36:54 +00:00
родитель c9a25465da
Коммит f5495ace48
5 изменённых файлов: 82 добавлений и 20 удалений

Просмотреть файл

@ -480,9 +480,12 @@ mca_bcol_basesmuma_comm_query(mca_sbgp_base_module_t *module, int *num_modules)
sm_module->super.supported_mode = 0;
/* NTH: this is not set anywhere on the trunk as of 08/13/13 */
#if 0
if (module->use_hdl) {
sm_module->super.supported_mode = MCA_BCOL_BASE_ZERO_COPY;
}
#endif
/* Initializes portals library required for basesmuma large message */
#ifdef __PORTALS_AVAIL__

Просмотреть файл

@ -525,8 +525,8 @@ struct mca_coll_ml_component_t {
/* use hdl_framework */
bool use_hdl_bcast;
/* Enable / Disable fragmentation */
bool enable_fragmentation;
/* Enable / Disable fragmentation (0 - off, 1 - on, 2 - auto) */
int enable_fragmentation;
/* Use sequential bcast algorithm */
bool use_sequential_bcast;

Просмотреть файл

@ -276,9 +276,9 @@ static int set_section_name(section_config_t *section_config)
/*
 * Reset the topology_id, threshold, algorithm_id and fragmentation_enabled
 * fields of a per-collective configuration entry to ML_UNDEFINED.
 */
void mca_coll_ml_reset_config(per_collective_configuration_t *config)
{
    config->topology_id = ML_UNDEFINED;
    config->threshold = ML_UNDEFINED;
    config->algorithm_id = ML_UNDEFINED;
    config->fragmentation_enabled = ML_UNDEFINED;
}
static void reset_section(section_config_t *section_cf)
@ -369,12 +369,12 @@ static int parse_fragmentation_key(section_config_t *section, char *value)
}
/* Save the configuration that has been collected so far */
static void save_settings(coll_config_t *coll_config)
static int save_settings(coll_config_t *coll_config)
{
per_collective_configuration_t *cf;
if (ML_UNDEFINED == coll_config->coll_id || ML_UNDEFINED == coll_config->section.section_id) {
return;
return OMPI_ERROR;
}
cf = &mca_coll_ml_component.coll_config[coll_config->coll_id][coll_config->section.section_id];
@ -383,6 +383,8 @@ static void save_settings(coll_config_t *coll_config)
cf->threshold = coll_config->section.config.threshold;
cf->algorithm_id = coll_config->section.config.algorithm_id;
cf->fragmentation_enabled = coll_config->section.config.fragmentation_enabled;
return OMPI_SUCCESS;
}
/*
@ -495,7 +497,7 @@ static int parse_file(char *filename)
{
int val;
int ret = OMPI_SUCCESS;
bool first_section = true, first_coll = true;
coll_config_t coll_config;
memset (&coll_config, 0, sizeof (coll_config));
@ -521,11 +523,21 @@ static int parse_file(char *filename)
break;
case COLL_ML_CONFIG_PARSE_COLLECTIVE:
/* dump all the information to last section that was defined */
save_settings(&coll_config);
if (!first_coll) {
ret = save_settings(&coll_config);
if (OMPI_SUCCESS != ret) {
ML_ERROR(("Error in syntax for collective %s", coll_config.coll_name));
goto cleanup;
}
}
/* reset collective config */
reset_collective(&coll_config);
first_coll = false;
first_section = true;
ret = set_collective_name(&coll_config);
if (OMPI_SUCCESS != ret) {
goto cleanup;
@ -538,8 +550,17 @@ static int parse_file(char *filename)
goto cleanup;
}
/* dump all the information to last section that was defined */
save_settings(&coll_config);
if (!first_section) {
/* dump all the information to last section that was defined */
ret = save_settings(&coll_config);
if (OMPI_SUCCESS != ret) {
ML_ERROR(("Error in syntax for collective %s section %s", coll_config.coll_name,
coll_config.section.section_name));
goto cleanup;
}
}
first_section = false;
/* reset all section values */
reset_section(&coll_config.section);

Просмотреть файл

@ -1,6 +1,9 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -41,6 +44,15 @@ enum {
REGSTR_MAX = 0x88
};
/*
 * Enumerators
 *
 * Allowed values for the coll_ml_enable_fragmentation MCA variable
 * (0 - off, 1 - on, 2 - auto).
 *
 * The table is handed to mca_base_var_enum_create(), which is expected to
 * locate the end of the list by looking for an entry whose string is NULL,
 * so the final sentinel entry must stay in place.
 */
mca_base_var_enum_value_t fragmentation_enable_enum[] = {
    {0, "disable"},
    {1, "enable"},
    {2, "auto"},
    {-1, NULL}
};
/*
* utility routine for string parameter registration
*/
@ -172,6 +184,7 @@ static int mca_coll_ml_verify_params(void)
int mca_coll_ml_register_params(void)
{
mca_base_var_enum_t *new_enum;
int ret, tmp;
char *str = NULL;
@ -241,9 +254,21 @@ int mca_coll_ml_register_params(void)
"Alltoall disabling",
false, &mca_coll_ml_component.disable_alltoall));
CHECK(reg_bool("enable_fragmentation", NULL,
"Disable/Enable fragmentation for large messages",
false, &mca_coll_ml_component.enable_fragmentation));
tmp = mca_base_var_enum_create ("coll_ml_enable_fragmentation_enum", fragmentation_enable_enum, &new_enum);
if (OPAL_SUCCESS != ret) {
return tmp;
}
/* default to auto-enable fragmentation */
mca_coll_ml_component.enable_fragmentation = 2;
tmp = mca_base_component_var_register (&mca_coll_ml_component.super.collm_version, "enable_fragmentation",
"Disable/Enable fragmentation for large messages", MCA_BASE_VAR_TYPE_INT,
new_enum, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_ml_component.enable_fragmentation);
if (0 > tmp) {
ret = tmp;
}
OBJ_RELEASE(new_enum);
CHECK(reg_int("use_brucks_smsg_alltoall", NULL,
"Use Bruck's Algo for Small Msg Alltoall"

Просмотреть файл

@ -3106,14 +3106,18 @@ static int mca_coll_ml_need_multi_topo(int bcol_collective)
static int setup_bcast_table(mca_coll_ml_module_t *module)
{
mca_coll_ml_component_t *cm = &mca_coll_ml_component;
bool has_zero_copy;
/* setup bcast index table */
if (cm->use_static_bcast) {
module->bcast_fn_index_table[0] = ML_BCAST_SMALL_DATA_KNOWN;
if (cm->enable_fragmentation) {
has_zero_copy = !!(MCA_BCOL_BASE_ZERO_COPY &
module->coll_ml_bcast_functions[ML_BCAST_LARGE_DATA_KNOWN]->topo_info->all_bcols_mode);
if (1 == cm->enable_fragmentation || (2 == cm->enable_fragmentation && !has_zero_copy)) {
module->bcast_fn_index_table[1] = ML_BCAST_SMALL_DATA_KNOWN;
} else if (!(MCA_BCOL_BASE_ZERO_COPY &
module->coll_ml_bcast_functions[ML_BCAST_LARGE_DATA_KNOWN]->topo_info->all_bcols_mode)) {
} else if (!has_zero_copy) {
ML_ERROR(("ML couldn't be used: because the mca param coll_ml_enable_fragmentation "
"was set to zero and there is a bcol doesn't support zero copy method."));
return OMPI_ERROR;
@ -3122,10 +3126,19 @@ static int setup_bcast_table(mca_coll_ml_module_t *module)
}
} else {
module->bcast_fn_index_table[0] = ML_BCAST_SMALL_DATA_UNKNOWN;
if (cm->enable_fragmentation) {
if (NULL == module->coll_ml_bcast_functions[ML_BCAST_LARGE_DATA_UNKNOWN]) {
ML_ERROR(("ML couldn't be used: because the mca param coll_ml_use_static_bcast was set "
"to zero and no function is available."));
return OMPI_ERROR;
}
has_zero_copy = !!(MCA_BCOL_BASE_ZERO_COPY &
module->coll_ml_bcast_functions[ML_BCAST_LARGE_DATA_UNKNOWN]->topo_info->all_bcols_mode);
if (1 == cm->enable_fragmentation || (2 == cm->enable_fragmentation && !has_zero_copy)) {
module->bcast_fn_index_table[1] = ML_BCAST_SMALL_DATA_UNKNOWN;
} else if (!(MCA_BCOL_BASE_ZERO_COPY &
module->coll_ml_bcast_functions[ML_BCAST_LARGE_DATA_UNKNOWN]->topo_info->all_bcols_mode)) {
} else if (!has_zero_copy) {
ML_ERROR(("ML couldn't be used: because the mca param coll_ml_enable_fragmentation "
"was set to zero and there is a bcol doesn't support zero copy method."));
return OMPI_ERROR;