1
1

Merge pull request #8220 from devreal/fix-coll-base-preference

Fix preference treatment in coll/base
Этот коммит содержится в:
Raghu Raja 2020-11-20 08:14:37 -08:00 коммит произвёл GitHub
родитель 28779321a7 1cdc85564e
Коммит 38d2f12112
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
7 изменённых файлов: 29 добавлений и 35 удалений

Просмотреть файл

@ -3,9 +3,9 @@
* of Tennessee Research Foundation. All rights
* reserved.
* $COPYRIGHT$
*
*
* Additional copyrights may follow
*
*
* $HEADER$
*/
@ -146,7 +146,7 @@ mca_coll_base_module_t *ompi_coll_adapt_comm_query(struct ompi_communicator_t *
/* Get the priority level attached to this module.
If priority is less than or equal to 0, then the module is unavailable. */
*priority = mca_coll_adapt_component.adapt_priority;
if (mca_coll_adapt_component.adapt_priority <= 0) {
if (mca_coll_adapt_component.adapt_priority < 0) {
opal_output_verbose(10, ompi_coll_base_framework.framework_output,
"coll:adapt:comm_query (%d/%s): priority too low; "
"disqualifying myself",

Просмотреть файл

@ -337,6 +337,7 @@ static opal_list_t *check_components(opal_list_t * components,
ompi_communicator_t * comm)
{
int priority, flag;
int count_include = 0;
const mca_base_component_t *component;
mca_base_component_list_item_t *cli;
mca_coll_base_module_2_3_0_t *module;
@ -363,7 +364,8 @@ static opal_list_t *check_components(opal_list_t * components,
if(NULL == coll_argv) {
goto proceed_to_select;
}
int idx2, count_include = opal_argv_count(coll_argv);
int idx2;
count_include = opal_argv_count(coll_argv);
/* Allocate the coll_include argv */
coll_include = (char**)malloc((count_include + 1) * sizeof(char*));
coll_include[count_include] = NULL; /* NULL terminated array */
@ -385,15 +387,6 @@ static opal_list_t *check_components(opal_list_t * components,
}
coll_include[idx] = coll_argv[idx];
}
/* Reverse the order of the coll_include argv to facilitate the reverse
* ordering of the selected components.
*/
for( idx2 = 0; idx2 < (count_include - 1); idx2++ ) {
char* temp = coll_include[idx2];
coll_include[idx2] = coll_include[count_include - 1];
coll_include[count_include - 1] = temp;
count_include--;
}
}
proceed_to_select:
/* Make a list of the components that query successfully */
@ -453,14 +446,17 @@ static opal_list_t *check_components(opal_list_t * components,
/* For all valid component reorder them not on their provided priorities but on
* the order requested in the info key. As at this point the coll_include is
* already ordered backward we can simply prepend the components.
* already ordered backward we can simply append the components.
* Note that the last element in selectable will have the highest priority.
*/
mca_coll_base_avail_coll_t *item, *item_next;
OPAL_LIST_FOREACH_SAFE(item, item_next,
selectable, mca_coll_base_avail_coll_t) {
if( component_in_argv(coll_include, item->ac_component_name) ) {
opal_list_remove_item(selectable, &item->super);
opal_list_prepend(selectable, &item->super);
for (int idx = count_include-1; idx >= 0; --idx) {
mca_coll_base_avail_coll_t *item;
OPAL_LIST_FOREACH(item, selectable, mca_coll_base_avail_coll_t) {
if (0 == strcmp(item->ac_component_name, coll_include[idx])) {
opal_list_remove_item(selectable, &item->super);
opal_list_append(selectable, &item->super);
break;
}
}
}

Просмотреть файл

@ -39,7 +39,6 @@ ompi_coll_han_components available_components[COMPONENTS_COUNT] = {
{ LIBNBC, "libnbc", NULL },
{ TUNED, "tuned", NULL },
{ SM, "sm", NULL },
{ SHARED, "shared", NULL },
{ ADAPT, "adapt", NULL },
{ HAN, "han", NULL }
};
@ -179,12 +178,12 @@ static int han_register(void)
cs->han_bcast_low_module = 0;
(void) mca_base_component_var_register(c, "bcast_low_module",
"low level module for bcast, 0 sm, 1 solo",
"low level module for bcast, 0 tuned, 1 sm",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_bcast_low_module);
cs->han_reduce_segsize = 524288;
cs->han_reduce_segsize = 65536;
(void) mca_base_component_var_register(c, "reduce_segsize",
"segment size for reduce",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
@ -200,11 +199,11 @@ static int han_register(void)
cs->han_reduce_low_module = 0;
(void) mca_base_component_var_register(c, "reduce_low_module",
"low level module for allreduce, 0 sm, 1 shared",
"low level module for allreduce, 0 tuned, 1 sm",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_reduce_low_module);
cs->han_allreduce_segsize = 524288;
cs->han_allreduce_segsize = 65536;
(void) mca_base_component_var_register(c, "allreduce_segsize",
"segment size for allreduce",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
@ -220,7 +219,7 @@ static int han_register(void)
cs->han_allreduce_low_module = 0;
(void) mca_base_component_var_register(c, "allreduce_low_module",
"low level module for allreduce, 0 sm, 1 shared",
"low level module for allreduce, 0 tuned, 1 sm",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_allreduce_low_module);
@ -234,7 +233,7 @@ static int han_register(void)
cs->han_allgather_low_module = 0;
(void) mca_base_component_var_register(c, "allgather_low_module",
"low level module for allgather, 0 sm, 1 shared",
"low level module for allgather, 0 tuned, 1 sm",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_allgather_low_module);
@ -248,7 +247,7 @@ static int han_register(void)
cs->han_gather_low_module = 0;
(void) mca_base_component_var_register(c, "gather_low_module",
"low level module for gather, 0 sm, 1 shared",
"low level module for gather, 0 tuned, 1 sm",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_gather_low_module);
@ -262,7 +261,7 @@ static int han_register(void)
cs->han_scatter_low_module = 0;
(void) mca_base_component_var_register(c, "scatter_low_module",
"low level module for scatter, 0 sm, 1 shared",
"low level module for scatter, 0 tuned, 1 sm",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_scatter_low_module);

Просмотреть файл

@ -102,7 +102,6 @@ typedef enum COMPONENTS {
LIBNBC,
TUNED,
SM,
SHARED,
ADAPT,
HAN,
COMPONENTS_COUNT

Просмотреть файл

@ -188,7 +188,7 @@ mca_coll_han_comm_query(struct ompi_communicator_t * comm, int *priority)
/* Get the priority level attached to this module. If priority is less
* than or equal to 0, then the module is unavailable. */
*priority = mca_coll_han_component.han_priority;
if (mca_coll_han_component.han_priority <= 0) {
if (mca_coll_han_component.han_priority < 0) {
opal_output_verbose(10, ompi_coll_base_framework.framework_output,
"coll:han:comm_query (%d/%s): priority too low; disqualifying myself",
comm->c_contextid, comm->c_name);

Просмотреть файл

@ -258,7 +258,7 @@ int mca_coll_han_comm_create(struct ompi_communicator_t *comm,
* Upgrade sm module priority to set up low_comms[0] with sm module
* This sub-communicator contains the ranks that share my node.
*/
opal_info_set(&comm_info, "ompi_comm_coll_preference", "sm,^han");
opal_info_set(&comm_info, "ompi_comm_coll_preference", "tuned,^han");
ompi_comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0,
&comm_info, &(low_comms[0]));
@ -272,7 +272,7 @@ int mca_coll_han_comm_create(struct ompi_communicator_t *comm,
* Upgrade shared module priority to set up low_comms[1] with shared module
* This sub-communicator contains the ranks that share my node.
*/
opal_info_set(&comm_info, "ompi_comm_coll_preference", "shared,^han");
opal_info_set(&comm_info, "ompi_comm_coll_preference", "sm,^han");
ompi_comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0,
&comm_info, &(low_comms[1]));

Просмотреть файл

@ -182,10 +182,10 @@ mca_coll_sm_comm_query(struct ompi_communicator_t *comm, int *priority)
/* Get the priority level attached to this module. If priority is less
* than or equal to 0, then the module is unavailable. */
*priority = mca_coll_sm_component.sm_priority;
if (mca_coll_sm_component.sm_priority <= 0) {
if (mca_coll_sm_component.sm_priority < 0) {
opal_output_verbose(10, ompi_coll_base_framework.framework_output,
"coll:sm:comm_query (%d/%s): priority too low; disqualifying myself", comm->c_contextid, comm->c_name);
return NULL;
return NULL;
}
sm_module = OBJ_NEW(mca_coll_sm_module_t);