coll/ml: increase the number of allowed processes in a local reduce and
add checks to see if the bcol module can support allreduce.

cmr=v1.7.5:reviewer=manjugv

This commit was SVN r31096.
Этот коммит содержится в:
родитель
fba1edbf14
Коммит
3f469d08e7
@ -32,7 +32,7 @@ int bcol_basesmuma_allreduce_init(mca_bcol_base_module_t *super)
|
|||||||
|
|
||||||
comm_attribs.bcoll_type = BCOL_ALLREDUCE;
|
comm_attribs.bcoll_type = BCOL_ALLREDUCE;
|
||||||
comm_attribs.comm_size_min = 0;
|
comm_attribs.comm_size_min = 0;
|
||||||
comm_attribs.comm_size_max = 16;
|
comm_attribs.comm_size_max = 64;
|
||||||
comm_attribs.data_src = DATA_SRC_KNOWN;
|
comm_attribs.data_src = DATA_SRC_KNOWN;
|
||||||
|
|
||||||
/* selection logic at the ml level specifies a
|
/* selection logic at the ml level specifies a
|
||||||
|
@ -38,7 +38,8 @@ int bcol_basesmuma_reduce_init(mca_bcol_base_module_t *super)
|
|||||||
|
|
||||||
comm_attribs.bcoll_type = BCOL_REDUCE;
|
comm_attribs.bcoll_type = BCOL_REDUCE;
|
||||||
comm_attribs.comm_size_min = 0;
|
comm_attribs.comm_size_min = 0;
|
||||||
comm_attribs.comm_size_max = 16;
|
/* NTH: is there any reason for this restriction */
|
||||||
|
comm_attribs.comm_size_max = 64;
|
||||||
comm_attribs.data_src = DATA_SRC_KNOWN;
|
comm_attribs.data_src = DATA_SRC_KNOWN;
|
||||||
comm_attribs.waiting_semantics = NON_BLOCKING;
|
comm_attribs.waiting_semantics = NON_BLOCKING;
|
||||||
|
|
||||||
|
@ -1,6 +1,9 @@
|
|||||||
|
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
|
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
|
||||||
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
|
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
|
||||||
|
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
|
||||||
|
* reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
* Additional copyrights may follow
|
* Additional copyrights may follow
|
||||||
@ -147,7 +150,6 @@ static int mca_coll_ml_build_allreduce_schedule(
|
|||||||
goto Allreduce_Setup_Error;
|
goto Allreduce_Setup_Error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
for (i = 0; i < num_up_levels; i++) {
|
for (i = 0; i < num_up_levels; i++) {
|
||||||
comp_fn = &schedule->component_functions[i];
|
comp_fn = &schedule->component_functions[i];
|
||||||
comp_fn->h_level = i; /* hierarchy level */
|
comp_fn->h_level = i; /* hierarchy level */
|
||||||
@ -160,6 +162,10 @@ static int mca_coll_ml_build_allreduce_schedule(
|
|||||||
|
|
||||||
comp_fn->bcol_function =
|
comp_fn->bcol_function =
|
||||||
bcol_module->filtered_fns_table[DATA_SRC_KNOWN][NON_BLOCKING][BCOL_REDUCE][bcol_func_index][0][0];
|
bcol_module->filtered_fns_table[DATA_SRC_KNOWN][NON_BLOCKING][BCOL_REDUCE][bcol_func_index][0][0];
|
||||||
|
if (NULL == comp_fn->bcol_function) {
|
||||||
|
/* if there isn't a bcol function for this then we can't continue */
|
||||||
|
goto Allreduce_Setup_Error;
|
||||||
|
}
|
||||||
|
|
||||||
comp_fn->task_comp_fn = NULL;
|
comp_fn->task_comp_fn = NULL;
|
||||||
|
|
||||||
@ -176,6 +182,8 @@ static int mca_coll_ml_build_allreduce_schedule(
|
|||||||
comp_fn->h_level = nfn; /* hierarchy level */
|
comp_fn->h_level = nfn; /* hierarchy level */
|
||||||
bcol_module = GET_BCOL(topo_info, nfn);
|
bcol_module = GET_BCOL(topo_info, nfn);
|
||||||
|
|
||||||
|
assert (NULL != bcol_module);
|
||||||
|
|
||||||
/* strcpy (comp_fn->fn_name, "ALLREDUCE_SMALL_DATA"); */
|
/* strcpy (comp_fn->fn_name, "ALLREDUCE_SMALL_DATA"); */
|
||||||
|
|
||||||
/* The allreduce should depend on the reduce */
|
/* The allreduce should depend on the reduce */
|
||||||
@ -183,6 +191,10 @@ static int mca_coll_ml_build_allreduce_schedule(
|
|||||||
comp_fn->num_dependencies = 0;
|
comp_fn->num_dependencies = 0;
|
||||||
comp_fn->bcol_function =
|
comp_fn->bcol_function =
|
||||||
bcol_module->filtered_fns_table[DATA_SRC_KNOWN][NON_BLOCKING][BCOL_ALLREDUCE][bcol_func_index][0][0];
|
bcol_module->filtered_fns_table[DATA_SRC_KNOWN][NON_BLOCKING][BCOL_ALLREDUCE][bcol_func_index][0][0];
|
||||||
|
if (NULL == comp_fn->bcol_function) {
|
||||||
|
/* if there isn't a bcol function for this then we can't continue */
|
||||||
|
goto Allreduce_Setup_Error;
|
||||||
|
}
|
||||||
|
|
||||||
comp_fn->task_comp_fn = NULL;
|
comp_fn->task_comp_fn = NULL;
|
||||||
|
|
||||||
@ -200,6 +212,8 @@ static int mca_coll_ml_build_allreduce_schedule(
|
|||||||
comp_fn->h_level = i; /* hierarchy level */
|
comp_fn->h_level = i; /* hierarchy level */
|
||||||
bcol_module = GET_BCOL(topo_info, i);
|
bcol_module = GET_BCOL(topo_info, i);
|
||||||
|
|
||||||
|
assert (NULL != bcol_module);
|
||||||
|
|
||||||
/* strcpy (comp_fn->fn_name, "ALLREDUCE_SMALL_DATA"); */
|
/* strcpy (comp_fn->fn_name, "ALLREDUCE_SMALL_DATA"); */
|
||||||
|
|
||||||
comp_fn->num_dependent_tasks = 0;
|
comp_fn->num_dependent_tasks = 0;
|
||||||
@ -207,6 +221,10 @@ static int mca_coll_ml_build_allreduce_schedule(
|
|||||||
|
|
||||||
comp_fn->bcol_function =
|
comp_fn->bcol_function =
|
||||||
bcol_module->filtered_fns_table[DATA_SRC_KNOWN][NON_BLOCKING][BCOL_BCAST][bcol_func_index][0][0];
|
bcol_module->filtered_fns_table[DATA_SRC_KNOWN][NON_BLOCKING][BCOL_BCAST][bcol_func_index][0][0];
|
||||||
|
if (NULL == comp_fn->bcol_function) {
|
||||||
|
/* if there isn't a bcol function for this then we can't continue */
|
||||||
|
goto Allreduce_Setup_Error;
|
||||||
|
}
|
||||||
|
|
||||||
comp_fn->task_comp_fn = NULL;
|
comp_fn->task_comp_fn = NULL;
|
||||||
|
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user