coll/ml: fix some warnings and the spelling of indices

This commit fixes one warning that should have caused coll/ml to segfault
on reduce. The fix should be correct, but we will continue to investigate.

cmr=v1.7.5:ticket=trac:4158

This commit was SVN r30477.

The following Trac tickets were found above:
  Ticket 4158 --> https://svn.open-mpi.org/trac/ompi/ticket/4158
This commit is contained in:
parent 700e97cf6a
commit afae924e29
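The reduce crash traces to the hunk in mca_coll_ml_static_reduce_non_root() below, where an int (root_route->level) was assigned directly to the int * field rt_dependent_task_indices; the fix takes its address instead. Compilers flag the old assignment as a make-pointer-from-integer warning, which is presumably the warning the commit message refers to. A minimal sketch of the pattern, using simplified stand-in structs rather than the real OMPI types:

    #include <stdio.h>

    /* Simplified stand-ins; the real OMPI types are
     * mca_coll_ml_task_status_t and the route structure reached
     * through variable_fn_params. */
    struct route { int level; int rank; };
    struct task  { int *rt_dependent_task_indices; };

    int main (void)
    {
        struct route root_route = { .level = 2, .rank = 0 };
        struct task  task_status;

        /* Before the fix (simplified): the int value 2 was converted
         * to a pointer, so the field pointed at address 0x2 and the
         * first dereference crashed.
         *
         *     task_status.rt_dependent_task_indices = root_route.level;
         */

        /* After the fix: store the address of the level field, so the
         * one dependent task index can be read through the pointer. */
        task_status.rt_dependent_task_indices = &root_route.level;

        printf ("dependent task index: %d\n",
                task_status.rt_dependent_task_indices[0]);
        return 0;
    }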
@@ -56,7 +56,6 @@ int bcol_basesmuma_k_nomial_gather_init(bcol_function_args_t *input_args,
                                         coll_ml_function_t *c_input_args)
 {
     /* local variables */
-    int group_size;
     int leading_dim, buff_idx, idx;
     int src, i, j, k_temp1, k_temp2;
     int pseudo_root, proxy_root, pseudo_base_adj;
@@ -122,7 +121,6 @@ int bcol_basesmuma_k_nomial_gather_init(bcol_function_args_t *input_args,
     /* Get addressing information */
     my_rank = bcol_module->super.sbgp_partner_module->my_index;
 
-    group_size = bcol_module->colls_no_user_data.size_of_group;
     leading_dim=bcol_module->colls_no_user_data.size_of_group;
     idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0);
     data_buffs=(volatile mca_bcol_basesmuma_payload_t *)
@@ -563,7 +563,7 @@ int mca_coll_ml_allgather_start (void *sbuf, int scount,
 
     /* Fill in the function arguments */
     coll_op->variable_fn_params.sequence_num =
-        OPAL_THREAD_ADD64(&(ml_module->collective_sequence_num), 1);
+        OPAL_THREAD_ADD32(&(ml_module->collective_sequence_num), 1);
     coll_op->variable_fn_params.hier_factor = comm_size;
 
     MCA_COLL_ML_SET_ORDER_INFO(coll_op, n_fragments);
@@ -394,7 +394,7 @@ int parallel_allreduce_start(void *sbuf, void *rbuf, int count,
 
     /* Fill in the function arguments */
     coll_op->variable_fn_params.sequence_num =
-        OPAL_THREAD_ADD64(&(ml_module->collective_sequence_num), 1);
+        OPAL_THREAD_ADD32(&(ml_module->collective_sequence_num), 1);
     coll_op->sequential_routine.current_active_bcol_fn = 0;
     coll_op->variable_fn_params.dtype = dtype;
     coll_op->variable_fn_params.op = op;
@@ -29,7 +29,7 @@ static void mca_coll_ml_barrier_task_setup(
 {
     task_status->rt_num_dependencies = func->num_dependencies;
     task_status->rt_num_dependent_tasks = func->num_dependent_tasks;
-    task_status->rt_dependent_task_indecies = func->dependent_task_indecies;
+    task_status->rt_dependent_task_indices = func->dependent_task_indices;
 }
 
 static int mca_coll_ml_barrier_launch(mca_coll_ml_module_t *ml_module,
@@ -99,7 +99,7 @@ struct mca_coll_ml_compound_functions_t {
      * collective operation, with these indecies referencing elements
      * in this array.
      */
-    int *dependent_task_indecies;
+    int *dependent_task_indices;
 
 };
 
@@ -191,7 +191,7 @@ struct mca_coll_ml_task_status_t{
     /* ***************************************************************
      * Pasha:
      * I'm adding to the status: num_dependencies, num_dependent_tasks and
-     * dependent_task_indecies. The information originally resided on mca_coll_ml_compound_functions_t.
+     * dependent_task_indices. The information originally resided on mca_coll_ml_compound_functions_t.
      * For collective operation with static nature it is not problem.
      * But for Bcast operation, where run time parameters, like root, actually
      * define the dependency. rt prefix mean run-time.
@@ -213,7 +213,7 @@ struct mca_coll_ml_task_status_t{
      * collective operation, with these indecies referencing elements
      * in this array.
      */
-    int *rt_dependent_task_indecies;
+    int *rt_dependent_task_indices;
     /*
      *
      * ***************************************************************/
@@ -130,8 +130,8 @@ static int mca_coll_ml_build_memsync_schedule(
         ML_VERBOSE(10, ("func indx %d set to %p", i_fn, comp_fn->bcol_function));
 
         if (comp_fn->num_dependent_tasks > 0) {
-            comp_fn->dependent_task_indecies = (int *) calloc(comp_fn->num_dependent_tasks, sizeof(int));
-            if (OPAL_UNLIKELY(NULL == comp_fn->dependent_task_indecies)) {
+            comp_fn->dependent_task_indices = (int *) calloc(comp_fn->num_dependent_tasks, sizeof(int));
+            if (OPAL_UNLIKELY(NULL == comp_fn->dependent_task_indices)) {
                 ML_ERROR(("Can't allocate memory.\n"));
                 rc = OMPI_ERR_OUT_OF_RESOURCE;
                 goto Barrier_Setup_Error;
@@ -139,7 +139,7 @@ static int mca_coll_ml_build_memsync_schedule(
 
             /* All indexes follow after this one */
             for (i = 0; i < comp_fn->num_dependent_tasks; ++i) {
-                comp_fn->dependent_task_indecies[i] = i_fn + i + 1;
+                comp_fn->dependent_task_indices[i] = i_fn + i + 1;
             }
         }
 
@@ -125,8 +125,8 @@ static int mca_coll_ml_build_barrier_schedule(
         ML_VERBOSE(10, ("func indx %d set to %p", i_fn, comp_fn->bcol_function));
 
         if (comp_fn->num_dependent_tasks > 0) {
-            comp_fn->dependent_task_indecies = (int *) calloc(comp_fn->num_dependent_tasks, sizeof(int));
-            if (OPAL_UNLIKELY(NULL == comp_fn->dependent_task_indecies)) {
+            comp_fn->dependent_task_indices = (int *) calloc(comp_fn->num_dependent_tasks, sizeof(int));
+            if (OPAL_UNLIKELY(NULL == comp_fn->dependent_task_indices)) {
                 ML_ERROR(("Can't allocate memory.\n"));
                 rc = OMPI_ERR_OUT_OF_RESOURCE;
                 goto Barrier_Setup_Error;
@@ -134,10 +134,10 @@ static int mca_coll_ml_build_barrier_schedule(
 
             /* All indexes follow after this one */
            for (i = 0; i < comp_fn->num_dependent_tasks; ++i) {
-                comp_fn->dependent_task_indecies[i] = i_fn + i + 1;
+                comp_fn->dependent_task_indices[i] = i_fn + i + 1;
             }
         } else {
-            comp_fn->dependent_task_indecies = NULL;
+            comp_fn->dependent_task_indices = NULL;
         }
 
 
@@ -115,7 +115,7 @@ static void mca_coll_ml_zero_dep_bcast(mca_coll_ml_task_status_t *task_status, i
     /* no real dependency, set everything to zero */
     task_status->rt_num_dependencies = 0;
     task_status->rt_num_dependent_tasks = 0;
-    task_status->rt_dependent_task_indecies = NULL;
+    task_status->rt_dependent_task_indices = NULL;
 }
 
 /*
@@ -217,7 +217,7 @@ static int mca_coll_ml_build_bcast_dynamic_schedule_no_attributes(
     strcpy (comp_fn->fn_name, "BCAST_TEST_SMALL_DYNAMIC");
     comp_fn->num_dependent_tasks = 0;
     comp_fn->num_dependencies = 0;
-    comp_fn->dependent_task_indecies = NULL;
+    comp_fn->dependent_task_indices = NULL;
     comp_fn->bcol_function =
         bcol_module->filtered_fns_table[DATA_SRC_UNKNOWN][NON_BLOCKING][BCOL_BCAST][bcol_func_index][0][0];
     comp_fn->task_comp_fn = mca_coll_ml_task_comp_dynamic_root_small_message;
@@ -524,7 +524,7 @@ static void mca_coll_ml_static_bcast_root(mca_coll_ml_task_status
 {
     task_status->rt_num_dependencies = 0;
     task_status->rt_num_dependent_tasks = 0;
-    task_status->rt_dependent_task_indecies = 0;
+    task_status->rt_dependent_task_indices = 0;
 }
 
 static void mca_coll_ml_static_bcast_non_root(mca_coll_ml_task_status_t *task_status, int index,
@@ -535,13 +535,13 @@ static void mca_coll_ml_static_bcast_non_root(mca_coll_ml_task_st
     if (task_status->ml_coll_operation->variable_fn_params.root_route->level == index) {
         task_status->rt_num_dependencies = 0;
         task_status->rt_num_dependent_tasks = func->num_dependent_tasks;
-        task_status->rt_dependent_task_indecies = func->dependent_task_indecies;
+        task_status->rt_dependent_task_indices = func->dependent_task_indices;
         task_status->ml_coll_operation->variable_fn_params.root =
             task_status->ml_coll_operation->variable_fn_params.root_route->rank;
     } else {
         task_status->rt_num_dependencies = 1;    /* wait for root */
         task_status->rt_num_dependent_tasks = 0; /* no depended task */
-        task_status->rt_dependent_task_indecies = NULL; /* NULL */
+        task_status->rt_dependent_task_indices = NULL; /* NULL */
     }
 }
 
@@ -650,9 +650,9 @@ static int mca_coll_ml_build_bcast_known_schedule_no_attributes(
         }
 
         if (0 != comp_fn->num_dependent_tasks) {
-            comp_fn->dependent_task_indecies = (int *)calloc(n_hiers, sizeof(int));
+            comp_fn->dependent_task_indices = (int *)calloc(n_hiers, sizeof(int));
             for (j = 0; j < n_hiers; j++) {
-                comp_fn->dependent_task_indecies[j] = j; /* only root will use this one */
+                comp_fn->dependent_task_indices[j] = j; /* only root will use this one */
             }
         }
 
@@ -1,6 +1,9 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
 /*
  * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
  * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2014      Los Alamos National Security, LLC. All rights
+ *                         reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -28,13 +31,13 @@ static void mca_coll_ml_static_reduce_non_root(mca_coll_ml_task_s
     if (task_status->ml_coll_operation->variable_fn_params.root_route->level == index) {
         task_status->rt_num_dependencies = func->num_dependencies;
         task_status->rt_num_dependent_tasks = 0;
-        task_status->rt_dependent_task_indecies = NULL;
+        task_status->rt_dependent_task_indices = NULL;
         task_status->ml_coll_operation->variable_fn_params.root =
             task_status->ml_coll_operation->variable_fn_params.root_route->rank;
     } else {
         task_status->rt_num_dependencies = 0;
         task_status->rt_num_dependent_tasks = 1;
-        task_status->rt_dependent_task_indecies = task_status->ml_coll_operation->variable_fn_params.root_route->level;
+        task_status->rt_dependent_task_indices = &task_status->ml_coll_operation->variable_fn_params.root_route->level;
     }
 
 }
@@ -44,7 +47,7 @@ static void mca_coll_ml_static_reduce_root(mca_coll_ml_task_statu
 {
     task_status->rt_num_dependencies = func->num_dependencies;
     task_status->rt_num_dependent_tasks = 0;
-    task_status->rt_dependent_task_indecies = NULL;
+    task_status->rt_dependent_task_indices = NULL;
 }
 
 /*
@@ -215,7 +215,7 @@ static inline __opal_attribute_always_inline__ int coll_ml_task_dependency_proce
     for (dep_task = 0; dep_task < n_dependent_tasks; dep_task++)
     {
         int task_index;
-        task_index = task->rt_dependent_task_indecies[dep_task];
+        task_index = task->rt_dependent_task_indices[dep_task];
         my_schedule_instance->dag_description.status_array[task_index].n_dep_satisfied++;
     }
 
@@ -83,7 +83,7 @@ static void mca_coll_ml_barrier_task_setup(
 {
     task_status->rt_num_dependencies = func->num_dependencies;
     task_status->rt_num_dependent_tasks = func->num_dependent_tasks;
-    task_status->rt_dependent_task_indecies = func->dependent_task_indecies;
+    task_status->rt_dependent_task_indices = func->dependent_task_indices;
 }
 
 static inline __opal_attribute_always_inline__ int mca_coll_ml_memsync_launch(mca_coll_ml_module_t *ml_module,
@@ -951,7 +951,7 @@ static int ml_setup_full_tree_data(mca_coll_ml_topology_t *topo,
      * the subcommunicator.
      *
      * The information needed for each rank in the subgroup are the
-     * group indicies for which it is a proxy.
+     * group indices for which it is a proxy.
      */
     /*
      * fill in the vertecies in the hierarchichal communications graph
@@ -434,7 +434,7 @@ int parallel_reduce_start (void *sbuf, void *rbuf, int count,
     coll_op->global_root = root;
 
     coll_op->variable_fn_params.sequence_num =
-        OPAL_THREAD_ADD64(&(ml_module->collective_sequence_num), 1);
+        OPAL_THREAD_ADD32(&(ml_module->collective_sequence_num), 1);
     coll_op->sequential_routine.current_active_bcol_fn = 0;
     /* set the task setup callback */
     coll_op->sequential_routine.seq_task_setup = mca_coll_ml_reduce_task_setup;
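Note on the OPAL_THREAD_ADD64 -> OPAL_THREAD_ADD32 hunks above: the declaration of ml_module->collective_sequence_num is not part of this diff, but narrowing the add only makes sense if the counter is 32 bits wide. Under that assumption, a minimal sketch (GCC/Clang __atomic builtins standing in for the OPAL macros) of why an over-wide atomic add is dangerous:

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical layout; assumes collective_sequence_num is a
     * 32-bit field, which the diff implies but does not show. */
    struct module {
        int32_t collective_sequence_num;
        int32_t neighbor;  /* whatever the compiler places after the counter */
    };

    int main (void)
    {
        struct module m = { .collective_sequence_num = 0, .neighbor = 7 };

        /* Before the fix (simplified): a 64-bit read-modify-write on
         * the 32-bit counter also touches the 4 bytes after it,
         * corrupting `neighbor`. Left commented out because the
         * access is undefined behaviour:
         *
         *     __atomic_fetch_add ((int64_t *) &m.collective_sequence_num,
         *                         1, __ATOMIC_RELAXED);
         */

        /* After the fix: the add width matches the field width. */
        __atomic_fetch_add (&m.collective_sequence_num, 1, __ATOMIC_RELAXED);

        printf ("counter = %d, neighbor = %d\n",
                (int) m.collective_sequence_num, (int) m.neighbor);
        return 0;
    }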