Remove unnecessary \n's in ML_VERBOSE and ML_ERROR.
Also fixed the spelling of IS_NOT_RECHABLE -> IS_NOT_REACHABLE, and marked a few places where opal_show_help() should have been used; Manju will take care of these. This commit was SVN r31104.
parent 0aa23cdc35
commit 5efd961149
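Why the trailing \n's are unnecessary: ML_VERBOSE and ML_ERROR ultimately print through opal_output(), which terminates every message with its own newline, so a "\n" at the end of the format string just produces a stray blank line in the log. A minimal, self-contained sketch of the effect follows; the VERBOSE macro and output() helper are illustrative stand-ins, not the real definitions from coll_ml.h.

#include <stdarg.h>
#include <stdio.h>

/* Stand-in for opal_output(): like the real call, it appends its own newline. */
static void output(const char *fmt, ...)
{
    va_list ap;
    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);
    fputc('\n', stderr);   /* the output layer terminates every message */
}

/* Hypothetical macro in the ML_VERBOSE(level, ("fmt", args...)) style. */
#define VERBOSE(level, args)     \
    do {                         \
        if ((level) <= 10) {     \
            output args;         \
        }                        \
    } while (0)

int main(void)
{
    VERBOSE(10, ("buffer size %d\n", 4096)); /* "\n" plus appended newline = extra blank line */
    VERBOSE(10, ("buffer size %d", 4096));   /* exactly one line of output */
    return 0;
}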
@@ -216,7 +216,7 @@ static int mca_coll_ml_allreduce_frag_progress(mca_coll_ml_collective_operation_
 MCA_COLL_ML_SET_NEW_FRAG_ORDER_INFO(new_op);
-ML_VERBOSE(10,("FFFF Contig + fragmentation [0-sk, 1-lk, 3-su, 4-lu] %d %d %d\n",
+ML_VERBOSE(10,("FFFF Contig + fragmentation [0-sk, 1-lk, 3-su, 4-lu] %d %d %d",
 new_op->variable_fn_params.buffer_size,
 new_op->fragment_data.fragment_size,
 new_op->fragment_data.message_descriptor->n_bytes_scheduled));
@@ -418,7 +418,7 @@ static int mca_coll_ml_bcast_frag_progress(mca_coll_ml_collective_operation_prog
 /* Set order info for new frag if there is a bcol needs ordering */
 MCA_COLL_ML_SET_NEW_FRAG_ORDER_INFO(new_op);
-ML_VERBOSE(10, ("FFFF Contig + fragmentation [0-sk, 1-lk, 3-su, 4-lu] %d %d %d\n",
+ML_VERBOSE(10, ("FFFF Contig + fragmentation [0-sk, 1-lk, 3-su, 4-lu] %d %d %d",
 new_op->variable_fn_params.buffer_size ,
 new_op->fragment_data.fragment_size,
 new_op->fragment_data.message_descriptor->n_bytes_scheduled));
@@ -475,7 +475,7 @@ static inline __opal_attribute_always_inline__
 assert(pack_len <= ml_module->payload_block->size_buffer);
 bcast_index = ml_module->bcast_fn_index_table[SMALL_BCAST];
-ML_VERBOSE(10, ("Contig + small message %d [0-sk, 1-lk, 3-su, 4-lu]\n", bcast_index));
+ML_VERBOSE(10, ("Contig + small message %d [0-sk, 1-lk, 3-su, 4-lu]", bcast_index));
 ALLOCATE_AND_PACK_CONTIG_BCAST_FRAG(ml_module, coll_op, bcast_index, root, pack_len,
 pack_len, actual_buf, src_buffer_desc);
@@ -489,7 +489,7 @@ static inline __opal_attribute_always_inline__
 int frag_len, pipeline_depth = mca_coll_ml_component.pipeline_depth;
 bcast_index = ml_module->bcast_fn_index_table[LARGE_BCAST];
-ML_VERBOSE(10, ("Contig + fragmentation %d [0-sk, 1-lk, 3-su, 4-lu]\n", bcast_index));
+ML_VERBOSE(10, ("Contig + fragmentation %d [0-sk, 1-lk, 3-su, 4-lu]", bcast_index));
 /* Calculate the number of fragments required for this message */
 frag_len = (pack_len < (size_t) ml_module->small_message_thresholds[BCOL_BCAST] ?
@@ -514,12 +514,12 @@ static inline __opal_attribute_always_inline__
 /* should be removed */
 coll_op->variable_fn_params.buffer_size = frag_len;
-ML_VERBOSE(10, ("Contig + fragmentation [0-sk, 1-lk, 3-su, 4-lu] %d %d\n",
+ML_VERBOSE(10, ("Contig + fragmentation [0-sk, 1-lk, 3-su, 4-lu] %d %d",
 coll_op->variable_fn_params.buffer_size,
 coll_op->fragment_data.fragment_size));
 } else {
 bcast_index = ml_module->bcast_fn_index_table[LARGE_BCAST];
-ML_VERBOSE(10, ("Contig + zero copy %d [0-sk, 1-lk, 3-su, 4-lu]\n", bcast_index));
+ML_VERBOSE(10, ("Contig + zero copy %d [0-sk, 1-lk, 3-su, 4-lu]", bcast_index));
 coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module,
 ml_module->coll_ml_bcast_functions[bcast_index],
@@ -539,7 +539,7 @@ static inline __opal_attribute_always_inline__
 } else {
 /* Non contiguous data type */
 bcast_index = ml_module->bcast_fn_index_table[SMALL_BCAST];
-ML_VERBOSE(10, ("NON Contig + fragmentation %d [0-sk, 1-lk, 3-su, 4-lu]\n", bcast_index));
+ML_VERBOSE(10, ("NON Contig + fragmentation %d [0-sk, 1-lk, 3-su, 4-lu]", bcast_index));
 coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module,
 ml_module->coll_ml_bcast_functions[bcast_index],
@@ -116,7 +116,7 @@ int mca_coll_ml_check_if_bcol_is_requested(const char *component_name)
 mca_base_component_list_item_t *bcol_comp;
 bcol_comp = (mca_base_component_list_item_t *) opal_list_get_first(&mca_bcol_base_components_in_use);
-ML_VERBOSE(10, ("Loop over bcol components\n"));
+ML_VERBOSE(10, ("Loop over bcol components"));
 for ( bcol_comp = (mca_base_component_list_item_t *) opal_list_get_first(&mca_bcol_base_components_in_use);
 bcol_comp != (mca_base_component_list_item_t *) opal_list_get_end(&mca_bcol_base_components_in_use);
 bcol_comp = (mca_base_component_list_item_t *) opal_list_get_next(bcol_comp)) {
@@ -33,7 +33,7 @@ static int mca_coll_ml_build_memsync_schedule(
 schedule = *coll_desc;
 if (OPAL_UNLIKELY(NULL == schedule)) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 rc = OMPI_ERR_OUT_OF_RESOURCE;
 goto Barrier_Setup_Error;
 }
@@ -61,7 +61,7 @@ static int mca_coll_ml_build_memsync_schedule(
 calloc(n_fcns, sizeof(struct mca_coll_ml_compound_functions_t));
 if (OPAL_UNLIKELY(NULL == schedule->component_functions)) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 rc = OMPI_ERR_OUT_OF_RESOURCE;
 goto Barrier_Setup_Error;
 }
@@ -132,7 +132,7 @@ static int mca_coll_ml_build_memsync_schedule(
 if (comp_fn->num_dependent_tasks > 0) {
 comp_fn->dependent_task_indices = (int *) calloc(comp_fn->num_dependent_tasks, sizeof(int));
 if (OPAL_UNLIKELY(NULL == comp_fn->dependent_task_indices)) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 rc = OMPI_ERR_OUT_OF_RESOURCE;
 goto Barrier_Setup_Error;
 }
@@ -154,7 +154,7 @@ static int mca_coll_ml_build_memsync_schedule(
 rc = ml_coll_barrier_constant_group_data_setup(topo_info, schedule);
 if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
-ML_ERROR(("Failed to init const group data.\n"));
+ML_ERROR(("Failed to init const group data."));
 goto Barrier_Setup_Error;
 }
@@ -43,7 +43,7 @@ static int mca_coll_ml_build_allgather_schedule(mca_coll_ml_topology_t *topo_inf
 ret = mca_coll_ml_schedule_init_scratch(topo_info, &h_info,
 &scratch_indx, &scratch_num);
 if (OMPI_SUCCESS != ret) {
-ML_ERROR(("Can't mca_coll_ml_schedule_init_scratch.\n"));
+ML_ERROR(("Can't mca_coll_ml_schedule_init_scratch."));
 goto Error;
 }
 assert(NULL != scratch_indx);
@@ -52,7 +52,7 @@ static int mca_coll_ml_build_allgather_schedule(mca_coll_ml_topology_t *topo_inf
 schedule = *coll_desc =
 mca_coll_ml_schedule_alloc(&h_info);
 if (NULL == schedule) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 ret = OMPI_ERR_OUT_OF_RESOURCE;
 goto Error;
 }
@@ -116,7 +116,7 @@ int ml_coll_hier_allgather_setup(mca_coll_ml_module_t *ml_module)
 int ret, topo_index, alg;
 mca_coll_ml_topology_t *topo_info = ml_module->topo_list;
-ML_VERBOSE(10,("entering allgather setup\n"));
+ML_VERBOSE(10,("entering allgather setup"));
 #if 0
 /* used to validate the recursive k - ing allgather tree */
@@ -66,21 +66,21 @@ static int mca_coll_ml_build_allreduce_schedule(
 malloc(sizeof(mca_coll_ml_collective_operation_description_t));
 schedule = *coll_desc;
 if (NULL == schedule) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 ret = OMPI_ERR_OUT_OF_RESOURCE;
 goto Allreduce_Setup_Error;
 }
 scratch_indx = (int *) malloc(sizeof(int) * (n_hiers * 2));
 if (NULL == scratch_indx) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 ret = OMPI_ERR_OUT_OF_RESOURCE;
 goto Allreduce_Setup_Error;
 }
 scratch_num = (int *) malloc(sizeof(int) * (n_hiers * 2));
 if (NULL == scratch_num) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 ret = OMPI_ERR_OUT_OF_RESOURCE;
 goto Allreduce_Setup_Error;
 }
@@ -145,7 +145,7 @@ static int mca_coll_ml_build_allreduce_schedule(
 calloc(nbcol_functions, sizeof(struct mca_coll_ml_compound_functions_t));
 if (NULL == schedule->component_functions) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 ret = OMPI_ERR_OUT_OF_RESOURCE;
 goto Allreduce_Setup_Error;
 }
@@ -35,7 +35,7 @@ static int mca_coll_ml_build_barrier_schedule(
 schedule = *coll_desc;
 if (OPAL_UNLIKELY(NULL == schedule)) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 rc = OMPI_ERR_OUT_OF_RESOURCE;
 goto Barrier_Setup_Error;
 }
@@ -67,7 +67,7 @@ static int mca_coll_ml_build_barrier_schedule(
 calloc(n_fcns, sizeof(struct mca_coll_ml_compound_functions_t));
 if (OPAL_UNLIKELY(NULL == schedule->component_functions)) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 rc = OMPI_ERR_OUT_OF_RESOURCE;
 goto Barrier_Setup_Error;
 }
@@ -127,7 +127,7 @@ static int mca_coll_ml_build_barrier_schedule(
 if (comp_fn->num_dependent_tasks > 0) {
 comp_fn->dependent_task_indices = (int *) calloc(comp_fn->num_dependent_tasks, sizeof(int));
 if (OPAL_UNLIKELY(NULL == comp_fn->dependent_task_indices)) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 rc = OMPI_ERR_OUT_OF_RESOURCE;
 goto Barrier_Setup_Error;
 }
@@ -152,7 +152,7 @@ static int mca_coll_ml_build_barrier_schedule(
 rc = ml_coll_barrier_constant_group_data_setup(topo_info, schedule);
 if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
-ML_ERROR(("Failed to init const group data.\n"));
+ML_ERROR(("Failed to init const group data."));
 goto Barrier_Setup_Error;
 }
@@ -143,21 +143,21 @@ static int mca_coll_ml_build_bcast_dynamic_schedule_no_attributes(
 malloc(sizeof(mca_coll_ml_collective_operation_description_t));
 schedule = *coll_desc;
 if (NULL == schedule) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 ret = OMPI_ERR_OUT_OF_RESOURCE;
 goto Bcast_Setup_Error;
 }
 scratch_indx = (int *) malloc(sizeof(int) * (n_hiers));
 if (NULL == scratch_indx) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 ret = OMPI_ERR_OUT_OF_RESOURCE;
 goto Bcast_Setup_Error;
 }
 scratch_num = (int *) malloc(sizeof(int) * (n_hiers));
 if (NULL == scratch_num) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 ret = OMPI_ERR_OUT_OF_RESOURCE;
 goto Bcast_Setup_Error;
 }
@@ -200,7 +200,7 @@ static int mca_coll_ml_build_bcast_dynamic_schedule_no_attributes(
 schedule->component_functions = (struct mca_coll_ml_compound_functions_t *)
 calloc(n_hiers, sizeof(struct mca_coll_ml_compound_functions_t));
 if (NULL == schedule->component_functions) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 ret = OMPI_ERR_OUT_OF_RESOURCE;
 goto Bcast_Setup_Error;
 }
@@ -313,21 +313,21 @@ static int mca_coll_ml_build_bcast_sequential_schedule_no_attributes(
 malloc(sizeof(mca_coll_ml_collective_operation_description_t));
 schedule = *coll_desc;
 if (NULL == schedule) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 ret = OMPI_ERR_OUT_OF_RESOURCE;
 goto Bcast_Setup_Error;
 }
 scratch_indx = (int *) malloc(sizeof(int) * (n_hiers));
 if (NULL == scratch_indx) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 ret = OMPI_ERR_OUT_OF_RESOURCE;
 goto Bcast_Setup_Error;
 }
 scratch_num = (int *) malloc(sizeof(int) * (n_hiers));
 if (NULL == scratch_num) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 ret = OMPI_ERR_OUT_OF_RESOURCE;
 goto Bcast_Setup_Error;
 }
@@ -373,7 +373,7 @@ static int mca_coll_ml_build_bcast_sequential_schedule_no_attributes(
 schedule->component_functions = (struct mca_coll_ml_compound_functions_t *)
 calloc(n_hiers, sizeof(struct mca_coll_ml_compound_functions_t));
 if (NULL == schedule->component_functions) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 ret = OMPI_ERR_OUT_OF_RESOURCE;
 goto Bcast_Setup_Error;
 }
@@ -381,7 +381,7 @@ static int mca_coll_ml_build_bcast_sequential_schedule_no_attributes(
 schedule->comp_fn_arr = (struct mca_coll_ml_compound_functions_t **)
 calloc(n_hiers,sizeof(struct mca_coll_ml_compound_functions_t *));
 if (NULL == schedule->comp_fn_arr) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 ret = OMPI_ERR_OUT_OF_RESOURCE;
 goto Bcast_Setup_Error;
 }
@@ -567,21 +567,21 @@ static int mca_coll_ml_build_bcast_known_schedule_no_attributes(
 malloc(sizeof(mca_coll_ml_collective_operation_description_t));
 schedule = *coll_desc;
 if (NULL == schedule) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 ret = OMPI_ERR_OUT_OF_RESOURCE;
 goto Bcast_Setup_Error;
 }
 scratch_indx = (int *) malloc(sizeof(int) * (n_hiers));
 if (NULL == scratch_indx) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 ret = OMPI_ERR_OUT_OF_RESOURCE;
 goto Bcast_Setup_Error;
 }
 scratch_num = (int *) malloc(sizeof(int) * (n_hiers));
 if (NULL == scratch_num) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 ret = OMPI_ERR_OUT_OF_RESOURCE;
 goto Bcast_Setup_Error;
 }
@@ -624,7 +624,7 @@ static int mca_coll_ml_build_bcast_known_schedule_no_attributes(
 schedule->component_functions = (struct mca_coll_ml_compound_functions_t *)
 calloc(n_hiers, sizeof(struct mca_coll_ml_compound_functions_t));
 if (NULL == schedule->component_functions) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 ret = OMPI_ERR_OUT_OF_RESOURCE;
 goto Bcast_Setup_Error;
 }
@@ -28,14 +28,14 @@ int mca_coll_ml_schedule_init_scratch(mca_coll_ml_topology_t *topo_info,
 scratch_indx = *out_scratch_indx =
 (int *) calloc(n_hiers * 2, sizeof(int));
 if (NULL == *out_scratch_indx) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 return OMPI_ERR_OUT_OF_RESOURCE;
 }
 scratch_num = *out_scratch_num =
 (int *) calloc(n_hiers * 2, sizeof(int));
 if (NULL == *out_scratch_num) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 free(out_scratch_indx);
 return OMPI_ERR_OUT_OF_RESOURCE;
 }
@@ -98,7 +98,7 @@ mca_coll_ml_collective_operation_description_t *
 schedule = (mca_coll_ml_collective_operation_description_t *)
 malloc(sizeof(mca_coll_ml_collective_operation_description_t));
 if (NULL == schedule) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 return NULL;
 }
@@ -109,7 +109,7 @@ mca_coll_ml_collective_operation_description_t *
 schedule->component_functions = (struct mca_coll_ml_compound_functions_t *)
 calloc(h_info->nbcol_functions, sizeof(struct mca_coll_ml_compound_functions_t));
 if (NULL == schedule->component_functions) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 free(schedule);
 return NULL;
 }
@@ -32,14 +32,14 @@ do { \
 ml_module->component_pairs[n_hr - 1].bcol_index) { \
 /* The process that is member of highest level subgroup \
 should call for top algorithms in addition to fan-in/out steps*/ \
-ML_VERBOSE(9, ("Setting top %d %d\n", n_hr, ml_module->component_pairs[g_hr - 1].bcol_index)); \
+ML_VERBOSE(9, ("Setting top %d %d", n_hr, ml_module->component_pairs[g_hr - 1].bcol_index)); \
 info.call_for_top_function = true; \
 /* hier level run only top algorithm, so we deduct 1 */ \
 info.num_up_levels = n_hr - 1; \
 /* Top algorithm is called only once, so we deduct 1 */ \
 info.nbcol_functions = 2 * n_hr - 1; \
 } else { \
-ML_VERBOSE(9, ("not setting top %d %d\n", n_hr, ml_module->component_pairs[g_hr - 1].bcol_index)); \
+ML_VERBOSE(9, ("not setting top %d %d", n_hr, ml_module->component_pairs[g_hr - 1].bcol_index)); \
 /* The process is not member of highest level subgroup, \
 as result it does not call for top algorithm, \
 but it calls for all fan-in/out steps */ \
@@ -76,21 +76,21 @@ static int mca_coll_ml_build_static_reduce_schedule(
 schedule = *coll_desc;
 if (OPAL_UNLIKELY(NULL == schedule)) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 ret = OMPI_ERR_OUT_OF_RESOURCE;
 goto Error;
 }
 scratch_indx = (int *) malloc(sizeof(int) * (n_hiers));
 if (NULL == scratch_indx) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 ret = OMPI_ERR_OUT_OF_RESOURCE;
 goto Error;
 }
 scratch_num = (int *) malloc(sizeof(int) * (n_hiers));
 if (NULL == scratch_num) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 ret = OMPI_ERR_OUT_OF_RESOURCE;
 goto Error;
 }
@@ -136,7 +136,7 @@ static int mca_coll_ml_build_static_reduce_schedule(
 calloc(n_fcns, sizeof(struct mca_coll_ml_compound_functions_t));
 if (OPAL_UNLIKELY(NULL == schedule->component_functions)) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 ret = OMPI_ERR_OUT_OF_RESOURCE;
 goto Error;
 }
@@ -203,7 +203,7 @@ static int mca_coll_ml_build_static_reduce_schedule(
 schedule->comp_fn_arr = (struct mca_coll_ml_compound_functions_t **)
 calloc(n_hiers,sizeof(struct mca_coll_ml_compound_functions_t *));
 if (NULL == schedule->comp_fn_arr) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 ret = OMPI_ERR_OUT_OF_RESOURCE;
 goto Error;
 }
@@ -43,7 +43,7 @@ int ml_coll_up_and_down_hier_setup(mca_coll_ml_module_t *ml_module,
 collective_alg = (coll_ml_collective_description_t *)
 malloc(sizeof(coll_ml_collective_description_t));
 if (NULL == collective_alg) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 ret = OMPI_ERR_OUT_OF_RESOURCE;
 goto Error;
 }
@@ -78,7 +78,7 @@ int ml_coll_up_and_down_hier_setup(mca_coll_ml_module_t *ml_module,
 collective_alg->functions = (mca_bcol_base_function_t *)
 malloc(sizeof(mca_bcol_base_function_t) * collective_alg->n_functions);
 if( NULL == collective_alg->functions) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 ret = OMPI_ERR_OUT_OF_RESOURCE;
 goto Error;
 }
@@ -100,14 +100,14 @@ int ml_coll_up_and_down_hier_setup(mca_coll_ml_module_t *ml_module,
 * the total number of bcols in the row we store in scratch_num */
 scratch_indx = (int *) malloc(sizeof(int) * (2 * num_hierarchies));
 if(NULL == scratch_indx) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 ret = OMPI_ERR_OUT_OF_RESOURCE;
 goto Error;
 }
 scratch_num = (int *) malloc(sizeof(int) * (2 * num_hierarchies));
 if(NULL == scratch_num) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 ret = OMPI_ERR_OUT_OF_RESOURCE;
 goto Error;
 }
@@ -375,14 +375,14 @@ int ml_coll_barrier_constant_group_data_setup(
 * the total number of bcols in the row we store in scratch_num */
 scratch_indx = (int *) malloc(sizeof(int) * (2 * num_hierarchies));
 if(NULL == scratch_indx) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 ret = OMPI_ERR_OUT_OF_RESOURCE;
 goto Const_Data_Setup_Error;
 }
 scratch_num = (int *) malloc(sizeof(int) * (2 * num_hierarchies));
 if(NULL == scratch_num) {
-ML_ERROR(("Can't allocate memory.\n"));
+ML_ERROR(("Can't allocate memory."));
 ret = OMPI_ERR_OUT_OF_RESOURCE;
 goto Const_Data_Setup_Error;
 }
@@ -60,7 +60,7 @@ static inline __opal_attribute_always_inline__
 assert(ml_memblock->bank_is_busy);
 if (ml_memblock->memsync_counter == (int)bank_index) {
 while(ml_memblock->ready_for_memsync[ml_memblock->memsync_counter]) {
-ML_VERBOSE(10, ("Calling for service barrier: ml_buffer_index - %d %d %d == %d.\n",
+ML_VERBOSE(10, ("Calling for service barrier: ml_buffer_index - %d %d %d == %d.",
 ml_request->fragment_data.buffer_desc->buffer_index,
 ml_memblock->memsync_counter,
 ml_memblock->bank_release_counters[ml_memblock->memsync_counter],
@@ -81,7 +81,7 @@ static inline __opal_attribute_always_inline__
 ML_VERBOSE(10, ("After service barrier."));
 }
 } else {
-ML_VERBOSE(10, ("Out of order %d\n", ml_memblock->memsync_counter));
+ML_VERBOSE(10, ("Out of order %d", ml_memblock->memsync_counter));
 }
 }
@@ -252,17 +252,17 @@ mca_bcol_base_lmngr_block_t* mca_coll_ml_lmngr_alloc (
 /* Check if the list manager was initialized */
 if(OPAL_UNLIKELY(NULL == lmngr->base_addr)) {
-ML_VERBOSE(7 ,("Starting memory initialization\n"));
+ML_VERBOSE(7 ,("Starting memory initialization"));
 rc = mca_coll_ml_lmngr_init(lmngr);
 if (OMPI_SUCCESS != rc) {
-ML_ERROR(("Failed to init memory\n"));
+ML_ERROR(("Failed to init memory"));
 return NULL;
 }
 }
 if(OPAL_UNLIKELY(opal_list_is_empty(list))) {
 /* Upper layer need to handle the NULL */
-ML_ERROR(("List manager is empty.\n"));
+ML_ERROR(("List manager is empty."));
 return NULL;
 }
@@ -381,7 +381,7 @@ static void generate_active_bcols_list(mca_coll_ml_module_t *ml_module)
 * function is not provided we skip this bcol, since it isn't used
 * for memory synchronization (for instance - ptpcoll )*/
 if (NULL == GET_BCOL_SYNC_FN(bcol_module)) {
-ML_VERBOSE(10,(" No sync function was provided by bcol %s\n",
+ML_VERBOSE(10,(" No sync function was provided by bcol %s",
 bcol_module->bcol_component->bcol_version.mca_component_name));
 continue;
 }
@@ -527,7 +527,7 @@ static int ml_module_memory_initialization(mca_coll_ml_module_t *ml_module)
 ml_module->payload_block = mca_coll_ml_allocate_block(cs,ml_module->payload_block);
 if (NULL == ml_module->payload_block) {
-ML_ERROR(("mca_coll_ml_allocate_block exited with error.\n"));
+ML_ERROR(("mca_coll_ml_allocate_block exited with error."));
 return OMPI_ERROR;
 }
@@ -536,7 +536,7 @@ static int ml_module_memory_initialization(mca_coll_ml_module_t *ml_module)
 nbuffers = cs->n_payload_buffs_per_bank;
 buf_size = cs->payload_buffer_size;
-ML_VERBOSE(10, ("Call for initialize block.\n"));
+ML_VERBOSE(10, ("Call for initialize block."));
 ret = mca_coll_ml_initialize_block(ml_module->payload_block,
 nbuffers, nbanks, buf_size, ml_module->data_offset,
@@ -545,13 +545,13 @@ static int ml_module_memory_initialization(mca_coll_ml_module_t *ml_module)
 return ret;
 }
-ML_VERBOSE(10, ("Call for register bcols.\n"));
+ML_VERBOSE(10, ("Call for register bcols."));
 /* inititialize the memory with all of the bcols:
 loop through the bcol modules and invoke the memory init */
 ret = mca_coll_ml_register_bcols(ml_module);
 if (OMPI_SUCCESS != ret) {
-ML_ERROR(("mca_coll_ml_register_bcols returned an error.\n"));
+ML_ERROR(("mca_coll_ml_register_bcols returned an error."));
 /* goto CLEANUP; */
 return ret;
 }
@@ -578,7 +578,7 @@ static int check_global_view_of_subgroups( int n_procs_selected,
 /* more than one local leader - don't know how to
 * handle this, so bail
 */
-ML_VERBOSE(0, ("More than a single leader for a group.\n"));
+ML_VERBOSE(0, ("More than a single leader for a group."));
 ret=OMPI_ERROR;
 goto exit_ERROR;
 } else {
@@ -599,7 +599,7 @@ static int check_global_view_of_subgroups( int n_procs_selected,
 }
 }
 if( sum != n_procs_selected ) {
-ML_VERBOSE(0, ("number of procs in the group unexpected. Expected %d Got %d\n",n_procs_selected,sum));
+ML_VERBOSE(0, ("number of procs in the group unexpected. Expected %d Got %d",n_procs_selected,sum));
 ret=OMPI_ERROR;
 goto exit_ERROR;
 }
@@ -609,7 +609,7 @@ static int check_global_view_of_subgroups( int n_procs_selected,
 if(ll_p1!=all_selected[module->group_list[i]] &&
 ll_p1!=-all_selected[module->group_list[i]] ) {
 ret=OMPI_ERROR;
-ML_VERBOSE(0, ("Mismatch in rank list - element #%d - %d \n",i,all_selected[module->group_list[i]]));
+ML_VERBOSE(0, ("Mismatch in rank list - element #%d - %d ",i,all_selected[module->group_list[i]]));
 goto exit_ERROR;
 }
 }
@@ -644,7 +644,7 @@ static void ml_init_k_nomial_trees(mca_coll_ml_topology_t *topo, int *list_of_ra
 level_one_knt++;
 }
-/* fprintf(stderr,"PPP %d %d %d \n", level_one_knt, array_of_all_subgroup_ranks[0].level_in_hierarchy, num_total_subgroups); */
+/* fprintf(stderr,"PPP %d %d %d ", level_one_knt, array_of_all_subgroup_ranks[0].level_in_hierarchy, num_total_subgroups); */
 /* I want to cache this number for unpack*/
 array_of_all_subgroup_ranks->level_one_index = level_one_knt;
@@ -831,7 +831,7 @@ static int ml_setup_full_tree_data(mca_coll_ml_topology_t *topo,
 ompi_comm_size(comm), map_to_comm_ranks,
 comm);
 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
-ML_VERBOSE(10, ("comm_allreduce_pml failed. root reduction\n"));
+ML_VERBOSE(10, ("comm_allreduce_pml failed. root reduction"));
 goto exit_ERROR;
 }
@@ -840,13 +840,13 @@ static int ml_setup_full_tree_data(mca_coll_ml_topology_t *topo,
 MPI_INT, my_rank, ompi_comm_size(comm),
 map_to_comm_ranks,comm);
 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
-ML_VERBOSE(10, ("comm_bcast_pml failed. num_total_subgroups bcast\n"));
+ML_VERBOSE(10, ("comm_bcast_pml failed. num_total_subgroups bcast"));
 goto exit_ERROR;
 }
 scratch_space=(int *)malloc(4*sizeof(int)*(*num_total_subgroups));
 if (OPAL_UNLIKELY(NULL == scratch_space)) {
-ML_VERBOSE(10, ("Cannot allocate memory scratch_space.\n"));
+ML_VERBOSE(10, ("Cannot allocate memory scratch_space."));
 ret = OMPI_ERR_OUT_OF_RESOURCE;
 goto exit_ERROR;
 }
@@ -864,7 +864,7 @@ static int ml_setup_full_tree_data(mca_coll_ml_topology_t *topo,
 MPI_INT, my_rank, ompi_comm_size(comm),
 map_to_comm_ranks, comm);
 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
-ML_VERBOSE(10, ("comm_allreduce_pml failed. scratch_space bcast\n"));
+ML_VERBOSE(10, ("comm_allreduce_pml failed. scratch_space bcast"));
 goto exit_ERROR;
 }
 if( my_rank != root ) {
@@ -876,7 +876,7 @@ static int ml_setup_full_tree_data(mca_coll_ml_topology_t *topo,
 (*array_of_all_subgroup_ranks)=(sub_group_params_t *)
 malloc(sizeof(sub_group_params_t)*(*num_total_subgroups));
 if (OPAL_UNLIKELY(NULL == (*array_of_all_subgroup_ranks))) {
-ML_VERBOSE(10, ("Cannot allocate memory array_of_all_subgroup_ranks.\n"));
+ML_VERBOSE(10, ("Cannot allocate memory array_of_all_subgroup_ranks."));
 ret = OMPI_ERR_OUT_OF_RESOURCE;
 goto exit_ERROR;
 }
@@ -898,7 +898,7 @@ static int ml_setup_full_tree_data(mca_coll_ml_topology_t *topo,
 (*list_of_ranks_in_all_subgroups)=(int *)
 realloc((*list_of_ranks_in_all_subgroups),sizeof(int)*sum);
 if (OPAL_UNLIKELY(NULL == (*list_of_ranks_in_all_subgroups))) {
-ML_VERBOSE(10, ("Cannot allocate memory *list_of_ranks_in_all_subgroups.\n"));
+ML_VERBOSE(10, ("Cannot allocate memory *list_of_ranks_in_all_subgroups."));
 ret = OMPI_ERR_OUT_OF_RESOURCE;
 goto exit_ERROR;
 }
@@ -907,7 +907,7 @@ static int ml_setup_full_tree_data(mca_coll_ml_topology_t *topo,
 MPI_INT, my_rank, ompi_comm_size(comm),
 map_to_comm_ranks, comm);
 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
-ML_VERBOSE(10, ("Bcast failed for list_of_ranks_in_all_subgroups \n"));
+ML_VERBOSE(10, ("Bcast failed for list_of_ranks_in_all_subgroups "));
 goto exit_ERROR;
 }
@@ -1039,7 +1039,7 @@ static int get_new_subgroup_data (int32_t *all_selected, int size_of_all_selecte
 PROVIDE_SUFFICIENT_MEMORY(temp, dummy2,
 knt1, int32_t *, knt2, 1, 5);
 if (OPAL_UNLIKELY(NULL == (*sub_group_meta_data))) {
-ML_VERBOSE(10, ("Cannot allocate memory for sub_group_meta_data.\n"));
+ML_VERBOSE(10, ("Cannot allocate memory for sub_group_meta_data."));
 rc = OMPI_ERR_OUT_OF_RESOURCE;
 goto exit_ERROR;
 }
@@ -1050,7 +1050,7 @@ static int get_new_subgroup_data (int32_t *all_selected, int size_of_all_selecte
 temp[knt2]=
 (int *)malloc(sizeof(int)*size_of_all_selected);
 if (OPAL_UNLIKELY(NULL == temp[knt2] ) ){
-ML_VERBOSE(10, ("Cannot allocate memory for sub_group_meta_data.\n"));
+ML_VERBOSE(10, ("Cannot allocate memory for sub_group_meta_data."));
 rc = OMPI_ERR_OUT_OF_RESOURCE;
 goto exit_ERROR;
 }
@@ -1073,7 +1073,7 @@ static int get_new_subgroup_data (int32_t *all_selected, int size_of_all_selecte
 int, (*num_ranks_in_list_of_ranks_in_all_subgroups),
 size_of_all_selected,size_of_all_selected);
 if (OPAL_UNLIKELY(NULL == (*list_of_ranks_in_all_subgroups))) {
-ML_VERBOSE(10, ("Cannot allocate memory for list_of_ranks_in_all_subgroups.\n"));
+ML_VERBOSE(10, ("Cannot allocate memory for list_of_ranks_in_all_subgroups."));
 rc = OMPI_ERR_OUT_OF_RESOURCE;
 goto exit_ERROR;
 }
@@ -1112,7 +1112,7 @@ static int topo_parse (sub_group_params_t *sub_group_meta_data, int index, int *
 int total_ranks_represented = 0, ranks_represented;
 if (0 == sub_group_meta_data[index].level_in_hierarchy) {
-ML_VERBOSE(10, ("Copying data for index %d to %d. Ranks at this level: %d\n", index, *dst_offset,
+ML_VERBOSE(10, ("Copying data for index %d to %d. Ranks at this level: %d", index, *dst_offset,
 sub_group_meta_data[index].n_ranks));
 /* move level one subgroup data */
@@ -1123,14 +1123,14 @@ static int topo_parse (sub_group_params_t *sub_group_meta_data, int index, int *
 *dst_offset += sub_group_meta_data[index].n_ranks;
 }
-ML_VERBOSE(10, ("Subgroup %d has %d ranks. level = %d\n", index, sub_group_meta_data[index].n_ranks,
+ML_VERBOSE(10, ("Subgroup %d has %d ranks. level = %d", index, sub_group_meta_data[index].n_ranks,
 sub_group_meta_data[index].level_in_hierarchy));
 /* fill in subgroup ranks */
 sub_group_meta_data[index].rank_data=(rank_properties_t *)
 malloc(sizeof(rank_properties_t) * sub_group_meta_data[index].n_ranks);
 if (OPAL_UNLIKELY(NULL == sub_group_meta_data[index].rank_data)) {
-ML_VERBOSE(10, ("Cannot allocate memory for rank_data \n"));
+ML_VERBOSE(10, ("Cannot allocate memory for rank_data "));
 return OMPI_ERR_OUT_OF_RESOURCE;
 }
@@ -1148,11 +1148,11 @@ static int topo_parse (sub_group_params_t *sub_group_meta_data, int index, int *
 sub_group_meta_data[index].rank_data[j].rank = rank;
 if (sub_group_meta_data[index].level_in_hierarchy) {
-ML_VERBOSE(10, ("Looking for subgroup containing %d as root\n", rank));
+ML_VERBOSE(10, ("Looking for subgroup containing %d as root", rank));
 for (next_level = index - 1 ; next_level >= 0 ; --next_level) {
 if (rank == sub_group_meta_data[next_level].root_rank_in_comm) {
-ML_VERBOSE(10, ("Subgroup %d has root %d\n", next_level, rank));
+ML_VERBOSE(10, ("Subgroup %d has root %d", next_level, rank));
 break;
 }
 }
@@ -1178,7 +1178,7 @@ static int topo_parse (sub_group_params_t *sub_group_meta_data, int index, int *
 total_ranks_represented++;
 }
-ML_VERBOSE(10, ("Group %d, level %d, index %d, rank %d represents %d ranks\n", index,
+ML_VERBOSE(10, ("Group %d, level %d, index %d, rank %d represents %d ranks", index,
 sub_group_meta_data[index].level_in_hierarchy, j, rank,
 sub_group_meta_data[index].rank_data[j].num_of_ranks_represented));
 }
@@ -1334,7 +1334,7 @@ static int mca_coll_ml_read_allbcols_settings(mca_coll_ml_module_t *ml_module,
 */
 bcols_in_use = (int *) malloc(sizeof(int) * 2 * n_hierarchies);
 if (OPAL_UNLIKELY(NULL == bcols_in_use)) {
-ML_VERBOSE(10, ("Cannot allocate memory for bcols_in_use.\n"));
+ML_VERBOSE(10, ("Cannot allocate memory for bcols_in_use."));
 ret = OMPI_ERR_OUT_OF_RESOURCE;
 goto exit_ERROR;
 }
@@ -1382,7 +1382,7 @@ static int mca_coll_ml_read_allbcols_settings(mca_coll_ml_module_t *ml_module,
 MPI_MAX, ompi_comm_size(ml_module->comm),
 ranks_map, ml_module->comm);
 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
-ML_VERBOSE(10, ("comm_allreduce_pml failed. bcols_in_use reduction\n"));
+ML_VERBOSE(10, ("comm_allreduce_pml failed. bcols_in_use reduction"));
 goto exit_ERROR;
 }
@@ -1530,7 +1530,7 @@ static int ml_discover_hierarchy(mca_coll_ml_module_t *ml_module)
 if ((size_bcol_list != size_sbgp_list) || size_sbgp_list < 1 || size_bcol_list < 1) {
 ML_ERROR(("Error: (size of mca_bcol_base_components_in_use = %d)"
-" != (size of mca_sbgp_base_components_in_use = %d) or zero.\n",
+" != (size of mca_sbgp_base_components_in_use = %d) or zero.",
 size_bcol_list, size_sbgp_list));
 return OMPI_ERROR;
 }
@@ -1596,7 +1596,7 @@ static int ml_discover_hierarchy(mca_coll_ml_module_t *ml_module)
 comm_ranks = (int *)calloc(ompi_comm_size(ml_module->comm), sizeof(int));
 if (OPAL_UNLIKELY(NULL == comm_ranks)) {
-ML_VERBOSE(10, ("Cannot allocate memory.\n"));
+ML_VERBOSE(10, ("Cannot allocate memory."));
 return OMPI_ERR_OUT_OF_RESOURCE;
 }
@@ -1670,14 +1670,14 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
 /* allocates scratch space */
 all_selected = (int32_t *) calloc(ompi_comm_size(ml_module->comm), sizeof(int32_t));
 if (OPAL_UNLIKELY(NULL == all_selected)) {
-ML_VERBOSE(10, ("Cannot allocate memory.\n"));
+ML_VERBOSE(10, ("Cannot allocate memory."));
 ret = OMPI_ERR_OUT_OF_RESOURCE;
 goto exit_ERROR;
 }
 map_to_comm_ranks = (int *) calloc(ompi_comm_size(ml_module->comm), sizeof(int));
 if (OPAL_UNLIKELY(NULL == map_to_comm_ranks)) {
-ML_VERBOSE(10, ("Cannot allocate memory.\n"));
+ML_VERBOSE(10, ("Cannot allocate memory."));
 ret = OMPI_ERR_OUT_OF_RESOURCE;
 goto exit_ERROR;
 }
@@ -1689,7 +1689,7 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
 copy_procs = (ompi_proc_t **) calloc(ompi_comm_size(ml_module->comm),
 sizeof(ompi_proc_t *));
 if (OPAL_UNLIKELY(NULL == copy_procs)) {
-ML_VERBOSE(10, ("Cannot allocate memory.\n"));
+ML_VERBOSE(10, ("Cannot allocate memory."));
 ret = OMPI_ERR_OUT_OF_RESOURCE;
 goto exit_ERROR;
 }
@@ -1706,7 +1706,7 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
 /* setup information for all-reduce over out of band */
 index_proc_selected = (int32_t *) malloc(sizeof(int32_t) * n_procs_in);
 if (OPAL_UNLIKELY(NULL == index_proc_selected)) {
-ML_VERBOSE(10, ("Cannot allocate memory.\n"));
+ML_VERBOSE(10, ("Cannot allocate memory."));
 ret = OMPI_ERR_OUT_OF_RESOURCE;
 goto exit_ERROR;
 }
@@ -1717,7 +1717,7 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
 topo->component_pairs = (hierarchy_pairs *) calloc(n_hierarchies, sizeof(hierarchy_pairs));
 if (OPAL_UNLIKELY(NULL == topo->component_pairs)) {
-ML_VERBOSE(10, ("Cannot allocate memory.\n"));
+ML_VERBOSE(10, ("Cannot allocate memory."));
 ret = OMPI_ERR_OUT_OF_RESOURCE;
 goto exit_ERROR;
 }
@@ -1758,7 +1758,7 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
 sbgp_cli = (sbgp_base_component_keyval_t *) opal_list_get_first(&mca_sbgp_base_components_in_use);
 bcol_cli = (mca_base_component_list_item_t *) opal_list_get_first(&mca_bcol_base_components_in_use);
-ML_VERBOSE(10, ("Loop over hierarchies.\n"));
+ML_VERBOSE(10, ("Loop over hierarchies."));
 i_hier = 0;
 while ((opal_list_item_t *) sbgp_cli != opal_list_get_end(&mca_sbgp_base_components_in_use)){
@@ -1797,7 +1797,7 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
 ML_VERBOSE(10,("Passed include %s exclude %s", include_sbgp_name, exclude_sbgp_name));
 /* discover subgroup */
-ML_VERBOSE(10, ("Discover subgroup: hier level - %d.\n", i_hier));
+ML_VERBOSE(10, ("Discover subgroup: hier level - %d.", i_hier));
 module = sbgp_component->select_procs(copy_procs, n_procs_in,
 ml_module->comm,
 sbgp_cli->key_value, &ptr_output);
@@ -1832,7 +1832,7 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
 n_procs_selected = module->group_size;
 }
-ML_VERBOSE(10, ("Hier level - %d; group size - %d\n", i_hier, n_procs_selected));
+ML_VERBOSE(10, ("Hier level - %d; group size - %d", i_hier, n_procs_selected));
 /* setup array indicating all procs that were selected */
 for (i = 0; i < n_procs_in; i++) {
@@ -1874,7 +1874,7 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
 local_leader = map_to_comm_ranks[module->group_list[0]];
 #endif
-ML_VERBOSE(10,("The local leader selected for hierarchy %d is rank %d \n",
+ML_VERBOSE(10,("The local leader selected for hierarchy %d is rank %d ",
 i_hier, local_leader));
 ll_p1 = local_leader + 1;
@@ -1889,12 +1889,12 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
 }
 /* gather the information from all the other remaining ranks */
-ML_VERBOSE(10, ("Call for comm_allreduce_pml.\n"));
+ML_VERBOSE(10, ("Call for comm_allreduce_pml."));
 ret = comm_allgather_pml(&in_allgather_value,
 all_selected, 1, MPI_INT, my_rank_in_list,
 n_procs_in, map_to_comm_ranks ,ml_module->comm);
 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
-ML_VERBOSE(10, ("comm_allreduce_pml failed.\n"));
+ML_VERBOSE(10, ("comm_allreduce_pml failed."));
 goto exit_ERROR;
 }
@@ -1903,7 +1903,7 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
 ret = check_global_view_of_subgroups(n_procs_selected,
 n_procs_in, ll_p1, all_selected, module );
 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
-ML_VERBOSE(10, ("check_global_view_of_subgroups failed.\n"));
+ML_VERBOSE(10, ("check_global_view_of_subgroups failed."));
 goto exit_ERROR;
 }
 }
@@ -1913,7 +1913,7 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
 ** the communicator.
 */
-ML_VERBOSE(10, ("Change the list of procs; hier level - %d.\n", i_hier));
+ML_VERBOSE(10, ("Change the list of procs; hier level - %d.", i_hier));
 for (group_index = 0; group_index < n_procs_selected; group_index++) {
 module->group_list[group_index] = map_to_comm_ranks[module->group_list[group_index]];
 /* set my rank within the group */
@@ -1935,7 +1935,7 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
 &num_total_subgroups, map_to_comm_ranks,i_hier);
 if( OMPI_SUCCESS != ret ) {
-ML_VERBOSE(10, (" Error: get_new_subgroup_data returned %d \n",ret));
+ML_VERBOSE(10, (" Error: get_new_subgroup_data returned %d ",ret));
 goto exit_ERROR;
 }
@@ -1986,11 +1986,11 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
 pair->bcol_index = i_hier;
 /* create bcol modules */
-ML_VERBOSE(10, ("Create bcol modules.\n"));
+ML_VERBOSE(10, ("Create bcol modules."));
 pair->bcol_modules = pair->bcol_component->collm_comm_query(module, &pair->num_bcol_modules);
 /* failed to create a new module */
 if (OPAL_UNLIKELY(NULL == pair->bcol_modules)) {
-ML_VERBOSE(10, ("Failed to create new modules.\n"));
+ML_VERBOSE(10, ("Failed to create new modules."));
 ret = OMPI_ERROR;
 goto exit_ERROR;
 }
@@ -2000,9 +2000,9 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
 }
 /* Append new network contexts to our memory managment */
-ML_VERBOSE(10, ("Append new network contexts to our memory managment.\n"));
+ML_VERBOSE(10, ("Append new network contexts to our memory managment."));
 if (OPAL_UNLIKELY(OMPI_SUCCESS != append_new_network_context(pair))) {
-ML_VERBOSE(10, ("Exit with error. - append new network context\n"));
+ML_VERBOSE(10, ("Exit with error. - append new network context"));
 ret = OMPI_ERROR;
 goto exit_ERROR;
 }
@@ -2087,6 +2087,9 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
 if (0 == i_am_done) {
 if (NULL != include_sbgp_name || NULL != exclude_sbgp_name) {
 /* User explicitly asked for specific type of topology, which generates empty group */
+/* JMS You really should use opal_show_help() here;
+showing long error messages is *exactly* what
+opal_show_help() is for. */
 ML_ERROR(("ML topology configuration explicitly requested to %s subgroup %s. "
 "Such configuration results in a creation of empty groups. As a result, ML framework can't "
 "configure requested collective operations. ML framework will be disabled.",
@@ -2120,14 +2123,14 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
 MPI_MIN, original_group_size,
 map_to_comm_ranks, ml_module->comm);
 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
-ML_VERBOSE(10, ("comm_allreduce_pml failed. all_reduce_buffer2_in reduction\n"));
+ML_VERBOSE(10, ("comm_allreduce_pml failed. all_reduce_buffer2_in reduction"));
 goto exit_ERROR;
 }
 topo->global_lowest_hier_group_index = all_reduce_buffer2_out[0];
 topo->global_highest_hier_group_index = -all_reduce_buffer2_out[1];
-ML_VERBOSE(10, ("The lowest index and highest index was successfully found.\n"));
+ML_VERBOSE(10, ("The lowest index and highest index was successfully found."));
 ML_VERBOSE(10, ("ml_discover_hierarchy done, n_levels %d lowest_group_index %d highest_group_index %d,"
 " original_group_size %d my_lowest_group_index %d my_highest_group_index %d",
@@ -2145,7 +2148,7 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
 &list_of_ranks_in_all_subgroups);
 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
-ML_VERBOSE(10, ("comm_allreduce_pml failed: bcols_in_use reduction %d \n",ret));
+ML_VERBOSE(10, ("comm_allreduce_pml failed: bcols_in_use reduction %d ",ret));
 goto exit_ERROR;
 }
@@ -2158,7 +2161,7 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
 if (COLL_ML_STATIC_BCAST == mca_coll_ml_component.bcast_algorithm) {
 ret = mca_coll_ml_fill_in_route_tab(topo, ml_module->comm);
 if (OMPI_SUCCESS != ret) {
-ML_ERROR(("mca_coll_ml_fill_in_route_tab returned an error.\n"));
+ML_ERROR(("mca_coll_ml_fill_in_route_tab returned an error."));
 goto exit_ERROR;
 }
 }
@@ -2172,7 +2175,7 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
 exit_ERROR:
-ML_VERBOSE(10, ("Discovery done\n"));
+ML_VERBOSE(10, ("Discovery done"));
 /* free temp resources */
 if (NULL != all_selected) {
@@ -2273,13 +2276,16 @@ int mca_coll_ml_allreduce_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
 if (1 == opal_list_get_size(&mca_bcol_base_components_in_use) ||
 (opal_list_item_t *) bcol_cli_next ==
 opal_list_get_end(&mca_bcol_base_components_in_use)) {
-ML_ERROR(("\n--------------------------------------------------------------------------------\n"
-"The BCOL component %s doesn't support \n"
-"all possible tuples (OPERATION X DATATYPE) for Allreduce \n"
-"and you didn't provide additional one for alternative topology building, \n"
-"as a result ML isn't be run correctly and its behavior is undefined. \n"
-"You should run this bcol with another one supports all possible tuples, \n"
-"\"--mca bcol_base_string %s,ptpcoll --mca sbgp_base_subgroups_string %s,p2p\" for example.\n",
+/* JMS You really should use opal_show_help() here;
+showing long error messages is *exactly* what
+opal_show_help() is for. */
+ML_ERROR(("\n--------------------------------------------------------------------------------"
+"The BCOL component %s doesn't support "
+"all possible tuples (OPERATION X DATATYPE) for Allreduce "
+"and you didn't provide additional one for alternative topology building, "
+"as a result ML isn't be run correctly and its behavior is undefined. "
+"You should run this bcol with another one supports all possible tuples, "
+"\"--mca bcol_base_string %s,ptpcoll --mca sbgp_base_subgroups_string %s,p2p\" for example.",
 bcol_component->bcol_version.mca_component_name,
 bcol_component->bcol_version.mca_component_name,
 sbgp_component->sbgp_version.mca_component_name));
@@ -2289,14 +2295,17 @@ int mca_coll_ml_allreduce_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
 if (NULL != bcol_component_next->coll_support_all_types &&
 !bcol_component_next->coll_support_all_types(BCOL_ALLREDUCE)) {
-ML_ERROR(("\n--------------------------------------------------------------------------------\n"
-"The BCOL component %s doesn't support \n"
-"all possible tuples for Allreduce. \n"
-"While you did provid an additional %s bcol component for alternative topology building, \n"
-"this component also lacks support for all tuples. \n"
-"As a result, ML Allreduce's behavior is undefined. \n"
-"You must provide a component that supports all possible tuples, e.g. \n"
-"\"--mca bcol_base_string %s,ptpcoll --mca sbgp_base_subgroups_string %s,p2p\n",
+/* JMS You really should use opal_show_help() here;
+showing long error messages is *exactly* what
+opal_show_help() is for. */
+ML_ERROR(("\n--------------------------------------------------------------------------------"
+"The BCOL component %s doesn't support "
+"all possible tuples for Allreduce. "
+"While you did provid an additional %s bcol component for alternative topology building, "
+"this component also lacks support for all tuples. "
+"As a result, ML Allreduce's behavior is undefined. "
+"You must provide a component that supports all possible tuples, e.g. "
+"\"--mca bcol_base_string %s,ptpcoll --mca sbgp_base_subgroups_string %s,p2p",
 bcol_component->bcol_version.mca_component_name,
 bcol_component_next->bcol_version.mca_component_name,
 bcol_component->bcol_version.mca_component_name,
@@ -2337,8 +2346,8 @@ int mca_coll_ml_fulltree_iboffload_only_hierarchy_discovery(mca_coll_ml_module_t
 n_hierarchies, NULL, "ibnet");
 }
-#define IS_RECHABLE 1
-#define IS_NOT_RECHABLE -1
+#define IS_REACHABLE 1
+#define IS_NOT_REACHABLE -1
 static int mca_coll_ml_fill_in_route_tab(mca_coll_ml_topology_t *topo, ompi_communicator_t *comm)
 {
@@ -2355,18 +2364,18 @@ static int mca_coll_ml_fill_in_route_tab(mca_coll_ml_topology_t *topo, ompi_comm
 all_reachable_ranks = (int32_t *) malloc(comm_size * sizeof(int32_t));
 if (NULL == all_reachable_ranks) {
-ML_VERBOSE(10, ("Cannot allocate memory.\n"));
+ML_VERBOSE(10, ("Cannot allocate memory."));
 rc = OMPI_ERR_OUT_OF_RESOURCE;
 goto exit_ERROR;
 }
 for (i = 0; i < comm_size; ++i) {
-all_reachable_ranks[i] = IS_NOT_RECHABLE;
+all_reachable_ranks[i] = IS_NOT_REACHABLE;
 }
 route_table = (int32_t **) calloc(topo->n_levels, sizeof(int32_t *));
 if (NULL == route_table) {
-ML_VERBOSE(10, ("Cannot allocate memory.\n"));
+ML_VERBOSE(10, ("Cannot allocate memory."));
 rc = OMPI_ERR_OUT_OF_RESOURCE;
 goto exit_ERROR;
 }
@@ -2374,25 +2383,25 @@ static int mca_coll_ml_fill_in_route_tab(mca_coll_ml_topology_t *topo, ompi_comm
 topo->route_vector = (mca_bcol_base_route_info_t *)
 calloc(comm_size, sizeof(mca_bcol_base_route_info_t));
 if (NULL == topo->route_vector) {
-ML_VERBOSE(10, ("Cannot allocate memory.\n"));
+ML_VERBOSE(10, ("Cannot allocate memory."));
 rc = OMPI_ERR_OUT_OF_RESOURCE;
 goto exit_ERROR;
 }
-all_reachable_ranks[my_rank] = IS_RECHABLE;
+all_reachable_ranks[my_rank] = IS_REACHABLE;
 for (level = 0; level < topo->n_levels; ++level) {
 sbgp_group = topo->component_pairs[level].subgroup_module;
 route_table[level] = (int32_t *) malloc(comm_size * sizeof(int32_t));
 if (NULL == route_table[level]) {
-ML_VERBOSE(10, ("Cannot allocate memory.\n"));
+ML_VERBOSE(10, ("Cannot allocate memory."));
 rc = OMPI_ERR_OUT_OF_RESOURCE;
 goto exit_ERROR;
 }
 for (i = 0; i < comm_size; ++i) {
-if (IS_NOT_RECHABLE != all_reachable_ranks[i]) {
+if (IS_NOT_REACHABLE != all_reachable_ranks[i]) {
 all_reachable_ranks[i] = sbgp_group->my_index;
 }
 }
@@ -2405,14 +2414,14 @@ static int mca_coll_ml_fill_in_route_tab(mca_coll_ml_topology_t *topo, ompi_comm
 sbgp_group->group_list,
 comm);
 if (OMPI_SUCCESS != rc) {
-ML_VERBOSE(10, ("comm_allreduce failed.\n"));
+ML_VERBOSE(10, ("comm_allreduce failed."));
 goto exit_ERROR;
 }
 for (i = 0; i < comm_size; ++i) {
-if (IS_NOT_RECHABLE !=
+if (IS_NOT_REACHABLE !=
 route_table[level][i]) {
-all_reachable_ranks[i] = IS_RECHABLE;
+all_reachable_ranks[i] = IS_REACHABLE;
 }
 }
 }
@@ -2422,7 +2431,7 @@ static int mca_coll_ml_fill_in_route_tab(mca_coll_ml_topology_t *topo, ompi_comm
 /* If there are unreachable ranks =>
 reach them through leader of my upper layer */
 for (i = 0; i < comm_size; ++i) {
-if (IS_NOT_RECHABLE ==
+if (IS_NOT_REACHABLE ==
 route_table[level - 1][i]) {
 route_table[level - 1][i] = 0;
 }
@@ -2432,7 +2441,7 @@ static int mca_coll_ml_fill_in_route_tab(mca_coll_ml_topology_t *topo, ompi_comm
 for (i = 0; i < comm_size; ++i) {
 for (level = 0; level < topo->n_levels; ++level) {
-if (IS_NOT_RECHABLE != route_table[level][i]) {
+if (IS_NOT_REACHABLE != route_table[level][i]) {
 topo->route_vector[i].level = level;
 topo->route_vector[i].rank = route_table[level][i];
 break;
@@ -2502,7 +2511,7 @@ static int mca_coll_ml_fill_in_route_tab(mca_coll_ml_topology_t *topo, ompi_comm
 assert(COLL_ML_ROUTE_BUFF_SIZE + buff > output);
 }
-ML_VERBOSE(10, ("\nThe table is:\n============\n%s\n", buff));
+ML_VERBOSE(10, ("\nThe table is:\n============%s", buff));
 free(buff);
 }
 #endif
@@ -2517,7 +2526,7 @@ static int mca_coll_ml_fill_in_route_tab(mca_coll_ml_topology_t *topo, ompi_comm
 exit_ERROR:
-ML_VERBOSE(10, ("Exit with error status - %d.\n", rc));
+ML_VERBOSE(10, ("Exit with error status - %d.", rc));
 if (NULL != route_table) {
 for (level = 0; level < topo->n_levels; ++level) {
 if (NULL != route_table[level]) {
@@ -2689,7 +2698,7 @@ static int check_for_max_supported_ml_modules(struct ompi_communicator_t *comm)
 comm_ranks = (int *)calloc(ompi_comm_size(comm), sizeof(int));
 if (OPAL_UNLIKELY(NULL == comm_ranks)) {
-ML_VERBOSE(10, ("Cannot allocate memory.\n"));
+ML_VERBOSE(10, ("Cannot allocate memory."));
 return OMPI_ERR_OUT_OF_RESOURCE;
 }
 for (i = 0; i < ompi_comm_size(comm); i++) {
@@ -2723,7 +2732,7 @@ static int check_for_max_supported_ml_modules(struct ompi_communicator_t *comm)
 static int verbosity_level = 5; \
 static int module_num = 0; \
 ML_VERBOSE(10, ("ML module - %p num %d for comm - %p, " \
-"comm size - %d, ML component prio - %d.\n", \
+"comm size - %d, ML component prio - %d.", \
 ml_module, ++module_num, comm, ompi_comm_size(comm), *priority)); \
 /* For now I want to always print that we enter ML - \
 at the past there was an issue that we did not enter ML and actually run with tuned. \
@@ -2773,6 +2782,9 @@ static int setup_bcast_table(mca_coll_ml_module_t *module)
 if (1 == cm->enable_fragmentation || (2 == cm->enable_fragmentation && !has_zero_copy)) {
 module->bcast_fn_index_table[1] = ML_BCAST_SMALL_DATA_KNOWN;
 } else if (!has_zero_copy) {
+/* JMS You really should use opal_show_help() here;
+showing long error messages is *exactly* what
+opal_show_help() is for. */
 ML_ERROR(("ML couldn't be used: because the mca param coll_ml_enable_fragmentation "
 "was set to zero and there is a bcol doesn't support zero copy method."));
 return OMPI_ERROR;
@@ -2783,6 +2795,9 @@ static int setup_bcast_table(mca_coll_ml_module_t *module)
 module->bcast_fn_index_table[0] = ML_BCAST_SMALL_DATA_UNKNOWN;
 if (NULL == module->coll_ml_bcast_functions[ML_BCAST_LARGE_DATA_UNKNOWN]) {
+/* JMS You really should use opal_show_help() here;
+showing long error messages is *exactly* what
+opal_show_help() is for. */
 ML_ERROR(("ML couldn't be used: because the mca param coll_ml_bcast_algorithm was not set "
 "to static and no function is available."));
 return OMPI_ERROR;
@@ -2794,6 +2809,9 @@ static int setup_bcast_table(mca_coll_ml_module_t *module)
 if (1 == cm->enable_fragmentation || (2 == cm->enable_fragmentation && !has_zero_copy)) {
 module->bcast_fn_index_table[1] = ML_BCAST_SMALL_DATA_UNKNOWN;
 } else if (!has_zero_copy) {
+/* JMS You really should use opal_show_help() here;
+showing long error messages is *exactly* what
+opal_show_help() is for. */
 ML_ERROR(("ML couldn't be used: because the mca param coll_ml_enable_fragmentation "
 "was set to zero and there is a bcol doesn't support zero copy method."));
 return OMPI_ERROR;
@@ -2915,7 +2933,7 @@ mca_coll_ml_comm_query(struct ompi_communicator_t *comm, int *priority)
 mca_coll_ml_component_t *cs = &mca_coll_ml_component;
 bool iboffload_was_requested = mca_coll_ml_check_if_bcol_is_requested("iboffload");
-ML_VERBOSE(10, ("ML comm query start.\n"));
+ML_VERBOSE(10, ("ML comm query start."));
 /**
 * No support for inter-communicator yet.
@@ -2936,7 +2954,7 @@ mca_coll_ml_comm_query(struct ompi_communicator_t *comm, int *priority)
 * to handle the intra collective communications.
 */
 if (OMPI_COMM_IS_INTRA(comm) && ompi_comm_size(comm) < 2) {
-ML_VERBOSE(10, ("It is inter-communicator and size is less than 2.\n"));
+ML_VERBOSE(10, ("It is inter-communicator and size is less than 2."));
 *priority = -1;
 return NULL;
 }
@@ -2955,7 +2973,7 @@ mca_coll_ml_comm_query(struct ompi_communicator_t *comm, int *priority)
 }
 }
-ML_VERBOSE(10, ("Create ML module start.\n"));
+ML_VERBOSE(10, ("Create ML module start."));
 /* allocate and initialize an ml module */
 ml_module = OBJ_NEW(mca_coll_ml_module_t);
@@ -2988,14 +3006,14 @@ mca_coll_ml_comm_query(struct ompi_communicator_t *comm, int *priority)
 */
 ret = ml_discover_hierarchy(ml_module);
 if (OMPI_SUCCESS != ret) {
-ML_ERROR(("ml_discover_hierarchy exited with error.\n"));
+ML_ERROR(("ml_discover_hierarchy exited with error."));
 goto CLEANUP;
 }
 /* gvm Disabled for debuggin */
 ret = mca_coll_ml_build_filtered_fn_table(ml_module);
 if (OMPI_SUCCESS != ret) {
-ML_ERROR(("mca_coll_ml_build_filtered_fn_table returned an error.\n"));
+ML_ERROR(("mca_coll_ml_build_filtered_fn_table returned an error."));
 goto CLEANUP;
 }
@@ -3006,7 +3024,7 @@ mca_coll_ml_comm_query(struct ompi_communicator_t *comm, int *priority)
 one module instantiated. We may want to use the same collective cap
 capabilities over more than one set of procs. Each module will store
 the relevant information for a given set of procs */
-ML_VERBOSE(10, ("Call for setup schedule.\n"));
+ML_VERBOSE(10, ("Call for setup schedule."));
 ret = ml_coll_schedule_setup(ml_module);
 if (OMPI_SUCCESS != ret) {
 ML_ERROR(("ml_coll_schedule_setup exit with error"));
@@ -3014,17 +3032,17 @@ mca_coll_ml_comm_query(struct ompi_communicator_t *comm, int *priority)
 }
 /* Setup bcast table */
-ML_VERBOSE(10, ("Setup bcast table\n"));
+ML_VERBOSE(10, ("Setup bcast table"));
 ret = setup_bcast_table(ml_module);
 if (OMPI_SUCCESS != ret) {
 ML_ERROR(("setup_bcast_table exit with error"));
 goto CLEANUP;
 }
-ML_VERBOSE(10, ("Setup pointer to collectives calls.\n"));
+ML_VERBOSE(10, ("Setup pointer to collectives calls."));
 init_coll_func_pointers(ml_module);
-ML_VERBOSE(10, ("Setup free lists\n"));
+ML_VERBOSE(10, ("Setup free lists"));
 ret = init_lists(ml_module);
 if (OMPI_SUCCESS != ret) {
 goto CLEANUP;
@@ -3130,7 +3148,7 @@ ml_module_enable(mca_coll_base_module_t *module,
 snprintf(output_buffer, sizeof(output_buffer), "%s (cid %d)", comm->c_name,
 comm->c_contextid);
-ML_VERBOSE(10, ("coll:ml:enable: new communicator: %s.\n", output_buffer));
+ML_VERBOSE(10, ("coll:ml:enable: new communicator: %s.", output_buffer));
 /* All done */
 return OMPI_SUCCESS;
|
@ -249,7 +249,7 @@ static int mca_coll_ml_reduce_frag_progress(mca_coll_ml_collective_operation_pro
|
||||
new_op->sequential_routine.current_bcol_status = SEQ_TASK_PENDING;
|
||||
MCA_COLL_ML_SET_NEW_FRAG_ORDER_INFO(new_op);
|
||||
|
||||
ML_VERBOSE(10,("FFFF Contig + fragmentation [0-sk, 1-lk, 3-su, 4-lu] %d %d %d\n",
|
||||
ML_VERBOSE(10,("FFFF Contig + fragmentation [0-sk, 1-lk, 3-su, 4-lu] %d %d %d",
|
||||
new_op->variable_fn_params.buffer_size,
|
||||
new_op->fragment_data.fragment_size,
|
||||
new_op->fragment_data.message_descriptor->n_bytes_scheduled));
|
||||
|
@ -84,7 +84,7 @@ int get_dev_distance_proc(opal_carto_graph_t *graph,
|
||||
|
||||
opal_paffinity_base_get_map_to_socket_core(process_id, &socket, &core);
|
||||
asprintf(&slot, "socket%d", socket);
|
||||
ML_VERBOSE(10,("The socket addres is %d\n",socket));
|
||||
ML_VERBOSE(10,("The socket addres is %d",socket));
|
||||
|
||||
slot_node = opal_carto_base_find_node(graph, slot);
|
||||
|
||||
@ -123,7 +123,7 @@ int coll_ml_select_leader(mca_coll_ml_module_t *ml_module,
|
||||
|
||||
dist = dist1 + dist2;
|
||||
|
||||
ML_VERBOSE(10,("The distance for proc %d dist1 %d, dist2 %d \n",i,dist1,dist2));
|
||||
ML_VERBOSE(10,("The distance for proc %d dist1 %d, dist2 %d",i,dist1,dist2));
|
||||
if ((dist < min_dist) || ((dist == min_dist) && (i < leader))) {
|
||||
leader = i;
|
||||
min_dist = dist;
|
||||
|
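On the opal_show_help() note in the commit message and in the JMS comments above: the usual fix is to move the long inline ML_ERROR() text into an installed help-*.txt file and reference it by topic. A hedged sketch of what that follow-up might look like; the file name "help-mpi-coll-ml.txt", the topic string, and the wrapper function below are hypothetical, not part of this commit.

#include "opal/util/show_help.h"

/* Hypothetical replacement for one of the long ML_ERROR() messages above.
 * opal_show_help(filename, topic, want_error_header, ...) looks the topic up
 * in the installed help file and substitutes the varargs into its %s slots,
 * so the multi-line explanation lives in a text file rather than in C code. */
static void warn_unsupported_allreduce_tuples(const char *bcol_name,
                                              const char *sbgp_name)
{
    opal_show_help("help-mpi-coll-ml.txt", "allreduce-unsupported-tuples",
                   true, bcol_name, bcol_name, sbgp_name);
}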