1
1

Remove unnecessary \n's in ML_VERBOSE and ML_ERROR.

Also fix spelling: IS_NOT_RECHABLE -> IS_NOT_REACHABLE.

Also mark a few places where opal_show_help() should have been used;
Manju will take care of these.

This commit was SVN r31104.
Этот коммит содержится в:
Jeff Squyres 2014-03-18 12:24:32 +00:00
родитель 0aa23cdc35
Коммит 5efd961149
17 изменённых файлов: 174 добавлений и 156 удалений

Просмотреть файл

@ -216,7 +216,7 @@ static int mca_coll_ml_allreduce_frag_progress(mca_coll_ml_collective_operation_
MCA_COLL_ML_SET_NEW_FRAG_ORDER_INFO(new_op);
ML_VERBOSE(10,("FFFF Contig + fragmentation [0-sk, 1-lk, 3-su, 4-lu] %d %d %d\n",
ML_VERBOSE(10,("FFFF Contig + fragmentation [0-sk, 1-lk, 3-su, 4-lu] %d %d %d",
new_op->variable_fn_params.buffer_size,
new_op->fragment_data.fragment_size,
new_op->fragment_data.message_descriptor->n_bytes_scheduled));

Просмотреть файл

@ -418,7 +418,7 @@ static int mca_coll_ml_bcast_frag_progress(mca_coll_ml_collective_operation_prog
/* Set order info for new frag if there is a bcol needs ordering */
MCA_COLL_ML_SET_NEW_FRAG_ORDER_INFO(new_op);
ML_VERBOSE(10, ("FFFF Contig + fragmentation [0-sk, 1-lk, 3-su, 4-lu] %d %d %d\n",
ML_VERBOSE(10, ("FFFF Contig + fragmentation [0-sk, 1-lk, 3-su, 4-lu] %d %d %d",
new_op->variable_fn_params.buffer_size ,
new_op->fragment_data.fragment_size,
new_op->fragment_data.message_descriptor->n_bytes_scheduled));
@ -475,7 +475,7 @@ static inline __opal_attribute_always_inline__
assert(pack_len <= ml_module->payload_block->size_buffer);
bcast_index = ml_module->bcast_fn_index_table[SMALL_BCAST];
ML_VERBOSE(10, ("Contig + small message %d [0-sk, 1-lk, 3-su, 4-lu]\n", bcast_index));
ML_VERBOSE(10, ("Contig + small message %d [0-sk, 1-lk, 3-su, 4-lu]", bcast_index));
ALLOCATE_AND_PACK_CONTIG_BCAST_FRAG(ml_module, coll_op, bcast_index, root, pack_len,
pack_len, actual_buf, src_buffer_desc);
@ -489,7 +489,7 @@ static inline __opal_attribute_always_inline__
int frag_len, pipeline_depth = mca_coll_ml_component.pipeline_depth;
bcast_index = ml_module->bcast_fn_index_table[LARGE_BCAST];
ML_VERBOSE(10, ("Contig + fragmentation %d [0-sk, 1-lk, 3-su, 4-lu]\n", bcast_index));
ML_VERBOSE(10, ("Contig + fragmentation %d [0-sk, 1-lk, 3-su, 4-lu]", bcast_index));
/* Calculate the number of fragments required for this message */
frag_len = (pack_len < (size_t) ml_module->small_message_thresholds[BCOL_BCAST] ?
@ -514,12 +514,12 @@ static inline __opal_attribute_always_inline__
/* should be removed */
coll_op->variable_fn_params.buffer_size = frag_len;
ML_VERBOSE(10, ("Contig + fragmentation [0-sk, 1-lk, 3-su, 4-lu] %d %d\n",
ML_VERBOSE(10, ("Contig + fragmentation [0-sk, 1-lk, 3-su, 4-lu] %d %d",
coll_op->variable_fn_params.buffer_size,
coll_op->fragment_data.fragment_size));
} else {
bcast_index = ml_module->bcast_fn_index_table[LARGE_BCAST];
ML_VERBOSE(10, ("Contig + zero copy %d [0-sk, 1-lk, 3-su, 4-lu]\n", bcast_index));
ML_VERBOSE(10, ("Contig + zero copy %d [0-sk, 1-lk, 3-su, 4-lu]", bcast_index));
coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module,
ml_module->coll_ml_bcast_functions[bcast_index],
@ -539,7 +539,7 @@ static inline __opal_attribute_always_inline__
} else {
/* Non contiguous data type */
bcast_index = ml_module->bcast_fn_index_table[SMALL_BCAST];
ML_VERBOSE(10, ("NON Contig + fragmentation %d [0-sk, 1-lk, 3-su, 4-lu]\n", bcast_index));
ML_VERBOSE(10, ("NON Contig + fragmentation %d [0-sk, 1-lk, 3-su, 4-lu]", bcast_index));
coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module,
ml_module->coll_ml_bcast_functions[bcast_index],

Просмотреть файл

@ -116,7 +116,7 @@ int mca_coll_ml_check_if_bcol_is_requested(const char *component_name)
mca_base_component_list_item_t *bcol_comp;
bcol_comp = (mca_base_component_list_item_t *) opal_list_get_first(&mca_bcol_base_components_in_use);
ML_VERBOSE(10, ("Loop over bcol components\n"));
ML_VERBOSE(10, ("Loop over bcol components"));
for ( bcol_comp = (mca_base_component_list_item_t *) opal_list_get_first(&mca_bcol_base_components_in_use);
bcol_comp != (mca_base_component_list_item_t *) opal_list_get_end(&mca_bcol_base_components_in_use);
bcol_comp = (mca_base_component_list_item_t *) opal_list_get_next(bcol_comp)) {

Просмотреть файл

@ -33,7 +33,7 @@ static int mca_coll_ml_build_memsync_schedule(
schedule = *coll_desc;
if (OPAL_UNLIKELY(NULL == schedule)) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
rc = OMPI_ERR_OUT_OF_RESOURCE;
goto Barrier_Setup_Error;
}
@ -61,7 +61,7 @@ static int mca_coll_ml_build_memsync_schedule(
calloc(n_fcns, sizeof(struct mca_coll_ml_compound_functions_t));
if (OPAL_UNLIKELY(NULL == schedule->component_functions)) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
rc = OMPI_ERR_OUT_OF_RESOURCE;
goto Barrier_Setup_Error;
}
@ -132,7 +132,7 @@ static int mca_coll_ml_build_memsync_schedule(
if (comp_fn->num_dependent_tasks > 0) {
comp_fn->dependent_task_indices = (int *) calloc(comp_fn->num_dependent_tasks, sizeof(int));
if (OPAL_UNLIKELY(NULL == comp_fn->dependent_task_indices)) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
rc = OMPI_ERR_OUT_OF_RESOURCE;
goto Barrier_Setup_Error;
}
@ -154,7 +154,7 @@ static int mca_coll_ml_build_memsync_schedule(
rc = ml_coll_barrier_constant_group_data_setup(topo_info, schedule);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
ML_ERROR(("Failed to init const group data.\n"));
ML_ERROR(("Failed to init const group data."));
goto Barrier_Setup_Error;
}

Просмотреть файл

@ -43,7 +43,7 @@ static int mca_coll_ml_build_allgather_schedule(mca_coll_ml_topology_t *topo_inf
ret = mca_coll_ml_schedule_init_scratch(topo_info, &h_info,
&scratch_indx, &scratch_num);
if (OMPI_SUCCESS != ret) {
ML_ERROR(("Can't mca_coll_ml_schedule_init_scratch.\n"));
ML_ERROR(("Can't mca_coll_ml_schedule_init_scratch."));
goto Error;
}
assert(NULL != scratch_indx);
@ -52,7 +52,7 @@ static int mca_coll_ml_build_allgather_schedule(mca_coll_ml_topology_t *topo_inf
schedule = *coll_desc =
mca_coll_ml_schedule_alloc(&h_info);
if (NULL == schedule) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto Error;
}
@ -116,7 +116,7 @@ int ml_coll_hier_allgather_setup(mca_coll_ml_module_t *ml_module)
int ret, topo_index, alg;
mca_coll_ml_topology_t *topo_info = ml_module->topo_list;
ML_VERBOSE(10,("entering allgather setup\n"));
ML_VERBOSE(10,("entering allgather setup"));
#if 0
/* used to validate the recursive k - ing allgather tree */

Просмотреть файл

@ -66,21 +66,21 @@ static int mca_coll_ml_build_allreduce_schedule(
malloc(sizeof(mca_coll_ml_collective_operation_description_t));
schedule = *coll_desc;
if (NULL == schedule) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto Allreduce_Setup_Error;
}
scratch_indx = (int *) malloc(sizeof(int) * (n_hiers * 2));
if (NULL == scratch_indx) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto Allreduce_Setup_Error;
}
scratch_num = (int *) malloc(sizeof(int) * (n_hiers * 2));
if (NULL == scratch_num) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto Allreduce_Setup_Error;
}
@ -145,7 +145,7 @@ static int mca_coll_ml_build_allreduce_schedule(
calloc(nbcol_functions, sizeof(struct mca_coll_ml_compound_functions_t));
if (NULL == schedule->component_functions) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto Allreduce_Setup_Error;
}

Просмотреть файл

@ -35,7 +35,7 @@ static int mca_coll_ml_build_barrier_schedule(
schedule = *coll_desc;
if (OPAL_UNLIKELY(NULL == schedule)) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
rc = OMPI_ERR_OUT_OF_RESOURCE;
goto Barrier_Setup_Error;
}
@ -67,7 +67,7 @@ static int mca_coll_ml_build_barrier_schedule(
calloc(n_fcns, sizeof(struct mca_coll_ml_compound_functions_t));
if (OPAL_UNLIKELY(NULL == schedule->component_functions)) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
rc = OMPI_ERR_OUT_OF_RESOURCE;
goto Barrier_Setup_Error;
}
@ -127,7 +127,7 @@ static int mca_coll_ml_build_barrier_schedule(
if (comp_fn->num_dependent_tasks > 0) {
comp_fn->dependent_task_indices = (int *) calloc(comp_fn->num_dependent_tasks, sizeof(int));
if (OPAL_UNLIKELY(NULL == comp_fn->dependent_task_indices)) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
rc = OMPI_ERR_OUT_OF_RESOURCE;
goto Barrier_Setup_Error;
}
@ -152,7 +152,7 @@ static int mca_coll_ml_build_barrier_schedule(
rc = ml_coll_barrier_constant_group_data_setup(topo_info, schedule);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
ML_ERROR(("Failed to init const group data.\n"));
ML_ERROR(("Failed to init const group data."));
goto Barrier_Setup_Error;
}

Просмотреть файл

@ -143,21 +143,21 @@ static int mca_coll_ml_build_bcast_dynamic_schedule_no_attributes(
malloc(sizeof(mca_coll_ml_collective_operation_description_t));
schedule = *coll_desc;
if (NULL == schedule) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto Bcast_Setup_Error;
}
scratch_indx = (int *) malloc(sizeof(int) * (n_hiers));
if (NULL == scratch_indx) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto Bcast_Setup_Error;
}
scratch_num = (int *) malloc(sizeof(int) * (n_hiers));
if (NULL == scratch_num) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto Bcast_Setup_Error;
}
@ -200,7 +200,7 @@ static int mca_coll_ml_build_bcast_dynamic_schedule_no_attributes(
schedule->component_functions = (struct mca_coll_ml_compound_functions_t *)
calloc(n_hiers, sizeof(struct mca_coll_ml_compound_functions_t));
if (NULL == schedule->component_functions) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto Bcast_Setup_Error;
}
@ -313,21 +313,21 @@ static int mca_coll_ml_build_bcast_sequential_schedule_no_attributes(
malloc(sizeof(mca_coll_ml_collective_operation_description_t));
schedule = *coll_desc;
if (NULL == schedule) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto Bcast_Setup_Error;
}
scratch_indx = (int *) malloc(sizeof(int) * (n_hiers));
if (NULL == scratch_indx) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto Bcast_Setup_Error;
}
scratch_num = (int *) malloc(sizeof(int) * (n_hiers));
if (NULL == scratch_num) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto Bcast_Setup_Error;
}
@ -373,7 +373,7 @@ static int mca_coll_ml_build_bcast_sequential_schedule_no_attributes(
schedule->component_functions = (struct mca_coll_ml_compound_functions_t *)
calloc(n_hiers, sizeof(struct mca_coll_ml_compound_functions_t));
if (NULL == schedule->component_functions) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto Bcast_Setup_Error;
}
@ -381,7 +381,7 @@ static int mca_coll_ml_build_bcast_sequential_schedule_no_attributes(
schedule->comp_fn_arr = (struct mca_coll_ml_compound_functions_t **)
calloc(n_hiers,sizeof(struct mca_coll_ml_compound_functions_t *));
if (NULL == schedule->comp_fn_arr) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto Bcast_Setup_Error;
}
@ -567,21 +567,21 @@ static int mca_coll_ml_build_bcast_known_schedule_no_attributes(
malloc(sizeof(mca_coll_ml_collective_operation_description_t));
schedule = *coll_desc;
if (NULL == schedule) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto Bcast_Setup_Error;
}
scratch_indx = (int *) malloc(sizeof(int) * (n_hiers));
if (NULL == scratch_indx) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto Bcast_Setup_Error;
}
scratch_num = (int *) malloc(sizeof(int) * (n_hiers));
if (NULL == scratch_num) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto Bcast_Setup_Error;
}
@ -624,7 +624,7 @@ static int mca_coll_ml_build_bcast_known_schedule_no_attributes(
schedule->component_functions = (struct mca_coll_ml_compound_functions_t *)
calloc(n_hiers, sizeof(struct mca_coll_ml_compound_functions_t));
if (NULL == schedule->component_functions) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto Bcast_Setup_Error;
}

Просмотреть файл

@ -28,14 +28,14 @@ int mca_coll_ml_schedule_init_scratch(mca_coll_ml_topology_t *topo_info,
scratch_indx = *out_scratch_indx =
(int *) calloc(n_hiers * 2, sizeof(int));
if (NULL == *out_scratch_indx) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
return OMPI_ERR_OUT_OF_RESOURCE;
}
scratch_num = *out_scratch_num =
(int *) calloc(n_hiers * 2, sizeof(int));
if (NULL == *out_scratch_num) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
free(out_scratch_indx);
return OMPI_ERR_OUT_OF_RESOURCE;
}
@ -98,7 +98,7 @@ mca_coll_ml_collective_operation_description_t *
schedule = (mca_coll_ml_collective_operation_description_t *)
malloc(sizeof(mca_coll_ml_collective_operation_description_t));
if (NULL == schedule) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
return NULL;
}
@ -109,7 +109,7 @@ mca_coll_ml_collective_operation_description_t *
schedule->component_functions = (struct mca_coll_ml_compound_functions_t *)
calloc(h_info->nbcol_functions, sizeof(struct mca_coll_ml_compound_functions_t));
if (NULL == schedule->component_functions) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
free(schedule);
return NULL;
}

Просмотреть файл

@ -32,14 +32,14 @@ do { \
ml_module->component_pairs[n_hr - 1].bcol_index) { \
/* The process that is member of highest level subgroup \
should call for top algorithms in addition to fan-in/out steps*/ \
ML_VERBOSE(9, ("Setting top %d %d\n", n_hr, ml_module->component_pairs[g_hr - 1].bcol_index)); \
ML_VERBOSE(9, ("Setting top %d %d", n_hr, ml_module->component_pairs[g_hr - 1].bcol_index)); \
info.call_for_top_function = true; \
/* hier level run only top algorithm, so we deduct 1 */ \
info.num_up_levels = n_hr - 1; \
/* Top algorithm is called only once, so we deduct 1 */ \
info.nbcol_functions = 2 * n_hr - 1; \
} else { \
ML_VERBOSE(9, ("not setting top %d %d\n", n_hr, ml_module->component_pairs[g_hr - 1].bcol_index)); \
ML_VERBOSE(9, ("not setting top %d %d", n_hr, ml_module->component_pairs[g_hr - 1].bcol_index)); \
/* The process is not member of highest level subgroup, \
as result it does not call for top algorithm, \
but it calls for all fan-in/out steps */ \

Просмотреть файл

@ -76,21 +76,21 @@ static int mca_coll_ml_build_static_reduce_schedule(
schedule = *coll_desc;
if (OPAL_UNLIKELY(NULL == schedule)) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto Error;
}
scratch_indx = (int *) malloc(sizeof(int) * (n_hiers));
if (NULL == scratch_indx) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto Error;
}
scratch_num = (int *) malloc(sizeof(int) * (n_hiers));
if (NULL == scratch_num) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto Error;
}
@ -136,7 +136,7 @@ static int mca_coll_ml_build_static_reduce_schedule(
calloc(n_fcns, sizeof(struct mca_coll_ml_compound_functions_t));
if (OPAL_UNLIKELY(NULL == schedule->component_functions)) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto Error;
}
@ -203,7 +203,7 @@ static int mca_coll_ml_build_static_reduce_schedule(
schedule->comp_fn_arr = (struct mca_coll_ml_compound_functions_t **)
calloc(n_hiers,sizeof(struct mca_coll_ml_compound_functions_t *));
if (NULL == schedule->comp_fn_arr) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto Error;
}

Просмотреть файл

@ -43,7 +43,7 @@ int ml_coll_up_and_down_hier_setup(mca_coll_ml_module_t *ml_module,
collective_alg = (coll_ml_collective_description_t *)
malloc(sizeof(coll_ml_collective_description_t));
if (NULL == collective_alg) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto Error;
}
@ -78,7 +78,7 @@ int ml_coll_up_and_down_hier_setup(mca_coll_ml_module_t *ml_module,
collective_alg->functions = (mca_bcol_base_function_t *)
malloc(sizeof(mca_bcol_base_function_t) * collective_alg->n_functions);
if( NULL == collective_alg->functions) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto Error;
}
@ -100,14 +100,14 @@ int ml_coll_up_and_down_hier_setup(mca_coll_ml_module_t *ml_module,
* the total number of bcols in the row we store in scratch_num */
scratch_indx = (int *) malloc(sizeof(int) * (2 * num_hierarchies));
if(NULL == scratch_indx) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto Error;
}
scratch_num = (int *) malloc(sizeof(int) * (2 * num_hierarchies));
if(NULL == scratch_num) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto Error;
}
@ -375,14 +375,14 @@ int ml_coll_barrier_constant_group_data_setup(
* the total number of bcols in the row we store in scratch_num */
scratch_indx = (int *) malloc(sizeof(int) * (2 * num_hierarchies));
if(NULL == scratch_indx) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto Const_Data_Setup_Error;
}
scratch_num = (int *) malloc(sizeof(int) * (2 * num_hierarchies));
if(NULL == scratch_num) {
ML_ERROR(("Can't allocate memory.\n"));
ML_ERROR(("Can't allocate memory."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto Const_Data_Setup_Error;
}

Просмотреть файл

@ -60,7 +60,7 @@ static inline __opal_attribute_always_inline__
assert(ml_memblock->bank_is_busy);
if (ml_memblock->memsync_counter == (int)bank_index) {
while(ml_memblock->ready_for_memsync[ml_memblock->memsync_counter]) {
ML_VERBOSE(10, ("Calling for service barrier: ml_buffer_index - %d %d %d == %d.\n",
ML_VERBOSE(10, ("Calling for service barrier: ml_buffer_index - %d %d %d == %d.",
ml_request->fragment_data.buffer_desc->buffer_index,
ml_memblock->memsync_counter,
ml_memblock->bank_release_counters[ml_memblock->memsync_counter],
@ -81,7 +81,7 @@ static inline __opal_attribute_always_inline__
ML_VERBOSE(10, ("After service barrier."));
}
} else {
ML_VERBOSE(10, ("Out of order %d\n", ml_memblock->memsync_counter));
ML_VERBOSE(10, ("Out of order %d", ml_memblock->memsync_counter));
}
}

Просмотреть файл

@ -252,17 +252,17 @@ mca_bcol_base_lmngr_block_t* mca_coll_ml_lmngr_alloc (
/* Check if the list manager was initialized */
if(OPAL_UNLIKELY(NULL == lmngr->base_addr)) {
ML_VERBOSE(7 ,("Starting memory initialization\n"));
ML_VERBOSE(7 ,("Starting memory initialization"));
rc = mca_coll_ml_lmngr_init(lmngr);
if (OMPI_SUCCESS != rc) {
ML_ERROR(("Failed to init memory\n"));
ML_ERROR(("Failed to init memory"));
return NULL;
}
}
if(OPAL_UNLIKELY(opal_list_is_empty(list))) {
/* Upper layer need to handle the NULL */
ML_ERROR(("List manager is empty.\n"));
ML_ERROR(("List manager is empty."));
return NULL;
}

Просмотреть файл

@ -381,7 +381,7 @@ static void generate_active_bcols_list(mca_coll_ml_module_t *ml_module)
* function is not provided we skip this bcol, since it isn't used
* for memory synchronization (for instance - ptpcoll )*/
if (NULL == GET_BCOL_SYNC_FN(bcol_module)) {
ML_VERBOSE(10,(" No sync function was provided by bcol %s\n",
ML_VERBOSE(10,(" No sync function was provided by bcol %s",
bcol_module->bcol_component->bcol_version.mca_component_name));
continue;
}
@ -527,7 +527,7 @@ static int ml_module_memory_initialization(mca_coll_ml_module_t *ml_module)
ml_module->payload_block = mca_coll_ml_allocate_block(cs,ml_module->payload_block);
if (NULL == ml_module->payload_block) {
ML_ERROR(("mca_coll_ml_allocate_block exited with error.\n"));
ML_ERROR(("mca_coll_ml_allocate_block exited with error."));
return OMPI_ERROR;
}
@ -536,7 +536,7 @@ static int ml_module_memory_initialization(mca_coll_ml_module_t *ml_module)
nbuffers = cs->n_payload_buffs_per_bank;
buf_size = cs->payload_buffer_size;
ML_VERBOSE(10, ("Call for initialize block.\n"));
ML_VERBOSE(10, ("Call for initialize block."));
ret = mca_coll_ml_initialize_block(ml_module->payload_block,
nbuffers, nbanks, buf_size, ml_module->data_offset,
@ -545,13 +545,13 @@ static int ml_module_memory_initialization(mca_coll_ml_module_t *ml_module)
return ret;
}
ML_VERBOSE(10, ("Call for register bcols.\n"));
ML_VERBOSE(10, ("Call for register bcols."));
/* inititialize the memory with all of the bcols:
loop through the bcol modules and invoke the memory init */
ret = mca_coll_ml_register_bcols(ml_module);
if (OMPI_SUCCESS != ret) {
ML_ERROR(("mca_coll_ml_register_bcols returned an error.\n"));
ML_ERROR(("mca_coll_ml_register_bcols returned an error."));
/* goto CLEANUP; */
return ret;
}
@ -578,7 +578,7 @@ static int check_global_view_of_subgroups( int n_procs_selected,
/* more than one local leader - don't know how to
* handle this, so bail
*/
ML_VERBOSE(0, ("More than a single leader for a group.\n"));
ML_VERBOSE(0, ("More than a single leader for a group."));
ret=OMPI_ERROR;
goto exit_ERROR;
} else {
@ -599,7 +599,7 @@ static int check_global_view_of_subgroups( int n_procs_selected,
}
}
if( sum != n_procs_selected ) {
ML_VERBOSE(0, ("number of procs in the group unexpected. Expected %d Got %d\n",n_procs_selected,sum));
ML_VERBOSE(0, ("number of procs in the group unexpected. Expected %d Got %d",n_procs_selected,sum));
ret=OMPI_ERROR;
goto exit_ERROR;
}
@ -609,7 +609,7 @@ static int check_global_view_of_subgroups( int n_procs_selected,
if(ll_p1!=all_selected[module->group_list[i]] &&
ll_p1!=-all_selected[module->group_list[i]] ) {
ret=OMPI_ERROR;
ML_VERBOSE(0, ("Mismatch in rank list - element #%d - %d \n",i,all_selected[module->group_list[i]]));
ML_VERBOSE(0, ("Mismatch in rank list - element #%d - %d ",i,all_selected[module->group_list[i]]));
goto exit_ERROR;
}
}
@ -644,7 +644,7 @@ static void ml_init_k_nomial_trees(mca_coll_ml_topology_t *topo, int *list_of_ra
level_one_knt++;
}
/* fprintf(stderr,"PPP %d %d %d \n", level_one_knt, array_of_all_subgroup_ranks[0].level_in_hierarchy, num_total_subgroups); */
/* fprintf(stderr,"PPP %d %d %d ", level_one_knt, array_of_all_subgroup_ranks[0].level_in_hierarchy, num_total_subgroups); */
/* I want to cache this number for unpack*/
array_of_all_subgroup_ranks->level_one_index = level_one_knt;
@ -831,7 +831,7 @@ static int ml_setup_full_tree_data(mca_coll_ml_topology_t *topo,
ompi_comm_size(comm), map_to_comm_ranks,
comm);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
ML_VERBOSE(10, ("comm_allreduce_pml failed. root reduction\n"));
ML_VERBOSE(10, ("comm_allreduce_pml failed. root reduction"));
goto exit_ERROR;
}
@ -840,13 +840,13 @@ static int ml_setup_full_tree_data(mca_coll_ml_topology_t *topo,
MPI_INT, my_rank, ompi_comm_size(comm),
map_to_comm_ranks,comm);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
ML_VERBOSE(10, ("comm_bcast_pml failed. num_total_subgroups bcast\n"));
ML_VERBOSE(10, ("comm_bcast_pml failed. num_total_subgroups bcast"));
goto exit_ERROR;
}
scratch_space=(int *)malloc(4*sizeof(int)*(*num_total_subgroups));
if (OPAL_UNLIKELY(NULL == scratch_space)) {
ML_VERBOSE(10, ("Cannot allocate memory scratch_space.\n"));
ML_VERBOSE(10, ("Cannot allocate memory scratch_space."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto exit_ERROR;
}
@ -864,7 +864,7 @@ static int ml_setup_full_tree_data(mca_coll_ml_topology_t *topo,
MPI_INT, my_rank, ompi_comm_size(comm),
map_to_comm_ranks, comm);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
ML_VERBOSE(10, ("comm_allreduce_pml failed. scratch_space bcast\n"));
ML_VERBOSE(10, ("comm_allreduce_pml failed. scratch_space bcast"));
goto exit_ERROR;
}
if( my_rank != root ) {
@ -876,7 +876,7 @@ static int ml_setup_full_tree_data(mca_coll_ml_topology_t *topo,
(*array_of_all_subgroup_ranks)=(sub_group_params_t *)
malloc(sizeof(sub_group_params_t)*(*num_total_subgroups));
if (OPAL_UNLIKELY(NULL == (*array_of_all_subgroup_ranks))) {
ML_VERBOSE(10, ("Cannot allocate memory array_of_all_subgroup_ranks.\n"));
ML_VERBOSE(10, ("Cannot allocate memory array_of_all_subgroup_ranks."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto exit_ERROR;
}
@ -898,7 +898,7 @@ static int ml_setup_full_tree_data(mca_coll_ml_topology_t *topo,
(*list_of_ranks_in_all_subgroups)=(int *)
realloc((*list_of_ranks_in_all_subgroups),sizeof(int)*sum);
if (OPAL_UNLIKELY(NULL == (*list_of_ranks_in_all_subgroups))) {
ML_VERBOSE(10, ("Cannot allocate memory *list_of_ranks_in_all_subgroups.\n"));
ML_VERBOSE(10, ("Cannot allocate memory *list_of_ranks_in_all_subgroups."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto exit_ERROR;
}
@ -907,7 +907,7 @@ static int ml_setup_full_tree_data(mca_coll_ml_topology_t *topo,
MPI_INT, my_rank, ompi_comm_size(comm),
map_to_comm_ranks, comm);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
ML_VERBOSE(10, ("Bcast failed for list_of_ranks_in_all_subgroups \n"));
ML_VERBOSE(10, ("Bcast failed for list_of_ranks_in_all_subgroups "));
goto exit_ERROR;
}
@ -1039,7 +1039,7 @@ static int get_new_subgroup_data (int32_t *all_selected, int size_of_all_selecte
PROVIDE_SUFFICIENT_MEMORY(temp, dummy2,
knt1, int32_t *, knt2, 1, 5);
if (OPAL_UNLIKELY(NULL == (*sub_group_meta_data))) {
ML_VERBOSE(10, ("Cannot allocate memory for sub_group_meta_data.\n"));
ML_VERBOSE(10, ("Cannot allocate memory for sub_group_meta_data."));
rc = OMPI_ERR_OUT_OF_RESOURCE;
goto exit_ERROR;
}
@ -1050,7 +1050,7 @@ static int get_new_subgroup_data (int32_t *all_selected, int size_of_all_selecte
temp[knt2]=
(int *)malloc(sizeof(int)*size_of_all_selected);
if (OPAL_UNLIKELY(NULL == temp[knt2] ) ){
ML_VERBOSE(10, ("Cannot allocate memory for sub_group_meta_data.\n"));
ML_VERBOSE(10, ("Cannot allocate memory for sub_group_meta_data."));
rc = OMPI_ERR_OUT_OF_RESOURCE;
goto exit_ERROR;
}
@ -1073,7 +1073,7 @@ static int get_new_subgroup_data (int32_t *all_selected, int size_of_all_selecte
int, (*num_ranks_in_list_of_ranks_in_all_subgroups),
size_of_all_selected,size_of_all_selected);
if (OPAL_UNLIKELY(NULL == (*list_of_ranks_in_all_subgroups))) {
ML_VERBOSE(10, ("Cannot allocate memory for list_of_ranks_in_all_subgroups.\n"));
ML_VERBOSE(10, ("Cannot allocate memory for list_of_ranks_in_all_subgroups."));
rc = OMPI_ERR_OUT_OF_RESOURCE;
goto exit_ERROR;
}
@ -1112,7 +1112,7 @@ static int topo_parse (sub_group_params_t *sub_group_meta_data, int index, int *
int total_ranks_represented = 0, ranks_represented;
if (0 == sub_group_meta_data[index].level_in_hierarchy) {
ML_VERBOSE(10, ("Copying data for index %d to %d. Ranks at this level: %d\n", index, *dst_offset,
ML_VERBOSE(10, ("Copying data for index %d to %d. Ranks at this level: %d", index, *dst_offset,
sub_group_meta_data[index].n_ranks));
/* move level one subgroup data */
@ -1123,14 +1123,14 @@ static int topo_parse (sub_group_params_t *sub_group_meta_data, int index, int *
*dst_offset += sub_group_meta_data[index].n_ranks;
}
ML_VERBOSE(10, ("Subgroup %d has %d ranks. level = %d\n", index, sub_group_meta_data[index].n_ranks,
ML_VERBOSE(10, ("Subgroup %d has %d ranks. level = %d", index, sub_group_meta_data[index].n_ranks,
sub_group_meta_data[index].level_in_hierarchy));
/* fill in subgroup ranks */
sub_group_meta_data[index].rank_data=(rank_properties_t *)
malloc(sizeof(rank_properties_t) * sub_group_meta_data[index].n_ranks);
if (OPAL_UNLIKELY(NULL == sub_group_meta_data[index].rank_data)) {
ML_VERBOSE(10, ("Cannot allocate memory for rank_data \n"));
ML_VERBOSE(10, ("Cannot allocate memory for rank_data "));
return OMPI_ERR_OUT_OF_RESOURCE;
}
@ -1148,11 +1148,11 @@ static int topo_parse (sub_group_params_t *sub_group_meta_data, int index, int *
sub_group_meta_data[index].rank_data[j].rank = rank;
if (sub_group_meta_data[index].level_in_hierarchy) {
ML_VERBOSE(10, ("Looking for subgroup containing %d as root\n", rank));
ML_VERBOSE(10, ("Looking for subgroup containing %d as root", rank));
for (next_level = index - 1 ; next_level >= 0 ; --next_level) {
if (rank == sub_group_meta_data[next_level].root_rank_in_comm) {
ML_VERBOSE(10, ("Subgroup %d has root %d\n", next_level, rank));
ML_VERBOSE(10, ("Subgroup %d has root %d", next_level, rank));
break;
}
}
@ -1178,7 +1178,7 @@ static int topo_parse (sub_group_params_t *sub_group_meta_data, int index, int *
total_ranks_represented++;
}
ML_VERBOSE(10, ("Group %d, level %d, index %d, rank %d represents %d ranks\n", index,
ML_VERBOSE(10, ("Group %d, level %d, index %d, rank %d represents %d ranks", index,
sub_group_meta_data[index].level_in_hierarchy, j, rank,
sub_group_meta_data[index].rank_data[j].num_of_ranks_represented));
}
@ -1334,7 +1334,7 @@ static int mca_coll_ml_read_allbcols_settings(mca_coll_ml_module_t *ml_module,
*/
bcols_in_use = (int *) malloc(sizeof(int) * 2 * n_hierarchies);
if (OPAL_UNLIKELY(NULL == bcols_in_use)) {
ML_VERBOSE(10, ("Cannot allocate memory for bcols_in_use.\n"));
ML_VERBOSE(10, ("Cannot allocate memory for bcols_in_use."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto exit_ERROR;
}
@ -1382,7 +1382,7 @@ static int mca_coll_ml_read_allbcols_settings(mca_coll_ml_module_t *ml_module,
MPI_MAX, ompi_comm_size(ml_module->comm),
ranks_map, ml_module->comm);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
ML_VERBOSE(10, ("comm_allreduce_pml failed. bcols_in_use reduction\n"));
ML_VERBOSE(10, ("comm_allreduce_pml failed. bcols_in_use reduction"));
goto exit_ERROR;
}
@ -1530,7 +1530,7 @@ static int ml_discover_hierarchy(mca_coll_ml_module_t *ml_module)
if ((size_bcol_list != size_sbgp_list) || size_sbgp_list < 1 || size_bcol_list < 1) {
ML_ERROR(("Error: (size of mca_bcol_base_components_in_use = %d)"
" != (size of mca_sbgp_base_components_in_use = %d) or zero.\n",
" != (size of mca_sbgp_base_components_in_use = %d) or zero.",
size_bcol_list, size_sbgp_list));
return OMPI_ERROR;
}
@ -1596,7 +1596,7 @@ static int ml_discover_hierarchy(mca_coll_ml_module_t *ml_module)
comm_ranks = (int *)calloc(ompi_comm_size(ml_module->comm), sizeof(int));
if (OPAL_UNLIKELY(NULL == comm_ranks)) {
ML_VERBOSE(10, ("Cannot allocate memory.\n"));
ML_VERBOSE(10, ("Cannot allocate memory."));
return OMPI_ERR_OUT_OF_RESOURCE;
}
@ -1670,14 +1670,14 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
/* allocates scratch space */
all_selected = (int32_t *) calloc(ompi_comm_size(ml_module->comm), sizeof(int32_t));
if (OPAL_UNLIKELY(NULL == all_selected)) {
ML_VERBOSE(10, ("Cannot allocate memory.\n"));
ML_VERBOSE(10, ("Cannot allocate memory."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto exit_ERROR;
}
map_to_comm_ranks = (int *) calloc(ompi_comm_size(ml_module->comm), sizeof(int));
if (OPAL_UNLIKELY(NULL == map_to_comm_ranks)) {
ML_VERBOSE(10, ("Cannot allocate memory.\n"));
ML_VERBOSE(10, ("Cannot allocate memory."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto exit_ERROR;
}
@ -1689,7 +1689,7 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
copy_procs = (ompi_proc_t **) calloc(ompi_comm_size(ml_module->comm),
sizeof(ompi_proc_t *));
if (OPAL_UNLIKELY(NULL == copy_procs)) {
ML_VERBOSE(10, ("Cannot allocate memory.\n"));
ML_VERBOSE(10, ("Cannot allocate memory."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto exit_ERROR;
}
@ -1706,7 +1706,7 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
/* setup information for all-reduce over out of band */
index_proc_selected = (int32_t *) malloc(sizeof(int32_t) * n_procs_in);
if (OPAL_UNLIKELY(NULL == index_proc_selected)) {
ML_VERBOSE(10, ("Cannot allocate memory.\n"));
ML_VERBOSE(10, ("Cannot allocate memory."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto exit_ERROR;
}
@ -1717,7 +1717,7 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
topo->component_pairs = (hierarchy_pairs *) calloc(n_hierarchies, sizeof(hierarchy_pairs));
if (OPAL_UNLIKELY(NULL == topo->component_pairs)) {
ML_VERBOSE(10, ("Cannot allocate memory.\n"));
ML_VERBOSE(10, ("Cannot allocate memory."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto exit_ERROR;
}
@ -1758,7 +1758,7 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
sbgp_cli = (sbgp_base_component_keyval_t *) opal_list_get_first(&mca_sbgp_base_components_in_use);
bcol_cli = (mca_base_component_list_item_t *) opal_list_get_first(&mca_bcol_base_components_in_use);
ML_VERBOSE(10, ("Loop over hierarchies.\n"));
ML_VERBOSE(10, ("Loop over hierarchies."));
i_hier = 0;
while ((opal_list_item_t *) sbgp_cli != opal_list_get_end(&mca_sbgp_base_components_in_use)){
@ -1797,7 +1797,7 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
ML_VERBOSE(10,("Passed include %s exclude %s", include_sbgp_name, exclude_sbgp_name));
/* discover subgroup */
ML_VERBOSE(10, ("Discover subgroup: hier level - %d.\n", i_hier));
ML_VERBOSE(10, ("Discover subgroup: hier level - %d.", i_hier));
module = sbgp_component->select_procs(copy_procs, n_procs_in,
ml_module->comm,
sbgp_cli->key_value, &ptr_output);
@ -1832,7 +1832,7 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
n_procs_selected = module->group_size;
}
ML_VERBOSE(10, ("Hier level - %d; group size - %d\n", i_hier, n_procs_selected));
ML_VERBOSE(10, ("Hier level - %d; group size - %d", i_hier, n_procs_selected));
/* setup array indicating all procs that were selected */
for (i = 0; i < n_procs_in; i++) {
@ -1874,7 +1874,7 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
local_leader = map_to_comm_ranks[module->group_list[0]];
#endif
ML_VERBOSE(10,("The local leader selected for hierarchy %d is rank %d \n",
ML_VERBOSE(10,("The local leader selected for hierarchy %d is rank %d ",
i_hier, local_leader));
ll_p1 = local_leader + 1;
@ -1889,12 +1889,12 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
}
/* gather the information from all the other remaining ranks */
ML_VERBOSE(10, ("Call for comm_allreduce_pml.\n"));
ML_VERBOSE(10, ("Call for comm_allreduce_pml."));
ret = comm_allgather_pml(&in_allgather_value,
all_selected, 1, MPI_INT, my_rank_in_list,
n_procs_in, map_to_comm_ranks ,ml_module->comm);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
ML_VERBOSE(10, ("comm_allreduce_pml failed.\n"));
ML_VERBOSE(10, ("comm_allreduce_pml failed."));
goto exit_ERROR;
}
@ -1903,7 +1903,7 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
ret = check_global_view_of_subgroups(n_procs_selected,
n_procs_in, ll_p1, all_selected, module );
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
ML_VERBOSE(10, ("check_global_view_of_subgroups failed.\n"));
ML_VERBOSE(10, ("check_global_view_of_subgroups failed."));
goto exit_ERROR;
}
}
@ -1913,7 +1913,7 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
** the communicator.
*/
ML_VERBOSE(10, ("Change the list of procs; hier level - %d.\n", i_hier));
ML_VERBOSE(10, ("Change the list of procs; hier level - %d.", i_hier));
for (group_index = 0; group_index < n_procs_selected; group_index++) {
module->group_list[group_index] = map_to_comm_ranks[module->group_list[group_index]];
/* set my rank within the group */
@ -1935,7 +1935,7 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
&num_total_subgroups, map_to_comm_ranks,i_hier);
if( OMPI_SUCCESS != ret ) {
ML_VERBOSE(10, (" Error: get_new_subgroup_data returned %d \n",ret));
ML_VERBOSE(10, (" Error: get_new_subgroup_data returned %d ",ret));
goto exit_ERROR;
}
@ -1986,11 +1986,11 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
pair->bcol_index = i_hier;
/* create bcol modules */
ML_VERBOSE(10, ("Create bcol modules.\n"));
ML_VERBOSE(10, ("Create bcol modules."));
pair->bcol_modules = pair->bcol_component->collm_comm_query(module, &pair->num_bcol_modules);
/* failed to create a new module */
if (OPAL_UNLIKELY(NULL == pair->bcol_modules)) {
ML_VERBOSE(10, ("Failed to create new modules.\n"));
ML_VERBOSE(10, ("Failed to create new modules."));
ret = OMPI_ERROR;
goto exit_ERROR;
}
@ -2000,9 +2000,9 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
}
/* Append new network contexts to our memory management */
ML_VERBOSE(10, ("Append new network contexts to our memory managment.\n"));
ML_VERBOSE(10, ("Append new network contexts to our memory managment."));
if (OPAL_UNLIKELY(OMPI_SUCCESS != append_new_network_context(pair))) {
ML_VERBOSE(10, ("Exit with error. - append new network context\n"));
ML_VERBOSE(10, ("Exit with error. - append new network context"));
ret = OMPI_ERROR;
goto exit_ERROR;
}
@ -2087,6 +2087,9 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
if (0 == i_am_done) {
if (NULL != include_sbgp_name || NULL != exclude_sbgp_name) {
/* User explicitly asked for specific type of topology, which generates empty group */
/* JMS You really should use opal_show_help() here;
showing long error messages is *exactly* what
opal_show_help() is for. */
ML_ERROR(("ML topology configuration explicitly requested to %s subgroup %s. "
"Such configuration results in a creation of empty groups. As a result, ML framework can't "
"configure requested collective operations. ML framework will be disabled.",
@ -2120,14 +2123,14 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
MPI_MIN, original_group_size,
map_to_comm_ranks, ml_module->comm);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
ML_VERBOSE(10, ("comm_allreduce_pml failed. all_reduce_buffer2_in reduction\n"));
ML_VERBOSE(10, ("comm_allreduce_pml failed. all_reduce_buffer2_in reduction"));
goto exit_ERROR;
}
topo->global_lowest_hier_group_index = all_reduce_buffer2_out[0];
topo->global_highest_hier_group_index = -all_reduce_buffer2_out[1];
ML_VERBOSE(10, ("The lowest index and highest index was successfully found.\n"));
ML_VERBOSE(10, ("The lowest index and highest index was successfully found."));
ML_VERBOSE(10, ("ml_discover_hierarchy done, n_levels %d lowest_group_index %d highest_group_index %d,"
" original_group_size %d my_lowest_group_index %d my_highest_group_index %d",
@ -2145,7 +2148,7 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
&list_of_ranks_in_all_subgroups);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
ML_VERBOSE(10, ("comm_allreduce_pml failed: bcols_in_use reduction %d \n",ret));
ML_VERBOSE(10, ("comm_allreduce_pml failed: bcols_in_use reduction %d ",ret));
goto exit_ERROR;
}
@ -2158,7 +2161,7 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
if (COLL_ML_STATIC_BCAST == mca_coll_ml_component.bcast_algorithm) {
ret = mca_coll_ml_fill_in_route_tab(topo, ml_module->comm);
if (OMPI_SUCCESS != ret) {
ML_ERROR(("mca_coll_ml_fill_in_route_tab returned an error.\n"));
ML_ERROR(("mca_coll_ml_fill_in_route_tab returned an error."));
goto exit_ERROR;
}
}
@ -2172,7 +2175,7 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
exit_ERROR:
ML_VERBOSE(10, ("Discovery done\n"));
ML_VERBOSE(10, ("Discovery done"));
/* free temp resources */
if (NULL != all_selected) {
@ -2273,13 +2276,16 @@ int mca_coll_ml_allreduce_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
if (1 == opal_list_get_size(&mca_bcol_base_components_in_use) ||
(opal_list_item_t *) bcol_cli_next ==
opal_list_get_end(&mca_bcol_base_components_in_use)) {
ML_ERROR(("\n--------------------------------------------------------------------------------\n"
"The BCOL component %s doesn't support \n"
"all possible tuples (OPERATION X DATATYPE) for Allreduce \n"
"and you didn't provide additional one for alternative topology building, \n"
"as a result ML isn't be run correctly and its behavior is undefined. \n"
"You should run this bcol with another one supports all possible tuples, \n"
"\"--mca bcol_base_string %s,ptpcoll --mca sbgp_base_subgroups_string %s,p2p\" for example.\n",
/* JMS You really should use opal_show_help() here;
showing long error messages is *exactly* what
opal_show_help() is for. */
ML_ERROR(("\n--------------------------------------------------------------------------------"
"The BCOL component %s doesn't support "
"all possible tuples (OPERATION X DATATYPE) for Allreduce "
"and you didn't provide additional one for alternative topology building, "
"as a result ML isn't be run correctly and its behavior is undefined. "
"You should run this bcol with another one supports all possible tuples, "
"\"--mca bcol_base_string %s,ptpcoll --mca sbgp_base_subgroups_string %s,p2p\" for example.",
bcol_component->bcol_version.mca_component_name,
bcol_component->bcol_version.mca_component_name,
sbgp_component->sbgp_version.mca_component_name));
@ -2289,14 +2295,17 @@ int mca_coll_ml_allreduce_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
if (NULL != bcol_component_next->coll_support_all_types &&
!bcol_component_next->coll_support_all_types(BCOL_ALLREDUCE)) {
ML_ERROR(("\n--------------------------------------------------------------------------------\n"
"The BCOL component %s doesn't support \n"
"all possible tuples for Allreduce. \n"
"While you did provid an additional %s bcol component for alternative topology building, \n"
"this component also lacks support for all tuples. \n"
"As a result, ML Allreduce's behavior is undefined. \n"
"You must provide a component that supports all possible tuples, e.g. \n"
"\"--mca bcol_base_string %s,ptpcoll --mca sbgp_base_subgroups_string %s,p2p\n",
/* JMS You really should use opal_show_help() here;
showing long error messages is *exactly* what
opal_show_help() is for. */
ML_ERROR(("\n--------------------------------------------------------------------------------"
"The BCOL component %s doesn't support "
"all possible tuples for Allreduce. "
"While you did provid an additional %s bcol component for alternative topology building, "
"this component also lacks support for all tuples. "
"As a result, ML Allreduce's behavior is undefined. "
"You must provide a component that supports all possible tuples, e.g. "
"\"--mca bcol_base_string %s,ptpcoll --mca sbgp_base_subgroups_string %s,p2p",
bcol_component->bcol_version.mca_component_name,
bcol_component_next->bcol_version.mca_component_name,
bcol_component->bcol_version.mca_component_name,
@ -2337,8 +2346,8 @@ int mca_coll_ml_fulltree_iboffload_only_hierarchy_discovery(mca_coll_ml_module_t
n_hierarchies, NULL, "ibnet");
}
#define IS_RECHABLE 1
#define IS_NOT_RECHABLE -1
#define IS_REACHABLE 1
#define IS_NOT_REACHABLE -1
static int mca_coll_ml_fill_in_route_tab(mca_coll_ml_topology_t *topo, ompi_communicator_t *comm)
{
@ -2355,18 +2364,18 @@ static int mca_coll_ml_fill_in_route_tab(mca_coll_ml_topology_t *topo, ompi_comm
all_reachable_ranks = (int32_t *) malloc(comm_size * sizeof(int32_t));
if (NULL == all_reachable_ranks) {
ML_VERBOSE(10, ("Cannot allocate memory.\n"));
ML_VERBOSE(10, ("Cannot allocate memory."));
rc = OMPI_ERR_OUT_OF_RESOURCE;
goto exit_ERROR;
}
for (i = 0; i < comm_size; ++i) {
all_reachable_ranks[i] = IS_NOT_RECHABLE;
all_reachable_ranks[i] = IS_NOT_REACHABLE;
}
route_table = (int32_t **) calloc(topo->n_levels, sizeof(int32_t *));
if (NULL == route_table) {
ML_VERBOSE(10, ("Cannot allocate memory.\n"));
ML_VERBOSE(10, ("Cannot allocate memory."));
rc = OMPI_ERR_OUT_OF_RESOURCE;
goto exit_ERROR;
}
@ -2374,25 +2383,25 @@ static int mca_coll_ml_fill_in_route_tab(mca_coll_ml_topology_t *topo, ompi_comm
topo->route_vector = (mca_bcol_base_route_info_t *)
calloc(comm_size, sizeof(mca_bcol_base_route_info_t));
if (NULL == topo->route_vector) {
ML_VERBOSE(10, ("Cannot allocate memory.\n"));
ML_VERBOSE(10, ("Cannot allocate memory."));
rc = OMPI_ERR_OUT_OF_RESOURCE;
goto exit_ERROR;
}
all_reachable_ranks[my_rank] = IS_RECHABLE;
all_reachable_ranks[my_rank] = IS_REACHABLE;
for (level = 0; level < topo->n_levels; ++level) {
sbgp_group = topo->component_pairs[level].subgroup_module;
route_table[level] = (int32_t *) malloc(comm_size * sizeof(int32_t));
if (NULL == route_table[level]) {
ML_VERBOSE(10, ("Cannot allocate memory.\n"));
ML_VERBOSE(10, ("Cannot allocate memory."));
rc = OMPI_ERR_OUT_OF_RESOURCE;
goto exit_ERROR;
}
for (i = 0; i < comm_size; ++i) {
if (IS_NOT_RECHABLE != all_reachable_ranks[i]) {
if (IS_NOT_REACHABLE != all_reachable_ranks[i]) {
all_reachable_ranks[i] = sbgp_group->my_index;
}
}
@ -2405,14 +2414,14 @@ static int mca_coll_ml_fill_in_route_tab(mca_coll_ml_topology_t *topo, ompi_comm
sbgp_group->group_list,
comm);
if (OMPI_SUCCESS != rc) {
ML_VERBOSE(10, ("comm_allreduce failed.\n"));
ML_VERBOSE(10, ("comm_allreduce failed."));
goto exit_ERROR;
}
for (i = 0; i < comm_size; ++i) {
if (IS_NOT_RECHABLE !=
if (IS_NOT_REACHABLE !=
route_table[level][i]) {
all_reachable_ranks[i] = IS_RECHABLE;
all_reachable_ranks[i] = IS_REACHABLE;
}
}
}
@ -2422,7 +2431,7 @@ static int mca_coll_ml_fill_in_route_tab(mca_coll_ml_topology_t *topo, ompi_comm
/* If there are unreachable ranks =>
reach them through leader of my upper layer */
for (i = 0; i < comm_size; ++i) {
if (IS_NOT_RECHABLE ==
if (IS_NOT_REACHABLE ==
route_table[level - 1][i]) {
route_table[level - 1][i] = 0;
}
@ -2432,7 +2441,7 @@ static int mca_coll_ml_fill_in_route_tab(mca_coll_ml_topology_t *topo, ompi_comm
for (i = 0; i < comm_size; ++i) {
for (level = 0; level < topo->n_levels; ++level) {
if (IS_NOT_RECHABLE != route_table[level][i]) {
if (IS_NOT_REACHABLE != route_table[level][i]) {
topo->route_vector[i].level = level;
topo->route_vector[i].rank = route_table[level][i];
break;
@ -2502,7 +2511,7 @@ static int mca_coll_ml_fill_in_route_tab(mca_coll_ml_topology_t *topo, ompi_comm
assert(COLL_ML_ROUTE_BUFF_SIZE + buff > output);
}
ML_VERBOSE(10, ("\nThe table is:\n============\n%s\n", buff));
ML_VERBOSE(10, ("\nThe table is:\n============%s", buff));
free(buff);
}
#endif
@ -2517,7 +2526,7 @@ static int mca_coll_ml_fill_in_route_tab(mca_coll_ml_topology_t *topo, ompi_comm
exit_ERROR:
ML_VERBOSE(10, ("Exit with error status - %d.\n", rc));
ML_VERBOSE(10, ("Exit with error status - %d.", rc));
if (NULL != route_table) {
for (level = 0; level < topo->n_levels; ++level) {
if (NULL != route_table[level]) {
@ -2689,7 +2698,7 @@ static int check_for_max_supported_ml_modules(struct ompi_communicator_t *comm)
comm_ranks = (int *)calloc(ompi_comm_size(comm), sizeof(int));
if (OPAL_UNLIKELY(NULL == comm_ranks)) {
ML_VERBOSE(10, ("Cannot allocate memory.\n"));
ML_VERBOSE(10, ("Cannot allocate memory."));
return OMPI_ERR_OUT_OF_RESOURCE;
}
for (i = 0; i < ompi_comm_size(comm); i++) {
@ -2723,7 +2732,7 @@ static int check_for_max_supported_ml_modules(struct ompi_communicator_t *comm)
static int verbosity_level = 5; \
static int module_num = 0; \
ML_VERBOSE(10, ("ML module - %p num %d for comm - %p, " \
"comm size - %d, ML component prio - %d.\n", \
"comm size - %d, ML component prio - %d.", \
ml_module, ++module_num, comm, ompi_comm_size(comm), *priority)); \
/* For now I want to always print that we enter ML - \
in the past there was an issue where we did not enter ML and actually ran with tuned. \
@ -2773,6 +2782,9 @@ static int setup_bcast_table(mca_coll_ml_module_t *module)
if (1 == cm->enable_fragmentation || (2 == cm->enable_fragmentation && !has_zero_copy)) {
module->bcast_fn_index_table[1] = ML_BCAST_SMALL_DATA_KNOWN;
} else if (!has_zero_copy) {
/* JMS You really should use opal_show_help() here;
showing long error messages is *exactly* what
opal_show_help() is for. */
ML_ERROR(("ML couldn't be used: because the mca param coll_ml_enable_fragmentation "
"was set to zero and there is a bcol doesn't support zero copy method."));
return OMPI_ERROR;
@ -2783,6 +2795,9 @@ static int setup_bcast_table(mca_coll_ml_module_t *module)
module->bcast_fn_index_table[0] = ML_BCAST_SMALL_DATA_UNKNOWN;
if (NULL == module->coll_ml_bcast_functions[ML_BCAST_LARGE_DATA_UNKNOWN]) {
/* JMS You really should use opal_show_help() here;
showing long error messages is *exactly* what
opal_show_help() is for. */
ML_ERROR(("ML couldn't be used: because the mca param coll_ml_bcast_algorithm was not set "
"to static and no function is available."));
return OMPI_ERROR;
@ -2794,6 +2809,9 @@ static int setup_bcast_table(mca_coll_ml_module_t *module)
if (1 == cm->enable_fragmentation || (2 == cm->enable_fragmentation && !has_zero_copy)) {
module->bcast_fn_index_table[1] = ML_BCAST_SMALL_DATA_UNKNOWN;
} else if (!has_zero_copy) {
/* JMS You really should use opal_show_help() here;
showing long error messages is *exactly* what
opal_show_help() is for. */
ML_ERROR(("ML couldn't be used: because the mca param coll_ml_enable_fragmentation "
"was set to zero and there is a bcol doesn't support zero copy method."));
return OMPI_ERROR;
@ -2915,7 +2933,7 @@ mca_coll_ml_comm_query(struct ompi_communicator_t *comm, int *priority)
mca_coll_ml_component_t *cs = &mca_coll_ml_component;
bool iboffload_was_requested = mca_coll_ml_check_if_bcol_is_requested("iboffload");
ML_VERBOSE(10, ("ML comm query start.\n"));
ML_VERBOSE(10, ("ML comm query start."));
/**
* No support for inter-communicator yet.
@ -2936,7 +2954,7 @@ mca_coll_ml_comm_query(struct ompi_communicator_t *comm, int *priority)
* to handle the intra collective communications.
*/
if (OMPI_COMM_IS_INTRA(comm) && ompi_comm_size(comm) < 2) {
ML_VERBOSE(10, ("It is inter-communicator and size is less than 2.\n"));
ML_VERBOSE(10, ("It is inter-communicator and size is less than 2."));
*priority = -1;
return NULL;
}
@ -2955,7 +2973,7 @@ mca_coll_ml_comm_query(struct ompi_communicator_t *comm, int *priority)
}
}
ML_VERBOSE(10, ("Create ML module start.\n"));
ML_VERBOSE(10, ("Create ML module start."));
/* allocate and initialize an ml module */
ml_module = OBJ_NEW(mca_coll_ml_module_t);
@ -2988,14 +3006,14 @@ mca_coll_ml_comm_query(struct ompi_communicator_t *comm, int *priority)
*/
ret = ml_discover_hierarchy(ml_module);
if (OMPI_SUCCESS != ret) {
ML_ERROR(("ml_discover_hierarchy exited with error.\n"));
ML_ERROR(("ml_discover_hierarchy exited with error."));
goto CLEANUP;
}
/* gvm Disabled for debugging */
ret = mca_coll_ml_build_filtered_fn_table(ml_module);
if (OMPI_SUCCESS != ret) {
ML_ERROR(("mca_coll_ml_build_filtered_fn_table returned an error.\n"));
ML_ERROR(("mca_coll_ml_build_filtered_fn_table returned an error."));
goto CLEANUP;
}
@ -3006,7 +3024,7 @@ mca_coll_ml_comm_query(struct ompi_communicator_t *comm, int *priority)
one module instantiated. We may want to use the same collective
capabilities over more than one set of procs. Each module will store
the relevant information for a given set of procs */
ML_VERBOSE(10, ("Call for setup schedule.\n"));
ML_VERBOSE(10, ("Call for setup schedule."));
ret = ml_coll_schedule_setup(ml_module);
if (OMPI_SUCCESS != ret) {
ML_ERROR(("ml_coll_schedule_setup exit with error"));
@ -3014,17 +3032,17 @@ mca_coll_ml_comm_query(struct ompi_communicator_t *comm, int *priority)
}
/* Setup bcast table */
ML_VERBOSE(10, ("Setup bcast table\n"));
ML_VERBOSE(10, ("Setup bcast table"));
ret = setup_bcast_table(ml_module);
if (OMPI_SUCCESS != ret) {
ML_ERROR(("setup_bcast_table exit with error"));
goto CLEANUP;
}
ML_VERBOSE(10, ("Setup pointer to collectives calls.\n"));
ML_VERBOSE(10, ("Setup pointer to collectives calls."));
init_coll_func_pointers(ml_module);
ML_VERBOSE(10, ("Setup free lists\n"));
ML_VERBOSE(10, ("Setup free lists"));
ret = init_lists(ml_module);
if (OMPI_SUCCESS != ret) {
goto CLEANUP;
@ -3130,7 +3148,7 @@ ml_module_enable(mca_coll_base_module_t *module,
snprintf(output_buffer, sizeof(output_buffer), "%s (cid %d)", comm->c_name,
comm->c_contextid);
ML_VERBOSE(10, ("coll:ml:enable: new communicator: %s.\n", output_buffer));
ML_VERBOSE(10, ("coll:ml:enable: new communicator: %s.", output_buffer));
/* All done */
return OMPI_SUCCESS;

Просмотреть файл

@ -249,7 +249,7 @@ static int mca_coll_ml_reduce_frag_progress(mca_coll_ml_collective_operation_pro
new_op->sequential_routine.current_bcol_status = SEQ_TASK_PENDING;
MCA_COLL_ML_SET_NEW_FRAG_ORDER_INFO(new_op);
ML_VERBOSE(10,("FFFF Contig + fragmentation [0-sk, 1-lk, 3-su, 4-lu] %d %d %d\n",
ML_VERBOSE(10,("FFFF Contig + fragmentation [0-sk, 1-lk, 3-su, 4-lu] %d %d %d",
new_op->variable_fn_params.buffer_size,
new_op->fragment_data.fragment_size,
new_op->fragment_data.message_descriptor->n_bytes_scheduled));

Просмотреть файл

@ -84,7 +84,7 @@ int get_dev_distance_proc(opal_carto_graph_t *graph,
opal_paffinity_base_get_map_to_socket_core(process_id, &socket, &core);
asprintf(&slot, "socket%d", socket);
ML_VERBOSE(10,("The socket addres is %d\n",socket));
ML_VERBOSE(10,("The socket addres is %d",socket));
slot_node = opal_carto_base_find_node(graph, slot);
@ -123,7 +123,7 @@ int coll_ml_select_leader(mca_coll_ml_module_t *ml_module,
dist = dist1 + dist2;
ML_VERBOSE(10,("The distance for proc %d dist1 %d, dist2 %d \n",i,dist1,dist2));
ML_VERBOSE(10,("The distance for proc %d dist1 %d, dist2 %d",i,dist1,dist2));
if ((dist < min_dist) || ((dist == min_dist) && (i < leader))) {
leader = i;
min_dist = dist;