1
1

common/ompio: fix calculation in simple-grouping option

This is based on a bug reported on the mailing list using a netcdf testcase.
The problem occurs if processes are using a custom file view, but on some
of them it appears as if the default file view is being used. Because of that,
the simple-grouping option lead to different number of aggregators used on different
processes, and ultimately to a deadlock. This patch fixes the problem by not using
the file_view size anymore for the calculation in the simple-grouping option,
but the contiguous chunk size (which is identical on all processes).

Fixes issue #7109

Signed-off-by: Edgar Gabriel <egabriel@central.uh.edu>
Этот коммит содержится в:
Edgar Gabriel 2019-10-29 12:30:41 -05:00
родитель 8343a289f2
Коммит ad5d0df4e9

Просмотреть файл

@ -126,17 +126,17 @@ int mca_common_ompio_simple_grouping(ompio_file_t *fh,
} }
P_a = 1; P_a = 1;
time_prev = cost_calc ( fh->f_size, P_a, fh->f_view_size, (size_t) fh->f_bytes_per_agg, mode ); time_prev = cost_calc ( fh->f_size, P_a, fh->f_cc_size, (size_t) fh->f_bytes_per_agg, mode );
P_a_prev = P_a; P_a_prev = P_a;
for ( P_a = incr; P_a <= fh->f_size; P_a += incr ) { for ( P_a = incr; P_a <= fh->f_size; P_a += incr ) {
time = cost_calc ( fh->f_size, P_a, fh->f_view_size, (size_t) fh->f_bytes_per_agg, mode ); time = cost_calc ( fh->f_size, P_a, fh->f_cc_size, (size_t) fh->f_bytes_per_agg, mode );
dtime_abs = (time_prev - time); dtime_abs = (time_prev - time);
dtime = dtime_abs / time_prev; dtime = dtime_abs / time_prev;
dtime_diff = ( P_a == incr ) ? dtime : (dtime_prev - dtime); dtime_diff = ( P_a == incr ) ? dtime : (dtime_prev - dtime);
#ifdef OMPIO_DEBUG #ifdef OMPIO_DEBUG
if ( 0 == fh->f_rank ){ if ( 0 == fh->f_rank ){
printf(" d_p = %ld P_a = %d time = %lf dtime = %lf dtime_abs =%lf dtime_diff=%lf\n", printf(" d_p = %ld P_a = %d time = %lf dtime = %lf dtime_abs =%lf dtime_diff=%lf\n",
fh->f_view_size, P_a, time, dtime, dtime_abs, dtime_diff ); fh->f_cc_size, P_a, time, dtime, dtime_abs, dtime_diff );
} }
#endif #endif
if ( dtime_diff < dtime_threshold ) { if ( dtime_diff < dtime_threshold ) {
@ -171,7 +171,7 @@ int mca_common_ompio_simple_grouping(ompio_file_t *fh,
num_groups = P_a_prev; num_groups = P_a_prev;
#ifdef OMPIO_DEBUG #ifdef OMPIO_DEBUG
printf(" For P=%d d_p=%ld b_c=%d threshold=%f chosen P_a = %d \n", printf(" For P=%d d_p=%ld b_c=%d threshold=%f chosen P_a = %d \n",
fh->f_size, fh->f_view_size, fh->f_bytes_per_agg, dtime_threshold, P_a_prev); fh->f_size, fh->f_cc_size, fh->f_bytes_per_agg, dtime_threshold, P_a_prev);
#endif #endif
/* Cap the maximum number of aggregators.*/ /* Cap the maximum number of aggregators.*/
@ -183,6 +183,7 @@ int mca_common_ompio_simple_grouping(ompio_file_t *fh,
} }
*num_groups_out = num_groups; *num_groups_out = num_groups;
return mca_common_ompio_forced_grouping ( fh, num_groups, contg_groups); return mca_common_ompio_forced_grouping ( fh, num_groups, contg_groups);
} }
@ -576,7 +577,7 @@ int mca_common_ompio_create_groups(ompio_file_t *fh,
opal_output (1, "mca_common_ompio_create_groups: error in mca_common_ompio_prepare_to_group\n"); opal_output (1, "mca_common_ompio_create_groups: error in mca_common_ompio_prepare_to_group\n");
goto exit; goto exit;
} }
switch(ompio_grouping_flag){ switch(ompio_grouping_flag){
case OMPIO_SPLIT: case OMPIO_SPLIT: