From 76a8c67575f1f554a5482e4265bea106002ac332 Mon Sep 17 00:00:00 2001 From: Edgar Gabriel Date: Mon, 18 Sep 2017 12:30:34 -0500 Subject: [PATCH] io/ompio: add a new grouping option avoiding communication The new grouping option simple+ performs all calculations used for the aggregator selection as if the default file view were used, thus avoiding communication in file_set_view altogether. This mode is useful for applications that do not set a file view, but use explicit offset operations on the default file view. Signed-off-by: Edgar Gabriel --- .../mca/common/ompio/common_ompio_file_view.c | 123 ++++++++++-------- ompi/mca/io/ompio/io_ompio.h | 2 +- ompi/mca/io/ompio/io_ompio_aggregators.c | 5 +- ompi/mca/io/ompio/io_ompio_component.c | 2 +- 4 files changed, 73 insertions(+), 59 deletions(-) diff --git a/ompi/mca/common/ompio/common_ompio_file_view.c b/ompi/mca/common/ompio/common_ompio_file_view.c index f4718375b9..ebe3f1a12e 100644 --- a/ompi/mca/common/ompio/common_ompio_file_view.c +++ b/ompi/mca/common/ompio/common_ompio_file_view.c @@ -30,7 +30,7 @@ #include "ompi/mca/fcoll/base/base.h" #include "ompi/mca/topo/topo.h" -static OMPI_MPI_OFFSET_TYPE get_contiguous_chunk_size (mca_io_ompio_file_t *); +static OMPI_MPI_OFFSET_TYPE get_contiguous_chunk_size (mca_io_ompio_file_t *, int flag); static int datatype_duplicate (ompi_datatype_t *oldtype, ompi_datatype_t **newtype ); static int datatype_duplicate (ompi_datatype_t *oldtype, ompi_datatype_t **newtype ) { @@ -139,7 +139,13 @@ int mca_common_ompio_set_view (mca_io_ompio_file_t *fh, // in orig_file type, No need to set args on this one. 
ompi_datatype_duplicate (newfiletype, &fh->f_filetype); - fh->f_cc_size = get_contiguous_chunk_size (fh); + + if( SIMPLE_PLUS == mca_io_ompio_grouping_option ) { + fh->f_cc_size = get_contiguous_chunk_size (fh, 1); + } + else { + fh->f_cc_size = get_contiguous_chunk_size (fh, 0); + } if (opal_datatype_is_contiguous_memory_layout(&etype->super,1)) { if (opal_datatype_is_contiguous_memory_layout(&filetype->super,1) && @@ -166,7 +172,7 @@ int mca_common_ompio_set_view (mca_io_ompio_file_t *fh, } } - if ( SIMPLE != mca_io_ompio_grouping_option ) { + /* file-view based grouping only when neither simple mode is selected */ if ( SIMPLE != mca_io_ompio_grouping_option && SIMPLE_PLUS != mca_io_ompio_grouping_option ) { ret = mca_io_ompio_fview_based_grouping(fh, &num_groups, @@ -179,6 +185,7 @@ int mca_common_ompio_set_view (mca_io_ompio_file_t *fh, else { int done=0; int ndims; + if ( fh->f_comm->c_flags & OMPI_COMM_CART ){ ret = fh->f_comm->c_topo->topo.cart.cartdim_get( fh->f_comm, &ndims); if ( OMPI_SUCCESS != ret ){ @@ -253,7 +260,7 @@ exit: return ret; } -OMPI_MPI_OFFSET_TYPE get_contiguous_chunk_size (mca_io_ompio_file_t *fh) +OMPI_MPI_OFFSET_TYPE get_contiguous_chunk_size (mca_io_ompio_file_t *fh, int flag) { int uniform = 0; OMPI_MPI_OFFSET_TYPE avg[3] = {0,0,0}; @@ -268,60 +275,66 @@ OMPI_MPI_OFFSET_TYPE get_contiguous_chunk_size (mca_io_ompio_file_t *fh) ** 2. 
each section in the file view has exactly the same size */ - for (i=0 ; i<(int)fh->f_iov_count ; i++) { - avg[0] += fh->f_decoded_iov[i].iov_len; - if (i && 0 == uniform) { - if (fh->f_decoded_iov[i].iov_len != fh->f_decoded_iov[i-1].iov_len) { - uniform = 1; - } - } - } - if ( 0 != fh->f_iov_count ) { - avg[0] = avg[0]/fh->f_iov_count; - } - avg[1] = (OMPI_MPI_OFFSET_TYPE) fh->f_iov_count; - avg[2] = (OMPI_MPI_OFFSET_TYPE) uniform; - - fh->f_comm->c_coll->coll_allreduce (avg, - global_avg, - 3, - OMPI_OFFSET_DATATYPE, - MPI_SUM, - fh->f_comm, - fh->f_comm->c_coll->coll_allreduce_module); - global_avg[0] = global_avg[0]/fh->f_size; - global_avg[1] = global_avg[1]/fh->f_size; - -#if 0 - /* Disabling the feature since we are not using it anyway. Saves us one allreduce operation. */ - int global_uniform=0; - - if ( global_avg[0] == avg[0] && - global_avg[1] == avg[1] && - 0 == avg[2] && - 0 == global_avg[2] ) { - uniform = 0; + if ( flag ) { + global_avg[0] = MCA_IO_DEFAULT_FILE_VIEW_SIZE; } else { - uniform = 1; - } - - /* second confirmation round to see whether all processes agree - ** on having a uniform file view or not - */ - fh->f_comm->c_coll->coll_allreduce (&uniform, - &global_uniform, - 1, - MPI_INT, - MPI_MAX, - fh->f_comm, - fh->f_comm->c_coll->coll_allreduce_module); - - if ( 0 == global_uniform ){ - /* yes, everybody agrees on having a uniform file view */ - fh->f_flags |= OMPIO_UNIFORM_FVIEW; - } + for (i=0 ; i<(int)fh->f_iov_count ; i++) { + avg[0] += fh->f_decoded_iov[i].iov_len; + if (i && 0 == uniform) { + if (fh->f_decoded_iov[i].iov_len != fh->f_decoded_iov[i-1].iov_len) { + uniform = 1; + } + } + } + if ( 0 != fh->f_iov_count ) { + avg[0] = avg[0]/fh->f_iov_count; + } + avg[1] = (OMPI_MPI_OFFSET_TYPE) fh->f_iov_count; + avg[2] = (OMPI_MPI_OFFSET_TYPE) uniform; + + fh->f_comm->c_coll->coll_allreduce (avg, + global_avg, + 3, + OMPI_OFFSET_DATATYPE, + MPI_SUM, + fh->f_comm, + fh->f_comm->c_coll->coll_allreduce_module); + global_avg[0] = 
global_avg[0]/fh->f_size; + global_avg[1] = global_avg[1]/fh->f_size; + +#if 0 + /* Disabling the feature since we are not using it anyway. Saves us one allreduce operation. */ + int global_uniform=0; + + if ( global_avg[0] == avg[0] && + global_avg[1] == avg[1] && + 0 == avg[2] && + 0 == global_avg[2] ) { + uniform = 0; + } + else { + uniform = 1; + } + + /* second confirmation round to see whether all processes agree + ** on having a uniform file view or not + */ + fh->f_comm->c_coll->coll_allreduce (&uniform, + &global_uniform, + 1, + MPI_INT, + MPI_MAX, + fh->f_comm, + fh->f_comm->c_coll->coll_allreduce_module); + + if ( 0 == global_uniform ){ + /* yes, everybody agrees on having a uniform file view */ + fh->f_flags |= OMPIO_UNIFORM_FVIEW; + } #endif + } + return global_avg[0]; } diff --git a/ompi/mca/io/ompio/io_ompio.h b/ompi/mca/io/ompio/io_ompio.h index 0268a02244..ee310c9802 100644 --- a/ompi/mca/io/ompio/io_ompio.h +++ b/ompi/mca/io/ompio/io_ompio.h @@ -111,7 +111,7 @@ OMPI_DECLSPEC extern int mca_io_ompio_coll_timing_info; #define OPTIMIZE_GROUPING 4 #define SIMPLE 5 #define NO_REFINEMENT 6 - +#define SIMPLE_PLUS 7 #define OMPIO_UNIFORM_DIST_THRESHOLD 0.5 #define OMPIO_CONTG_THRESHOLD 1048576 diff --git a/ompi/mca/io/ompio/io_ompio_aggregators.c b/ompi/mca/io/ompio/io_ompio_aggregators.c index 17f9d17ac5..818b10c947 100644 --- a/ompi/mca/io/ompio/io_ompio_aggregators.c +++ b/ompi/mca/io/ompio/io_ompio_aggregators.c @@ -497,8 +497,9 @@ int mca_io_ompio_set_aggregator_props (struct mca_io_ompio_file_t *fh, fh->f_flags |= OMPIO_AGGREGATOR_IS_SET; if (-1 == num_aggregators) { - if ( SIMPLE == mca_io_ompio_grouping_option || - NO_REFINEMENT == mca_io_ompio_grouping_option ) { + if ( SIMPLE == mca_io_ompio_grouping_option || + NO_REFINEMENT == mca_io_ompio_grouping_option || + SIMPLE_PLUS == mca_io_ompio_grouping_option ) { fh->f_aggregator_index = 0; fh->f_final_num_aggrs = fh->f_init_num_aggrs; fh->f_procs_per_group = fh->f_init_procs_per_group; diff --git 
a/ompi/mca/io/ompio/io_ompio_component.c b/ompi/mca/io/ompio/io_ompio_component.c index 5a93a5f354..45a178c645 100644 --- a/ompi/mca/io/ompio/io_ompio_component.c +++ b/ompi/mca/io/ompio/io_ompio_component.c @@ -212,7 +212,7 @@ static int register_component(void) "Option for grouping of processes in the aggregator selection " "1: Data volume based grouping 2: maximizing group size uniformity 3: maximimze " "data contiguity 4: hybrid optimization 5: simple (default) " - "6: skip refinement step", + "6: skip refinement step 7: simple+: grouping based on default file view", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,