Merge pull request #4230 from edgargabriel/topic/no-smart-fview
io/ompio: add a new grouping option avoiding communication
This commit is contained in:
Commit
4c0d347412
@ -30,7 +30,7 @@
|
||||
#include "ompi/mca/fcoll/base/base.h"
|
||||
#include "ompi/mca/topo/topo.h"
|
||||
|
||||
static OMPI_MPI_OFFSET_TYPE get_contiguous_chunk_size (mca_io_ompio_file_t *);
|
||||
static OMPI_MPI_OFFSET_TYPE get_contiguous_chunk_size (mca_io_ompio_file_t *, int flag);
|
||||
static int datatype_duplicate (ompi_datatype_t *oldtype, ompi_datatype_t **newtype );
|
||||
static int datatype_duplicate (ompi_datatype_t *oldtype, ompi_datatype_t **newtype )
|
||||
{
|
||||
@ -139,7 +139,13 @@ int mca_common_ompio_set_view (mca_io_ompio_file_t *fh,
|
||||
// in orig_file type, No need to set args on this one.
|
||||
ompi_datatype_duplicate (newfiletype, &fh->f_filetype);
|
||||
|
||||
fh->f_cc_size = get_contiguous_chunk_size (fh);
|
||||
|
||||
if( SIMPLE_PLUS == mca_io_ompio_grouping_option ) {
|
||||
fh->f_cc_size = get_contiguous_chunk_size (fh, 1);
|
||||
}
|
||||
else {
|
||||
fh->f_cc_size = get_contiguous_chunk_size (fh, 0);
|
||||
}
|
||||
|
||||
if (opal_datatype_is_contiguous_memory_layout(&etype->super,1)) {
|
||||
if (opal_datatype_is_contiguous_memory_layout(&filetype->super,1) &&
|
||||
@ -166,7 +172,7 @@ int mca_common_ompio_set_view (mca_io_ompio_file_t *fh,
|
||||
}
|
||||
}
|
||||
|
||||
if ( SIMPLE != mca_io_ompio_grouping_option ) {
|
||||
if ( SIMPLE != mca_io_ompio_grouping_option || SIMPLE_PLUS != mca_io_ompio_grouping_option ) {
|
||||
|
||||
ret = mca_io_ompio_fview_based_grouping(fh,
|
||||
&num_groups,
|
||||
@ -179,6 +185,7 @@ int mca_common_ompio_set_view (mca_io_ompio_file_t *fh,
|
||||
else {
|
||||
int done=0;
|
||||
int ndims;
|
||||
|
||||
if ( fh->f_comm->c_flags & OMPI_COMM_CART ){
|
||||
ret = fh->f_comm->c_topo->topo.cart.cartdim_get( fh->f_comm, &ndims);
|
||||
if ( OMPI_SUCCESS != ret ){
|
||||
@ -253,7 +260,7 @@ exit:
|
||||
return ret;
|
||||
}
|
||||
|
||||
OMPI_MPI_OFFSET_TYPE get_contiguous_chunk_size (mca_io_ompio_file_t *fh)
|
||||
OMPI_MPI_OFFSET_TYPE get_contiguous_chunk_size (mca_io_ompio_file_t *fh, int flag)
|
||||
{
|
||||
int uniform = 0;
|
||||
OMPI_MPI_OFFSET_TYPE avg[3] = {0,0,0};
|
||||
@ -268,60 +275,66 @@ OMPI_MPI_OFFSET_TYPE get_contiguous_chunk_size (mca_io_ompio_file_t *fh)
|
||||
** 2. each section in the file view has exactly the same size
|
||||
*/
|
||||
|
||||
for (i=0 ; i<(int)fh->f_iov_count ; i++) {
|
||||
avg[0] += fh->f_decoded_iov[i].iov_len;
|
||||
if (i && 0 == uniform) {
|
||||
if (fh->f_decoded_iov[i].iov_len != fh->f_decoded_iov[i-1].iov_len) {
|
||||
uniform = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
if ( 0 != fh->f_iov_count ) {
|
||||
avg[0] = avg[0]/fh->f_iov_count;
|
||||
}
|
||||
avg[1] = (OMPI_MPI_OFFSET_TYPE) fh->f_iov_count;
|
||||
avg[2] = (OMPI_MPI_OFFSET_TYPE) uniform;
|
||||
|
||||
fh->f_comm->c_coll->coll_allreduce (avg,
|
||||
global_avg,
|
||||
3,
|
||||
OMPI_OFFSET_DATATYPE,
|
||||
MPI_SUM,
|
||||
fh->f_comm,
|
||||
fh->f_comm->c_coll->coll_allreduce_module);
|
||||
global_avg[0] = global_avg[0]/fh->f_size;
|
||||
global_avg[1] = global_avg[1]/fh->f_size;
|
||||
|
||||
#if 0
|
||||
/* Disabling the feature since we are not using it anyway. Saves us one allreduce operation. */
|
||||
int global_uniform=0;
|
||||
|
||||
if ( global_avg[0] == avg[0] &&
|
||||
global_avg[1] == avg[1] &&
|
||||
0 == avg[2] &&
|
||||
0 == global_avg[2] ) {
|
||||
uniform = 0;
|
||||
if ( flag ) {
|
||||
global_avg[0] = MCA_IO_DEFAULT_FILE_VIEW_SIZE;
|
||||
}
|
||||
else {
|
||||
uniform = 1;
|
||||
}
|
||||
|
||||
/* second confirmation round to see whether all processes agree
|
||||
** on having a uniform file view or not
|
||||
*/
|
||||
fh->f_comm->c_coll->coll_allreduce (&uniform,
|
||||
&global_uniform,
|
||||
1,
|
||||
MPI_INT,
|
||||
MPI_MAX,
|
||||
fh->f_comm,
|
||||
fh->f_comm->c_coll->coll_allreduce_module);
|
||||
|
||||
if ( 0 == global_uniform ){
|
||||
/* yes, everybody agrees on having a uniform file view */
|
||||
fh->f_flags |= OMPIO_UNIFORM_FVIEW;
|
||||
}
|
||||
for (i=0 ; i<(int)fh->f_iov_count ; i++) {
|
||||
avg[0] += fh->f_decoded_iov[i].iov_len;
|
||||
if (i && 0 == uniform) {
|
||||
if (fh->f_decoded_iov[i].iov_len != fh->f_decoded_iov[i-1].iov_len) {
|
||||
uniform = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
if ( 0 != fh->f_iov_count ) {
|
||||
avg[0] = avg[0]/fh->f_iov_count;
|
||||
}
|
||||
avg[1] = (OMPI_MPI_OFFSET_TYPE) fh->f_iov_count;
|
||||
avg[2] = (OMPI_MPI_OFFSET_TYPE) uniform;
|
||||
|
||||
fh->f_comm->c_coll->coll_allreduce (avg,
|
||||
global_avg,
|
||||
3,
|
||||
OMPI_OFFSET_DATATYPE,
|
||||
MPI_SUM,
|
||||
fh->f_comm,
|
||||
fh->f_comm->c_coll->coll_allreduce_module);
|
||||
global_avg[0] = global_avg[0]/fh->f_size;
|
||||
global_avg[1] = global_avg[1]/fh->f_size;
|
||||
|
||||
#if 0
|
||||
/* Disabling the feature since we are not using it anyway. Saves us one allreduce operation. */
|
||||
int global_uniform=0;
|
||||
|
||||
if ( global_avg[0] == avg[0] &&
|
||||
global_avg[1] == avg[1] &&
|
||||
0 == avg[2] &&
|
||||
0 == global_avg[2] ) {
|
||||
uniform = 0;
|
||||
}
|
||||
else {
|
||||
uniform = 1;
|
||||
}
|
||||
|
||||
/* second confirmation round to see whether all processes agree
|
||||
** on having a uniform file view or not
|
||||
*/
|
||||
fh->f_comm->c_coll->coll_allreduce (&uniform,
|
||||
&global_uniform,
|
||||
1,
|
||||
MPI_INT,
|
||||
MPI_MAX,
|
||||
fh->f_comm,
|
||||
fh->f_comm->c_coll->coll_allreduce_module);
|
||||
|
||||
if ( 0 == global_uniform ){
|
||||
/* yes, everybody agrees on having a uniform file view */
|
||||
fh->f_flags |= OMPIO_UNIFORM_FVIEW;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
return global_avg[0];
|
||||
}
|
||||
|
||||
|
@ -111,7 +111,7 @@ OMPI_DECLSPEC extern int mca_io_ompio_coll_timing_info;
|
||||
#define OPTIMIZE_GROUPING 4
|
||||
#define SIMPLE 5
|
||||
#define NO_REFINEMENT 6
|
||||
|
||||
#define SIMPLE_PLUS 7
|
||||
|
||||
#define OMPIO_UNIFORM_DIST_THRESHOLD 0.5
|
||||
#define OMPIO_CONTG_THRESHOLD 1048576
|
||||
|
@ -497,8 +497,9 @@ int mca_io_ompio_set_aggregator_props (struct mca_io_ompio_file_t *fh,
|
||||
fh->f_flags |= OMPIO_AGGREGATOR_IS_SET;
|
||||
|
||||
if (-1 == num_aggregators) {
|
||||
if ( SIMPLE == mca_io_ompio_grouping_option ||
|
||||
NO_REFINEMENT == mca_io_ompio_grouping_option ) {
|
||||
if ( SIMPLE == mca_io_ompio_grouping_option ||
|
||||
NO_REFINEMENT == mca_io_ompio_grouping_option ||
|
||||
SIMPLE_PLUS == mca_io_ompio_grouping_option ) {
|
||||
fh->f_aggregator_index = 0;
|
||||
fh->f_final_num_aggrs = fh->f_init_num_aggrs;
|
||||
fh->f_procs_per_group = fh->f_init_procs_per_group;
|
||||
|
@ -212,7 +212,7 @@ static int register_component(void)
|
||||
"Option for grouping of processes in the aggregator selection "
|
||||
"1: Data volume based grouping 2: maximizing group size uniformity 3: maximimze "
|
||||
"data contiguity 4: hybrid optimization 5: simple (default) "
|
||||
"6: skip refinement step",
|
||||
"6: skip refinement step 7: simple+: grouping based on default file view",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
|
Loading…
Reference in new issue
Block a user