1
1

io/ompio: add a new option to disable amode overwriting

ompio has historically changed the WRONLY flag provided by the application
to RDWR to allow for the data sieving optimization within the two-phase I/O
fcoll component. This change did not have a performance impact
on regular UNIX file systems, but seems to hurt performance on NFS (and maybe Lustre?)

So provide an option that allows keeping the WRONLY flag, and raise an error
if the fcoll/two-phase component would actually need to use data sieving.

Signed-off-by: Edgar Gabriel <egabriel@central.uh.edu>
Этот коммит содержится в:
Edgar Gabriel 2017-09-29 11:37:44 -05:00
родитель 97d0469719
Коммит 75ab006ec0
4 изменённых файлов: 30 добавлений и 5 удалений

Просмотреть файл

@ -112,10 +112,12 @@ int mca_common_ompio_file_open (ompi_communicator_t *comm,
/* This fix is needed for data seiving to work with
two-phase collective I/O */
if ((amode & MPI_MODE_WRONLY)){
amode -= MPI_MODE_WRONLY;
amode += MPI_MODE_RDWR;
}
if ( mca_io_ompio_overwrite_amode ) {
if ((amode & MPI_MODE_WRONLY)){
amode -= MPI_MODE_WRONLY;
amode += MPI_MODE_RDWR;
}
}
/*--------------------------------------------------*/

Просмотреть файл

@ -239,7 +239,6 @@ mca_fcoll_two_phase_file_write_all (mca_io_ompio_file_t *fh,
two_phase_num_io_procs = fh->f_size;
}
#if DEBUG_ON
printf("Number of aggregators : %ld\n", two_phase_num_io_procs);
#endif
@ -1074,6 +1073,18 @@ static int two_phase_exchage_data(mca_io_ompio_file_t *fh,
fh->f_io_array[0].length = size;
fh->f_io_array[0].memory_address = write_buf;
if (fh->f_num_of_io_entries){
if ( fh->f_amode & MPI_MODE_WRONLY &&
!mca_io_ompio_overwrite_amode ){
if ( 0 == fh->f_rank ) {
printf("\n File not opened in RDWR mode, can not continue."
"\n To resolve this problem, you can either \n"
" a. open the file with MPI_MODE_RDWR instead of MPI_MODE_WRONLY\n"
" b. ensure that the mca parameter mca_io_ompio_amode_overwrite is set to 1\n"
" c. use an fcoll component that does not use data sieving (e.g. dynamic)\n");
}
ret = MPI_ERR_FILE;
goto exit;
}
if ( 0 > fh->f_fbtl->fbtl_preadv (fh)) {
opal_output(1, "READ FAILED\n");
ret = OMPI_ERROR;

Просмотреть файл

@ -51,6 +51,7 @@ extern int mca_io_ompio_sharedfp_lazy_open;
extern int mca_io_ompio_grouping_option;
extern int mca_io_ompio_max_aggregators_ratio;
extern int mca_io_ompio_aggregators_cutoff_threshold;
extern int mca_io_ompio_overwrite_amode;
OMPI_DECLSPEC extern int mca_io_ompio_coll_timing_info;

Просмотреть файл

@ -41,6 +41,7 @@ int mca_io_ompio_coll_timing_info = 0;
int mca_io_ompio_sharedfp_lazy_open = 0;
int mca_io_ompio_max_aggregators_ratio=8;
int mca_io_ompio_aggregators_cutoff_threshold=3;
int mca_io_ompio_overwrite_amode = 1;
int mca_io_ompio_grouping_option=5;
@ -242,6 +243,16 @@ static int register_component(void)
MCA_BASE_VAR_SCOPE_READONLY,
&mca_io_ompio_aggregators_cutoff_threshold);
mca_io_ompio_overwrite_amode = 1;
(void) mca_base_component_var_register(&mca_io_ompio_component.io_version,
"overwrite_amode",
"Overwrite WRONLY amode to RDWR to enable data sieving "
"1: allow overwrite (default) "
"0: do not overwrite amode provided by application ",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_io_ompio_overwrite_amode);
return OMPI_SUCCESS;
}