1. use communicator collectives if possible for performance reasons
2. combined multiple allgathers into a single one
This commit is contained in:
Edgar Gabriel 2016-02-19 11:04:04 -06:00
parent e63836c653
commit 92d1b99468
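The first point of the commit message is essentially a dispatch: when only one aggregator group is in use, the process group coincides with the communicator, so the data can be gathered through the communicator's collective module (usually a tuned algorithm) instead of the hand-rolled, array-based allgather. The sketch below restates that pattern in plain MPI. It is a minimal illustration under that assumption, not OMPIO code; the helper names group_is_comm and allgather_int_array, and the naive linear fallback, are invented for the example.

#include <mpi.h>
#include <stdlib.h>

/* Returns 1 if the rank list is simply 0..size-1, i.e. the "group"
 * covers the whole communicator in order. */
static int group_is_comm(const int *ranks, int nranks, MPI_Comm comm)
{
    int size, i;
    MPI_Comm_size(comm, &size);
    if (nranks != size) return 0;
    for (i = 0; i < nranks; i++) {
        if (ranks[i] != i) return 0;
    }
    return 1;
}

/* Allgather 'count' ints from every process listed in ranks[].
 * If the group spans the communicator, defer to the communicator
 * collective; otherwise fall back to a naive linear point-to-point
 * exchange over the explicit rank list. */
static int allgather_int_array(const int *sendbuf, int count, int *recvbuf,
                               const int *ranks, int nranks, MPI_Comm comm)
{
    int rank, i, me = -1;

    if (group_is_comm(ranks, nranks, comm)) {
        return MPI_Allgather(sendbuf, count, MPI_INT,
                             recvbuf, count, MPI_INT, comm);
    }

    MPI_Comm_rank(comm, &rank);
    for (i = 0; i < nranks; i++) {
        if (ranks[i] == rank) me = i;
    }
    if (me < 0) return MPI_SUCCESS;   /* this process is not in the group */

    MPI_Request *reqs = malloc(2 * nranks * sizeof(MPI_Request));
    for (i = 0; i < nranks; i++) {
        MPI_Irecv(recvbuf + i * count, count, MPI_INT,
                  ranks[i], 0, comm, &reqs[i]);
    }
    for (i = 0; i < nranks; i++) {
        MPI_Isend(sendbuf, count, MPI_INT,
                  ranks[i], 0, comm, &reqs[nranks + i]);
    }
    MPI_Waitall(2 * nranks, reqs, MPI_STATUSES_IGNORE);
    free(reqs);
    return MPI_SUCCESS;
}

Compiled with mpicc, allgather_int_array goes through MPI_Allgather whenever the rank list is simply 0..size-1 and otherwise degrades to a linear point-to-point exchange, which is the kind of path the commit avoids when the communicator collective is usable.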


@@ -144,7 +144,8 @@ int mca_fcoll_dynamic_gen2_file_write_all (mca_io_ompio_file_t *fh,
     MPI_Aint *broken_total_lengths=NULL;
     int *aggregators=NULL;
-    int write_chunksize;
+    int write_chunksize, *result_counts=NULL;
 #if OMPIO_FCOLL_WANT_TIME_BREAKDOWN
     double write_time = 0.0, start_write_time = 0.0, end_write_time = 0.0;
@@ -261,16 +262,28 @@ int mca_fcoll_dynamic_gen2_file_write_all (mca_io_ompio_file_t *fh,
 #if OMPIO_FCOLL_WANT_TIME_BREAKDOWN
     start_comm_time = MPI_Wtime();
 #endif
-    ret = fh->f_allgather_array (broken_total_lengths,
-                                 dynamic_gen2_num_io_procs,
-                                 MPI_LONG,
-                                 total_bytes_per_process,
-                                 dynamic_gen2_num_io_procs,
-                                 MPI_LONG,
-                                 0,
-                                 fh->f_procs_in_group,
-                                 fh->f_procs_per_group,
-                                 fh->f_comm);
+    if ( 1 == mca_fcoll_dynamic_gen2_num_groups ) {
+        ret = fh->f_comm->c_coll.coll_allgather (broken_total_lengths,
+                                                 dynamic_gen2_num_io_procs,
+                                                 MPI_LONG,
+                                                 total_bytes_per_process,
+                                                 dynamic_gen2_num_io_procs,
+                                                 MPI_LONG,
+                                                 fh->f_comm,
+                                                 fh->f_comm->c_coll.coll_allgather_module);
+    }
+    else {
+        ret = fh->f_allgather_array (broken_total_lengths,
+                                     dynamic_gen2_num_io_procs,
+                                     MPI_LONG,
+                                     total_bytes_per_process,
+                                     dynamic_gen2_num_io_procs,
+                                     MPI_LONG,
+                                     0,
+                                     fh->f_procs_in_group,
+                                     fh->f_procs_per_group,
+                                     fh->f_comm);
+    }
 
     if( OMPI_SUCCESS != ret){
         goto exit;
@@ -298,8 +311,46 @@ int mca_fcoll_dynamic_gen2_file_write_all (mca_io_ompio_file_t *fh,
         free (total_bytes_per_process);
         total_bytes_per_process = NULL;
     }
+    result_counts = (int *) malloc ( dynamic_gen2_num_io_procs * fh->f_procs_per_group * sizeof(int) );
+    if ( NULL == result_counts ) {
+        ret = OMPI_ERR_OUT_OF_RESOURCE;
+        goto exit;
+    }
+
+#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN
+    start_comm_time = MPI_Wtime();
+#endif
+    if ( 1 == mca_fcoll_dynamic_gen2_num_groups ) {
+        ret = fh->f_comm->c_coll.coll_allgather(broken_counts,
+                                                dynamic_gen2_num_io_procs,
+                                                MPI_INT,
+                                                result_counts,
+                                                dynamic_gen2_num_io_procs,
+                                                MPI_INT,
+                                                fh->f_comm,
+                                                fh->f_comm->c_coll.coll_allgather_module);
+    }
+    else {
+        ret = fh->f_allgather_array (broken_counts,
+                                     dynamic_gen2_num_io_procs,
+                                     MPI_INT,
+                                     result_counts,
+                                     dynamic_gen2_num_io_procs,
+                                     MPI_INT,
+                                     0,
+                                     fh->f_procs_in_group,
+                                     fh->f_procs_per_group,
+                                     fh->f_comm);
+    }
+    if( OMPI_SUCCESS != ret){
+        goto exit;
+    }
+#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN
+    end_comm_time = MPI_Wtime();
+    comm_time += (end_comm_time - start_comm_time);
+#endif
 
     /*************************************************************
      *** 4. Allgather the offset/lengths array from all processes
      *************************************************************/
@@ -312,28 +363,9 @@ int mca_fcoll_dynamic_gen2_file_write_all (mca_io_ompio_file_t *fh,
             ret = OMPI_ERR_OUT_OF_RESOURCE;
             goto exit;
         }
-#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN
-        start_comm_time = MPI_Wtime();
-#endif
-        ret = fh->f_allgather_array (&broken_counts[i],
-                                     1,
-                                     MPI_INT,
-                                     aggr_data[i]->fview_count,
-                                     1,
-                                     MPI_INT,
-                                     i,
-                                     fh->f_procs_in_group,
-                                     fh->f_procs_per_group,
-                                     fh->f_comm);
-        if( OMPI_SUCCESS != ret){
-            goto exit;
-        }
-#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN
-        end_comm_time = MPI_Wtime();
-        comm_time += (end_comm_time - start_comm_time);
-#endif
+        for ( j=0; j <fh->f_procs_per_group; j++ ) {
+            aggr_data[i]->fview_count[j] = result_counts[dynamic_gen2_num_io_procs*j+i];
+        }
 
         displs = (int*) malloc (fh->f_procs_per_group * sizeof (int));
         if (NULL == displs) {
             opal_output (1, "OUT OF MEMORY\n");
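The hunk just above is where the second point of the commit message pays off: the per-aggregator allgather of a single int is gone, because the combined allgather in the -298,8 +311,46 hunk has already placed every process's per-aggregator counts into result_counts, with process j's block starting at index dynamic_gen2_num_io_procs*j. Each aggregator i then simply reads its column, result_counts[dynamic_gen2_num_io_procs*j + i]. The standalone program below is a minimal sketch of that layout and indexing; num_aggr and my_counts are made-up stand-ins for dynamic_gen2_num_io_procs and broken_counts, and it assumes the single-group case where the whole communicator participates.

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    int rank, nprocs, i, j;
    const int num_aggr = 2;   /* made-up stand-in for dynamic_gen2_num_io_procs */

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    /* Each process contributes one count per aggregator
     * (stand-in for broken_counts[]). */
    int *my_counts = malloc(num_aggr * sizeof(int));
    for (i = 0; i < num_aggr; i++) {
        my_counts[i] = 100 * rank + i;   /* dummy, easy-to-recognize values */
    }

    /* One combined allgather instead of num_aggr separate ones:
     * process j's block lands at result_counts[num_aggr * j]. */
    int *result_counts = malloc(num_aggr * nprocs * sizeof(int));
    MPI_Allgather(my_counts, num_aggr, MPI_INT,
                  result_counts, num_aggr, MPI_INT, MPI_COMM_WORLD);

    /* Unpack per aggregator, mirroring
     * aggr_data[i]->fview_count[j] = result_counts[num_aggr*j + i]. */
    for (i = 0; i < num_aggr; i++) {
        printf("rank %d, aggregator %d counts:", rank, i);
        for (j = 0; j < nprocs; j++) {
            printf(" %d", result_counts[num_aggr * j + i]);
        }
        printf("\n");
    }

    free(my_counts);
    free(result_counts);
    MPI_Finalize();
    return 0;
}

Run with, for example, mpirun -np 4: every rank prints the same row of counts per aggregator, which corresponds to what aggr_data[i]->fview_count ends up holding in the diff.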
@@ -375,17 +407,30 @@ int mca_fcoll_dynamic_gen2_file_write_all (mca_io_ompio_file_t *fh,
 #if OMPIO_FCOLL_WANT_TIME_BREAKDOWN
         start_comm_time = MPI_Wtime();
 #endif
-        ret = fh->f_allgatherv_array (broken_iov_arrays[i],
-                                      broken_counts[i],
-                                      fh->f_iov_type,
-                                      aggr_data[i]->global_iov_array,
-                                      aggr_data[i]->fview_count,
-                                      displs,
-                                      fh->f_iov_type,
-                                      i,
-                                      fh->f_procs_in_group,
-                                      fh->f_procs_per_group,
-                                      fh->f_comm);
+        if ( 1 == mca_fcoll_dynamic_gen2_num_groups ) {
+            ret = fh->f_comm->c_coll.coll_allgatherv (broken_iov_arrays[i],
+                                                      broken_counts[i],
+                                                      fh->f_iov_type,
+                                                      aggr_data[i]->global_iov_array,
+                                                      aggr_data[i]->fview_count,
+                                                      displs,
+                                                      fh->f_iov_type,
+                                                      fh->f_comm,
+                                                      fh->f_comm->c_coll.coll_allgatherv_module );
+        }
+        else {
+            ret = fh->f_allgatherv_array (broken_iov_arrays[i],
+                                          broken_counts[i],
+                                          fh->f_iov_type,
+                                          aggr_data[i]->global_iov_array,
+                                          aggr_data[i]->fview_count,
+                                          displs,
+                                          fh->f_iov_type,
+                                          aggregators[i],
+                                          fh->f_procs_in_group,
+                                          fh->f_procs_per_group,
+                                          fh->f_comm);
+        }
         if (OMPI_SUCCESS != ret){
             goto exit;
         }
@@ -659,6 +704,7 @@ exit :
     fh->f_procs_per_group=0;
     free(reqs1);
     free(reqs2);
+    free(result_counts);
 
     return OMPI_SUCCESS;