From f45e9cfdbe7dbb520874f18b5d301603926cdd5e Mon Sep 17 00:00:00 2001 From: raafatfeki Date: Thu, 1 Aug 2019 15:18:58 -0500 Subject: [PATCH] fcoll/vulcan: Adjustment of displacement index in collective write Within the shuffle iteration, the aggregators have to set a displacement array needed to receive data from other processes. The array had 1 extra element. We adjust the displacement index to match the number of elements. Signed-off-by: raafatfeki --- .../vulcan/fcoll_vulcan_file_write_all.c | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/ompi/mca/fcoll/vulcan/fcoll_vulcan_file_write_all.c b/ompi/mca/fcoll/vulcan/fcoll_vulcan_file_write_all.c index 34548c63c1..abf281a0a3 100644 --- a/ompi/mca/fcoll/vulcan/fcoll_vulcan_file_write_all.c +++ b/ompi/mca/fcoll/vulcan/fcoll_vulcan_file_write_all.c @@ -836,7 +836,7 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i for(l=0;lprocs_per_group;l++){ - data->disp_index[l] = 1; + data->disp_index[l] = 0; if ( data->max_disp_index[l] == 0 ) { data->blocklen_per_process[l] = (int *) calloc (INIT_LEN, sizeof(int)); @@ -915,8 +915,8 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i if (data->bytes_remaining <= data->bytes_to_write_in_cycle) { /* The data fits completely into the block */ if (aggregator == rank) { - data->blocklen_per_process[data->n][data->disp_index[data->n] - 1] = data->bytes_remaining; - data->displs_per_process[data->n][data->disp_index[data->n] - 1] = + data->blocklen_per_process[data->n][data->disp_index[data->n]] = data->bytes_remaining; + data->displs_per_process[data->n][data->disp_index[data->n]] = (ptrdiff_t)data->global_iov_array[data->sorted[data->current_index]].iov_base + (data->global_iov_array[data->sorted[data->current_index]].iov_len - data->bytes_remaining); @@ -950,11 +950,12 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i /* the remaining data from the previous cycle is larger than the data->bytes_to_write_in_cycle, so we have to segment again */ if (aggregator == rank) { - data->blocklen_per_process[data->n][data->disp_index[data->n] - 1] = data->bytes_to_write_in_cycle; - data->displs_per_process[data->n][data->disp_index[data->n] - 1] = + data->blocklen_per_process[data->n][data->disp_index[data->n]] = data->bytes_to_write_in_cycle; + data->displs_per_process[data->n][data->disp_index[data->n]] = (ptrdiff_t)data->global_iov_array[data->sorted[data->current_index]].iov_base + (data->global_iov_array[data->sorted[data->current_index]].iov_len - data->bytes_remaining); + data->disp_index[data->n] += 1; } if (data->procs_in_group[data->n] == rank) { @@ -971,9 +972,10 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i (MPI_Aint) data->global_iov_array[data->sorted[data->current_index]].iov_len) { /* This entry has more data than we can sendin one cycle */ if (aggregator == rank) { - data->blocklen_per_process[data->n][data->disp_index[data->n] - 1] = data->bytes_to_write_in_cycle; - data->displs_per_process[data->n][data->disp_index[data->n] - 1] = + data->blocklen_per_process[data->n][data->disp_index[data->n]] = data->bytes_to_write_in_cycle; + data->displs_per_process[data->n][data->disp_index[data->n]] = (ptrdiff_t)data->global_iov_array[data->sorted[data->current_index]].iov_base ; + data->disp_index[data->n] += 1; } if (data->procs_in_group[data->n] == rank) { bytes_sent += data->bytes_to_write_in_cycle; @@ -987,9 +989,9 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i else { /* Next data entry is less than data->bytes_to_write_in_cycle */ if (aggregator == rank) { - data->blocklen_per_process[data->n][data->disp_index[data->n] - 1] = + data->blocklen_per_process[data->n][data->disp_index[data->n]] = data->global_iov_array[data->sorted[data->current_index]].iov_len; - data->displs_per_process[data->n][data->disp_index[data->n] - 1] = (ptrdiff_t) + data->displs_per_process[data->n][data->disp_index[data->n]] = (ptrdiff_t) data->global_iov_array[data->sorted[data->current_index]].iov_base; data->disp_index[data->n] += 1;