Merge pull request #6857 from raafatfeki/pr/ompio_coll_write_clean

Pr/ompio_fcoll_write_clean
2019-09-04 11:06:56 -05:00 · 2019-09-04 11:06:56 -05:00 · 7877743784
--- a/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_write_all.c
+++ b/ompi/mca/fcoll/dynamic_gen2/fcoll_dynamic_gen2_file_write_all.c
@@ -801,7 +801,7 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i
        }
        
        for(l=0;l<data->procs_per_group;l++){
-            data->disp_index[l] =  1;
+            data->disp_index[l] = 0;

            if(data->max_disp_index[l] == 0) {
                data->blocklen_per_process[l]   = (int *)       calloc (INIT_LEN, sizeof(int));
@@ -880,8 +880,8 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i
            if (data->bytes_remaining <= data->bytes_to_write_in_cycle) {
                /* The data fits completely into the block */
                if (aggregator == rank) {
-                    data->blocklen_per_process[data->n][data->disp_index[data->n] - 1] = data->bytes_remaining;
-                    data->displs_per_process[data->n][data->disp_index[data->n] - 1] =
+                    data->blocklen_per_process[data->n][data->disp_index[data->n]] = data->bytes_remaining;
+                    data->displs_per_process[data->n][data->disp_index[data->n]] =
                        (ptrdiff_t)data->global_iov_array[data->sorted[data->current_index]].iov_base +
                        (data->global_iov_array[data->sorted[data->current_index]].iov_len
                         - data->bytes_remaining);
@@ -914,11 +914,12 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i
                /* the remaining data from the previous cycle is larger than the
                   data->bytes_to_write_in_cycle, so we have to segment again */
                if (aggregator == rank) {
-                    data->blocklen_per_process[data->n][data->disp_index[data->n] - 1] = data->bytes_to_write_in_cycle;
-                    data->displs_per_process[data->n][data->disp_index[data->n] - 1] =
+                    data->blocklen_per_process[data->n][data->disp_index[data->n]] = data->bytes_to_write_in_cycle;
+                    data->displs_per_process[data->n][data->disp_index[data->n]] =
                        (ptrdiff_t)data->global_iov_array[data->sorted[data->current_index]].iov_base +
                        (data->global_iov_array[data->sorted[data->current_index]].iov_len
                         - data->bytes_remaining);
+                    data->disp_index[data->n] += 1;
                }
                
                if (data->procs_in_group[data->n] == rank) {
@@ -935,9 +936,10 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i
                (MPI_Aint) data->global_iov_array[data->sorted[data->current_index]].iov_len) {
                /* This entry has more data than we can sendin one cycle */
                if (aggregator == rank) {
-                    data->blocklen_per_process[data->n][data->disp_index[data->n] - 1] = data->bytes_to_write_in_cycle;
-                    data->displs_per_process[data->n][data->disp_index[data->n] - 1] =
+                    data->blocklen_per_process[data->n][data->disp_index[data->n]] = data->bytes_to_write_in_cycle;
+                    data->displs_per_process[data->n][data->disp_index[data->n]] =
                        (ptrdiff_t)data->global_iov_array[data->sorted[data->current_index]].iov_base ;
+                    data->disp_index[data->n] += 1;
                }
                if (data->procs_in_group[data->n] == rank) {
                    bytes_sent += data->bytes_to_write_in_cycle;
@@ -951,9 +953,9 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i
            else {
                /* Next data entry is less than data->bytes_to_write_in_cycle */
                if (aggregator == rank) {
-                    data->blocklen_per_process[data->n][data->disp_index[data->n] - 1] =
+                    data->blocklen_per_process[data->n][data->disp_index[data->n]] =
                        data->global_iov_array[data->sorted[data->current_index]].iov_len;
-                    data->displs_per_process[data->n][data->disp_index[data->n] - 1] = (ptrdiff_t)
+                    data->displs_per_process[data->n][data->disp_index[data->n]] = (ptrdiff_t)
                        data->global_iov_array[data->sorted[data->current_index]].iov_base;
                    
                    data->disp_index[data->n] += 1;
--- a/ompi/mca/fcoll/vulcan/fcoll_vulcan_file_write_all.c
+++ b/ompi/mca/fcoll/vulcan/fcoll_vulcan_file_write_all.c
@@ -836,7 +836,7 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i

        
        for(l=0;l<data->procs_per_group;l++){
-            data->disp_index[l] =  1;
+            data->disp_index[l] = 0;
            
            if ( data->max_disp_index[l] == 0 ) {
                data->blocklen_per_process[l] = (int *) calloc (INIT_LEN, sizeof(int));
@@ -915,8 +915,8 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i
            if (data->bytes_remaining <= data->bytes_to_write_in_cycle) {
                /* The data fits completely into the block */
                if (aggregator == rank) {
-                    data->blocklen_per_process[data->n][data->disp_index[data->n] - 1] = data->bytes_remaining;
-                    data->displs_per_process[data->n][data->disp_index[data->n] - 1] =
+                    data->blocklen_per_process[data->n][data->disp_index[data->n]] = data->bytes_remaining;
+                    data->displs_per_process[data->n][data->disp_index[data->n]] =
                        (ptrdiff_t)data->global_iov_array[data->sorted[data->current_index]].iov_base +
                        (data->global_iov_array[data->sorted[data->current_index]].iov_len
                         - data->bytes_remaining);
@@ -950,11 +950,12 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i
                /* the remaining data from the previous cycle is larger than the
                   data->bytes_to_write_in_cycle, so we have to segment again */
                if (aggregator == rank) {
-                    data->blocklen_per_process[data->n][data->disp_index[data->n] - 1] = data->bytes_to_write_in_cycle;
-                    data->displs_per_process[data->n][data->disp_index[data->n] - 1] =
+                    data->blocklen_per_process[data->n][data->disp_index[data->n]] = data->bytes_to_write_in_cycle;
+                    data->displs_per_process[data->n][data->disp_index[data->n]] =
                        (ptrdiff_t)data->global_iov_array[data->sorted[data->current_index]].iov_base +
                        (data->global_iov_array[data->sorted[data->current_index]].iov_len
                         - data->bytes_remaining);
+                    data->disp_index[data->n] += 1;
                }
                
                if (data->procs_in_group[data->n] == rank) {
@@ -971,9 +972,10 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i
                (MPI_Aint) data->global_iov_array[data->sorted[data->current_index]].iov_len) {
                /* This entry has more data than we can sendin one cycle */
                if (aggregator == rank) {
-                    data->blocklen_per_process[data->n][data->disp_index[data->n] - 1] = data->bytes_to_write_in_cycle;
-                    data->displs_per_process[data->n][data->disp_index[data->n] - 1] =
+                    data->blocklen_per_process[data->n][data->disp_index[data->n]] = data->bytes_to_write_in_cycle;
+                    data->displs_per_process[data->n][data->disp_index[data->n]] =
                        (ptrdiff_t)data->global_iov_array[data->sorted[data->current_index]].iov_base ;
+                    data->disp_index[data->n] += 1;
                }
                if (data->procs_in_group[data->n] == rank) {
                    bytes_sent += data->bytes_to_write_in_cycle;
@@ -987,9 +989,9 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i
            else {
                /* Next data entry is less than data->bytes_to_write_in_cycle */
                if (aggregator == rank) {
-                    data->blocklen_per_process[data->n][data->disp_index[data->n] - 1] =
+                    data->blocklen_per_process[data->n][data->disp_index[data->n]] =
                        data->global_iov_array[data->sorted[data->current_index]].iov_len;
-                    data->displs_per_process[data->n][data->disp_index[data->n] - 1] = (ptrdiff_t)
+                    data->displs_per_process[data->n][data->disp_index[data->n]] = (ptrdiff_t)
                        data->global_iov_array[data->sorted[data->current_index]].iov_base;

                    data->disp_index[data->n] += 1;