From 3529d447020684ab305411caa97423826bb40906 Mon Sep 17 00:00:00 2001
From: "Nysal Jan K.A"
Date: Wed, 24 Jul 2019 13:04:41 +0530
Subject: [PATCH] osc/ucx: Fix data corruption with non-contiguous accumulates

Signed-off-by: Nysal Jan K.A
---
 ompi/mca/osc/ucx/osc_ucx_comm.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/ompi/mca/osc/ucx/osc_ucx_comm.c b/ompi/mca/osc/ucx/osc_ucx_comm.c
index 4b58031f4d..e70516033f 100644
--- a/ompi/mca/osc/ucx/osc_ucx_comm.c
+++ b/ompi/mca/osc/ucx/osc_ucx_comm.c
@@ -510,12 +510,13 @@ int ompi_osc_ucx_accumulate(const void *origin_addr, int origin_count,
             if ((op != &ompi_mpi_op_maxloc.op && op != &ompi_mpi_op_minloc.op) ||
                 ompi_datatype_is_contiguous_memory_layout(temp_dt, temp_count)) {
                 size_t temp_size;
+                char *curr_temp_addr = (char *)temp_addr;
                 ompi_datatype_type_size(temp_dt, &temp_size);
                 while (origin_ucx_iov_idx < origin_ucx_iov_count) {
                     int curr_count = origin_ucx_iov[origin_ucx_iov_idx].len / temp_size;
                     ompi_op_reduce(op, origin_ucx_iov[origin_ucx_iov_idx].addr,
-                                   temp_addr, curr_count, temp_dt);
-                    temp_addr = (void *)((char *)temp_addr + curr_count * temp_size);
+                                   curr_temp_addr, curr_count, temp_dt);
+                    curr_temp_addr += curr_count * temp_size;
                     origin_ucx_iov_idx++;
                 }
             } else {
@@ -730,12 +731,13 @@ int ompi_osc_ucx_get_accumulate(const void *origin_addr, int origin_count,
             if ((op != &ompi_mpi_op_maxloc.op && op != &ompi_mpi_op_minloc.op) ||
                 ompi_datatype_is_contiguous_memory_layout(temp_dt, temp_count)) {
                 size_t temp_size;
+                char *curr_temp_addr = (char *)temp_addr;
                 ompi_datatype_type_size(temp_dt, &temp_size);
                 while (origin_ucx_iov_idx < origin_ucx_iov_count) {
                     int curr_count = origin_ucx_iov[origin_ucx_iov_idx].len / temp_size;
                     ompi_op_reduce(op, origin_ucx_iov[origin_ucx_iov_idx].addr,
-                                   temp_addr, curr_count, temp_dt);
-                    temp_addr = (void *)((char *)temp_addr + curr_count * temp_size);
+                                   curr_temp_addr, curr_count, temp_dt);
+                    curr_temp_addr += curr_count * temp_size;
                     origin_ucx_iov_idx++;
                 }
             } else {