1
1

osc/ucx: Fix data corruption with non-contiguous accumulates

Signed-off-by: Nysal Jan K.A <jnysal@in.ibm.com>
(cherry picked from commit 3529d447020684ab305411caa97423826bb40906)
Этот коммит содержится в:
Nysal Jan K.A 2019-07-24 13:04:41 +05:30
родитель 86794e5b33
Коммит 359cdf2b53

Просмотреть файл

@@ -566,12 +566,13 @@ int ompi_osc_ucx_accumulate(const void *origin_addr, int origin_count,
if ((op != &ompi_mpi_op_maxloc.op && op != &ompi_mpi_op_minloc.op) || if ((op != &ompi_mpi_op_maxloc.op && op != &ompi_mpi_op_minloc.op) ||
ompi_datatype_is_contiguous_memory_layout(temp_dt, temp_count)) { ompi_datatype_is_contiguous_memory_layout(temp_dt, temp_count)) {
size_t temp_size; size_t temp_size;
char *curr_temp_addr = (char *)temp_addr;
ompi_datatype_type_size(temp_dt, &temp_size); ompi_datatype_type_size(temp_dt, &temp_size);
while (origin_ucx_iov_idx < origin_ucx_iov_count) { while (origin_ucx_iov_idx < origin_ucx_iov_count) {
int curr_count = origin_ucx_iov[origin_ucx_iov_idx].len / temp_size; int curr_count = origin_ucx_iov[origin_ucx_iov_idx].len / temp_size;
ompi_op_reduce(op, origin_ucx_iov[origin_ucx_iov_idx].addr, ompi_op_reduce(op, origin_ucx_iov[origin_ucx_iov_idx].addr,
temp_addr, curr_count, temp_dt); curr_temp_addr, curr_count, temp_dt);
temp_addr = (void *)((char *)temp_addr + curr_count * temp_size); curr_temp_addr += curr_count * temp_size;
origin_ucx_iov_idx++; origin_ucx_iov_idx++;
} }
} else { } else {
@@ -811,12 +812,13 @@ int ompi_osc_ucx_get_accumulate(const void *origin_addr, int origin_count,
if ((op != &ompi_mpi_op_maxloc.op && op != &ompi_mpi_op_minloc.op) || if ((op != &ompi_mpi_op_maxloc.op && op != &ompi_mpi_op_minloc.op) ||
ompi_datatype_is_contiguous_memory_layout(temp_dt, temp_count)) { ompi_datatype_is_contiguous_memory_layout(temp_dt, temp_count)) {
size_t temp_size; size_t temp_size;
char *curr_temp_addr = (char *)temp_addr;
ompi_datatype_type_size(temp_dt, &temp_size); ompi_datatype_type_size(temp_dt, &temp_size);
while (origin_ucx_iov_idx < origin_ucx_iov_count) { while (origin_ucx_iov_idx < origin_ucx_iov_count) {
int curr_count = origin_ucx_iov[origin_ucx_iov_idx].len / temp_size; int curr_count = origin_ucx_iov[origin_ucx_iov_idx].len / temp_size;
ompi_op_reduce(op, origin_ucx_iov[origin_ucx_iov_idx].addr, ompi_op_reduce(op, origin_ucx_iov[origin_ucx_iov_idx].addr,
temp_addr, curr_count, temp_dt); curr_temp_addr, curr_count, temp_dt);
temp_addr = (void *)((char *)temp_addr + curr_count * temp_size); curr_temp_addr += curr_count * temp_size;
origin_ucx_iov_idx++; origin_ucx_iov_idx++;
} }
} else { } else {