Merge pull request #6840 from nysal/ucx_accumulate_fix
osc/ucx: Fix data corruption with non-contiguous accumulates
Этот коммит содержится в:
Коммит
3c45542c51
@@ -510,12 +510,13 @@ int ompi_osc_ucx_accumulate(const void *origin_addr, int origin_count,
|
|||||||
if ((op != &ompi_mpi_op_maxloc.op && op != &ompi_mpi_op_minloc.op) ||
|
if ((op != &ompi_mpi_op_maxloc.op && op != &ompi_mpi_op_minloc.op) ||
|
||||||
ompi_datatype_is_contiguous_memory_layout(temp_dt, temp_count)) {
|
ompi_datatype_is_contiguous_memory_layout(temp_dt, temp_count)) {
|
||||||
size_t temp_size;
|
size_t temp_size;
|
||||||
|
char *curr_temp_addr = (char *)temp_addr;
|
||||||
ompi_datatype_type_size(temp_dt, &temp_size);
|
ompi_datatype_type_size(temp_dt, &temp_size);
|
||||||
while (origin_ucx_iov_idx < origin_ucx_iov_count) {
|
while (origin_ucx_iov_idx < origin_ucx_iov_count) {
|
||||||
int curr_count = origin_ucx_iov[origin_ucx_iov_idx].len / temp_size;
|
int curr_count = origin_ucx_iov[origin_ucx_iov_idx].len / temp_size;
|
||||||
ompi_op_reduce(op, origin_ucx_iov[origin_ucx_iov_idx].addr,
|
ompi_op_reduce(op, origin_ucx_iov[origin_ucx_iov_idx].addr,
|
||||||
temp_addr, curr_count, temp_dt);
|
curr_temp_addr, curr_count, temp_dt);
|
||||||
temp_addr = (void *)((char *)temp_addr + curr_count * temp_size);
|
curr_temp_addr += curr_count * temp_size;
|
||||||
origin_ucx_iov_idx++;
|
origin_ucx_iov_idx++;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@@ -730,12 +731,13 @@ int ompi_osc_ucx_get_accumulate(const void *origin_addr, int origin_count,
|
|||||||
if ((op != &ompi_mpi_op_maxloc.op && op != &ompi_mpi_op_minloc.op) ||
|
if ((op != &ompi_mpi_op_maxloc.op && op != &ompi_mpi_op_minloc.op) ||
|
||||||
ompi_datatype_is_contiguous_memory_layout(temp_dt, temp_count)) {
|
ompi_datatype_is_contiguous_memory_layout(temp_dt, temp_count)) {
|
||||||
size_t temp_size;
|
size_t temp_size;
|
||||||
|
char *curr_temp_addr = (char *)temp_addr;
|
||||||
ompi_datatype_type_size(temp_dt, &temp_size);
|
ompi_datatype_type_size(temp_dt, &temp_size);
|
||||||
while (origin_ucx_iov_idx < origin_ucx_iov_count) {
|
while (origin_ucx_iov_idx < origin_ucx_iov_count) {
|
||||||
int curr_count = origin_ucx_iov[origin_ucx_iov_idx].len / temp_size;
|
int curr_count = origin_ucx_iov[origin_ucx_iov_idx].len / temp_size;
|
||||||
ompi_op_reduce(op, origin_ucx_iov[origin_ucx_iov_idx].addr,
|
ompi_op_reduce(op, origin_ucx_iov[origin_ucx_iov_idx].addr,
|
||||||
temp_addr, curr_count, temp_dt);
|
curr_temp_addr, curr_count, temp_dt);
|
||||||
temp_addr = (void *)((char *)temp_addr + curr_count * temp_size);
|
curr_temp_addr += curr_count * temp_size;
|
||||||
origin_ucx_iov_idx++;
|
origin_ucx_iov_idx++;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
Загрузка…
Ссылка в новой задаче
Block a user