Merge pull request #8070 from devreal/osc-page-align
OSC RDMA: put memory for each process into separate pages
Этот коммит содержится в:
Коммит
1f5ed0b83d
@@ -47,6 +47,7 @@
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/util/printf.h"
|
||||
#include "opal/align.h"
|
||||
+ #include "opal/util/sys_limits.h"
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
#include "opal/datatype/opal_datatype_cuda.h"
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
@@ -550,6 +551,7 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
|
||||
ompi_osc_rdma_region_t *state_region;
|
||||
struct _local_data *temp;
|
||||
char *data_file;
|
||||
+ int page_size = opal_getpagesize();
|
||||
|
||||
shared_comm = module->shared_comm;
|
||||
|
||||
@@ -575,9 +577,9 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
|
||||
data_base = state_base + leader_peer_data_size + module->state_size * local_size;
|
||||
|
||||
/* ensure proper alignment */
|
||||
- data_base += OPAL_ALIGN_PAD_AMOUNT(data_base, OPAL_ALIGN_MIN);
|
||||
if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) {
|
||||
- size += OPAL_ALIGN_PAD_AMOUNT(size, OPAL_ALIGN_MIN);
|
||||
+ data_base += OPAL_ALIGN_PAD_AMOUNT(data_base, page_size);
|
||||
+ size += OPAL_ALIGN_PAD_AMOUNT(size, page_size);
|
||||
}
|
||||
|
||||
do {
|
||||
@@ -648,8 +650,12 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
|
||||
}
|
||||
|
||||
if (size && MPI_WIN_FLAVOR_ALLOCATE == module->flavor) {
|
||||
- *base = (void *)((intptr_t) module->segment_base + my_base_offset);
|
||||
- memset (*base, 0, size);
|
||||
+ char *baseptr = (char *)((intptr_t) module->segment_base + my_base_offset);
|
||||
+ *base = (void *)baseptr;
|
||||
+ // touch each page to force allocation on local NUMA node
|
||||
+ for (size_t i = 0; i < size; i += page_size) {
|
||||
+ baseptr[i] = 0;
|
||||
+ }
|
||||
}
|
||||
|
||||
module->rank_array = (ompi_osc_rdma_rank_data_t *) module->segment_base;
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user