1
1

Merge pull request #8238 from devreal/osc-page-align-v4.1.x

OSC RDMA: put memory for each process into separate pages [4.1.x]
Этот коммит содержится в:
Jeff Squyres 2020-11-23 15:06:38 -05:00 коммит произвёл GitHub
родитель 4c0c0e9bcb 2e1e9dc9dd
Коммит 2c91509bcb
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23

Просмотреть файл

@ -46,6 +46,7 @@
#include "opal/util/argv.h"
#include "opal/util/printf.h"
#include "opal/align.h"
#include "opal/util/sys_limits.h"
#if OPAL_CUDA_SUPPORT
#include "opal/datatype/opal_datatype_cuda.h"
#endif /* OPAL_CUDA_SUPPORT */
@ -550,6 +551,7 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
ompi_osc_rdma_region_t *state_region;
struct _local_data *temp;
char *data_file;
int page_size = opal_getpagesize();
shared_comm = module->shared_comm;
@ -574,6 +576,12 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
module->state_offset = state_base = local_rank_array_size + module->region_size;
data_base = state_base + leader_peer_data_size + module->state_size * local_size;
/* ensure proper alignment */
if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) {
data_base += OPAL_ALIGN_PAD_AMOUNT(data_base, page_size);
size += OPAL_ALIGN_PAD_AMOUNT(size, page_size);
}
do {
temp = calloc (local_size, sizeof (temp[0]));
if (NULL == temp) {
@ -642,7 +650,12 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
}
if (size && MPI_WIN_FLAVOR_ALLOCATE == module->flavor) {
*base = (void *)((intptr_t) module->segment_base + my_base_offset);
char *baseptr = (char *)((intptr_t) module->segment_base + my_base_offset);
*base = (void *)baseptr;
// touch each page to force allocation on local NUMA node
for (size_t i = 0; i < size; i += page_size) {
baseptr[i] = 0;
}
}
module->rank_array = (ompi_osc_rdma_rank_data_t *) module->segment_base;