Merge pull request #8238 from devreal/osc-page-align-v4.1.x
OSC RDMA: put memory for each process into separate pages [4.1.x]
Этот коммит содержится в:
Коммит
2c91509bcb
@ -46,6 +46,7 @@
|
|||||||
#include "opal/util/argv.h"
|
#include "opal/util/argv.h"
|
||||||
#include "opal/util/printf.h"
|
#include "opal/util/printf.h"
|
||||||
#include "opal/align.h"
|
#include "opal/align.h"
|
||||||
|
#include "opal/util/sys_limits.h"
|
||||||
#if OPAL_CUDA_SUPPORT
|
#if OPAL_CUDA_SUPPORT
|
||||||
#include "opal/datatype/opal_datatype_cuda.h"
|
#include "opal/datatype/opal_datatype_cuda.h"
|
||||||
#endif /* OPAL_CUDA_SUPPORT */
|
#endif /* OPAL_CUDA_SUPPORT */
|
||||||
@ -550,6 +551,7 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
|
|||||||
ompi_osc_rdma_region_t *state_region;
|
ompi_osc_rdma_region_t *state_region;
|
||||||
struct _local_data *temp;
|
struct _local_data *temp;
|
||||||
char *data_file;
|
char *data_file;
|
||||||
|
int page_size = opal_getpagesize();
|
||||||
|
|
||||||
shared_comm = module->shared_comm;
|
shared_comm = module->shared_comm;
|
||||||
|
|
||||||
@ -574,6 +576,12 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
|
|||||||
module->state_offset = state_base = local_rank_array_size + module->region_size;
|
module->state_offset = state_base = local_rank_array_size + module->region_size;
|
||||||
data_base = state_base + leader_peer_data_size + module->state_size * local_size;
|
data_base = state_base + leader_peer_data_size + module->state_size * local_size;
|
||||||
|
|
||||||
|
/* ensure proper alignment */
|
||||||
|
if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) {
|
||||||
|
data_base += OPAL_ALIGN_PAD_AMOUNT(data_base, page_size);
|
||||||
|
size += OPAL_ALIGN_PAD_AMOUNT(size, page_size);
|
||||||
|
}
|
||||||
|
|
||||||
do {
|
do {
|
||||||
temp = calloc (local_size, sizeof (temp[0]));
|
temp = calloc (local_size, sizeof (temp[0]));
|
||||||
if (NULL == temp) {
|
if (NULL == temp) {
|
||||||
@ -642,7 +650,12 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (size && MPI_WIN_FLAVOR_ALLOCATE == module->flavor) {
|
if (size && MPI_WIN_FLAVOR_ALLOCATE == module->flavor) {
|
||||||
*base = (void *)((intptr_t) module->segment_base + my_base_offset);
|
char *baseptr = (char *)((intptr_t) module->segment_base + my_base_offset);
|
||||||
|
*base = (void *)baseptr;
|
||||||
|
// touch each page to force allocation on local NUMA node
|
||||||
|
for (size_t i = 0; i < size; i += page_size) {
|
||||||
|
baseptr[i] = 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
module->rank_array = (ompi_osc_rdma_rank_data_t *) module->segment_base;
|
module->rank_array = (ompi_osc_rdma_rank_data_t *) module->segment_base;
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user