Merge pull request #5193 from hjelmn/osc_sm_location
Use /dev/shm for shared memory files in osc components
Этот коммит содержится в:
Коммит
64a5baaa28
@ -106,6 +106,9 @@ struct ompi_osc_rdma_component_t {
|
||||
|
||||
/** aggregation free list */
|
||||
opal_free_list_t aggregate;
|
||||
|
||||
/** directory where to place backing files */
|
||||
char *backing_directory;
|
||||
};
|
||||
typedef struct ompi_osc_rdma_component_t ompi_osc_rdma_component_t;
|
||||
|
||||
|
@ -268,6 +268,18 @@ static int ompi_osc_rdma_component_register (void)
|
||||
MCA_BASE_VAR_SCOPE_GROUP, &ompi_osc_rdma_mtl_names);
|
||||
free(description_str);
|
||||
|
||||
if (0 == access ("/dev/shm", W_OK)) {
|
||||
mca_osc_rdma_component.backing_directory = "/dev/shm";
|
||||
} else {
|
||||
mca_osc_rdma_component.backing_directory = ompi_process_info.proc_session_dir;
|
||||
}
|
||||
|
||||
(void) mca_base_component_var_register (&mca_osc_rdma_component.super.osc_version, "backing_directory",
|
||||
"Directory to place backing files for memory windows. "
|
||||
"This directory should be on a local filesystem such as /tmp or "
|
||||
"/dev/shm (default: (linux) /dev/shm, (others) session directory)",
|
||||
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_3,
|
||||
MCA_BASE_VAR_SCOPE_READONLY, &mca_osc_rdma_component.backing_directory);
|
||||
|
||||
/* register performance variables */
|
||||
|
||||
@ -602,9 +614,9 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
|
||||
}
|
||||
|
||||
/* allocate the shared memory segment */
|
||||
ret = asprintf (&data_file, "%s"OPAL_PATH_SEP"window_%d.%s",
|
||||
ompi_process_info.job_session_dir, ompi_comm_get_cid (module->comm),
|
||||
ompi_process_info.nodename);
|
||||
ret = asprintf (&data_file, "%s" OPAL_PATH_SEP "osc_rdma.%s.%x.%d",
|
||||
mca_osc_rdma_component.backing_directory, ompi_process_info.nodename,
|
||||
OMPI_PROC_MY_NAME->jobid, ompi_comm_get_cid(module->comm));
|
||||
if (0 > ret) {
|
||||
ret = OMPI_ERR_OUT_OF_RESOURCE;
|
||||
break;
|
||||
|
@ -61,6 +61,8 @@ typedef struct ompi_osc_sm_node_state_t ompi_osc_sm_node_state_t;
|
||||
|
||||
struct ompi_osc_sm_component_t {
|
||||
ompi_osc_base_component_t super;
|
||||
|
||||
char *backing_directory;
|
||||
};
|
||||
typedef struct ompi_osc_sm_component_t ompi_osc_sm_component_t;
|
||||
OMPI_DECLSPEC extern ompi_osc_sm_component_t mca_osc_sm_component;
|
||||
|
@ -36,6 +36,7 @@ static int component_finalize(void);
|
||||
/* Forward declarations of the osc/sm component callbacks. */
static int component_query(struct ompi_win_t *win, void **base, size_t size, int disp_unit,
                           struct ompi_communicator_t *comm, struct opal_info_t *info,
                           int flavor);
/* Installed as the component's .mca_register_component_params hook. */
static int component_register (void);
static int component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit,
                            struct ompi_communicator_t *comm, struct opal_info_t *info,
                            int flavor, int *model);
|
||||
@ -51,6 +52,7 @@ ompi_osc_sm_component_t mca_osc_sm_component = {
|
||||
MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION,
|
||||
OMPI_RELEASE_VERSION),
|
||||
.mca_open_component = component_open,
|
||||
.mca_register_component_params = component_register,
|
||||
},
|
||||
.osc_data = { /* mca_base_component_data */
|
||||
/* The component is not checkpoint ready */
|
||||
@ -105,6 +107,23 @@ ompi_osc_sm_module_t ompi_osc_sm_module_template = {
|
||||
}
|
||||
};
|
||||
|
||||
static int component_register (void)
|
||||
{
|
||||
if (0 == access ("/dev/shm", W_OK)) {
|
||||
mca_osc_sm_component.backing_directory = "/dev/shm";
|
||||
} else {
|
||||
mca_osc_sm_component.backing_directory = ompi_process_info.proc_session_dir;
|
||||
}
|
||||
|
||||
(void) mca_base_component_var_register (&mca_osc_sm_component.super.osc_version, "backing_directory",
|
||||
"Directory to place backing files for shared memory windows. "
|
||||
"This directory should be on a local filesystem such as /tmp or "
|
||||
"/dev/shm (default: (linux) /dev/shm, (others) session directory)",
|
||||
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_3,
|
||||
MCA_BASE_VAR_SCOPE_READONLY, &mca_osc_sm_component.backing_directory);
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
static int
|
||||
component_open(void)
|
||||
@ -169,6 +188,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
|
||||
{
|
||||
ompi_osc_sm_module_t *module = NULL;
|
||||
int comm_size = ompi_comm_size (comm);
|
||||
bool unlink_needed = false;
|
||||
int ret = OMPI_ERROR;
|
||||
|
||||
if (OMPI_SUCCESS != (ret = check_win_ok(comm, flavor))) {
|
||||
@ -262,10 +282,10 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
|
||||
posts_size += OPAL_ALIGN_PAD_AMOUNT(posts_size, 64);
|
||||
if (0 == ompi_comm_rank (module->comm)) {
|
||||
char *data_file;
|
||||
if (asprintf(&data_file, "%s"OPAL_PATH_SEP"shared_window_%d.%s",
|
||||
ompi_process_info.proc_session_dir,
|
||||
ompi_comm_get_cid(module->comm),
|
||||
ompi_process_info.nodename) < 0) {
|
||||
ret = asprintf (&data_file, "%s" OPAL_PATH_SEP "osc_sm.%s.%x.%d",
|
||||
mca_osc_sm_component.backing_directory, ompi_process_info.nodename,
|
||||
OMPI_PROC_MY_NAME->jobid, ompi_comm_get_cid(module->comm));
|
||||
if (ret < 0) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
@ -274,6 +294,8 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
|
||||
if (OPAL_SUCCESS != ret) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
unlink_needed = true;
|
||||
}
|
||||
|
||||
ret = module->comm->c_coll->coll_bcast (&module->seg_ds, sizeof (module->seg_ds), MPI_BYTE, 0,
|
||||
@ -287,6 +309,17 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
|
||||
goto error;
|
||||
}
|
||||
|
||||
ret = module->comm->c_coll->coll_bcast (&module->seg_ds, sizeof (module->seg_ds), MPI_BYTE, 0,
|
||||
module->comm, module->comm->c_coll->coll_bcast_module);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (0 == ompi_comm_rank (module->comm)) {
|
||||
opal_shmem_unlink (&module->seg_ds);
|
||||
unlink_needed = false;
|
||||
}
|
||||
|
||||
module->sizes = malloc(sizeof(size_t) * comm_size);
|
||||
if (NULL == module->sizes) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
||||
module->bases = malloc(sizeof(void*) * comm_size);
|
||||
@ -399,6 +432,11 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
|
||||
return OMPI_SUCCESS;
|
||||
|
||||
error:
|
||||
|
||||
if (0 == ompi_comm_rank (module->comm) && unlink_needed) {
|
||||
opal_shmem_unlink (&module->seg_ds);
|
||||
}
|
||||
|
||||
ompi_osc_sm_free (win);
|
||||
|
||||
return ret;
|
||||
@ -477,10 +515,6 @@ ompi_osc_sm_free(struct ompi_win_t *win)
|
||||
module->comm->c_coll->coll_barrier(module->comm,
|
||||
module->comm->c_coll->coll_barrier_module);
|
||||
|
||||
if (0 == ompi_comm_rank (module->comm)) {
|
||||
opal_shmem_unlink (&module->seg_ds);
|
||||
}
|
||||
|
||||
opal_shmem_segment_detach (&module->seg_ds);
|
||||
} else {
|
||||
free(module->node_states);
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user