osc/sm: add support for controlling location of backing store
This commit adds a new MCA variable to set the location of the backing store: osc_sm_backing_directory. The default on Linux has been changed to use /dev/shm to improve performance in cases where /tmp is not a tmpfs. Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
Этот коммит содержится в:
родитель
d0d59b1d7d
Коммит
e9de42544e
@ -61,6 +61,8 @@ typedef struct ompi_osc_sm_node_state_t ompi_osc_sm_node_state_t;
|
||||
|
||||
struct ompi_osc_sm_component_t {
|
||||
ompi_osc_base_component_t super;
|
||||
|
||||
char *backing_directory;
|
||||
};
|
||||
typedef struct ompi_osc_sm_component_t ompi_osc_sm_component_t;
|
||||
OMPI_DECLSPEC extern ompi_osc_sm_component_t mca_osc_sm_component;
|
||||
|
@ -36,6 +36,7 @@ static int component_finalize(void);
|
||||
static int component_query(struct ompi_win_t *win, void **base, size_t size, int disp_unit,
|
||||
struct ompi_communicator_t *comm, struct opal_info_t *info,
|
||||
int flavor);
|
||||
static int component_register (void);
|
||||
static int component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit,
|
||||
struct ompi_communicator_t *comm, struct opal_info_t *info,
|
||||
int flavor, int *model);
|
||||
@ -51,6 +52,7 @@ ompi_osc_sm_component_t mca_osc_sm_component = {
|
||||
MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION,
|
||||
OMPI_RELEASE_VERSION),
|
||||
.mca_open_component = component_open,
|
||||
.mca_register_component_params = component_register,
|
||||
},
|
||||
.osc_data = { /* mca_base_component_data */
|
||||
/* The component is not checkpoint ready */
|
||||
@ -105,6 +107,23 @@ ompi_osc_sm_module_t ompi_osc_sm_module_template = {
|
||||
}
|
||||
};
|
||||
|
||||
static int component_register (void)
|
||||
{
|
||||
if (0 == access ("/dev/shm", W_OK)) {
|
||||
mca_osc_sm_component.backing_directory = "/dev/shm";
|
||||
} else {
|
||||
mca_osc_sm_component.backing_directory = ompi_process_info.proc_session_dir;
|
||||
}
|
||||
|
||||
(void) mca_base_component_var_register (&mca_osc_sm_component.super.osc_version, "backing_directory",
|
||||
"Directory to place backing files for shared memory windows. "
|
||||
"This directory should be on a local filesystem such as /tmp or "
|
||||
"/dev/shm (default: (linux) /dev/shm, (others) session directory)",
|
||||
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_3,
|
||||
MCA_BASE_VAR_SCOPE_READONLY, &mca_osc_sm_component.backing_directory);
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
static int
|
||||
component_open(void)
|
||||
@ -169,6 +188,7 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
|
||||
{
|
||||
ompi_osc_sm_module_t *module = NULL;
|
||||
int comm_size = ompi_comm_size (comm);
|
||||
bool unlink_needed = false;
|
||||
int ret = OMPI_ERROR;
|
||||
|
||||
if (OMPI_SUCCESS != (ret = check_win_ok(comm, flavor))) {
|
||||
@ -262,10 +282,10 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
|
||||
posts_size += OPAL_ALIGN_PAD_AMOUNT(posts_size, 64);
|
||||
if (0 == ompi_comm_rank (module->comm)) {
|
||||
char *data_file;
|
||||
if (asprintf(&data_file, "%s"OPAL_PATH_SEP"shared_window_%d.%s",
|
||||
ompi_process_info.proc_session_dir,
|
||||
ompi_comm_get_cid(module->comm),
|
||||
ompi_process_info.nodename) < 0) {
|
||||
ret = asprintf (&data_file, "%s" OPAL_PATH_SEP "osc_sm.%s.%x.%d",
|
||||
mca_osc_sm_component.backing_directory, ompi_process_info.nodename,
|
||||
OMPI_PROC_MY_NAME->jobid, ompi_comm_get_cid(module->comm));
|
||||
if (ret < 0) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
@ -274,6 +294,8 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
|
||||
if (OPAL_SUCCESS != ret) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
unlink_needed = true;
|
||||
}
|
||||
|
||||
ret = module->comm->c_coll->coll_bcast (&module->seg_ds, sizeof (module->seg_ds), MPI_BYTE, 0,
|
||||
@ -287,6 +309,17 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
|
||||
goto error;
|
||||
}
|
||||
|
||||
ret = module->comm->c_coll->coll_bcast (&module->seg_ds, sizeof (module->seg_ds), MPI_BYTE, 0,
|
||||
module->comm, module->comm->c_coll->coll_bcast_module);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (0 == ompi_comm_rank (module->comm)) {
|
||||
opal_shmem_unlink (&module->seg_ds);
|
||||
unlink_needed = false;
|
||||
}
|
||||
|
||||
module->sizes = malloc(sizeof(size_t) * comm_size);
|
||||
if (NULL == module->sizes) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
||||
module->bases = malloc(sizeof(void*) * comm_size);
|
||||
@ -399,6 +432,11 @@ component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit
|
||||
return OMPI_SUCCESS;
|
||||
|
||||
error:
|
||||
|
||||
if (0 == ompi_comm_rank (module->comm) && unlink_needed) {
|
||||
opal_shmem_unlink (&module->seg_ds);
|
||||
}
|
||||
|
||||
ompi_osc_sm_free (win);
|
||||
|
||||
return ret;
|
||||
@ -477,10 +515,6 @@ ompi_osc_sm_free(struct ompi_win_t *win)
|
||||
module->comm->c_coll->coll_barrier(module->comm,
|
||||
module->comm->c_coll->coll_barrier_module);
|
||||
|
||||
if (0 == ompi_comm_rank (module->comm)) {
|
||||
opal_shmem_unlink (&module->seg_ds);
|
||||
}
|
||||
|
||||
opal_shmem_segment_detach (&module->seg_ds);
|
||||
} else {
|
||||
free(module->node_states);
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user