From 7f1347677dbc23858f28e696d416142f2aaddda2 Mon Sep 17 00:00:00 2001 From: Christoph Niethammer Date: Wed, 21 Jun 2017 17:29:37 +0200 Subject: [PATCH 1/2] Create file for file backed shared memory in process job session dir. Prevents file collisions and can also be cleaned by orte-clean properly. Signed-off-by: Christoph Niethammer --- ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c b/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c index f58cbba56b..770668f9ff 100644 --- a/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c +++ b/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c @@ -130,12 +130,10 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, /* the shared memory segment is identified opening a file ** and then mapping it to memory ** For sharedfp we also want to put the file backed shared memory into the tmp directory - ** TODO: properly name the file so that different jobs can run on the same system w/o - ** overwriting each other, e.g. orte_process_info.proc_session_dir */ /*sprintf(sm_filename,"%s%s",filename,".sm");*/ filename_basename = basename((void *)filename); - sm_filename = (char*) malloc( sizeof(char) * (strlen(filename_basename)+64) ); + sm_filename = (char*) malloc( sizeof(char) * (strlen(filename_basename)+strlen(ompi_process_info.job_session_dir)+64) ); if (NULL == sm_filename) { free(sm_data); free(sh); @@ -151,7 +149,7 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, comm->c_coll->coll_bcast ( &masterjobid, 1, MPI_UNSIGNED, 0, comm, comm->c_coll->coll_bcast_module ); - sprintf(sm_filename,"/tmp/OMPIO_%s_%d_%s",filename_basename, masterjobid, ".sm"); + sprintf(sm_filename,"%s/OMPIO_%s_%d_%s",ompi_process_info.job_session_dir, filename_basename, masterjobid, ".sm"); /* open shared memory file, initialize to 0, map into memory */ sm_fd = open(sm_filename, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); From e1fc6ae304e29cf8fb88230dab0e6508dcc67f2b Mon Sep 17 00:00:00 2001 From: Christoph Niethammer Date: Thu, 13 Jul 2017 01:33:41 +0200 Subject: [PATCH 2/2] Change filename for shared_fm file to include comm cid instead of masterjobid. Signed-off-by: Christoph Niethammer --- ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c | 21 +++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c b/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c index 770668f9ff..954aa5ec75 100644 --- a/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c +++ b/ompi/mca/sharedfp/sm/sharedfp_sm_file_open.c @@ -58,11 +58,13 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, mca_io_ompio_file_t * shfileHandle, *ompio_fh; char * filename_basename; char * sm_filename; + int sm_filename_length; struct mca_sharedfp_sm_offset * sm_offset_ptr; struct mca_sharedfp_sm_offset sm_offset; mca_io_ompio_data_t *data; int sm_fd; int rank; + uint32_t comm_cid; /*----------------------------------------------------*/ /*Open the same file again without shared file pointer*/ @@ -131,25 +133,20 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm, ** and then mapping it to memory ** For sharedfp we also want to put the file backed shared memory into the tmp directory */ - /*sprintf(sm_filename,"%s%s",filename,".sm");*/ - filename_basename = basename((void *)filename); - sm_filename = (char*) malloc( sizeof(char) * (strlen(filename_basename)+strlen(ompi_process_info.job_session_dir)+64) ); + filename_basename = basename(filename); + /* format is "%s/%s_cid-%d.sm", see below */ + sm_filename_length = strlen(ompi_process_info.job_session_dir) + 1 + strlen(filename_basename) + 5 + (3*sizeof(uint32_t)+1) + 4; + sm_filename = (char*) malloc( sizeof(char) * sm_filename_length); if (NULL == sm_filename) { + opal_output(0, "mca_sharedfp_sm_file_open: Error, unable to malloc sm_filename\n"); free(sm_data); free(sh); free(shfileHandle); return OMPI_ERR_OUT_OF_RESOURCE; } - opal_jobid_t masterjobid; - if ( 0 == comm->c_my_rank ) { - ompi_proc_t *masterproc = ompi_group_peer_lookup(comm->c_local_group, 0 ); - masterjobid = OMPI_CAST_RTE_NAME(&masterproc->super.proc_name)->jobid; - } - comm->c_coll->coll_bcast ( &masterjobid, 1, MPI_UNSIGNED, 0, comm, - comm->c_coll->coll_bcast_module ); - - sprintf(sm_filename,"%s/OMPIO_%s_%d_%s",ompi_process_info.job_session_dir, filename_basename, masterjobid, ".sm"); + comm_cid = ompi_comm_get_cid(comm); + sprintf(sm_filename, "%s/%s_cid-%d.sm", ompi_process_info.job_session_dir, filename_basename, comm_cid); /* open shared memory file, initialize to 0, map into memory */ sm_fd = open(sm_filename, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);