1
1

sharedfp/lockedfile and sm: fix name collision

This fixes the issue reported by Nicolas Joly on the mailing list: the sharedfp/lockedfile component does not currently support a scenario where multiple jobs read from the same input file, due to a collision of the filenames used for the sharedfp handle. Although not part of the original report, the same problem occurs for the sharedfp/sm component. Therefore, add the jobid to the file names used by the lockedfile and sm components.

use the OMPI_CAST_RTE_NAME macro to determine jobid

Fixes: #3098

Signed-off-by: Edgar Gabriel <egabriel@central.uh.edu>
Этот коммит содержится в:
Edgar Gabriel 2017-03-04 17:35:54 -06:00
родитель d1fed77781
Коммит 2d462b3b80
2 изменённых файлов: 32 добавлений и 5 удалений

Просмотреть файл

@ -9,7 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2013-2016 University of Houston. All rights reserved.
* Copyright (c) 2013-2017 University of Houston. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -25,6 +25,8 @@
#include "mpi.h"
#include "ompi/constants.h"
#include "ompi/group/group.h"
#include "ompi/proc/proc.h"
#include "ompi/mca/sharedfp/sharedfp.h"
#include "ompi/mca/sharedfp/base/base.h"
@ -99,8 +101,23 @@ int mca_sharedfp_lockedfile_file_open (struct ompi_communicator_t *comm,
return OMPI_ERR_OUT_OF_RESOURCE;
}
lockedfilename = (char*)malloc(sizeof(char) * (strlen(filename) + 64));
sprintf(lockedfilename,"%s%s",filename,".lockedfile");
opal_jobid_t masterjobid;
if ( 0 == comm->c_my_rank ) {
ompi_proc_t *masterproc = ompi_group_peer_lookup(comm->c_local_group, 0 );
masterjobid = OMPI_CAST_RTE_NAME(&masterproc->super.proc_name)->jobid;
}
comm->c_coll->coll_bcast ( &masterjobid, 1, MPI_UNSIGNED, 0, comm,
comm->c_coll->coll_bcast_module );
size_t filenamelen = strlen(filename) + 16;
lockedfilename = (char*)malloc(sizeof(char) * filenamelen);
if ( NULL == lockedfilename ) {
free (shfileHandle);
free (sh);
free (module_data);
return OMPI_ERR_OUT_OF_RESOURCE;
}
snprintf(lockedfilename, filenamelen, "%s-%u%s",filename,masterjobid,".lock");
module_data->filename = lockedfilename;
/*-------------------------------------------------*/

Просмотреть файл

@ -9,7 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2013-2016 University of Houston. All rights reserved.
* Copyright (c) 2013-2017 University of Houston. All rights reserved.
* Copyright (c) 2013 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
@ -35,6 +35,8 @@
#include "mpi.h"
#include "ompi/constants.h"
#include "ompi/group/group.h"
#include "ompi/proc/proc.h"
#include "ompi/mca/sharedfp/sharedfp.h"
#include "ompi/mca/sharedfp/base/base.h"
@ -139,8 +141,16 @@ int mca_sharedfp_sm_file_open (struct ompi_communicator_t *comm,
free(shfileHandle);
return OMPI_ERR_OUT_OF_RESOURCE;
}
sprintf(sm_filename,"/tmp/OMPIO_sharedfp_sm_%s%s",filename_basename,".sm");
opal_jobid_t masterjobid;
if ( 0 == comm->c_my_rank ) {
ompi_proc_t *masterproc = ompi_group_peer_lookup(comm->c_local_group, 0 );
masterjobid = OMPI_CAST_RTE_NAME(&masterproc->super.proc_name)->jobid;
}
comm->c_coll->coll_bcast ( &masterjobid, 1, MPI_UNSIGNED, 0, comm,
comm->c_coll->coll_bcast_module );
sprintf(sm_filename,"/tmp/OMPIO_%s_%d_%s",filename_basename, masterjobid, ".sm");
/* open shared memory file, initialize to 0, map into memory */
sm_fd = open(sm_filename, O_RDWR | O_CREAT,
S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);