backing file is allocated and deallocated.
This commit was SVN r17358.
Этот коммит содержится в:
родитель
f73adf69c0
Коммит
fda485ff9c
@ -96,9 +96,6 @@ BEGIN_C_DECLS
|
|||||||
/** MCA parameter: number of regions per memory bank */
|
/** MCA parameter: number of regions per memory bank */
|
||||||
size_t sm2_num_regions_per_bank;
|
size_t sm2_num_regions_per_bank;
|
||||||
|
|
||||||
/* size of shared memory backing file */
|
|
||||||
size_t size_sm2_backing_file;
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -110,6 +107,16 @@ BEGIN_C_DECLS
|
|||||||
struct mca_coll_sm2_module_t {
|
struct mca_coll_sm2_module_t {
|
||||||
/* base structure */
|
/* base structure */
|
||||||
mca_coll_base_module_1_1_0_t super;
|
mca_coll_base_module_1_1_0_t super;
|
||||||
|
|
||||||
|
/* Shared Memory file name */
|
||||||
|
char *coll_sm2_file_name;
|
||||||
|
|
||||||
|
/* size of shared memory backing file */
|
||||||
|
size_t size_sm2_backing_file;
|
||||||
|
|
||||||
|
/* Memory pointer to shared file */
|
||||||
|
char *shared_memory_region;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef struct mca_coll_sm2_module_t mca_coll_sm2_module_t;
|
typedef struct mca_coll_sm2_module_t mca_coll_sm2_module_t;
|
||||||
|
@ -24,6 +24,9 @@
|
|||||||
|
|
||||||
#include "ompi_config.h"
|
#include "ompi_config.h"
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/mman.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
|
||||||
#include "ompi/constants.h"
|
#include "ompi/constants.h"
|
||||||
#include "ompi/communicator/communicator.h"
|
#include "ompi/communicator/communicator.h"
|
||||||
@ -31,6 +34,8 @@
|
|||||||
#include "opal/util/show_help.h"
|
#include "opal/util/show_help.h"
|
||||||
#include "coll_sm2.h"
|
#include "coll_sm2.h"
|
||||||
#include "ompi/mca/coll/base/base.h"
|
#include "ompi/mca/coll/base/base.h"
|
||||||
|
#include "orte/mca/rml/rml.h"
|
||||||
|
#include "orte/util/sys_info.h"
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -76,8 +81,141 @@ static inline int mca_coll_sm2_param_register_int(
|
|||||||
return param_value;
|
return param_value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Create mmaped shared file
|
||||||
|
*/
|
||||||
|
|
||||||
|
static int allocate_shared_file(size_t size, char *file_name,
|
||||||
|
struct ompi_communicator_t *comm, char **sm_backing_file)
|
||||||
|
{
|
||||||
|
int fd = -1;
|
||||||
|
int group_size,my_rank;
|
||||||
|
|
||||||
|
bool i_create_shared_file=false;
|
||||||
|
size_t p;
|
||||||
|
int rc=0, sm_file_inited=0;
|
||||||
|
struct iovec iov[2];
|
||||||
|
int sm_file_created;
|
||||||
|
ompi_proc_t **comm_proc_list;
|
||||||
|
|
||||||
|
/* get the list of procs */
|
||||||
|
comm_proc_list=comm->c_local_group->grp_proc_pointers;
|
||||||
|
|
||||||
|
group_size=ompi_comm_size(comm);
|
||||||
|
my_rank=ompi_comm_rank(comm);
|
||||||
|
|
||||||
|
/* determine who will actually create the file */
|
||||||
|
if( my_rank == 0 ) {
|
||||||
|
i_create_shared_file=true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* open the backing file. */
|
||||||
|
if( i_create_shared_file ) {
|
||||||
|
/* process initializing the file */
|
||||||
|
fd = open(file_name, O_CREAT|O_RDWR, 0600);
|
||||||
|
if (fd < 0) {
|
||||||
|
opal_output(0,"mca_common_sm_mmap_init: open %s failed with errno=%d\n",
|
||||||
|
file_name, errno);
|
||||||
|
goto file_opened;
|
||||||
|
}
|
||||||
|
/* map the file and initialize segment state */
|
||||||
|
*sm_backing_file = (char *)
|
||||||
|
mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
|
||||||
|
if( (void*)-1 == sm_backing_file ) {
|
||||||
|
opal_output(0, "mca_common_sm_mmap_init: mmap failed with errno=%d\n",
|
||||||
|
errno);
|
||||||
|
goto file_opened;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* truncate the file to the requested size */
|
||||||
|
if(ftruncate(fd, size) != 0) {
|
||||||
|
opal_output(0,
|
||||||
|
"mca_common_sm_mmap_init: ftruncate failed with errno=%d\n",
|
||||||
|
errno);
|
||||||
|
goto file_opened;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* if we got this far, the file has been initialized correctly */
|
||||||
|
sm_file_inited=1;
|
||||||
|
|
||||||
|
file_opened:
|
||||||
|
|
||||||
|
/* signal the rest of the local procs that the backing file
|
||||||
|
* has been created - not very scalable, but for small shared
|
||||||
|
* memory nodes is adequate for now
|
||||||
|
*/
|
||||||
|
for(p=1 ; p < group_size ; p++ ) {
|
||||||
|
sm_file_created=OMPI_RML_TAG_COLL_SM2_BACK_FILE_CREATED;
|
||||||
|
iov[0].iov_base=&sm_file_created;
|
||||||
|
iov[0].iov_len=sizeof(sm_file_created);
|
||||||
|
iov[1].iov_base=&sm_file_inited;
|
||||||
|
iov[1].iov_len=sizeof(sm_file_inited);
|
||||||
|
rc=orte_rml.send(&(comm_proc_list[p]->proc_name),iov,2,
|
||||||
|
OMPI_RML_TAG_COLL_SM2_BACK_FILE_CREATED,0);
|
||||||
|
if( rc < 0 ) {
|
||||||
|
opal_output(0,
|
||||||
|
"allocate_shared_file: orte_rml.send failed to %lu with errno=%d\n",
|
||||||
|
(unsigned long)p, errno);
|
||||||
|
goto return_error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ( 0 == sm_file_inited ) {
|
||||||
|
/* error - the sm backing file did not get opened correctly */
|
||||||
|
goto return_error;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/* all other procs wait for the file to be initialized
|
||||||
|
before using the backing file */
|
||||||
|
iov[0].iov_base=&sm_file_created;
|
||||||
|
iov[0].iov_len=sizeof(sm_file_created);
|
||||||
|
iov[1].iov_base=&sm_file_inited;
|
||||||
|
iov[1].iov_len=sizeof(sm_file_inited);
|
||||||
|
rc=orte_rml.recv(&(comm_proc_list[0]->proc_name),iov,2,
|
||||||
|
OMPI_RML_TAG_COLL_SM2_BACK_FILE_CREATED,0);
|
||||||
|
if( rc < 0 ) {
|
||||||
|
opal_output(0, "allocate_shared_file: orte_rml.recv failed from %ld with errno=%d\n",
|
||||||
|
0L, errno);
|
||||||
|
goto return_error;
|
||||||
|
}
|
||||||
|
/* check to see if file inited correctly */
|
||||||
|
if( 0 == sm_file_inited ) {
|
||||||
|
goto return_error;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* open backing file */
|
||||||
|
fd = open(file_name, O_RDWR, 0600);
|
||||||
|
if (fd < 0) {
|
||||||
|
opal_output(0,"mca_common_sm_mmap_init: open %s failed with errno=%d\n",
|
||||||
|
file_name, errno);
|
||||||
|
goto return_error;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* map the file and initialize segment state */
|
||||||
|
*sm_backing_file = (char *)
|
||||||
|
mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
|
||||||
|
if( (void*)-1 == sm_backing_file ) {
|
||||||
|
opal_output(0, "mca_common_sm_mmap_init: mmap failed with errno=%d\n",
|
||||||
|
errno);
|
||||||
|
goto return_error;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/* enable access by other processes on this host */
|
||||||
|
close(fd);
|
||||||
|
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
|
||||||
|
return_error:
|
||||||
|
if( -1 != fd ) {
|
||||||
|
close(fd);
|
||||||
|
}
|
||||||
|
|
||||||
|
if( NULL != sm_backing_file ) munmap((void*) sm_backing_file,size);
|
||||||
|
|
||||||
|
return OMPI_ERROR;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Instantiate the public struct with all of our public information
|
* Instantiate the public struct with all of our public information
|
||||||
@ -145,10 +283,13 @@ mca_coll_sm2_module_construct(mca_coll_sm2_module_t *module)
|
|||||||
static void
|
static void
|
||||||
mca_coll_sm2_module_destruct(mca_coll_sm2_module_t *module)
|
mca_coll_sm2_module_destruct(mca_coll_sm2_module_t *module)
|
||||||
{
|
{
|
||||||
/* debug */
|
int ret;
|
||||||
fprintf(stderr," sm2 destructor called \n");
|
/* remove shared memory backing file */
|
||||||
fflush(stderr);
|
if( module->shared_memory_region) {
|
||||||
/* end debug */
|
ret=munmap(module->shared_memory_region,
|
||||||
|
module->size_sm2_backing_file);
|
||||||
|
/* this is cleanup, no recovery will be done */
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -158,7 +299,6 @@ mca_coll_sm2_module_destruct(mca_coll_sm2_module_t *module)
|
|||||||
static int sm2_open(void)
|
static int sm2_open(void)
|
||||||
{
|
{
|
||||||
/* local variables */
|
/* local variables */
|
||||||
int size;
|
|
||||||
mca_coll_sm2_component_t *cs = &mca_coll_sm2_component;
|
mca_coll_sm2_component_t *cs = &mca_coll_sm2_component;
|
||||||
|
|
||||||
/* set component priority */
|
/* set component priority */
|
||||||
@ -233,9 +373,7 @@ mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority)
|
|||||||
{
|
{
|
||||||
/* local variables */
|
/* local variables */
|
||||||
mca_coll_sm2_module_t *sm_module;
|
mca_coll_sm2_module_t *sm_module;
|
||||||
size_t coll_sm2_comm_backing_file_size=0;
|
int group_size,ret;
|
||||||
size_t coll_sm2_per_proc_segment_size=0;
|
|
||||||
int group_size;
|
|
||||||
size_t alignment,size,size_tot,size_tot_per_proc_per_seg;
|
size_t alignment,size,size_tot,size_tot_per_proc_per_seg;
|
||||||
size_t tot_size_per_bank,size_tot_per_segment;
|
size_t tot_size_per_bank,size_tot_per_segment;
|
||||||
size_t tot_size_mem_banks;
|
size_t tot_size_mem_banks;
|
||||||
@ -244,6 +382,7 @@ mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority)
|
|||||||
size_t mem_management_per_proc;
|
size_t mem_management_per_proc;
|
||||||
size_t mem_management_total;
|
size_t mem_management_total;
|
||||||
size_t size_sm2_backing_file;
|
size_t size_sm2_backing_file;
|
||||||
|
size_t len;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This is activated only for intra-communicators
|
* This is activated only for intra-communicators
|
||||||
@ -360,8 +499,31 @@ mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority)
|
|||||||
|
|
||||||
/* total size of backing file */
|
/* total size of backing file */
|
||||||
size_sm2_backing_file=mem_management_total+tot_size_mem_banks;
|
size_sm2_backing_file=mem_management_total+tot_size_mem_banks;
|
||||||
|
sm_module->size_sm2_backing_file=size_sm2_backing_file;
|
||||||
|
|
||||||
|
/* set file name */
|
||||||
|
len=asprintf(&(sm_module->coll_sm2_file_name),
|
||||||
|
"%s"OPAL_PATH_SEP"sm_coll_v2%s_%0d",orte_process_info.job_session_dir,
|
||||||
|
orte_system_info.nodename,ompi_comm_get_cid(comm));
|
||||||
|
if( 0 > len ) {
|
||||||
|
goto CLEANUP;
|
||||||
|
}
|
||||||
|
|
||||||
/* allocate backing file */
|
/* allocate backing file */
|
||||||
|
ret=allocate_shared_file(size_sm2_backing_file,
|
||||||
|
sm_module->coll_sm2_file_name, comm,
|
||||||
|
&(sm_module->shared_memory_region));
|
||||||
|
if( MPI_SUCCESS != ret ) {
|
||||||
|
goto CLEANUP;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* debug */
|
||||||
|
fprintf(stderr," GGGG file %s len %ld prt %p \n",
|
||||||
|
sm_module->coll_sm2_file_name,
|
||||||
|
sm_module->size_sm2_backing_file,
|
||||||
|
sm_module->shared_memory_region);
|
||||||
|
fflush(stderr);
|
||||||
|
/* end debug */
|
||||||
|
|
||||||
/* initialize local counters */
|
/* initialize local counters */
|
||||||
|
|
||||||
@ -373,6 +535,15 @@ mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority)
|
|||||||
/* return */
|
/* return */
|
||||||
return &(sm_module->super);
|
return &(sm_module->super);
|
||||||
|
|
||||||
|
|
||||||
|
CLEANUP:
|
||||||
|
OBJ_RELEASE(sm_module);
|
||||||
|
|
||||||
|
if( NULL == sm_module->coll_sm2_file_name ) {
|
||||||
|
free(sm_module->coll_sm2_file_name);
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user