bcol/basesmuma: fix remaining memory leaks in basesmuma
We were still leaking 1) file descriptors for data files, and 2) some control files. I fixed both of these leaks and everything is looking good. This should fix the bug where we are running out of file descriptors when running the loop_spawn test. I also too the opportunity to refactor the code a bit to make the mapping/unmapping simpler. This should help avoid these sorts of issues in the future. Depends on #4678 cmr=v1.8.2:reviewer=manjugv This commit was SVN r31893.
Этот коммит содержится в:
родитель
f840013b41
Коммит
2614dfc4bf
@ -256,42 +256,10 @@ static int basesmuma_open(void)
|
||||
*/
|
||||
static int mca_bcol_basesmuma_deregister_ctl_sm(mca_bcol_basesmuma_component_t *bcol_component)
|
||||
{
|
||||
|
||||
/* local variables */
|
||||
int ret;
|
||||
bcol_basesmuma_smcm_mmap_t *sm_ctl_structs;
|
||||
|
||||
/* get a handle on the backing file */
|
||||
sm_ctl_structs=bcol_component->sm_ctl_structs;
|
||||
/* Nothing to free */
|
||||
if (!sm_ctl_structs){
|
||||
return OMPI_SUCCESS;
|
||||
if (NULL != bcol_component->sm_ctl_structs) {
|
||||
OBJ_RELEASE(bcol_component->sm_ctl_structs);
|
||||
}
|
||||
|
||||
/* unmap the shared memory file */
|
||||
ret=munmap((void *) sm_ctl_structs->map_addr, sm_ctl_structs->map_size);
|
||||
if( 0 > ret) {
|
||||
opal_output (ompi_bcol_base_framework.framework_output, "Failed to munmap the shared memory file %s",
|
||||
sm_ctl_structs->map_path);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/* set the pointer to NULL */
|
||||
/*sm_ctl_structs->map_addr = NULL;*/
|
||||
|
||||
/* remove the file */
|
||||
#if 0
|
||||
ret = remove(sm_ctl_structs->map_path);
|
||||
if( 0 > ret) {
|
||||
opal_output (ompi_bcol_base_framework.framework_output, "Failed to remove the shared memory file %s. reason = %s",
|
||||
sm_ctl_structs->map_path, strerror (errno));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
#endif
|
||||
|
||||
free (bcol_component->sm_ctl_structs);
|
||||
bcol_component->sm_ctl_structs = NULL;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -86,31 +86,16 @@ int mca_bcol_basesmuma_register_sm(void *context_data, void *base, size_t size,
|
||||
|
||||
int mca_bcol_basesmuma_deregister_sm(void *context_data, void *reg)
|
||||
{
|
||||
/* NTH -- code was disable. why? */
|
||||
#if 0
|
||||
/* local variables */
|
||||
int ret;
|
||||
bcol_basesmuma_registration_data_t *sm_reg =
|
||||
(bcol_basesmuma_registration_data_t*) context_data;
|
||||
|
||||
/* unmap the shared memory file */
|
||||
ret = munmap((void *) sm_reg->base_addr, sm_reg->size);
|
||||
if( 0 > ret) {
|
||||
opal_output (ompi_bcol_base_framework.framework_output, "Failed to munmap the shared memory file %s",sm_reg->file_name);
|
||||
return OMPI_ERROR;
|
||||
if (sm_reg->sm_mmap) {
|
||||
OBJ_RELEASE(sm_reg->sm_mmap);
|
||||
}
|
||||
|
||||
/* set the pointer to NULL */
|
||||
sm_reg->base_addr = NULL;
|
||||
|
||||
/* remove the file */
|
||||
ret = remove(sm_reg->file_name);
|
||||
if( 0 > ret) {
|
||||
opal_output (ompi_bcol_base_framework.framework_output, "Failed to remove the shared memory file %s. reason = %s",
|
||||
sm_reg->file_name, strerror (errno));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
#endif
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
@ -41,6 +41,10 @@
|
||||
|
||||
#define SM_BACKING_FILE_NAME_MAX_LEN 256
|
||||
|
||||
static bcol_basesmuma_smcm_mmap_t * bcol_basesmuma_smcm_reg_mmap(void *in_ptr, int fd, size_t length,
|
||||
size_t addr_offset, size_t alignment,
|
||||
char *file_name);
|
||||
|
||||
struct file_info_t {
|
||||
uint32_t vpid;
|
||||
uint32_t jobid;
|
||||
@ -60,13 +64,7 @@ static void bcol_basesmuma_smcm_proc_item_t_construct (bcol_basesmuma_smcm_proc_
|
||||
static void bcol_basesmuma_smcm_proc_item_t_destruct (bcol_basesmuma_smcm_proc_item_t * item)
|
||||
{
|
||||
if (item->sm_mmap) {
|
||||
bcol_basesmuma_smcm_mmap_t *map = item->sm_mmap;
|
||||
|
||||
if (map->map_seg) {
|
||||
(void) munmap ((void *) map->map_seg, map->map_size);
|
||||
}
|
||||
|
||||
free (map);
|
||||
OBJ_RELEASE(item->sm_mmap);
|
||||
}
|
||||
|
||||
if (item->sm_file.file_name) {
|
||||
@ -80,57 +78,27 @@ OBJ_CLASS_INSTANCE(bcol_basesmuma_smcm_proc_item_t,
|
||||
bcol_basesmuma_smcm_proc_item_t_construct,
|
||||
bcol_basesmuma_smcm_proc_item_t_destruct);
|
||||
|
||||
|
||||
|
||||
|
||||
bcol_basesmuma_smcm_mmap_t* bcol_basesmuma_smcm_create_mmap(int fd, size_t size, char *file_name,
|
||||
size_t size_ctl_structure,
|
||||
size_t data_seg_alignment)
|
||||
static void bcol_basesmuma_smcm_mmap_construct (bcol_basesmuma_smcm_mmap_t *smcm_mmap)
|
||||
{
|
||||
bcol_basesmuma_smcm_mmap_t *map;
|
||||
bcol_basesmuma_smcm_file_header_t *seg;
|
||||
unsigned char *addr = NULL;
|
||||
|
||||
/* map the file and initialize segment state */
|
||||
seg = (bcol_basesmuma_smcm_file_header_t*)
|
||||
mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
|
||||
if((void*)-1 == seg) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* set up the map object */
|
||||
map = (bcol_basesmuma_smcm_mmap_t* )malloc(sizeof(bcol_basesmuma_smcm_mmap_t));
|
||||
assert(map);
|
||||
|
||||
strncpy(map->map_path, file_name, OPAL_PATH_MAX);
|
||||
/* the first entry in the file is the control structure. The first
|
||||
entry in the control structure is an mca_common_sm_file_header_t
|
||||
element */
|
||||
map->map_seg = seg;
|
||||
|
||||
addr = ((unsigned char *)seg) + size_ctl_structure;
|
||||
/* If we have a data segment (i.e., if 0 != data_seg_alignment),
|
||||
then make it the first aligned address after the control
|
||||
structure. */
|
||||
if (0 != data_seg_alignment) {
|
||||
addr = OPAL_ALIGN_PTR(addr, data_seg_alignment, unsigned char*);
|
||||
|
||||
/* is addr past end of file ? */
|
||||
if((unsigned char*)seg + size < addr) {
|
||||
opal_output (ompi_bcol_base_framework.framework_output, "bcol_basesmuma_smcm_mmap_init: "
|
||||
"memory region too small len %lu addr %p",
|
||||
(unsigned long)size, addr);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
map->data_addr = addr;
|
||||
map->map_addr = (unsigned char *)seg;
|
||||
map->map_size = size;
|
||||
|
||||
return map;
|
||||
memset ((char *) smcm_mmap + sizeof (smcm_mmap->super), 0, sizeof (*smcm_mmap) - sizeof (smcm_mmap->super));
|
||||
}
|
||||
|
||||
static void bcol_basesmuma_smcm_mmap_destruct (bcol_basesmuma_smcm_mmap_t *smcm_mmap)
|
||||
{
|
||||
if (smcm_mmap->map_seg) {
|
||||
munmap (smcm_mmap->map_seg, smcm_mmap->map_size);
|
||||
smcm_mmap->map_seg = NULL;
|
||||
}
|
||||
|
||||
if (smcm_mmap->map_path) {
|
||||
free (smcm_mmap->map_path);
|
||||
smcm_mmap->map_path = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(bcol_basesmuma_smcm_mmap_t, opal_list_item_t,
|
||||
bcol_basesmuma_smcm_mmap_construct,
|
||||
bcol_basesmuma_smcm_mmap_destruct);
|
||||
|
||||
|
||||
/* smcm_allgather_connection:
|
||||
@ -187,8 +155,7 @@ int bcol_basesmuma_smcm_allgather_connection(
|
||||
bcol_basesmuma_smcm_proc_item_t **backing_files;
|
||||
struct file_info_t local_file;
|
||||
struct file_info_t *all_files=NULL;
|
||||
signal(SIGSEGV, SIG_DFL);
|
||||
signal(SIGABRT, SIG_DFL);
|
||||
|
||||
/* sanity check */
|
||||
if (strlen(input.file_name) > SM_BACKING_FILE_NAME_MAX_LEN-1) {
|
||||
opal_output (ompi_bcol_base_framework.framework_output, "backing file name too long: %s len :: %d",
|
||||
@ -317,10 +284,11 @@ int bcol_basesmuma_smcm_allgather_connection(
|
||||
}
|
||||
|
||||
/* map the file */
|
||||
temp->sm_mmap = bcol_basesmuma_smcm_create_mmap(fd,temp->sm_file.size,
|
||||
temp->sm_file.file_name,
|
||||
temp->sm_file.size_ctl_structure,
|
||||
getpagesize());
|
||||
temp->sm_mmap = bcol_basesmuma_smcm_reg_mmap (NULL, fd, temp->sm_file.size,
|
||||
temp->sm_file.size_ctl_structure,
|
||||
temp->sm_file.data_seg_alignment,
|
||||
temp->sm_file.file_name);
|
||||
close (fd);
|
||||
if (NULL == temp->sm_mmap) {
|
||||
opal_output (ompi_bcol_base_framework.framework_output, "mmapping failed to map remote peer's file");
|
||||
OBJ_RELEASE(temp);
|
||||
@ -376,16 +344,6 @@ int bcol_basesmuma_smcm_release_connections (mca_bcol_basesmuma_module_t *sm_bco
|
||||
}
|
||||
|
||||
|
||||
|
||||
OBJ_CLASS_INSTANCE(
|
||||
bcol_basesmuma_smcm_mmap_t,
|
||||
opal_list_item_t,
|
||||
NULL,
|
||||
NULL
|
||||
);
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* mmap the specified file as a shared file. No information exchange with other
|
||||
* processes takes place within this routine.
|
||||
@ -423,7 +381,7 @@ bcol_basesmuma_smcm_mmap_t *bcol_basesmuma_smcm_mem_reg(void *in_ptr,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
map = bcol_basesmuma_smcm_reg_mmap(in_ptr, fd, length, alignment, file_name);
|
||||
map = bcol_basesmuma_smcm_reg_mmap(in_ptr, fd, length, 0, alignment, file_name);
|
||||
if (NULL == map) {
|
||||
return NULL;
|
||||
}
|
||||
@ -437,11 +395,9 @@ bcol_basesmuma_smcm_mmap_t *bcol_basesmuma_smcm_mem_reg(void *in_ptr,
|
||||
|
||||
}
|
||||
|
||||
bcol_basesmuma_smcm_mmap_t * bcol_basesmuma_smcm_reg_mmap(void *in_ptr,
|
||||
int fd,
|
||||
size_t length,
|
||||
size_t alignment,
|
||||
char *file_name)
|
||||
static bcol_basesmuma_smcm_mmap_t * bcol_basesmuma_smcm_reg_mmap(void *in_ptr, int fd, size_t length,
|
||||
size_t addr_offset, size_t alignment,
|
||||
char *file_name)
|
||||
{
|
||||
|
||||
/* local variables */
|
||||
@ -450,6 +406,12 @@ bcol_basesmuma_smcm_mmap_t * bcol_basesmuma_smcm_reg_mmap(void *in_ptr,
|
||||
unsigned char* myaddr = NULL;
|
||||
int flags = MAP_SHARED;
|
||||
|
||||
/* set up the map object */
|
||||
map = OBJ_NEW(bcol_basesmuma_smcm_mmap_t);
|
||||
if (OPAL_UNLIKELY(NULL == map)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* map the file and initialize the segment state */
|
||||
if (NULL != in_ptr) {
|
||||
flags |= MAP_FIXED;
|
||||
@ -457,32 +419,28 @@ bcol_basesmuma_smcm_mmap_t * bcol_basesmuma_smcm_reg_mmap(void *in_ptr,
|
||||
seg = (bcol_basesmuma_smcm_file_header_t *)
|
||||
mmap(in_ptr, length, PROT_READ|PROT_WRITE, flags, fd, 0);
|
||||
if((void*)-1 == seg) {
|
||||
OBJ_RELEASE(map);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* set up the map object */
|
||||
|
||||
/*map = OBJ_NEW(mca_common_sm_mmap_t); */
|
||||
map=(bcol_basesmuma_smcm_mmap_t *)malloc(sizeof(bcol_basesmuma_smcm_mmap_t));
|
||||
assert(map);
|
||||
strncpy(map->map_path, file_name, OPAL_PATH_MAX);
|
||||
map->map_path = strdup (file_name);
|
||||
|
||||
/* the first entry in the file is the control structure. the first entry
|
||||
in the control structure is an mca_common_sm_file_header_t element */
|
||||
map->map_seg = seg;
|
||||
|
||||
myaddr = (unsigned char *) seg;
|
||||
myaddr = (unsigned char *) seg + addr_offset;
|
||||
/* if we have a data segment (i.e. if 0 != data_seg_alignement) */
|
||||
|
||||
/* all mmaped regions are required to be at least page size aligned so this
|
||||
* code does nothing unless you want greater alignment */
|
||||
if (alignment > getpagesize ()) {
|
||||
if (alignment) {
|
||||
myaddr = OPAL_ALIGN_PTR(myaddr, alignment, unsigned char*);
|
||||
|
||||
/* is addr past the end of the file? */
|
||||
if ((unsigned char *) seg+length < myaddr) {
|
||||
opal_output (ompi_bcol_base_framework.framework_output, "mca_bcol_basesmuma_sm_alloc_mmap: memory region too small len %lu add %p",
|
||||
(unsigned long) length, myaddr);
|
||||
OBJ_RELEASE(map);
|
||||
munmap (seg, length);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -44,7 +44,7 @@ typedef struct bcol_basesmuma_smcm_file_header_t {
|
||||
|
||||
typedef struct bcol_basesmuma_smcm_mmap_t {
|
||||
/* double link list element */
|
||||
opal_list_item_t map_item;
|
||||
opal_list_item_t super;
|
||||
/* pointer to header imbeded in the shared memory file */
|
||||
bcol_basesmuma_smcm_file_header_t *map_seg;
|
||||
/* base address of the mmap'ed file */
|
||||
@ -54,7 +54,7 @@ typedef struct bcol_basesmuma_smcm_mmap_t {
|
||||
/* How big it is (in bytes) */
|
||||
size_t map_size;
|
||||
/* Filename */
|
||||
char map_path[OPAL_PATH_MAX];
|
||||
char *map_path;
|
||||
} bcol_basesmuma_smcm_mmap_t;
|
||||
|
||||
OBJ_CLASS_DECLARATION(bcol_basesmuma_smcm_mmap_t);
|
||||
@ -97,12 +97,6 @@ OMPI_DECLSPEC extern bcol_basesmuma_smcm_mmap_t *bcol_basesmuma_smcm_mem_reg(voi
|
||||
size_t alignment,
|
||||
char* file_name);
|
||||
|
||||
OMPI_DECLSPEC extern bcol_basesmuma_smcm_mmap_t *bcol_basesmuma_smcm_reg_mmap(void *in_ptr,
|
||||
int fd,
|
||||
size_t length,
|
||||
size_t alignment,
|
||||
char *file_name);
|
||||
|
||||
OMPI_DECLSPEC extern bcol_basesmuma_smcm_mmap_t* bcol_basesmuma_smcm_create_mmap(int fd,
|
||||
size_t size, char *file_name,
|
||||
size_t size_ctl_structure,
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user