diff --git a/ompi/mca/common/sm/common_sm_mmap.c b/ompi/mca/common/sm/common_sm_mmap.c index afb7772a0a..f29ed5cd27 100644 --- a/ompi/mca/common/sm/common_sm_mmap.c +++ b/ompi/mca/common/sm/common_sm_mmap.c @@ -46,6 +46,7 @@ #include "common_sm_mmap.h" #include "opal/util/basename.h" #include "opal/util/output.h" +#include "opal/align.h" #include "orte/util/proc_info.h" #include "orte/mca/rml/rml_types.h" #include "orte/mca/rml/base/base.h" @@ -64,234 +65,162 @@ OBJ_CLASS_INSTANCE( */ mca_common_sm_mmap_t *mca_common_sm_mmap = NULL; -mca_common_sm_mmap_t* mca_common_sm_mmap_init(size_t size, char *file_name, - size_t size_ctl_structure, size_t data_seg_alignment) -{ #if !defined(__WINDOWS__) - int fd = -1; - mca_common_sm_file_header_t* seg = NULL; - mca_common_sm_mmap_t* map = NULL; + +static mca_common_sm_mmap_t* create_map(int fd, size_t size, char *file_name, + size_t size_ctl_structure, + size_t data_seg_alignment) +{ + mca_common_sm_mmap_t *map; + mca_common_sm_file_header_t *seg; unsigned char *addr = NULL; - size_t tmp,mem_offset; - bool i_create_shared_file=false; + /* map the file and initialize segment state */ + seg = (mca_common_sm_file_header_t*) + mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + if((void*)-1 == seg) { + opal_output(0, "mca_common_sm_mmap_init: " + "mmap failed with errno=%d\n", errno); + return NULL; + } + + /* set up the map object */ + map = OBJ_NEW(mca_common_sm_mmap_t); + strncpy(map->map_path, file_name, OMPI_PATH_MAX); + /* the first entry in the file is the control structure. The first + entry in the control structure is an mca_common_sm_file_header_t + element */ + map->map_seg = seg; + + addr = ((unsigned char *)seg) + size_ctl_structure; + /* If we have a data segment (i.e., if 0 != data_seg_alignment), + then make it the first aligned address after the control + structure. */ + if (0 != data_seg_alignment) { + addr = OPAL_ALIGN_PTR(addr, data_seg_alignment, unsigned char*); + + /* is addr past end of file ? */ + if((unsigned char*)seg + size < addr) { + opal_output(0, "mca_common_sm_mmap_init: " + "memory region too small len %lu addr %p\n", + (unsigned long)size, addr); + return NULL; + } + } + map->data_addr = addr; + map->map_addr = (unsigned char *)seg; + map->map_size = size; + + return map; +} + +mca_common_sm_mmap_t* mca_common_sm_mmap_init(size_t size, char *file_name, + size_t size_ctl_structure, + size_t data_seg_alignment) +{ + int fd = -1; + mca_common_sm_mmap_t* map = NULL; + size_t mem_offset; ompi_proc_t **procs = NULL; - size_t n_local_procs=0, n_total_procs=0,n,p; - ompi_proc_t *my_proc; - int rc=0, sm_file_inited=0; - struct iovec iov[2]; - int sm_file_created; + size_t n_local_procs = 0, n_total_procs = 0, p; + int rc = 0, sm_file_inited = 0; + struct iovec iov[2]; + int sm_file_created = OMPI_RML_TAG_SM_BACK_FILE_CREATED; - /* figure out how many local procs are on this host */ - procs=ompi_proc_world(&n_total_procs); - for(p=0 ; p < n_total_procs ; p++ ) { - if( procs[p]->proc_flags & OMPI_PROC_FLAG_LOCAL){ - n_local_procs++; + /* figure out how many local procs are on this host and create list of + local proc_t pointers by compressing the original list */ + procs = ompi_proc_world(&n_total_procs); + + for(p=0; p < n_total_procs; p++) { + if(procs[p]->proc_flags & OMPI_PROC_FLAG_LOCAL) { + procs[n_local_procs++] = procs[p]; } } - /* create list of local proc_t pointers - compress the original - list */ - n=0; - for(p=0; p < n_total_procs ; p++ ) { - if( procs[p]->proc_flags & OMPI_PROC_FLAG_LOCAL) { - procs[n]=procs[p]; - n++; - } - } - + iov[0].iov_base = &sm_file_created; + iov[0].iov_len = sizeof(sm_file_created); + iov[1].iov_base = &sm_file_inited; + iov[1].iov_len = sizeof(sm_file_inited); + /* figure out if I am the lowest rank on host, who will create the shared file */ - my_proc=ompi_proc_local(); - if( my_proc == procs[0] ) { - i_create_shared_file=true; - } - - /* open the backing file. */ - if( i_create_shared_file ) { + if(ompi_proc_local() == procs[0]) { /* process initializing the file */ fd = open(file_name, O_CREAT|O_RDWR, 0600); - if (fd < 0) { - opal_output(0,"mca_common_sm_mmap_init: open %s failed with errno=%d\n", - file_name, errno); - goto file_opened; - } - - /* truncate the file to the requested size */ - if(ftruncate(fd, size) != 0) { - opal_output(0, - "mca_common_sm_mmap_init: ftruncate failed with errno=%d\n", - errno); - goto file_opened; - } - /* map the file and initialize segment state */ - seg = (mca_common_sm_file_header_t*) - mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); - if( (void*)-1 == seg ) { - opal_output(0, "mca_common_sm_mmap_init: mmap failed with errno=%d\n", - errno); - goto file_opened; - } - /* set up the map object */ - map = OBJ_NEW(mca_common_sm_mmap_t); - strncpy(map->map_path, file_name, OMPI_PATH_MAX); - /* the first entry in the file is the control structure. The first - entry in the control structure is an mca_common_sm_file_header_t - element */ - map->map_seg = seg; - - /* If we have a data segment (i.e., if 0 != data_seg_alignment), - then make it the first aligned address after the control - structure. */ - if (0 != data_seg_alignment) { - addr = ((unsigned char *) seg) + size_ctl_structure; - /* calculate how far off alignment we are */ - tmp = ((size_t) addr) % data_seg_alignment; - /* if we're off alignment, then move up to the next alignment */ - if( tmp > 0 ) - addr += (data_seg_alignment - tmp); - - /* is addr past end of file ? */ - if( (unsigned char*)seg+size < addr ) { - opal_output(0, "mca_common_sm_mmap_init: memory region too small len %lu addr %p\n", - (unsigned long)size, addr); - goto file_opened; - } - map->data_addr = addr; + if(fd < 0) { + opal_output(0, "mca_common_sm_mmap_init: " + "open %s failed with errno=%d\n", file_name, errno); + } else if(ftruncate(fd, size) != 0) { + opal_output(0, "mca_common_sm_mmap_init: " + "ftruncate failed with errno=%d\n", errno); } else { - map->data_addr = NULL; + + map = create_map(fd, size, file_name, size_ctl_structure, + data_seg_alignment); + + if(map != NULL) { + sm_file_inited = 1; + + /* initialize the segment - only the first process + to open the file */ + mem_offset = map->data_addr - (unsigned char *)map->map_seg; + map->map_seg->seg_offset = mem_offset; + map->map_seg->seg_size = size - mem_offset; + opal_atomic_unlock(&map->map_seg->seg_lock); + map->map_seg->seg_inited = false; + } } - mem_offset = addr-(unsigned char *)seg; - map->map_addr = (unsigned char *)seg; - map->map_size = size; - - /* initialize the segment - only the first process to open the file */ - seg->seg_offset = mem_offset; - /* initialize size after subtracting out space used by the header */ - seg->seg_size = size - mem_offset; - opal_atomic_unlock(&seg->seg_lock); - seg->seg_inited = false; - - /* if we got this far, the file has been initialized correctly */ - sm_file_inited=1; - - file_opened: /* signal the rest of the local procs that the backing file has been created */ - for(p=1 ; p < n_local_procs ; p++ ) { - sm_file_created=OMPI_RML_TAG_SM_BACK_FILE_CREATED; - iov[0].iov_base=&sm_file_created; - iov[0].iov_len=sizeof(sm_file_created); - iov[1].iov_base=&sm_file_inited; - iov[1].iov_len=sizeof(sm_file_inited); - rc=orte_rml.send(&(procs[p]->proc_name),iov,2, - OMPI_RML_TAG_SM_BACK_FILE_CREATED,0); - if( rc < 0 ) { - opal_output(0, - "mca_common_sm_mmap_init: orte_rml.send failed to %lu with errno=%d\n", - (unsigned long)p, errno); - goto return_error; + for(p=1; p < n_local_procs; p++) { + rc = orte_rml.send(&(procs[p]->proc_name), iov, 2, + OMPI_RML_TAG_SM_BACK_FILE_CREATED, 0); + if(rc < 0) { + opal_output(0, "mca_common_sm_mmap_init: " + "orte_rml.send failed to %lu with errno=%d\n", + (unsigned long)p, errno); + goto out; } } - if ( 0 == sm_file_inited ) { - /* error - the sm backing file did not get opened correctly */ - goto return_error; - } } else { /* all other procs wait for the file to be initialized before using the backing file */ - iov[0].iov_base=&sm_file_created; - iov[0].iov_len=sizeof(sm_file_created); - iov[1].iov_base=&sm_file_inited; - iov[1].iov_len=sizeof(sm_file_inited); - rc=orte_rml.recv(&(procs[0]->proc_name),iov,2, - OMPI_RML_TAG_SM_BACK_FILE_CREATED,0); - if( rc < 0 ) { - opal_output(0, "mca_common_sm_mmap_init: orte_rml.recv failed from %ld with errno=%d\n", - 0L, errno); - goto return_error; + rc = orte_rml.recv(&(procs[0]->proc_name), iov, 2, + OMPI_RML_TAG_SM_BACK_FILE_CREATED, 0); + if(rc < 0) { + opal_output(0, "mca_common_sm_mmap_init: " + "orte_rml.recv failed from %d with errno=%d\n", + 0, errno); + goto out; } + /* check to see if file inited correctly */ - if( 0 == sm_file_inited ) { - goto return_error; + if(sm_file_inited != 0) { + fd = open(file_name, O_RDWR, 0600); + + if(fd != -1) + map = create_map(fd, size, file_name, size_ctl_structure, + data_seg_alignment); } - - /* open backing file */ - fd = open(file_name, O_RDWR, 0600); - if (fd < 0) { - opal_output(0,"mca_common_sm_mmap_init: open %s failed with errno=%d\n", - file_name, errno); - return NULL; - } - - /* map the file and initialize segment state */ - seg = (mca_common_sm_file_header_t*) - mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); - if( (void*)-1 == seg ) { - opal_output(0, "mca_common_sm_mmap_init: mmap failed with errno=%d\n", - errno); - goto return_error; - } - /* set up the map object */ - map = OBJ_NEW(mca_common_sm_mmap_t); - strncpy(map->map_path, file_name, OMPI_PATH_MAX); - /* the first entry in the file is the control structure. The first - entry in the control structure is an mca_common_sm_file_header_t - element */ - map->map_seg = seg; - - /* If we have a data segment (i.e., if 0 != data_seg_alignment), - then make it the first aligned address after the control - structure. */ - if (0 != data_seg_alignment) { - addr = ((unsigned char *) seg) + size_ctl_structure; - /* calculate how far off alignment we are */ - tmp = ((size_t) addr) % data_seg_alignment; - /* if we're off alignment, then move up to the next alignment */ - if( tmp > 0 ) - addr += (data_seg_alignment - tmp); - - /* is addr past end of file ? */ - if( (unsigned char*)seg+size < addr ) { - opal_output(0, "mca_common_sm_mmap_init: memory region too small len %lu addr %p\n", - (unsigned long)size,addr); - goto return_error; - } - map->data_addr = addr; - } else { - map->data_addr = NULL; - } - mem_offset = addr-(unsigned char *)seg; - map->map_addr = (unsigned char *)seg; - map->map_size = size; - - } - - if ( NULL != procs ) free(procs); - - /* enable access by other processes on this host */ - close(fd); - - return map; - - return_error: - if( -1 != fd ) { - close(fd); } - if( NULL != seg ) munmap((void*) seg,size); - if ( NULL != procs ) free(procs); - - return NULL; +out: + if(NULL != procs) free(procs); + if(fd != -1) close(fd); + return map; +} #else +mca_common_sm_mmap_t* mca_common_sm_mmap_init(size_t size, char *file_name, + size_t size_ctl_structure, size_t data_seg_alignment) +{ int fd = -1, return_code = OMPI_SUCCESS; bool file_previously_opened = false; mca_common_sm_file_header_t* seg = NULL; mca_common_sm_mmap_t* map = NULL; unsigned char *addr = NULL; - size_t tmp,mem_offset; + size_t tmp, mem_offset; HANDLE hMapObject = INVALID_HANDLE_VALUE; LPVOID lpvMem = NULL; @@ -396,8 +325,8 @@ mca_common_sm_mmap_t* mca_common_sm_mmap_init(size_t size, char *file_name, if( NULL != hMapObject ) CloseHandle(hMapObject); return NULL; -#endif } +#endif int mca_common_sm_mmap_fini( mca_common_sm_mmap_t* sm_mmap ) {