1
1

Change the allocation of the shared memory backing file. The file
is now allocated once per comm_world instance: the lowest rank in
comm_world on a given host creates and initializes the file, and then
notifies the remaining local processes on that host via the OOB.

Reviewed: Ralph Castain, Brian Barrett
Addressing ticket #674.

This commit was SVN r12949.
Этот коммит содержится в:
Rich Graham 2007-01-01 02:39:02 +00:00
родитель b5057d923e
Коммит 6cb2377015
2 изменённых файлов: 186 добавлений и 84 удалений

Просмотреть файл

@ -41,12 +41,14 @@
#endif #endif
#include "ompi/constants.h" #include "ompi/constants.h"
#include "ompi/proc/proc.h"
#include "common_sm_mmap.h" #include "common_sm_mmap.h"
#include "opal/util/basename.h" #include "opal/util/basename.h"
#include "opal/util/output.h" #include "opal/util/output.h"
#include "orte/util/sys_info.h" #include "orte/util/sys_info.h"
#include "orte/util/proc_info.h" #include "orte/util/proc_info.h"
#include "orte/mca/rml/rml_types.h"
#include "orte/mca/rml/base/base.h"
OBJ_CLASS_INSTANCE( OBJ_CLASS_INSTANCE(
mca_common_sm_mmap_t, mca_common_sm_mmap_t,
@ -60,75 +62,57 @@ OBJ_CLASS_INSTANCE(
*/ */
mca_common_sm_mmap_t *mca_common_sm_mmap = NULL; mca_common_sm_mmap_t *mca_common_sm_mmap = NULL;
#if !defined(__WINDOWS__)
/**
 * Open (creating it if necessary) the shared-memory backing file.
 *
 * The file is opened with O_CREAT|O_RDWR and a creation mode of 0000.
 * While open() fails with EACCES the call is retried after a short
 * pause; any other failure is logged and reported to the caller.
 *
 * @param path  Path of the backing file.
 *
 * @return A file descriptor (>= 0) on success, or -1 if open() failed
 *         with an errno other than EACCES.
 */
static int mca_common_sm_mmap_open(char* path)
{
    struct timespec ts;
    int fd;

    /* loop until file can be opened, or until an error, other than
     * access error, occurs */
    for (;;) {
        fd = open(path, O_CREAT|O_RDWR, 0000);
        if (fd >= 0) {
            /* success: hand the descriptor back right away instead of
             * sleeping once more before returning */
            return fd;
        }

        if (errno != EACCES) {
            opal_output(0,
                        "mca_common_sm_mmap_open: open %s failed with errno=%d\n",
                        path, errno);
            return -1;
        }

        /* access denied: pause for 500 microseconds, then retry */
        ts.tv_sec = 0;
        ts.tv_nsec = 500000;
        nanosleep(&ts, NULL);
    }
}
#endif /* !defined(__WINDOWS__) */
mca_common_sm_mmap_t* mca_common_sm_mmap_init(size_t size, char *file_name, mca_common_sm_mmap_t* mca_common_sm_mmap_init(size_t size, char *file_name,
size_t size_ctl_structure, size_t data_seg_alignment) size_t size_ctl_structure, size_t data_seg_alignment)
{ {
int fd = -1, return_code = OMPI_SUCCESS; #if !defined(__WINDOWS__)
bool file_previously_opened = false; int fd = -1;
mca_common_sm_file_header_t* seg = NULL; mca_common_sm_file_header_t* seg = NULL;
mca_common_sm_mmap_t* map = NULL; mca_common_sm_mmap_t* map = NULL;
unsigned char *addr = NULL; unsigned char *addr = NULL;
size_t tmp,mem_offset; size_t tmp,mem_offset;
#if !defined(__WINDOWS__) bool i_create_shared_file=false;
struct stat s_stat; ompi_proc_t **procs;
size_t n_local_procs=0, n_total_procs=0,n,p;
ompi_proc_t *my_proc;
int rc=0;
struct iovec iov;
int sm_file_created;
/* input parameter error checks */ /* figure out how many local procs are on this host */
if( (size < sizeof(mca_common_sm_file_header_t) ) || procs=ompi_proc_world(&n_total_procs);
( file_name == NULL ) || for(p=0 ; p < n_total_procs ; p++ ) {
( size_ctl_structure < if( procs[p]->proc_flags & OMPI_PROC_FLAG_LOCAL)
sizeof(mca_common_sm_file_header_t ) )) { n_local_procs++;
return NULL;
} }
/* open the backing file. The first process to succeed here will /* create list of local proc_t pointers - compress the original
effectively block the others until most of the rest of the list */
setup in this function is complete because the initial perms n=0;
are 000 (an fchmod() is executed below, enabling the other for(p=0; p < n_total_procs ; p++ ) {
processes to get in) */ if( procs[p]->proc_flags & OMPI_PROC_FLAG_LOCAL) {
fd = mca_common_sm_mmap_open(file_name); procs[n]=procs[p];
if( -1 == fd ) { n++;
opal_output(0, "mca_common_sm_mmap_init: mca_common_sm_mmap_open failed \n"); }
return NULL;
} }
/* figure out if I am the lowest rank on host, who will create
the shared file */
my_proc=ompi_proc_local();
if( my_proc == procs[0] )
i_create_shared_file=true;
/* figure out if I am first to attach to file */ /* open the backing file. */
file_previously_opened=false; if( i_create_shared_file ) {
return_code=fstat(fd,&s_stat); /* process initializing the file */
if( 0 > return_code ) { fd = open(file_name, O_CREAT|O_RDWR, 0600);
opal_output(0, "mca_common_sm_mmap_init: fstat failed with errno=%d\n", errno); if (fd < 0) {
goto return_error; opal_output(0,"mca_common_sm_mmap_init: open %s failed with errno=%d\n",
} file_name, errno);
if( s_stat.st_size > 0 ) return NULL;
file_previously_opened=true; }
/* first process to open the file, so needs to initialize it */
if( !file_previously_opened ) {
/* truncate the file to the requested size */ /* truncate the file to the requested size */
if(ftruncate(fd, size) != 0) { if(ftruncate(fd, size) != 0) {
opal_output(0, opal_output(0,
@ -136,17 +120,153 @@ mca_common_sm_mmap_t* mca_common_sm_mmap_init(size_t size, char *file_name,
errno); errno);
goto return_error; goto return_error;
} }
/* map the file and initialize segment state */
seg = (mca_common_sm_file_header_t*)
mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
if( (void*)-1 == seg ) {
opal_output(0, "mca_common_sm_mmap_init: mmap failed with errno=%d\n",
errno);
goto return_error;
}
/* set up the map object */
map = OBJ_NEW(mca_common_sm_mmap_t);
strncpy(map->map_path, file_name, OMPI_PATH_MAX);
/* the first entry in the file is the control structure. The first
entry in the control structure is an mca_common_sm_file_header_t
element */
map->map_seg = seg;
/* If we have a data segment (i.e., if 0 != data_seg_alignment),
then make it the first aligned address after the control
structure. */
if (0 != data_seg_alignment) {
addr = ((unsigned char *) seg) + size_ctl_structure;
/* calculate how far off alignment we are */
tmp = ((size_t) addr) % data_seg_alignment;
/* if we're off alignment, then move up to the next alignment */
if( tmp > 0 )
addr += (data_seg_alignment - tmp);
/* is addr past end of file ? */
if( (unsigned char*)seg+size < addr ) {
opal_output(0, "mca_common_sm_mmap_init: memory region too small len %d addr %p\n",
size,addr);
goto return_error;
}
map->data_addr = addr;
} else {
map->data_addr = NULL;
}
mem_offset = addr-(unsigned char *)seg;
map->map_addr = (unsigned char *)seg;
map->map_size = size;
/* initialize the segment - only the first process to open the file */
seg->seg_offset = mem_offset;
seg->seg_size = size;
opal_atomic_unlock(&seg->seg_lock);
seg->seg_inited = false;
/* signal the rest of the local procs that the backing file
has been created */
for(p=1 ; p < n_local_procs ; p++ ) {
sm_file_created=ORTE_RML_TAG_SM_BACK_FILE_CREATED;
iov.iov_base=&sm_file_created;
iov.iov_len=sizeof(sm_file_created);
rc=orte_rml.send(&(procs[p]->proc_name),&iov,1,
ORTE_RML_TAG_SM_BACK_FILE_CREATED,0);
if( rc < 0 ) {
opal_output(0,
"mca_common_sm_mmap_init: orte_rml.send failed to %l with errno=%d\n",
p,errno);
goto return_error;
}
}
} else {
/* all other procs wait for the file to be initialized
before using the backing file */
iov.iov_base=&sm_file_created;
iov.iov_len=sizeof(sm_file_created);
rc=orte_rml.recv(&(procs[0]->proc_name),&iov,1,
ORTE_RML_TAG_SM_BACK_FILE_CREATED,0);
if( rc < 0 ) {
opal_output(0, "mca_common_sm_mmap_init: orte_rml.recv failed from %l with errno=%d\n",
0,errno);
goto return_error;
}
/* open backing file */
fd = open(file_name, O_RDWR, 0600);
if (fd < 0) {
opal_output(0,"mca_common_sm_mmap_init: open %s failed with errno=%d\n",
file_name, errno);
return NULL;
}
/* map the file and initialize segment state */
seg = (mca_common_sm_file_header_t*)
mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
if( (void*)-1 == seg ) {
opal_output(0, "mca_common_sm_mmap_init: mmap failed with errno=%d\n",
errno);
goto return_error;
}
/* set up the map object */
map = OBJ_NEW(mca_common_sm_mmap_t);
strncpy(map->map_path, file_name, OMPI_PATH_MAX);
/* the first entry in the file is the control structure. The first
entry in the control structure is an mca_common_sm_file_header_t
element */
map->map_seg = seg;
/* If we have a data segment (i.e., if 0 != data_seg_alignment),
then make it the first aligned address after the control
structure. */
if (0 != data_seg_alignment) {
addr = ((unsigned char *) seg) + size_ctl_structure;
/* calculate how far off alignment we are */
tmp = ((size_t) addr) % data_seg_alignment;
/* if we're off alignment, then move up to the next alignment */
if( tmp > 0 )
addr += (data_seg_alignment - tmp);
/* is addr past end of file ? */
if( (unsigned char*)seg+size < addr ) {
opal_output(0, "mca_common_sm_mmap_init: memory region too small len %d addr %p\n",
size,addr);
goto return_error;
}
map->data_addr = addr;
} else {
map->data_addr = NULL;
}
mem_offset = addr-(unsigned char *)seg;
map->map_addr = (unsigned char *)seg;
map->map_size = size;
}
/* enable access by other processes on this host */
close(fd);
return map;
return_error:
if( -1 != fd ) {
close(fd);
} }
/* map the file and initialize segment state */ if( NULL != seg ) munmap((void*) seg,size);
seg = (mca_common_sm_file_header_t*)
mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); return NULL;
if( (void*)-1 == seg ) {
opal_output(0, "mca_common_sm_mmap_init: mmap failed with errno=%d\n",
errno);
goto return_error;
}
#else #else
int fd = -1, return_code = OMPI_SUCCESS;
bool file_previously_opened = false;
mca_common_sm_file_header_t* seg = NULL;
mca_common_sm_mmap_t* map = NULL;
unsigned char *addr = NULL;
size_t tmp,mem_offset;
HANDLE hMapObject = INVALID_HANDLE_VALUE; HANDLE hMapObject = INVALID_HANDLE_VALUE;
LPVOID lpvMem = NULL; LPVOID lpvMem = NULL;
char *temp1, *temp2; char *temp1, *temp2;
@ -191,7 +311,6 @@ mca_common_sm_mmap_t* mca_common_sm_mmap_init(size_t size, char *file_name,
goto return_error; goto return_error;
} }
seg = (mca_common_sm_file_header_t*)lpvMem; seg = (mca_common_sm_file_header_t*)lpvMem;
#endif /* !defined(__WINDOWS__) */
/* set up the map object */ /* set up the map object */
map = OBJ_NEW(mca_common_sm_mmap_t); map = OBJ_NEW(mca_common_sm_mmap_t);
@ -234,29 +353,11 @@ mca_common_sm_mmap_t* mca_common_sm_mmap_init(size_t size, char *file_name,
seg->seg_size = size; seg->seg_size = size;
} }
#if !defined(__WINDOWS__)
/* enable access by other processes on this host */
if(fchmod(fd, 0600) != 0) {
opal_output(0, "mca_common_sm_mmap_init: fchmod failed with errno=%d :: fd %d\n",
errno,fd);
OBJ_RELEASE(map);
goto return_error;
}
close(fd);
#else
map->hMappedObject = hMapObject; map->hMappedObject = hMapObject;
#endif /* !defined(__WINDOWS__) */
return map; return map;
return_error: return_error:
#if !defined(__WINDOWS__)
if( -1 != fd ) {
fchmod(fd, 0600);
close(fd);
}
if( NULL != seg ) munmap((void*) seg,size);
#else
{ {
char* localbuf = NULL; char* localbuf = NULL;
FormatMessage( FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM, FormatMessage( FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM,
@ -266,9 +367,9 @@ mca_common_sm_mmap_t* mca_common_sm_mmap_init(size_t size, char *file_name,
} }
if( NULL != lpvMem ) UnmapViewOfFile( lpvMem ); if( NULL != lpvMem ) UnmapViewOfFile( lpvMem );
if( NULL != hMapObject ) CloseHandle(hMapObject); if( NULL != hMapObject ) CloseHandle(hMapObject);
#endif /* !defined(__WINDOWS__) */
return NULL; return NULL;
#endif
} }
int mca_common_sm_mmap_fini( mca_common_sm_mmap_t* sm_mmap ) int mca_common_sm_mmap_fini( mca_common_sm_mmap_t* sm_mmap )

Просмотреть файл

@ -53,6 +53,7 @@ typedef uint32_t orte_rml_tag_t;
#define ORTE_RML_TAG_ERRMGR 16 #define ORTE_RML_TAG_ERRMGR 16
#define ORTE_RML_TAG_BPROC 17 #define ORTE_RML_TAG_BPROC 17
#define ORTE_RML_TAG_BPROC_ABORT 18 #define ORTE_RML_TAG_BPROC_ABORT 18
#define ORTE_RML_TAG_SM_BACK_FILE_CREATED 19
#define ORTE_RML_TAG_DYNAMIC 2000 #define ORTE_RML_TAG_DYNAMIC 2000