1
1

Added a new System V (sysv) shared memory component for Open MPI.

Configure Option:
--enable-sysv

MCA Parameter:
mpi_common_sm

mpi_common_sm accepts a comma-delimited, order-dependent list of shared
memory components: sysv (only available when built with --enable-sysv) and
mmap.  The first component that is successfully selected is used.  For
example, -mca mpi_common_sm sysv,mmap will first try sysv; if sysv cannot be
selected, mmap is used instead.  If mpi_common_sm is not provided, mmap is
used.

Notes:
Please make certain that your system's shmmax limit, or equivalent, is larger
than mpool_sm_min_size.  Otherwise, shmget may fail.

This commit was SVN r23260.
Этот коммит содержится в:
Samuel Gutierrez 2010-06-09 16:58:52 +00:00
родитель da43547983
Коммит 2fb7c344fc
25 изменённых файлов: 2076 добавлений и 355 удалений

Просмотреть файл

@ -62,6 +62,7 @@ rolfv Rolf Vandevaart Sun
rta Rob Awles LANL
rusraink Rainer Keller HLRS, ORNL
sami Sami Ayyorgun LANL
samuel Samuel K. Gutierrez LANL
santhana Gopal Santhanaraman OSU
sharonm Sharon Melamed Voltaire
shiqing Shiqing Fan HLRS

Просмотреть файл

@ -14,7 +14,7 @@ Copyright (c) 2004-2008 High Performance Computing Center Stuttgart,
University of Stuttgart. All rights reserved.
Copyright (c) 2004-2007 The Regents of the University of California.
All rights reserved.
Copyright (c) 2006-2008 Los Alamos National Security, LLC. All rights
Copyright (c) 2006-2010 Los Alamos National Security, LLC. All rights
reserved.
Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved.
Copyright (c) 2006-2008 Voltaire, Inc. All rights reserved.

4
README
Просмотреть файл

@ -895,6 +895,10 @@ for a full list); a summary of the more commonly used ones follows:
with different endian representations). Heterogeneous support is
disabled by default because it imposes a minor performance penalty.
--enable-sysv
Enable System V (sysv) shared memory support. By default, System V
shared memory support is disabled.
--with-wrapper-cflags=<cflags>
--with-wrapper-cxxflags=<cxxflags>
--with-wrapper-fflags=<fflags>

Просмотреть файл

@ -87,7 +87,7 @@ FOREACH (MCA_FRAMEWORK ${MCA_FRAMEWORK_LIST})
IF(EXISTS "${CURRENT_PATH}/.windows")
#MESSAGE("MCA_FRAMEWORK_BASE_FILES:${MCA_FRAMEWORK_BASE_FILES}")
SET(EXCLUDE_LIST "")
SET(EXCLUDE_LIST"")
FILE(STRINGS ${CURRENT_PATH}/.windows EXCLUDE_LIST REGEX "^exclude_list=")
IF(NOT EXCLUDE_LIST STREQUAL "")
@ -122,6 +122,19 @@ FOREACH (MCA_FRAMEWORK ${MCA_FRAMEWORK_LIST})
FILE(GLOB COMPONENT_FILES "${CURRENT_PATH}/*.C" "${CURRENT_PATH}/*.h"
"${CURRENT_PATH}/*.cc" "${CURRENT_PATH}/*.cpp")
#check exclude list
SET(EXCLUDE_LIST"")
FILE(STRINGS ${CURRENT_PATH}/.windows EXCLUDE_LIST REGEX "^exclude_list=")
IF(NOT EXCLUDE_LIST STREQUAL "")
STRING(REPLACE "exclude_list=" "" EXCLUDE_LIST ${EXCLUDE_LIST})
ENDIF(NOT EXCLUDE_LIST STREQUAL "")
# remove the files in the exclude list
FOREACH(FILE ${EXCLUDE_LIST})
LIST(REMOVE_ITEM MCA_FRAMEWORK_BASE_FILES "${CURRENT_PATH}/${FILE}")
ENDFOREACH(FILE)
# by default, build this component.
SET(BUILD_COMPONENT TRUE)

Просмотреть файл

@ -171,6 +171,9 @@ OMPI_DEF_VAR(HAVE_INTERLOCKEDCOMPAREEXCHANGE64 "Whether we support 64 bits atomi
OMPI_DEF_VAR(HAVE_INTERLOCKEDCOMPAREEXCHANGEACQUIRE "Whether we support 32 bits atomic operations on Windows" 0 0)
OMPI_DEF_VAR(HAVE_INTERLOCKEDCOMPAREEXCHANGERELEASE "Whether we support 32 bits atomic operations on Windows" 0 0)
OMPI_DEF(MCA_COMMON_SM_WINDOWS 1 "Whether we have shared memory support for Windows or not." 0 1)
OMPI_DEF(MCA_COMMON_SM_SYSV 0 "Whether we have System V (sysv) shared memory support or not." 0 1)
OMPI_CHECK_INCLUDE_FILE (windows.h HAVE_WINDOWS_H)
OMPI_CHECK_INCLUDE_FILE (winsock2.h HAVE_WINSOCK2_H)

Просмотреть файл

@ -11,6 +11,8 @@
* All rights reserved.
* Copyright (c) 2006-2007 Voltaire. All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -230,7 +232,7 @@ static int sm_btl_first_time_init(mca_btl_sm_t *sm_btl, int n)
/* before we multiply by n, make sure the result won't overflow */
/* Stick that little pad in, particularly since we'll eventually
* need a little extra space. E.g., in mca_mpool_sm_init() in
* mpool_sm_component.c when sizeof(mca_common_sm_mmap_t) is
* mpool_sm_component.c when sizeof(mca_common_sm_module_t) is
* added.
*/
if ( ((double) res.size) * n > LONG_MAX - 4096 )
@ -270,13 +272,13 @@ static int sm_btl_first_time_init(mca_btl_sm_t *sm_btl, int n)
/* Pass in a data segment alignment of 0 to get no data
segment (only the shared control structure) */
size = sizeof(mca_common_sm_file_header_t) +
size = sizeof(mca_common_sm_seg_header_t) +
n * (sizeof(sm_fifo_t*) + sizeof(char *) + sizeof(uint16_t)) + CACHE_LINE_SIZE;
procs = ompi_proc_world(&num_procs);
if (!(mca_btl_sm_component.mmap_file =
mca_common_sm_mmap_init(procs, num_procs, size, sm_ctl_file,
sizeof(mca_common_sm_file_header_t),
CACHE_LINE_SIZE))) {
if (!(mca_btl_sm_component.sm_seg =
mca_common_sm_init(procs, num_procs, size, sm_ctl_file,
sizeof(mca_common_sm_seg_header_t),
CACHE_LINE_SIZE))) {
opal_output(0, "mca_btl_sm_add_procs: unable to create shared memory "
"BTL coordinating strucure :: size %lu \n",
(unsigned long)size);
@ -289,7 +291,7 @@ static int sm_btl_first_time_init(mca_btl_sm_t *sm_btl, int n)
/* set the pointer to the shared memory control structure */
mca_btl_sm_component.sm_ctl_header =
(mca_common_sm_file_header_t*)mca_btl_sm_component.mmap_file->map_seg;
(mca_common_sm_seg_header_t*)mca_btl_sm_component.sm_seg->module_seg;
/* check to make sure number of local procs is within the
@ -300,7 +302,7 @@ static int sm_btl_first_time_init(mca_btl_sm_t *sm_btl, int n)
return OMPI_ERROR;
}
mca_btl_sm_component.shm_fifo = (volatile sm_fifo_t **)mca_btl_sm_component.mmap_file->data_addr;
mca_btl_sm_component.shm_fifo = (volatile sm_fifo_t **)mca_btl_sm_component.sm_seg->module_data_addr;
mca_btl_sm_component.shm_bases = (char**)(mca_btl_sm_component.shm_fifo + n);
mca_btl_sm_component.shm_mem_nodes = (uint16_t*)(mca_btl_sm_component.shm_bases + n);
@ -538,9 +540,9 @@ int mca_btl_sm_add_procs(
/* Sync with other local procs. Force the FIFO initialization to always
* happens before the readers access it.
*/
opal_atomic_add_32( &mca_btl_sm_component.mmap_file->map_seg->seg_inited, 1);
opal_atomic_add_32( &mca_btl_sm_component.sm_seg->module_seg->seg_inited, 1);
while( n_local_procs >
mca_btl_sm_component.mmap_file->map_seg->seg_inited) {
mca_btl_sm_component.sm_seg->module_seg->seg_inited) {
opal_progress();
opal_atomic_rmb();
}
@ -1106,13 +1108,13 @@ int mca_btl_sm_ft_event(int state) {
}
if(OPAL_CRS_CHECKPOINT == state) {
if( NULL != mca_btl_sm_component.mmap_file ) {
if( NULL != mca_btl_sm_component.sm_seg ) {
/* On restart we need the old file names to exist (not necessarily
* contain content) so the CRS component does not fail when searching
* for these old file handles. The restart procedure will make sure
* these files get cleaned up appropriately.
*/
opal_crs_base_metadata_write_token(NULL, CRS_METADATA_TOUCH, mca_btl_sm_component.mmap_file->map_path);
opal_crs_base_metadata_write_token(NULL, CRS_METADATA_TOUCH, mca_btl_sm_component.sm_seg->module_seg_path);
/* Record the job session directory */
opal_crs_base_metadata_write_token(NULL, CRS_METADATA_MKDIR, orte_process_info.job_session_dir);
@ -1120,11 +1122,11 @@ int mca_btl_sm_ft_event(int state) {
}
else if(OPAL_CRS_CONTINUE == state) {
if( ompi_cr_continue_like_restart ) {
if( NULL != mca_btl_sm_component.mmap_file ) {
if( NULL != mca_btl_sm_component.sm_seg ) {
/* Do not Add session directory on continue */
/* Add shared memory file */
opal_crs_base_cleanup_append(mca_btl_sm_component.mmap_file->map_path, false);
opal_crs_base_cleanup_append(mca_btl_sm_component.sm_seg->module_seg_path, false);
}
/* Clear this so we force the module to re-init the sm files */
@ -1133,7 +1135,7 @@ int mca_btl_sm_ft_event(int state) {
}
else if(OPAL_CRS_RESTART == state ||
OPAL_CRS_RESTART_PRE == state) {
if( NULL != mca_btl_sm_component.mmap_file ) {
if( NULL != mca_btl_sm_component.sm_seg ) {
/* Add session directory */
opal_crs_base_cleanup_append(orte_process_info.job_session_dir, true);
tmp_dir = opal_dirname(orte_process_info.job_session_dir);
@ -1143,7 +1145,7 @@ int mca_btl_sm_ft_event(int state) {
tmp_dir = NULL;
}
/* Add shared memory file */
opal_crs_base_cleanup_append(mca_btl_sm_component.mmap_file->map_path, false);
opal_crs_base_cleanup_append(mca_btl_sm_component.sm_seg->module_seg_path, false);
}
/* Clear this so we force the module to re-init the sm files */

Просмотреть файл

@ -1,4 +1,3 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -11,7 +10,9 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2007 Voltaire. All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -67,7 +68,7 @@
#include "ompi/mca/btl/base/base.h"
#include "ompi/mca/mpool/mpool.h"
#include "ompi/mca/common/sm/common_sm_mmap.h"
#include "ompi/mca/common/sm/common_sm.h"
BEGIN_C_DECLS
@ -151,8 +152,8 @@ struct mca_btl_sm_component_t {
size_t eager_limit; /**< first fragment size */
size_t max_frag_size; /**< maximum (second and beyone) fragment size */
opal_mutex_t sm_lock;
mca_common_sm_mmap_t *mmap_file; /**< description of mmap'ed file */
mca_common_sm_file_header_t *sm_ctl_header; /* control header in
mca_common_sm_module_t *sm_seg; /**< description of shared memory segment */
mca_common_sm_seg_header_t *sm_ctl_header; /* control header in
shared memory */
volatile sm_fifo_t **shm_fifo; /**< pointer to fifo 2D array in shared memory */
char **shm_bases; /**< pointer to base pointers in shared memory */
@ -365,30 +366,6 @@ static inline void *sm_fifo_read(sm_fifo_t *fifo)
return value;
}
/**
* Register shared memory module parameters with the MCA framework
*/
extern int mca_btl_sm_component_open(void);
/**
* Any final cleanup before being unloaded.
*/
extern int mca_btl_sm_component_close(void);
/**
* SM module initialization.
*
* @param num_btls (OUT) Number of BTLs returned in BTL array.
* @param enable_progress_threads (IN) Flag indicating whether BTL is allowed to have progress threads
* @param enable_mpi_threads (IN) Flag indicating whether BTL must support multilple simultaneous invocations from different threads
*
*/
extern mca_btl_base_module_t** mca_btl_sm_component_init(
int *num_btls,
bool enable_progress_threads,
bool enable_mpi_threads
);
/**
* shared memory component progress.
*/

Просмотреть файл

@ -10,7 +10,9 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2007 Voltaire. All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -48,7 +50,7 @@
#include "opal/mca/base/mca_base_param.h"
#include "ompi/mca/mpool/base/base.h"
#include "ompi/mca/common/sm/common_sm_mmap.h"
#include "ompi/mca/common/sm/common_sm.h"
#include "ompi/mca/btl/base/btl_base_error.h"
#if OPAL_ENABLE_FT_CR == 1
@ -59,6 +61,16 @@
#include "btl_sm_frag.h"
#include "btl_sm_fifo.h"
static int mca_btl_sm_component_open(void);
static int mca_btl_sm_component_close(void);
static int sm_register(void);
static mca_btl_base_module_t** mca_btl_sm_component_init(
int *num_btls,
bool enable_progress_threads,
bool enable_mpi_threads
);
/*
* Shared Memory (SM) component instance.
*/
@ -74,7 +86,9 @@ mca_btl_sm_component_t mca_btl_sm_component = {
OMPI_MINOR_VERSION, /* MCA component minor version */
OMPI_RELEASE_VERSION, /* MCA component release version */
mca_btl_sm_component_open, /* component open */
mca_btl_sm_component_close /* component close */
mca_btl_sm_component_close, /* component close */
NULL,
sm_register,
},
{
/* The component is checkpoint ready */
@ -112,12 +126,7 @@ static inline int mca_btl_sm_param_register_int(
}
/*
* Called by MCA framework to open the component, registers
* component parameters.
*/
int mca_btl_sm_component_open(void)
static int sm_register(void)
{
int i;
@ -162,7 +171,6 @@ int mca_btl_sm_component_open(void)
false, false, 0,
&mca_btl_sm_component.knem_max_simultaneous);
mca_btl_sm_component.sm_max_btls = 1;
/* register SM component parameters */
mca_btl_sm_component.sm_free_list_num =
mca_btl_sm_param_register_int("free_list_num", 8);
@ -178,6 +186,32 @@ int mca_btl_sm_component_open(void)
mca_btl_sm_param_register_int("fifo_size", 4096);
mca_btl_sm_component.nfifos =
mca_btl_sm_param_register_int("num_fifos", 1);
mca_btl_sm_component.fifo_lazy_free =
mca_btl_sm_param_register_int("fifo_lazy_free", 120);
/* default number of extra procs to allow for future growth */
mca_btl_sm_component.sm_extra_procs =
mca_btl_sm_param_register_int("sm_extra_procs", 0);
/* Call the BTL based to register its MCA params */
mca_btl_base_param_register(&mca_btl_sm_component.super.btl_version,
&mca_btl_sm.super);
/* Call down to sm common to register its MCA params */
mca_common_sm_param_register(&mca_btl_sm_component.super.btl_version);
return OMPI_SUCCESS;
}
/*
* Called by MCA framework to open the component, registers
* component parameters.
*/
static int mca_btl_sm_component_open(void)
{
mca_btl_sm_component.sm_max_btls = 1;
/* make sure the number of fifos is a power of 2 */
{
int i = 1;
@ -185,8 +219,6 @@ int mca_btl_sm_component_open(void)
i <<= 1;
mca_btl_sm_component.nfifos = i;
}
mca_btl_sm_component.fifo_lazy_free =
mca_btl_sm_param_register_int("fifo_lazy_free", 120);
/* make sure that queue size and lazy free parameter are compatible */
if (mca_btl_sm_component.fifo_lazy_free >= (mca_btl_sm_component.fifo_size >> 1) )
@ -194,9 +226,6 @@ int mca_btl_sm_component_open(void)
if (mca_btl_sm_component.fifo_lazy_free <= 0)
mca_btl_sm_component.fifo_lazy_free = 1;
/* default number of extra procs to allow for future growth */
mca_btl_sm_component.sm_extra_procs =
mca_btl_sm_param_register_int("sm_extra_procs", 0);
mca_btl_sm.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH-1;
mca_btl_sm.super.btl_eager_limit = 4*1024;
@ -214,8 +243,6 @@ int mca_btl_sm_component_open(void)
mca_btl_sm.super.btl_bandwidth = 9000; /* Mbs */
mca_btl_sm.super.btl_latency = 1; /* Microsecs */
mca_btl_base_param_register(&mca_btl_sm_component.super.btl_version,
&mca_btl_sm.super);
mca_btl_sm_component.max_frag_size = mca_btl_sm.super.btl_max_send_size;
mca_btl_sm_component.eager_limit = mca_btl_sm.super.btl_eager_limit;
@ -233,7 +260,7 @@ int mca_btl_sm_component_open(void)
* component cleanup - sanity checking of queue lengths
*/
int mca_btl_sm_component_close(void)
static int mca_btl_sm_component_close(void)
{
int return_value = OMPI_SUCCESS;
@ -263,12 +290,12 @@ int mca_btl_sm_component_close(void)
/*OBJ_DESTRUCT(&mca_btl_sm_component.sm_frags_max);*/
/* unmap the shared memory control structure */
if(mca_btl_sm_component.mmap_file != NULL) {
return_value = mca_common_sm_mmap_fini( mca_btl_sm_component.mmap_file );
if(mca_btl_sm_component.sm_seg != NULL) {
return_value = mca_common_sm_fini( mca_btl_sm_component.sm_seg );
if( OMPI_SUCCESS != return_value ) {
return_value=OMPI_ERROR;
opal_output(0," munmap failed :: file - %s :: errno - %d \n",
mca_btl_sm_component.mmap_file->map_addr,
mca_btl_sm_component.sm_seg->module_seg_path,
errno);
goto CLEANUP;
}
@ -283,12 +310,12 @@ int mca_btl_sm_component_close(void)
*/
if(OPAL_CR_STATUS_RESTART_PRE != opal_cr_checkpointing_state &&
OPAL_CR_STATUS_RESTART_POST != opal_cr_checkpointing_state ) {
unlink(mca_btl_sm_component.mmap_file->map_path);
unlink(mca_btl_sm_component.sm_seg->module_seg_path);
}
#else
unlink(mca_btl_sm_component.mmap_file->map_path);
unlink(mca_btl_sm_component.sm_seg->module_seg_path);
#endif
OBJ_RELEASE(mca_btl_sm_component.mmap_file);
OBJ_RELEASE(mca_btl_sm_component.sm_seg);
}
#if OPAL_ENABLE_PROGRESS_THREADS == 1
@ -320,7 +347,7 @@ CLEANUP:
/*
* SM component initialization
*/
mca_btl_base_module_t** mca_btl_sm_component_init(
static mca_btl_base_module_t** mca_btl_sm_component_init(
int *num_btls,
bool enable_progress_threads,
bool enable_mpi_threads)

Просмотреть файл

@ -28,7 +28,7 @@
#include "opal/datatype/opal_convertor.h"
#include "orte/types.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/common/sm/common_sm_mmap.h"
#include "ompi/mca/common/sm/common_sm.h"
BEGIN_C_DECLS
@ -142,7 +142,7 @@ BEGIN_C_DECLS
typedef struct mca_coll_sm_comm_t {
/* Meta data that we get back from the common mmap allocation
function */
mca_common_sm_mmap_t *mcb_mmap;
mca_common_sm_module_t *sm_bootstrap_meta;
/** Pointer to my barrier control pages (odd index pages are
"in", even index pages are "out") */

Просмотреть файл

@ -11,6 +11,8 @@
* All rights reserved.
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -96,10 +98,10 @@ static void mca_coll_sm_module_destruct(mca_coll_sm_module_t *module)
if (NULL != c) {
/* Munmap the per-communicator shmem data segment */
if (NULL != c->mcb_mmap) {
if (NULL != c->sm_bootstrap_meta) {
/* Ignore any errors -- what are we going to do about
them? */
mca_common_sm_mmap_fini(c->mcb_mmap);
mca_common_sm_fini(c->sm_bootstrap_meta);
}
free(c);
}
@ -376,7 +378,7 @@ int ompi_coll_sm_lazy_enable(mca_coll_base_module_t *module,
children are contiguous, so having the first pointer and the
num_children from the mcb_tree data is sufficient). */
control_size = c->sm_control_size;
base = data->mcb_mmap->data_addr;
base = data->sm_bootstrap_meta->module_data_addr;
data->mcb_barrier_control_me = (uint32_t*)
(base + (rank * control_size * num_barrier_buffers * 2));
if (data->mcb_tree[rank].mcstn_parent) {
@ -472,20 +474,20 @@ int ompi_coll_sm_lazy_enable(mca_coll_base_module_t *module,
OBJ_RETAIN(sm_module->previous_reduce_module);
/* Indicate that we have successfully attached and setup */
opal_atomic_add(&(data->mcb_mmap->map_seg->seg_inited), 1);
opal_atomic_add(&(data->sm_bootstrap_meta->module_seg->seg_inited), 1);
/* Wait for everyone in this communicator to attach and setup */
opal_output_verbose(10, mca_coll_base_output,
"coll:sm:enable (%d/%s): waiting for peers to attach",
comm->c_contextid, comm->c_name);
SPIN_CONDITION(size == data->mcb_mmap->map_seg->seg_inited, seg_init_exit);
SPIN_CONDITION(size == data->sm_bootstrap_meta->module_seg->seg_inited, seg_init_exit);
/* Once we're all here, remove the mmap file; it's not needed anymore */
if (0 == rank) {
unlink(data->mcb_mmap->map_path);
unlink(data->sm_bootstrap_meta->module_seg_path);
opal_output_verbose(10, mca_coll_base_output,
"coll:sm:enable (%d/%s): removed mmap file %s",
comm->c_contextid, comm->c_name, data->mcb_mmap->map_path);
comm->c_contextid, comm->c_name, data->sm_bootstrap_meta->module_seg_path);
}
/* All done */
@ -589,13 +591,13 @@ static int bootstrap_comm(ompi_communicator_t *comm,
opal_output_verbose(10, mca_coll_base_output,
"coll:sm:enable:bootstrap comm (%d/%s): attaching to %" PRIsize_t " byte mmap: %s",
comm->c_contextid, comm->c_name, size, fullpath);
data->mcb_mmap =
mca_common_sm_mmap_init_group(comm->c_local_group, size, fullpath,
sizeof(mca_common_sm_file_header_t),
sizeof(void*));
if (NULL == data->mcb_mmap) {
data->sm_bootstrap_meta =
mca_common_sm_init_group(comm->c_local_group, size, fullpath,
sizeof(mca_common_sm_seg_header_t),
sizeof(void*));
if (NULL == data->sm_bootstrap_meta) {
opal_output_verbose(10, mca_coll_base_output,
"coll:sm:enable:bootstrap comm (%d/%s): common_sm_mmap_init_group failed",
"coll:sm:enable:bootstrap comm (%d/%s): mca_common_sm_init_group failed",
comm->c_contextid, comm->c_name);
return OMPI_ERR_OUT_OF_RESOURCE;
}

Просмотреть файл

@ -9,4 +9,4 @@
#
# Specific to this module
exclude_list=common_sm_mmap.c;common_sm_mmap.h;common_sm_sysv.c;common_sm_sysv.h

Просмотреть файл

@ -44,13 +44,31 @@ EXTRA_DIST = .windows
# Header files
headers = \
common_sm.h \
common_sm_mmap.h
# Source files
sources = \
common_sm.c \
common_sm_mmap.c
# Only build the Windows support if we're building on windows, but
# always include the files in the tarball.
if MCA_common_sm_windows
headers += common_sm_windows.h
sources += common_sm_windows.c
endif
# Only build the SYSV support if we have the right stuff, but
# always include the files in the tarball.
if MCA_common_sm_sysv
headers += common_sm_sysv.h
sources += common_sm_sysv.c
endif
# Help file
dist_pkgdata_DATA = help-mpi-common-sm.txt

269
ompi/mca/common/sm/common_sm.c Обычный файл
Просмотреть файл

@ -0,0 +1,269 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2009 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#include "ompi/constants.h"
#include "orte/util/show_help.h"
#include "opal/util/argv.h"
#include "common_sm_mmap.h"
#if MCA_COMMON_SM_SYSV
#include "common_sm_sysv.h"
#endif /* MCA_COMMON_SM_SYSV */
#if MCA_COMMON_SM_WINDOWS
#include "common_sm_windows.h"
#endif /* MCA_COMMON_SM_WINDOWS */
/* non-zero once backend selection has been attempted (successful or not) */
static int initialized = 0;
/* index returned when the "have sysv support" parameter is registered;
 * stays -1 until mca_common_sm_param_register() has run once */
static int sysv_index = -1;
/* the requested backend list split on ','; filled in by
 * mca_common_sm_param_register() and released after backend selection */
static char **sm_argv = NULL;
/* let mmap be the default selection */
static char *sm_params = "mmap";
/* entry points of the selected backend; all remain NULL until a backend
 * has been selected */
static mca_common_sm_init_fn_t sm_init = NULL;
static mca_common_sm_init_group_fn_t sm_init_group = NULL;
static mca_common_sm_seg_alloc_fn_t sm_seg_alloc = NULL;
static mca_common_sm_fini_fn_t sm_fini = NULL;
/* comma-separated list of every backend available in this build; used when
 * the mpi_common_sm parameter is the empty string */
static char sm_all_buff[OPAL_PATH_MAX];
/* instance that is shared between components that use shared memory */
mca_common_sm_module_t *mca_common_sm_module = NULL;
/******************************************************************************/
/**
 * Register the MCA parameters for common sm.
 *
 * The first call registers the mpi_common_sm string parameter (the ordered
 * list of shared memory backends to try), splits the resulting list into
 * sm_argv, and registers the read-only "have sysv support" indicator.
 * Every call, including the first, registers a "have_sysv_support" synonym
 * of that indicator for the calling component.
 *
 * @param c component on whose behalf the synonym is registered (IN)
 *
 * @return OMPI_SUCCESS
 */
int
mca_common_sm_param_register(mca_base_component_t *c)
{
    char help_text[OPAL_PATH_MAX];
    char *requested = NULL;

    /* the framework-wide parameters only need to be registered once */
    if (-1 == sysv_index) {
        if (MCA_COMMON_SM_SYSV) {
            snprintf(help_text, sizeof(help_text) - 1,
                     "Which shared memory support will be used. "
                     "Valid values: sysv,mmap - or a comma delimited "
                     "combination of them (order dependent). The first component "
                     "that is successfully selected is used.");
            /* list used for "try everything"; note that sysv goes first */
            snprintf(sm_all_buff, sizeof(sm_all_buff) - 1, "sysv,mmap");
        } else {
            /* mmap is the only backend in this build */
            snprintf(help_text, sizeof(help_text) - 1,
                     "Which shared memory support will be used. "
                     "Valid values: mmap.");
            snprintf(sm_all_buff, sizeof(sm_all_buff) - 1, "mmap");
        }

        mca_base_param_reg_string_name("mpi", "common_sm", help_text,
                                       false, false,
                                       sm_params, &sm_params);

        /* an empty value means "try every available backend" */
        requested = (0 == strcmp(sm_params, "")) ? sm_all_buff : sm_params;
        sm_argv = opal_argv_split(requested, ',');
        if (NULL == sm_argv) {
            opal_output(0,
                        "WARNING: could not parse mpi_common_sm request.");
        }

        sysv_index = mca_base_param_reg_int_name(
                         "mpi",
                         "common_sm_have_sysv_support",
                         "Whether shared memory has System V support or not",
                         false,
                         true,
                         MCA_COMMON_SM_SYSV,
                         NULL);
    }

    /* Also register MCA param synonyms for the component */
    mca_base_param_reg_syn(sysv_index, c, "have_sysv_support", false);

    return OMPI_SUCCESS;
}
/******************************************************************************/
mca_common_sm_module_t *
mca_common_sm_init(ompi_proc_t **procs,
size_t num_procs,
size_t size,
char *file_name,
size_t size_ctl_structure,
size_t data_seg_alignment)
{
if (!initialized)
{
int help_msg_displayed = 0;
int i;
if (NULL != sm_argv)
{
/**
* iterate through the entire list
* stop when a valid component has been selected.
*
* warn the user when an invalid option was specified,
* but continue searching for a valid alternative.
*/
for (i = 0; NULL != sm_argv[i] && NULL == sm_init; ++i)
{
if (0 == strcasecmp(sm_argv[i], "mmap"))
{
#if !MCA_COMMON_SM_WINDOWS
sm_init = mca_common_sm_mmap_init;
sm_init_group = mca_common_sm_mmap_init_group;
sm_seg_alloc = mca_common_sm_mmap_seg_alloc;
sm_fini = mca_common_sm_mmap_fini;
#else /* MCA_COMMON_SM_WINDOWS */
sm_init = mca_common_sm_windows_init;
sm_init_group = mca_common_sm_windows_init_group;
sm_seg_alloc = mca_common_sm_windows_seg_alloc;
sm_fini = mca_common_sm_windows_fini;
#endif
}
else if (0 == strcasecmp(sm_argv[i], "sysv"))
{
#if !MCA_COMMON_SM_SYSV
if (!help_msg_displayed)
{
orte_show_help("help-mpi-common-sm.txt",
"sm support",
1,
sm_argv[i]);
help_msg_displayed = 1;
}
#else /* MCA_COMMON_SM_SYSV */
/* make sure that we can safely use sysv on this system */
if (OMPI_SUCCESS == mca_common_sm_sysv_component_query())
{
sm_init = mca_common_sm_sysv_init;
sm_init_group = mca_common_sm_sysv_init_group;
sm_seg_alloc = mca_common_sm_sysv_seg_alloc;
sm_fini = mca_common_sm_sysv_fini;
}
else /* let the user know that we tried sysv and failed */
{
orte_show_help("help-mpi-common-sm.txt",
"sysv rt test fail",
1);
}
#endif
}
else /* unknown value */
{
if (!help_msg_displayed)
{
orte_show_help("help-mpi-common-sm.txt",
"sm support",
1,
sm_argv[i]);
help_msg_displayed = 1;
}
}
}
if (NULL != sm_argv)
{
opal_argv_free(sm_argv);
}
}
initialized = 1;
}
/* call the selected init function */
if (NULL != sm_init)
{
return sm_init(procs, num_procs, size,
file_name, size_ctl_structure,
data_seg_alignment);
}
return NULL;
}
/******************************************************************************/
mca_common_sm_module_t *
mca_common_sm_init_group(ompi_group_t *group,
size_t size,
char *file_name,
size_t size_ctl_structure,
size_t data_seg_alignment)
{
if (NULL != sm_init_group)
{
return sm_init_group(group, size,
file_name, size_ctl_structure,
data_seg_alignment);
}
return NULL;
}
/******************************************************************************/
/**
 * Allocation callback from the sm mpool: forwards to the seg_alloc function
 * of the selected backend.
 *
 * @return the backend's result, or NULL when no backend has been selected.
 */
void *
mca_common_sm_seg_alloc(struct mca_mpool_base_module_t* mpool,
                        size_t* size,
                        mca_mpool_base_registration_t** registration)
{
    /* nothing to forward to until a backend has been selected */
    if (NULL == sm_seg_alloc) {
        return NULL;
    }

    return sm_seg_alloc(mpool, size, registration);
}
/******************************************************************************/
/**
 * Release the local resources of a shared memory segment by forwarding to
 * the fini function of the selected backend.
 *
 * @return the backend's result, or OMPI_ERR_NOT_FOUND when no backend has
 *         been selected.
 */
int
mca_common_sm_fini(mca_common_sm_module_t *mca_common_sm_module)
{
    /* without a selected backend there is nothing that could be released */
    if (NULL == sm_fini) {
        return OMPI_ERR_NOT_FOUND;
    }

    return sm_fini(mca_common_sm_module);
}

192
ompi/mca/common/sm/common_sm.h Обычный файл
Просмотреть файл

@ -0,0 +1,192 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef _COMMON_SM_H_
#define _COMMON_SM_H_
#include "ompi_config.h"
#include "opal/mca/mca.h"
#include "opal/class/opal_object.h"
#include "opal/class/opal_list.h"
#include "opal/sys/atomic.h"
#include "ompi/mca/mpool/mpool.h"
#include "ompi/proc/proc.h"
#include "ompi/group/group.h"
BEGIN_C_DECLS
struct mca_mpool_base_module_t;
/**
 * Control header placed at the head of a shared memory segment.  Control
 * structures passed to mca_common_sm_init() are assumed to start with this
 * header.
 */
typedef struct mca_common_sm_seg_header_t
{
/* lock to control atomic access */
opal_atomic_lock_t seg_lock;
/* is the segment ready for use; the sm BTL and sm coll component
 * atomically increment this once per process and spin until it reaches
 * the number of participating processes */
volatile int32_t seg_inited;
/* offset to next available memory location available for allocation */
size_t seg_offset;
/* total size of the segment */
size_t seg_size;
} mca_common_sm_seg_header_t;
/**
 * Per-process description of a shared memory segment, as returned by
 * mca_common_sm_init() and mca_common_sm_init_group().
 */
typedef struct mca_common_sm_module_t
{
/* double link list element */
opal_list_item_t module_item;
/* pointer to header embedded in the shared memory segment */
mca_common_sm_seg_header_t *module_seg;
/* base address of the segment */
unsigned char *module_seg_addr;
/* base address of data segment */
unsigned char *module_data_addr;
/* how big it is (in bytes) */
size_t module_size;
/* path associated with the segment; the sm BTL and sm coll component
 * unlink() this path once the segment is no longer needed */
char module_seg_path[OPAL_PATH_MAX];
#if defined(__WINDOWS__)
/* handle to the object */
HANDLE hMappedObject;
#endif /* defined(__WINDOWS__) */
} mca_common_sm_module_t;
OBJ_CLASS_DECLARATION(mca_common_sm_module_t);
/**
 * Register the MCA parameters for common sm.
 *
 * May be called by every component that uses common sm: the shared
 * parameters are registered on the first call only, and each call adds a
 * "have_sysv_support" synonym for the calling component.
 *
 * @param c - component on whose behalf the synonym is registered (IN)
 *
 * @returnvalue OMPI_SUCCESS
 */
OMPI_DECLSPEC extern int
mca_common_sm_param_register(mca_base_component_t *c);
/**
 * This routine is used to set up a shared memory segment (whether
 * it's an mmaped file or a SYSV IPC segment). It is assumed that
 * the shared memory segment does not exist before any of the current
 * set of processes try and open it.
 *
 * @param procs - array of (ompi_proc_t*)'s to create this shared
 * memory segment for. This array must be writable; it may be edited
 * (in undefined ways) if the array contains procs that are not on
 * this host. It is assumed that the caller will simply free this
 * array upon return. (INOUT)
 *
 * @param num_procs - length of the procs array (IN)
 *
 * @param size - size of the segment, in bytes (IN)
 *
 * @param file_name - unique string identifier of this segment (IN)
 *
 * @param size_ctl_structure - size of the control structure at
 *                             the head of the segment. The control structure
 *                             is assumed to have mca_common_sm_seg_header_t
 *                             as its first element (IN)
 *
 * @param data_seg_alignment - alignment of the data segment. This
 *                             follows the control structure. If this
 *                             value is 0, then assume that there will
 *                             be no data segment following the control
 *                             structure. (IN)
 *
 * @returnvalue pointer to the mca_common_sm_module_t that describes the
 *              segment (its module_seg member points to the control
 *              structure at the head of the shared memory segment), or
 *              NULL if no shared memory backend could be selected.
 *              Callers treat NULL as failure.
 */
OMPI_DECLSPEC extern mca_common_sm_module_t *
mca_common_sm_init(ompi_proc_t **procs,
size_t num_procs,
size_t size,
char *file_name,
size_t size_ctl_structure,
size_t data_seg_alignment);
/**
 * Signature that a shared memory backend's implementation of
 * mca_common_sm_init() must have.
 */
typedef mca_common_sm_module_t *
(*mca_common_sm_init_fn_t)(ompi_proc_t **procs,
size_t num_procs,
size_t size,
char *file_name,
size_t size_ctl_structure,
size_t data_seg_alignment);
/**
 * This routine is used to set up a shared memory segment (whether
 * it's an mmaped file or a SYSV IPC segment). It is assumed that
 * the shared memory segment does not exist before any of the current
 * set of processes try and open it.
 *
 * This routine is the same as mca_common_sm_init() except that
 * it takes an (ompi_group_t*) parameter to specify the peers rather
 * than an array of procs. Unlike mca_common_sm_init(), the
 * group must contain *only* local peers, or this function will return
 * NULL and not create any shared memory segment.
 *
 * The remaining parameters have the same meaning as in
 * mca_common_sm_init().
 */
OMPI_DECLSPEC extern mca_common_sm_module_t *
mca_common_sm_init_group(ompi_group_t *group,
size_t size,
char *file_name,
size_t size_ctl_structure,
size_t data_seg_alignment);
/**
 * Signature that a shared memory backend's implementation of
 * mca_common_sm_init_group() must have.
 */
typedef mca_common_sm_module_t *
(*mca_common_sm_init_group_fn_t)(ompi_group_t *group,
size_t size,
char *file_name,
size_t size_ctl_structure,
size_t data_seg_alignment);
/**
 * callback from the sm mpool: hands out memory from a previously
 * created shared memory segment.
 *
 * @param mpool - the mpool module requesting memory (IN)
 *
 * @param size - pointer to the size of the request, in bytes (IN)
 *
 * @param registration - registration handle.
 * NOTE(review): the sysv implementation sets *registration to NULL
 * when the pointer is non-NULL; presumably every implementation
 * does the same - confirm.
 *
 * @returnvalue address of the allocated memory.
 * NOTE(review): the sysv implementation returns NULL when the
 * request does not fit in the segment - confirm for the others.
 */
OMPI_DECLSPEC extern void *
mca_common_sm_seg_alloc(struct mca_mpool_base_module_t *mpool,
size_t* size,
mca_mpool_base_registration_t **registration);
/**
 * function pointer type with the same signature as
 * mca_common_sm_seg_alloc().
 */
typedef void *
(*mca_common_sm_seg_alloc_fn_t)(struct mca_mpool_base_module_t *mpool,
size_t* size,
mca_mpool_base_registration_t **registration);
/**
 * This function will release all local resources attached to the
 * shared memory segment. We assume that the operating system will
 * release the memory resources when the last process releases them.
 *
 * @param mca_common_sm_module - instance that is shared between
 * components that use shared memory.
 *
 * @returnvalue 0 if everything was OK, otherwise a negative value.
 */
OMPI_DECLSPEC extern int
mca_common_sm_fini(mca_common_sm_module_t *mca_common_sm_module);
/**
 * function pointer type with the same signature as mca_common_sm_fini().
 */
typedef int
(*mca_common_sm_fini_fn_t)(mca_common_sm_module_t *mca_common_sm_module);
/*
 * instance that is shared between components that use shared memory.
 * only declared here; the definition lives in a source file outside
 * this header.
 */
OMPI_DECLSPEC extern mca_common_sm_module_t *mca_common_sm_module;
END_C_DECLS
#endif /* _COMMON_SM_H_ */

Просмотреть файл

@ -11,8 +11,8 @@
* All rights reserved.
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2010 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -63,7 +63,7 @@
#include "common_sm_mmap.h"
OBJ_CLASS_INSTANCE(
mca_common_sm_mmap_t,
mca_common_sm_module_mmap_t,
opal_object_t,
NULL,
NULL
@ -91,23 +91,23 @@ typedef struct {
opal_list_item_t super;
char file_name[OPAL_PATH_MAX];
int sm_file_inited;
} pending_rml_msg_t;
} pending_mmap_rml_msg_t;
OBJ_CLASS_INSTANCE(pending_rml_msg_t, opal_list_item_t, NULL, NULL);
OBJ_CLASS_INSTANCE(pending_mmap_rml_msg_t, opal_list_item_t, NULL, NULL);
#if !defined(__WINDOWS__)
static mca_common_sm_mmap_t* create_map(int fd, size_t size, char *file_name,
size_t size_ctl_structure,
size_t data_seg_alignment)
static mca_common_sm_module_mmap_t *
create_map(int fd, size_t size,
char *file_name,
size_t size_ctl_structure,
size_t data_seg_alignment)
{
mca_common_sm_mmap_t *map;
mca_common_sm_file_header_t *seg;
mca_common_sm_module_mmap_t *map;
mca_common_sm_seg_header_t *seg;
unsigned char *addr = NULL;
/* map the file and initialize segment state */
seg = (mca_common_sm_file_header_t*)
mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
seg = (mca_common_sm_seg_header_t *)
mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
if (MAP_FAILED == seg) {
orte_show_help("help-mpi-common-sm.txt", "sys call fail", 1,
orte_process_info.nodename,
@ -117,12 +117,12 @@ static mca_common_sm_mmap_t* create_map(int fd, size_t size, char *file_name,
}
/* set up the map object */
map = OBJ_NEW(mca_common_sm_mmap_t);
strncpy(map->map_path, file_name, OPAL_PATH_MAX);
map = OBJ_NEW(mca_common_sm_module_mmap_t);
strncpy(map->super.module_seg_path, file_name, OPAL_PATH_MAX);
/* the first entry in the file is the control structure. The first
entry in the control structure is an mca_common_sm_file_header_t
entry in the control structure is an mca_common_sm_seg_header_t
element */
map->map_seg = seg;
map->super.module_seg = seg;
addr = ((unsigned char *)seg) + size_ctl_structure;
/* If we have a data segment (i.e., if 0 != data_seg_alignment),
@ -142,28 +142,39 @@ static mca_common_sm_mmap_t* create_map(int fd, size_t size, char *file_name,
return NULL;
}
}
map->data_addr = addr;
map->map_addr = (unsigned char *)seg;
map->map_size = size;
map->super.module_data_addr = addr;
map->super.module_seg_addr = (unsigned char *)seg;
map->super.module_size = size;
return map;
}
mca_common_sm_mmap_t* mca_common_sm_mmap_init(ompi_proc_t **procs,
size_t num_procs,
size_t size, char *file_name,
size_t size_ctl_structure,
size_t data_seg_alignment)
/******************************************************************************/
/**
 * mca_common_sm_mmap_component_query
 *
 * run-time query for the mmap flavor of the common sm component.
 * no probing is performed here: the routine unconditionally
 * reports success.
 *
 * @returnvalue OMPI_SUCCESS, always.
 */
int
mca_common_sm_mmap_component_query(void)
{
    const int query_result = OMPI_SUCCESS;

    return query_result;
}
mca_common_sm_module_t *
mca_common_sm_mmap_init(ompi_proc_t **procs,
size_t num_procs,
size_t size, char *file_name,
size_t size_ctl_structure,
size_t data_seg_alignment)
{
int fd = -1;
mca_common_sm_mmap_t* map = NULL;
mca_common_sm_module_mmap_t *map = NULL;
size_t mem_offset, p;
int rc = 0, sm_file_inited = 0, num_local_procs;
struct iovec iov[3];
int sm_file_created = OMPI_RML_TAG_SM_BACK_FILE_CREATED;
char filename_to_send[OPAL_PATH_MAX];
opal_list_item_t *item;
pending_rml_msg_t *rml_msg;
pending_mmap_rml_msg_t *rml_msg;
ompi_proc_t *temp_proc;
bool found_lowest = false;
@ -225,7 +236,7 @@ mca_common_sm_mmap_t* mca_common_sm_mmap_init(ompi_proc_t **procs,
if (0 == orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
ORTE_PROC_MY_NAME,
&(procs[0]->proc_name))) {
/* Check, whether the specified filename is on a network file system */
/* check, whether the specified filename is on a network file system */
if (opal_path_nfs(file_name)) {
orte_show_help("help-mpi-common-sm.txt", "mmap on nfs", 1,
orte_process_info.nodename, file_name);
@ -255,11 +266,13 @@ mca_common_sm_mmap_t* mca_common_sm_mmap_init(ompi_proc_t **procs,
/* initialize the segment - only the first process
to open the file */
mem_offset = map->data_addr - (unsigned char *)map->map_seg;
map->map_seg->seg_offset = mem_offset;
map->map_seg->seg_size = size - mem_offset;
opal_atomic_unlock(&map->map_seg->seg_lock);
map->map_seg->seg_inited = 0;
mem_offset =
map->super.module_data_addr -
(unsigned char *)map->super.module_seg;
map->super.module_seg->seg_offset = mem_offset;
map->super.module_seg->seg_size = size - mem_offset;
opal_atomic_unlock(&map->super.module_seg->seg_lock);
map->super.module_seg->seg_inited = 0;
} else {
close(fd);
unlink(file_name);
@ -280,7 +293,7 @@ mca_common_sm_mmap_t* mca_common_sm_mmap_init(ompi_proc_t **procs,
/* Free it all -- bad things are going to happen */
if (1 == sm_file_inited) {
munmap(map, size);
munmap(map->super.module_seg_addr, size);
close(fd);
unlink(file_name);
fd = -1;
@ -299,7 +312,7 @@ mca_common_sm_mmap_t* mca_common_sm_mmap_init(ompi_proc_t **procs,
for (item = opal_list_get_first(&pending_rml_msgs);
opal_list_get_end(&pending_rml_msgs) != item;
item = opal_list_get_next(item)) {
rml_msg = (pending_rml_msg_t*) item;
rml_msg = (pending_mmap_rml_msg_t*) item;
if (0 == strcmp(rml_msg->file_name, file_name)) {
opal_list_remove_item(&pending_rml_msgs, item);
sm_file_inited = rml_msg->sm_file_inited;
@ -331,7 +344,7 @@ mca_common_sm_mmap_t* mca_common_sm_mmap_init(ompi_proc_t **procs,
}
/* If not, put it on the pending list and try again */
rml_msg = OBJ_NEW(pending_rml_msg_t);
rml_msg = OBJ_NEW(pending_mmap_rml_msg_t);
if (NULL == rml_msg) {
ORTE_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
/* fd/map wasn't opened here; no need to close/reset */
@ -362,128 +375,8 @@ out:
close(fd);
}
return map;
return &(map->super);
}
#else
mca_common_sm_mmap_t* mca_common_sm_mmap_init(ompi_proc_t **procs,
size_t num_procs,
size_t size, char *file_name,
size_t size_ctl_structure,
size_t data_seg_alignment)
{
int fd = -1, return_code = OMPI_SUCCESS;
bool file_previously_opened = false;
mca_common_sm_file_header_t* seg = NULL;
mca_common_sm_mmap_t* map = NULL;
unsigned char *addr = NULL;
size_t tmp, mem_offset;
HANDLE hMapObject = INVALID_HANDLE_VALUE;
LPVOID lpvMem = NULL;
char *temp1, *temp2;
int rc;
/**
* On Windows the shared file will be created by the OS directly on
* the system ressources. Therefore, no file get involved in the
* operation. However, a unique key should be used as name for the
* shared memory object in order to allow all processes to access
* the same unique shared memory region. The key will be obtained
* from the original file_name by replacing all path separator
* occurences by '/' (as '\' is not allowed on the object name).
*/
temp1 = strdup(file_name);
temp2 = temp1;
while( NULL != (temp2 = strchr(temp2, OPAL_PATH_SEP[0])) ) {
*temp2 = '/';
}
hMapObject = CreateFileMapping( INVALID_HANDLE_VALUE, /* use paging file */
NULL, /* no security attributes */
PAGE_READWRITE, /* read/write access */
0, /* size: high 32-bits */
(DWORD)size, /* size: low 32-bits */
temp1); /* name of map object */
if( NULL == hMapObject ) {
rc = GetLastError();
goto return_error;
}
if( ERROR_ALREADY_EXISTS == GetLastError() )
file_previously_opened=true;
free(temp1); /* relase the temporary file name */
/* Get a pointer to the file-mapped shared memory. */
lpvMem = MapViewOfFile( hMapObject, /* object to map view of */
FILE_MAP_WRITE, /* read/write access */
0, /* high offset: map from */
0, /* low offset: beginning */
0); /* default: map entire file */
if( NULL == lpvMem ) {
rc = GetLastError();
goto return_error;
}
seg = (mca_common_sm_file_header_t*)lpvMem;
/* set up the map object */
map = OBJ_NEW(mca_common_sm_mmap_t);
strncpy(map->map_path, file_name, OPAL_PATH_MAX);
/* the first entry in the file is the control structure. The first
entry in the control structure is an mca_common_sm_file_header_t
element */
map->map_seg = seg;
/* If we have a data segment (i.e., if 0 != data_seg_alignment),
then make it the first aligned address after the control
structure. */
if (0 != data_seg_alignment) {
addr = ((unsigned char *) seg) + size_ctl_structure;
/* calculate how far off alignment we are */
tmp = ((size_t) addr) % data_seg_alignment;
/* if we're off alignment, then move up to the next alignment */
if( tmp > 0 )
addr += (data_seg_alignment - tmp);
/* is addr past end of file ? */
if( (unsigned char*)seg+size < addr ) {
opal_output(0, "mca_common_sm_mmap_init: memory region too small len %d addr %p\n",
size,addr);
goto return_error;
}
map->data_addr = addr;
} else {
map->data_addr = NULL;
}
mem_offset = addr-(unsigned char *)seg;
map->map_addr = (unsigned char *)seg;
map->map_size = size;
/* initialize the segment - only the first process to open the file */
if( !file_previously_opened ) {
opal_atomic_unlock(&seg->seg_lock);
seg->seg_inited = false;
seg->seg_offset = mem_offset;
/* initialize size after subtracting out space used by the header */
seg->seg_size = size - mem_offset;
}
map->hMappedObject = hMapObject;
return map;
return_error:
{
char* localbuf = NULL;
FormatMessage( FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM,
NULL, rc, 0, (LPTSTR)&localbuf, 1024, NULL );
opal_output( 0, "%s\n", localbuf );
LocalFree( localbuf );
}
if( NULL != lpvMem ) UnmapViewOfFile( lpvMem );
if( NULL != hMapObject ) CloseHandle(hMapObject);
return NULL;
}
#endif
/*
* Same as mca_common_sm_mmap_init(), but takes an (ompi_group_t*)
@ -492,15 +385,16 @@ mca_common_sm_mmap_t* mca_common_sm_mmap_init(ompi_proc_t **procs,
* This function just checks the group to ensure that all the procs
* are local, and if they are, calls mca_common_sm_mmap_init().
*/
mca_common_sm_mmap_t* mca_common_sm_mmap_init_group(ompi_group_t *group,
size_t size,
char *file_name,
size_t size_ctl_structure,
size_t data_seg_alignment)
mca_common_sm_module_t *
mca_common_sm_mmap_init_group(ompi_group_t *group,
size_t size,
char *file_name,
size_t size_ctl_structure,
size_t data_seg_alignment)
{
size_t i, group_size;
ompi_proc_t *proc, **procs;
mca_common_sm_mmap_t *ret;
mca_common_sm_module_t *ret;
group_size = ompi_group_size(group);
procs = (ompi_proc_t**) malloc(sizeof(ompi_proc_t*) * group_size);
@ -522,23 +416,18 @@ mca_common_sm_mmap_t* mca_common_sm_mmap_init_group(ompi_group_t *group,
return ret;
}
int mca_common_sm_mmap_fini( mca_common_sm_mmap_t* sm_mmap )
int
mca_common_sm_mmap_fini(mca_common_sm_module_t *mca_common_sm_module)
{
mca_common_sm_module_mmap_t *mmap_module =
(mca_common_sm_module_mmap_t *)mca_common_sm_module;
int rc = OMPI_SUCCESS;
if( NULL != sm_mmap->map_seg ) {
#if !defined(__WINDOWS__)
rc = munmap((void*) sm_mmap->map_addr, sm_mmap->map_size );
sm_mmap->map_addr = NULL;
sm_mmap->map_size = 0;
#else
BOOL return_error = UnmapViewOfFile( sm_mmap->map_addr );
if( false == return_error ) {
rc = GetLastError();
}
CloseHandle(sm_mmap->hMappedObject);
#endif /* !defined(__WINDOWS__) */
if( NULL != mmap_module->super.module_seg ) {
rc = munmap((void*) mmap_module->super.module_seg_addr,
mmap_module->super.module_size);
mmap_module->super.module_seg_addr = NULL;
mmap_module->super.module_size = 0;
}
return rc;
}
@ -552,14 +441,15 @@ int mca_common_sm_mmap_fini( mca_common_sm_mmap_t* sm_mmap )
* @retval addr virtual address
*/
void* mca_common_sm_mmap_seg_alloc(
struct mca_mpool_base_module_t* mpool,
size_t* size,
mca_mpool_base_registration_t** registration)
void *
mca_common_sm_mmap_seg_alloc(struct mca_mpool_base_module_t* mpool,
size_t* size,
mca_mpool_base_registration_t** registration)
{
mca_mpool_sm_module_t *sm_module = (mca_mpool_sm_module_t*) mpool;
mca_common_sm_mmap_t *map = sm_module->sm_common_mmap;
mca_common_sm_file_header_t* seg = map->map_seg;
mca_common_sm_module_mmap_t *map =
(mca_common_sm_module_mmap_t *)sm_module->sm_common_module;
mca_common_sm_seg_header_t* seg = map->super.module_seg;
void* addr;
opal_atomic_lock(&seg->seg_lock);
@ -569,7 +459,7 @@ void* mca_common_sm_mmap_seg_alloc(
size_t fixup;
/* add base address to segment offset */
addr = map->data_addr + seg->seg_offset;
addr = map->super.module_data_addr + seg->seg_offset;
seg->seg_offset += *size;
/* fix up seg_offset so next allocation is aligned on a

Просмотреть файл

@ -10,6 +10,8 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -28,47 +30,18 @@
#include "ompi/mca/mpool/mpool.h"
#include "ompi/proc/proc.h"
#include "ompi/group/group.h"
#include "ompi/mca/common/sm/common_sm.h"
BEGIN_C_DECLS
struct mca_mpool_base_module_t;
typedef struct mca_common_sm_file_header_t {
/* lock to control atomic access */
opal_atomic_lock_t seg_lock;
/* is the segment ready for use */
volatile int32_t seg_inited;
/* Offset to next available memory location available for allocation */
size_t seg_offset;
/* total size of the segment */
size_t seg_size;
} mca_common_sm_file_header_t;
typedef struct mca_common_sm_mmap_t {
/* double link list element */
opal_list_item_t map_item;
/* pointer to header embedded in the shared memory file */
mca_common_sm_file_header_t *map_seg;
/* base address of the mmap'ed file */
unsigned char *map_addr;
/* base address of data segment */
unsigned char *data_addr;
/* How big it is (in bytes) */
size_t map_size;
/* Filename */
char map_path[OPAL_PATH_MAX];
#if defined(__WINDOWS__)
/* Handle to the object */
HANDLE hMappedObject;
#endif /* defined(__WINDOWS__) */
} mca_common_sm_mmap_t;
OBJ_CLASS_DECLARATION(mca_common_sm_mmap_t);
/**
 * mmap flavor of the common shared memory module. it currently adds
 * no state of its own: the generic module is embedded as the first
 * (and only) member, so a pointer to this structure and a pointer to
 * its "super" member refer to the same address.
 */
typedef struct mca_common_sm_module_mmap_t
{
/* generic module state (segment header, addresses, size, path) */
mca_common_sm_module_t super;
} mca_common_sm_module_mmap_t;
OBJ_CLASS_DECLARATION(mca_common_sm_module_mmap_t);
/**
* This routine is used to set up a shared memory file, backed
@ -90,7 +63,7 @@ OBJ_CLASS_DECLARATION(mca_common_sm_mmap_t);
*
* @param size_ctl_structure size of the control structure at
* the head of the file. The control structure
* is assumed to have mca_common_sm_file_header_t
* is assumed to have mca_common_sm_seg_header_t
* as its first segment (IN)
*
* @param data_set_alignment alignment of the data segment. this
@ -101,14 +74,13 @@ OBJ_CLASS_DECLARATION(mca_common_sm_mmap_t);
*
* @return value pointer to control structure at head of file.
*/
OMPI_DECLSPEC extern
mca_common_sm_mmap_t* mca_common_sm_mmap_init(
ompi_proc_t **procs,
size_t num_procs,
size_t size,
char *file_name,
size_t size_ctl_structure,
size_t data_seg_alignment);
OMPI_DECLSPEC extern mca_common_sm_module_t *
mca_common_sm_mmap_init(ompi_proc_t **procs,
size_t num_procs,
size_t size,
char *file_name,
size_t size_ctl_structure,
size_t data_seg_alignment);
/**
* This routine is used to set up a shared memory file, backed
@ -122,22 +94,20 @@ mca_common_sm_mmap_t* mca_common_sm_mmap_init(
* group must contain *only* local peers, or this function will return
* NULL and not create any shared memory segment.
*/
OMPI_DECLSPEC extern
mca_common_sm_mmap_t* mca_common_sm_mmap_init_group(
ompi_group_t *group,
size_t size,
char *file_name,
size_t size_ctl_structure,
size_t data_seg_alignment);
OMPI_DECLSPEC extern mca_common_sm_module_t *
mca_common_sm_mmap_init_group(ompi_group_t *group,
size_t size,
char *file_name,
size_t size_ctl_structure,
size_t data_seg_alignment);
/*
* Callback from the sm mpool
*/
OMPI_DECLSPEC extern
void* mca_common_sm_mmap_seg_alloc(
struct mca_mpool_base_module_t* mpool,
size_t* size,
mca_mpool_base_registration_t** registration);
OMPI_DECLSPEC extern void *
mca_common_sm_mmap_seg_alloc(struct mca_mpool_base_module_t *mpool,
size_t *size,
mca_mpool_base_registration_t **registration);
/**
* This function will release all local resources attached to the
@ -149,8 +119,14 @@ void* mca_common_sm_mmap_seg_alloc(
* @returnvalue 0 if everything was OK, otherwise a negative value.
*/
OMPI_DECLSPEC extern
int mca_common_sm_mmap_fini( mca_common_sm_mmap_t* sm_mmap );
OMPI_DECLSPEC extern int
mca_common_sm_mmap_fini(mca_common_sm_module_t *mca_common_sm_module);
/**
* component query routine
*/
OMPI_DECLSPEC extern int
mca_common_sm_mmap_component_query(void);
END_C_DECLS

666
ompi/mca/common/sm/common_sm_sysv.c Обычный файл
Просмотреть файл

@ -0,0 +1,666 @@
/*
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <errno.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#endif /* HAVE_STRING_H */
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif /* HAVE_FCNTL_H */
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif /* HAVE_SYS_STAT_H */
#if MCA_COMMON_SM_SYSV
#include <sys/ipc.h>
#include <sys/shm.h>
#endif /* MCA_COMMON_SM_SYSV */
#include "opal/util/output.h"
#include "opal/util/path.h"
#include "opal/align.h"
#include "opal/threads/mutex.h"
#include "opal/util/opal_sos.h"
#include "orte/mca/rml/rml.h"
#include "orte/util/name_fns.h"
#include "orte/util/show_help.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/errmgr/errmgr.h"
#include "ompi/constants.h"
#include "ompi/proc/proc.h"
#include "ompi/mca/dpm/dpm.h"
#include "ompi/mca/mpool/sm/mpool_sm.h"
#include "common_sm_sysv.h"
/* class instance for the sysv module; no constructor or destructor */
OBJ_CLASS_INSTANCE(
mca_common_sm_module_sysv_t,
opal_object_t,
NULL,
NULL
);
/**
 * lock to protect multiple instances of sysv_init() from
 * being invoked simultaneously (because of RML usage).
 *
 * NOTE(review): no OBJ_CONSTRUCT of this mutex is visible in this
 * file; it relies on static zero-initialization - confirm that this
 * is sufficient for opal_mutex_t on all supported platforms.
 */
static opal_mutex_t mutex;
/**
 * list of RML messages that have arrived that have not yet been
 * consumed by the thread who is looking to attach to the shared
 * memory segment that the RML message corresponds to.
 */
static opal_list_t pending_rml_msgs;
/* true once pending_rml_msgs has been constructed (done lazily in
 * mca_common_sm_sysv_init(), while holding the mutex above) */
static bool pending_rml_msgs_init = false;
/**
 * items on the pending_rml_msgs list
 */
typedef struct
{
/* list linkage; must be first so the item can be cast to/from
 * opal_list_item_t */
opal_list_item_t super;
/* segment identifier string the message was sent for */
char file_name[OPAL_PATH_MAX];
/* NOTE(review): not read or written by the visible code in this file */
int shmem_seg_inited;
/* System V segment id announced by the root (-1 signals failure) */
int shmid;
} pending_sysv_rml_msg_t;
/* class instance for the pending message items; no constructor or
 * destructor */
OBJ_CLASS_INSTANCE(
pending_sysv_rml_msg_t,
opal_list_item_t,
NULL,
NULL
);
static mca_common_sm_module_sysv_t *
create_shmem_seg(int shmid,
int is_root,
size_t size,
size_t size_ctl_structure,
size_t data_seg_alignment)
{
unsigned char *addr = NULL;
mca_common_sm_module_sysv_t *map;
mca_common_sm_seg_header_t *seg;
/* attach to the shared memory segment */
if ((mca_common_sm_seg_header_t *)-1 ==
(seg = (mca_common_sm_seg_header_t *)shmat(shmid, NULL, 0)))
{
int err = errno;
/**
* something really bad happened.
*/
orte_show_help("help-mpi-common-sm.txt",
"sys call fail",
1,
orte_process_info.nodename,
"shmat(2)",
"",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
strerror(err),
err);
return NULL;
}
/**
* only the the root will set IPC_RMID
*/
if (is_root)
{
/**
* mark the segment for destruction immediately after shmat. our hope
* is that the segment will only actually be destroyed after the last
* process detaches from it (i.e., when the shm_nattch member of the
* associated structure shmid_ds is zero). if we are here, we should
* be okay - our run-time test reported adequate system support.
*/
if (-1 == shmctl(shmid, IPC_RMID, NULL))
{
int err = errno;
orte_show_help("help-mpi-common-sm.txt",
"sys call fail",
1,
orte_process_info.nodename,
"shmctl(2)",
"",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
strerror(err),
err);
shmdt(seg);
return NULL;
}
}
/**
* if we are here, shmctl(shmid, IPC_RMID, NULL) was successful, so we
* don't have to worry about segment cleanup - the OS -should- take care
* of it - happy days...
*/
/* set up the map object */
map = OBJ_NEW(mca_common_sm_module_sysv_t);
/**
* the first entry in the file is the control structure. The first
* entry in the control structure is an mca_common_sm_seg_header_t
* element
*/
map->super.module_seg = seg;
addr = ((unsigned char *)seg) + size_ctl_structure;
/**
* if we have a data segment (i.e., if 0 != data_seg_alignment),
* then make it the first aligned address after the control
* structure. IF THIS HAPPENS, THIS IS A PROGRAMMING ERROR IN
* OPEN MPI!
*/
if (0 != data_seg_alignment)
{
addr = OPAL_ALIGN_PTR(addr, data_seg_alignment, unsigned char *);
/* is addr past the end of the shared memory segment ? */
if ((unsigned char *)seg + size < addr)
{
orte_show_help("help-mpi-common-sm.txt",
"mmap too small",
1,
orte_process_info.nodename,
(unsigned long)size,
(unsigned long)size_ctl_structure,
(unsigned long)data_seg_alignment);
return NULL;
}
}
map->super.module_data_addr = addr;
map->super.module_seg_addr = (unsigned char *)seg;
map->super.module_size = size;
return map;
}
/******************************************************************************/
/**
 * mca_common_sm_sysv_component_query
 * the run-time test
 *
 * creates a one-page private System V segment, attaches to it, marks
 * it for removal (IPC_RMID) and then checks that the segment can
 * still be queried (IPC_STAT) while attached. the component relies
 * on that behavior: the root sets IPC_RMID right after attaching.
 *
 * @returnvalue OMPI_SUCCESS if every step succeeded,
 *              OMPI_ERR_NOT_SUPPORTED otherwise.
 */
int
mca_common_sm_sysv_component_query(void)
{
    int rc = OMPI_ERR_NOT_SUPPORTED;
    int shmid = -1;
    bool marked_for_removal = false;
    char *addr = (char *)-1;
    struct shmid_ds tmp_buff;

    if (-1 == (shmid = shmget(IPC_PRIVATE,
                              (size_t)(getpagesize()),
                              IPC_CREAT | IPC_EXCL | SHM_R | SHM_W)))
    {
        goto out;
    }
    if ((char *)-1 == (addr = (char *)shmat(shmid, NULL, 0)))
    {
        goto out;
    }

    /* protect against lazy establishment - may not be needed, but can't hurt */
    *addr = 'j';

    if (-1 == shmctl(shmid, IPC_RMID, NULL))
    {
        goto out;
    }
    marked_for_removal = true;

    if (-1 == shmctl(shmid, IPC_STAT, &tmp_buff))
    {
        goto out;
    }

    /* all is well - rainbows and butterflies */
    rc = OMPI_SUCCESS;

out:
    /**
     * if the segment was created but never marked for removal (e.g.,
     * shmat failed), make a best effort to remove it here. otherwise
     * the probe segment would outlive the process.
     */
    if (-1 != shmid && !marked_for_removal)
    {
        shmctl(shmid, IPC_RMID, NULL);
    }
    if ((char *)-1 != addr)
    {
        shmdt(addr);
    }
    return rc;
}
/******************************************************************************/
/**
 * mca_common_sm_sysv_init
 *
 * set up a System V shared memory segment shared by the local procs.
 * the lowest named local proc ("the root") creates the segment and
 * sends its shmid, together with file_name, to the other local procs
 * over the RML. the others wait for that message and attach.
 *
 * @param procs - array of procs. it is reordered in place so that the
 *                local procs come first and procs[0] is the lowest
 *                named local proc (INOUT)
 *
 * @param num_procs - length of the procs array (IN)
 *
 * @param size - size of the segment, in bytes (IN)
 *
 * @param file_name - string used to match RML messages to this
 *                    segment (IN)
 *
 * @param size_ctl_structure - size of the control structure at the
 *                             head of the segment (IN)
 *
 * @param data_seg_alignment - alignment of the data address that
 *                             follows the control structure (IN)
 *
 * @returnvalue the generic module on success. NULL if there are no
 *              local procs or if anything went wrong.
 */
mca_common_sm_module_t *
mca_common_sm_sysv_init(ompi_proc_t **procs,
                        size_t num_procs,
                        size_t size,
                        char *file_name,
                        size_t size_ctl_structure,
                        size_t data_seg_alignment)
{
    mca_common_sm_module_sysv_t *map = NULL;
    bool found_lowest = false;
    bool found_pending = false;
    int shmid = -1;
    int rc = 0;
    size_t num_local_procs = 0;
    size_t mem_offset;
    size_t p;
    struct iovec iov[2];
    char filename_to_send[OPAL_PATH_MAX];
    opal_list_item_t *item;
    pending_sysv_rml_msg_t *rml_msg;
    ompi_proc_t *temp_proc;

    /**
     * reorder procs array to have all the local procs at the beginning.
     * simultaneously look for the local proc with the lowest name. ensure
     * that procs[0] is the lowest named process.
     */
    for (p = 0; p < num_procs; ++p)
    {
        if (OPAL_PROC_ON_LOCAL_NODE(procs[p]->proc_flags))
        {
            /* if we don't have a lowest, save the first one */
            if (!found_lowest)
            {
                procs[0] = procs[p];
                found_lowest = true;
            }
            else
            {
                /* save this proc */
                procs[num_local_procs] = procs[p];
                /**
                 * if we have a new lowest, swap it with position 0
                 * so that procs[0] is always the lowest named proc
                 */
                if (orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
                                                  &(procs[p]->proc_name),
                                                  &(procs[0]->proc_name)) < 0)
                {
                    temp_proc = procs[0];
                    procs[0] = procs[p];
                    procs[num_local_procs] = temp_proc;
                }
            }
            /**
             * regardless of the comparisons above, we found
             * another proc on the local node, so increment
             */
            ++num_local_procs;
        }
    }

    /* if there are no local procs, there's nothing to do */
    if (0 == num_local_procs)
    {
        return NULL;
    }

    /**
     * strncpy zero-pads up to the given length but never touches the
     * last byte, so terminate explicitly. this guarantees that the
     * buffer is a valid string and that no uninitialized byte is
     * sent over the RML (the whole buffer is sent).
     */
    strncpy(filename_to_send, file_name, sizeof(filename_to_send) - 1);
    filename_to_send[sizeof(filename_to_send) - 1] = '\0';

    iov[0].iov_base = &shmid;
    iov[0].iov_len = sizeof(shmid);
    iov[1].iov_base = filename_to_send;
    iov[1].iov_len = sizeof(filename_to_send);

    /**
     * lock here to prevent multiple threads from invoking this function
     * simultaneously. the critical section we're protecting is usage of
     * the RML in this block.
     */
    opal_mutex_lock(&mutex);

    if (!pending_rml_msgs_init)
    {
        OBJ_CONSTRUCT(&(pending_rml_msgs), opal_list_t);
        pending_rml_msgs_init = true;
    }

    /**
     * figure out if i am the lowest proc in the group (aka "the root").
     * if i am, initialize the shared memory segment.
     */
    if (0 == orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
                                           ORTE_PROC_MY_NAME,
                                           &(procs[0]->proc_name)))
    {
        /* create a new shared memory segment and save the shmid. */
        if (-1 == (shmid = shmget(IPC_PRIVATE,
                                  size,
                                  IPC_CREAT | IPC_EXCL | SHM_R | SHM_W)))
        {
            /**
             * if we are here, a few of things could have happened:
             * o the system's shmmax limit is lower than the requested
             *   segment size. the user can either up shmmax or set
             *   mpool_sm_min_size to a value less than the system's current
             *   shmmax limit.
             * o something else i don't know about ...
             */
            int err = errno;
            orte_show_help("help-mpi-common-sm.txt",
                           "shmget call fail",
                           1,
                           orte_process_info.nodename,
                           "shmget(2)",
                           ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                           strerror(err),
                           err,
                           size);
        }
        else /* shmget was successful */
        {
            map = create_shmem_seg(shmid,
                                   1, /* i am the root */
                                   size,
                                   size_ctl_structure,
                                   data_seg_alignment);
            if (NULL != map)
            {
                /* initialize the segment */
                mem_offset =
                    map->super.module_data_addr -
                    (unsigned char *)map->super.module_seg;
                map->super.module_seg->seg_offset = mem_offset;
                map->super.module_seg->seg_size = size - mem_offset;
                map->super.module_seg->seg_inited = 0;
                opal_atomic_unlock(&map->super.module_seg->seg_lock);
            }
            else
            {
                /**
                 * best effort to delete the segment.
                 * may not be needed, but can't hurt.
                 */
                shmctl(shmid, IPC_RMID, NULL);
                /**
                 * setting shmid to -1 here will tell
                 * the other procs that we failed.
                 */
                shmid = -1;
            }
        }

        /**
         * signal the rest of the local procs that a new shared memory
         * segment has been created and is ready to be attached to (or,
         * with shmid == -1, that the setup failed). bump up the libevent
         * polling frequency while we're using the RML.
         */
        opal_progress_event_users_increment();
        for (p = 1; p < num_local_procs; ++p)
        {
            rc = orte_rml.send(&(procs[p]->proc_name),
                               iov,
                               2,
                               OMPI_RML_TAG_SM_BACK_FILE_CREATED,
                               0);
            if (rc < (ssize_t)(iov[0].iov_len +
                               iov[1].iov_len))
            {
                ORTE_ERROR_LOG(OMPI_ERR_COMM_FAILURE);
                opal_progress_event_users_decrement();
                /* free it all -- bad things are going to happen */
                if (NULL != map)
                {
                    shmdt(map->super.module_seg_addr);
                    /**
                     * the segment is gone from our address space, so the
                     * module must not be handed back to the caller.
                     */
                    OBJ_RELEASE(map);
                    map = NULL;
                }
                goto out;
            }
        }
        opal_progress_event_users_decrement();
    }
    else /* i am NOT the lowest local rank */
    {
        /**
         * all other procs will wait for the shared memory segment to be
         * initialized before attaching to it. because the shared memory
         * segment may be initialized simultaneously in multiple threads,
         * the RML messages may arrive in any order. so, first check to
         * see if we previously received a message for me.
         */
        for (item = opal_list_get_first(&pending_rml_msgs);
             opal_list_get_end(&pending_rml_msgs) != item;
             item = opal_list_get_next(item))
        {
            rml_msg = (pending_sysv_rml_msg_t *)item;
            /* was the message for me? */
            if (0 == strcmp(rml_msg->file_name, file_name))
            {
                opal_list_remove_item(&pending_rml_msgs, item);
                /* set the shmid so i know what shared mem seg to attach to */
                shmid = rml_msg->shmid;
                OBJ_RELEASE(item);
                /* remember the hit; item must not be inspected again */
                found_pending = true;
                break;
            }
        }

        /**
         * if we didn't find a message already waiting, block on
         * receiving from the RML.
         */
        if (!found_pending)
        {
            while (1)
            {
                /**
                 * bump up the libevent polling frequency while we're
                 * in this RML recv, just to ensure we're checking
                 * libevent more frequently.
                 */
                opal_progress_event_users_increment();
                rc = orte_rml.recv(&(procs[0]->proc_name),
                                   iov,
                                   2,
                                   OMPI_RML_TAG_SM_BACK_FILE_CREATED,
                                   0);
                opal_progress_event_users_decrement();
                if (rc < 0)
                {
                    ORTE_ERROR_LOG(OMPI_ERR_RECV_LESS_THAN_POSTED);
                    goto out;
                }
                /* was the message for me? if so, we're done */
                if (0 == strcmp(filename_to_send, file_name))
                {
                    break;
                }
                /* if not, put it on the pending list and try again */
                rml_msg = OBJ_NEW(pending_sysv_rml_msg_t);
                if (NULL == rml_msg)
                {
                    ORTE_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
                    goto out;
                }
                memcpy(rml_msg->file_name,
                       filename_to_send,
                       sizeof(rml_msg->file_name));
                rml_msg->shmid = shmid;
                opal_list_append(&pending_rml_msgs, &(rml_msg->super));
            } /* end while 1 */
        }

        /**
         * did the root setup the shmid correctly? if so, attach to it.
         * on failure map stays NULL and NULL is returned below.
         */
        if (-1 != shmid)
        {
            map = create_shmem_seg(shmid,
                                   0, /* i am NOT the root */
                                   size,
                                   size_ctl_structure,
                                   data_seg_alignment);
        }
    } /* end else - i am NOT the lowest local rank */

out:
    opal_mutex_unlock(&mutex);
    /* don't compute a member address through a NULL pointer */
    return (NULL != map) ? &(map->super) : NULL;
}
/******************************************************************************/
/**
 * group flavor of mca_common_sm_sysv_init(): the peers are given as
 * an (ompi_group_t *) rather than as an array of ompi_proc_t's.
 *
 * every member of the group is looked up and checked for locality.
 * if all of them are on the local node, the collected procs are
 * passed on to mca_common_sm_sysv_init(). otherwise nothing is
 * created.
 *
 * @returnvalue whatever mca_common_sm_sysv_init() returns, or NULL
 *              if the temporary array could not be allocated or the
 *              group contains a non-local proc.
 */
mca_common_sm_module_t *
mca_common_sm_sysv_init_group(ompi_group_t *group,
                              size_t size,
                              char *file_name,
                              size_t size_ctl_structure,
                              size_t data_seg_alignment)
{
    mca_common_sm_module_t *module = NULL;
    size_t nmembers;
    size_t idx;
    ompi_proc_t **members;

    nmembers = ompi_group_size(group);
    members = (ompi_proc_t **) malloc(sizeof(ompi_proc_t *) * nmembers);
    if (NULL == members)
    {
        return NULL;
    }

    for (idx = 0; idx < nmembers; ++idx)
    {
        ompi_proc_t *peer = ompi_group_peer_lookup(group,idx);

        /* a single remote peer disqualifies the whole group */
        if (!OPAL_PROC_ON_LOCAL_NODE(peer->proc_flags))
        {
            goto cleanup;
        }
        members[idx] = peer;
    }

    module = mca_common_sm_sysv_init(members,
                                     nmembers,
                                     size,
                                     file_name,
                                     size_ctl_structure,
                                     data_seg_alignment);

cleanup:
    /* the array was only needed for the duration of the call */
    free(members);
    return module;
}
/******************************************************************************/
/**
 * sys v module finalization routine.
 *
 * detaches the calling process from the shared memory segment and
 * clears every address cached in the module, so that a repeated call
 * is a harmless no-op instead of a shmdt() on a stale address.
 *
 * @param mca_common_sm_module - the module to finalize (IN)
 *
 * @returnvalue OMPI_SUCCESS if there was nothing to detach, otherwise
 *              the return value of shmdt(2) (0 on success, -1 on
 *              failure).
 */
int
mca_common_sm_sysv_fini(mca_common_sm_module_t *mca_common_sm_module)
{
    int rc = OMPI_SUCCESS;
    mca_common_sm_module_sysv_t *sysv_module =
        (mca_common_sm_module_sysv_t *)mca_common_sm_module;

    /**
     * no need to shmctl to remove the segment, because we set
     * IPC_RMID on the segment, meaning that when everyone detaches,
     * the OS will automatically delete it.
     */
    if (NULL != sysv_module->super.module_seg)
    {
        rc = shmdt(sysv_module->super.module_seg_addr);
        /**
         * all of these point into the segment we just detached from.
         * module_seg is the field tested above, so it has to be
         * cleared for the guard to be effective.
         */
        sysv_module->super.module_seg = NULL;
        sysv_module->super.module_data_addr = NULL;
        sysv_module->super.module_seg_addr = NULL;
        sysv_module->super.module_size = 0;
    }
    return rc;
}
/******************************************************************************/
/**
 * allocate memory from a previously allocated shared memory block.
 *
 * the allocation is a simple bump of seg_offset in the shared segment
 * header, performed while holding the segment lock.
 *
 * @param mpool - the sm mpool module whose sm_common_module is the
 *                sysv module to allocate from (IN)
 *
 * @param size - size of request, in bytes (IN)
 *
 * @param registration - if non-NULL, *registration is set to NULL (OUT)
 *
 * @retval addr virtual address, or NULL if the request does not fit
 *         in the space that is left in the segment
 */
void *
mca_common_sm_sysv_seg_alloc(struct mca_mpool_base_module_t* mpool,
                             size_t* size,
                             mca_mpool_base_registration_t** registration)
{
    mca_mpool_sm_module_t *sm_module = (mca_mpool_sm_module_t*)mpool;
    mca_common_sm_module_sysv_t *map =
        (mca_common_sm_module_sysv_t *)sm_module->sm_common_module;
    mca_common_sm_seg_header_t* seg = map->super.module_seg;
    void *addr = NULL;

    opal_atomic_lock(&seg->seg_lock);

    /**
     * same test as "seg_offset + *size <= seg_size", but written so
     * that it cannot wrap around for very large requests. seg_offset
     * may exceed seg_size by a few bytes because of the alignment
     * fix up below, hence the first comparison.
     */
    if (seg->seg_offset <= seg->seg_size &&
        *size <= seg->seg_size - seg->seg_offset)
    {
        size_t fixup;

        /* add base address to segment offset */
        addr = map->super.module_data_addr + seg->seg_offset;
        seg->seg_offset += *size;

        /**
         * fix up seg_offset so next allocation is aligned on a
         * sizeof(long) boundary. do it here so that we don't have to
         * check before checking remaining size in buffer
         */
        if (0 < (fixup = (seg->seg_offset & (sizeof(long) - 1))))
        {
            seg->seg_offset += sizeof(long) - fixup;
        }
    }

    if (NULL != registration)
    {
        *registration = NULL;
    }

    opal_atomic_unlock(&seg->seg_lock);
    return addr;
}

145
ompi/mca/common/sm/common_sm_sysv.h Обычный файл
Просмотреть файл

@ -0,0 +1,145 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef _COMMON_SM_SYSV_H_
#define _COMMON_SM_SYSV_H_
#include "ompi_config.h"
#include "opal/class/opal_object.h"
#include "opal/class/opal_list.h"
#include "opal/sys/atomic.h"
#include "ompi/mca/mpool/mpool.h"
#include "ompi/proc/proc.h"
#include "ompi/group/group.h"
#include "ompi/mca/common/sm/common_sm.h"
BEGIN_C_DECLS
struct mca_mpool_base_module_t;
/**
 * System V flavor of the common sm module.
 *
 * The structure currently carries nothing beyond the common base module.
 */
typedef struct mca_common_sm_module_sysv_t
{
/* common sm module state; as the first (and only) member it lets the
 * sysv code cast between this type and mca_common_sm_module_t */
mca_common_sm_module_t super;
} mca_common_sm_module_sysv_t;
/* NOTE(review): presumably declares the OPAL class descriptor that
 * OBJ_NEW/OBJ_RELEASE use for this type - confirm in opal_object.h */
OBJ_CLASS_DECLARATION(mca_common_sm_module_sysv_t);
/**
 * This routine is used to set up a System V shared memory segment.
 * It is assumed that NO shared memory segment already exists with
 * key = ftok(file_name, 0) when the "creator process" tries to
 * shmget(key, size, ...).
 *
 * @param procs - array of (ompi_proc_t*)'s to create this shared
 * memory segment for. This array must be writable; it may be edited
 * (in undefined ways) if the array contains procs that are not on
 * this host. It is assumed that the caller will simply free this
 * array upon return. (INOUT)
 *
 * @param num_procs - length of the procs array (IN)
 *
 * @param size - size of the shared memory segment, in bytes (IN)
 *
 * @param file_name name of the file that is used for shmget key
 *                  generation. (IN)
 *
 * @param size_ctl_structure size of the control structure at
 *                           the head of the segment. The control structure
 *                           is assumed to have mca_common_sm_seg_header_t
 *                           as its first element (IN)
 *
 * @param data_seg_alignment alignment of the data segment. this
 *                           follows the control structure. If this
 *                           value is 0, then assume that there will
 *                           be no data segment following the control
 *                           structure. (IN)
 *
 * @return pointer to the common sm module describing the segment.
 *         NOTE(review): presumably NULL on failure - confirm against the
 *         definition.
 */
OMPI_DECLSPEC extern mca_common_sm_module_t *
mca_common_sm_sysv_init(ompi_proc_t **procs,
size_t num_procs,
size_t size,
char *file_name,
size_t size_ctl_structure,
size_t data_seg_alignment);
/**
 * This routine is used to set up a System V shared memory segment.
 * It is assumed that NO shared memory segment already exists with
 * key = ftok(file_name, 0) when the "creator (root) process" tries to
 * shmget(key, size, ...).
 *
 * This routine is the same as mca_common_sm_sysv_init() except that
 * it takes an (ompi_group_t*) parameter to specify the peers rather
 * than an array of procs. Unlike mca_common_sm_sysv_init(), the
 * group must contain *only* local peers, or this function will return
 * NULL and not create any shared memory segment.
 *
 * @param group - group whose members are the peers of the segment (IN)
 *
 * The remaining parameters are handed to mca_common_sm_sysv_init()
 * unchanged.
 *
 * @return whatever mca_common_sm_sysv_init() returns, or NULL if the
 *         group contains a non-local peer or the temporary proc array
 *         cannot be allocated.
 */
OMPI_DECLSPEC extern mca_common_sm_module_t *
mca_common_sm_sysv_init_group(ompi_group_t *group,
size_t size,
char *file_name,
size_t size_ctl_structure,
size_t data_seg_alignment);
/**
 * Callback from the sm mpool: carve the next chunk out of the System V
 * segment attached to the mpool's common sm module.
 *
 * @param mpool - sm mpool module; its sm_common_module is the segment
 *                to allocate from (IN)
 *
 * @param size - size of the request, in bytes (IN)
 *
 * @param registration - if non-NULL, *registration is set to NULL (OUT)
 *
 * @return address of the chunk, or NULL if the request does not fit in
 *         the space that is left.
 */
OMPI_DECLSPEC extern void *
mca_common_sm_sysv_seg_alloc(struct mca_mpool_base_module_t *mpool,
size_t *size,
mca_mpool_base_registration_t **registration);
/**
 * This function will release all local resources attached to the
 * shared memory segment. We assume that the operating system will destroy the
 * shared memory segment when the last process detaches from it.
 *
 * It is assumed that the operating system's System V IPC implementation
 * supports the following IPC_RMID semantics.
 *
 * Calling shmctl(shmid, IPC_RMID, ...) will actually destroy the shared memory
 * segment *after* the last process detaches from it (i.e., when the shm_nattch
 * member of the associated structure shmid_ds is zero). This behavior is
 * important because we rely on it to release all allocated shared memory
 * segments upon job termination - including abnormal job termination.
 *
 * @param mca_common_sm_module - the module that describes the attached
 *                               segment. (IN/OUT)
 *
 * @return OMPI_SUCCESS if everything was OK, otherwise the (negative)
 *         value returned by shmdt().
 */
OMPI_DECLSPEC extern int
mca_common_sm_sysv_fini(mca_common_sm_module_t *mca_common_sm_module);
/**
 * component query routine
 *
 * Takes no arguments and reports an int status.
 * NOTE(review): presumably OMPI_SUCCESS when System V shared memory can be
 * used on this system - confirm against the definition.
 */
OMPI_DECLSPEC extern int
mca_common_sm_sysv_component_query(void);
END_C_DECLS
#endif /* _COMMON_SM_SYSV_H_ */

299
ompi/mca/common/sm/common_sm_windows.c Обычный файл
Просмотреть файл

@ -0,0 +1,299 @@
/*
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <errno.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#endif /* HAVE_STRING_H */
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif /* HAVE_FCNTL_H */
#ifdef HAVE_TIME_H
#include <time.h>
#endif /* HAVE_TIME_H */
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif /* HAVE_SYS_STAT_H */
#ifdef HAVE_SYS_MMAN_H
#include <sys/mman.h>
#endif
#include "opal/util/output.h"
#include "opal/util/path.h"
#include "opal/align.h"
#include "opal/threads/mutex.h"
#include "opal/util/opal_sos.h"
#include "orte/util/name_fns.h"
#include "orte/util/show_help.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/errmgr/errmgr.h"
#include "ompi/constants.h"
#include "ompi/proc/proc.h"
#include "ompi/mca/dpm/dpm.h"
#include "ompi/mca/mpool/sm/mpool_sm.h"
#include "common_sm_windows.h"
/*
 * Class instance for the Windows module type, with opal_object_t named as
 * the parent class.
 * NOTE(review): the two NULL arguments are presumably the constructor and
 * destructor hooks, i.e. the class has neither - confirm in opal_object.h.
 */
OBJ_CLASS_INSTANCE(
mca_common_sm_module_windows_t,
opal_object_t,
NULL,
NULL
);
/******************************************************************************/
/**
 * mca_common_sm_windows_component_query
 *
 * Query hook of the Windows backend. Nothing is probed: the routine
 * unconditionally reports OMPI_SUCCESS.
 */
int mca_common_sm_windows_component_query(void)
{
    return OMPI_SUCCESS;
}
/**
 * Create, or attach to, the named shared memory object used by the sm
 * components on Windows and wrap it in a common sm module.
 *
 * The mapping is backed by the system paging file; file_name is only used
 * to derive the name of the object. The process for which
 * CreateFileMapping() does not report ERROR_ALREADY_EXISTS initializes the
 * segment header; every other process only attaches.
 *
 * @param procs               not used by this implementation (IN)
 * @param num_procs           not used by this implementation (IN)
 * @param size                size of the shared memory object, in bytes (IN)
 * @param file_name           name the object key is derived from; it is
 *                            also recorded in module_seg_path (IN)
 * @param size_ctl_structure  size of the control structure at the head of
 *                            the segment (IN)
 * @param data_seg_alignment  alignment of the data segment that follows the
 *                            control structure; 0 means that there is no
 *                            data segment (IN)
 *
 * @return the new module, or NULL on failure. On failure every resource
 *         acquired so far is released again.
 */
mca_common_sm_module_t *
mca_common_sm_windows_init(ompi_proc_t **procs,
                           size_t num_procs,
                           size_t size, char *file_name,
                           size_t size_ctl_structure,
                           size_t data_seg_alignment)
{
    bool file_previously_opened = false;
    mca_common_sm_seg_header_t* seg = NULL;
    mca_common_sm_module_windows_t* map = NULL;
    unsigned char *addr = NULL;
    size_t tmp, mem_offset;
    /* CreateFileMapping() reports failure with NULL, so NULL (and not
       INVALID_HANDLE_VALUE) is the "nothing to close" marker */
    HANDLE hMapObject = NULL;
    LPVOID lpvMem = NULL;
    char *temp1 = NULL, *temp2;
    /* Win32 error to print on the error path; ERROR_SUCCESS means that
       there is no system error to translate */
    DWORD rc = ERROR_SUCCESS;

    /**
     * On Windows the shared file will be created by the OS directly on
     * the system resources. Therefore, no file gets involved in the
     * operation. However, a unique key should be used as name for the
     * shared memory object in order to allow all processes to access
     * the same unique shared memory region. The key will be obtained
     * from the original file_name by replacing all path separator
     * occurrences by '/' (as '\' is not allowed on the object name).
     */
    temp1 = strdup(file_name);
    if( NULL == temp1 ) {
        rc = ERROR_NOT_ENOUGH_MEMORY;
        goto return_error;
    }
    temp2 = temp1;
    while( NULL != (temp2 = strchr(temp2, OPAL_PATH_SEP[0])) ) {
        /* step past the replaced character so the scan always advances */
        *temp2++ = '/';
    }

    hMapObject = CreateFileMapping( INVALID_HANDLE_VALUE,  /* use paging file */
                                    NULL,                  /* no security attributes */
                                    PAGE_READWRITE,        /* read/write access */
                                    (DWORD)(((ULONGLONG)size) >> 32),          /* size: high 32-bits */
                                    (DWORD)(((ULONGLONG)size) & 0xFFFFFFFFu),  /* size: low 32-bits */
                                    temp1);                /* name of map object */
    /* sample the error code before any other call can overwrite it */
    rc = GetLastError();
    if( NULL == hMapObject ) {
        goto return_error;
    }
    /* a valid handle plus ERROR_ALREADY_EXISTS means somebody else created
       (and initializes) the object */
    file_previously_opened = (ERROR_ALREADY_EXISTS == rc);
    rc = ERROR_SUCCESS;
    free(temp1);  /* release the temporary file name */
    temp1 = NULL;

    /* Get a pointer to the file-mapped shared memory. */
    lpvMem = MapViewOfFile( hMapObject,          /* object to map view of */
                            FILE_MAP_WRITE,      /* read/write access */
                            0,                   /* high offset:  map from */
                            0,                   /* low offset:   beginning */
                            0);                  /* default: map entire file */
    if( NULL == lpvMem ) {
        rc = GetLastError();
        goto return_error;
    }
    seg = (mca_common_sm_seg_header_t*)lpvMem;

    /* set up the map object */
    map = OBJ_NEW(mca_common_sm_module_windows_t);
    if( NULL == map ) {
        rc = ERROR_NOT_ENOUGH_MEMORY;
        goto return_error;
    }
    /* strncpy does not terminate the destination when the source fills
       it, so terminate explicitly */
    strncpy(map->super.module_seg_path, file_name, OPAL_PATH_MAX - 1);
    map->super.module_seg_path[OPAL_PATH_MAX - 1] = '\0';
    /* the first entry in the file is the control structure. The first
       entry in the control structure is an mca_common_sm_seg_header_t
       element */
    map->super.module_seg = seg;

    /* If we have a data segment (i.e., if 0 != data_seg_alignment),
       then make it the first aligned address after the control
       structure. */
    if (0 != data_seg_alignment) {
        addr = ((unsigned char *) seg) + size_ctl_structure;
        /* calculate how far off alignment we are */
        tmp = ((size_t) addr) % data_seg_alignment;
        /* if we're off alignment, then move up to the next alignment */
        if( tmp > 0 )
            addr += (data_seg_alignment - tmp);

        /* is addr past end of file ? */
        if( (unsigned char*)seg+size < addr ) {
            opal_output(0, "mca_common_sm_init: memory region too small len %lu addr %p\n",
                        (unsigned long)size, (void *)addr);
            /* not a Win32 failure: rc stays ERROR_SUCCESS so that the
               error path does not print an unrelated system message */
            goto return_error;
        }
        map->super.module_data_addr = addr;
    } else {
        map->super.module_data_addr = NULL;
    }
    /* NOTE(review): when there is no data segment addr is still NULL here,
       so this offset is not meaningful - confirm how callers without a
       data segment use seg_offset/seg_size */
    mem_offset = addr-(unsigned char *)seg;
    map->super.module_seg_addr = (unsigned char *)seg;
    map->super.module_size = size;

    /* initialize the segment - only the first process to open the file */
    if( !file_previously_opened ) {
        opal_atomic_unlock(&seg->seg_lock);
        seg->seg_inited = false;
        seg->seg_offset = mem_offset;
        /* initialize size after subtracting out space used by the header */
        seg->seg_size = size - mem_offset;
    }
    map->hMappedObject = hMapObject;

    return (mca_common_sm_module_t *)map;

 return_error:
    if( ERROR_SUCCESS != rc ) {
        char* localbuf = NULL;
        /* only print (and free) the buffer if the lookup produced one */
        if( 0 != FormatMessage( FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM,
                                NULL, rc, 0, (LPTSTR)&localbuf, 1024, NULL ) &&
            NULL != localbuf ) {
            opal_output( 0, "%s\n", localbuf );
            LocalFree( localbuf );
        }
    }
    free(temp1);
    if( NULL != map ) {
        OBJ_RELEASE(map);
    }
    if( NULL != lpvMem ) UnmapViewOfFile( lpvMem );
    if( NULL != hMapObject ) CloseHandle(hMapObject);

    return NULL;
}
/*
 * Group flavor of mca_common_sm_windows_init(): the peers are given as an
 * (ompi_group_t*) instead of an array of ompi_proc_t's.
 *
 * The group is flattened into a temporary proc array. If any member is
 * not on the local node, NULL is returned and
 * mca_common_sm_windows_init() is never called; otherwise its result is
 * returned as is.
 */
mca_common_sm_module_t *
mca_common_sm_windows_init_group(ompi_group_t *group,
                                 size_t size,
                                 char *file_name,
                                 size_t size_ctl_structure,
                                 size_t data_seg_alignment)
{
    mca_common_sm_module_t *module = NULL;
    ompi_proc_t **peers;
    size_t idx = 0;
    bool all_local = true;
    const size_t num_peers = ompi_group_size(group);

    peers = (ompi_proc_t**) malloc(num_peers * sizeof(ompi_proc_t*));
    if (NULL == peers) {
        return NULL;
    }

    /* stop collecting at the first peer that lives on another node */
    while (all_local && idx < num_peers) {
        ompi_proc_t *peer = ompi_group_peer_lookup(group, idx);
        if (OPAL_PROC_ON_LOCAL_NODE(peer->proc_flags)) {
            peers[idx] = peer;
            ++idx;
        } else {
            all_local = false;
        }
    }

    if (all_local) {
        module = mca_common_sm_windows_init(peers, num_peers, size, file_name,
                                            size_ctl_structure, data_seg_alignment);
    }

    /* the array is only scratch space for the init call */
    free(peers);
    return module;
}
/**
 * Release the local resources attached to the shared memory object: unmap
 * the view and close the mapping handle.
 *
 * @param mca_common_sm_module  module created by
 *                              mca_common_sm_windows_init() (IN)
 *
 * @return OMPI_SUCCESS if nothing was mapped or both steps succeeded,
 *         otherwise the GetLastError() code of the first step that failed.
 */
int
mca_common_sm_windows_fini(mca_common_sm_module_t *mca_common_sm_module)
{
    mca_common_sm_module_windows_t *windows_module =
        (mca_common_sm_module_windows_t *)mca_common_sm_module;
    int rc = OMPI_SUCCESS;

    if( NULL != windows_module->super.module_seg ) {
        if( !UnmapViewOfFile( windows_module->super.module_seg_addr ) ) {
            rc = GetLastError();
        }
        /* the handle is a member of the windows module itself (that is
           where init stores it), not of the common base module */
        if( !CloseHandle(windows_module->hMappedObject) && OMPI_SUCCESS == rc ) {
            rc = GetLastError();
        }
    }
    return rc;
}
/**
 * allocate memory from a previously allocated shared memory
 * block.
 *
 * The allocation is a simple bump of seg_offset inside the segment header,
 * performed while holding the header's seg_lock.
 *
 * @param mpool         sm mpool module whose sm_common_module describes
 *                      the segment (IN)
 * @param size          size of request, in bytes (IN)
 * @param registration  if non-NULL, *registration is set to NULL (OUT)
 *
 * @retval addr virtual address, or NULL if the request does not fit in the
 *         space that is left
 */
void *
mca_common_sm_windows_seg_alloc(struct mca_mpool_base_module_t* mpool,
                                size_t* size,
                                mca_mpool_base_registration_t** registration)
{
    mca_mpool_sm_module_t *sm_module = (mca_mpool_sm_module_t*) mpool;
    mca_common_sm_module_windows_t *map =
        (mca_common_sm_module_windows_t *)sm_module->sm_common_module;
    mca_common_sm_seg_header_t* seg = map->super.module_seg;
    void* addr;

    opal_atomic_lock(&seg->seg_lock);
    /* compare against the space that is left instead of computing
       seg_offset + *size: the sum wraps around for a huge request and
       would then pass the check. seg_offset may sit slightly past
       seg_size because of the alignment fix up below, hence the first
       test. */
    if(seg->seg_offset > seg->seg_size ||
       *size > seg->seg_size - seg->seg_offset) {
        addr = NULL;
    } else {
        size_t fixup;

        /* add base address to segment offset */
        addr = map->super.module_data_addr + seg->seg_offset;
        seg->seg_offset += *size;

        /* fix up seg_offset so next allocation is aligned on a
           sizeof(long) boundary.  Do it here so that we don't have to
           check before checking remaining size in buffer */
        if ((fixup = (seg->seg_offset & (sizeof(long) - 1))) > 0) {
            seg->seg_offset += sizeof(long) - fixup;
        }
    }
    if (NULL != registration) {
        *registration = NULL;
    }
    opal_atomic_unlock(&seg->seg_lock);
    return addr;
}

135
ompi/mca/common/sm/common_sm_windows.h Обычный файл
Просмотреть файл

@ -0,0 +1,135 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef _COMMON_SM_WINDOWS_H_
#define _COMMON_SM_WINDOWS_H_
#include "ompi_config.h"
#include "opal/class/opal_object.h"
#include "opal/class/opal_list.h"
#include "opal/sys/atomic.h"
#include "ompi/mca/mpool/mpool.h"
#include "ompi/proc/proc.h"
#include "ompi/group/group.h"
#include "ompi/mca/common/sm/common_sm.h"
BEGIN_C_DECLS
struct mca_mpool_base_module_t;
/**
 * Windows flavor of the common sm module: the common base module plus the
 * handle of the file mapping object that backs the segment.
 */
typedef struct mca_common_sm_module_windows_t
{
/* common sm module state; as the first member it lets the windows code
 * cast between this type and mca_common_sm_module_t */
mca_common_sm_module_t super;
/* handle returned by CreateFileMapping(), stored by
 * mca_common_sm_windows_init() */
HANDLE hMappedObject;
} mca_common_sm_module_windows_t;
/* NOTE(review): presumably declares the OPAL class descriptor that
 * OBJ_NEW/OBJ_RELEASE use for this type - confirm in opal_object.h */
OBJ_CLASS_DECLARATION(mca_common_sm_module_windows_t);
/**
 * This routine is used to set up a named shared memory object. The
 * object is backed by the system paging file; file_name is only used
 * to derive the name of the object. The first process to create the
 * object initializes the segment header, every other process only
 * attaches to it.
 *
 * @param procs - array of (ompi_proc_t*)'s to create this shared
 * memory segment for. This array must be writable; it may be edited
 * (in undefined ways) if the array contains procs that are not on
 * this host. It is assumed that the caller will simply free this
 * array upon return. (INOUT)
 *
 * @param num_procs - length of the procs array (IN)
 *
 * @param size - size of the shared memory object, in bytes (IN)
 *
 * @param file_name name that the key of the shared memory object
 *                  is derived from. (IN)
 *
 * @param size_ctl_structure size of the control structure at
 *                           the head of the segment. The control structure
 *                           is assumed to have mca_common_sm_seg_header_t
 *                           as its first element (IN)
 *
 * @param data_seg_alignment alignment of the data segment. this
 *                           follows the control structure. If this
 *                           value is 0, then assume that there will
 *                           be no data segment following the control
 *                           structure. (IN)
 *
 * @return pointer to the common sm module describing the segment, or
 *         NULL on failure.
 */
OMPI_DECLSPEC extern mca_common_sm_module_t *
mca_common_sm_windows_init(ompi_proc_t **procs,
size_t num_procs,
size_t size,
char *file_name,
size_t size_ctl_structure,
size_t data_seg_alignment);
/**
 * This routine is used to set up a named shared memory object, in the
 * same way as mca_common_sm_windows_init().
 *
 * This routine is the same as mca_common_sm_windows_init() except that
 * it takes an (ompi_group_t*) parameter to specify the peers rather
 * than an array of procs. Unlike mca_common_sm_windows_init(), the
 * group must contain *only* local peers, or this function will return
 * NULL and not create any shared memory segment.
 *
 * @param group - group whose members are the peers of the segment (IN)
 *
 * The remaining parameters are handed to mca_common_sm_windows_init()
 * unchanged.
 *
 * @return whatever mca_common_sm_windows_init() returns, or NULL if the
 *         group contains a non-local peer or the temporary proc array
 *         cannot be allocated.
 */
OMPI_DECLSPEC extern mca_common_sm_module_t *
mca_common_sm_windows_init_group(ompi_group_t *group,
size_t size,
char *file_name,
size_t size_ctl_structure,
size_t data_seg_alignment);
/**
 * Callback from the sm mpool: carve the next chunk out of the shared
 * memory segment attached to the mpool's common sm module.
 *
 * @param mpool - sm mpool module; its sm_common_module is the segment
 *                to allocate from (IN)
 *
 * @param size - size of the request, in bytes (IN)
 *
 * @param registration - if non-NULL, *registration is set to NULL (OUT)
 *
 * @return address of the chunk, or NULL if the request does not fit in
 *         the space that is left.
 */
OMPI_DECLSPEC extern void *
mca_common_sm_windows_seg_alloc(struct mca_mpool_base_module_t *mpool,
size_t *size,
mca_mpool_base_registration_t **registration);
/**
 * This function will release all local resources attached to the
 * shared memory object: the view is unmapped and the mapping handle is
 * closed. We assume that the operating system will destroy the object
 * when the last process releases it.
 *
 * @param mca_common_sm_module - the module that describes the mapped
 *                               segment. (IN)
 *
 * @return OMPI_SUCCESS if everything was OK, otherwise the Win32 error
 *         code reported by GetLastError() for the failed unmap.
 */
OMPI_DECLSPEC extern int
mca_common_sm_windows_fini(mca_common_sm_module_t *mca_common_sm_module);
/**
 * component query routine
 *
 * The Windows implementation probes nothing and always returns
 * OMPI_SUCCESS.
 */
OMPI_DECLSPEC extern int
mca_common_sm_windows_component_query(void);
END_C_DECLS
#endif

73
ompi/mca/common/sm/configure.m4 Обычный файл
Просмотреть файл

@ -0,0 +1,73 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2010 Los Alamos National Security, LLC.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_common_sm_POST_CONFIG([should_build])
# ------------------------------------------
# Defines the automake conditionals MCA_common_sm_windows and
# MCA_common_sm_sysv.  Each one is true only when should_build is 1 and
# the shell variable of the same name, set by MCA_common_sm_CONFIG, is 1.
AC_DEFUN([MCA_common_sm_POST_CONFIG], [
AM_CONDITIONAL([MCA_common_sm_windows],
[test $1 -eq 1 -a "x$MCA_common_sm_windows" = "x1"])
AM_CONDITIONAL([MCA_common_sm_sysv],
[test $1 -eq 1 -a "x$MCA_common_sm_sysv" = "x1"])
])dnl
# MCA_common_sm_CONFIG([action-if-can-compile],
# [action-if-cant-compile])
# ------------------------------------------------
# Decides which optional shared memory backends are built and records the
# result in the shell variables MCA_common_sm_windows and MCA_common_sm_sysv
# as well as in the C defines MCA_COMMON_SM_WINDOWS and MCA_COMMON_SM_SYSV.
# The Windows backend is selected when CreateFileMapping is found.  The sysv
# backend is opt-in through --enable-sysv and requires shmget.
#
# NOTE(review): the body below never expands either action argument and
# OMPI_VAR_SCOPE_PUSH has no matching OMPI_VAR_SCOPE_POP in this macro.
# Both pushed variables are read again by MCA_common_sm_POST_CONFIG, and
# ompi_check_sysv_happy is not part of the pushed list - confirm that all
# of this is intended.
AC_DEFUN([MCA_common_sm_CONFIG], [
OMPI_VAR_SCOPE_PUSH([MCA_common_sm_windows MCA_common_sm_sysv])
# Are we building on Windows?
AC_CHECK_FUNC(CreateFileMapping,
[MCA_common_sm_windows=1],
[MCA_common_sm_windows=0])
# publish the result to C as 0 or 1
AC_DEFINE_UNQUOTED([MCA_COMMON_SM_WINDOWS],
[$MCA_common_sm_windows],
[Whether we have shared memory support for Windows or not])
# do we have sysv shared memory support on this system?
AC_CHECK_FUNC(shmget,
[ompi_check_sysv_happy="yes"],
[ompi_check_sysv_happy="no"])
# do we want to enable System V shared memory support?
AC_MSG_CHECKING([if want sysv support])
AC_ARG_ENABLE(sysv,
AC_HELP_STRING([--enable-sysv],
[enable sysv shared memory support (default: disabled)]))
# sysv is built only when it was requested and shmget was found
if test "$enable_sysv" = "yes"; then
if test "$ompi_check_sysv_happy" = "yes"; then
AC_MSG_RESULT([yes])
MCA_common_sm_sysv=1
else
# an explicit request that cannot be honored aborts configure
MCA_common_sm_sysv=0
AC_MSG_ERROR([sysv support requested but not found. aborting])
fi
else
AC_MSG_RESULT([no])
MCA_common_sm_sysv=0
fi
# publish the result to C as 0 or 1
AC_DEFINE_UNQUOTED([MCA_COMMON_SM_SYSV],
[$MCA_common_sm_sysv],
[Whether we have shared memory support for SYSV or not])
])dnl

Просмотреть файл

@ -1,6 +1,8 @@
# -*- text -*-
#
# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2010 Los Alamos National Security, LLC.
# All rights reserved.
#
# $COPYRIGHT$
#
@ -20,6 +22,19 @@ experience performance degradation.
Process: %s
Error: %s (errno %d)
#
[shmget call fail]
A shmget call failed during shared memory initialization that should
not have. It is likely that your MPI job will now either abort or
experience performance degradation.
Local host: %s
System call: %s
Process: %s
Error: %s (errno %d)
Please verify that your system's shmmax limit, or equivalent, is larger than
%d. On some Unix-like systems this can be done via: "sysctl -a | grep shm"
#
[mmap too small]
Open MPI requested a shared memory segment that was too small to do
anything useful. This is likely an error in Open MPI itself. If you
@ -49,3 +64,11 @@ the MCA parameter "orte_no_session_dir".
Local host: %s
Filename: %s
#
[sm support]
WARNING: "%s" not recognized - ignoring option. Suppressing additional
unrecognized option warnings.
#
[sysv rt test fail]
WARNING: It appears as if your system does not provide the run-time behavior
that we rely on to safely provide System V shared memory support.

Просмотреть файл

@ -11,6 +11,8 @@
* All rights reserved.
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -27,7 +29,7 @@
#include "opal/event/event.h"
#include "ompi/mca/common/sm/common_sm_mmap.h"
#include "ompi/mca/common/sm/common_sm.h"
#include "ompi/mca/mpool/mpool.h"
#include "ompi/mca/allocator/allocator.h"
@ -54,7 +56,7 @@ typedef struct mca_mpool_sm_module_t {
long sm_size;
mca_allocator_base_module_t * sm_allocator;
struct mca_mpool_sm_mmap_t *sm_mmap;
mca_common_sm_mmap_t *sm_common_mmap;
mca_common_sm_module_t *sm_common_module;
int32_t mem_node;
} mca_mpool_sm_module_t;

Просмотреть файл

@ -11,6 +11,8 @@
* All rights reserved.
* Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -33,7 +35,7 @@
#include "ompi/mca/allocator/base/base.h"
#include "mpool_sm.h"
#include "ompi/mca/common/sm/common_sm_mmap.h"
#include "ompi/mca/common/sm/common_sm.h"
#include "ompi/proc/proc.h"
#if OPAL_ENABLE_FT_CR == 1
@ -172,7 +174,7 @@ static mca_mpool_base_module_t* mca_mpool_sm_init(
}
/* add something for the control structure */
mpool_module->sm_size += sizeof(mca_common_sm_mmap_t);
mpool_module->sm_size += sizeof(mca_common_sm_module_t);
allocator_component = mca_allocator_component_lookup(
mca_mpool_sm_component.sm_allocator_name);
@ -209,11 +211,11 @@ static mca_mpool_base_module_t* mca_mpool_sm_init(
"mca_mpool_sm_init: shared memory size used: (%ld)",
mpool_module->sm_size);
if (NULL == (mpool_module->sm_common_mmap =
mca_common_sm_mmap_init(procs, num_all_procs,
mpool_module->sm_size,
file_name,
sizeof(mca_common_sm_mmap_t), 8))) {
if (NULL == (mpool_module->sm_common_module =
mca_common_sm_init(procs, num_all_procs,
mpool_module->sm_size,
file_name,
sizeof(mca_common_sm_module_t), 8))) {
opal_output(mca_mpool_sm_component.verbose,
"mca_mpool_sm_init: unable to create shared memory mapping (%s)", file_name);
free(file_name);
@ -227,7 +229,7 @@ static mca_mpool_base_module_t* mca_mpool_sm_init(
/* setup allocator */
mpool_module->sm_allocator =
allocator_component->allocator_init(true,
mca_common_sm_mmap_seg_alloc,
mca_common_sm_seg_alloc,
NULL, &(mpool_module->super));
if(NULL == mpool_module->sm_allocator) {
opal_output(0, "mca_mpool_sm_init: unable to initialize allocator");

Просмотреть файл

@ -10,6 +10,8 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -20,7 +22,7 @@
#include "ompi_config.h"
#include <string.h>
#include "ompi/mca/mpool/sm/mpool_sm.h"
#include "ompi/mca/common/sm/common_sm_mmap.h"
#include "ompi/mca/common/sm/common_sm.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
@ -57,7 +59,7 @@ void mca_mpool_sm_module_init(mca_mpool_sm_module_t* mpool)
mpool->sm_size = 0;
mpool->sm_allocator = NULL;
mpool->sm_mmap = NULL;
mpool->sm_common_mmap = NULL;
mpool->sm_common_module = NULL;
mpool->mem_node = -1;
}
@ -67,8 +69,8 @@ void mca_mpool_sm_module_init(mca_mpool_sm_module_t* mpool)
void* mca_mpool_sm_base(mca_mpool_base_module_t* mpool)
{
mca_mpool_sm_module_t *sm_mpool = (mca_mpool_sm_module_t*) mpool;
return (NULL != sm_mpool->sm_common_mmap) ?
sm_mpool->sm_common_mmap->map_addr : NULL;
return (NULL != sm_mpool->sm_common_module) ?
sm_mpool->sm_common_module->module_seg_addr : NULL;
}
/**
@ -132,23 +134,23 @@ static void sm_module_finalize(mca_mpool_base_module_t* module)
{
mca_mpool_sm_module_t *sm_module = (mca_mpool_sm_module_t*) module;
if (NULL != sm_module->sm_common_mmap) {
if (NULL != sm_module->sm_common_module) {
if (OMPI_SUCCESS ==
mca_common_sm_mmap_fini(sm_module->sm_common_mmap)) {
mca_common_sm_fini(sm_module->sm_common_module)) {
#if OPAL_ENABLE_FT_CR == 1
/* Only unlink the file if we are *not* restarting. If we
are restarting the file will be unlinked at a later
time. */
if (OPAL_CR_STATUS_RESTART_PRE != opal_cr_checkpointing_state &&
OPAL_CR_STATUS_RESTART_POST != opal_cr_checkpointing_state ) {
unlink(sm_module->sm_common_mmap->map_path);
unlink(sm_module->sm_common_module->module_seg_path);
}
#else
unlink(sm_module->sm_common_mmap->map_path);
unlink(sm_module->sm_common_module->module_seg_path);
#endif
}
OBJ_RELEASE(sm_module->sm_common_mmap);
sm_module->sm_common_mmap = NULL;
OBJ_RELEASE(sm_module->sm_common_module);
sm_module->sm_common_module = NULL;
}
}
@ -178,8 +180,8 @@ int mca_mpool_sm_ft_event(int state) {
self_sm_module = (mca_mpool_sm_module_t*) self_module;
/* Mark the old sm file for eventual removal via CRS */
if (NULL != self_sm_module->sm_common_mmap) {
opal_crs_base_cleanup_append(self_sm_module->sm_common_mmap->map_path, false);
if (NULL != self_sm_module->sm_common_module) {
opal_crs_base_cleanup_append(self_sm_module->sm_common_module->module_seg_path, false);
}
/* Remove self from the list of all modules */
@ -193,8 +195,8 @@ int mca_mpool_sm_ft_event(int state) {
self_sm_module = (mca_mpool_sm_module_t*) self_module;
/* Mark the old sm file for eventual removal via CRS */
if (NULL != self_sm_module->sm_common_mmap) {
opal_crs_base_cleanup_append(self_sm_module->sm_common_mmap->map_path, false);
if (NULL != self_sm_module->sm_common_module) {
opal_crs_base_cleanup_append(self_sm_module->sm_common_module->module_seg_path, false);
}
/* Remove self from the list of all modules */