From 81f38b258a7b2ffabe1553bdda20eb00bfc8b4e1 Mon Sep 17 00:00:00 2001 From: Samuel Gutierrez Date: Tue, 21 Jun 2011 15:41:57 +0000 Subject: [PATCH] commit of new shared memory backing facility framework (shmem) and its components. This commit was SVN r24795. --- ompi/mca/btl/sm/btl_sm_component.c | 18 +- ompi/mca/coll/sm/coll_sm_component.c | 11 +- ompi/mca/coll/sm/coll_sm_module.c | 7 +- ompi/mca/common/sm/Makefile.am | 32 +- ompi/mca/common/sm/common_sm.c | 784 ++++++------------ ompi/mca/common/sm/common_sm.h | 81 +- ompi/mca/common/sm/common_sm_mmap.c | 357 -------- ompi/mca/common/sm/common_sm_mmap.h | 116 --- ompi/mca/common/sm/common_sm_posix.c | 504 ----------- ompi/mca/common/sm/common_sm_posix.h | 113 --- ompi/mca/common/sm/common_sm_rml.c | 99 +-- ompi/mca/common/sm/common_sm_rml.h | 30 +- ompi/mca/common/sm/common_sm_sysv.c | 464 ----------- ompi/mca/common/sm/common_sm_sysv.h | 124 --- ompi/mca/common/sm/common_sm_windows.c | 261 ------ ompi/mca/common/sm/common_sm_windows.h | 117 --- ompi/mca/common/sm/configure.m4 | 52 +- ompi/mca/common/sm/help-mpi-common-sm.txt | 57 +- ompi/mca/mpool/sm/mpool_sm_module.c | 4 +- ompi/tools/ompi_info/components.c | 12 + ompi/tools/ompi_info/ompi_info.c | 1 + opal/mca/shmem/Makefile.am | 39 + opal/mca/shmem/base/Makefile.am | 28 + opal/mca/shmem/base/base.h | 200 +++++ opal/mca/shmem/base/shmem_base_close.c | 54 ++ opal/mca/shmem/base/shmem_base_open.c | 103 +++ opal/mca/shmem/base/shmem_base_select.c | 216 +++++ opal/mca/shmem/base/shmem_base_wrappers.c | 85 ++ opal/mca/shmem/mmap/Makefile.am | 49 ++ opal/mca/shmem/mmap/configure.m4 | 50 ++ opal/mca/shmem/mmap/help-opal-shmem-mmap.txt | 41 + opal/mca/shmem/mmap/shmem_mmap.h | 52 ++ opal/mca/shmem/mmap/shmem_mmap_component.c | 125 +++ opal/mca/shmem/mmap/shmem_mmap_module.c | 453 ++++++++++ opal/mca/shmem/posix/Makefile.am | 50 ++ opal/mca/shmem/posix/configure.m4 | 50 ++ .../mca/shmem/posix/help-opal-shmem-posix.txt | 22 + opal/mca/shmem/posix/shmem_posix.h | 67 ++ 
.../shmem/posix/shmem_posix_common_utils.c | 99 +++ .../shmem/posix/shmem_posix_common_utils.h | 50 ++ opal/mca/shmem/posix/shmem_posix_component.c | 193 +++++ opal/mca/shmem/posix/shmem_posix_module.c | 450 ++++++++++ opal/mca/shmem/shmem.h | 189 +++++ opal/mca/shmem/shmem_types.h | 135 +++ opal/mca/shmem/sysv/Makefile.am | 48 ++ opal/mca/shmem/sysv/configure.m4 | 50 ++ opal/mca/shmem/sysv/help-opal-shmem-sysv.txt | 22 + opal/mca/shmem/sysv/shmem_sysv.h | 52 ++ opal/mca/shmem/sysv/shmem_sysv_component.c | 202 +++++ opal/mca/shmem/sysv/shmem_sysv_module.c | 404 +++++++++ opal/mca/shmem/windows/.windows | 11 + opal/mca/shmem/windows/Makefile.am | 49 ++ opal/mca/shmem/windows/configure.m4 | 50 ++ .../shmem/windows/help-opal-shmem-windows.txt | 41 + opal/mca/shmem/windows/shmem_windows.h | 52 ++ .../shmem/windows/shmem_windows_component.c | 125 +++ opal/mca/shmem/windows/shmem_windows_module.c | 537 ++++++++++++ opal/runtime/opal_finalize.c | 6 + opal/runtime/opal_init.c | 14 + opal/runtime/opal_params.c | 9 + orte/mca/grpcomm/bad/.windows | 12 + orte/mca/grpcomm/cnos/.windows | 12 + orte/mca/grpcomm/hier/.windows | 12 + orte/mca/grpcomm/mcast/.windows | 12 + orte/mca/odls/base/odls_base_default_fns.c | 19 + orte/tools/orte-info/components.c | 12 + orte/tools/orte-info/orte-info.c | 3 + 67 files changed, 4902 insertions(+), 2896 deletions(-) delete mode 100644 ompi/mca/common/sm/common_sm_mmap.c delete mode 100644 ompi/mca/common/sm/common_sm_mmap.h delete mode 100644 ompi/mca/common/sm/common_sm_posix.c delete mode 100644 ompi/mca/common/sm/common_sm_posix.h delete mode 100644 ompi/mca/common/sm/common_sm_sysv.c delete mode 100644 ompi/mca/common/sm/common_sm_sysv.h delete mode 100644 ompi/mca/common/sm/common_sm_windows.c delete mode 100644 ompi/mca/common/sm/common_sm_windows.h create mode 100644 opal/mca/shmem/Makefile.am create mode 100644 opal/mca/shmem/base/Makefile.am create mode 100644 opal/mca/shmem/base/base.h create mode 100644 
opal/mca/shmem/base/shmem_base_close.c create mode 100644 opal/mca/shmem/base/shmem_base_open.c create mode 100644 opal/mca/shmem/base/shmem_base_select.c create mode 100644 opal/mca/shmem/base/shmem_base_wrappers.c create mode 100644 opal/mca/shmem/mmap/Makefile.am create mode 100644 opal/mca/shmem/mmap/configure.m4 create mode 100644 opal/mca/shmem/mmap/help-opal-shmem-mmap.txt create mode 100644 opal/mca/shmem/mmap/shmem_mmap.h create mode 100644 opal/mca/shmem/mmap/shmem_mmap_component.c create mode 100644 opal/mca/shmem/mmap/shmem_mmap_module.c create mode 100644 opal/mca/shmem/posix/Makefile.am create mode 100644 opal/mca/shmem/posix/configure.m4 create mode 100644 opal/mca/shmem/posix/help-opal-shmem-posix.txt create mode 100644 opal/mca/shmem/posix/shmem_posix.h create mode 100644 opal/mca/shmem/posix/shmem_posix_common_utils.c create mode 100644 opal/mca/shmem/posix/shmem_posix_common_utils.h create mode 100644 opal/mca/shmem/posix/shmem_posix_component.c create mode 100644 opal/mca/shmem/posix/shmem_posix_module.c create mode 100644 opal/mca/shmem/shmem.h create mode 100644 opal/mca/shmem/shmem_types.h create mode 100644 opal/mca/shmem/sysv/Makefile.am create mode 100644 opal/mca/shmem/sysv/configure.m4 create mode 100644 opal/mca/shmem/sysv/help-opal-shmem-sysv.txt create mode 100644 opal/mca/shmem/sysv/shmem_sysv.h create mode 100644 opal/mca/shmem/sysv/shmem_sysv_component.c create mode 100644 opal/mca/shmem/sysv/shmem_sysv_module.c create mode 100644 opal/mca/shmem/windows/.windows create mode 100644 opal/mca/shmem/windows/Makefile.am create mode 100644 opal/mca/shmem/windows/configure.m4 create mode 100644 opal/mca/shmem/windows/help-opal-shmem-windows.txt create mode 100644 opal/mca/shmem/windows/shmem_windows.h create mode 100644 opal/mca/shmem/windows/shmem_windows_component.c create mode 100644 opal/mca/shmem/windows/shmem_windows_module.c create mode 100644 orte/mca/grpcomm/bad/.windows create mode 100644 orte/mca/grpcomm/cnos/.windows create 
mode 100644 orte/mca/grpcomm/hier/.windows create mode 100644 orte/mca/grpcomm/mcast/.windows diff --git a/ompi/mca/btl/sm/btl_sm_component.c b/ompi/mca/btl/sm/btl_sm_component.c index 91f2d6cfce..7813c3bbd9 100644 --- a/ompi/mca/btl/sm/btl_sm_component.c +++ b/ompi/mca/btl/sm/btl_sm_component.c @@ -11,8 +11,8 @@ * All rights reserved. * Copyright (c) 2006-2007 Voltaire. All rights reserved. * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010 Los Alamos National Security, LLC. - * All rights reserved. + * Copyright (c) 2010-2011 Los Alamos National Security, LLC. + * All rights reserved. * Copyright (c) 2011 NVIDIA Corporation. All rights reserved. * $COPYRIGHT$ * @@ -214,9 +214,6 @@ static int sm_register(void) mca_btl_base_param_register(&mca_btl_sm_component.super.btl_version, &mca_btl_sm.super); - /* Call down to sm common to register its MCA params */ - mca_common_sm_param_register(&mca_btl_sm_component.super.btl_version); - return OMPI_SUCCESS; } @@ -288,17 +285,12 @@ static int mca_btl_sm_component_close(void) /*OBJ_DESTRUCT(&mca_btl_sm_component.sm_frags_eager);*/ /*OBJ_DESTRUCT(&mca_btl_sm_component.sm_frags_max);*/ - /* Free resources associated with common sm MCA params */ - mca_common_sm_param_unregister(&mca_btl_sm_component.super.btl_version); - /* unmap the shared memory control structure */ if(mca_btl_sm_component.sm_seg != NULL) { return_value = mca_common_sm_fini( mca_btl_sm_component.sm_seg ); if( OMPI_SUCCESS != return_value ) { return_value=OMPI_ERROR; - opal_output(0," munmap failed :: file - %s :: errno - %d \n", - mca_btl_sm_component.sm_seg->module_seg_addr, - errno); + opal_output(0," mca_common_sm_fini failed\n"); goto CLEANUP; } @@ -312,10 +304,10 @@ static int mca_btl_sm_component_close(void) */ if(OPAL_CR_STATUS_RESTART_PRE != opal_cr_checkpointing_state && OPAL_CR_STATUS_RESTART_POST != opal_cr_checkpointing_state ) { - unlink(mca_btl_sm_component.sm_seg->module_seg_path); + 
unlink(mca_btl_sm_component.sm_seg->shmem_ds.name); } #else - unlink(mca_btl_sm_component.sm_seg->module_seg_path); + unlink(mca_btl_sm_component.sm_seg->shmem_ds.seg_name); #endif OBJ_RELEASE(mca_btl_sm_component.sm_seg); } diff --git a/ompi/mca/coll/sm/coll_sm_component.c b/ompi/mca/coll/sm/coll_sm_component.c index f64a96a2b3..d74495eefd 100644 --- a/ompi/mca/coll/sm/coll_sm_component.c +++ b/ompi/mca/coll/sm/coll_sm_component.c @@ -10,6 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2011 Los Alamos National Security, LLC. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -122,15 +124,9 @@ mca_coll_sm_component_t mca_coll_sm_component = { */ static int sm_close(void) { - mca_base_component_t *c = &mca_coll_sm_component.super.collm_version; - - /* Common SM MCA params */ - mca_common_sm_param_unregister(c); - return OMPI_SUCCESS; } - /* * Register MCA params */ @@ -225,8 +221,5 @@ static int sm_register(void) false, true, (int)size, NULL); - /* Common SM MCA params */ - mca_common_sm_param_register(c); - return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/sm/coll_sm_module.c b/ompi/mca/coll/sm/coll_sm_module.c index 4bdac1970d..4247a9765c 100644 --- a/ompi/mca/coll/sm/coll_sm_module.c +++ b/ompi/mca/coll/sm/coll_sm_module.c @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010 Los Alamos National Security, LLC. + * Copyright (c) 2010-2011 Los Alamos National Security, LLC. * All rights reserved. 
* $COPYRIGHT$ * @@ -484,10 +484,11 @@ int ompi_coll_sm_lazy_enable(mca_coll_base_module_t *module, /* Once we're all here, remove the mmap file; it's not needed anymore */ if (0 == rank) { - unlink(data->sm_bootstrap_meta->module_seg_path); + unlink(data->sm_bootstrap_meta->shmem_ds.seg_name); opal_output_verbose(10, mca_coll_base_output, "coll:sm:enable (%d/%s): removed mmap file %s", - comm->c_contextid, comm->c_name, data->sm_bootstrap_meta->module_seg_path); + comm->c_contextid, comm->c_name, + data->sm_bootstrap_meta->shmem_ds.seg_name); } /* All done */ diff --git a/ompi/mca/common/sm/Makefile.am b/ompi/mca/common/sm/Makefile.am index 64b28e31f2..7790d4bc4f 100644 --- a/ompi/mca/common/sm/Makefile.am +++ b/ompi/mca/common/sm/Makefile.am @@ -10,7 +10,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2010 Los Alamos National Security, LLC. +# Copyright (c) 2010-2011 Los Alamos National Security, LLC. # All rights reserved. # $COPYRIGHT$ # @@ -46,40 +46,14 @@ EXTRA_DIST = .windows # Header files headers = \ - common_sm.h \ common_sm_rml.h \ - common_sm_mmap.h + common_sm.h # Source files sources = \ - common_sm.c \ common_sm_rml.c \ - common_sm_mmap.c - -# Only build the Windows support if we're building on windows, but -# always include the files in the tarball. - -if COMMON_SM_BUILD_WINDOWS -headers += common_sm_windows.h -sources += common_sm_windows.c -endif - -# Only build the SYSV support if we have the right stuff, but -# always include the files in the tarball. - -if COMMON_SM_BUILD_SYSV -headers += common_sm_sysv.h -sources += common_sm_sysv.c -endif - -# Only build the POSIX support if we have the right stuff, but -# always include the files in the tarball. 
- -if COMMON_SM_BUILD_POSIX -headers += common_sm_posix.h -sources += common_sm_posix.c -endif + common_sm.c # Help file diff --git a/ompi/mca/common/sm/common_sm.c b/ompi/mca/common/sm/common_sm.c index 2925456f5b..b163694137 100644 --- a/ompi/mca/common/sm/common_sm.c +++ b/ompi/mca/common/sm/common_sm.c @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010 Los Alamos National Security, LLC. + * Copyright (c) 2010-2011 Los Alamos National Security, LLC. * All rights reserved. * $COPYRIGHT$ * @@ -26,11 +26,12 @@ #include #endif +#include "opal/align.h" #include "opal/util/argv.h" #if OPAL_ENABLE_FT_CR == 1 #include "opal/runtime/opal_cr.h" #endif -#include "orte/mca/rml/rml.h" + #include "orte/util/name_fns.h" #include "orte/util/show_help.h" #include "orte/runtime/orte_globals.h" @@ -38,413 +39,112 @@ #include "ompi/constants.h" #include "ompi/mca/dpm/dpm.h" +#include "ompi/mca/mpool/sm/mpool_sm.h" #include "common_sm_rml.h" -#include "common_sm_mmap.h" -#if MCA_COMMON_SM_SYSV -#include "common_sm_sysv.h" -#endif /* MCA_COMMON_SM_SYSV */ -#if MCA_COMMON_SM_WINDOWS -#include "common_sm_windows.h" -#endif /* MCA_COMMON_SM_WINDOWS */ -#if MCA_COMMON_SM_POSIX -#include "common_sm_posix.h" -#endif /* MCA_COMMON_SM_POSIX */ -/** - * ASSUMING local proc homogeneity with respect to all utilized shared memory - * facilities. that is, if one local proc deems a particular shared memory - * facility acceptable, then ALL local procs should be able to utilize that +/* ASSUMING local process homogeneity with respect to all utilized shared memory + * facilities. that is, if one local process deems a particular shared memory + * facility acceptable, then ALL local processes should be able to utilize that * facility. 
as it stands, this is an important point because one process - * dictates to all other local procs which common sm component will be selected - * based on its own, local run-time test. + * dictates to all other local processes which common sm component will be + * selected based on its own, local run-time test. */ -static bool initialized = false; -static int num_times_registered = 0; -static int sysv_index = -1; -static int posix_index = -1; -static int common_sm_index = -1; -static char **sm_argv = NULL; -static char *sm_params = NULL; -static mca_common_sm_init_fn_t sm_init = NULL; -static mca_common_sm_seg_alloc_fn_t sm_seg_alloc = NULL; -static mca_common_sm_fini_fn_t sm_fini = NULL; -/* should be more than enough to store all common sm component names */ -static char sm_default[32]; -/* holds common sm help string */ -char sm_avail_help_str[OPAL_PATH_MAX]; +OBJ_CLASS_INSTANCE( + mca_common_sm_module_t, + opal_object_t, + NULL, + NULL +); -/** - * lock to protect multiple instances of query_sm_components() - * from being invoked simultaneously (because of rml usage). - */ -static opal_mutex_t mutex; - -/* common shared memory component information */ -typedef struct -{ - /* flag indicating whether or not the component is available */ - bool avail; - /* component name */ - char *sm_name; -} mca_common_sm_info_t; -/** - * NOTE: - * o array position dictates the default order in which - * the common shared memory components will be queried. - * o first component successfully queried gets selected. - * o sm_name format: {component availability, "component name,"} - * - * if you change the order of sm_avail_table below, - * don't forget to update mca_common_sm_comp_index_map_t. - * - * placing mmap before sysv in the list prevents sysv from ever being selected - * (in the default case). this is because, at least for now, mmap's selection - * query always succeeds. that is, sysv must be explicitly requested. - * NOTE: mmap is the default for now. 
- * - * {component availability, component name} - */ -static const mca_common_sm_info_t sm_avail_table[] = -{ - {true , "mmap," }, /* assume mmap is always available */ - {(bool)MCA_COMMON_SM_POSIX, "posix,"}, - {(bool)MCA_COMMON_SM_SYSV , "sysv," }, - {false , NULL } /* MUST BE LAST ITEM */ -}; -/* component index enum */ -typedef enum -{ - MCA_COMMON_SM_COMP_INDEX_MMAP = 0, - MCA_COMMON_SM_COMP_INDEX_POSIX, - MCA_COMMON_SM_COMP_INDEX_SYSV, - MCA_COMMON_SM_COMP_INDEX_NONE /* MUST BE LAST ITEM */ -} mca_common_sm_comp_index_map_t; - -/** - * list of RML messages that have arrived that have not yet been +/* list of RML messages that have arrived that have not yet been * consumed by the thread who is looking to complete its component * initialization based on the contents of the RML message. */ static opal_list_t pending_rml_msgs; - -/******************************************************************************/ - /* STATIC UTILITY FUNCTIONS */ -/******************************************************************************/ - -/******************************************************************************/ -/** - * this routine selects the common sm component that corresponds to - * sm_component_index's value. - * - * @param sm_component_index index corresponding to the common sm component that - * is to be selected. (IN) +/* flag indicating whether or not pending_rml_msgs has been initialized */ +static bool pending_rml_msgs_init = false; +/* lock to protect multiple instances of mca_common_sm_init() from being + * invoked simultaneously (because of RML usage). */ -static void -select_common_sm_component(int sm_component_index) +static opal_mutex_t mutex; +/* shared memory information used for initialization and setup. 
*/ +static opal_shmem_ds_t shmem_ds; +/* number of local processes */ +static size_t num_local_procs = 0; +/* indicates whether or not i'm the lowest named process */ +static bool lowest_local_proc = false; + +/* ////////////////////////////////////////////////////////////////////////// */ +/* static utility functions */ +/* ////////////////////////////////////////////////////////////////////////// */ + +/* ////////////////////////////////////////////////////////////////////////// */ +static mca_common_sm_module_t * +attach_and_init(const char *file_name, + size_t size_ctl_structure, + size_t data_seg_alignment) { - switch (sm_component_index) - { -#if MCA_COMMON_SM_POSIX - case MCA_COMMON_SM_COMP_INDEX_POSIX: - sm_init = mca_common_sm_posix_init; - sm_seg_alloc = mca_common_sm_posix_seg_alloc; - sm_fini = mca_common_sm_posix_fini; - break; -#endif - case MCA_COMMON_SM_COMP_INDEX_MMAP: -#if !MCA_COMMON_SM_WINDOWS - sm_init = mca_common_sm_mmap_init; - sm_seg_alloc = mca_common_sm_mmap_seg_alloc; - sm_fini = mca_common_sm_mmap_fini; -#else /* MCA_COMMON_SM_WINDOWS */ - sm_init = mca_common_sm_windows_init; - sm_seg_alloc = mca_common_sm_windows_seg_alloc; - sm_fini = mca_common_sm_windows_fini; -#endif - break; -#if MCA_COMMON_SM_SYSV - case MCA_COMMON_SM_COMP_INDEX_SYSV: - sm_init = mca_common_sm_sysv_init; - sm_seg_alloc = mca_common_sm_sysv_seg_alloc; - sm_fini = mca_common_sm_sysv_fini; - break; -#endif - case MCA_COMMON_SM_COMP_INDEX_NONE: - sm_init = NULL; - sm_seg_alloc = NULL; - sm_fini = NULL; - break; - default: - sm_init = NULL; - sm_seg_alloc = NULL; - sm_fini = NULL; - opal_output(0, "WARNING: invalid common sm component index."); - break; + mca_common_sm_module_t *map = NULL; + mca_common_sm_seg_header_t *seg = NULL; + unsigned char *addr = NULL; + + /* map the file and initialize segment state */ + if (NULL == (seg = (mca_common_sm_seg_header_t *) + opal_shmem_segment_attach(&shmem_ds))) { + return NULL; } + opal_atomic_rmb(); + + /* set up the map 
object */ + if (NULL == (map = OBJ_NEW(mca_common_sm_module_t))) { + ORTE_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE); + return NULL; + } + + /* copy information: from ====> to */ + opal_shmem_ds_copy(&shmem_ds, &map->shmem_ds); + + /* the first entry in the file is the control structure. the first + * entry in the control structure is an mca_common_sm_seg_header_t + * element + */ + map->module_seg = seg; + + addr = ((unsigned char *)seg) + size_ctl_structure; + /* if we have a data segment (i.e., if 0 != data_seg_alignment), + * then make it the first aligned address after the control + * structure. IF THIS HAPPENS, THIS IS A PROGRAMMING ERROR IN + * OPEN MPI! + */ + if (0 != data_seg_alignment) { + addr = OPAL_ALIGN_PTR(addr, data_seg_alignment, unsigned char *); + /* is addr past end of the shared memory segment? */ + if ((unsigned char *)seg + shmem_ds.seg_size < addr) { + orte_show_help("help-mpi-common-sm.txt", "mmap too small", 1, + orte_process_info.nodename, + (unsigned long)shmem_ds.seg_size, + (unsigned long)size_ctl_structure, + (unsigned long)data_seg_alignment); + return NULL; + } + } + + map->module_data_addr = addr; + map->module_seg_addr = (unsigned char *)seg; + + /* map object successfully initialized - we can safely increment + * seg_num_procs_inited. the lowest local process waits on this + * counter before calling opal_shmem_unlink. + */ + opal_atomic_add_size_t(&map->module_seg->seg_num_procs_inited, 1); + opal_atomic_wmb(); + + return map; } -/******************************************************************************/ -/** - * this routine performs a series of run-time tests that determines whether or - * not a particular common sm component can be selected safely. once a component - * is successfully selected, its component index is returned. - * - * @return index corresponding to the selected common sm component. see - * mca_common_sm_comp_index_map_t for valid values. 
- */ -static int -query_sm_components(void) -{ - int help_msg_displayed = 0; - int sm_component_index = MCA_COMMON_SM_COMP_INDEX_NONE; - int i; - - if (NULL != sm_argv) - { - MCA_COMMON_SM_OUTPUT_VERBOSE("looking for available components"); - for (i = 0; NULL != sm_argv[i]; ++i) - { - if (0 == strcasecmp(sm_argv[i], "posix")) - { -#if !MCA_COMMON_SM_POSIX - if (!help_msg_displayed) - { - orte_show_help("help-mpi-common-sm.txt", - "sm support", - 1, - sm_argv[i]); - help_msg_displayed = 1; - } -#else /* MCA_COMMON_SM_POSIX */ - MCA_COMMON_SM_OUTPUT_VERBOSE("querying posix"); - /** - * make sure that we can safely use posix sm on this system - */ - if (OMPI_SUCCESS == - mca_common_sm_posix_component_query()) - { - MCA_COMMON_SM_OUTPUT_VERBOSE("selecting posix"); - sm_component_index = MCA_COMMON_SM_COMP_INDEX_POSIX; - break; - } - else /* let the user know that we tried posix and failed */ - { - MCA_COMMON_SM_OUTPUT_VERBOSE("cannot select posix"); - orte_show_help("help-mpi-common-sm.txt", - "sm rt test fail", - 1, - "Posix"); - } -#endif - } - else if (0 == strcasecmp(sm_argv[i], "mmap")) - { - MCA_COMMON_SM_OUTPUT_VERBOSE("selecting mmap"); - /* there is no run-time test for mmap, so just select it */ - sm_component_index = MCA_COMMON_SM_COMP_INDEX_MMAP; - break; - } - else if (0 == strcasecmp(sm_argv[i], "sysv")) - { -#if !MCA_COMMON_SM_SYSV - if (!help_msg_displayed) - { - orte_show_help("help-mpi-common-sm.txt", - "sm support", - 1, - sm_argv[i]); - help_msg_displayed = 1; - } -#else /* MCA_COMMON_SM_SYSV */ - MCA_COMMON_SM_OUTPUT_VERBOSE("querying sysv"); - /* SKG - disable sysv support when cr is enabled. - * could presumably work properly someday. 
- */ -#if OPAL_ENABLE_FT_CR == 1 - if (!opal_cr_is_enabled) - { -#endif /* OPAL_ENABLE_FT_CR */ - /* make sure that we can safely use sysv on this system */ - if (OMPI_SUCCESS == mca_common_sm_sysv_component_query()) - { - MCA_COMMON_SM_OUTPUT_VERBOSE("selecting sysv"); - sm_component_index = MCA_COMMON_SM_COMP_INDEX_SYSV; - break; - } - else /* let the user know that we tried sysv and failed */ - { - MCA_COMMON_SM_OUTPUT_VERBOSE("cannot select sysv"); - orte_show_help("help-mpi-common-sm.txt", - "sm rt test fail", - 1, - "System V"); - } -#if OPAL_ENABLE_FT_CR == 1 - } - else - { - orte_show_help("help-mpi-common-sm.txt", - "sysv with cr", - 1); - help_msg_displayed = 1; - } -#endif /* OPAL_ENABLE_FT_CR */ -#endif - } - else /* unknown value */ - { - if (!help_msg_displayed) - { - orte_show_help("help-mpi-common-sm.txt", - "sm support", - 1, - sm_argv[i]); - help_msg_displayed = 1; - } - } - } - } - - if (MCA_COMMON_SM_COMP_INDEX_NONE == sm_component_index) - { - MCA_COMMON_SM_OUTPUT_VERBOSE("no component selected"); - } - - return sm_component_index; -} - -/******************************************************************************/ -int -mca_common_sm_param_register(mca_base_component_t *c) -{ - if (++num_times_registered > 1) { - return OMPI_SUCCESS; - } - if (num_times_registered < 1) { - /* This should never happen -- programmer error */ - return OMPI_ERROR; - } - - /* also using sysv_index's value as an initialization flag */ - if (-1 == sysv_index) - { - int i; - char *last_char; - - memset(sm_default, '\0', sizeof(sm_default)); - - /* populate sm_default with all available common sm component names */ - for (i = 0; NULL != sm_avail_table[i].sm_name; ++i) - { - if (sm_avail_table[i].avail) - { - strncat(sm_default, - sm_avail_table[i].sm_name, - sizeof(sm_default) - 1); - } - } - /* remove the last comma from the char buff */ - if (NULL != (last_char = strrchr(sm_default, ','))) - { - *last_char = '\0'; - } - /* set up help string */ - snprintf( - 
sm_avail_help_str, - sizeof(sm_avail_help_str) - 1, - "Which shared memory support will be used. Valid values: (%s)%s", - sm_default, - (i > 1) ? " - or a comma delimited combination of them " - "(order dependent). The first component that is successfully " - "selected is used." : "." - ); - sysv_index = mca_base_param_reg_int_name( - "mpi", - "common_sm_have_sysv_support", - "Whether shared memory has System V support or not", - false, - true, - MCA_COMMON_SM_SYSV, - NULL - ); - posix_index = mca_base_param_reg_int_name( - "mpi", - "common_sm_have_posix_support", - "Whether shared memory has POSIX support or not", - false, - true, - MCA_COMMON_SM_POSIX, - NULL - ); - } - - /* register mpi_common_sm */ - common_sm_index = mca_base_param_reg_string_name("mpi", - "common_sm", - sm_avail_help_str, - false, - false, - /* default value */ - sm_default, - NULL); - - /* also register MCA param synonyms for the component */ - mca_base_param_reg_syn(sysv_index, c, "have_sysv_support", false); - mca_base_param_reg_syn(posix_index, c, "have_posix_support", false); - mca_base_param_reg_syn(common_sm_index, c, "store", false); - - /* Once the synonyms are registered, look up the value */ - if (OPAL_SUCCESS != mca_base_param_lookup_string(common_sm_index, - &sm_params)) - { - return OMPI_ERROR; - } - - /* empty string == try all available */ - if (0 == strcmp(sm_params, "")) - { - if (NULL == (sm_argv = opal_argv_split(sm_default, ','))) - { - opal_output(0, - "WARNING: could not parse mpi_common_sm request."); - } - } - /* try what the user specified */ - else - { - if (NULL == (sm_argv = opal_argv_split(sm_params, ','))) - { - opal_output(0, - "WARNING: could not parse mpi_common_sm request."); - } - } - free(sm_params); - - return OMPI_SUCCESS; -} - -/******************************************************************************/ -int mca_common_sm_param_unregister(mca_base_component_t *c) -{ - if (--num_times_registered > 0) { - return OMPI_SUCCESS; - } - if 
(num_times_registered < 0) { - /* This should never happen -- programmer error */ - return OMPI_ERROR; - } - - if (NULL != sm_argv) { - opal_argv_free(sm_argv); - sm_argv = NULL; - } - - return OMPI_SUCCESS; -} - -/******************************************************************************/ +/* ////////////////////////////////////////////////////////////////////////// */ mca_common_sm_module_t * mca_common_sm_init(ompi_proc_t **procs, size_t num_procs, @@ -453,53 +153,42 @@ mca_common_sm_init(ompi_proc_t **procs, size_t size_ctl_structure, size_t data_seg_alignment) { - size_t num_local_procs = 0; - bool found_lowest = false; - bool lowest; + mca_common_sm_module_t *map = NULL; + bool found_lowest = false; size_t p; + size_t mem_offset; ompi_proc_t *temp_proc; - /** - * NOTE: the selected component's init routine, unlike mca_common_sm_init, - * must be provided with: - * o a SORTED procs array - * o the number of LOCAL processes within procs array - * - * so always do the following before calling sm_init: - * o reorder procs array to have all the local procs at the beginning. - * o look for the local proc with the lowest name. - * o determine the number of local procs. - * o ensure that procs[0] is the lowest named process. + num_local_procs = 0; + lowest_local_proc = false; + + /* o reorder procs array to have all the local procs at the beginning. + * o look for the local proc with the lowest name. + * o determine the number of local procs. + * o ensure that procs[0] is the lowest named process. 
*/ - for (p = 0; p < num_procs; ++p) - { - if (OPAL_PROC_ON_LOCAL_NODE(procs[p]->proc_flags)) - { + for (p = 0; p < num_procs; ++p) { + if (OPAL_PROC_ON_LOCAL_NODE(procs[p]->proc_flags)) { /* if we don't have a lowest, save the first one */ - if (!found_lowest) - { + if (!found_lowest) { procs[0] = procs[p]; found_lowest = true; } - else - { + else { /* save this proc */ procs[num_local_procs] = procs[p]; - /** - * if we have a new lowest, swap it with position 0 + /* if we have a new lowest, swap it with position 0 * so that procs[0] is always the lowest named proc */ if (orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &(procs[p]->proc_name), - &(procs[0]->proc_name)) < 0) - { + &(procs[0]->proc_name)) < 0) { temp_proc = procs[0]; procs[0] = procs[p]; procs[num_local_procs] = temp_proc; } } - /** - * regardless of the comparisons above, we found + /* regardless of the comparisons above, we found * another proc on the local node, so increment */ ++num_local_procs; @@ -507,79 +196,93 @@ mca_common_sm_init(ompi_proc_t **procs, } /* if there is less than 2 local processes, there's nothing to do. */ - if (num_local_procs < 2) - { + if (num_local_procs < 2) { return NULL; } - if (!initialized) - { - mca_common_sm_rml_sm_info_t sm_info; - sm_info.id = MCA_COMMON_SM_COMP_INDEX_NONE; - memset(sm_info.posix_fname_buff, - '\0', - OMPI_COMMON_SM_POSIX_FILE_LEN_MAX); + /* determine whether or not i am the lowest local process */ + lowest_local_proc = (0 == orte_util_compare_name_fields( + ORTE_NS_CMP_ALL, + ORTE_PROC_MY_NAME, + &(procs[0]->proc_name))); - lowest = (0 == orte_util_compare_name_fields( - ORTE_NS_CMP_ALL, - ORTE_PROC_MY_NAME, - &(procs[0]->proc_name))); - - /** - * lock here to prevent multiple threads from invoking this function - * simultaneously. the critical section we're protecting is usage of - * the RML in this block. - */ - opal_mutex_lock(&mutex); + /* lock here to prevent multiple threads from invoking this + * function simultaneously. 
the critical section we're protecting + * is usage of the RML in this block. + */ + opal_mutex_lock(&mutex); + if (!pending_rml_msgs_init) { OBJ_CONSTRUCT(&(pending_rml_msgs), opal_list_t); - - /** - * figure out if i am the lowest proc in the group. - * if i am, select a common sm component and send its index to the rest - * of the local procs so they can select the same common sm component. - */ - if (lowest) - { - /* get the component index */ - sm_info.id = query_sm_components(); + pending_rml_msgs_init = true; + } + /* figure out if i am the lowest rank in the group. + * if so, i will create the shared memory backing store + */ + if (lowest_local_proc) { + if (OPAL_SUCCESS == opal_shmem_segment_create(&shmem_ds, file_name, + size)) { + map = attach_and_init(file_name, size_ctl_structure, + data_seg_alignment); + if (NULL != map) { + mem_offset = map->module_data_addr - + (unsigned char *)map->module_seg; + map->module_seg->seg_offset = mem_offset; + map->module_seg->seg_size = size - mem_offset; + opal_atomic_init(&map->module_seg->seg_lock, + OPAL_ATOMIC_UNLOCKED); + map->module_seg->seg_inited = 0; + } + else { + /* fail! + * only invalidate the shmem_ds. doing so will let the rest + * of the local processes know that the lowest local rank + * failed to properly initialize the shared memory segment, so + * they should try to carry on without shared memory support + */ + OPAL_SHMEM_DS_INVALIDATE(&shmem_ds); + } } - /* no return code check here because the error - * path is the same as the expected path */ - mca_common_sm_rml_info_bcast(&sm_info, - procs, - num_local_procs, - OMPI_RML_TAG_COMMON_SM_COMP_INDEX, - lowest, - file_name, - &(pending_rml_msgs)); - - opal_mutex_unlock(&mutex); - select_common_sm_component(sm_info.id); - initialized = true; } - if (NULL != sm_init) - { - /* notice that we are passing a SORTED procs array to the selected - * component along with the number of LOCAL processes found within - * procs. 
- */ - return sm_init(procs, - num_local_procs, - size, - file_name, - size_ctl_structure, - data_seg_alignment); + /* send shmem info to the rest of the local procs. */ + if (OMPI_SUCCESS != mca_common_sm_rml_info_bcast( + &shmem_ds, procs, num_local_procs, + OMPI_RML_TAG_SM_BACK_FILE_CREATED, + lowest_local_proc, file_name, + &(pending_rml_msgs))) { + goto out; } - return NULL; + + /* are we dealing with a valid shmem_ds? that is, did the lowest + * process successfully initialize the shared memory segment? + */ + if (OPAL_SHMEM_DS_IS_VALID(&shmem_ds)) { + if (!lowest_local_proc) { + map = attach_and_init(file_name, size_ctl_structure, + data_seg_alignment); + } + else { + /* wait until every other participating process has attached to the + * shared memory segment. + */ + while (num_local_procs > map->module_seg->seg_num_procs_inited) { + opal_atomic_rmb(); + } + opal_shmem_unlink(&shmem_ds); + } + } + +out: + opal_mutex_unlock(&mutex); + return map; } -/******************************************************************************/ +/* ////////////////////////////////////////////////////////////////////////// */ /** - * This routine is the same as mca_common_sm_mmap_init() except that + * this routine is the same as mca_common_sm_init() except that * it takes an (ompi_group_t *) parameter to specify the peers rather - * than an array of procs. Unlike mca_common_sm_mmap_init(), the + * than an array of procs. unlike mca_common_sm_init(), the * group must contain *only* local peers, or this function will return * NULL and not create any shared memory segment. */ @@ -591,77 +294,94 @@ mca_common_sm_init_group(ompi_group_t *group, size_t data_seg_alignment) { mca_common_sm_module_t *ret = NULL; - ompi_proc_t **procs = NULL; + ompi_proc_t **procs = NULL; + size_t i; + size_t group_size; + ompi_proc_t *proc; - /* make sure sm_init has been properly initialized.
do this because - * sm_init_group only does prep work before passing along the real work to - * sm_init. - */ - if (NULL != sm_init) - { - size_t i; - size_t group_size; - ompi_proc_t *proc; - - /* if there is less than 2 procs, there's nothing to do */ - if ((group_size = ompi_group_size(group)) < 2) - { - goto out; - } - if (NULL == (procs = (ompi_proc_t **) - malloc(sizeof(ompi_proc_t *) * group_size))) - - { - ORTE_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE); - goto out; - } - /* make sure that all the procs in the group are local */ - for (i = 0; i < group_size; ++i) - { - proc = ompi_group_peer_lookup(group, i); - if (!OPAL_PROC_ON_LOCAL_NODE(proc->proc_flags)) - { - goto out; - } - procs[i] = proc; - } - /* let sm_init take care of the rest ... */ - ret = sm_init(procs, - group_size, - size, - file_name, - size_ctl_structure, - data_seg_alignment); + /* if there is less than 2 procs, there's nothing to do */ + if ((group_size = ompi_group_size(group)) < 2) { + goto out; } - + else if (NULL == (procs = (ompi_proc_t **) + malloc(sizeof(ompi_proc_t *) * group_size))) { + ORTE_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE); + goto out; + } + /* make sure that all the procs in the group are local */ + for (i = 0; i < group_size; ++i) { + proc = ompi_group_peer_lookup(group, i); + if (!OPAL_PROC_ON_LOCAL_NODE(proc->proc_flags)) { + goto out; + } + procs[i] = proc; + } + /* let mca_common_sm_init take care of the rest ... */ + ret = mca_common_sm_init(procs, group_size, size, file_name, + size_ctl_structure, data_seg_alignment); out: - if (NULL != procs) - { + if (NULL != procs) { free(procs); } return ret; } -/******************************************************************************/ +/* ////////////////////////////////////////////////////////////////////////// */ +/** + * allocate memory from a previously allocated shared memory + * block. 
+ * + * @param size size of request, in bytes (IN) + * + * @retval addr virtual address + */ void * mca_common_sm_seg_alloc(struct mca_mpool_base_module_t *mpool, size_t *size, mca_mpool_base_registration_t **registration) { - if (NULL != sm_seg_alloc) - { - return sm_seg_alloc(mpool, size, registration); + mca_mpool_sm_module_t *sm_module = (mca_mpool_sm_module_t *)mpool; + mca_common_sm_seg_header_t* seg = sm_module->sm_common_module->module_seg; + void *addr; + + opal_atomic_lock(&seg->seg_lock); + if (seg->seg_offset + *size > seg->seg_size) { + addr = NULL; } - return NULL; + else { + size_t fixup; + + /* add base address to segment offset */ + addr = sm_module->sm_common_module->module_data_addr + seg->seg_offset; + seg->seg_offset += *size; + + /* fix up seg_offset so next allocation is aligned on a + * sizeof(long) boundary. Do it here so that we don't have to + * check before checking remaining size in buffer + */ + if ((fixup = (seg->seg_offset & (sizeof(long) - 1))) > 0) { + seg->seg_offset += sizeof(long) - fixup; + } + } + if (NULL != registration) { + *registration = NULL; + } + opal_atomic_unlock(&seg->seg_lock); + return addr; } -/******************************************************************************/ +/* ////////////////////////////////////////////////////////////////////////// */ int mca_common_sm_fini(mca_common_sm_module_t *mca_common_sm_module) { - if (NULL != sm_fini && NULL != mca_common_sm_module) { - return sm_fini(mca_common_sm_module); + int rc = OMPI_SUCCESS; + + if (NULL != mca_common_sm_module->module_seg) { + if (OPAL_SUCCESS != + opal_shmem_segment_detach(&mca_common_sm_module->shmem_ds)) { + rc = OMPI_ERROR; + } } - return OMPI_ERR_NOT_FOUND; + return rc; } diff --git a/ompi/mca/common/sm/common_sm.h b/ompi/mca/common/sm/common_sm.h index 437913956c..1b37ab28e5 100644 --- a/ompi/mca/common/sm/common_sm.h +++ b/ompi/mca/common/sm/common_sm.h @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of
California. * All rights reserved. * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010 Los Alamos National Security, LLC. + * Copyright (c) 2010-2011 Los Alamos National Security, LLC. * All rights reserved. * $COPYRIGHT$ * @@ -28,49 +28,36 @@ #include "opal/class/opal_object.h" #include "opal/class/opal_list.h" #include "opal/sys/atomic.h" +#include "opal/mca/shmem/shmem.h" + #include "ompi/mca/mpool/mpool.h" #include "ompi/proc/proc.h" #include "ompi/group/group.h" #include "ompi/mca/btl/base/base.h" #include "ompi/mca/btl/base/btl_base_error.h" -#define MCA_COMMON_SM_OUTPUT_VERBOSE(msg) \ -opal_output_verbose(100, \ - mca_btl_base_output, \ - "mca: common: sm: %s", msg); - -/* posix sm file name length max. on some systems shm_open's file name limit - * is pretty low (32 chars, for instance ). 16 is plenty for our needs, but - * extra work on our end is needed to ensure things work properly. if a - * system's limit is lower than OMPI_COMMON_SM_POSIX_FILE_LEN_MAX, then the - * run-time test will catch that fact and posix sm will be disqualified. see - * comments regarding this in common_sm_posix.c. - */ -#define OMPI_COMMON_SM_POSIX_FILE_LEN_MAX 16 - BEGIN_C_DECLS struct mca_mpool_base_module_t; -typedef struct mca_common_sm_seg_header_t -{ +typedef struct mca_common_sm_seg_header_t { /* lock to control atomic access */ opal_atomic_lock_t seg_lock; - /* is the segment ready for use */ + /* indicates whether or not the segment is ready for use */ volatile int32_t seg_inited; - /** - * number of local processes that are - * attached to the shared memory segment + /* number of local processes that are attached to the shared memory segment. + * this is primarily used as a way of determining whether or not it is safe + * to unlink the shared memory backing store. for example, once + * seg_num_procs_inited equals the number of local processes, we can unlink.
*/ - volatile int32_t seg_att; + volatile size_t seg_num_procs_inited; /* offset to next available memory location available for allocation */ size_t seg_offset; /* total size of the segment */ size_t seg_size; } mca_common_sm_seg_header_t; -typedef struct mca_common_sm_module_t -{ +typedef struct mca_common_sm_module_t { /* double link list element */ opal_list_item_t module_item; /* pointer to header embedded in the shared memory segment */ @@ -79,9 +66,8 @@ typedef struct mca_common_sm_module_t unsigned char *module_seg_addr; /* base address of data segment */ unsigned char *module_data_addr; - /* how big it is (in bytes) */ - size_t module_size; - char module_seg_path[OPAL_PATH_MAX]; + /* shared memory backing facility object that encapsulates shmem info */ + opal_shmem_ds_t shmem_ds; #if defined(__WINDOWS__) /* handle to the object */ HANDLE hMappedObject; @@ -90,18 +76,6 @@ typedef struct mca_common_sm_module_t OBJ_CLASS_DECLARATION(mca_common_sm_module_t); -/** - * Register the MCA parameters for common sm. - */ -OMPI_DECLSPEC int -mca_common_sm_param_register(mca_base_component_t *c); - -/** - * Free resources associated with registering MCA params for common sm. - */ -OMPI_DECLSPEC int -mca_common_sm_param_unregister(mca_base_component_t *c); - /** * This routine is used to set up a shared memory segment (whether * it's an mmaped file or a SYSV IPC segment). It is assumed that @@ -141,14 +115,6 @@ mca_common_sm_init(ompi_proc_t **procs, size_t size_ctl_structure, size_t data_seg_alignment); -typedef mca_common_sm_module_t * -(*mca_common_sm_init_fn_t)(ompi_proc_t **procs, - size_t num_procs, - size_t size, - char *file_name, - size_t size_ctl_structure, - size_t data_seg_alignment); - /** * This routine is used to set up a shared memory segment (whether * it's an mmaped file or a SYSV IPC segment). 
It is assumed that @@ -168,13 +134,6 @@ mca_common_sm_init_group(ompi_group_t *group, size_t size_ctl_structure, size_t data_seg_alignment); -typedef mca_common_sm_module_t * -(*mca_common_sm_init_group_fn_t)(ompi_group_t *group, - size_t size, - char *file_name, - size_t size_ctl_structure, - size_t data_seg_alignment); - /** * callback from the sm mpool */ @@ -183,11 +142,6 @@ mca_common_sm_seg_alloc(struct mca_mpool_base_module_t *mpool, size_t* size, mca_mpool_base_registration_t **registration); -typedef void * -(*mca_common_sm_seg_alloc_fn_t)(struct mca_mpool_base_module_t *mpool, - size_t *size, - mca_mpool_base_registration_t **registration); - /** * This function will release all local resources attached to the * shared memory segment. We assume that the operating system will @@ -196,17 +150,14 @@ typedef void * * @param mca_common_sm_module - instance that is shared between * components that use shared memory. * - * @returnvalue 0 if everything was OK, otherwise a negative value. + * @return OMPI_SUCCESS if everything was okay, otherwise return OMPI_ERROR. */ OMPI_DECLSPEC extern int mca_common_sm_fini(mca_common_sm_module_t *mca_common_sm_module); -typedef int -(*mca_common_sm_fini_fn_t)(mca_common_sm_module_t *mca_common_sm_module); - -/* - * instance that is shared between components that use shared memory +/** + * instance that is shared between components that use shared memory. */ OMPI_DECLSPEC extern mca_common_sm_module_t *mca_common_sm_module; diff --git a/ompi/mca/common/sm/common_sm_mmap.c b/ompi/mca/common/sm/common_sm_mmap.c deleted file mode 100644 index ccd43c94f8..0000000000 --- a/ompi/mca/common/sm/common_sm_mmap.c +++ /dev/null @@ -1,357 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010-2011 Los Alamos National Security, LLC. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include -#ifdef HAVE_UNISTD_H -#include -#endif -#ifdef HAVE_STRING_H -#include -#endif /* HAVE_STRING_H */ -#ifdef HAVE_FCNTL_H -#include -#endif /* HAVE_FCNTL_H */ -#ifdef HAVE_TIME_H -#include -#endif /* HAVE_TIME_H */ -#ifdef HAVE_SYS_TYPES_H -#include -#endif -#ifdef HAVE_SYS_STAT_H -#include -#endif /* HAVE_SYS_STAT_H */ -#ifdef HAVE_SYS_MMAN_H -#include -#endif - -#include "opal/util/output.h" -#include "opal/util/path.h" -#include "opal/align.h" -#include "opal/threads/mutex.h" -#include "opal/util/opal_sos.h" - -#include "orte/mca/rml/rml.h" -#include "orte/util/name_fns.h" -#include "orte/util/show_help.h" -#include "orte/runtime/orte_globals.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "ompi/constants.h" -#include "ompi/proc/proc.h" -#include "ompi/mca/dpm/dpm.h" -#include "ompi/mca/mpool/sm/mpool_sm.h" - -#include "common_sm_rml.h" -#include "common_sm_mmap.h" - -OBJ_CLASS_INSTANCE( - mca_common_sm_module_mmap_t, - opal_object_t, - NULL, - NULL -); - -/** - * list of RML messages that have arrived that have not yet been - * consumed by the thread who is looking to complete its component - * initialization based on the contents of the RML message. - */ -static opal_list_t pending_rml_msgs; -static bool pending_rml_msgs_init = false; - -/* - * Lock to protect multiple instances of mmap_init() from being - * invoked simultaneously (because of RML usage). 
- */ -static opal_mutex_t mutex; - -/** - * shared memory information used for initialization and setup. - */ -static mca_common_sm_rml_sm_info_t sm_info; - -static mca_common_sm_module_mmap_t * -create_map(int fd, size_t size, - char *file_name, - size_t size_ctl_structure, - size_t data_seg_alignment) -{ - mca_common_sm_module_mmap_t *map; - mca_common_sm_seg_header_t *seg; - unsigned char *addr = NULL; - - /* map the file and initialize segment state */ - seg = (mca_common_sm_seg_header_t *) - mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); - if (MAP_FAILED == seg) { - orte_show_help("help-mpi-common-sm.txt", "sys call fail", 1, - orte_process_info.nodename, - "mmap(2)", "", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - strerror(errno), errno); - return NULL; - } - - /* set up the map object */ - map = OBJ_NEW(mca_common_sm_module_mmap_t); - strncpy(map->super.module_seg_path, file_name, OPAL_PATH_MAX - 1); - /* the first entry in the file is the control structure. The first - entry in the control structure is an mca_common_sm_seg_header_t - element */ - map->super.module_seg = seg; - - addr = ((unsigned char *)seg) + size_ctl_structure; - /* If we have a data segment (i.e., if 0 != data_seg_alignment), - then make it the first aligned address after the control - structure. IF THIS HAPPENS, THIS IS A PROGRAMMING ERROR IN - OPEN MPI!*/ - if (0 != data_seg_alignment) { - addr = OPAL_ALIGN_PTR(addr, data_seg_alignment, unsigned char*); - - /* is addr past end of file ? 
*/ - if((unsigned char*)seg + size < addr) { - orte_show_help("help-mpi-common-sm.txt", "mmap too small", 1, - orte_process_info.nodename, - (unsigned long) size, - (unsigned long) size_ctl_structure, - (unsigned long) data_seg_alignment); - return NULL; - } - } - map->super.module_data_addr = addr; - map->super.module_seg_addr = (unsigned char *)seg; - map->super.module_size = size; - - /* map object successful initialized - we can safely increment seg_att */ - opal_atomic_wmb(); - opal_atomic_add_32(&map->super.module_seg->seg_att, 1); - - return map; -} - -/******************************************************************************/ -/** - * mca_common_sm_mmap_component_query - */ -int -mca_common_sm_mmap_component_query(void) -{ - return OMPI_SUCCESS; -} - -mca_common_sm_module_t * -mca_common_sm_mmap_init(ompi_proc_t **sorted_procs, - size_t num_loc_procs, - size_t size, char *file_name, - size_t size_ctl_structure, - size_t data_seg_alignment) -{ - int fd = -1; - bool lowest; - mca_common_sm_module_mmap_t *map = NULL; - size_t mem_offset; - int num_local_procs; - - lowest = (0 == orte_util_compare_name_fields( - ORTE_NS_CMP_ALL, - ORTE_PROC_MY_NAME, - &(sorted_procs[0]->proc_name))); - - /* using sm_info.id as an initialization marker: - * o 0 -> not initialized; 1 -> initialized - */ - sm_info.id = 0; - memset(sm_info.posix_fname_buff, '\0', OMPI_COMMON_SM_POSIX_FILE_LEN_MAX); - /** - * remember that this function was passed - * a sorted procs array and a local proc count. - */ - num_local_procs = num_loc_procs; - - /* Lock here to prevent multiple threads from invoking this - function simultaneously. The critical section we're protecting - is usage of the RML in this block. */ - opal_mutex_lock(&mutex); - - if (!pending_rml_msgs_init) - { - OBJ_CONSTRUCT(&(pending_rml_msgs), opal_list_t); - pending_rml_msgs_init = true; - } - - /* Figure out if I am the lowest rank in the group. If so, I will - create the shared file. 
*/ - if (lowest) { - /* check, whether the specified filename is on a network file system */ - if (opal_path_nfs(file_name)) { - orte_show_help("help-mpi-common-sm.txt", "mmap on nfs", 1, - orte_process_info.nodename, file_name); - } - /* process initializing the file */ - fd = open(file_name, O_CREAT|O_RDWR, 0600); - if (fd < 0) { - int err = errno; - orte_show_help("help-mpi-common-sm.txt", "sys call fail", 1, - orte_process_info.nodename, - "open(2)", file_name, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - strerror(err), err); - } else if (ftruncate(fd, size) != 0) { - int err = errno; - orte_show_help("help-mpi-common-sm.txt", "sys call fail", 1, - orte_process_info.nodename, - "ftruncate(2)", "", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - strerror(err), err); - close(fd); - unlink(file_name); - fd = -1; - } else { - map = create_map(fd, size, file_name, size_ctl_structure, - data_seg_alignment); - if (map != NULL) { - sm_info.id = 1; - - /* initialize the segment - only the first process - to open the file */ - mem_offset = - map->super.module_data_addr - - (unsigned char *)map->super.module_seg; - map->super.module_seg->seg_offset = mem_offset; - map->super.module_seg->seg_size = size - mem_offset; - opal_atomic_unlock(&map->super.module_seg->seg_lock); - map->super.module_seg->seg_inited = 0; - } else { - close(fd); - unlink(file_name); - fd = -1; - } - } - } - - /* Signal the rest of the local procs that the backing file - has been created. 
*/ - if (OMPI_SUCCESS != mca_common_sm_rml_info_bcast( - &sm_info, - sorted_procs, - num_local_procs, - OMPI_RML_TAG_SM_BACK_FILE_CREATED, - lowest, - file_name, - &(pending_rml_msgs))) { - goto out; - } - - if (lowest) - { - if (1 == sm_info.id) - { - /* wait until all other local procs have reported in */ - while (num_local_procs > map->super.module_seg->seg_att) - { - opal_atomic_rmb(); - } - /** - * all other local procs reported in, so it's safe to unlink - */ - unlink(file_name); - } - } else { - /* check to see if file initialized correctly */ - if (sm_info.id != 0) { - fd = open(file_name, O_RDWR, 0600); - - if (fd != -1) { - map = create_map(fd, size, file_name, size_ctl_structure, - data_seg_alignment); - } - } - } - -out: - opal_mutex_unlock(&mutex); - - if (fd != -1) { - close(fd); - } - - return &(map->super); -} - -int -mca_common_sm_mmap_fini(mca_common_sm_module_t *mca_common_sm_module) -{ - mca_common_sm_module_mmap_t *mmap_module = - (mca_common_sm_module_mmap_t *)mca_common_sm_module; - int rc = OMPI_SUCCESS; - - if( NULL != mmap_module->super.module_seg ) { - rc = munmap((void*) mmap_module->super.module_seg_addr, - mmap_module->super.module_size); - mmap_module->super.module_seg_addr = NULL; - mmap_module->super.module_size = 0; - } - return rc; -} - -/** - * allocate memory from a previously allocated shared memory - * block. 
- * - * @param size size of request, in bytes (IN) - * - * @retval addr virtual address - */ - -void * -mca_common_sm_mmap_seg_alloc(struct mca_mpool_base_module_t* mpool, - size_t* size, - mca_mpool_base_registration_t** registration) -{ - mca_mpool_sm_module_t *sm_module = (mca_mpool_sm_module_t*) mpool; - mca_common_sm_module_mmap_t *map = - (mca_common_sm_module_mmap_t *)sm_module->sm_common_module; - mca_common_sm_seg_header_t* seg = map->super.module_seg; - void* addr; - - opal_atomic_lock(&seg->seg_lock); - if(seg->seg_offset + *size > seg->seg_size) { - addr = NULL; - } else { - size_t fixup; - - /* add base address to segment offset */ - addr = map->super.module_data_addr + seg->seg_offset; - seg->seg_offset += *size; - - /* fix up seg_offset so next allocation is aligned on a - sizeof(long) boundry. Do it here so that we don't have to - check before checking remaining size in buffer */ - if ((fixup = (seg->seg_offset & (sizeof(long) - 1))) > 0) { - seg->seg_offset += sizeof(long) - fixup; - } - } - if (NULL != registration) { - *registration = NULL; - } - opal_atomic_unlock(&seg->seg_lock); - return addr; -} - diff --git a/ompi/mca/common/sm/common_sm_mmap.h b/ompi/mca/common/sm/common_sm_mmap.h deleted file mode 100644 index 4475d773a8..0000000000 --- a/ompi/mca/common/sm/common_sm_mmap.h +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010 Los Alamos National Security, LLC. 
- * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef _COMMON_SM_MMAP_H_ -#define _COMMON_SM_MMAP_H_ - -#include "ompi_config.h" - -#include "opal/class/opal_object.h" -#include "opal/class/opal_list.h" -#include "opal/sys/atomic.h" -#include "ompi/mca/mpool/mpool.h" -#include "ompi/proc/proc.h" -#include "ompi/mca/common/sm/common_sm.h" - -BEGIN_C_DECLS - -struct mca_mpool_base_module_t; - -typedef struct mca_common_sm_module_mmap_t -{ - mca_common_sm_module_t super; -} mca_common_sm_module_mmap_t; - -OBJ_CLASS_DECLARATION(mca_common_sm_module_mmap_t); - -/** - * This routine is used to set up a shared memory file, backed - * by a specified file. It is assumed that the file does not - * exist before any of the current set of processes try and open - * it. - * - * @param sorted_procs - array of (ompi_proc_t *)'s to create this shared memory - * segment for. this routine, unlike the top-level - * mca_common_sm_init routine, assumes that sorted_procs - * is in the following state: all the local procs at the - * beginning; sorted_procs[0] is the lowest named process. - * (IN) - * - * @param num_loc_procs - number of local procs contained within sorted_procs - * (IN) - * - * @param size - size of the file, in bytes (IN) - * - * @param file_name name of file to be opened. (IN) - * - * @param size_ctl_structure size of the control structure at - * the head of the file. The control structure - * is assumed to have mca_common_sm_seg_header_t - * as its first segment (IN) - * - * @param data_set_alignment alignment of the data segment. this - * follows the control structure. If this - * value if 0, then assume that there will - * be no data segment following the control - * structure. (IN) - * - * @return value pointer to control structure at head of file. 
- */ -OMPI_DECLSPEC extern mca_common_sm_module_t * -mca_common_sm_mmap_init(ompi_proc_t **sorted_procs, - size_t num_loc_procs, - size_t size, - char *file_name, - size_t size_ctl_structure, - size_t data_seg_alignment); - -/* - * Callback from the sm mpool - */ -OMPI_DECLSPEC extern void * -mca_common_sm_mmap_seg_alloc(struct mca_mpool_base_module_t *mpool, - size_t *size, - mca_mpool_base_registration_t **registration); - -/** - * This function will release all local resources attached to the - * mmapped file. We assume that the operating system will destroy the - * file when the last process release it. - * - * @param sm_mmap - the control structure at head of file. - * - * @returnvalue 0 if everything was OK, otherwise a negative value. - */ - -OMPI_DECLSPEC extern int -mca_common_sm_mmap_fini(mca_common_sm_module_t *mca_common_sm_module); - -/** - * component query routine - */ -OMPI_DECLSPEC extern int -mca_common_sm_mmap_component_query(void); - -END_C_DECLS - -#endif - diff --git a/ompi/mca/common/sm/common_sm_posix.c b/ompi/mca/common/sm/common_sm_posix.c deleted file mode 100644 index f0a122b835..0000000000 --- a/ompi/mca/common/sm/common_sm_posix.c +++ /dev/null @@ -1,504 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010 Los Alamos National Security, LLC. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include -#ifdef HAVE_UNISTD_H -#include -#endif -#ifdef HAVE_STRING_H -#include -#endif /* HAVE_STRING_H */ -#ifdef HAVE_FCNTL_H -#include -#endif /* HAVE_FCNTL_H */ -#ifdef HAVE_TIME_H -#include -#endif /* HAVE_TIME_H */ -#ifdef HAVE_SYS_TYPES_H -#include -#endif -#ifdef HAVE_SYS_STAT_H -#include -#endif /* HAVE_SYS_STAT_H */ -#ifdef HAVE_SYS_MMAN_H -#include -#endif - -#include "opal/util/output.h" -#include "opal/util/path.h" -#include "opal/align.h" -#include "opal/threads/mutex.h" -#include "opal/util/opal_sos.h" - -#include "orte/mca/rml/rml.h" -#include "orte/util/name_fns.h" -#include "orte/util/show_help.h" -#include "orte/runtime/orte_globals.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "ompi/constants.h" -#include "ompi/proc/proc.h" -#include "ompi/mca/dpm/dpm.h" -#include "ompi/mca/mpool/sm/mpool_sm.h" - -#include "common_sm_rml.h" -#include "common_sm_posix.h" - -/* max number of attempts to find an available - * shm_open file name. see comments below. - */ -#define OMPI_COMMON_SM_POSIX_MAX_ATTEMPTS 64 -/* need the / for Solaris 10 and others, i'm sure */ -#define OMPI_COMMON_SM_POSIX_FILE_NAME_PREFIX "/open_mpi." - -OBJ_CLASS_INSTANCE( - mca_common_sm_module_posix_t, - opal_object_t, - NULL, - NULL -); - -/** - * lock to protect multiple instances of posix_init() from being - * invoked simultaneously (because of rml usage). - */ -static opal_mutex_t mutex; - -/** - * shared memory information used for initialization and setup. - */ -static mca_common_sm_rml_sm_info_t sm_info; - -/** - * list of RML messages that have arrived that have not yet been - * consumed by the thread who is looking to complete its component - * initialization based on the contents of the RML message. 
- */ -static opal_list_t pending_rml_msgs; -static bool pending_rml_msgs_init = false; - -/******************************************************************************/ -/** - * this routine searches for an available shm_open file name. - * - * @return if successful, a non-negative file descriptor is returned and - * posix_file_name_buff will contain the file name associated with the - * successful shm_open. otherwise, -1 is returned and the contents of - * posix_file_name_buff is undefined. - */ -static int -posix_shm_open(char *posix_file_name_buff, size_t size) -{ - int attempt = 0; - int fd = -1; - /* format: /open_mpi.nnnn - * see comment in common_sm.h that explains - * why we chose to do things this way. - */ - snprintf(posix_file_name_buff, - size, - "%s%04d", - OMPI_COMMON_SM_POSIX_FILE_NAME_PREFIX, - attempt++); - /** - * workaround for simultaneous posix shm_opens on the same node (e.g. - * multiple Open MPI jobs sharing a node). name collision during - * component runtime will happen, so protect against it. - */ - while (attempt < OMPI_COMMON_SM_POSIX_MAX_ATTEMPTS) - { - /* the check for the existence of the object and its - * creation if it does not exist are performed atomically. - */ - if ((fd = shm_open(posix_file_name_buff, - O_CREAT | O_EXCL | O_RDWR, - 0600)) < 0) - { - int err = errno; - if (EEXIST == err) - { - /* try again with a different name */ - snprintf(posix_file_name_buff, - size, - "%s%04d", - OMPI_COMMON_SM_POSIX_FILE_NAME_PREFIX, - attempt++); - continue; - } - else /* a "real" error occurred, notify the user and return -1 */ - { - orte_show_help("help-mpi-common-sm.txt", - "sys call fail", - 1, - orte_process_info.nodename, - "shm_open(2)", - posix_file_name_buff, - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - strerror(err), - err); - fd = -1; - break; - } - } - else /* success! 
*/ - { - break; - } - } - if (OMPI_COMMON_SM_POSIX_MAX_ATTEMPTS <= attempt) - { - MCA_COMMON_SM_OUTPUT_VERBOSE("max attempts exceeded: could not find an " - "available posix shared object file name"); - } - return fd; -} - -/******************************************************************************/ -static mca_common_sm_module_posix_t * -create_map(int fd, - size_t size, - size_t size_ctl_structure, - size_t data_seg_alignment) -{ - unsigned char *addr = NULL; - mca_common_sm_module_posix_t *map = NULL; - mca_common_sm_seg_header_t *seg = NULL; - - /* map the file and initialize segment state */ - if (MAP_FAILED == (seg = (mca_common_sm_seg_header_t *) - mmap(NULL, - size, - PROT_READ | PROT_WRITE, - MAP_SHARED, - fd, - 0))) - { - int err = errno; - orte_show_help("help-mpi-common-sm.txt", - "sys call fail", - 1, - orte_process_info.nodename, - "mmap(2)", - "", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - strerror(err), - err); - return NULL; - } - - /* set up the map object */ - map = OBJ_NEW(mca_common_sm_module_posix_t); - - /** - * the first entry in the file is the control structure. the first entry - * in the control structure is an mca_common_sm_seg_header_t element - */ - map->super.module_seg = seg; - - addr = ((unsigned char *)seg) + size_ctl_structure; - /** - * if we have a data segment (i.e., if 0 != data_seg_alignment), - * then make it the first aligned address after the control - * structure. if this happens, this is a programming error in - * Open MPI! - */ - if (0 != data_seg_alignment) - { - addr = OPAL_ALIGN_PTR(addr, data_seg_alignment, unsigned char *); - - /* is addr past end of shared memory object ? 
*/ - if ((unsigned char *)seg + size < addr) - { - orte_show_help("help-mpi-common-sm.txt", - "mmap too small", - 1, - orte_process_info.nodename, - (unsigned long)size, - (unsigned long)size_ctl_structure, - (unsigned long)data_seg_alignment); - return NULL; - } - } - - map->super.module_data_addr = addr; - map->super.module_seg_addr = (unsigned char *)seg; - map->super.module_size = size; - - /* map object successful initialized - we can safely increment seg_att */ - opal_atomic_wmb(); - opal_atomic_add_32(&map->super.module_seg->seg_att, 1); - - return map; -} - -/******************************************************************************/ -/** - * this routine performs the posix sm component run-time test. - * - * @return OMPI_SUCCESS if posix sm can be used, OMPI_ERR_NOT_SUPPORTED - * otherwise. - */ -int -mca_common_sm_posix_component_query(void) -{ - int rc = OMPI_SUCCESS; - int fd = -1; - - if (-1 == (fd = posix_shm_open(sm_info.posix_fname_buff, - (OMPI_COMMON_SM_POSIX_FILE_LEN_MAX - 1)))) - { - rc = OMPI_ERR_NOT_SUPPORTED; - } - - if (-1 != fd) - { - shm_unlink(sm_info.posix_fname_buff); - } - return rc; -} - -/******************************************************************************/ -/** - * this routine assumes that sorted_procs is in the following state: - * o all the local procs at the beginning. - * o sorted_procs[0] is the lowest named process. 
- */ -mca_common_sm_module_t * -mca_common_sm_posix_init(ompi_proc_t **sorted_procs, - size_t num_local_procs, - size_t size, - char *file_name, - size_t size_ctl_structure, - size_t data_seg_alignment) -{ - int fd = -1; - mca_common_sm_module_posix_t *map = NULL; - bool lowest; - size_t mem_offset; - int n_local_procs; - - lowest = (0 == orte_util_compare_name_fields( - ORTE_NS_CMP_ALL, - ORTE_PROC_MY_NAME, - &(sorted_procs[0]->proc_name))); - - /* using sm_info.id as an initialization marker: - * o 0 -> not initialized; 1 -> initialized - */ - sm_info.id = 0; - memset(sm_info.posix_fname_buff, '\0', OMPI_COMMON_SM_POSIX_FILE_LEN_MAX); - - /** - * lock here to prevent multiple threads from invoking this function - * simultaneously. the critical section we're protecting is usage of - * the RML in this block. - */ - opal_mutex_lock(&mutex); - - if (!pending_rml_msgs_init) - { - OBJ_CONSTRUCT(&(pending_rml_msgs), opal_list_t); - pending_rml_msgs_init = true; - } - - /** - * figure out if i am the lowest proc in the group. - * if i am, initialize the shared memory object. - */ - if (lowest) - { - /* initialize POSIX shared memory object */ - if (-1 == (fd = posix_shm_open(sm_info.posix_fname_buff, - (OMPI_COMMON_SM_POSIX_FILE_LEN_MAX - 1)))) - { - /* do nothing. if a real error occurred or the file name search - * limit was reached, posix_shm_open will take care of the - * notification part. 
- */ - ; - } - else if (0 != ftruncate(fd, size)) - { - int err = errno; - orte_show_help("help-mpi-common-sm.txt", - "sys call fail", - 1, - orte_process_info.nodename, - "ftruncate(2)", - "", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - strerror(err), - err); - shm_unlink(sm_info.posix_fname_buff); - } - else - { - map = create_map(fd, - size, - size_ctl_structure, - data_seg_alignment); - if (NULL != map) - { - sm_info.id = 1; - /* initialize the segment */ - mem_offset = - map->super.module_data_addr - - (unsigned char *)map->super.module_seg; - map->super.module_seg->seg_offset = mem_offset; - map->super.module_seg->seg_size = size - mem_offset; -#if 0 /* i don't think this unlock is needed, but it's in mmap's source */ - opal_atomic_unlock(&map->super.module_seg->seg_lock); -#endif - map->super.module_seg->seg_inited = 0; - } - else - { - shm_unlink(sm_info.posix_fname_buff); - } - } - } - - /** - * signal the rest of the local procs that a - * new shared memory object has been created. 
- */ - if (OMPI_SUCCESS != mca_common_sm_rml_info_bcast( - &sm_info, - sorted_procs, - num_local_procs, - OMPI_RML_TAG_SM_BACK_FILE_CREATED, - lowest, - file_name, - &(pending_rml_msgs))) - { - goto out; - } - - if (!lowest) - { - /* make certain that things were initialized correctly */ - if (0 != sm_info.id) - { - if ((fd = shm_open(sm_info.posix_fname_buff, O_RDWR, 0600)) > 0) - { - map = create_map(fd, - size, - size_ctl_structure, - data_seg_alignment); - } - } - } - /* if all things were initialized properly, wait until all other local - * procs have reported in before calling shm_unlink - */ - else - { - if (1 == sm_info.id) - { - n_local_procs = (int)num_local_procs; - while (n_local_procs > map->super.module_seg->seg_att) - { - opal_atomic_rmb(); - } - /* all other local procs reported in, so it's safe to shm_unlink */ - shm_unlink(sm_info.posix_fname_buff); - } - } - -out: - opal_mutex_unlock(&mutex); - - return &(map->super); -} - -/******************************************************************************/ -int -mca_common_sm_posix_fini(mca_common_sm_module_t *mca_common_sm_module) -{ - /* no need for shm_unlink here because it was already taken care of */ - mca_common_sm_module_posix_t *posix_module = - (mca_common_sm_module_posix_t *)mca_common_sm_module; - int rc = OMPI_SUCCESS; - - if (NULL != posix_module->super.module_seg) - { - rc = munmap((void *)posix_module->super.module_seg_addr, - posix_module->super.module_size); - posix_module->super.module_seg_addr = NULL; - posix_module->super.module_size = 0; - } - return rc; -} - -/******************************************************************************/ -/** - * allocate memory from a previously allocated shared memory - * block. 
- * - * @param size size of request, in bytes (IN) - * - * @retval addr virtual address - */ - -void * -mca_common_sm_posix_seg_alloc(struct mca_mpool_base_module_t* mpool, - size_t* size, - mca_mpool_base_registration_t** registration) -{ - mca_mpool_sm_module_t *sm_module = (mca_mpool_sm_module_t *) mpool; - mca_common_sm_module_posix_t *map = - (mca_common_sm_module_posix_t *)sm_module->sm_common_module; - mca_common_sm_seg_header_t *seg = map->super.module_seg; - void *addr; - - opal_atomic_lock(&seg->seg_lock); - if(seg->seg_offset + *size > seg->seg_size) - { - addr = NULL; - } - else - { - size_t fixup; - - /* add base address to segment offset */ - addr = map->super.module_data_addr + seg->seg_offset; - seg->seg_offset += *size; - - /** - * fix up seg_offset so next allocation is aligned on a - * sizeof(long) boundry. Do it here so that we don't have to - * check before checking remaining size in buffer - */ - if ((fixup = (seg->seg_offset & (sizeof(long) - 1))) > 0) - { - seg->seg_offset += sizeof(long) - fixup; - } - } - if (NULL != registration) - { - *registration = NULL; - } - opal_atomic_unlock(&seg->seg_lock); - return addr; -} - diff --git a/ompi/mca/common/sm/common_sm_posix.h b/ompi/mca/common/sm/common_sm_posix.h deleted file mode 100644 index 022c26e584..0000000000 --- a/ompi/mca/common/sm/common_sm_posix.h +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. 
- * Copyright (c) 2010 Los Alamos National Security, LLC. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef _COMMON_SM_POSIX_H_ -#define _COMMON_SM_POSIX_H_ - -#include "ompi_config.h" - -#include "opal/class/opal_object.h" -#include "opal/class/opal_list.h" -#include "opal/sys/atomic.h" -#include "ompi/mca/mpool/mpool.h" -#include "ompi/proc/proc.h" -#include "ompi/mca/common/sm/common_sm.h" - -BEGIN_C_DECLS - -struct mca_mpool_base_module_t; - -typedef struct mca_common_sm_module_posix_t -{ - mca_common_sm_module_t super; -} mca_common_sm_module_posix_t; - -OBJ_CLASS_DECLARATION(mca_common_sm_module_posix_t); - -/** - * This routine is used to set up a POSIX shared memory object. - * - * @param sorted_procs - array of (ompi_proc_t *)'s to create this shared memory - * segment for. this routine, unlike the top-level - * mca_common_sm_init routine, assumes that sorted_procs - * is in the following state: all the local procs at the - * beginning; sorted_procs[0] is the lowest named process. - * (IN) - * - * @param num_local_procs - number of local procs contained within - * sorted_procs (IN) - * - * @param size - size of the shared memory segment, in bytes (IN) - * - * @param file_name - strictly used for RML message identification/queueing (IN) - * - * @param size_ctl_structure size of the control structure at - * the head of the file. The control structure - * is assumed to have mca_common_sm_seg_header_t - * as its first segment (IN) - * - * @param data_set_alignment alignment of the data segment. this - * follows the control structure. If this - * value if 0, then assume that there will - * be no data segment following the control - * structure. (IN) - * - * @return value pointer to control structure at head of file. 
- */ -OMPI_DECLSPEC extern mca_common_sm_module_t * -mca_common_sm_posix_init(ompi_proc_t **sorted_procs, - size_t num_local_procs, - size_t size, - char *file_name, - size_t size_ctl_structure, - size_t data_seg_alignment); - -/** - * Callback from the sm mpool - */ -OMPI_DECLSPEC extern void * -mca_common_sm_posix_seg_alloc(struct mca_mpool_base_module_t *mpool, - size_t *size, - mca_mpool_base_registration_t **registration); - -/** - * This function will release all local resources attached to the - * shared memory segment. - * - * @param mca_common_sm_module - the control structure at head of the segment. - * - * @returnvalue 0 if everything was OK, otherwise a negative value. - */ - -OMPI_DECLSPEC extern int -mca_common_sm_posix_fini(mca_common_sm_module_t *mca_common_sm_module); - -/** - * component query routine - */ - -OMPI_DECLSPEC extern int -mca_common_sm_posix_component_query(void); - -END_C_DECLS - -#endif /* _COMMON_SM_POSIX_H_ */ - diff --git a/ompi/mca/common/sm/common_sm_rml.c b/ompi/mca/common/sm/common_sm_rml.c index 0c34a52df0..7ad5eb1101 100644 --- a/ompi/mca/common/sm/common_sm_rml.c +++ b/ompi/mca/common/sm/common_sm_rml.c @@ -43,53 +43,45 @@ OBJ_CLASS_INSTANCE( NULL ); -/******************************************************************************/ +/* ////////////////////////////////////////////////////////////////////////// */ /** * this routine assumes that sorted_procs is in the following state: * o all the local procs at the beginning. * o sorted_procs[0] is the lowest named process. 
*/ int -mca_common_sm_rml_info_bcast(mca_common_sm_rml_sm_info_t *sm_info, +mca_common_sm_rml_info_bcast(opal_shmem_ds_t *ds_buf, ompi_proc_t **procs, size_t num_procs, int tag, bool bcast_root, - const char *file_name, + char *msg_id_str, opal_list_t *pending_rml_msgs) { int rc = OMPI_SUCCESS; struct iovec iov[MCA_COMMON_SM_RML_MSG_LEN]; int iovrc; size_t p; - char filename_to_send[OPAL_PATH_MAX]; + char msg_id_str_to_tx[OPAL_PATH_MAX]; - strncpy(filename_to_send, file_name, sizeof(filename_to_send) - 1); + strncpy(msg_id_str_to_tx, msg_id_str, sizeof(msg_id_str_to_tx) - 1); /* let the first item be the queueing id name */ - iov[0].iov_base = (ompi_iov_base_ptr_t)filename_to_send; - iov[0].iov_len = sizeof(filename_to_send); - iov[1].iov_base = (ompi_iov_base_ptr_t)sm_info; - iov[1].iov_len = sizeof(mca_common_sm_rml_sm_info_t); + iov[0].iov_base = (ompi_iov_base_ptr_t)msg_id_str_to_tx; + iov[0].iov_len = sizeof(msg_id_str_to_tx); + iov[1].iov_base = (ompi_iov_base_ptr_t)ds_buf; + iov[1].iov_len = sizeof(opal_shmem_ds_t); - /** - * figure out if i am the root proc in the group. + /* figure out if i am the root proc in the group. * if i am, bcast the message the rest of the local procs. 
*/ - if (bcast_root) - { + if (bcast_root) { opal_progress_event_users_increment(); /* first num_procs items should be local procs */ - for (p = 1; p < num_procs; ++p) - { - iovrc = orte_rml.send(&(procs[p]->proc_name), - iov, - MCA_COMMON_SM_RML_MSG_LEN, - tag, - 0); - if ((ssize_t)(iov[0].iov_len + - iov[1].iov_len) > iovrc) - { + for (p = 1; p < num_procs; ++p) { + iovrc = orte_rml.send(&(procs[p]->proc_name), iov, + MCA_COMMON_SM_RML_MSG_LEN, tag, 0); + if ((ssize_t)(iov[0].iov_len + iov[1].iov_len) > iovrc) { ORTE_ERROR_LOG(OMPI_ERR_COMM_FAILURE); opal_progress_event_users_decrement(); rc = OMPI_ERROR; @@ -98,60 +90,45 @@ mca_common_sm_rml_info_bcast(mca_common_sm_rml_sm_info_t *sm_info, } opal_progress_event_users_decrement(); } - else /* i am NOT the root ("lowest") proc */ - { + else { /* i am NOT the root ("lowest") proc */ opal_list_item_t *item; mca_common_sm_rml_pending_rml_msg_types_t *rml_msg; - /** - * because a component query can be performed simultaneously in multiple + /* because a component query can be performed simultaneously in multiple * threads, the RML messages may arrive in any order. so first check to * see if we previously received a message for me. */ for (item = opal_list_get_first(pending_rml_msgs); opal_list_get_end(pending_rml_msgs) != item; - item = opal_list_get_next(item)) - { + item = opal_list_get_next(item)) { rml_msg = (mca_common_sm_rml_pending_rml_msg_types_t *)item; /* was the message for me? */ - if (0 == strcmp(rml_msg->rml_file_name, file_name)) - { + if (0 == strcmp(rml_msg->msg_id_str, msg_id_str)) { opal_list_remove_item(pending_rml_msgs, item); - memcpy(sm_info->posix_fname_buff, rml_msg->posix_fname_buff, - OMPI_COMMON_SM_POSIX_FILE_LEN_MAX); - sm_info->id = rml_msg->id; + /* from ==============> to */ + opal_shmem_ds_copy(&rml_msg->shmem_ds, ds_buf); OBJ_RELEASE(item); break; } } - /** - * if we didn't find a message already waiting, block on - * receiving from the RML. 
+ /* if we didn't find a message already waiting, block on receiving from + * the RML. */ - if (opal_list_get_end(pending_rml_msgs) == item) - { - do - { - /** - * bump up the libevent polling frequency while we're - * in this RML recv, just to ensure we're checking - * libevent frequently. + if (opal_list_get_end(pending_rml_msgs) == item) { + do { + /* bump up the libevent polling frequency while we're in this + * RML recv, just to ensure we're checking libevent frequently. */ opal_progress_event_users_increment(); - iovrc = orte_rml.recv(&(procs[0]->proc_name), - iov, - MCA_COMMON_SM_RML_MSG_LEN, - tag, - 0); + iovrc = orte_rml.recv(&(procs[0]->proc_name), iov, + MCA_COMMON_SM_RML_MSG_LEN, tag, 0); opal_progress_event_users_decrement(); - if (iovrc < 0) - { + if (iovrc < 0) { ORTE_ERROR_LOG(OMPI_ERR_RECV_LESS_THAN_POSTED); rc = OMPI_ERROR; goto out; } /* was the message for me? if so, we're done */ - if (0 == strcmp(filename_to_send, file_name)) - { + if (0 == strcmp(msg_id_str_to_tx, msg_id_str)) { break; } /* if not, put it on the pending list and try again */ @@ -162,16 +139,10 @@ mca_common_sm_rml_info_bcast(mca_common_sm_rml_sm_info_t *sm_info, rc = OMPI_ERROR; goto out; } - /* safe because sizeof(rml_msg->file_name) == - * sizeof(filename_to_send), same same goes for - * rml_msg->posix_fname_buff and sm_info->posix_fname_buff */ - memcpy(rml_msg->rml_file_name, - filename_to_send, - OPAL_PATH_MAX); - memcpy(rml_msg->posix_fname_buff, - sm_info->posix_fname_buff, - OMPI_COMMON_SM_POSIX_FILE_LEN_MAX); - rml_msg->id = sm_info->id; + /* not for me, so place on list */ + /* from ========> to */ + opal_shmem_ds_copy(ds_buf, &rml_msg->shmem_ds); + memcpy(rml_msg->msg_id_str, msg_id_str_to_tx, OPAL_PATH_MAX); opal_list_append(pending_rml_msgs, &(rml_msg->super)); } while(1); } diff --git a/ompi/mca/common/sm/common_sm_rml.h b/ompi/mca/common/sm/common_sm_rml.h index 6939cd4180..79f02f1b1b 100644 --- a/ompi/mca/common/sm/common_sm_rml.h +++ 
b/ompi/mca/common/sm/common_sm_rml.h @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010 Los Alamos National Security, LLC. + * Copyright (c) 2010-2011 Los Alamos National Security, LLC. * All rights reserved. * $COPYRIGHT$ * @@ -27,44 +27,36 @@ #include "opal/mca/mca.h" #include "opal/class/opal_object.h" #include "opal/class/opal_list.h" +#include "opal/mca/shmem/base/base.h" +#include "opal/mca/shmem/shmem.h" + #include "ompi/proc/proc.h" #include "ompi/mca/common/sm/common_sm.h" -#define MCA_COMMON_SM_RML_MSG_LEN 2 +#define MCA_COMMON_SM_RML_MSG_LEN 2 BEGIN_C_DECLS -/** - * shared memory information used for initialization and setup. - */ -typedef struct mca_common_sm_rml_sm_info_t -{ - char posix_fname_buff[OMPI_COMMON_SM_POSIX_FILE_LEN_MAX]; - int id; -} mca_common_sm_rml_sm_info_t; - /** * items on the pending_rml_msgs list */ -typedef struct mca_common_sm_rml_pending_rml_msg_types_t -{ +typedef struct mca_common_sm_rml_pending_rml_msg_types_t { opal_list_item_t super; - char rml_file_name[OPAL_PATH_MAX]; - char posix_fname_buff[OMPI_COMMON_SM_POSIX_FILE_LEN_MAX]; - int id; + char msg_id_str[OPAL_PATH_MAX]; + opal_shmem_ds_t shmem_ds; } mca_common_sm_rml_pending_rml_msg_types_t; /** - * routine used to broadcast common sm initialization information to all local + * routine used to send common sm initialization information to all local * processes in procs. 
*/ OMPI_DECLSPEC extern int -mca_common_sm_rml_info_bcast(mca_common_sm_rml_sm_info_t *sm_info, +mca_common_sm_rml_info_bcast(opal_shmem_ds_t *ds_buf, ompi_proc_t **procs, size_t num_procs, int tag, bool bcast_root, - const char *file_name, + char *msg_id_str, opal_list_t *pending_rml_msgs); END_C_DECLS diff --git a/ompi/mca/common/sm/common_sm_sysv.c b/ompi/mca/common/sm/common_sm_sysv.c deleted file mode 100644 index 58a396ab6c..0000000000 --- a/ompi/mca/common/sm/common_sm_sysv.c +++ /dev/null @@ -1,464 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010 Los Alamos National Security, LLC. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include -#ifdef HAVE_UNISTD_H -#include -#endif -#ifdef HAVE_STRING_H -#include -#endif /* HAVE_STRING_H */ -#ifdef HAVE_FCNTL_H -#include -#endif /* HAVE_FCNTL_H */ -#ifdef HAVE_SYS_TYPES_H -#include -#endif -#ifdef HAVE_SYS_STAT_H -#include -#endif /* HAVE_SYS_STAT_H */ -#if MCA_COMMON_SM_SYSV -#include -#include -#endif /* MCA_COMMON_SM_SYSV */ - -#include "opal/util/output.h" -#include "opal/util/path.h" -#include "opal/align.h" -#include "opal/threads/mutex.h" -#include "opal/util/opal_sos.h" - -#include "orte/mca/rml/rml.h" -#include "orte/util/name_fns.h" -#include "orte/util/show_help.h" -#include "orte/runtime/orte_globals.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "ompi/constants.h" -#include "ompi/proc/proc.h" -#include "ompi/mca/dpm/dpm.h" -#include "ompi/mca/mpool/sm/mpool_sm.h" - -#include "common_sm_rml.h" -#include "common_sm_sysv.h" - -OBJ_CLASS_INSTANCE( - mca_common_sm_module_sysv_t, - opal_object_t, - NULL, - NULL -); - -/** - * lock to protect multiple instances of sysv_init() from - * being invoked simultaneously (because of RML usage). - */ -static opal_mutex_t mutex; - -/** - * shared memory information used for initialization and setup. - */ -static mca_common_sm_rml_sm_info_t sm_info; - -/** - * list of RML messages that have arrived that have not yet been - * consumed by the thread who is looking to complete its component - * initialization based on the contents of the RML message. 
- */ -static opal_list_t pending_rml_msgs; -static bool pending_rml_msgs_init = false; - -/******************************************************************************/ -static mca_common_sm_module_sysv_t * -create_shmem_seg(int shmid, - int is_root, - size_t size, - size_t size_ctl_structure, - size_t data_seg_alignment) -{ - unsigned char *addr = NULL; - mca_common_sm_module_sysv_t *map; - mca_common_sm_seg_header_t *seg; - - /* attach to the shared memory segment */ - if ((mca_common_sm_seg_header_t *)-1 == - (seg = (mca_common_sm_seg_header_t *)shmat(shmid, NULL, 0))) - { - int err = errno; - /** - * something really bad happened. - */ - orte_show_help("help-mpi-common-sm.txt", - "sys call fail", - 1, - orte_process_info.nodename, - "shmat(2)", - "", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - strerror(err), - err); - return NULL; - } - /** - * only the the root will set IPC_RMID - */ - if (is_root) - { - /** - * mark the segment for destruction immediately after shmat. our hope - * is that the segment will only actually be destroyed after the last - * process detaches from it (i.e., when the shm_nattch member of the - * associated structure shmid_ds is zero). if we are here, we should - * be okay - our run-time test reported adequate system support. - */ - if (-1 == shmctl(shmid, IPC_RMID, NULL)) - { - int err = errno; - orte_show_help("help-mpi-common-sm.txt", - "sys call fail", - 1, - orte_process_info.nodename, - "shmctl(2)", - "", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - strerror(err), - err); - shmdt(seg); - return NULL; - } - } - - /** - * if we are here, shmctl(shmid, IPC_RMID, NULL) was successful, so we - * don't have to worry about segment cleanup - the OS -should- take care - * of it - happy days... - */ - - /* set up the map object */ - map = OBJ_NEW(mca_common_sm_module_sysv_t); - /** - * the first entry in the file is the control structure. 
The first - * entry in the control structure is an mca_common_sm_seg_header_t - * element - */ - map->super.module_seg = seg; - - addr = ((unsigned char *)seg) + size_ctl_structure; - /** - * if we have a data segment (i.e., if 0 != data_seg_alignment), - * then make it the first aligned address after the control - * structure. IF THIS HAPPENS, THIS IS A PROGRAMMING ERROR IN - * OPEN MPI! - */ - if (0 != data_seg_alignment) - { - addr = OPAL_ALIGN_PTR(addr, data_seg_alignment, unsigned char *); - - /* is addr past the end of the shared memory segment ? */ - if ((unsigned char *)seg + size < addr) - { - orte_show_help("help-mpi-common-sm.txt", - "mmap too small", - 1, - orte_process_info.nodename, - (unsigned long)size, - (unsigned long)size_ctl_structure, - (unsigned long)data_seg_alignment); - return NULL; - } - } - - map->super.module_data_addr = addr; - map->super.module_seg_addr = (unsigned char *)seg; - map->super.module_size = size; - - return map; -} - -/******************************************************************************/ -/** - * mca_common_sm_sysv_component_query - * the sysv run-time test - */ -int -mca_common_sm_sysv_component_query(void) -{ - char c = 'j'; - int shmid = -1; - int rc = OMPI_ERR_NOT_SUPPORTED; - char *a = NULL; - char *addr = (char *)-1; - struct shmid_ds tmp_buff; - - if (-1 == (shmid = shmget(IPC_PRIVATE, - (size_t)(getpagesize()), - IPC_CREAT | IPC_EXCL | SHM_R | SHM_W))) - { - goto out; - } - else if ((char *)-1 == (addr = (char *)shmat(shmid, NULL, 0))) - { - goto out; - } - - /* protect against lazy establishment - may not be needed, but can't hurt */ - a = addr; - *a = c; - - if (-1 == shmctl(shmid, IPC_RMID, NULL)) - { - goto out; - } - else if (-1 == shmctl(shmid, IPC_STAT, &tmp_buff)) - { - goto out; - } - else /* all is well - rainbows and butterflies */ - { - rc = OMPI_SUCCESS; - } - -out: - if ((char *)-1 != addr) - { - shmdt(addr); - } - return rc; -} - 
-/******************************************************************************/ -/** - * mca_common_sm_sysv_init - */ -mca_common_sm_module_t * -mca_common_sm_sysv_init(ompi_proc_t **sorted_procs, - size_t num_local_procs, - size_t size, - char *file_name, - size_t size_ctl_structure, - size_t data_seg_alignment) -{ - mca_common_sm_module_sysv_t *map = NULL; - bool lowest; - size_t mem_offset; - - sm_info.id = -1; - memset(sm_info.posix_fname_buff, '\0', OMPI_COMMON_SM_POSIX_FILE_LEN_MAX); - - lowest = (0 == orte_util_compare_name_fields( - ORTE_NS_CMP_ALL, - ORTE_PROC_MY_NAME, - &(sorted_procs[0]->proc_name))); - - /** - * lock here to prevent multiple threads from invoking this function - * simultaneously. the critical section we're protecting is usage of - * the RML in this block. - */ - opal_mutex_lock(&mutex); - - if (!pending_rml_msgs_init) - { - OBJ_CONSTRUCT(&(pending_rml_msgs), opal_list_t); - pending_rml_msgs_init = true; - } - - /** - * figure out if i am the lowest proc in the group (aka "the root"). - * if i am, initialize the shared memory segment. - */ - if (lowest) - { - /* create a new shared memory segment and save the shmid. */ - if (-1 == (sm_info.id = shmget(IPC_PRIVATE, - size, - IPC_CREAT | IPC_EXCL | SHM_R | SHM_W))) - { - /** - * if we are here, a few of things could have happened: - * o the system's shmmax limit is lower than the requested - * segment size. the user can either up shmmax or set - * mpool_sm_min_size to a value less than the system's current - * shmmax limit. - * o something else i don't know about ... 
- */ - int err = errno; - orte_show_help("help-mpi-common-sm.txt", - "shmget call fail", - 1, - orte_process_info.nodename, - "shmget(2)", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - strerror(err), - err, - size); - } - else - { - map = create_shmem_seg(sm_info.id, - 1, /* i am the root */ - size, - size_ctl_structure, - data_seg_alignment); - if (NULL != map) - { - /* initialize the segment */ - mem_offset = - map->super.module_data_addr - - (unsigned char *)map->super.module_seg; - map->super.module_seg->seg_offset = mem_offset; - map->super.module_seg->seg_size = size - mem_offset; - map->super.module_seg->seg_inited = 0; -#if 0 /* i don't think this unlock is needed, but it's in mmap's source */ - opal_atomic_unlock(&map->super.module_seg->seg_lock); -#endif - } - else - { - /** - * best effort to delete the segment. - * may not be needed, but can't hurt. - */ - shmctl(sm_info.id, IPC_RMID, NULL); - /** - * setting shmid to -1 here will tell - * the other procs that we failed. - */ - sm_info.id = -1; - } - } - } - - /** - * signal the rest of the local procs that a - * new shared memory object has been created. - */ - if (OMPI_SUCCESS != mca_common_sm_rml_info_bcast( - &sm_info, - sorted_procs, - num_local_procs, - OMPI_RML_TAG_SM_BACK_FILE_CREATED, - lowest, - file_name, - &(pending_rml_msgs))) - { - goto out; - } - /* did the root setup the shmid correctly? if so, attach to it */ - if (!lowest && -1 != sm_info.id) - { - /* no return value check here because the error - * path is the same as the expected path */ - map = create_shmem_seg(sm_info.id, - 0, /* i am NOT the root */ - size, - size_ctl_structure, - data_seg_alignment); - } - -out: - opal_mutex_unlock(&mutex); - - return &(map->super); -} - -/******************************************************************************/ -/** - * sys v module finalization routine. 
- */ -int -mca_common_sm_sysv_fini(mca_common_sm_module_t *mca_common_sm_module) -{ - int rc = OMPI_SUCCESS; - mca_common_sm_module_sysv_t *sysv_module = - (mca_common_sm_module_sysv_t *)mca_common_sm_module; - - /** - * no need to shmctl to remove the segment, because we set - * IPC_RMID on the segment, meaning that when everyone detaches, - * the OS will automatically delete it. - */ - if (NULL != sysv_module->super.module_seg) - { - rc = shmdt(sysv_module->super.module_seg_addr); - sysv_module->super.module_seg_addr = NULL; - sysv_module->super.module_size = 0; - } - return rc; -} - -/******************************************************************************/ -/** - * allocate memory from a previously allocated shared memory block. - * - * @param size size of request, in bytes (IN) - * - * @retval addr virtual address - */ -void * -mca_common_sm_sysv_seg_alloc(struct mca_mpool_base_module_t* mpool, - size_t* size, - mca_mpool_base_registration_t** registration) -{ - mca_mpool_sm_module_t *sm_module = (mca_mpool_sm_module_t*)mpool; - - mca_common_sm_module_sysv_t *map = - (mca_common_sm_module_sysv_t *)sm_module->sm_common_module; - - mca_common_sm_seg_header_t* seg = map->super.module_seg; - - void *addr; - - opal_atomic_lock(&seg->seg_lock); - - if (seg->seg_offset + *size > seg->seg_size) - { - addr = NULL; - } - else - { - size_t fixup; - - /* add base address to segment offset */ - addr = map->super.module_data_addr + seg->seg_offset; - seg->seg_offset += *size; - - /** - * fix up seg_offset so next allocation is aligned on a - * sizeof(long) boundry. 
do it here so that we don't have to - * check before checking remaining size in buffer - */ - if (0 < (fixup = (seg->seg_offset & (sizeof(long) - 1)))) - { - seg->seg_offset += sizeof(long) - fixup; - } - } - if (NULL != registration) - { - *registration = NULL; - } - opal_atomic_unlock(&seg->seg_lock); - return addr; -} - diff --git a/ompi/mca/common/sm/common_sm_sysv.h b/ompi/mca/common/sm/common_sm_sysv.h deleted file mode 100644 index e64d4ca114..0000000000 --- a/ompi/mca/common/sm/common_sm_sysv.h +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010 Los Alamos National Security, LLC. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef _COMMON_SM_SYSV_H_ -#define _COMMON_SM_SYSV_H_ - -#include "ompi_config.h" - -#include "opal/class/opal_object.h" -#include "opal/class/opal_list.h" -#include "opal/sys/atomic.h" -#include "ompi/mca/mpool/mpool.h" -#include "ompi/proc/proc.h" -#include "ompi/group/group.h" -#include "ompi/mca/common/sm/common_sm.h" - -BEGIN_C_DECLS - -struct mca_mpool_base_module_t; - -typedef struct mca_common_sm_module_sysv_t -{ - mca_common_sm_module_t super; -} mca_common_sm_module_sysv_t; - -OBJ_CLASS_DECLARATION(mca_common_sm_module_sysv_t); - -/** - * This routine is used to set up a System V shared memory segment. 
- * - * @param sorted_procs - array of (ompi_proc_t *)'s to create this shared memory - * segment for. this routine, unlike the top-level - * mca_common_sm_init routine, assumes that sorted_procs - * is in the following state: all the local procs at the - * beginning; sorted_procs[0] is the lowest named process. - * (IN) - * - * @param num_local_procs - number of local procs contained within - * sorted_procs (IN) - * - * @param size - size of the shared memory segment, in bytes (IN) - * - * @param file_name - strictly used for RML message identification/queueing (IN) - * - * @param size_ctl_structure size of the control structure at - * the head of the file. the control structure - * is assumed to have mca_common_sm_seg_header_t - * as its first segment (IN) - * - * @param data_set_alignment alignment of the data segment. This - * follows the control structure. If this - * value if 0, then assume that there will - * be no data segment following the control - * structure. (IN) - * - * @return value pointer to control structure at head of file. - */ -OMPI_DECLSPEC extern mca_common_sm_module_t * -mca_common_sm_sysv_init(ompi_proc_t **sorted_procs, - size_t num_local_procs, - size_t size, - char *file_name, - size_t size_ctl_structure, - size_t data_seg_alignment); - -/** - * Callback from the sm mpool - */ -OMPI_DECLSPEC extern void * -mca_common_sm_sysv_seg_alloc(struct mca_mpool_base_module_t *mpool, - size_t *size, - mca_mpool_base_registration_t **registration); - -/** - * This function will release all local resources attached to the - * shared memory segment. We assume that the operating system will destroy the - * shared memory segment when the last process detaches from it. - * - * It is assumed that the operating system's System V IPC implementation - * supports the following IPC_RMID semantics. - * - * Calling shmctl(shmid, IPC_RMID, ...) 
will actually destroy the shared memory - * segment *after* the last process detaches from it (i.e., when the shm_nattch - * member of the associated structure shmid_ds is zero). This behavior is - * important because we rely on it to release all allocated shared memory - * segments upon job termination - including abnormal job termination. - * - * @param mca_common_sm_module - the control structure at head of the segment. - * - * @returnvalue 0 if everything was OK, otherwise a negative value. - */ - -OMPI_DECLSPEC extern int -mca_common_sm_sysv_fini(mca_common_sm_module_t *mca_common_sm_module); - -/** - * component query routine - */ - -OMPI_DECLSPEC extern int -mca_common_sm_sysv_component_query(void); - -END_C_DECLS - -#endif /* _COMMON_SM_SYSV_H_ */ - diff --git a/ompi/mca/common/sm/common_sm_windows.c b/ompi/mca/common/sm/common_sm_windows.c deleted file mode 100644 index 31315aabe2..0000000000 --- a/ompi/mca/common/sm/common_sm_windows.c +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010 Los Alamos National Security, LLC. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include -#ifdef HAVE_UNISTD_H -#include -#endif -#ifdef HAVE_STRING_H -#include -#endif /* HAVE_STRING_H */ -#ifdef HAVE_FCNTL_H -#include -#endif /* HAVE_FCNTL_H */ -#ifdef HAVE_TIME_H -#include -#endif /* HAVE_TIME_H */ -#ifdef HAVE_SYS_TYPES_H -#include -#endif -#ifdef HAVE_SYS_STAT_H -#include -#endif /* HAVE_SYS_STAT_H */ -#ifdef HAVE_SYS_MMAN_H -#include -#endif - -#include "opal/util/output.h" -#include "opal/util/path.h" -#include "opal/align.h" -#include "opal/threads/mutex.h" -#include "opal/util/opal_sos.h" - -#include "orte/util/name_fns.h" -#include "orte/util/show_help.h" -#include "orte/runtime/orte_globals.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "ompi/constants.h" -#include "ompi/proc/proc.h" -#include "ompi/mca/dpm/dpm.h" -#include "ompi/mca/mpool/sm/mpool_sm.h" -#include "common_sm_windows.h" - -OBJ_CLASS_INSTANCE( - mca_common_sm_module_windows_t, - opal_object_t, - NULL, - NULL -); - -/******************************************************************************/ -/** - * mca_common_sm_windows_component_query - */ -int -mca_common_sm_windows_component_query(void) -{ - return OMPI_SUCCESS; -} - -mca_common_sm_module_t * -mca_common_sm_windows_init(ompi_proc_t **sorted_procs, - size_t num_local_procs, - size_t size, char *file_name, - size_t size_ctl_structure, - size_t data_seg_alignment) -{ - int fd = -1, return_code = OMPI_SUCCESS; - bool file_previously_opened = false; - mca_common_sm_seg_header_t* seg = NULL; - mca_common_sm_module_windows_t* map = NULL; - unsigned char *addr = NULL; - size_t tmp, mem_offset; - - HANDLE hMapObject = INVALID_HANDLE_VALUE; - LPVOID lpvMem = NULL; - char *temp1, *temp2; - int rc; - - /** - * On Windows the shared file will be created by the OS directly on - * the system ressources. Therefore, no file get involved in the - * operation. 
However, a unique key should be used as name for the - * shared memory object in order to allow all processes to access - * the same unique shared memory region. The key will be obtained - * from the original file_name by replacing all path separator - * occurences by '/' (as '\' is not allowed on the object name). - */ - temp1 = strdup(file_name); - temp2 = temp1; - while( NULL != (temp2 = strchr(temp2, OPAL_PATH_SEP[0])) ) { - *temp2 = '/'; - } - hMapObject = CreateFileMapping( INVALID_HANDLE_VALUE, /* use paging file */ - NULL, /* no security attributes */ - PAGE_READWRITE, /* read/write access */ - 0, /* size: high 32-bits */ - (DWORD)size, /* size: low 32-bits */ - temp1); /* name of map object */ - if( NULL == hMapObject ) { - rc = GetLastError(); - goto return_error; - } - if( ERROR_ALREADY_EXISTS == GetLastError() ) - file_previously_opened=true; - free(temp1); /* relase the temporary file name */ - - /* Get a pointer to the file-mapped shared memory. */ - lpvMem = MapViewOfFile( hMapObject, /* object to map view of */ - FILE_MAP_WRITE, /* read/write access */ - 0, /* high offset: map from */ - 0, /* low offset: beginning */ - 0); /* default: map entire file */ - if( NULL == lpvMem ) { - rc = GetLastError(); - goto return_error; - } - seg = (mca_common_sm_seg_header_t*)lpvMem; - - /* set up the map object */ - map = OBJ_NEW(mca_common_sm_module_windows_t); - strncpy(map->super.module_seg_path, file_name, OPAL_PATH_MAX); - /* the first entry in the file is the control structure. The first - entry in the control structure is an mca_common_sm_seg_header_t - element */ - map->super.module_seg = seg; - - /* If we have a data segment (i.e., if 0 != data_seg_alignment), - then make it the first aligned address after the control - structure. 
*/ - if (0 != data_seg_alignment) { - addr = ((unsigned char *) seg) + size_ctl_structure; - /* calculate how far off alignment we are */ - tmp = ((size_t) addr) % data_seg_alignment; - /* if we're off alignment, then move up to the next alignment */ - if( tmp > 0 ) - addr += (data_seg_alignment - tmp); - - /* is addr past end of file ? */ - if( (unsigned char*)seg+size < addr ) { - opal_output(0, "mca_common_sm_init: memory region too small len %d addr %p\n", - size,addr); - goto return_error; - } - map->super.module_data_addr = addr; - } else { - map->super.module_data_addr = NULL; - } - mem_offset = addr-(unsigned char *)seg; - map->super.module_seg_addr = (unsigned char *)seg; - map->super.module_size = size; - - /* initialize the segment - only the first process to open the file */ - if( !file_previously_opened ) { - opal_atomic_unlock(&seg->seg_lock); - seg->seg_inited = false; - seg->seg_offset = mem_offset; - /* initialize size after subtracting out space used by the header */ - seg->seg_size = size - mem_offset; - } - - map->super.hMappedObject = hMapObject; - - return (mca_common_sm_module_t *)map; - - return_error: - { - char* localbuf = NULL; - FormatMessage( FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM, - NULL, rc, 0, (LPTSTR)&localbuf, 1024, NULL ); - opal_output( 0, "%s\n", localbuf ); - LocalFree( localbuf ); - } - if( NULL != lpvMem ) UnmapViewOfFile( lpvMem ); - if( NULL != hMapObject ) CloseHandle(hMapObject); - - return NULL; -} - -int -mca_common_sm_windows_fini(mca_common_sm_module_t *mca_common_sm_module) -{ - mca_common_sm_module_windows_t *windows_module = - (mca_common_sm_module_windows_t *)mca_common_sm_module; - int rc = OMPI_SUCCESS; - - if( NULL != windows_module->super.module_seg ) { - BOOL return_error = UnmapViewOfFile( windows_module->super.module_seg_addr ); - if( false == return_error ) { - rc = GetLastError(); - } - CloseHandle(windows_module->super.hMappedObject); - - } - return rc; -} - -/** - * allocate memory 
from a previously allocated shared memory - * block. - * - * @param size size of request, in bytes (IN) - * - * @retval addr virtual address - */ - -void * -mca_common_sm_windows_seg_alloc(struct mca_mpool_base_module_t* mpool, - size_t* size, - mca_mpool_base_registration_t** registration) -{ - mca_mpool_sm_module_t *sm_module = (mca_mpool_sm_module_t*) mpool; - mca_common_sm_module_windows_t *map = - (mca_common_sm_module_windows_t *)sm_module->sm_common_module; - mca_common_sm_seg_header_t* seg = map->super.module_seg; - void* addr; - - opal_atomic_lock(&seg->seg_lock); - if(seg->seg_offset + *size > seg->seg_size) { - addr = NULL; - } else { - size_t fixup; - - /* add base address to segment offset */ - addr = map->super.module_data_addr + seg->seg_offset; - seg->seg_offset += *size; - - /* fix up seg_offset so next allocation is aligned on a - sizeof(long) boundry. Do it here so that we don't have to - check before checking remaining size in buffer */ - if ((fixup = (seg->seg_offset & (sizeof(long) - 1))) > 0) { - seg->seg_offset += sizeof(long) - fixup; - } - } - if (NULL != registration) { - *registration = NULL; - } - opal_atomic_unlock(&seg->seg_lock); - return addr; -} - diff --git a/ompi/mca/common/sm/common_sm_windows.h b/ompi/mca/common/sm/common_sm_windows.h deleted file mode 100644 index addb35b825..0000000000 --- a/ompi/mca/common/sm/common_sm_windows.h +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009 Cisco Systems, Inc. 
All rights reserved. - * Copyright (c) 2010 Los Alamos National Security, LLC. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef _COMMON_SM_WINDOWS_H_ -#define _COMMON_SM_WINDOWS_H_ - -#include "ompi_config.h" - -#include "opal/class/opal_object.h" -#include "opal/class/opal_list.h" -#include "opal/sys/atomic.h" -#include "ompi/mca/mpool/mpool.h" -#include "ompi/proc/proc.h" -#include "ompi/group/group.h" -#include "ompi/mca/common/sm/common_sm.h" - -BEGIN_C_DECLS - -struct mca_mpool_base_module_t; - -typedef struct mca_common_sm_module_windows_t -{ - mca_common_sm_module_t super; -} mca_common_sm_module_windows_t; - -OBJ_CLASS_DECLARATION(mca_common_sm_module_windows_t); - -/** - * This routine is used to set up a shared memory file, backed - * by a specified file. It is assumed that the file does not - * exist before any of the current set of processes try and open - * it. - * - * @param sorted_procs - array of (ompi_proc_t *)'s to create this shared memory - * segment for. this routine, unlike the top-level - * mca_common_sm_init routine, assumes that sorted_procs - * is in the following state: all the local procs at the - * beginning; sorted_procs[0] is the lowest named process. - * (IN) - * - * @param num_local_procs - number of local procs contained within - * sorted_procs (IN) - * - * @param size - size of the file, in bytes (IN) - * - * @param file_name name of file to be opened. (IN) - * - * @param size_ctl_structure size of the control structure at - * the head of the file. The control structure - * is assumed to have mca_common_sm_seg_header_t - * as its first segment (IN) - * - * @param data_set_alignment alignment of the data segment. this - * follows the control structure. If this - * value if 0, then assume that there will - * be no data segment following the control - * structure. (IN) - * - * @return value pointer to control structure at head of file. 
- */ -OMPI_DECLSPEC extern mca_common_sm_module_t * -mca_common_sm_windows_init(ompi_proc_t **sorted_procs, - size_t num_local_procs, - size_t size, - char *file_name, - size_t size_ctl_structure, - size_t data_seg_alignment); - -/* - * Callback from the sm mpool - */ -OMPI_DECLSPEC extern void * -mca_common_sm_windows_seg_alloc(struct mca_mpool_base_module_t *mpool, - size_t *size, - mca_mpool_base_registration_t **registration); - -/** - * This function will release all local resources attached to the - * mmapped file. We assume that the operating system will destroy the - * file when the last process release it. - * - * @param sm_windows - the control structure at head of file. - * - * @returnvalue 0 if everything was OK, otherwise a negative value. - */ - -OMPI_DECLSPEC extern int -mca_common_sm_windows_fini(mca_common_sm_module_t *mca_common_sm_module); - -/** - * component query routine - */ -OMPI_DECLSPEC extern int -mca_common_sm_windows_component_query(void); - -END_C_DECLS - -#endif - diff --git a/ompi/mca/common/sm/configure.m4 b/ompi/mca/common/sm/configure.m4 index c5ff40cdf3..366cda3601 100644 --- a/ompi/mca/common/sm/configure.m4 +++ b/ompi/mca/common/sm/configure.m4 @@ -11,7 +11,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2010 Los Alamos National Security, LLC. +# Copyright (c) 2010-2011 Los Alamos National Security, LLC. # All rights reserved. 
# $COPYRIGHT$ # @@ -25,64 +25,20 @@ AC_DEFUN([MCA_ompi_common_sm_POST_CONFIG], [ AM_CONDITIONAL([COMMON_SM_BUILD_WINDOWS], [test $1 -eq 1 -a "x$common_sm_build_windows" = "x1"]) - AM_CONDITIONAL([COMMON_SM_BUILD_SYSV], - [test $1 -eq 1 -a "x$common_sm_build_sysv" = "x1"]) - AM_CONDITIONAL([COMMON_SM_BUILD_POSIX], - [test $1 -eq 1 -a "x$common_sm_build_posix" = "x1"]) ])dnl -# MCA_common_sm_CONFIG([action-if-can-compile], -# [action-if-cant-compile]) +# MCA_ompi_common_sm_CONFIG([action-if-can-compile], +# [action-if-cant-compile]) # ------------------------------------------------ AC_DEFUN([MCA_ompi_common_sm_CONFIG], [ AC_CONFIG_FILES([ompi/mca/common/sm/Makefile]) # Are we building on Windows? - AC_CHECK_FUNC(CreateFileMapping, + AC_CHECK_FUNC(CreateFileMapping, [common_sm_build_windows=1], [common_sm_build_windows=0]) AC_DEFINE_UNQUOTED([MCA_COMMON_SM_WINDOWS], [$common_sm_build_windows], [Whether we have shared memory support for Windows or not]) - - # do we want to enable System V shared memory support? - AC_MSG_CHECKING([if want sysv shared memory support]) - AC_ARG_ENABLE(sysv, - AC_HELP_STRING([--disable-sysv], - [disable sysv shared memory support (default: enabled)])) - AS_IF([test "$enable_sysv" = "no"], - [AC_MSG_RESULT([no]) - common_sm_build_sysv=0], - [AC_MSG_RESULT([yes]) - AC_CHECK_FUNC(shmget, - [common_sm_build_sysv=1], - [common_sm_build_sysv=0])]) - AS_IF([test "$enable_sysv" = "yes" -a "$common_sm_build_sysv" = "0"], - [AC_MSG_WARN([System V shared memory support requested but not found]) - AC_MSG_ERROR([Cannot continue])]) - - AC_DEFINE_UNQUOTED([MCA_COMMON_SM_SYSV], - [$common_sm_build_sysv], - [Whether we have shared memory support for SYSV or not]) - - # do we have the posix shm stuff? 
- AC_MSG_CHECKING([if want POSIX shared memory support]) - AC_ARG_ENABLE(posix-shmem, - AC_HELP_STRING([--disable-posix-shmem], - [disable posix shared memory support (default: enabled)])) - AS_IF([test "$enable_posix_shmem" = "no"], - [AC_MSG_RESULT([no]) - common_sm_build_posix=0], - [AC_MSG_RESULT([yes]) - AC_SEARCH_LIBS([shm_open], [rt], - [common_sm_build_posix=1], - [common_sm_build_posix=0])]) - AS_IF([test "$enable_posix_shmem" = "yes" -a "$common_sm_build_posix" = "0"], - [AC_MSG_WARN([POSIX shared memory support requested but not found]) - AC_MSG_ERROR([Cannot continue])]) - - AC_DEFINE_UNQUOTED([MCA_COMMON_SM_POSIX], - [$common_sm_build_posix], - [Whether we have shared memory support for POSIX or not]) ])dnl diff --git a/ompi/mca/common/sm/help-mpi-common-sm.txt b/ompi/mca/common/sm/help-mpi-common-sm.txt index 7330203072..0e5993fded 100644 --- a/ompi/mca/common/sm/help-mpi-common-sm.txt +++ b/ompi/mca/common/sm/help-mpi-common-sm.txt @@ -1,7 +1,7 @@ # -*- text -*- # # Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2010 Los Alamos National Security, LLC. +# Copyright (c) 2010-2011 Los Alamos National Security, LLC. # All rights reserved. # # $COPYRIGHT$ @@ -12,29 +12,6 @@ # # This is the US/English help file for Open MPI's common shmem support. # -[sys call fail] -A system call failed during shared memory initialization that should -not have. It is likely that your MPI job will now either abort or -experience performance degradation. - - Local host: %s - System call: %s %s - Process: %s - Error: %s (errno %d) -# -[shmget call fail] -A shmget call failed during shared memory initialization that should -not have. It is likely that your MPI job will now either abort or -experience performance degradation. - - Local host: %s - System call: %s - Process: %s - Error: %s (errno %d) - -Please verify that your system's shmax limit, or equivalent, is larger than -%d. 
On some Unix-like systems this can be done via: "sysctl -a | grep shm" -# [mmap too small] Open MPI requested a shared memory segment that was too small to do anything useful. This is likely an error in Open MPI itself. If you @@ -45,35 +22,3 @@ Open MPI, and if not, contact the Open MPI developers. Requested size: %ul Control seg size: %ul Data seg aligment: %ul -# -[mmap on nfs] -WARNING: Open MPI will create a shared memory backing file in a -directory that appears to be mounted on a network filesystem. -Creating the shared memory backup file on a network file system, such -as NFS or Lustre is not recommended -- it may cause excessive network -traffic to your file servers and/or cause shared memory traffic in -Open MPI to be much slower than expected. - -You may want to check what the typical temporary directory is on your -node. Possible sources of the location of this temporary directory -include the $TEMPDIR, $TEMP, and $TMP environment variables. - -Note, too, that system administrators can set a list of filesystems -where Open MPI is disallowed from creating temporary files by settings -the MCA parameter "orte_no_session_dir". - - Local host: %s - Fileame: %s -# -[sm support] -WARNING: "%s" not recognized - ignoring option. Suppressing additional -unrecognized option warnings. -# -[sm rt test fail] -WARNING: It appears as if your system does not provide the run-time behavior -that we rely on to safely provide %s shared memory support. -# -[sysv with cr] -WARNING: It appears as if you requested System V shared memory support in -conjunction with checkpoint/restart. This combination is not currently -supported. diff --git a/ompi/mca/mpool/sm/mpool_sm_module.c b/ompi/mca/mpool/sm/mpool_sm_module.c index 52916b033f..7adf6f7f0b 100644 --- a/ompi/mca/mpool/sm/mpool_sm_module.c +++ b/ompi/mca/mpool/sm/mpool_sm_module.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010 Los Alamos National Security, LLC. + * Copyright (c) 2011 Los Alamos National Security, LLC. * All rights reserved. * $COPYRIGHT$ * @@ -147,7 +147,7 @@ static void sm_module_finalize(mca_mpool_base_module_t* module) unlink(sm_module->sm_common_module->module_seg_path); } #else - unlink(sm_module->sm_common_module->module_seg_path); + unlink(sm_module->sm_common_module->shmem_ds.seg_name); #endif } OBJ_RELEASE(sm_module->sm_common_module); diff --git a/ompi/tools/ompi_info/components.c b/ompi/tools/ompi_info/components.c index a0917d8719..a95b413f4b 100644 --- a/ompi/tools/ompi_info/components.c +++ b/ompi/tools/ompi_info/components.c @@ -10,6 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2010 Los Alamos National Security, LLC. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -36,6 +38,8 @@ #include "opal/mca/paffinity/base/base.h" #include "opal/mca/carto/carto.h" #include "opal/mca/carto/base/base.h" +#include "opal/mca/shmem/shmem.h" +#include "opal/mca/shmem/base/base.h" #include "opal/mca/maffinity/maffinity.h" #include "opal/mca/maffinity/base/base.h" #include "opal/mca/memory/memory.h" @@ -311,6 +315,14 @@ void ompi_info_open_components(void) map->type = strdup("carto"); map->components = &opal_carto_base_components_opened; opal_pointer_array_add(&component_map, map); + + if (OPAL_SUCCESS != opal_shmem_base_open()) { + goto error; + } + map = OBJ_NEW(ompi_info_component_map_t); + map->type = strdup("shmem"); + map->components = &opal_shmem_base_components_opened; + opal_pointer_array_add(&component_map, map); if (OPAL_SUCCESS != opal_maffinity_base_open()) { goto error; diff --git a/ompi/tools/ompi_info/ompi_info.c b/ompi/tools/ompi_info/ompi_info.c index 3d98227086..9438de5c05 100644 --- 
a/ompi/tools/ompi_info/ompi_info.c +++ b/ompi/tools/ompi_info/ompi_info.c @@ -194,6 +194,7 @@ int main(int argc, char *argv[]) opal_pointer_array_add(&mca_types, "memory"); opal_pointer_array_add(&mca_types, "paffinity"); opal_pointer_array_add(&mca_types, "carto"); + opal_pointer_array_add(&mca_types, "shmem"); opal_pointer_array_add(&mca_types, "maffinity"); opal_pointer_array_add(&mca_types, "timer"); opal_pointer_array_add(&mca_types, "installdirs"); diff --git a/opal/mca/shmem/Makefile.am b/opal/mca/shmem/Makefile.am new file mode 100644 index 0000000000..0006822373 --- /dev/null +++ b/opal/mca/shmem/Makefile.am @@ -0,0 +1,39 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2010 Los Alamos National Security, LLC. +# All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# main library setup +noinst_LTLIBRARIES = libmca_shmem.la +libmca_shmem_la_SOURCES = + +# local files +headers = shmem.h shmem_types.h +libmca_shmem_la_SOURCES += $(headers) + +# Conditionally install the header files +if WANT_INSTALL_HEADERS +opaldir = $(includedir)/openmpi/$(subdir) +nobase_opal_HEADERS = $(headers) +endif + +include base/Makefile.am + +distclean-local: + rm -f base/static-components.h diff --git a/opal/mca/shmem/base/Makefile.am b/opal/mca/shmem/base/Makefile.am new file mode 100644 index 0000000000..9234579bbb --- /dev/null +++ b/opal/mca/shmem/base/Makefile.am @@ -0,0 +1,28 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2010-2011 Los Alamos National Security, LLC. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +headers += \ + base/base.h + +libmca_shmem_la_SOURCES += \ + base/shmem_base_close.c \ + base/shmem_base_select.c \ + base/shmem_base_open.c \ + base/shmem_base_wrappers.c diff --git a/opal/mca/shmem/base/base.h b/opal/mca/shmem/base/base.h new file mode 100644 index 0000000000..bab7ab9a53 --- /dev/null +++ b/opal/mca/shmem/base/base.h @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. 
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2010-2011 Los Alamos National Security, LLC. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OPAL_SHMEM_BASE_H +#define OPAL_SHMEM_BASE_H + +#include "opal_config.h" + +#include "opal/mca/shmem/shmem.h" + +BEGIN_C_DECLS + +/* ////////////////////////////////////////////////////////////////////////// */ +/* Public API for the shmem framework */ +/* ////////////////////////////////////////////////////////////////////////// */ +OPAL_DECLSPEC int +opal_shmem_segment_create(opal_shmem_ds_t *ds_buf, + const char *file_name, + size_t size); + +OPAL_DECLSPEC int +opal_shmem_ds_copy(const opal_shmem_ds_t *from, + opal_shmem_ds_t *to); + +OPAL_DECLSPEC void * +opal_shmem_segment_attach(opal_shmem_ds_t *ds_buf); + +OPAL_DECLSPEC int +opal_shmem_segment_detach(opal_shmem_ds_t *ds_buf); + +OPAL_DECLSPEC int +opal_shmem_unlink(opal_shmem_ds_t *ds_buf); +/* ////////////////////////////////////////////////////////////////////////// */ +/* End Public API for the shmem framework */ +/* ////////////////////////////////////////////////////////////////////////// */ + +/* + * Global functions for MCA overall shmem open and close + */ + +/** + * Register MCA params for the shmem base. + * + * @retval OPAL_SUCCESS Upon success + * + * This function registers some shmem-wide MCA parameters (for example, the + * verbosity level of the shmem framework). + */ +OPAL_DECLSPEC int +opal_shmem_base_register_params(void); + +/** + * Performs a run-time query across all available shmem components. Similar to + * mca_base_select, but takes into consideration environment hints provided by + * orte. 
+ * + * see: orte/mca/odls/base/odls_base_default_fns.c + */ +OPAL_DECLSPEC int +opal_shmem_base_runtime_query(mca_base_module_t **best_module, + mca_base_component_t **best_component); + +/** + * returns the name of the best, runnable shmem component. the caller is + * responsible for freeing returned resources. + * + * @retval name of best component. NULL if no component is found. + * + * see: orte/mca/odls/base/odls_base_default_fns.c + */ +OPAL_DECLSPEC char * +opal_shmem_base_best_runnable_component_name(void); + +/** + * Initialize the shmem MCA framework + * + * @retval OPAL_SUCCESS Upon success + * @retval OPAL_ERROR Upon failure + * + * This must be the first function invoked in the shmem MCA + * framework. It initializes the shmem MCA framework, finds + * and opens shmem components, etc. + * + * This function is invoked during opal_init(). + * + * This function fills in the internal global variable + * opal_shmem_base_components_opened, which is a list of all + * shmem components that were successfully opened. This + * variable should \em only be used by other shmem base + * functions -- it is not considered a public interface member -- + * and is only mentioned here for completeness. + */ +OPAL_DECLSPEC int +opal_shmem_base_open(void); + +/** + * Select an available component. + * + * @return OPAL_SUCCESS Upon success. + * @return OPAL_NOT_FOUND If no component can be selected. + * @return OPAL_ERROR Upon other failure. + * + * This function invokes the selection process for shmem components, + * which works as follows: + * + * - If the \em shmem MCA parameter is not specified, the + * selection set is all available shmem components. + * - If the \em shmem MCA parameter is specified, the + * selection set is just that component. + * - All components in the selection set are queried to see if + * they want to run. All components that want to run are ranked + * by their priority and the highest priority component is + * selected. 
All non-selected components have their "close" + * function invoked to let them know that they were not selected. + * - The selected component will have its "init" function invoked to + * let it know that it was selected. + * + * If we fall through this entire process and no component is + * selected, then return OPAL_NOT_FOUND (this is not a fatal + * error). + * + * At the end of this process, we'll either have a single + * component that is selected and initialized, or no component was + * selected. If no component was selected, subsequent invocation + * of the shmem wrapper functions will return an error. + */ +OPAL_DECLSPEC int +opal_shmem_base_select(void); + +/** + * Shut down the shmem MCA framework. + * + * @retval OPAL_SUCCESS Always + * + * This function shuts down everything in the shmem MCA + * framework, and is called during opal_finalize(). + * + * It must be the last function invoked on the shmem MCA + * framework. + */ +OPAL_DECLSPEC int +opal_shmem_base_close(void); + +/** + * Indication of whether a component was successfully selected or + * not + */ +OPAL_DECLSPEC extern bool opal_shmem_base_selected; + +/** + * Global component struct for the selected component + */ +OPAL_DECLSPEC extern const opal_shmem_base_component_2_0_0_t +*opal_shmem_base_component; + +/** + * Global module struct for the selected module + */ +OPAL_DECLSPEC extern const opal_shmem_base_module_2_0_0_t +*opal_shmem_base_module; + +/** + * Indicator as to whether the list of opened shmem components + * is valid or not. + */ +OPAL_DECLSPEC extern bool opal_shmem_base_components_opened_valid; + +/** + * List of all opened components; created when the shmem + * framework is initialized and destroyed when we reduce the list + * to all available shmem components. 
+ */ +OPAL_DECLSPEC extern opal_list_t opal_shmem_base_components_opened; + +/** + * Debugging output stream + */ +OPAL_DECLSPEC extern int opal_shmem_base_output; + +END_C_DECLS + +#endif /* OPAL_SHMEM_BASE_H */ diff --git a/opal/mca/shmem/base/shmem_base_close.c b/opal/mca/shmem/base/shmem_base_close.c new file mode 100644 index 0000000000..a4755eeaf3 --- /dev/null +++ b/opal/mca/shmem/base/shmem_base_close.c @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2010 Los Alamos National Security, LLC. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#include "opal/constants.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "opal/mca/shmem/shmem.h" +#include "opal/mca/shmem/base/base.h" + +/* ////////////////////////////////////////////////////////////////////////// */ +int +opal_shmem_base_close(void) +{ + /* if there is a selected shmem module, finalize it */ + if (NULL != opal_shmem_base_module && + NULL != opal_shmem_base_module->module_finalize) { + opal_shmem_base_module->module_finalize(); + } + + /** + * close all components that are still open (this should only + * happen during ompi_info). 
+ */ + if (opal_shmem_base_components_opened_valid) { + mca_base_components_close(opal_shmem_base_output, + &opal_shmem_base_components_opened, NULL); + OBJ_DESTRUCT(&opal_shmem_base_components_opened); + opal_shmem_base_components_opened_valid = false; + } + + /* all done */ + return OPAL_SUCCESS; +} + diff --git a/opal/mca/shmem/base/shmem_base_open.c b/opal/mca/shmem/base/shmem_base_open.c new file mode 100644 index 0000000000..88ee683157 --- /dev/null +++ b/opal/mca/shmem/base/shmem_base_open.c @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2010-2011 Los Alamos National Security, LLC. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#include "opal/constants.h" +#include "opal/util/output.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "opal/mca/base/mca_base_param.h" +#include "opal/mca/shmem/shmem.h" +#include "opal/mca/shmem/base/base.h" + +/* + * The following file was created by configure. It contains extern + * statements and the definition of an array of pointers to each + * component's public mca_base_component_t struct. 
+ */ +#include "opal/mca/shmem/base/static-components.h" + +/** + * globals + */ +OPAL_DECLSPEC int opal_shmem_base_output = -1; +bool opal_shmem_base_components_opened_valid = false; +opal_list_t opal_shmem_base_components_opened; + +/* ////////////////////////////////////////////////////////////////////////// */ +/** + * Register some shmem-wide MCA params + */ +int +opal_shmem_base_register_params(void) +{ + int value; + + /* debugging/verbose output */ + mca_base_param_reg_int_name("shmem", "base_verbose", + "Verbosity level of the shmem framework", + false, false, 0, &value); + /* register an INTERNAL parameter used to provide a component selection + * hint to the shmem framework. + */ + mca_base_param_reg_string_name("shmem", "RUNTIME_QUERY_hint", + "Internal OMPI parameter used to provide a " + "component selection hint to the shmem " + "framework. The value of this parameter " + "is the name of the component that is " + "available, selectable, and meets our " + "run-time behavior requirements.", + true, true, NULL, NULL); + if (0 != value) { + opal_shmem_base_output = opal_output_open(NULL); + } + else { + opal_shmem_base_output = -1; + } + + return OPAL_SUCCESS; +} + +/* ////////////////////////////////////////////////////////////////////////// */ +/** + * Function for finding and opening either all MCA components, or the one + * that was specifically requested via a MCA parameter. 
+ */ +int +opal_shmem_base_open(void) +{ + opal_shmem_base_components_opened_valid = false; + + /* open up all available components */ + if (OPAL_SUCCESS != + mca_base_components_open("shmem", opal_shmem_base_output, + mca_shmem_base_static_components, + &opal_shmem_base_components_opened, true)) { + return OPAL_ERROR; + } + + opal_shmem_base_components_opened_valid = true; + + /* all done */ + return OPAL_SUCCESS; +} + diff --git a/opal/mca/shmem/base/shmem_base_select.c b/opal/mca/shmem/base/shmem_base_select.c new file mode 100644 index 0000000000..bbb3c8b55c --- /dev/null +++ b/opal/mca/shmem/base/shmem_base_select.c @@ -0,0 +1,216 @@ +/* + * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2010-2011 Los Alamos National Security, LLC. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#ifdef HAVE_STRING_H +#include +#endif /* HAVE_STRING_H */ + +#include "opal/constants.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "opal/mca/base/mca_base_param.h" +#include "opal/mca/shmem/shmem.h" +#include "opal/mca/shmem/base/base.h" + +/* + * globals + */ +bool opal_shmem_base_selected = false; +const opal_shmem_base_component_2_0_0_t *opal_shmem_base_component = NULL; +const opal_shmem_base_module_2_0_0_t *opal_shmem_base_module = NULL; + +/* ////////////////////////////////////////////////////////////////////////// */ +char * +opal_shmem_base_best_runnable_component_name(void) +{ + mca_base_component_t *best_component = NULL; + mca_base_module_t *best_module = NULL; + + opal_output_verbose(10, opal_shmem_base_output, + "shmem: base: best_runnable_component_name: " + "Searching for best runnable component."); + /* select the best component so we can get its name. */ + if (OPAL_SUCCESS != opal_shmem_base_runtime_query(&best_module, + &best_component)) { + /* fail! 
*/ + return NULL; + } + else { + if (NULL != best_component) { + opal_output_verbose(10, opal_shmem_base_output, + "shmem: base: best_runnable_component_name: " + "Found best runnable component: (%s).", + best_component->mca_component_name); + return strdup(best_component->mca_component_name); + } + else { + opal_output_verbose(10, opal_shmem_base_output, + "shmem: base: best_runnable_component_name: " + "Could not find runnable component."); + /* no component returned, so return NULL */ + return NULL; + } + } +} + +/* ////////////////////////////////////////////////////////////////////////// */ +int +opal_shmem_base_runtime_query(mca_base_module_t **best_module, + mca_base_component_t **best_component) +{ + mca_base_component_list_item_t *cli = NULL; + mca_base_component_t *component = NULL; + mca_base_module_t *module = NULL; + opal_list_item_t *item = NULL; + int priority = 0, best_priority = INT32_MIN; + char *env_hint_name = NULL, *env_hint_val = NULL; + + *best_module = NULL; + *best_component = NULL; + + opal_output_verbose(10, opal_shmem_base_output, + "shmem: base: runtime_query: " + "Auto-selecting shmem components"); + /* we are using a nonstandard name here because shmem_RUNTIME_QUERY_hint + * is for internal use only! + * see odls_base_default_fns.c for more details. + */ + env_hint_name = mca_base_param_environ_variable("shmem_RUNTIME_QUERY_hint", + NULL, NULL); + env_hint_val = getenv(env_hint_name); + + /* traverse the list of available components. + * for each call their 'run-time query' functions to determine relative + * priority. + */ + for (item = opal_list_get_first(&opal_shmem_base_components_opened); + item != opal_list_get_end(&opal_shmem_base_components_opened); + item = opal_list_get_next(item) ) { + cli = (mca_base_component_list_item_t *)item; + component = (mca_base_component_t *)cli->cli_component; + + /* if there is a run-time query function then use it. otherwise, skip + * the component. 
+ */ + if (NULL == ((opal_shmem_base_component_2_0_0_t *) + component)->runtime_query) { + opal_output_verbose(5, opal_shmem_base_output, + "shmem: base: runtime_query: " + "(shmem) Skipping component [%s]. It does not " + "implement a run-time query function", + component->mca_component_name); + continue; + } + + /* query this component for the module and priority */ + opal_output_verbose(5, opal_shmem_base_output, + "shmem: base: runtime_query: " + "(shmem) Querying component (run-time) [%s]", + component->mca_component_name); + + ((opal_shmem_base_component_2_0_0_t *) + component)->runtime_query(&module, &priority, env_hint_val); + + /* if no module was returned, then skip component. + * this probably means that the run-time test deemed the shared memory + * backing facility unusable or unsafe. + */ + if (NULL == module) { + opal_output_verbose(5, opal_shmem_base_output, + "shmem: base: runtime_query: " + "(shmem) Skipping component [%s]. Run-time " + "Query failed to return a module", + component->mca_component_name); + continue; + } + + /* determine if this is the best module we have seen by looking the + * priority + */ + opal_output_verbose(5, opal_shmem_base_output, + "shmem: base: runtime_query: " + "(%5s) Query of component [%s] set priority to %d", + "shmem", component->mca_component_name, priority); + if (priority > best_priority) { + best_priority = priority; + *best_module = module; + *best_component = component; + } + } + + /* finished querying all components. + * make sure we found something in the process. + */ + if (NULL == *best_component) { + opal_output_verbose(5, opal_shmem_base_output, + "shmem: base: runtime_query: " + "(%5s) No component selected!", "shmem"); + /* still close the non-selected components. + * pass 0 to keep this from closing the output handle. 
+ */ + mca_base_components_close(0, &opal_shmem_base_components_opened, NULL); + return OPAL_ERR_NOT_FOUND; + } + + opal_output_verbose(5, opal_shmem_base_output, + "shmem: base: runtime_query: " + "(%5s) Selected component [%s]", "shmem", + (*best_component)->mca_component_name); + /* close the non-selected components */ + mca_base_components_close(opal_shmem_base_output, + &opal_shmem_base_components_opened, + (mca_base_component_t *)(*best_component)); + + return OPAL_SUCCESS; +} + +/* ////////////////////////////////////////////////////////////////////////// */ +int +opal_shmem_base_select(void) +{ + opal_shmem_base_component_2_0_0_t *best_component = NULL; + opal_shmem_base_module_2_0_0_t *best_module = NULL; + /* select the best component */ + if (OPAL_SUCCESS != opal_shmem_base_runtime_query( + (mca_base_module_t **)&best_module, + (mca_base_component_t **)&best_component)) { + /* it is NOT okay if we don't find a module because we need at + * least one shared memory backing facility component instance. + */ + return OPAL_ERROR; + } + + /* save the winner */ + opal_shmem_base_component = best_component; + opal_shmem_base_module = best_module; + opal_shmem_base_selected = true; + + /* initialize the winner */ + if (NULL != opal_shmem_base_module) { + return opal_shmem_base_module->module_init(); + } + else { + return OPAL_ERROR; + } +} + diff --git a/opal/mca/shmem/base/shmem_base_wrappers.c b/opal/mca/shmem/base/shmem_base_wrappers.c new file mode 100644 index 0000000000..c323c3b9ac --- /dev/null +++ b/opal/mca/shmem/base/shmem_base_wrappers.c @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. 
All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2010-2011 Los Alamos National Security, LLC. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#include "opal/constants.h" +#include "opal/mca/shmem/shmem.h" +#include "opal/mca/shmem/base/base.h" + +/* ////////////////////////////////////////////////////////////////////////// */ +int +opal_shmem_segment_create(opal_shmem_ds_t *ds_buf, + const char *file_name, + size_t size) +{ + if (!opal_shmem_base_selected) { + return OPAL_ERROR; + } + + return opal_shmem_base_module->segment_create(ds_buf, file_name, size); +} + +/* ////////////////////////////////////////////////////////////////////////// */ +int +opal_shmem_ds_copy(const opal_shmem_ds_t *from, + opal_shmem_ds_t *to) +{ + if (!opal_shmem_base_selected) { + return OPAL_ERROR; + } + + return opal_shmem_base_module->ds_copy(from, to); +} + +/* ////////////////////////////////////////////////////////////////////////// */ +void * +opal_shmem_segment_attach(opal_shmem_ds_t *ds_buf) +{ + if (!opal_shmem_base_selected) { + return NULL; + } + + return opal_shmem_base_module->segment_attach(ds_buf); +} + +/* ////////////////////////////////////////////////////////////////////////// */ +int +opal_shmem_segment_detach(opal_shmem_ds_t *ds_buf) +{ + if (!opal_shmem_base_selected) { + return OPAL_ERROR; + } + + return opal_shmem_base_module->segment_detach(ds_buf); +} + +/* ////////////////////////////////////////////////////////////////////////// */ +int +opal_shmem_unlink(opal_shmem_ds_t *ds_buf) +{ + if (!opal_shmem_base_selected) { + return OPAL_ERROR; + } + + return opal_shmem_base_module->unlink(ds_buf); +} + diff --git a/opal/mca/shmem/mmap/Makefile.am b/opal/mca/shmem/mmap/Makefile.am new file mode 100644 index 
0000000000..7cf84af2da --- /dev/null +++ b/opal/mca/shmem/mmap/Makefile.am @@ -0,0 +1,49 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2010-2011 Los Alamos National Security, LLC. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +sources = \ + shmem_mmap.h \ + shmem_mmap_component.c \ + shmem_mmap_module.c + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds). + +if MCA_BUILD_opal_shmem_mmap_DSO +component_noinst = +component_install = mca_shmem_mmap.la +else +component_noinst = libmca_shmem_mmap.la +component_install = +endif + +# help file +dist_pkgdata_DATA = help-opal-shmem-mmap.txt + +mcacomponentdir = $(pkglibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_shmem_mmap_la_SOURCES = $(sources) +mca_shmem_mmap_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_shmem_mmap_la_SOURCES =$(sources) +libmca_shmem_mmap_la_LDFLAGS = -module -avoid-version diff --git a/opal/mca/shmem/mmap/configure.m4 b/opal/mca/shmem/mmap/configure.m4 new file mode 100644 index 0000000000..785e9cae27 --- /dev/null +++ b/opal/mca/shmem/mmap/configure.m4 @@ -0,0 +1,50 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. 
+# Copyright (c) 2004-2005 The University of Tennessee and The University
+#                         of Tennessee Research Foundation. All rights
+#                         reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+#                         University of Stuttgart. All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+#                         All rights reserved.
+# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
+# Copyright (c) 2010-2011 Los Alamos National Security, LLC.
+#                         All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+# MCA_opal_shmem_mmap_CONFIG(action-if-can-compile,
+#                            [action-if-cant-compile])
+# ------------------------------------------------
+AC_DEFUN([MCA_opal_shmem_mmap_CONFIG],[
+    AC_CONFIG_FILES([opal/mca/shmem/mmap/Makefile])
+
+    # mmap support is on unless --disable-mmap-shmem is given or mmap is missing
+    AC_MSG_CHECKING([if want mmap shared memory support])
+    AC_ARG_ENABLE([mmap-shmem],
+        [AS_HELP_STRING([--disable-mmap-shmem],
+                        [disable mmap shared memory support (default: enabled)])])
+    AS_IF([test "$enable_mmap_shmem" = "no"],
+          [AC_MSG_RESULT([no])
+           shmem_mmap_sm_build_mmap=0],
+          [AC_MSG_RESULT([yes])
+           AC_SEARCH_LIBS([mmap], [c],
+                  [shmem_mmap_sm_build_mmap=1],
+                  [shmem_mmap_sm_build_mmap=0])])
+    AS_IF([test "$enable_mmap_shmem" = "yes" && test "$shmem_mmap_sm_build_mmap" = "0"],
+          [AC_MSG_WARN([mmap shared memory support requested but not found])
+           AC_MSG_ERROR([Cannot continue])])
+
+    AS_IF([test "$shmem_mmap_sm_build_mmap" = "1"], [$1], [$2])
+
+    AC_DEFINE_UNQUOTED([OPAL_SHMEM_MMAP],
+                       [$shmem_mmap_sm_build_mmap],
+                       [Whether we have shared memory support for mmap or not])
+])dnl
diff --git a/opal/mca/shmem/mmap/help-opal-shmem-mmap.txt b/opal/mca/shmem/mmap/help-opal-shmem-mmap.txt
new file mode 100644
index 0000000000..372dee6f1f
--- /dev/null
+++ b/opal/mca/shmem/mmap/help-opal-shmem-mmap.txt
@@ -0,0 +1,41 @@
+# -*- text -*-
+#
+# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
+# Copyright (c) 2010 Los Alamos National Security, LLC. +# All rights reserved. +# +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +# This is the US/English help file for Open MPI's common shmem support. +# +[sys call fail] +A system call failed during shared memory initialization that should +not have. It is likely that your MPI job will now either abort or +experience performance degradation. + + Local host: %s + System call: %s %s + Error: %s (errno %d) +# +[mmap on nfs] +WARNING: Open MPI will create a shared memory backing file in a +directory that appears to be mounted on a network filesystem. +Creating the shared memory backing file on a network file system, such +as NFS or Lustre, is not recommended -- it may cause excessive network +traffic to your file servers and/or cause shared memory traffic in +Open MPI to be much slower than expected. + +You may want to check what the typical temporary directory is on your +node. Possible sources of the location of this temporary directory +include the $TEMPDIR, $TEMP, and $TMP environment variables. + +Note, too, that system administrators can set a list of filesystems +where Open MPI is disallowed from creating temporary files by setting +the MCA parameter "orte_no_session_dir". + + Local host: %s + Filename: %s diff --git a/opal/mca/shmem/mmap/shmem_mmap.h b/opal/mca/shmem/mmap/shmem_mmap.h new file mode 100644 index 0000000000..015a8ab414 --- /dev/null +++ b/opal/mca/shmem/mmap/shmem_mmap.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. 
+ * All rights reserved. + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2010-2011 Los Alamos National Security, LLC. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_SHMEM_MMAP_EXPORT_H +#define MCA_SHMEM_MMAP_EXPORT_H + +#include "opal_config.h" + +#include "opal/mca/mca.h" +#include "opal/mca/shmem/shmem.h" + +BEGIN_C_DECLS + +/** + * mmap shmem component type; its one instance is mca_shmem_mmap_component. + */ +typedef struct opal_shmem_mmap_component_t { + /* base component struct */ + opal_shmem_base_component_t super; + /* priority this component reports from its query functions */ + int priority; +} opal_shmem_mmap_component_t; + +OPAL_MODULE_DECLSPEC extern opal_shmem_mmap_component_t +mca_shmem_mmap_component; +/* mmap shmem module type; it adds no members to the base module it wraps */ +typedef struct opal_shmem_mmap_module_t { + opal_shmem_base_module_t super; +} opal_shmem_mmap_module_t; +extern opal_shmem_mmap_module_t opal_shmem_mmap_module; + +END_C_DECLS + +#endif /* MCA_SHMEM_MMAP_EXPORT_H */ diff --git a/opal/mca/shmem/mmap/shmem_mmap_component.c b/opal/mca/shmem/mmap/shmem_mmap_component.c new file mode 100644 index 0000000000..3787b60849 --- /dev/null +++ b/opal/mca/shmem/mmap/shmem_mmap_component.c @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2010-2011 Los Alamos National Security, LLC. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. + */ + +#include "opal_config.h" + +#include "opal/constants.h" +#include "opal/mca/shmem/shmem.h" +#include "shmem_mmap.h" + +/** + * public string showing the mmap shmem component version number + */ +const char *opal_shmem_mmap_component_version_string = + "OPAL mmap shmem MCA component version " OPAL_VERSION; + +/** + * local functions (referenced by mca_shmem_mmap_component below) + */ +static int mmap_open(void); +static int mmap_query(mca_base_module_t **module, int *priority); +static int mmap_runtime_query(mca_base_module_t **module, + int *priority, + const char *hint); + +/** + * instantiate the public struct with all of our public information + * and pointers to our public functions in it + */ +opal_shmem_mmap_component_t mca_shmem_mmap_component = { + /* ////////////////////////////////////////////////////////////////////// */ + /* super */ + /* ////////////////////////////////////////////////////////////////////// */ + { + /** + * common MCA component data + */ + { + OPAL_SHMEM_BASE_VERSION_2_0_0, + + /* component name and version */ + "mmap", + OPAL_MAJOR_VERSION, + OPAL_MINOR_VERSION, + OPAL_RELEASE_VERSION, + + /* component open */ + mmap_open, + /* component close */ + NULL, + /* component query */ + mmap_query + }, + /* MCA v2.0.0 component meta data */ + { + /* the component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + }, + mmap_runtime_query, + }, + /* ////////////////////////////////////////////////////////////////////// */ + /* mmap component-specific information */ + /* see: shmem_mmap.h for more information */ + /* 
////////////////////////////////////////////////////////////////////// */ + /* (default) priority - set high to make mmap the default */ + 50 +}; + +/* ////////////////////////////////////////////////////////////////////////// */ +static int +mmap_runtime_query(mca_base_module_t **module, + int *priority, + const char *hint) +{ + /* no run-time test is needed for mmap (hint is unused), so this is easy */ + *priority = mca_shmem_mmap_component.priority; + *module = (mca_base_module_t *)&opal_shmem_mmap_module.super; + return OPAL_SUCCESS; +} + +/* ////////////////////////////////////////////////////////////////////////// */ +static int +mmap_open(void) +{ + mca_base_param_reg_int( + &mca_shmem_mmap_component.super.base_version, + "priority", "Priority of the mmap shmem component", false, false, + mca_shmem_mmap_component.priority, &mca_shmem_mmap_component.priority + ); + + return OPAL_SUCCESS; +} + +/* ////////////////////////////////////////////////////////////////////////// */ +static int +mmap_query(mca_base_module_t **module, int *priority) +{ + *priority = mca_shmem_mmap_component.priority; + *module = (mca_base_module_t *)&opal_shmem_mmap_module.super; + return OPAL_SUCCESS; +} + diff --git a/opal/mca/shmem/mmap/shmem_mmap_module.c b/opal/mca/shmem/mmap/shmem_mmap_module.c new file mode 100644 index 0000000000..36b0fc7453 --- /dev/null +++ b/opal/mca/shmem/mmap/shmem_mmap_module.c @@ -0,0 +1,453 @@ +/* + * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2010-2011 Los Alamos National Security, LLC.
+ *                         All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "opal_config.h"
+
+#include <errno.h>
+#ifdef HAVE_FCNTL_H
+#include <fcntl.h>
+#endif /* HAVE_FCNTL_H */
+#ifdef HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif /* HAVE_SYS_MMAN_H */
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif /* HAVE_UNISTD_H */
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif /* HAVE_SYS_TYPES_H */
+#ifdef HAVE_STRING_H
+#include <string.h>
+#endif /* HAVE_STRING_H */
+
+#include "opal/constants.h"
+#include "opal_stdint.h"
+#include "opal/util/output.h"
+#include "opal/util/path.h"
+#include "opal/util/show_help.h"
+#include "opal/mca/base/mca_base_param.h"
+#include "opal/mca/shmem/shmem.h"
+#include "opal/mca/shmem/base/base.h"
+
+#include "shmem_mmap.h"
+
+/* for tons of debug output: -mca shmem_base_verbose 70 */
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* local functions */
+/* ////////////////////////////////////////////////////////////////////////// */
+static int
+module_init(void);
+
+static int
+segment_create(opal_shmem_ds_t *ds_buf,
+               const char *file_name,
+               size_t size);
+
+static int
+ds_copy(const opal_shmem_ds_t *from,
+        opal_shmem_ds_t *to);
+
+static void *
+segment_attach(opal_shmem_ds_t *ds_buf);
+
+static int
+segment_detach(opal_shmem_ds_t *ds_buf);
+
+static int
+segment_unlink(opal_shmem_ds_t *ds_buf);
+
+static int
+module_finalize(void);
+
+/*
+ * mmap shmem module: the function table handed to the shmem framework
+ */
+opal_shmem_mmap_module_t opal_shmem_mmap_module = {
+    /* super */
+    {
+        module_init,
+        segment_create,
+        ds_copy,
+        segment_attach,
+        segment_detach,
+        segment_unlink,
+        module_finalize
+    }
+};
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* private utility functions */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/** + * completely resets the contents of *ds_buf + */ +static inline void +shmem_ds_reset(opal_shmem_ds_t *ds_buf) +{ + OPAL_OUTPUT_VERBOSE( + (70, opal_shmem_base_output, + "%s: %s: shmem_ds_resetting " + "(opid: %lu id: %d, size: %"PRIsize_t", name: %s)\n", + mca_shmem_mmap_component.super.base_version.mca_type_name, + mca_shmem_mmap_component.super.base_version.mca_component_name, + (unsigned long)ds_buf->opid, ds_buf->seg_id, ds_buf->seg_size, + ds_buf->seg_name) + ); + + ds_buf->opid = 0; + ds_buf->seg_cpid = 0; + OPAL_SHMEM_DS_RESET_FLAGS(ds_buf); + ds_buf->seg_id = OPAL_SHMEM_DS_ID_INVALID; + ds_buf->seg_size = 0; + memset(ds_buf->seg_name, '\0', OPAL_PATH_MAX); +} + +/* ////////////////////////////////////////////////////////////////////////// */ +static int +module_init(void) +{ + /* nothing to do */ + return OPAL_SUCCESS; +} + +/* ////////////////////////////////////////////////////////////////////////// */ +static int +module_finalize(void) +{ + /* nothing to do */ + return OPAL_SUCCESS; +} + +/* ////////////////////////////////////////////////////////////////////////// */ +static int +ds_copy(const opal_shmem_ds_t *from, + opal_shmem_ds_t *to) +{ + pid_t my_pid = getpid(); + + /* inter-process copy - exclude process-specific data */ + if (from->opid != my_pid) { + /* mask out internal flags */ + to->flags = (from->flags & OPAL_SHMEM_DS_FLAGS_INTERNAL_MASK); + to->seg_base_addr = NULL; + } + /* i am the owner process, so i can safely copy all the information */ + else { + to->flags = from->flags; + to->seg_base_addr = from->seg_base_addr; + } + + to->opid = my_pid; + to->seg_id = from->seg_id; + to->seg_size = from->seg_size; + to->seg_cpid = from->seg_cpid; + memcpy(to->seg_name, from->seg_name, OPAL_PATH_MAX); + + OPAL_OUTPUT_VERBOSE( + (70, opal_shmem_base_output, + "%s: %s: ds_copy complete " + "from: (opid: %lu, id: %d, size: %"PRIsize_t", " + "name: %s flags: 0x%02x) " + "to: (opid: %lu, id: %d, size: %"PRIsize_t", " + "name: %s flags: 
0x%02x)\n", + mca_shmem_mmap_component.super.base_version.mca_type_name, + mca_shmem_mmap_component.super.base_version.mca_component_name, + (unsigned long)from->opid, from->seg_id, from->seg_size, + from->seg_name, from->flags, (unsigned long)to->opid, to->seg_id, + to->seg_size, to->seg_name, to->flags) + ); + + return OPAL_SUCCESS; +} + +/* ////////////////////////////////////////////////////////////////////////// */ +static int +segment_create(opal_shmem_ds_t *ds_buf, + const char *file_name, + size_t size) +{ + int rc = OPAL_SUCCESS; + char *tmp_fn = NULL; + pid_t my_pid = getpid(); + /* the real size of the shared memory segment. this includes enough space + * to store our segment header. + */ + size_t real_size = size + sizeof(opal_shmem_seg_hdr_t); + opal_shmem_seg_hdr_t *seg_hdrp = MAP_FAILED; + + /* init the contents of opal_shmem_ds_t */ + shmem_ds_reset(ds_buf); + + /* determine whether the specified filename is on a network file system. + * this is an important check because if the backing store is located on + * a network filesystem, the user will see a shared memory performance hit. 
+ * + * strduping file_name because opal_path_nfs doesn't take a const char * + */ + if (NULL == (tmp_fn = strdup(file_name))) { + /* out of resources */ + return OPAL_ERROR; + } + else if (opal_path_nfs(tmp_fn)) { + char hn[MAXHOSTNAMELEN]; + gethostname(hn, MAXHOSTNAMELEN - 1); + hn[MAXHOSTNAMELEN - 1] = '\0'; + opal_show_help("help-opal-shmem-mmap.txt", "mmap on nfs", 1, hn, + tmp_fn); + } + free(tmp_fn); + + if (-1 == (ds_buf->seg_id = open(file_name, O_CREAT | O_RDWR, 0600))) { + int err = errno; + char hn[MAXHOSTNAMELEN]; + gethostname(hn, MAXHOSTNAMELEN - 1); + hn[MAXHOSTNAMELEN - 1] = '\0'; + opal_show_help("help-opal-shmem-mmap.txt", "sys call fail", 1, hn, + "open(2)", "", strerror(err), err); + rc = OPAL_ERROR; + goto out; + } + /* size backing file - note the use of real_size here */ + else if (0 != ftruncate(ds_buf->seg_id, real_size)) { + int err = errno; + char hn[MAXHOSTNAMELEN]; + gethostname(hn, MAXHOSTNAMELEN - 1); + hn[MAXHOSTNAMELEN - 1] = '\0'; + opal_show_help("help-opal-shmem-mmap.txt", "sys call fail", 1, hn, + "ftruncate(2)", "", strerror(err), err); + rc = OPAL_ERROR; + goto out; + } + else if (MAP_FAILED == (seg_hdrp = mmap(NULL, real_size, + PROT_READ | PROT_WRITE, MAP_SHARED, + ds_buf->seg_id, 0))) { + int err = errno; + char hn[MAXHOSTNAMELEN]; + gethostname(hn, MAXHOSTNAMELEN - 1); + hn[MAXHOSTNAMELEN - 1] = '\0'; + opal_show_help("help-opal-shmem-mmap.txt", "sys call fail", 1, hn, + "mmap(2)", "", strerror(err), err); + rc = OPAL_ERROR; + goto out; + } + /* all is well */ + else { + /* -- initialize the shared memory segment -- */ + opal_atomic_rmb(); + + /* init segment lock */ + opal_atomic_init(&seg_hdrp->lock, OPAL_ATOMIC_UNLOCKED); + /* i was the creator of this segment, so note that fact */ + seg_hdrp->cpid = my_pid; + + opal_atomic_wmb(); + + /* -- initialize the contents of opal_shmem_ds_t -- */ + ds_buf->opid = my_pid; + ds_buf->seg_cpid = my_pid; + ds_buf->seg_size = real_size; + ds_buf->seg_base_addr = (unsigned char 
*)seg_hdrp; + strncpy(ds_buf->seg_name, file_name, OPAL_PATH_MAX - 1); + + /* set "valid" bit because setment creation was successful */ + OPAL_SHMEM_DS_SET_VALID(ds_buf); + + OPAL_OUTPUT_VERBOSE( + (70, opal_shmem_base_output, + "%s: %s: create successful " + "(opid: %lu id: %d, size: %"PRIsize_t", name: %s)\n", + mca_shmem_mmap_component.super.base_version.mca_type_name, + mca_shmem_mmap_component.super.base_version.mca_component_name, + (unsigned long)ds_buf->opid, ds_buf->seg_id, ds_buf->seg_size, + ds_buf->seg_name) + ); + } + +out: + /* in this component, the id is the file descriptor returned by open. this + * check is here to see if it is safe to call close on the file descriptor. + * that is, we are making sure that our call to open was successful and + * we are not not in an error path. + */ + if (-1 != ds_buf->seg_id) { + if (0 != close(ds_buf->seg_id)) { + int err = errno; + char hn[MAXHOSTNAMELEN]; + gethostname(hn, MAXHOSTNAMELEN - 1); + hn[MAXHOSTNAMELEN - 1] = '\0'; + opal_show_help("help-opal-shmem-mmap.txt", "sys call fail", 1, hn, + "close(2)", "", strerror(err), err); + rc = OPAL_ERROR; + } + } + + /* an error occured, so invalidate the shmem object and munmap if needed */ + if (OPAL_SUCCESS != rc) { + if (MAP_FAILED != seg_hdrp) { + munmap(seg_hdrp, real_size); + } + shmem_ds_reset(ds_buf); + } + return rc; +} + +/* ////////////////////////////////////////////////////////////////////////// */ +/** + * segment_attach can only be called after a successful call to segment_create + */ +static void * +segment_attach(opal_shmem_ds_t *ds_buf) +{ + pid_t my_pid = getpid(); + + if (my_pid != ds_buf->seg_cpid) { + if (-1 == (ds_buf->seg_id = open(ds_buf->seg_name, O_CREAT | O_RDWR, + 0600))) { + int err = errno; + char hn[MAXHOSTNAMELEN]; + gethostname(hn, MAXHOSTNAMELEN - 1); + hn[MAXHOSTNAMELEN - 1] = '\0'; + opal_show_help("help-opal-shmem-mmap.txt", "sys call fail", 1, hn, + "open(2)", "", strerror(err), err); + return NULL; + } + else if (MAP_FAILED 
== (ds_buf->seg_base_addr = + mmap(NULL, ds_buf->seg_size, + PROT_READ | PROT_WRITE, MAP_SHARED, + ds_buf->seg_id, 0))) { + int err = errno; + char hn[MAXHOSTNAMELEN]; + gethostname(hn, MAXHOSTNAMELEN - 1); + hn[MAXHOSTNAMELEN - 1] = '\0'; + opal_show_help("help-opal-shmem-mmap.txt", "sys call fail", 1, hn, + "mmap(2)", "", strerror(err), err); + /* mmap failed, so close the file and return NULL - no error check + * here because we are already in an error path... + */ + close(ds_buf->seg_id); + return NULL; + } + /* all is well */ + else { + /* if close fails here, that's okay. just let the user know and + * continue. if we got this far, open and mmap were successful... + */ + if (0 != close(ds_buf->seg_id)) { + int err = errno; + char hn[MAXHOSTNAMELEN]; + gethostname(hn, MAXHOSTNAMELEN - 1); + hn[MAXHOSTNAMELEN - 1] = '\0'; + opal_show_help("help-opal-shmem-mmap.txt", "sys call fail", 1, + hn, "close(2)", "", strerror(err), err); + } + } + } + /* else i was the segment creator. nothing to do here because all the hard + * work was done in segment_create :-). 
+ */ + + OPAL_OUTPUT_VERBOSE( + (70, opal_shmem_base_output, + "%s: %s: attach successful " + "(opid: %lu id: %d, size: %"PRIsize_t", name: %s)\n", + mca_shmem_mmap_component.super.base_version.mca_type_name, + mca_shmem_mmap_component.super.base_version.mca_component_name, + (unsigned long)ds_buf->opid, ds_buf->seg_id, ds_buf->seg_size, + ds_buf->seg_name) + ); + + /* update returned base pointer with an offset that hides our stuff */ + return (ds_buf->seg_base_addr + sizeof(opal_shmem_seg_hdr_t)); +} + +/* ////////////////////////////////////////////////////////////////////////// */ +static int +segment_detach(opal_shmem_ds_t *ds_buf) +{ + int rc = OPAL_SUCCESS; + + OPAL_OUTPUT_VERBOSE( + (70, opal_shmem_base_output, + "%s: %s: detaching " + "(opid: %lu id: %d, size: %"PRIsize_t", name: %s)\n", + mca_shmem_mmap_component.super.base_version.mca_type_name, + mca_shmem_mmap_component.super.base_version.mca_component_name, + (unsigned long)ds_buf->opid, ds_buf->seg_id, ds_buf->seg_size, + ds_buf->seg_name) + ); + + if (0 != munmap(ds_buf->seg_base_addr, ds_buf->seg_size)) { + int err = errno; + char hn[MAXHOSTNAMELEN]; + gethostname(hn, MAXHOSTNAMELEN - 1); + hn[MAXHOSTNAMELEN - 1] = '\0'; + opal_show_help("help-opal-shmem-mmap.txt", "sys call fail", 1, hn, + "munmap(2)", "", strerror(err), err); + rc = OPAL_ERROR; + } + /* reset the contents of the opal_shmem_ds_t associated with this + * shared memory segment. 
+ */ + shmem_ds_reset(ds_buf); + return rc; +} + +/* ////////////////////////////////////////////////////////////////////////// */ +static int +segment_unlink(opal_shmem_ds_t *ds_buf) +{ + OPAL_OUTPUT_VERBOSE( + (70, opal_shmem_base_output, + "%s: %s: unlinking " + "(opid: %lu id: %d, size: %"PRIsize_t", name: %s)\n", + mca_shmem_mmap_component.super.base_version.mca_type_name, + mca_shmem_mmap_component.super.base_version.mca_component_name, + (unsigned long)ds_buf->opid, ds_buf->seg_id, ds_buf->seg_size, + ds_buf->seg_name) + ); + + if (-1 == unlink(ds_buf->seg_name)) { + int err = errno; + char hn[MAXHOSTNAMELEN]; + gethostname(hn, MAXHOSTNAMELEN - 1); + hn[MAXHOSTNAMELEN - 1] = '\0'; + opal_show_help("help-opal-shmem-mmap.txt", "sys call fail", 1, hn, + "unlink(2)", ds_buf->seg_name, strerror(err), err); + return OPAL_ERROR; + } + + /* don't completely reset the opal_shmem_ds_t. in particular, only reset + * the id and flip the invalid bit. size and name values will remain valid + * across unlinks. other information stored in flags will remain untouched. + */ + ds_buf->seg_id = OPAL_SHMEM_DS_ID_INVALID; + /* note: this is only chaning the valid bit to 0. this is not the same + * as calling invalidate(ds_buf). + */ + OPAL_SHMEM_DS_INVALIDATE(ds_buf); + return OPAL_SUCCESS; +} + diff --git a/opal/mca/shmem/posix/Makefile.am b/opal/mca/shmem/posix/Makefile.am new file mode 100644 index 0000000000..77c162c353 --- /dev/null +++ b/opal/mca/shmem/posix/Makefile.am @@ -0,0 +1,50 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. 
+# All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2010-2011 Los Alamos National Security, LLC. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +sources = \ + shmem_posix.h \ + shmem_posix_common_utils.h shmem_posix_common_utils.c \ + shmem_posix_component.c \ + shmem_posix_module.c + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds). + +if MCA_BUILD_opal_shmem_posix_DSO +component_noinst = +component_install = mca_shmem_posix.la +else +component_noinst = libmca_shmem_posix.la +component_install = +endif + +mcacomponentdir = $(pkglibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_shmem_posix_la_SOURCES = $(sources) +mca_shmem_posix_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_shmem_posix_la_SOURCES = $(sources) +libmca_shmem_posix_la_LDFLAGS = -module -avoid-version + +# help file +dist_pkgdata_DATA = help-opal-shmem-posix.txt diff --git a/opal/mca/shmem/posix/configure.m4 b/opal/mca/shmem/posix/configure.m4 new file mode 100644 index 0000000000..326bbce827 --- /dev/null +++ b/opal/mca/shmem/posix/configure.m4 @@ -0,0 +1,50 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2010-2011 Los Alamos National Security, LLC. +# All rights reserved.
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_opal_shmem_posix_CONFIG(action-if-can-compile, +# [action-if-cant-compile]) +# ------------------------------------------------ +AC_DEFUN([MCA_opal_shmem_posix_CONFIG],[ + AC_CONFIG_FILES([opal/mca/shmem/posix/Makefile]) + + # do we have the posix shm stuff? + AC_MSG_CHECKING([if want POSIX shared memory support]) + AC_ARG_ENABLE(posix-shmem, + AC_HELP_STRING([--disable-posix-shmem], + [disable posix shared memory support (default: enabled)])) + AS_IF([test "$enable_posix_shmem" = "no"], + [AC_MSG_RESULT([no]) + shmem_posix_sm_build_posix=0], + [AC_MSG_RESULT([yes]) + AC_SEARCH_LIBS([shm_open], [rt], + [shmem_posix_sm_build_posix=1], + [shmem_posix_sm_build_posix=0])]) + AS_IF([test "$enable_posix_shmem" = "yes" -a "$shmem_posix_sm_build_posix" = "0"], + [AC_MSG_WARN([POSIX shared memory support requested but not found]) + AC_MSG_ERROR([Cannot continue])]) + + AS_IF([test "$shmem_posix_sm_build_posix" = "1"], [$1], [$2]) + + AC_DEFINE_UNQUOTED([OPAL_SHMEM_POSIX], + [$shmem_posix_sm_build_posix], + [Whether we have shared memory support for POSIX or not]) +])dnl diff --git a/opal/mca/shmem/posix/help-opal-shmem-posix.txt b/opal/mca/shmem/posix/help-opal-shmem-posix.txt new file mode 100644 index 0000000000..ffa4585fb8 --- /dev/null +++ b/opal/mca/shmem/posix/help-opal-shmem-posix.txt @@ -0,0 +1,22 @@ +# -*- text -*- +# +# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2010-2011 Los Alamos National Security, LLC. +# All rights reserved. +# +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +# This is the US/English help file for Open MPI's common shmem support. +# +[sys call fail] +A system call failed during shared memory initialization that should +not have. It is likely that your MPI job will now either abort or +experience performance degradation. 
+ + Local host: %s + System call: %s %s + Error: %s (errno %d) diff --git a/opal/mca/shmem/posix/shmem_posix.h b/opal/mca/shmem/posix/shmem_posix.h new file mode 100644 index 0000000000..9794bb22d8 --- /dev/null +++ b/opal/mca/shmem/posix/shmem_posix.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2010-2011 Los Alamos National Security, LLC. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_SHMEM_POSIX_EXPORT_H +#define MCA_SHMEM_POSIX_EXPORT_H + +#include "opal_config.h" + +#include "opal/mca/mca.h" +#include "opal/mca/shmem/shmem.h" + +/* max number of attempts to find an available shm_open file name. see + * comments below for more details. + */ +#define OPAL_SHMEM_POSIX_MAX_ATTEMPTS 128 + +/* need the '/' for Solaris 10 and others, i'm sure */ +#define OPAL_SHMEM_POSIX_FILE_NAME_PREFIX "/open_mpi." + +/* posix sm file name length max. on some systems shm_open's file name limit + * is pretty low (32 chars, for instance). 16 is plenty for our needs, but + * extra work on our end is needed to ensure things work properly. if a + * system's limit is lower than OPAL_SHMEM_POSIX_FILE_LEN_MAX, then the + * run-time test will catch that fact and posix sm will be disqualified. see + * comments regarding this in shmem_posix_module.c. 
+ */ +#define OPAL_SHMEM_POSIX_FILE_LEN_MAX 16 + +BEGIN_C_DECLS + +/* globally exported variable to hold the posix component. */ +typedef struct opal_shmem_posix_component_t { + /* base component struct */ + opal_shmem_base_component_t super; + /* priority for posix component */ + int priority; +} opal_shmem_posix_component_t; + +OPAL_MODULE_DECLSPEC extern opal_shmem_posix_component_t +mca_shmem_posix_component; + +typedef struct opal_shmem_posix_module_t { + opal_shmem_base_module_t super; +} opal_shmem_posix_module_t; +extern opal_shmem_posix_module_t opal_shmem_posix_module; + +END_C_DECLS + +#endif /* MCA_SHMEM_POSIX_EXPORT_H */ diff --git a/opal/mca/shmem/posix/shmem_posix_common_utils.c b/opal/mca/shmem/posix/shmem_posix_common_utils.c new file mode 100644 index 0000000000..3c701d8626 --- /dev/null +++ b/opal/mca/shmem/posix/shmem_posix_common_utils.c @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2010-2011 Los Alamos National Security, LLC. + * All rights reserved. 
+ * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#include <errno.h> +#ifdef HAVE_FCNTL_H +#include <fcntl.h> +#endif /* HAVE_FCNTL_H */ +#ifdef HAVE_SYS_MMAN_H +#include <sys/mman.h> +#endif /* HAVE_SYS_MMAN_H */ +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif /* HAVE_UNISTD_H */ +#ifdef HAVE_SYS_TYPES_H +#include <sys/types.h> +#endif /* HAVE_SYS_TYPES_H */ + +#include "opal/util/output.h" +#include "opal/util/show_help.h" +#include "opal/mca/shmem/shmem.h" + +#include "shmem_posix.h" +#include "shmem_posix_common_utils.h" + +/* ////////////////////////////////////////////////////////////////////////// */ +/* tries up to OPAL_SHMEM_POSIX_MAX_ATTEMPTS generated names; returns the descriptor of the first object it manages to create (its name is left in posix_file_name_buff), or -1 on failure. */ +int +shmem_posix_shm_open(char *posix_file_name_buff, size_t size) +{ + int attempt = 0, fd = -1; + + /* workaround for simultaneous posix shm_opens on the same node (e.g. + * multiple Open MPI jobs sharing a node). name collision during component + * runtime will happen, so protect against it by trying a few times. + */ + do { + /* format: /open_mpi.nnnn + * see comment in shmem_posix.h that explains why we chose to do things + * this way. + */ + snprintf(posix_file_name_buff, size, "%s%04d", + OPAL_SHMEM_POSIX_FILE_NAME_PREFIX, attempt++); + /* the check for the existence of the object and its creation if it + * does not exist are performed atomically.
+ */ + if (-1 == (fd = shm_open(posix_file_name_buff, + O_CREAT | O_EXCL | O_RDWR, 0600))) { + int err = errno; + /* the object already exists, so try again with a new name */ + if (EEXIST == err) { + continue; + } + /* a real error occurred, notify the user and give up right away */ + else { + char hn[MAXHOSTNAMELEN]; + gethostname(hn, MAXHOSTNAMELEN - 1); + hn[MAXHOSTNAMELEN - 1] = '\0'; + opal_show_help("help-opal-shmem-posix.txt", "sys call fail", 1, + hn, "shm_open(2)", posix_file_name_buff, + strerror(err), err); + /* nothing was opened, so there is nothing to clean up */ + return -1; + } + } + /* we found an available file name */ + else { + break; + } + } while (attempt < OPAL_SHMEM_POSIX_MAX_ATTEMPTS); + + /* if we didn't find a name, let the user know that we tried and failed. + * fd is only -1 here when every attempt hit an existing name, so a + * success on the very last attempt is not reported as a failure. + */ + if (-1 == fd) { + opal_output(0, "shmem: posix: file name search - max attempts exceeded. " + "cannot continue with posix.\n"); + } + return fd; +} + diff --git a/opal/mca/shmem/posix/shmem_posix_common_utils.h b/opal/mca/shmem/posix/shmem_posix_common_utils.h new file mode 100644 index 0000000000..e83a2bc736 --- /dev/null +++ b/opal/mca/shmem/posix/shmem_posix_common_utils.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2010 IBM Corporation. All rights reserved. + * Copyright (c) 2010-2011 Los Alamos National Security, LLC. + * All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/** + * @file + * + * shmem (shared memory backing facility) framework utilities + */ + +#ifndef OPAL_SHMEM_POSIX_COMMON_UTILS_H +#define OPAL_SHMEM_POSIX_COMMON_UTILS_H + +BEGIN_C_DECLS + +#include "opal_config.h" + +/** + * this routine searches for an available shm_open file name. + * + * @return if successful, a non-negative file descriptor is returned and + * posix_file_name_buff will contain the file name associated with the + * successful shm_open. otherwise, -1 is returned and the contents of + * posix_file_name_buff are undefined. + */ +OPAL_DECLSPEC extern int shmem_posix_shm_open(char *posix_file_name_buff, + size_t size); + +END_C_DECLS + +#endif /* OPAL_SHMEM_POSIX_COMMON_UTILS_H */ diff --git a/opal/mca/shmem/posix/shmem_posix_component.c b/opal/mca/shmem/posix/shmem_posix_component.c new file mode 100644 index 0000000000..98729bfca1 --- /dev/null +++ b/opal/mca/shmem/posix/shmem_posix_component.c @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2010-2011 Los Alamos National Security, LLC. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. 
Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. + */ + +#include "opal_config.h" + +#ifdef HAVE_SYS_MMAN_H +#include <sys/mman.h> +#endif /* HAVE_SYS_MMAN_H */ +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif /* HAVE_UNISTD_H */ + +#include "opal/constants.h" +#include "opal/util/show_help.h" +#include "opal/util/output.h" +#include "opal/mca/shmem/base/base.h" +#include "opal/mca/shmem/shmem.h" +#include "shmem_posix.h" +#include "shmem_posix_common_utils.h" + +/* public string showing the shmem ompi_posix component version number */ +const char *opal_shmem_posix_component_version_string = + "OPAL posix shmem MCA component version " OPAL_VERSION; + +/* local functions */ +static int posix_open(void); +static int posix_query(mca_base_module_t **module, int *priority); +static int posix_runtime_query(mca_base_module_t **module, + int *priority, + const char *hint); + +/* local variables */ +static bool rt_successful = false; + +/* instantiate the public struct with all of our public information + * and pointers to our public functions in it + */ +opal_shmem_posix_component_t mca_shmem_posix_component = { + /* ////////////////////////////////////////////////////////////////////// */ + /* super */ + /* ////////////////////////////////////////////////////////////////////// */ + { + /* common MCA component data */ + { + OPAL_SHMEM_BASE_VERSION_2_0_0, + + /* component name and version */ + "posix", + OPAL_MAJOR_VERSION, + OPAL_MINOR_VERSION, + OPAL_RELEASE_VERSION, + + /* component open */ + posix_open, + /* component close */ + NULL, + /* component query */ + posix_query + }, + /* MCA v2.0.0 component meta data */ + { + /* the component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + }, + posix_runtime_query, + }, + /*
////////////////////////////////////////////////////////////////////// */ + /* posix component-specific information */ + /* see: shmem_posix.h for more information */ + /* ////////////////////////////////////////////////////////////////////// */ + /* (default) priority - set lower than mmap's priority */ + 40 +}; + +/* ////////////////////////////////////////////////////////////////////////// */ +static int +posix_open(void) +{ + mca_base_param_reg_int( + &mca_shmem_posix_component.super.base_version, + "priority", "Priority of the posix shmem component", false, false, + mca_shmem_posix_component.priority, &mca_shmem_posix_component.priority + ); + + return OPAL_SUCCESS; +} + +/* ////////////////////////////////////////////////////////////////////////// */ +/** + * this routine performs a test that indicates whether or not posix shared + * memory can safely be used during this run. + * note: that we want to run this test as few times as possible. + * + * @return OPAL_SUCCESS in every case. whether posix can safely be used is reported through *module (NULL when it cannot) and *priority. + */ +static int +posix_runtime_query(mca_base_module_t **module, + int *priority, + const char *hint) +{ + char tmp_buff[OPAL_SHMEM_POSIX_FILE_LEN_MAX]; + int fd = -1; + + *priority = 0; + *module = NULL; + + /* if hint isn't null, then someone else already figured out who the + * best runnable component is AND the caller is relaying that info so we + * don't have to perform a run-time query. + */ + if (NULL != hint) { + OPAL_OUTPUT_VERBOSE( + (70, opal_shmem_base_output, + "shmem: posix: runtime_query: " + "attempting to use runtime hint (%s)\n", hint) + ); + /* was i selected? if so, then we are done. + * otherwise, disqualify myself.
+ */ + if (0 == strcasecmp(hint, + mca_shmem_posix_component.super.base_version.mca_component_name)) { + *priority = mca_shmem_posix_component.priority; + *module = (mca_base_module_t *)&opal_shmem_posix_module.super; + return OPAL_SUCCESS; + } + else { + *priority = 0; + *module = NULL; + return OPAL_SUCCESS; + } + } + /* if we are here, then perform a run-time query because we didn't get a + * hint. it's either up to us to figure it out, or the caller wants us to + * re-run the runtime query. + */ + /* shmem_posix_shm_open successfully shm_opened - we can use posix sm! */ + if (-1 != (fd = shmem_posix_shm_open(tmp_buff, + OPAL_SHMEM_POSIX_FILE_LEN_MAX -1))) { + /* the descriptor was only needed to prove that shm_open works, so + * release it right away. a failed close is ignored because it does + * not change the outcome of the test. + */ + close(fd); + /* free up allocated resources before we return */ + if (0 != shm_unlink(tmp_buff)) { + int err = errno; + char hn[MAXHOSTNAMELEN]; + gethostname(hn, MAXHOSTNAMELEN - 1); + hn[MAXHOSTNAMELEN - 1] = '\0'; + opal_show_help("help-opal-shmem-posix.txt", "sys call fail", 1, + hn, "shm_unlink(2)", "", strerror(err), err); + /* something strange happened, so consider this a run-time test + * failure even though shmem_posix_shm_open was successful */ + } + /* all is well */ + else { + *priority = mca_shmem_posix_component.priority; + *module = (mca_base_module_t *)&opal_shmem_posix_module.super; + rt_successful = true; + } + } + + return OPAL_SUCCESS; +} + +/* ////////////////////////////////////////////////////////////////////////// */ +static int +posix_query(mca_base_module_t **module, int *priority) +{ + *priority = mca_shmem_posix_component.priority; + *module = (mca_base_module_t *)&opal_shmem_posix_module.super; + return OPAL_SUCCESS; +} + diff --git a/opal/mca/shmem/posix/shmem_posix_module.c b/opal/mca/shmem/posix/shmem_posix_module.c new file mode 100644 index 0000000000..7608a3c5e0 --- /dev/null +++ b/opal/mca/shmem/posix/shmem_posix_module.c @@ -0,0 +1,450 @@ +/* + * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation.
All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2010-2011 Los Alamos National Security, LLC. + * All rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#include +#ifdef HAVE_FCNTL_H +#include +#endif /* HAVE_FCNTL_H */ +#ifdef HAVE_SYS_MMAN_H +#include +#endif /* HAVE_SYS_MMAN_H */ +#ifdef HAVE_UNISTD_H +#include +#endif /* HAVE_UNISTD_H */ +#ifdef HAVE_SYS_TYPES_H +#include +#endif /* HAVE_SYS_TYPES_H */ +#ifdef HAVE_STRING_H +#include +#endif /* HAVE_STRING_H */ + +#include "opal/constants.h" +#include "opal_stdint.h" +#include "opal/util/output.h" +#include "opal/util/path.h" +#include "opal/util/show_help.h" +#include "opal/mca/base/mca_base_param.h" +#include "opal/mca/shmem/shmem.h" +#include "opal/mca/shmem/base/base.h" + +#include "shmem_posix.h" +#include "shmem_posix_common_utils.h" + +/* for tons of debug output: -mca shmem_base_verbose 70 */ + +/* ////////////////////////////////////////////////////////////////////////// */ +/* local functions */ +static int +module_init(void); + +static int +segment_create(opal_shmem_ds_t *ds_buf, + const char *file_name, + size_t size); + +static int +ds_copy(const opal_shmem_ds_t *from, + opal_shmem_ds_t *to); + +static void * +segment_attach(opal_shmem_ds_t *ds_buf); + +static int +segment_detach(opal_shmem_ds_t *ds_buf); + +static int +segment_unlink(opal_shmem_ds_t *ds_buf); + +static int +module_finalize(void); + +/* posix shmem module */ 
+opal_shmem_posix_module_t opal_shmem_posix_module = { + /* super */ + { + module_init, + segment_create, + ds_copy, + segment_attach, + segment_detach, + segment_unlink, + module_finalize + } +}; + +/* ////////////////////////////////////////////////////////////////////////// */ +/* private utility functions */ +/* ////////////////////////////////////////////////////////////////////////// */ + +/* ////////////////////////////////////////////////////////////////////////// */ +/** + * completely resets the contents of *ds_buf + */ +static inline void +shmem_ds_reset(opal_shmem_ds_t *ds_buf) +{ + OPAL_OUTPUT_VERBOSE( + (70, opal_shmem_base_output, + "%s: %s: shmem_ds_resetting " + "(opid: %lu id: %d, size: %"PRIsize_t", name: %s)\n", + mca_shmem_posix_component.super.base_version.mca_type_name, + mca_shmem_posix_component.super.base_version.mca_component_name, + (unsigned long)ds_buf->opid, ds_buf->seg_id, ds_buf->seg_size, + ds_buf->seg_name) + ); + + ds_buf->opid = 0; + ds_buf->seg_cpid = 0; + OPAL_SHMEM_DS_RESET_FLAGS(ds_buf); + ds_buf->seg_id = OPAL_SHMEM_DS_ID_INVALID; + ds_buf->seg_size = 0; + memset(ds_buf->seg_name, '\0', OPAL_PATH_MAX); +} + +/* ////////////////////////////////////////////////////////////////////////// */ +static int +module_init(void) +{ + /* nothing to do */ + return OPAL_SUCCESS; +} + +/* ////////////////////////////////////////////////////////////////////////// */ +static int +module_finalize(void) +{ + /* nothing to do */ + return OPAL_SUCCESS; +} + +/* ////////////////////////////////////////////////////////////////////////// */ +static int +ds_copy(const opal_shmem_ds_t *from, + opal_shmem_ds_t *to) +{ + pid_t my_pid = getpid(); + + /* inter-process copy - exclude process-specific data */ + if (from->opid != my_pid) { + /* mask out internal flags */ + to->flags = (from->flags & OPAL_SHMEM_DS_FLAGS_INTERNAL_MASK); + to->seg_base_addr = NULL; + } + /* i am the owner process, so i can safely copy all the information */ + else { + 
to->flags = from->flags; + to->seg_base_addr = from->seg_base_addr; + } + + to->opid = my_pid; + to->seg_id = from->seg_id; + to->seg_size = from->seg_size; + to->seg_cpid = from->seg_cpid; + memcpy(to->seg_name, from->seg_name, OPAL_PATH_MAX); + + OPAL_OUTPUT_VERBOSE( + (70, opal_shmem_base_output, + "%s: %s: ds_copy complete " + "from: (opid: %lu, id: %d, size: %"PRIsize_t", " + "name: %s flags: 0x%02x) " + "to: (opid: %lu, id: %d, size: %"PRIsize_t", " + "name: %s flags: 0x%02x)\n", + mca_shmem_posix_component.super.base_version.mca_type_name, + mca_shmem_posix_component.super.base_version.mca_component_name, + (unsigned long)from->opid, from->seg_id, from->seg_size, + from->seg_name, from->flags, (unsigned long)to->opid, to->seg_id, + to->seg_size, to->seg_name, to->flags) + ); + + return OPAL_SUCCESS; +} + +/* ////////////////////////////////////////////////////////////////////////// */ +static int +segment_create(opal_shmem_ds_t *ds_buf, + const char *file_name, + size_t size) +{ + int rc = OPAL_SUCCESS; + pid_t my_pid = getpid(); + /* the real size of the shared memory segment. this includes enough space + * to store our segment header. + */ + size_t real_size = size + sizeof(opal_shmem_seg_hdr_t); + opal_shmem_seg_hdr_t *seg_hdrp = MAP_FAILED; + + /* init the contents of opal_shmem_ds_t */ + shmem_ds_reset(ds_buf); + + /* for posix shared memory we don't have to worry about the backing store + * being located on a network file system... so no check is needed here. + */ + + /* calling shmem_posix_shm_open searches for an available posix shared + * memory object name and upon successful completion populates the name + * buffer + */ + if (-1 == (ds_buf->seg_id = shmem_posix_shm_open( + ds_buf->seg_name, + OPAL_SHMEM_POSIX_FILE_LEN_MAX - 1))) { + /* snaps! something happened in posix_shm_open. don't report anything + * here because posix_shm_open will display all the necessary info. 
+ */ + rc = OPAL_ERROR; + goto out; + } + /* size backing file - note the use of real_size here */ + else if (0 != ftruncate(ds_buf->seg_id, real_size)) { + int err = errno; + char hn[MAXHOSTNAMELEN]; + gethostname(hn, MAXHOSTNAMELEN - 1); + hn[MAXHOSTNAMELEN - 1] = '\0'; + opal_show_help("help-opal-shmem-posix.txt", "sys call fail", 1, hn, + "ftruncate(2)", "", strerror(err), err); + rc = OPAL_ERROR; + goto out; + } + else if (MAP_FAILED == (seg_hdrp = mmap(NULL, real_size, + PROT_READ | PROT_WRITE, MAP_SHARED, + ds_buf->seg_id, 0))) { + int err = errno; + char hn[MAXHOSTNAMELEN]; + gethostname(hn, MAXHOSTNAMELEN - 1); + hn[MAXHOSTNAMELEN - 1] = '\0'; + opal_show_help("help-opal-shmem-posix.txt", "sys call fail", 1, hn, + "mmap(2)", "", strerror(err), err); + rc = OPAL_ERROR; + goto out; + } + /* all is well */ + else { + /* -- initialize the shared memory segment -- */ + opal_atomic_rmb(); + + /* init segment lock */ + opal_atomic_init(&seg_hdrp->lock, OPAL_ATOMIC_UNLOCKED); + /* i was the creator of this segment, so note that fact */ + seg_hdrp->cpid = my_pid; + + opal_atomic_wmb(); + + /* -- initialize the contents of opal_shmem_ds_t -- */ + ds_buf->opid = my_pid; + ds_buf->seg_cpid = my_pid; + ds_buf->seg_size = real_size; + ds_buf->seg_base_addr = (unsigned char *)seg_hdrp; + + /* notice that we are not setting ds_buf->name here. 
at this point, + * posix_shm_open was successful, so the contents of ds_buf->name are + * already set for us :-) + */ + + /* set "valid" bit because setment creation was successful */ + OPAL_SHMEM_DS_SET_VALID(ds_buf); + + OPAL_OUTPUT_VERBOSE( + (70, opal_shmem_base_output, + "%s: %s: create successful " + "(opid: %lu id: %d, size: %"PRIsize_t", name: %s)\n", + mca_shmem_posix_component.super.base_version.mca_type_name, + mca_shmem_posix_component.super.base_version.mca_component_name, + (unsigned long)ds_buf->opid, ds_buf->seg_id, ds_buf->seg_size, + ds_buf->seg_name) + ); + } + +out: + /* in this component, the id is the file descriptor returned by open. this + * check is here to see if it is safe to call close on the file descriptor. + * that is, we are making sure that our call to open was successful and + * we are not not in an error path. + */ + if (-1 != ds_buf->seg_id) { + if (0 != close(ds_buf->seg_id)) { + int err = errno; + char hn[MAXHOSTNAMELEN]; + gethostname(hn, MAXHOSTNAMELEN - 1); + hn[MAXHOSTNAMELEN - 1] = '\0'; + opal_show_help("help-opal-shmem-mmap.txt", "sys call fail", 1, hn, + "close(2)", "", strerror(err), err); + rc = OPAL_ERROR; + } + } + /* an error occured, so invalidate the shmem object and release any + * allocated resources. + */ + if (OPAL_SUCCESS != rc) { + /* posix_shm_open was successful, but something else wasn't. + * note: if the id is not equal to -1 and we are here, name will be + * valid. that is, we can safely call shm_unlink with ds_buf->name. 
+ */ + if (-1 != ds_buf->seg_id) { + shm_unlink(ds_buf->seg_name); + } + if (MAP_FAILED != seg_hdrp) { + munmap(seg_hdrp, real_size); + } + /* always invalidate in this error path */ + shmem_ds_reset(ds_buf); + } + return rc; +} + +/* ////////////////////////////////////////////////////////////////////////// */ +/** + * segment_attach can only be called after a successful call to segment_create + */ +static void * +segment_attach(opal_shmem_ds_t *ds_buf) +{ + pid_t my_pid = getpid(); + + if (my_pid != ds_buf->seg_cpid) { + if (-1 == (ds_buf->seg_id = shm_open(ds_buf->seg_name, O_RDWR, 0600))) { + int err = errno; + char hn[MAXHOSTNAMELEN]; + gethostname(hn, MAXHOSTNAMELEN - 1); + hn[MAXHOSTNAMELEN - 1] = '\0'; + opal_show_help("help-opal-shmem-posix.txt", "sys call fail", 1, hn, + "open(2)", "", strerror(err), err); + return NULL; + } + else if (MAP_FAILED == (ds_buf->seg_base_addr = + mmap(NULL, ds_buf->seg_size, + PROT_READ | PROT_WRITE, MAP_SHARED, + ds_buf->seg_id, 0))) { + int err = errno; + char hn[MAXHOSTNAMELEN]; + gethostname(hn, MAXHOSTNAMELEN - 1); + hn[MAXHOSTNAMELEN - 1] = '\0'; + opal_show_help("help-opal-shmem-posix.txt", "sys call fail", 1, hn, + "mmap(2)", "", strerror(err), err); + /* mmap failed, so shm_unlink and return NULL - no error check here + * because we are already in an error path... + */ + shm_unlink(ds_buf->seg_name); + return NULL; + } + /* all is well */ + else { + /* if close fails here, that's okay. just let the user know and + * continue. if we got this far, open and mmap were successful... + */ + if (0 != close(ds_buf->seg_id)) { + int err = errno; + char hn[MAXHOSTNAMELEN]; + gethostname(hn, MAXHOSTNAMELEN - 1); + hn[MAXHOSTNAMELEN - 1] = '\0'; + opal_show_help("help-opal-shmem-mmap.txt", "sys call fail", 1, + hn, "close(2)", "", strerror(err), err); + } + } + } + /* else i was the segment creator. nothing to do here because all the hard + * work was done in segment_create :-). 
+ */ + + OPAL_OUTPUT_VERBOSE( + (70, opal_shmem_base_output, + "%s: %s: attach successful " + "(opid: %lu id: %d, size: %"PRIsize_t", name: %s)\n", + mca_shmem_posix_component.super.base_version.mca_type_name, + mca_shmem_posix_component.super.base_version.mca_component_name, + (unsigned long)ds_buf->opid, ds_buf->seg_id, ds_buf->seg_size, + ds_buf->seg_name) + ); + + /* update returned base pointer with an offset that hides our stuff */ + return (ds_buf->seg_base_addr + sizeof(opal_shmem_seg_hdr_t)); +} + +/* ////////////////////////////////////////////////////////////////////////// */ +static int +segment_detach(opal_shmem_ds_t *ds_buf) +{ + int rc = OPAL_SUCCESS; + + OPAL_OUTPUT_VERBOSE( + (70, opal_shmem_base_output, + "%s: %s: detaching " + "(opid: %lu id: %d, size: %"PRIsize_t", name: %s)\n", + mca_shmem_posix_component.super.base_version.mca_type_name, + mca_shmem_posix_component.super.base_version.mca_component_name, + (unsigned long)ds_buf->opid, ds_buf->seg_id, ds_buf->seg_size, + ds_buf->seg_name) + ); + + if (0 != munmap(ds_buf->seg_base_addr, ds_buf->seg_size)) { + int err = errno; + char hn[MAXHOSTNAMELEN]; + gethostname(hn, MAXHOSTNAMELEN - 1); + hn[MAXHOSTNAMELEN - 1] = '\0'; + opal_show_help("help-opal-shmem-posix.txt", "sys call fail", 1, hn, + "munmap(2)", "", strerror(err), err); + rc = OPAL_ERROR; + } + /* reset the contents of the opal_shmem_ds_t associated with this + * shared memory segment. 
+ */ + shmem_ds_reset(ds_buf); + return rc; +} + +/* ////////////////////////////////////////////////////////////////////////// */ +static int +segment_unlink(opal_shmem_ds_t *ds_buf) +{ + OPAL_OUTPUT_VERBOSE( + (70, opal_shmem_base_output, + "%s: %s: unlinking " + "(opid: %lu id: %d, size: %"PRIsize_t", name: %s)\n", + mca_shmem_posix_component.super.base_version.mca_type_name, + mca_shmem_posix_component.super.base_version.mca_component_name, + (unsigned long)ds_buf->opid, ds_buf->seg_id, ds_buf->seg_size, + ds_buf->seg_name) + ); + + if (-1 == shm_unlink(ds_buf->seg_name)) { + int err = errno; + char hn[MAXHOSTNAMELEN]; + gethostname(hn, MAXHOSTNAMELEN - 1); + hn[MAXHOSTNAMELEN - 1] = '\0'; + opal_show_help("help-opal-shmem-posix.txt", "sys call fail", 1, hn, + "shm_unlink(2)", ds_buf->seg_name, strerror(err), err); + return OPAL_ERROR; + } + + /* don't completely reset the opal_shmem_ds_t. in particular, only reset + * the id and clear the valid bit. size and name values will remain valid + * across unlinks. other information stored in flags will remain untouched. + */ + ds_buf->seg_id = OPAL_SHMEM_DS_ID_INVALID; + /* note: this is only changing the valid bit to 0. this is not the same + * as calling shmem_ds_reset(ds_buf). + */ + OPAL_SHMEM_DS_INVALIDATE(ds_buf); + return OPAL_SUCCESS; +} + diff --git a/opal/mca/shmem/shmem.h b/opal/mca/shmem/shmem.h new file mode 100644 index 0000000000..34e0037fb8 --- /dev/null +++ b/opal/mca/shmem/shmem.h @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved.
+ * Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2010 IBM Corporation. All rights reserved. + * Copyright (c) 2010-2011 Los Alamos National Security, LLC. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/** + * @file + * + * shmem (shared memory backing facility) framework component interface + * definitions. + * + * usage example: see ompi/mca/common/sm + * + * The module has the following functions: + * + * - module_init + * - segment_create + * - ds_copy + * - segment_attach + * - segment_detach + * - unlink + * - module_finalize + */ + +#ifndef OPAL_SHMEM_H +#define OPAL_SHMEM_H + +#include "opal_config.h" + +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" + +#include "opal/mca/shmem/shmem_types.h" + +BEGIN_C_DECLS + +/* ////////////////////////////////////////////////////////////////////////// */ +typedef int +(*mca_shmem_base_component_runtime_query_fn_t)(mca_base_module_t **module, + int *priority, + const char *hint); + +/* structure for shmem components. */ +struct opal_shmem_base_component_2_0_0_t { + /* base MCA component */ + mca_base_component_t base_version; + /* base MCA data */ + mca_base_component_data_t base_data; + /* component runtime query */ + mca_shmem_base_component_runtime_query_fn_t runtime_query; +}; + +/* convenience typedefs */ +typedef struct opal_shmem_base_component_2_0_0_t +opal_shmem_base_component_2_0_0_t; + +typedef struct opal_shmem_base_component_2_0_0_t opal_shmem_base_component_t; + +/* ////////////////////////////////////////////////////////////////////////// */ +/* shmem API function pointers */ + +/** + * module initialization function. + * @return OPAL_SUCCESS on success. + */ +typedef int +(*opal_shmem_base_module_init_fn_t)(void); + +/** + * copy shmem data structure information pointed to by from to the structure + * pointed to by to. 
+ * + * @param from source pointer (IN). + * + * @param to destination pointer (OUT). + * + * @return OPAL_SUCCESS on success. + */ +typedef int +(*opal_shmem_base_ds_copy_fn_t)(const opal_shmem_ds_t *from, + opal_shmem_ds_t *to); + +/** + * create a new shared memory segment and initialize members in structure + * pointed to by ds_buf. + * + * @param ds_buf pointer to opal_shmem_ds_t typedef'd structure + * defined in shmem_types.h (OUT). + * + * @param file_name unique string identifier that must be a valid, + * writable path (IN). + * + * @param size size of the shared memory segment (IN). + * + * @return OPAL_SUCCESS on success. + */ +typedef int +(*opal_shmem_base_module_segment_create_fn_t)(opal_shmem_ds_t *ds_buf, + const char *file_name, + size_t size); + +/** + * attach to an existing shared memory segment initialized by segment_create. + * + * @param ds_buf pointer to initialized opal_shmem_ds_t typedef'd + * structure (IN/OUT). + * + * @return base address of shared memory segment on success. returns + * NULL otherwise. + */ +typedef void * +(*opal_shmem_base_module_segment_attach_fn_t)(opal_shmem_ds_t *ds_buf); + +/** + * detach from an existing shared memory segment. + * + * @param ds_buf pointer to initialized opal_shmem_ds_t typedef'd structure + * (IN/OUT). + * + * @return OPAL_SUCCESS on success. + */ +typedef int +(*opal_shmem_base_module_segment_detach_fn_t)(opal_shmem_ds_t *ds_buf); + +/** + * unlink an existing shared memory segment. + * + * @param ds_buf pointer to initialized opal_shmem_ds_t typedef'd structure + * (IN/OUT). + * + * @return OPAL_SUCCESS on success. + */ +typedef int +(*opal_shmem_base_module_unlink_fn_t)(opal_shmem_ds_t *ds_buf); + +/** + * module finalize function. invoked by the base on the selected + * module when the shmem framework is being shut down.
+ */ +typedef int (*opal_shmem_base_module_finalize_fn_t)(void); + +/** + * structure for shmem modules + */ +struct opal_shmem_base_module_2_0_0_t { + opal_shmem_base_module_init_fn_t module_init; + opal_shmem_base_module_segment_create_fn_t segment_create; + opal_shmem_base_ds_copy_fn_t ds_copy; + opal_shmem_base_module_segment_attach_fn_t segment_attach; + opal_shmem_base_module_segment_detach_fn_t segment_detach; + opal_shmem_base_module_unlink_fn_t unlink; + opal_shmem_base_module_finalize_fn_t module_finalize; +}; + +/** + * convenience typedefs + */ +typedef struct opal_shmem_base_module_2_0_0_t opal_shmem_base_module_2_0_0_t; +typedef struct opal_shmem_base_module_2_0_0_t opal_shmem_base_module_t; + +/** + * macro for use in components that are of type shmem + * see: opal/mca/mca.h for more information + */ +#define OPAL_SHMEM_BASE_VERSION_2_0_0 \ + MCA_BASE_VERSION_2_0_0, \ + "shmem", 2, 0, 0 + +END_C_DECLS + +#endif /* OPAL_SHMEM_H */ diff --git a/opal/mca/shmem/shmem_types.h b/opal/mca/shmem/shmem_types.h new file mode 100644 index 0000000000..aadb9bd153 --- /dev/null +++ b/opal/mca/shmem/shmem_types.h @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2010 IBM Corporation. All rights reserved. + * Copyright (c) 2010-2011 Los Alamos National Security, LLC. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/** + * @file + * + * shmem (shared memory backing facility) framework types, convenience macros, + * etc. + */ + +#ifndef OPAL_SHMEM_TYPES_H +#define OPAL_SHMEM_TYPES_H + +#include "opal_config.h" + +BEGIN_C_DECLS + +/* ////////////////////////////////////////////////////////////////////////// */ +/** + * ds_buf: pointer to opal_shmem_ds_t typedef'd struct + */ + +/** + * flag indicating the state (valid/invalid) of the shmem data structure + * 0x0* - reserved for non-internal flags + */ +#define OPAL_SHMEM_DS_FLAGS_VALID 0x01 + +/** + * 0x1* - reserved for internal flags. that is, flags that will NOT be + * propagated via ds_copy during inter-process information sharing. + */ + +/** + * masks out internal flags + */ +#define OPAL_SHMEM_DS_FLAGS_INTERNAL_MASK 0x0F + +/** + * invalid id value + */ +#define OPAL_SHMEM_DS_ID_INVALID -1 + +/** + * macro that sets all bits in flags to 0 + */ +#define OPAL_SHMEM_DS_RESET_FLAGS(ds_buf) \ +do { \ + (ds_buf)->flags = 0x00; \ +} while (0) + +/** + * sets valid bit in flags to 1 + */ +#define OPAL_SHMEM_DS_SET_VALID(ds_buf) \ +do { \ + (ds_buf)->flags |= OPAL_SHMEM_DS_FLAGS_VALID; \ +} while (0) + +#define OPAL_SHMEM_DS_SET_CREATOR(ds_buf) \ +do { \ + (ds_buf)->flags |= OPAL_SHMEM_DS_FLAGS_CREATOR; \ +} while (0) + +/** + * sets valid bit in flags to 0 + */ +#define OPAL_SHMEM_DS_INVALIDATE(ds_buf) \ +do { \ + (ds_buf)->flags &= ~OPAL_SHMEM_DS_FLAGS_VALID; \ +} while (0) + +/** + * evaluates to 1 if the valid bit in flags is set to 1. evaluates to 0 + * otherwise. 
+ */ +#define OPAL_SHMEM_DS_IS_VALID(ds_buf) \ + ( (ds_buf)->flags & OPAL_SHMEM_DS_FLAGS_VALID ) + +#define OPAL_SHMEM_DS_IS_CREATOR(ds_buf) \ + ( (ds_buf)->flags & OPAL_SHMEM_DS_FLAGS_CREATOR ) + +/* ////////////////////////////////////////////////////////////////////////// */ +typedef uint8_t opal_shmem_ds_flag_t; + +/* shared memory segment header */ +struct opal_shmem_seg_hdr_t { + /* segment lock */ + opal_atomic_lock_t lock; + /* pid of the segment creator */ + pid_t cpid; +}; +typedef struct opal_shmem_seg_hdr_t opal_shmem_seg_hdr_t; + +struct opal_shmem_ds_t { + /* owner pid of the opal_shmem_ds_t */ + pid_t opid; + /* state flags */ + opal_shmem_ds_flag_t flags; + /* pid of the shared memory segment creator */ + pid_t seg_cpid; + /* ds id */ + int seg_id; + /* size of shared memory segment */ + size_t seg_size; + /* path to backing store */ + char seg_name[OPAL_PATH_MAX]; + /* base address of shared memory segment */ + unsigned char *seg_base_addr; +}; +typedef struct opal_shmem_ds_t opal_shmem_ds_t; + +END_C_DECLS + +#endif /* OPAL_SHMEM_TYPES_H */ diff --git a/opal/mca/shmem/sysv/Makefile.am b/opal/mca/shmem/sysv/Makefile.am new file mode 100644 index 0000000000..8dfcdc083e --- /dev/null +++ b/opal/mca/shmem/sysv/Makefile.am @@ -0,0 +1,48 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2010-2011 Los Alamos National Security, LLC. +# All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +sources = \ + shmem_sysv.h \ + shmem_sysv_component.c \ + shmem_sysv_module.c + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds). + +if MCA_BUILD_opal_shmem_sysv_DSO +component_noinst = +component_install = mca_shmem_sysv.la +else +component_noinst = libmca_shmem_sysv.la +component_install = +endif + +mcacomponentdir = $(pkglibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_shmem_sysv_la_SOURCES = $(sources) +mca_shmem_sysv_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_shmem_sysv_la_SOURCES = $(sources) +libmca_shmem_sysv_la_LDFLAGS = -module -avoid-version + +# help file +dist_pkgdata_DATA = help-opal-shmem-sysv.txt diff --git a/opal/mca/shmem/sysv/configure.m4 b/opal/mca/shmem/sysv/configure.m4 new file mode 100644 index 0000000000..4d9065f130 --- /dev/null +++ b/opal/mca/shmem/sysv/configure.m4 @@ -0,0 +1,50 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2010-2011 Los Alamos National Security, LLC. +# All rights reserved.
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_opal_shmem_sysv_CONFIG(action-if-can-compile, +# [action-if-cant-compile]) +# ------------------------------------------------ +AC_DEFUN([MCA_opal_shmem_sysv_CONFIG],[ + AC_CONFIG_FILES([opal/mca/shmem/sysv/Makefile]) + + # do we have the sysv shm stuff? + AC_MSG_CHECKING([if want SYSV shared memory support]) + AC_ARG_ENABLE(sysv-shmem, + AC_HELP_STRING([--disable-sysv-shmem], + [disable sysv shared memory support (default: enabled)])) + AS_IF([test "$enable_sysv_shmem" = "no"], + [AC_MSG_RESULT([no]) + shmem_sysv_sm_build_sysv=0], + [AC_MSG_RESULT([yes]) + AC_CHECK_FUNC(shmget, + [shmem_sysv_sm_build_sysv=1], + [shmem_sysv_sm_build_sysv=0])]) + AS_IF([test "$enable_sysv_shmem" = "yes" -a "$shmem_sysv_sm_build_sysv" = "0"], + [AC_MSG_WARN([SYSV shared memory support requested but not found]) + AC_MSG_ERROR([Cannot continue])]) + + AS_IF([test "$shmem_sysv_sm_build_sysv" = "1"], [$1], [$2]) + + AC_DEFINE_UNQUOTED([OPAL_SHMEM_SYSV], + [$shmem_sysv_sm_build_sysv], + [Whether we have shared memory support for SYSV or not]) +])dnl diff --git a/opal/mca/shmem/sysv/help-opal-shmem-sysv.txt b/opal/mca/shmem/sysv/help-opal-shmem-sysv.txt new file mode 100644 index 0000000000..ffa4585fb8 --- /dev/null +++ b/opal/mca/shmem/sysv/help-opal-shmem-sysv.txt @@ -0,0 +1,22 @@ +# -*- text -*- +# +# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2010-2011 Los Alamos National Security, LLC. +# All rights reserved. +# +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +# This is the US/English help file for Open MPI's common shmem support. +# +[sys call fail] +A system call failed during shared memory initialization that should +not have. It is likely that your MPI job will now either abort or +experience performance degradation. 
+ + Local host: %s + System call: %s %s + Error: %s (errno %d) diff --git a/opal/mca/shmem/sysv/shmem_sysv.h b/opal/mca/shmem/sysv/shmem_sysv.h new file mode 100644 index 0000000000..b2bb3719d5 --- /dev/null +++ b/opal/mca/shmem/sysv/shmem_sysv.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2010-2011 Los Alamos National Security, LLC. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_SHMEM_SYSV_EXPORT_H +#define MCA_SHMEM_SYSV_EXPORT_H + +#include "opal_config.h" + +#include "opal/mca/mca.h" +#include "opal/mca/shmem/shmem.h" + +BEGIN_C_DECLS + +/** + * globally exported variable to hold the sysv component. 
+ */ +typedef struct opal_shmem_sysv_component_t { + /* base component struct */ + opal_shmem_base_component_t super; + /* priority for sysv component */ + int priority; +} opal_shmem_sysv_component_t; + +OPAL_MODULE_DECLSPEC extern opal_shmem_sysv_component_t +mca_shmem_sysv_component; + +typedef struct opal_shmem_sysv_module_t { + opal_shmem_base_module_t super; +} opal_shmem_sysv_module_t; +extern opal_shmem_sysv_module_t opal_shmem_sysv_module; + +END_C_DECLS + +#endif /* MCA_SHMEM_SYSV_EXPORT_H */ diff --git a/opal/mca/shmem/sysv/shmem_sysv_component.c b/opal/mca/shmem/sysv/shmem_sysv_component.c new file mode 100644 index 0000000000..bfe2ffc8e5 --- /dev/null +++ b/opal/mca/shmem/sysv/shmem_sysv_component.c @@ -0,0 +1,202 @@ +/* + * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2010-2011 Los Alamos National Security, LLC. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. 
+ */ + +#include "opal_config.h" + +#ifdef HAVE_SYS_MMAN_H +#include <sys/mman.h> +#endif /* HAVE_SYS_MMAN_H */ +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif /* HAVE_UNISTD_H */ + +#ifdef HAVE_SYS_IPC_H +#include <sys/ipc.h> +#endif /* HAVE_SYS_IPC_H */ +#if HAVE_SYS_SHM_H +#include <sys/shm.h> +#endif /* HAVE_SYS_SHM_H */ + +#include "opal/constants.h" +#include "opal/util/show_help.h" +#include "opal/util/output.h" +#include "opal/mca/shmem/base/base.h" +#include "opal/mca/shmem/shmem.h" +#include "shmem_sysv.h" + +/* public string showing the opal sysv shmem component version number */ +const char *opal_shmem_sysv_component_version_string = + "OPAL sysv shmem MCA component version " OPAL_VERSION; + +/* local functions */ +static int sysv_open(void); +static int sysv_query(mca_base_module_t **module, int *priority); +static int sysv_runtime_query(mca_base_module_t **module, + int *priority, + const char *hint); + +/* instantiate the public struct with all of our public information + * and pointers to our public functions in it + */ +opal_shmem_sysv_component_t mca_shmem_sysv_component = { + /* ////////////////////////////////////////////////////////////////////// */ + /* super */ + /* ////////////////////////////////////////////////////////////////////// */ + { + /* common MCA component data */ + { + OPAL_SHMEM_BASE_VERSION_2_0_0, + + /* component name and version */ + "sysv", + OPAL_MAJOR_VERSION, + OPAL_MINOR_VERSION, + OPAL_RELEASE_VERSION, + + /* component open */ + sysv_open, + /* component close */ + NULL, + /* component query */ + sysv_query + }, + /* MCA v2.0.0 component meta data */ + { + /* the component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + }, + sysv_runtime_query, + }, + /* ////////////////////////////////////////////////////////////////////// */ + /* sysv component-specific information */ + /* see: shmem_sysv.h for more information */ + /* ////////////////////////////////////////////////////////////////////// */ + /* (default) priority - set lower than mmap's priority
*/ + 30 +}; + +/* ////////////////////////////////////////////////////////////////////////// */ +static int +sysv_open(void) +{ + mca_base_param_reg_int( + &mca_shmem_sysv_component.super.base_version, + "priority", "Priority of the sysv shmem component", false, false, + mca_shmem_sysv_component.priority, &mca_shmem_sysv_component.priority + ); + + return OPAL_SUCCESS; +} + +/* ////////////////////////////////////////////////////////////////////////// */ +/** + * this routine performs a test that indicates whether or not sysv shared + * memory can safely be used during this run. + * note: we want to run this test as few times as possible. + * + * @return always OPAL_SUCCESS. *module stays NULL when sysv is disqualified. + */ +static int +sysv_runtime_query(mca_base_module_t **module, int *priority, const char *hint) +{ + char c = 'j'; + int shmid = -1; + char *addr = (char *)-1; /* shmat's failure value doubles as "not attached" */ + struct shmid_ds tmp_buff; + + *priority = 0; + *module = NULL; + + /* if hint isn't null, then someone else already figured out which + * component is the best runnable one AND the caller is relaying that info so + * we don't have to perform a run-time query. + */ + if (NULL != hint) { + OPAL_OUTPUT_VERBOSE( + (70, opal_shmem_base_output, + "shmem: sysv: runtime_query: " + "attempting to use runtime hint (%s)\n", hint) + ); + /* was i selected? if so, then we are done. + * otherwise, disqualify myself.
+ */ + if (0 == strcasecmp(hint, + mca_shmem_sysv_component.super.base_version.mca_component_name)) { + *priority = mca_shmem_sysv_component.priority; + *module = (mca_base_module_t *)&opal_shmem_sysv_module.super; + return OPAL_SUCCESS; + } + else { + *priority = 0; + *module = NULL; + return OPAL_SUCCESS; + } + } + + /* if we are here, then let the run-time test games begin */ + + if (-1 == (shmid = shmget(IPC_PRIVATE, (size_t)(getpagesize()), + IPC_CREAT | IPC_EXCL | SHM_R | SHM_W))) { + goto out; + } + else if ((void *)-1 == (addr = shmat(shmid, NULL, 0))) { + /* attach failed: remove the segment so it does not outlive us */ + (void)shmctl(shmid, IPC_RMID, NULL); + goto out; + } + + /* protect against lazy establishment - may not be needed, but can't hurt */ + *addr = c; + + if (-1 == shmctl(shmid, IPC_RMID, NULL)) { + goto out; + } + else if (-1 == shmctl(shmid, IPC_STAT, &tmp_buff)) { + goto out; + } + /* all is well - rainbows and butterflies */ + else { + *priority = mca_shmem_sysv_component.priority; + *module = (mca_base_module_t *)&opal_shmem_sysv_module.super; + } + +out: + if ((char *)-1 != addr) { + shmdt(addr); + } + return OPAL_SUCCESS; +} + +/* ////////////////////////////////////////////////////////////////////////// */ +static int +sysv_query(mca_base_module_t **module, int *priority) +{ + *priority = mca_shmem_sysv_component.priority; + *module = (mca_base_module_t *)&opal_shmem_sysv_module.super; + return OPAL_SUCCESS; +} + diff --git a/opal/mca/shmem/sysv/shmem_sysv_module.c b/opal/mca/shmem/sysv/shmem_sysv_module.c new file mode 100644 index 0000000000..50043ec116 --- /dev/null +++ b/opal/mca/shmem/sysv/shmem_sysv_module.c @@ -0,0 +1,404 @@ +/* + * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart.
All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2010-2011 Los Alamos National Security, LLC. + * All rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#include +#ifdef HAVE_FCNTL_H +#include +#endif /* HAVE_FCNTL_H */ +#ifdef HAVE_SYS_MMAN_H +#include +#endif /* HAVE_SYS_MMAN_H */ +#ifdef HAVE_UNISTD_H +#include +#endif /* HAVE_UNISTD_H */ +#ifdef HAVE_SYS_TYPES_H +#include +#endif /* HAVE_SYS_TYPES_H */ +#ifdef HAVE_SYS_IPC_H +#include +#endif /* HAVE_SYS_IPC_H */ +#if HAVE_SYS_SHM_H +#include +#endif /* HAVE_SYS_SHM_H */ +#ifdef HAVE_STRING_H +#include +#endif /* HAVE_STRING_H */ + +#include "opal/constants.h" +#include "opal_stdint.h" +#include "opal/util/output.h" +#include "opal/util/path.h" +#include "opal/util/show_help.h" +#include "opal/mca/base/mca_base_param.h" +#include "opal/mca/shmem/shmem.h" +#include "opal/mca/shmem/base/base.h" + +#include "shmem_sysv.h" + +/* for tons of debug output: -mca shmem_base_verbose 70 */ + +/* ////////////////////////////////////////////////////////////////////////// */ +/* local functions */ +static int +module_init(void); + +static int +segment_create(opal_shmem_ds_t *ds_buf, + const char *file_name, + size_t size); + +static int +ds_copy(const opal_shmem_ds_t *from, + opal_shmem_ds_t *to); + +static void * +segment_attach(opal_shmem_ds_t *ds_buf); + +static int +segment_detach(opal_shmem_ds_t *ds_buf); + +static int +segment_unlink(opal_shmem_ds_t *ds_buf); + +static int +module_finalize(void); + +/* sysv shmem module */ +opal_shmem_sysv_module_t opal_shmem_sysv_module = { + /* super */ + { + module_init, + segment_create, + ds_copy, + segment_attach, + segment_detach, + segment_unlink, + module_finalize + } +}; + +/* 
////////////////////////////////////////////////////////////////////////// */
+/* private utility functions */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/**
+ * completely resets the contents of *ds_buf
+ *
+ * @param ds_buf  shmem data structure to reset; must not be NULL.
+ */
+static inline void
+shmem_ds_reset(opal_shmem_ds_t *ds_buf)
+{
+    OPAL_OUTPUT_VERBOSE(
+        (70, opal_shmem_base_output,
+         "%s: %s: shmem_ds_resetting "
+         "(opid: %lu id: %d, size: %"PRIsize_t", name: %s)\n",
+         mca_shmem_sysv_component.super.base_version.mca_type_name,
+         mca_shmem_sysv_component.super.base_version.mca_component_name,
+         (unsigned long)ds_buf->opid, ds_buf->seg_id, ds_buf->seg_size,
+         ds_buf->seg_name)
+    );
+
+    ds_buf->opid = 0;
+    ds_buf->seg_cpid = 0;
+    OPAL_SHMEM_DS_RESET_FLAGS(ds_buf);
+    ds_buf->seg_id = OPAL_SHMEM_DS_ID_INVALID;
+    ds_buf->seg_size = 0;
+    /* do not leave a stale mapping address behind after detach/failure */
+    ds_buf->seg_base_addr = NULL;
+    memset(ds_buf->seg_name, '\0', OPAL_PATH_MAX);
+}
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/**
+ * reports a failed system call through the "sys call fail" help topic.
+ *
+ * @param call_name  name of the failed call, e.g. "shmat(2)".
+ * @param err        errno value captured right after the failure.
+ */
+static void
+shmem_sysv_sys_call_fail(const char *call_name, int err)
+{
+    char hn[MAXHOSTNAMELEN];
+
+    /* keep hn printable even if gethostname fails */
+    hn[0] = '\0';
+    gethostname(hn, MAXHOSTNAMELEN - 1);
+    hn[MAXHOSTNAMELEN - 1] = '\0';
+    opal_show_help("help-opal-shmem-sysv.txt", "sys call fail", 1, hn,
+                   call_name, "", strerror(err), err);
+}
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/**
+ * module initialization hook - sysv needs no per-module setup.
+ *
+ * @return OPAL_SUCCESS always.
+ */
+static int
+module_init(void)
+{
+    /* nothing to do */
+    return OPAL_SUCCESS;
+}
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/**
+ * module finalization hook - sysv needs no per-module teardown.
+ *
+ * @return OPAL_SUCCESS always.
+ */
+static int
+module_finalize(void)
+{
+    /* nothing to do */
+    return OPAL_SUCCESS;
+}
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/**
+ * copies the shmem data structure *from into *to.
+ *
+ * when *from is owned by another process (from->opid differs from our pid)
+ * the process-specific pieces are not carried over: flags are masked with
+ * OPAL_SHMEM_DS_FLAGS_INTERNAL_MASK and the base address is set to NULL.
+ * in every case to->opid becomes the calling process' pid.
+ *
+ * @param from  source data structure.
+ * @param to    destination data structure.
+ *
+ * @return OPAL_SUCCESS always.
+ */
+static int
+ds_copy(const opal_shmem_ds_t *from,
+        opal_shmem_ds_t *to)
+{
+    pid_t my_pid = getpid();
+
+    /* inter-process copy - exclude process-specific data */
+    if (from->opid != my_pid) {
+        /* mask out internal flags */
+        to->flags = (from->flags & OPAL_SHMEM_DS_FLAGS_INTERNAL_MASK);
+        to->seg_base_addr = NULL;
+    }
+    /* i am the owner process, so i can safely copy all the information */
+    else {
+        to->flags = from->flags;
+        to->seg_base_addr = from->seg_base_addr;
+    }
+
+    to->opid = my_pid;
+    to->seg_id = from->seg_id;
+    to->seg_size = from->seg_size;
+    to->seg_cpid = from->seg_cpid;
+    memcpy(to->seg_name, from->seg_name, OPAL_PATH_MAX);
+
+    OPAL_OUTPUT_VERBOSE(
+        (70, opal_shmem_base_output,
+         "%s: %s: ds_copy complete "
+         "from: (opid: %lu, id: %d, size: %"PRIsize_t", "
+         "name: %s flags: 0x%02x) "
+         "to: (opid: %lu, id: %d, size: %"PRIsize_t", "
+         "name: %s flags: 0x%02x)\n",
+         mca_shmem_sysv_component.super.base_version.mca_type_name,
+         mca_shmem_sysv_component.super.base_version.mca_component_name,
+         (unsigned long)from->opid, from->seg_id, from->seg_size,
+         from->seg_name, from->flags, (unsigned long)to->opid, to->seg_id,
+         to->seg_size, to->seg_name, to->flags)
+    );
+
+    return OPAL_SUCCESS;
+}
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/**
+ * creates a sysv shared memory segment, attaches to it, marks it for
+ * destruction and initializes the segment header and *ds_buf.
+ *
+ * @param ds_buf     data structure describing the new segment (out).
+ * @param file_name  unused by the sysv component.
+ * @param size       usable size requested by the caller; the segment header
+ *                   is added on top of it.
+ *
+ * @return OPAL_SUCCESS on success. on failure OPAL_ERROR is returned, any
+ *         partially acquired resource is released and *ds_buf is reset.
+ */
+static int
+segment_create(opal_shmem_ds_t *ds_buf,
+               const char *file_name,
+               size_t size)
+{
+    int rc = OPAL_SUCCESS;
+    pid_t my_pid = getpid();
+    /* the real size of the shared memory segment.  this includes enough space
+     * to store our segment header.
+     */
+    size_t real_size = size + sizeof(opal_shmem_seg_hdr_t);
+    /* (void *)-1 is shmat's failure value, so it doubles as "not attached" */
+    opal_shmem_seg_hdr_t *seg_hdrp = (void *)-1;
+
+    /* init the contents of opal_shmem_ds_t */
+    shmem_ds_reset(ds_buf);
+
+    /* for sysv shared memory we don't have to worry about the backing store
+     * being located on a network file system... so no check is needed here.
+     */
+
+    /* create a new shared memory segment and save the shmid. note the use of
+     * real_size here
+     */
+    ds_buf->seg_id = shmget(IPC_PRIVATE, real_size,
+                            IPC_CREAT | IPC_EXCL | SHM_R | SHM_W);
+    if (-1 == ds_buf->seg_id) {
+        shmem_sysv_sys_call_fail("shmget(2)", errno);
+        rc = OPAL_ERROR;
+        goto out;
+    }
+
+    /* attach to the segment */
+    seg_hdrp = shmat(ds_buf->seg_id, NULL, 0);
+    if ((void *)-1 == seg_hdrp) {
+        shmem_sysv_sys_call_fail("shmat(2)", errno);
+        rc = OPAL_ERROR;
+        goto out;
+    }
+
+    /* mark the segment for destruction - if we are here, then the run-time
+     * component selection test detected adequate support for this type of
+     * thing.
+     */
+    if (0 != shmctl(ds_buf->seg_id, IPC_RMID, NULL)) {
+        shmem_sysv_sys_call_fail("shmctl(2)", errno);
+        rc = OPAL_ERROR;
+        goto out;
+    }
+
+    /* all is well */
+
+    /* -- initialize the shared memory segment -- */
+    opal_atomic_rmb();
+
+    /* init segment lock */
+    opal_atomic_init(&seg_hdrp->lock, OPAL_ATOMIC_UNLOCKED);
+    /* i was the creator of this segment, so note that fact */
+    seg_hdrp->cpid = my_pid;
+
+    opal_atomic_wmb();
+
+    /* -- initialize the contents of opal_shmem_ds_t -- */
+    ds_buf->opid = my_pid;
+    ds_buf->seg_cpid = my_pid;
+    ds_buf->seg_size = real_size;
+    ds_buf->seg_base_addr = (unsigned char *)seg_hdrp;
+
+    /* notice that we are not setting ds_buf->name here. sysv doesn't use
+     * it, so don't worry about it - shmem_ds_reset took care of
+     * initialization, so we aren't passing garbage around.
+     */
+
+    /* set "valid" bit because segment creation was successful */
+    OPAL_SHMEM_DS_SET_VALID(ds_buf);
+
+    OPAL_OUTPUT_VERBOSE(
+        (70, opal_shmem_base_output,
+         "%s: %s: create successful "
+         "(opid: %lu id: %d, size: %"PRIsize_t", name: %s)\n",
+         mca_shmem_sysv_component.super.base_version.mca_type_name,
+         mca_shmem_sysv_component.super.base_version.mca_component_name,
+         (unsigned long)ds_buf->opid, ds_buf->seg_id, ds_buf->seg_size,
+         ds_buf->seg_name)
+    );
+
+out:
+    /* an error occurred, so invalidate the shmem object and release any
+     * allocated resources.
+     */
+    if (OPAL_SUCCESS != rc) {
+        /* best effort to delete the segment. */
+        if ((void *)-1 != seg_hdrp) {
+            shmdt(seg_hdrp);
+        }
+        /* only touch the segment if shmget actually handed one out */
+        if (-1 != ds_buf->seg_id) {
+            shmctl(ds_buf->seg_id, IPC_RMID, NULL);
+        }
+
+        /* always invalidate in this error path */
+        shmem_ds_reset(ds_buf);
+    }
+    return rc;
+}
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/**
+ * segment_attach can only be called after a successful call to segment_create
+ *
+ * the creator process is already attached (see segment_create); every other
+ * process attaches with shmat here.
+ *
+ * @param ds_buf  data structure describing the segment.  seg_base_addr is
+ *                only updated when the attach succeeds.
+ *
+ * @return address just past the segment header, or NULL if shmat failed.
+ */
+static void *
+segment_attach(opal_shmem_ds_t *ds_buf)
+{
+    pid_t my_pid = getpid();
+
+    if (my_pid != ds_buf->seg_cpid) {
+        void *base = shmat(ds_buf->seg_id, NULL, 0);
+
+        if ((void *)-1 == base) {
+            shmem_sysv_sys_call_fail("shmat(2)", errno);
+            shmctl(ds_buf->seg_id, IPC_RMID, NULL);
+            /* never publish shmat's (void *)-1 as a base address */
+            return NULL;
+        }
+        ds_buf->seg_base_addr = (unsigned char *)base;
+    }
+    /* else i was the segment creator.  nothing to do here because all the hard
+     * work was done in segment_create :-).
+     */
+
+    OPAL_OUTPUT_VERBOSE(
+        (70, opal_shmem_base_output,
+         "%s: %s: attach successful "
+         "(opid: %lu id: %d, size: %"PRIsize_t", name: %s)\n",
+         mca_shmem_sysv_component.super.base_version.mca_type_name,
+         mca_shmem_sysv_component.super.base_version.mca_component_name,
+         (unsigned long)ds_buf->opid, ds_buf->seg_id, ds_buf->seg_size,
+         ds_buf->seg_name)
+    );
+
+    /* update returned base pointer with an offset that hides our stuff */
+    return (ds_buf->seg_base_addr + sizeof(opal_shmem_seg_hdr_t));
+}
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/**
+ * detaches from the segment described by *ds_buf and resets *ds_buf.
+ *
+ * @param ds_buf  data structure describing the attached segment.
+ *
+ * @return OPAL_SUCCESS, or OPAL_ERROR if shmdt failed.  *ds_buf is reset in
+ *         both cases.
+ */
+static int
+segment_detach(opal_shmem_ds_t *ds_buf)
+{
+    int rc = OPAL_SUCCESS;
+
+    OPAL_OUTPUT_VERBOSE(
+        (70, opal_shmem_base_output,
+         "%s: %s: detaching "
+         "(opid: %lu id: %d, size: %"PRIsize_t", name: %s)\n",
+         mca_shmem_sysv_component.super.base_version.mca_type_name,
+         mca_shmem_sysv_component.super.base_version.mca_component_name,
+         (unsigned long)ds_buf->opid, ds_buf->seg_id, ds_buf->seg_size,
+         ds_buf->seg_name)
+    );
+
+    if (0 != shmdt(ds_buf->seg_base_addr)) {
+        shmem_sysv_sys_call_fail("shmdt(2)", errno);
+        rc = OPAL_ERROR;
+    }
+
+    /* reset the contents of the opal_shmem_ds_t associated with this
+     * shared memory segment.
+     */
+    shmem_ds_reset(ds_buf);
+    return rc;
+}
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/**
+ * marks *ds_buf as unlinked.  no system call is issued here.
+ *
+ * @param ds_buf  data structure describing the segment.
+ *
+ * @return OPAL_SUCCESS always.
+ */
+static int
+segment_unlink(opal_shmem_ds_t *ds_buf)
+{
+    /* not much unlink work needed for sysv */
+
+    OPAL_OUTPUT_VERBOSE(
+        (70, opal_shmem_base_output,
+         "%s: %s: unlinking "
+         "(opid: %lu id: %d, size: %"PRIsize_t", name: %s)\n",
+         mca_shmem_sysv_component.super.base_version.mca_type_name,
+         mca_shmem_sysv_component.super.base_version.mca_component_name,
+         (unsigned long)ds_buf->opid, ds_buf->seg_id, ds_buf->seg_size,
+         ds_buf->seg_name)
+    );
+
+    /* don't completely reset the opal_shmem_ds_t.  in particular, only reset
+     * the id and flip the invalid bit.  size and name values will remain valid
+     * across unlinks. other information stored in flags will remain untouched.
+     */
+    ds_buf->seg_id = OPAL_SHMEM_DS_ID_INVALID;
+    /* note: this is only changing the valid bit to 0.  this is not the same
+     * as calling invalidate(ds_buf).
+     */
+    OPAL_SHMEM_DS_INVALIDATE(ds_buf);
+    return OPAL_SUCCESS;
+}
+
diff --git a/opal/mca/shmem/windows/.windows b/opal/mca/shmem/windows/.windows
new file mode 100644
index 0000000000..48475aa466
--- /dev/null
+++ b/opal/mca/shmem/windows/.windows
@@ -0,0 +1,11 @@
+#
+# Copyright (c) 2009 High Performance Computing Center Stuttgart,
+#                    University of Stuttgart. All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+not_single_shared_lib=1
diff --git a/opal/mca/shmem/windows/Makefile.am b/opal/mca/shmem/windows/Makefile.am
new file mode 100644
index 0000000000..08baa57d5d
--- /dev/null
+++ b/opal/mca/shmem/windows/Makefile.am
@@ -0,0 +1,49 @@
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation. All rights reserved.
+# Copyright (c) 2004-2005 The University of Tennessee and The University
+#                         of Tennessee Research Foundation. All rights
+#                         reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2010-2011 Los Alamos National Security, LLC. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +sources = \ + shmem_windows.h \ + shmem_windows_component.c \ + shmem_windows_module.c + +# Make the output library in this directory, and name it either +# mca__.la (for DSO builds) or libmca__.la +# (for static builds). + +if MCA_BUILD_opal_shmem_windows_DSO +component_noinst = +component_install = mca_shmem_windows.la +else +component_noinst = libmca_shmem_windows.la +component_install = +endif + +# help file +dist_pkgdata_DATA = help-opal-shmem-windows.txt + +mcacomponentdir = $(pkglibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_shmem_windows_la_SOURCES = $(sources) +mca_shmem_windows_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_shmem_windows_la_SOURCES =$(sources) +libmca_shmem_windows_la_LDFLAGS = -module -avoid-version diff --git a/opal/mca/shmem/windows/configure.m4 b/opal/mca/shmem/windows/configure.m4 new file mode 100644 index 0000000000..ed06a75e13 --- /dev/null +++ b/opal/mca/shmem/windows/configure.m4 @@ -0,0 +1,50 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. 
+# Copyright (c) 2010      Cisco Systems, Inc. All rights reserved.
+# Copyright (c) 2010-2011 Los Alamos National Security, LLC.
+#                         All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+# MCA_opal_shmem_windows_CONFIG(action-if-can-compile,
+#                        [action-if-cant-compile])
+# ------------------------------------------------
+AC_DEFUN([MCA_opal_shmem_windows_CONFIG],[
+    AC_CONFIG_FILES([opal/mca/shmem/windows/Makefile])
+
+    # do we have the windows shm stuff?
+    AC_MSG_CHECKING([if want Windows shared memory support])
+    AC_ARG_ENABLE(windows-shmem,
+        AC_HELP_STRING([--disable-windows-shmem],
+                       [disable windows shared memory support (default: enabled)]))
+    AS_IF([test "$enable_windows_shmem" = "no"],
+          [AC_MSG_RESULT([no])
+           shmem_windows_sm_build_windows=0],
+          [AC_MSG_RESULT([yes])
+           AC_CHECK_FUNC(CreateFileMapping,
+                         [shmem_windows_sm_build_windows=1],
+                         [shmem_windows_sm_build_windows=0])])
+    AS_IF([test "$enable_windows_shmem" = "yes" && test "$shmem_windows_sm_build_windows" = "0"],
+          [AC_MSG_WARN([Windows shared memory support requested but not found])
+           AC_MSG_ERROR([Cannot continue])])
+
+    AS_IF([test "$shmem_windows_sm_build_windows" = "1"], [$1], [$2])
+
+    AC_DEFINE_UNQUOTED([OPAL_SHMEM_WINDOWS],
+                       [$shmem_windows_sm_build_windows],
+                       [Whether we have Windows shared memory support or not])
+])dnl
diff --git a/opal/mca/shmem/windows/help-opal-shmem-windows.txt b/opal/mca/shmem/windows/help-opal-shmem-windows.txt
new file mode 100644
index 0000000000..372dee6f1f
--- /dev/null
+++ b/opal/mca/shmem/windows/help-opal-shmem-windows.txt
@@ -0,0 +1,41 @@
+# -*- text -*-
+#
+# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
+# Copyright (c) 2010      Los Alamos National Security, LLC.
+#                         All rights reserved.
+#
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+# This is the US/English help file for Open MPI's common shmem support.
+# +[sys call fail] +A system call failed during shared memory initialization that should +not have. It is likely that your MPI job will now either abort or +experience performance degradation. + + Local host: %s + System call: %s %s + Error: %s (errno %d) +# +[mmap on nfs] +WARNING: Open MPI will create a shared memory backing file in a +directory that appears to be mounted on a network filesystem. +Creating the shared memory backing file on a network file system, such +as NFS or Lustre, is not recommended -- it may cause excessive network +traffic to your file servers and/or cause shared memory traffic in +Open MPI to be much slower than expected. + +You may want to check what the typical temporary directory is on your +node. Possible sources of the location of this temporary directory +include the $TEMPDIR, $TEMP, and $TMP environment variables. + +Note, too, that system administrators can set a list of filesystems +where Open MPI is disallowed from creating temporary files by setting +the MCA parameter "orte_no_session_dir". + + Local host: %s + Filename: %s diff --git a/opal/mca/shmem/windows/shmem_windows.h b/opal/mca/shmem/windows/shmem_windows.h new file mode 100644 index 0000000000..e78a7059a2 --- /dev/null +++ b/opal/mca/shmem/windows/shmem_windows.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2010-2011 Los Alamos National Security, LLC. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_SHMEM_WINDOWS_EXPORT_H +#define MCA_SHMEM_WINDOWS_EXPORT_H + +#include "opal_config.h" + +#include "opal/mca/mca.h" +#include "opal/mca/shmem/shmem.h" + +BEGIN_C_DECLS + +/** + * globally exported variable to hold the windows component. + */ +typedef struct opal_shmem_windows_component_t { + /* base component struct */ + opal_shmem_base_component_t super; + /* priority for windows component */ + int priority; +} opal_shmem_windows_component_t; + +OPAL_MODULE_DECLSPEC extern opal_shmem_windows_component_t +mca_shmem_windows_component; + +typedef struct opal_shmem_windows_module_t { + opal_shmem_base_module_t super; +} opal_shmem_windows_module_t; +extern opal_shmem_windows_module_t opal_shmem_windows_module; + +END_C_DECLS + +#endif /* MCA_SHMEM_WINDOWS_EXPORT_H */ diff --git a/opal/mca/shmem/windows/shmem_windows_component.c b/opal/mca/shmem/windows/shmem_windows_component.c new file mode 100644 index 0000000000..68385ded88 --- /dev/null +++ b/opal/mca/shmem/windows/shmem_windows_component.c @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2010-2011 Los Alamos National Security, LLC. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. 
Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. + */ + +#include "opal_config.h" + +#include "opal/constants.h" +#include "opal/mca/shmem/shmem.h" +#include "opal/mca/shmem/windows/shmem_windows.h" + +/** + * public string showing the shmem ompi_windows component version number + */ +const char *opal_shmem_windows_component_version_string = + "OPAL windows shmem MCA component version " OPAL_VERSION; + +/** + * local functions + */ +static int windows_open(void); +static int windows_query(mca_base_module_t **module, int *priority); +static int windows_runtime_query(mca_base_module_t **module, + int *priority, + const char *hint); + +/** + * instantiate the public struct with all of our public information + * and pointers to our public functions in it + */ +opal_shmem_windows_component_t mca_shmem_windows_component = { + /* ////////////////////////////////////////////////////////////////////// */ + /* super */ + /* ////////////////////////////////////////////////////////////////////// */ + { + /** + * common MCA component data + */ + { + OPAL_SHMEM_BASE_VERSION_2_0_0, + + /* component name and version */ + "windows", + OPAL_MAJOR_VERSION, + OPAL_MINOR_VERSION, + OPAL_RELEASE_VERSION, + + /* component open */ + windows_open, + /* component close */ + NULL, + /* component query */ + windows_query + }, + /* MCA v2.0.0 component meta data */ + { + /* the component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + }, + windows_runtime_query, + }, + /* ////////////////////////////////////////////////////////////////////// */ + /* windows component-specific information */ + /* see: shmem_windows.h for more information */ + /* ////////////////////////////////////////////////////////////////////// */ + /* (default) priority - set high to make windows the default 
*/ + 45 +}; + +/* ////////////////////////////////////////////////////////////////////////// */ +static int +windows_runtime_query(mca_base_module_t **module, + int *priority, + const char *hint) +{ + /* no run-time query needed for windows, so this is easy */ + *priority = mca_shmem_windows_component.priority; + *module = (mca_base_module_t *)&opal_shmem_windows_module.super; + return OPAL_SUCCESS; +} + +/* ////////////////////////////////////////////////////////////////////////// */ +static int +windows_open(void) +{ + mca_base_param_reg_int( + &mca_shmem_windows_component.super.base_version, + "priority", "Priority of the windows shmem component", false, false, + mca_shmem_windows_component.priority, &mca_shmem_windows_component.priority + ); + + return OPAL_SUCCESS; +} + +/* ////////////////////////////////////////////////////////////////////////// */ +static int +windows_query(mca_base_module_t **module, int *priority) +{ + *priority = mca_shmem_windows_component.priority; + *module = (mca_base_module_t *)&opal_shmem_windows_module.super; + return OPAL_SUCCESS; +} + diff --git a/opal/mca/shmem/windows/shmem_windows_module.c b/opal/mca/shmem/windows/shmem_windows_module.c new file mode 100644 index 0000000000..48ed0a1d1e --- /dev/null +++ b/opal/mca/shmem/windows/shmem_windows_module.c @@ -0,0 +1,537 @@ +/* + * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. 
+ * Copyright (c) 2010-2011 Los Alamos National Security, LLC. + * All rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#include +#ifdef HAVE_FCNTL_H +#include +#endif /* HAVE_FCNTL_H */ +#ifdef HAVE_SYS_MMAN_H +#include +#endif /* HAVE_SYS_MMAN_H */ +#ifdef HAVE_UNISTD_H +#include +#endif /* HAVE_UNISTD_H */ +#ifdef HAVE_SYS_TYPES_H +#include +#endif /* HAVE_SYS_TYPES_H */ +#ifdef HAVE_STRING_H +#include +#endif /* HAVE_STRING_H */ + +#include "opal/constants.h" +#include "opal_stdint.h" +#include "opal/util/output.h" +#include "opal/util/path.h" +#include "opal/util/show_help.h" +#include "opal/mca/base/mca_base_param.h" +#include "opal/mca/shmem/shmem.h" +#include "opal/mca/shmem/base/base.h" + +#include "opal/mca/shmem/windows/shmem_windows.h" + +/* for tons of debug output: -mca shmem_base_verbose 70 */ + +/* ////////////////////////////////////////////////////////////////////////// */ +/*local functions */ +/* local functions */ +static int +module_init(void); + +static int +segment_create(opal_shmem_ds_t *ds_buf, + const char *file_name, + size_t size); + +static int +ds_copy(const opal_shmem_ds_t *from, + opal_shmem_ds_t *to); + +static void * +segment_attach(opal_shmem_ds_t *ds_buf); + +static int +segment_detach(opal_shmem_ds_t *ds_buf); + +static int +segment_unlink(opal_shmem_ds_t *ds_buf); + +static int +module_finalize(void); + +/* + * windows shmem module + */ +opal_shmem_windows_module_t opal_shmem_windows_module = { + /* super */ + { + module_init, + segment_create, + ds_copy, + segment_attach, + segment_detach, + segment_unlink, + module_finalize + } +}; + +/* ////////////////////////////////////////////////////////////////////////// */ +/* private utility functions */ +/* ////////////////////////////////////////////////////////////////////////// */ + +/* ////////////////////////////////////////////////////////////////////////// */ +/** + * completely resets the 
contents of *ds_buf + */ +static inline void +shmem_ds_reset(opal_shmem_ds_t *ds_buf) +{ + OPAL_OUTPUT_VERBOSE( + (70, opal_shmem_base_output, + "%s: %s: shmem_ds_resetting " + "(opid: %lu id: %d, size: %"PRIsize_t", name: %s)\n", + mca_shmem_windows_component.super.base_version.mca_type_name, + mca_shmem_windows_component.super.base_version.mca_component_name, + (unsigned long)ds_buf->opid, ds_buf->seg_id, ds_buf->seg_size, + ds_buf->seg_name) + ); + + ds_buf->opid = 0; + ds_buf->seg_cpid = 0; + OPAL_SHMEM_DS_RESET_FLAGS(ds_buf); + ds_buf->seg_id = OPAL_SHMEM_DS_ID_INVALID; + ds_buf->seg_size = 0; + memset(ds_buf->seg_name, '\0', OPAL_PATH_MAX); +} + +/* ////////////////////////////////////////////////////////////////////////// */ +static int +module_init(void) +{ + /* nothing to do */ + return OPAL_SUCCESS; +} + +/* ////////////////////////////////////////////////////////////////////////// */ +static int +module_finalize(void) +{ + /* nothing to do */ + return OPAL_SUCCESS; +} + +/* ////////////////////////////////////////////////////////////////////////// */ +static int +ds_copy(const opal_shmem_ds_t *from, + opal_shmem_ds_t *to) +{ + pid_t my_pid = getpid(); + + /* inter-process copy - exclude process-specific data */ + if (from->opid != my_pid) { + /* mask out internal flags */ + to->flags = (from->flags & OPAL_SHMEM_DS_FLAGS_INTERNAL_MASK); + to->seg_base_addr = NULL; + } + /* i am the owner process, so i can safely copy all the information */ + else { + to->flags = from->flags; + to->seg_base_addr = from->seg_base_addr; + } + + to->opid = my_pid; + to->seg_id = from->seg_id; + to->seg_size = from->seg_size; + to->seg_cpid = from->seg_cpid; + memcpy(to->seg_name, from->seg_name, OPAL_PATH_MAX); + + OPAL_OUTPUT_VERBOSE( + (70, opal_shmem_base_output, + "%s: %s: ds_copy complete " + "from: (opid: %lu, id: %d, size: %"PRIsize_t", " + "name: %s flags: 0x%02x) " + "to: (opid: %lu, id: %d, size: %"PRIsize_t", " + "name: %s flags: 0x%02x)\n", + 
mca_shmem_windows_component.super.base_version.mca_type_name, + mca_shmem_windows_component.super.base_version.mca_component_name, + (unsigned long)from->opid, from->seg_id, from->seg_size, + from->seg_name, from->flags, (unsigned long)to->opid, to->seg_id, + to->seg_size, to->seg_name, to->flags) + ); + + return OPAL_SUCCESS; +} + +/* ////////////////////////////////////////////////////////////////////////// */ +/* mmap equivalent implementation on Windows */ + +#ifdef __USE_FILE_OFFSET64 +# define DWORD_HI(x) (x >> 32) +# define DWORD_LO(x) ((x) & 0xffffffff) +#else +# define DWORD_HI(x) (0) +# define DWORD_LO(x) (x) +#endif + +/* define mmap flags */ +#define PROT_READ 0x1 +#define PROT_WRITE 0x2 +#define MAP_SHARED 0x01 +#define MAP_PRIVATE 0x02 +#define MAP_ANONYMOUS 0x20 +#define MAP_ANON MAP_ANONYMOUS +#define MAP_FAILED ((void *) -1) +/* This flag is only available in WinXP+ */ +#ifdef FILE_MAP_EXECUTE +#define PROT_EXEC 0x4 +#else +#define PROT_EXEC 0x0 +#define FILE_MAP_EXECUTE 0 +#endif + +static void *mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset) +{ + DWORD flProtect; + HANDLE mmap_fd, h; + DWORD dwDesiredAccess; + off_t end; + void *ret; + + if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) + return MAP_FAILED; + if (fd == -1) { + if (!(flags & MAP_ANON) || offset) + return MAP_FAILED; + } else if (flags & MAP_ANON) + return MAP_FAILED; + + if (prot & PROT_WRITE) { + if (prot & PROT_EXEC) + flProtect = PAGE_EXECUTE_READWRITE; + else + flProtect = PAGE_READWRITE; + } else if (prot & PROT_EXEC) { + if (prot & PROT_READ) + flProtect = PAGE_EXECUTE_READ; + else if (prot & PROT_EXEC) + flProtect = PAGE_EXECUTE; + } else + flProtect = PAGE_READONLY; + + end = length + offset; + if (fd == -1) + mmap_fd = INVALID_HANDLE_VALUE; + else + mmap_fd = (HANDLE)_get_osfhandle(fd); + h = CreateFileMapping(mmap_fd, NULL, flProtect, DWORD_HI(end), DWORD_LO(end), NULL); + if (h == NULL) + return MAP_FAILED; + + if (prot & PROT_WRITE) + 
dwDesiredAccess = FILE_MAP_WRITE; + else + dwDesiredAccess = FILE_MAP_READ; + if (prot & PROT_EXEC) + dwDesiredAccess |= FILE_MAP_EXECUTE; + if (flags & MAP_PRIVATE) + dwDesiredAccess |= FILE_MAP_COPY; + ret = MapViewOfFile(h, dwDesiredAccess, DWORD_HI(offset), DWORD_LO(offset), length); + if (ret == NULL) { + CloseHandle(h); + ret = MAP_FAILED; + } + return ret; +} + +/* ////////////////////////////////////////////////////////////////////////// */ +/* SKG will not compile, but it's a start */ +static int +segment_create(opal_shmem_ds_t *ds_buf, + const char *file_name, + size_t size) +{ + int rc = OPAL_SUCCESS; + bool file_previously_opened = false; + pid_t my_pid = getpid(); + char *temp1 = NULL, *temp2 = NULL; + /* the real size of the shared memory segment. this includes enough space + * to store our segment header. + */ + size_t real_size = size + sizeof(opal_shmem_seg_hdr_t); + opal_shmem_seg_hdr_t *seg_hdrp = MAP_FAILED; + HANDLE hMapObject = INVALID_HANDLE_VALUE; + LPVOID lpvMem = NULL; + + /* init the contents of opal_shmem_ds_t */ + shmem_ds_reset(ds_buf); + + /* On Windows the shared file will be created by the OS directly on the + * system ressources. Therefore, no file get involved in the operation. + * However, a unique key should be used as name for the shared memory object + * in order to allow all processes to access the same unique shared memory + * region. The key will be obtained from the original file_name by replacing + * all path separator occurences by '/' (as '\' is not allowed on the object + * name). 
+ */ + temp1 = strdup(file_name); + temp2 = temp1; + while (NULL != (temp2 = strchr(temp2, OPAL_PATH_SEP[0])) ) { + *temp2 = '/'; + } + /* update path change in ds_buf */ + memcpy(ds_buf->seg_name, temp2, OPAL_PATH_MAX); + /* relase the temporary file name */ + free(temp1); /* relase the temporary file name */ + + /* use paging file */ + hMapObject = CreateFileMapping(INVALID_HANDLE_VALUE, + /* no security attributes */ + NULL, + /* read/write access */ + PAGE_READWRITE, + /* size: high 32-bits */ + 0, + /* size: low 32-bits */ + (DWORD)real_size, + /* name of map object */ + ds_buf->seg_name); + if (NULL == hMapObject) { + rc = GetLastError(); + goto out; + } + if (ERROR_ALREADY_EXISTS == GetLastError()) { + file_previously_opened = true; + } + + /* Get a pointer to the file-mapped shared memory. */ + lpvMem = MapViewOfFile(hMapObject, /* object to map view of */ + FILE_MAP_WRITE, /* read/write access */ + 0, /* high offset: map from */ + 0, /* low offset: beginning */ + 0); /* default: map entire file */ + if (NULL == lpvMem) { + rc = GetLastError(); + goto out; + } + + seg_hdrp = (opal_shmem_seg_hdr_t *)lpvMem; + + /* all is well */ + { + /* -- initialize the shared memory segment -- */ + opal_atomic_rmb(); + + /* init segment lock */ + opal_atomic_init(&seg_hdrp->lock, OPAL_ATOMIC_UNLOCKED); + /* i was the creator of this segment, so note that fact */ + seg_hdrp->cpid = my_pid; + + opal_atomic_wmb(); + + /* -- initialize the contents of opal_shmem_ds_t -- */ + ds_buf->opid = my_pid; + ds_buf->seg_cpid = my_pid; + ds_buf->seg_size = real_size; + ds_buf->seg_base_addr = (unsigned char *)seg_hdrp; + /* ds_buf->seg_name already set above */ + + /* set "valid" bit because setment creation was successful */ + OPAL_SHMEM_DS_SET_VALID(ds_buf); + + OPAL_OUTPUT_VERBOSE( + (70, opal_shmem_base_output, + "%s: %s: create successful " + "(opid: %lu id: %d, size: %"PRIsize_t", name: %s)\n", + mca_shmem_windows_component.super.base_version.mca_type_name, + 
mca_shmem_windows_component.super.base_version.mca_component_name, + (unsigned long)ds_buf->opid, ds_buf->seg_id, ds_buf->seg_size, + ds_buf->seg_name) + ); + } + +out: + /* in this component, the id is the file descriptor returned by open. this + * check is here to see if it is safe to call close on the file descriptor. + * that is, we are making sure that our call to open was successful and + * we are not not in an error path. + */ + if (-1 != ds_buf->seg_id) { + if (0 != close(ds_buf->seg_id)) { + int err = errno; + char hn[MAXHOSTNAMELEN]; + gethostname(hn, MAXHOSTNAMELEN - 1); + hn[MAXHOSTNAMELEN - 1] = '\0'; + opal_show_help("help-opal-shmem-windows.txt", "sys call fail", 1, hn, + "close(2)", "", strerror(err), err); + rc = OPAL_ERROR; + } + } + + /* an error occured, so invalidate the shmem object and munmap if needed */ + if (OPAL_SUCCESS != rc) { + if (MAP_FAILED != seg_hdrp) { + UnmapViewOfFile(seg_hdrp); + } + shmem_ds_reset(ds_buf); + } + return rc; +} + +/* ////////////////////////////////////////////////////////////////////////// */ +/** + * segment_attach can only be called after a successful call to segment_create + */ +static void * +segment_attach(opal_shmem_ds_t *ds_buf) +{ + pid_t my_pid = getpid(); + + if (my_pid != ds_buf->seg_cpid) { + if (-1 == (ds_buf->seg_id = open(ds_buf->seg_name, O_CREAT | O_RDWR, + 0600))) { + int err = errno; + char hn[MAXHOSTNAMELEN]; + gethostname(hn, MAXHOSTNAMELEN - 1); + hn[MAXHOSTNAMELEN - 1] = '\0'; + opal_show_help("help-opal-shmem-windows.txt", "sys call fail", 1, hn, + "open(2)", "", strerror(err), err); + return NULL; + } + else if (MAP_FAILED == (ds_buf->seg_base_addr = + mmap(NULL, ds_buf->seg_size, + PROT_READ | PROT_WRITE, MAP_SHARED, + ds_buf->seg_id, 0))) { + int err = errno; + char hn[MAXHOSTNAMELEN]; + gethostname(hn, MAXHOSTNAMELEN - 1); + hn[MAXHOSTNAMELEN - 1] = '\0'; + opal_show_help("help-opal-shmem-windows.txt", "sys call fail", 1, hn, + "mmap(2)", "", strerror(err), err); + /* windows module 
failed, so close the file and return NULL - no error check + * here because we are already in an error path... + */ + close(ds_buf->seg_id); + return NULL; + } + /* all is well */ + else { + /* if close fails here, that's okay. just let the user know and + * continue. if we got this far, open and mmap were successful... + */ + if (0 != close(ds_buf->seg_id)) { + int err = errno; + char hn[MAXHOSTNAMELEN]; + gethostname(hn, MAXHOSTNAMELEN - 1); + hn[MAXHOSTNAMELEN - 1] = '\0'; + opal_show_help("help-opal-shmem-windows.txt", "sys call fail", 1, + hn, "close(2)", "", strerror(err), err); + } + } + } + /* else i was the segment creator. nothing to do here because all the hard + * work was done in segment_create :-). + */ + + OPAL_OUTPUT_VERBOSE( + (70, opal_shmem_base_output, + "%s: %s: attach successful " + "(opid: %lu id: %d, size: %"PRIsize_t", name: %s)\n", + mca_shmem_windows_component.super.base_version.mca_type_name, + mca_shmem_windows_component.super.base_version.mca_component_name, + (unsigned long)ds_buf->opid, ds_buf->seg_id, ds_buf->seg_size, + ds_buf->seg_name) + ); + + /* update returned base pointer with an offset that hides our stuff */ + return (ds_buf->seg_base_addr + sizeof(opal_shmem_seg_hdr_t)); +} + +/* ////////////////////////////////////////////////////////////////////////// */ +static int +segment_detach(opal_shmem_ds_t *ds_buf) +{ + int rc = OPAL_SUCCESS; + + OPAL_OUTPUT_VERBOSE( + (70, opal_shmem_base_output, + "%s: %s: detaching " + "(opid: %lu id: %d, size: %"PRIsize_t", name: %s)\n", + mca_shmem_windows_component.super.base_version.mca_type_name, + mca_shmem_windows_component.super.base_version.mca_component_name, + (unsigned long)ds_buf->opid, ds_buf->seg_id, ds_buf->seg_size, + ds_buf->seg_name) + ); + + if (0 == UnmapViewOfFile(ds_buf->seg_base_addr)) { /* UnmapViewOfFile returns zero on failure, nonzero on success */ + int err = errno; + char hn[MAXHOSTNAMELEN]; + gethostname(hn, MAXHOSTNAMELEN - 1); + hn[MAXHOSTNAMELEN - 1] = '\0'; + opal_show_help("help-opal-shmem-windows.txt", "sys call fail", 1, 
hn, + "munmap(2)", "", strerror(err), err); + rc = OPAL_ERROR; + } + /* reset the contents of the opal_shmem_ds_t associated with this + * shared memory segment. + */ + shmem_ds_reset(ds_buf); + return rc; +} + +/* ////////////////////////////////////////////////////////////////////////// */ +static int +segment_unlink(opal_shmem_ds_t *ds_buf) +{ + OPAL_OUTPUT_VERBOSE( + (70, opal_shmem_base_output, + "%s: %s: unlinking " + "(opid: %lu id: %d, size: %"PRIsize_t", name: %s)\n", + mca_shmem_windows_component.super.base_version.mca_type_name, + mca_shmem_windows_component.super.base_version.mca_component_name, + (unsigned long)ds_buf->opid, ds_buf->seg_id, ds_buf->seg_size, + ds_buf->seg_name) + ); + + if (-1 == unlink(ds_buf->seg_name)) { + int err = errno; + char hn[MAXHOSTNAMELEN]; + gethostname(hn, MAXHOSTNAMELEN - 1); + hn[MAXHOSTNAMELEN - 1] = '\0'; + opal_show_help("help-opal-shmem-windows.txt", "sys call fail", 1, hn, + "unlink(2)", ds_buf->seg_name, strerror(err), err); + return OPAL_ERROR; + } + + /* don't completely reset the opal_shmem_ds_t. in particular, only reset + * the id and flip the invalid bit. size and name values will remain valid + * across unlinks. other information stored in flags will remain untouched. + */ + ds_buf->seg_id = OPAL_SHMEM_DS_ID_INVALID; + /* note: this is only changing the valid bit to 0. this is not the same + * as calling invalidate(ds_buf). + */ + OPAL_SHMEM_DS_INVALIDATE(ds_buf); + return OPAL_SUCCESS; +} + diff --git a/opal/runtime/opal_finalize.c b/opal/runtime/opal_finalize.c index 1755d5a7c2..741451c104 100644 --- a/opal/runtime/opal_finalize.c +++ b/opal/runtime/opal_finalize.c @@ -10,6 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2010 Los Alamos National Security, LLC. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -44,6 +46,7 @@ #include "opal/mca/event/base/base.h" #include "opal/runtime/opal_progress.h" #include "opal/mca/carto/base/base.h" +#include "opal/mca/shmem/base/base.h" #if OPAL_ENABLE_FT_CR == 1 #include "opal/mca/compress/base/base.h" #endif @@ -121,6 +124,9 @@ opal_finalize(void) #if OPAL_ENABLE_FT_CR == 1 opal_compress_base_close(); #endif + + /* close the shmem framework */ + opal_shmem_base_close(); opal_progress_finalize(); diff --git a/opal/runtime/opal_init.c b/opal/runtime/opal_init.c index e840b367c3..eb570bf38b 100644 --- a/opal/runtime/opal_init.c +++ b/opal/runtime/opal_init.c @@ -12,6 +12,8 @@ * Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. + * Copyright (c) 2010 Los Alamos National Security, LLC. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -40,6 +42,7 @@ #include "opal/mca/memchecker/base/base.h" #include "opal/dss/dss.h" #include "opal/mca/carto/base/base.h" +#include "opal/mca/shmem/base/base.h" #if OPAL_ENABLE_FT_CR == 1 #include "opal/mca/compress/base/base.h" #endif @@ -429,6 +432,17 @@ opal_init(int* pargc, char*** pargv) /* we want to tick the event library whenever possible */ opal_progress_event_users_increment(); + /* setup the shmem framework */ + if (OPAL_SUCCESS != (ret = opal_shmem_base_open())) { + error = "opal_shmem_base_open"; + goto return_error; + } + + if (OPAL_SUCCESS != (ret = opal_shmem_base_select())) { + error = "opal_shmem_base_select"; + goto return_error; + } + #if OPAL_ENABLE_FT_CR == 1 /* * Initialize the compression framework diff --git a/opal/runtime/opal_params.c b/opal/runtime/opal_params.c index b110393869..51ce43cabf 100644 --- a/opal/runtime/opal_params.c +++ b/opal/runtime/opal_params.c @@ -13,6 +13,8 @@ * reserved. * Copyright (c) 2008-2010 Cisco Systems, Inc. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -44,6 +46,7 @@ #include "opal/mca/event/base/base.h" #include "opal/runtime/opal_progress.h" #include "opal/mca/carto/base/base.h" +#include "opal/mca/shmem/base/base.h" #if OPAL_ENABLE_FT_CR == 1 #include "opal/mca/compress/base/base.h" #endif @@ -121,6 +124,9 @@ opal_finalize(void) #if OPAL_ENABLE_FT_CR == 1 opal_compress_base_close(); #endif + + /* close the shmem framework */ + opal_shmem_base_close(); opal_progress_finalize(); diff --git a/opal/runtime/opal_init.c b/opal/runtime/opal_init.c index e840b367c3..eb570bf38b 100644 --- a/opal/runtime/opal_init.c +++ b/opal/runtime/opal_init.c @@ -12,6 +12,8 @@ * Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. + * Copyright (c) 2010 Los Alamos National Security, LLC. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -40,6 +42,7 @@ #include "opal/mca/memchecker/base/base.h" #include "opal/dss/dss.h" #include "opal/mca/carto/base/base.h" +#include "opal/mca/shmem/base/base.h" #if OPAL_ENABLE_FT_CR == 1 #include "opal/mca/compress/base/base.h" #endif @@ -429,6 +432,17 @@ opal_init(int* pargc, char*** pargv) /* we want to tick the event library whenever possible */ opal_progress_event_users_increment(); + /* setup the shmem framework */ + if (OPAL_SUCCESS != (ret = opal_shmem_base_open())) { + error = "opal_shmem_base_open"; + goto return_error; + } + + if (OPAL_SUCCESS != (ret = opal_shmem_base_select())) { + error = "opal_shmem_base_select"; + goto return_error; + } + #if OPAL_ENABLE_FT_CR == 1 /* * Initialize the compression framework diff --git a/opal/runtime/opal_params.c b/opal/runtime/opal_params.c index b110393869..51ce43cabf 100644 --- a/opal/runtime/opal_params.c +++ b/opal/runtime/opal_params.c @@ -13,6 +13,8 @@ * reserved. * Copyright (c) 2008-2010 Cisco Systems, Inc. 
All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. + * Copyright (c) 2010 Los Alamos National Security, LLC. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -34,6 +36,7 @@ #include "opal/threads/mutex.h" #include "opal/threads/threads.h" #include "opal/mca/paffinity/base/base.h" +#include "opal/mca/shmem/base/base.h" int opal_register_params(void) { @@ -106,6 +109,12 @@ int opal_register_params(void) return ret; } + /* shmem base also has a few parameters */ + ret = opal_shmem_base_register_params(); + if (OPAL_SUCCESS != ret) { + return ret; + } + /* Paffinity base also has some parameters */ return opal_paffinity_base_register_params(); } diff --git a/orte/mca/grpcomm/bad/.windows b/orte/mca/grpcomm/bad/.windows new file mode 100644 index 0000000000..47925ab72e --- /dev/null +++ b/orte/mca/grpcomm/bad/.windows @@ -0,0 +1,12 @@ +'# +# Copyright (c) 2008-2010 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Specific to this module +mca_link_libraries=libopen-rte Ws2_32.lib diff --git a/orte/mca/grpcomm/cnos/.windows b/orte/mca/grpcomm/cnos/.windows new file mode 100644 index 0000000000..47925ab72e --- /dev/null +++ b/orte/mca/grpcomm/cnos/.windows @@ -0,0 +1,12 @@ +'# +# Copyright (c) 2008-2010 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Specific to this module +mca_link_libraries=libopen-rte Ws2_32.lib diff --git a/orte/mca/grpcomm/hier/.windows b/orte/mca/grpcomm/hier/.windows new file mode 100644 index 0000000000..47925ab72e --- /dev/null +++ b/orte/mca/grpcomm/hier/.windows @@ -0,0 +1,12 @@ +'# +# Copyright (c) 2008-2010 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Specific to this module +mca_link_libraries=libopen-rte Ws2_32.lib diff --git a/orte/mca/grpcomm/mcast/.windows b/orte/mca/grpcomm/mcast/.windows new file mode 100644 index 0000000000..47925ab72e --- /dev/null +++ b/orte/mca/grpcomm/mcast/.windows @@ -0,0 +1,12 @@ +'# +# Copyright (c) 2008-2010 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Specific to this module +mca_link_libraries=libopen-rte Ws2_32.lib diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c index 533d4470fe..24ba112032 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -11,6 +11,8 @@ * All rights reserved. * Copyright (c) 2007-2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. + * Copyright (c) 2011 Los Alamos National Security, LLC. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -46,6 +48,7 @@ #include "opal/util/sys_limits.h" #include "opal/dss/dss.h" #include "opal/mca/paffinity/base/base.h" +#include "opal/mca/shmem/base/base.h" #include "opal/mca/pstat/pstat.h" #include "orte/mca/errmgr/errmgr.h" @@ -1191,6 +1194,22 @@ static int odls_base_default_setup_fork(orte_app_context_t *context, opal_setenv(param, orte_local_cpu_model, true, environ_copy); free(param); } + + /* get shmem's best component name so we can provide a hint to the shmem + * framework. the idea here is to have someone figure out what component to + * select (via the shmem framework) and then have the rest of the + * components in shmem obey that decision. for more details take a look at + * the shmem framework in opal. 
+ */ + if (NULL != (param2 = opal_shmem_base_best_runnable_component_name())) { + if (NULL != (param = + mca_base_param_environ_variable("shmem_RUNTIME_QUERY_hint", + NULL, NULL))) { + opal_setenv(param, param2, true, environ_copy); + free(param); + } + free(param2); + } /* push data into environment - don't push any single proc * info, though. We are setting the environment up on a diff --git a/orte/tools/orte-info/components.c b/orte/tools/orte-info/components.c index 11b6d3a1d3..08302c2530 100644 --- a/orte/tools/orte-info/components.c +++ b/orte/tools/orte-info/components.c @@ -10,6 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2010 Los Alamos National Security, LLC. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -35,6 +37,8 @@ #include "opal/mca/paffinity/base/base.h" #include "opal/mca/carto/carto.h" #include "opal/mca/carto/base/base.h" +#include "opal/mca/shmem/shmem.h" +#include "opal/mca/shmem/base/base.h" #include "opal/mca/maffinity/maffinity.h" #include "opal/mca/maffinity/base/base.h" #include "opal/mca/memory/memory.h" @@ -273,6 +277,14 @@ void orte_info_open_components(void) map->type = strdup("carto"); map->components = &opal_carto_base_components_opened; opal_pointer_array_add(&component_map, map); + + if (OPAL_SUCCESS != opal_shmem_base_open()) { + goto error; + } + map = OBJ_NEW(orte_info_component_map_t); + map->type = strdup("shmem"); + map->components = &opal_shmem_base_components_opened; + opal_pointer_array_add(&component_map, map); if (OPAL_SUCCESS != opal_maffinity_base_open()) { goto error; diff --git a/orte/tools/orte-info/orte-info.c b/orte/tools/orte-info/orte-info.c index 03a9eced68..9c45a6e906 100644 --- a/orte/tools/orte-info/orte-info.c +++ b/orte/tools/orte-info/orte-info.c @@ -10,6 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of 
California. * All rights reserved. * Copryight (c) 2007-2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2010 Los Alamos National Security, LLC. + * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -189,6 +191,7 @@ int main(int argc, char *argv[]) opal_pointer_array_add(&mca_types, "memory"); opal_pointer_array_add(&mca_types, "paffinity"); opal_pointer_array_add(&mca_types, "carto"); + opal_pointer_array_add(&mca_types, "shmem"); opal_pointer_array_add(&mca_types, "maffinity"); opal_pointer_array_add(&mca_types, "timer"); opal_pointer_array_add(&mca_types, "installdirs");