From a0d5c80ce0cef6b67929dd0d87b85b639ab8bc9a Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Mon, 30 Nov 2009 23:11:25 +0000 Subject: [PATCH] Add a new framework for discovering local resource information such as cpu type/model, #cpus, available physical memory, etc. Two initial components (darwin and linux) are provided. This is needed to support bootstrap operations where daemons are started at node boot, and applications where initial knowledge of cpu identification is needed to guide framework component selection. Add orte configuration option to control the use of the framework in the system. Although the code will build, it will not be active unless configured with --enable-bootstrap. If bootstrap is enabled and the new opal_sysinfo framework can successfully determine the cpu model, pass that info to the application as an MCA param to support some work at Sun. Also, have daemons report back the resources they find to guide process mapping in bootstrap operations (i.e., where the daemon starts at node boot as opposed to being launched at application start). Adjust some platform files to enable these capabilities. This commit was SVN r22244. 
--- contrib/platform/cisco/hlfr/debug | 2 + contrib/platform/cisco/hlfr/ebuild | 3 +- contrib/platform/cisco/hlfr/optimized | 2 + contrib/platform/cisco/ludd-1/debug | 1 + contrib/platform/cisco/ludd-1/optimized | 1 + contrib/platform/cisco/macosx-dynamic | 1 + .../platform/cisco/macosx-dynamic-optimized | 2 + opal/mca/sysinfo/Makefile.am | 34 +++ opal/mca/sysinfo/base/Makefile.am | 17 ++ opal/mca/sysinfo/base/base.h | 70 ++++++ opal/mca/sysinfo/base/sysinfo_base_close.c | 43 ++++ opal/mca/sysinfo/base/sysinfo_base_open.c | 110 +++++++++ opal/mca/sysinfo/base/sysinfo_base_select.c | 67 ++++++ opal/mca/sysinfo/configure.m4 | 13 ++ opal/mca/sysinfo/darwin/Makefile.am | 35 +++ opal/mca/sysinfo/darwin/configure.m4 | 22 ++ opal/mca/sysinfo/darwin/configure.params | 17 ++ opal/mca/sysinfo/darwin/sysinfo_darwin.h | 30 +++ .../sysinfo/darwin/sysinfo_darwin_component.c | 78 +++++++ .../sysinfo/darwin/sysinfo_darwin_module.c | 120 ++++++++++ opal/mca/sysinfo/linux/Makefile.am | 35 +++ opal/mca/sysinfo/linux/configure.m4 | 31 +++ opal/mca/sysinfo/linux/configure.params | 18 ++ opal/mca/sysinfo/linux/sysinfo_linux.h | 38 ++++ .../sysinfo/linux/sysinfo_linux_component.c | 73 ++++++ opal/mca/sysinfo/linux/sysinfo_linux_module.c | 214 ++++++++++++++++++ opal/mca/sysinfo/sysinfo.h | 101 +++++++++ opal/mca/sysinfo/sysinfo_types.h | 50 ++++ orte/config/orte_configure_options.m4 | 18 ++ orte/mca/ess/base/ess_base_std_orted.c | 23 ++ orte/mca/ess/cm/ess_cm_module.c | 45 ++++ orte/mca/ess/hnp/ess_hnp_module.c | 50 ++++ orte/mca/odls/base/odls_base_default_fns.c | 10 + orte/orted/orted_main.c | 49 ++++ .../data_type_support/orte_dt_print_fns.c | 29 +++ orte/runtime/orte_globals.c | 9 + orte/runtime/orte_globals.h | 3 + orte/test/system/Makefile | 2 +- orte/test/system/sysinfo.c | 67 ++++++ 39 files changed, 1531 insertions(+), 2 deletions(-) create mode 100644 opal/mca/sysinfo/Makefile.am create mode 100644 opal/mca/sysinfo/base/Makefile.am create mode 100644 
opal/mca/sysinfo/base/base.h create mode 100644 opal/mca/sysinfo/base/sysinfo_base_close.c create mode 100644 opal/mca/sysinfo/base/sysinfo_base_open.c create mode 100644 opal/mca/sysinfo/base/sysinfo_base_select.c create mode 100644 opal/mca/sysinfo/configure.m4 create mode 100644 opal/mca/sysinfo/darwin/Makefile.am create mode 100644 opal/mca/sysinfo/darwin/configure.m4 create mode 100644 opal/mca/sysinfo/darwin/configure.params create mode 100644 opal/mca/sysinfo/darwin/sysinfo_darwin.h create mode 100644 opal/mca/sysinfo/darwin/sysinfo_darwin_component.c create mode 100644 opal/mca/sysinfo/darwin/sysinfo_darwin_module.c create mode 100644 opal/mca/sysinfo/linux/Makefile.am create mode 100644 opal/mca/sysinfo/linux/configure.m4 create mode 100644 opal/mca/sysinfo/linux/configure.params create mode 100644 opal/mca/sysinfo/linux/sysinfo_linux.h create mode 100644 opal/mca/sysinfo/linux/sysinfo_linux_component.c create mode 100644 opal/mca/sysinfo/linux/sysinfo_linux_module.c create mode 100644 opal/mca/sysinfo/sysinfo.h create mode 100644 opal/mca/sysinfo/sysinfo_types.h create mode 100644 orte/test/system/sysinfo.c diff --git a/contrib/platform/cisco/hlfr/debug b/contrib/platform/cisco/hlfr/debug index 8d69167574..593501efb7 100644 --- a/contrib/platform/cisco/hlfr/debug +++ b/contrib/platform/cisco/hlfr/debug @@ -1,4 +1,6 @@ enable_dlopen=no +enable_multicast=yes +enable_bootstrap=yes enable_mem_debug=no enable_mem_profile=no enable_memchecker=no diff --git a/contrib/platform/cisco/hlfr/ebuild b/contrib/platform/cisco/hlfr/ebuild index e7f6ef2a3a..3b937b2123 100644 --- a/contrib/platform/cisco/hlfr/ebuild +++ b/contrib/platform/cisco/hlfr/ebuild @@ -1,4 +1,6 @@ enable_dlopen=no +enable_multicast=yes +enable_bootstrap=yes enable_mem_debug=no enable_mem_profile=no with_memory_manager=no @@ -19,7 +21,6 @@ enable_cxx_exceptions=no enable_ft_thread=no enable_per_user_config_files=no enable_script_wrapper_compilers=yes -enable_multicast=yes 
enable_orterun_prefix_by_default=yes enable_io_romio=no #enable_mca_direct=ras-cm,rmaps-resilient,routed-cm diff --git a/contrib/platform/cisco/hlfr/optimized b/contrib/platform/cisco/hlfr/optimized index 302a5ffc5e..b18f0f68e5 100644 --- a/contrib/platform/cisco/hlfr/optimized +++ b/contrib/platform/cisco/hlfr/optimized @@ -1,4 +1,6 @@ enable_dlopen=no +enable_multicast=yes +enable_bootstrap=yes enable_mem_debug=no enable_mem_profile=no enable_memchecker=no diff --git a/contrib/platform/cisco/ludd-1/debug b/contrib/platform/cisco/ludd-1/debug index 1e2bd3f35a..b28de5df7b 100644 --- a/contrib/platform/cisco/ludd-1/debug +++ b/contrib/platform/cisco/ludd-1/debug @@ -1,4 +1,5 @@ enable_multicast=yes +enable_bootstrap=yes with_memory_manager=no enable_mem_debug=yes enable_mem_profile=no diff --git a/contrib/platform/cisco/ludd-1/optimized b/contrib/platform/cisco/ludd-1/optimized index e3bed31f1e..04ef58794e 100644 --- a/contrib/platform/cisco/ludd-1/optimized +++ b/contrib/platform/cisco/ludd-1/optimized @@ -1,4 +1,5 @@ enable_multicast=yes +enable_bootstrap=yes with_memory_manager=no enable_mem_debug=no enable_mem_profile=no diff --git a/contrib/platform/cisco/macosx-dynamic b/contrib/platform/cisco/macosx-dynamic index 7ff2226c8a..02dcece79c 100644 --- a/contrib/platform/cisco/macosx-dynamic +++ b/contrib/platform/cisco/macosx-dynamic @@ -1,4 +1,5 @@ enable_multicast=yes +enable_bootstrap=yes with_memory_manager=no enable_mem_debug=yes enable_mem_profile=no diff --git a/contrib/platform/cisco/macosx-dynamic-optimized b/contrib/platform/cisco/macosx-dynamic-optimized index 5c7b9ef3f0..0eb120274a 100644 --- a/contrib/platform/cisco/macosx-dynamic-optimized +++ b/contrib/platform/cisco/macosx-dynamic-optimized @@ -1,3 +1,5 @@ +enable_multicast=yes +enable_bootstrap=yes with_memory_manager=no enable_mem_debug=no enable_mem_profile=no diff --git a/opal/mca/sysinfo/Makefile.am b/opal/mca/sysinfo/Makefile.am new file mode 100644 index 0000000000..5b5738a282 --- /dev/null 
+++ b/opal/mca/sysinfo/Makefile.am @@ -0,0 +1,34 @@ +# +# Copyright (c) 2009 Cisco Systems, Inc +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# main library setup +noinst_LTLIBRARIES = libmca_sysinfo.la +libmca_sysinfo_la_SOURCES = + +# header setup +nobase_opal_HEADERS = + +# local files +headers = sysinfo.h \ + sysinfo_types.h +libmca_sysinfo_la_SOURCES += $(headers) + +# Conditionally install the header files +if WANT_INSTALL_HEADERS +nobase_opal_HEADERS += $(headers) +opaldir = $(includedir)/openmpi/opal/mca/sysinfo +else +opaldir = $(includedir) +endif + +include base/Makefile.am + +distclean-local: + rm -f base/static-components.h diff --git a/opal/mca/sysinfo/base/Makefile.am b/opal/mca/sysinfo/base/Makefile.am new file mode 100644 index 0000000000..83578cc271 --- /dev/null +++ b/opal/mca/sysinfo/base/Makefile.am @@ -0,0 +1,17 @@ +# +# Copyright (c) 2009 Cisco Systems, Inc. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +headers += \ + base/base.h + +libmca_sysinfo_la_SOURCES += \ + base/sysinfo_base_close.c \ + base/sysinfo_base_select.c \ + base/sysinfo_base_open.c diff --git a/opal/mca/sysinfo/base/base.h b/opal/mca/sysinfo/base/base.h new file mode 100644 index 0000000000..8cda9fec3f --- /dev/null +++ b/opal/mca/sysinfo/base/base.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#ifndef OPAL_SYSINFO_BASE_H +#define OPAL_SYSINFO_BASE_H + +#include "opal_config.h" + +#include "opal/mca/sysinfo/sysinfo.h" + +/* + * Global functions for MCA overall sysinfo open and close + */ + +BEGIN_C_DECLS + +/** + * Initialize the sysinfo MCA framework + * + * @retval OPAL_SUCCESS Upon success + * @retval OPAL_ERROR Upon failure + * + * This must be the first function invoked in the sysinfo MCA + * framework. 
It initializes the sysinfo MCA framework, finds + * and opens sysinfo components, etc. + * + * This function is invoked during opal_init(). + */ +OPAL_DECLSPEC int opal_sysinfo_base_open(void); + +/** + * Close the sysinfo MCA framework + * + * @retval OPAL_SUCCESS Upon success + * @retval OPAL_ERROR Upon failure + * + * This must be the last function invoked in the sysinfo MCA + * framework. + * + * This function is invoked during opal_finalize(). + */ +OPAL_DECLSPEC int opal_sysinfo_base_close(void); + +/** + * Select all available components. + * + * @return OPAL_SUCCESS Upon success. + * @return OPAL_ERROR Upon other failure. + * + * At the end of this process, we'll have a list of all available + * components. If the list is empty, that is okay too. All + * available components will have their init function called. + */ +OPAL_DECLSPEC int opal_sysinfo_base_select(void); + +OPAL_DECLSPEC extern int opal_sysinfo_base_output; +OPAL_DECLSPEC extern opal_list_t opal_sysinfo_base_components_opened; +OPAL_DECLSPEC extern opal_list_t opal_sysinfo_avail_modules; + +END_C_DECLS + +#endif /* OPAL_SYSINFO_BASE_H */ diff --git a/opal/mca/sysinfo/base/sysinfo_base_close.c b/opal/mca/sysinfo/base/sysinfo_base_close.c new file mode 100644 index 0000000000..7212ecda8c --- /dev/null +++ b/opal/mca/sysinfo/base/sysinfo_base_close.c @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. 
+ * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#include "opal/constants.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "opal/mca/sysinfo/sysinfo.h" +#include "opal/mca/sysinfo/base/base.h" + +int opal_sysinfo_base_close(void) +{ + opal_list_item_t *item; + opal_sysinfo_module_t *mod; + + /* call the finalize of all available modules */ + while (NULL != (item = opal_list_remove_first(&opal_sysinfo_avail_modules))) { + mod = (opal_sysinfo_module_t*)item; + if (NULL != mod->module->finalize) { + mod->module->finalize(); + } + } + OBJ_DESTRUCT(&opal_sysinfo_avail_modules); + + /* Close all components that are still open (this should only + happen during ompi_info). */ + + mca_base_components_close(opal_sysinfo_base_output, + &opal_sysinfo_base_components_opened, NULL); + OBJ_DESTRUCT(&opal_sysinfo_base_components_opened); + + /* All done */ + + return OPAL_SUCCESS; +} diff --git a/opal/mca/sysinfo/base/sysinfo_base_open.c b/opal/mca/sysinfo/base/sysinfo_base_open.c new file mode 100644 index 0000000000..82c4884a7c --- /dev/null +++ b/opal/mca/sysinfo/base/sysinfo_base_open.c @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#include "opal_config.h" + +#include "opal/constants.h" +#include "opal/util/output.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "opal/mca/base/mca_base_param.h" +#include "opal/mca/sysinfo/sysinfo.h" +#include "opal/mca/sysinfo/base/base.h" + + +/* + * The following file was created by configure. It contains extern + * statements and the definition of an array of pointers to each + * component's public mca_base_component_t struct. 
+ */ +#include "opal/mca/sysinfo/base/static-components.h" + +/* unsupported functions */ +static int opal_sysinfo_base_query(char **keys, opal_list_t *values); + +/* + * Globals + */ +int opal_sysinfo_base_output = -1; +opal_list_t opal_sysinfo_base_components_opened; +opal_list_t opal_sysinfo_avail_modules; + +opal_sysinfo_API_module_t opal_sysinfo = { + opal_sysinfo_base_query +}; + + +/* + * Function for finding and opening either all MCA components, or the one + * that was specifically requested via a MCA parameter. + */ +int opal_sysinfo_base_open(void) +{ + opal_sysinfo_base_output = opal_output_open(NULL); + + /* init the list of available modules */ + OBJ_CONSTRUCT(&opal_sysinfo_avail_modules, opal_list_t); + + /* Open up all available components */ + OBJ_CONSTRUCT( &opal_sysinfo_base_components_opened, opal_list_t ); + + if (OPAL_SUCCESS != + mca_base_components_open("sysinfo", opal_sysinfo_base_output, + mca_sysinfo_base_static_components, + &opal_sysinfo_base_components_opened, + true)) { + return OPAL_ERROR; + } + + /* All done */ + + return OPAL_SUCCESS; +} + +static int opal_sysinfo_base_query(char **keys, opal_list_t *values) +{ + opal_list_item_t *item; + opal_sysinfo_module_t *mod; + + /* query all the available modules */ + for (item = opal_list_get_first(&opal_sysinfo_avail_modules); + item != opal_list_get_end(&opal_sysinfo_avail_modules); + item = opal_list_get_next(item)) { + mod = (opal_sysinfo_module_t*)item; + if (NULL != mod->module->query) { + mod->module->query(keys, values); + } + } + return OPAL_SUCCESS; +} + +/**** SETUP SYSINFO MODULE OBJECTS ****/ +static void mod_constructor(opal_sysinfo_module_t *ptr) +{ + ptr->module = NULL; +} +OBJ_CLASS_INSTANCE(opal_sysinfo_module_t, + opal_list_item_t, + mod_constructor, NULL); + +static void val_constructor(opal_sysinfo_value_t *ptr) +{ + ptr->key = NULL; +} +static void val_destructor(opal_sysinfo_value_t *ptr) +{ + if (NULL != ptr->key) { + free(ptr->key); + } +} 
+OBJ_CLASS_INSTANCE(opal_sysinfo_value_t, + opal_list_item_t, + val_constructor, val_destructor); diff --git a/opal/mca/sysinfo/base/sysinfo_base_select.c b/opal/mca/sysinfo/base/sysinfo_base_select.c new file mode 100644 index 0000000000..d1077a375b --- /dev/null +++ b/opal/mca/sysinfo/base/sysinfo_base_select.c @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#include "opal_config.h" + +#include "opal/constants.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "opal/mca/base/mca_base_param.h" + +#include "opal/mca/sysinfo/sysinfo.h" +#include "opal/mca/sysinfo/base/base.h" + +/* + * Globals + */ + +int opal_sysinfo_base_select(void) +{ + int pri; + opal_sysinfo_module_t *module; + opal_sysinfo_base_module_t *sysmod; + mca_base_module_t *mod; + mca_base_component_list_item_t *cli; + mca_base_component_t *component; + opal_list_item_t *item; + + /* + * Select all available components + */ + for (item = opal_list_get_first(&opal_sysinfo_base_components_opened); + item != opal_list_get_end(&opal_sysinfo_base_components_opened); + item = opal_list_get_next(item)) { + + cli = (mca_base_component_list_item_t *) item; + component = (mca_base_component_t *) cli->cli_component; + + if (NULL == component->mca_query_component) { + /* no way to get the module */ + continue; + } + if (OPAL_SUCCESS != component->mca_query_component(&mod, &pri)) { + continue; + } + /* init the module */ + sysmod = (opal_sysinfo_base_module_t*)mod; + if (NULL != sysmod->init) { + if (OPAL_SUCCESS != sysmod->init()) { + /* can't run */ + continue; + } + } + module = OBJ_NEW(opal_sysinfo_module_t); + module->module = sysmod; + opal_list_append(&opal_sysinfo_avail_modules, &module->super); + } + + return OPAL_SUCCESS; +} diff --git a/opal/mca/sysinfo/configure.m4 b/opal/mca/sysinfo/configure.m4 new file mode 100644 index 0000000000..366495698d --- 
/dev/null +++ b/opal/mca/sysinfo/configure.m4 @@ -0,0 +1,13 @@ +dnl -*- shell-script -*- +dnl +dnl Copyright (c) 2009 Cisco Systems, Inc +dnl All rights reserved. +dnl $COPYRIGHT$ +dnl +dnl Additional copyrights may follow +dnl +dnl $HEADER$ +dnl + +dnl we only want those at same priority +m4_define(MCA_sysinfo_CONFIGURE_MODE, STOP_AT_FIRST_PRIORITY) diff --git a/opal/mca/sysinfo/darwin/Makefile.am b/opal/mca/sysinfo/darwin/Makefile.am new file mode 100644 index 0000000000..57abe85b51 --- /dev/null +++ b/opal/mca/sysinfo/darwin/Makefile.am @@ -0,0 +1,35 @@ +# +# Copyright (c) 2009 Cisco Systems, Inc. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +sources = \ + sysinfo_darwin.h \ + sysinfo_darwin_component.c \ + sysinfo_darwin_module.c + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds). + +if OMPI_BUILD_sysinfo_darwin_DSO +component_noinst = +component_install = mca_sysinfo_darwin.la +else +component_noinst = libmca_sysinfo_darwin.la +component_install = +endif + +mcacomponentdir = $(pkglibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_sysinfo_darwin_la_SOURCES = $(sources) +mca_sysinfo_darwin_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_sysinfo_darwin_la_SOURCES =$(sources) +libmca_sysinfo_darwin_la_LDFLAGS = -module -avoid-version diff --git a/opal/mca/sysinfo/darwin/configure.m4 b/opal/mca/sysinfo/darwin/configure.m4 new file mode 100644 index 0000000000..273f74e780 --- /dev/null +++ b/opal/mca/sysinfo/darwin/configure.m4 @@ -0,0 +1,22 @@ +# -*- shell-script -*- +# +# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_sysinfo_darwin_CONFIG([action-if-found], [action-if-not-found]) +# ----------------------------------------------------------- +AC_DEFUN([MCA_sysinfo_darwin_CONFIG],[ + OMPI_VAR_SCOPE_PUSH([sysinfo_darwin_happy]) + # check to see if we have <mach/mach_host.h> + # as this is a Darwin-specific thing + AC_CHECK_HEADER([mach/mach_host.h], [sysinfo_darwin_happy=yes], [sysinfo_darwin_happy=no]) + + AS_IF([test "$sysinfo_darwin_happy" = "yes"], [$1], [$2]) + OMPI_VAR_SCOPE_POP +])dnl + diff --git a/opal/mca/sysinfo/darwin/configure.params b/opal/mca/sysinfo/darwin/configure.params new file mode 100644 index 0000000000..e91853fe17 --- /dev/null +++ b/opal/mca/sysinfo/darwin/configure.params @@ -0,0 +1,17 @@ +# -*- shell-script -*- +# +# Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +PARAM_CONFIG_FILES="Makefile" + +# +# Set the config priority so that, if we can build, +# only this component will build + +PARAM_CONFIG_PRIORITY=50 diff --git a/opal/mca/sysinfo/darwin/sysinfo_darwin.h b/opal/mca/sysinfo/darwin/sysinfo_darwin.h new file mode 100644 index 0000000000..379652f8e9 --- /dev/null +++ b/opal/mca/sysinfo/darwin/sysinfo_darwin.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_SYSINFO_DARWIN_H +#define MCA_SYSINFO_DARWIN_H + +#include "opal_config.h" + +#include "opal/mca/mca.h" +#include "opal/mca/sysinfo/sysinfo.h" + +BEGIN_C_DECLS + +/* + * Globally exported variable + */ + +OPAL_DECLSPEC extern const opal_sysinfo_base_component_t mca_sysinfo_darwin_component; + +OPAL_DECLSPEC extern const opal_sysinfo_base_module_t opal_sysinfo_darwin_module; + +END_C_DECLS + +#endif /* MCA_SYSINFO_DARWIN_H */ diff --git a/opal/mca/sysinfo/darwin/sysinfo_darwin_component.c b/opal/mca/sysinfo/darwin/sysinfo_darwin_component.c new file mode 100644 index 0000000000..a8ddb9eb42 --- /dev/null +++ b/opal/mca/sysinfo/darwin/sysinfo_darwin_component.c @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. 
+ */ + +#include "opal_config.h" + +#include "opal/constants.h" +#include "opal/mca/sysinfo/sysinfo.h" +#include "sysinfo_darwin.h" + +/* + * Public string showing the sysinfo ompi_darwin component version number + */ +const char *opal_sysinfo_darwin_component_version_string = + "OPAL darwin sysinfo MCA component version " OPAL_VERSION; + +/* + * Local function + */ +static int sysinfo_darwin_component_query(mca_base_module_t **module, int *priority); + + +/* + * Instantiate the public struct with all of our public information + * and pointers to our public functions in it + */ + +const opal_sysinfo_base_component_t mca_sysinfo_darwin_component = { + + /* First, the mca_component_t struct containing meta information + about the component itself */ + + { + /* Indicate that we are a sysinfo v1.1.0 component (which also + implies a specific MCA version) */ + + OPAL_SYSINFO_BASE_VERSION_2_0_0, + + /* Component name and version */ + + "darwin", + OPAL_MAJOR_VERSION, + OPAL_MINOR_VERSION, + OPAL_RELEASE_VERSION, + + /* Component open and close functions */ + + NULL, + NULL, + sysinfo_darwin_component_query, + NULL + }, + /* Next the MCA v1.0.0 component meta data */ + { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + } +}; + + +static int sysinfo_darwin_component_query(mca_base_module_t **module, int *priority) +{ + *priority = 20; + *module = (mca_base_module_t *)&opal_sysinfo_darwin_module; + + return OPAL_SUCCESS; +} diff --git a/opal/mca/sysinfo/darwin/sysinfo_darwin_module.c b/opal/mca/sysinfo/darwin/sysinfo_darwin_module.c new file mode 100644 index 0000000000..3a182e9f0a --- /dev/null +++ b/opal/mca/sysinfo/darwin/sysinfo_darwin_module.c @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. 
+ * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" +#include "opal/constants.h" + +/* This component will only be compiled on Mac OSX, where we are + guaranteed to have these headers */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "opal/mca/base/mca_base_param.h" +#include "opal/mca/sysinfo/sysinfo.h" +#include "opal/mca/sysinfo/base/base.h" + +#include "sysinfo_darwin.h" + +static int init(void); +static int query(char **keys, opal_list_t *values); +static int fini(void); + +/* + * Darwin sysinfo module + */ +const opal_sysinfo_base_module_t opal_sysinfo_darwin_module = { + init, + query, + fini +}; + +static int init(void) +{ + return OPAL_SUCCESS; +} + +static int fini(void) +{ + return OPAL_SUCCESS; +} + +/* Mac OSX does things a little differently than Linux + * by providing process stats via an API. This means we + * don't have to parse files that could change! 
+ */ +static int query(char **keys, opal_list_t *values) +{ + int mib[2], i; + size_t len; + int64_t i64; + int iint; + opal_sysinfo_value_t *data; + char strval[128]; + + mib[0] = CTL_HW; + + /* cycle through the requested keys */ + for (i=0; NULL != keys[i]; i++) { + if (0 == strcmp(keys[i], OPAL_SYSINFO_CPU_TYPE)) { + mib[1] = HW_MACHINE; + len = 128; + sysctl(mib, 2, &strval, &len, NULL, 0); + data = OBJ_NEW(opal_sysinfo_value_t); + data->key = strdup(OPAL_SYSINFO_CPU_TYPE); + data->type = OPAL_STRING; + data->data.str = strdup(strval); + opal_list_append(values, &data->super); + continue; + } + if (0 == strcmp(keys[i], OPAL_SYSINFO_CPU_MODEL)) { + mib[1] = HW_MODEL; + len = 128; + sysctl(mib, 2, &strval, &len, NULL, 0); + data = OBJ_NEW(opal_sysinfo_value_t); + data->key = strdup(OPAL_SYSINFO_CPU_MODEL); + data->type = OPAL_STRING; + data->data.str = strdup(strval); + opal_list_append(values, &data->super); + continue; + } + if (0 == strcmp(keys[i], OPAL_SYSINFO_NUM_CPUS)) { + mib[1] = HW_NCPU; + len = sizeof(int); + sysctl(mib, 2, &iint, &len, NULL, 0); + data = OBJ_NEW(opal_sysinfo_value_t); + data->key = strdup(OPAL_SYSINFO_NUM_CPUS); + data->type = OPAL_INT64; + data->data.i64 = (int64_t)iint; + opal_list_append(values, &data->super); + continue; + } + if (0 == strcmp(keys[i], OPAL_SYSINFO_MEM_SIZE)) { + mib[1] = HW_MEMSIZE; + len = sizeof(int64_t); + sysctl(mib, 2, &i64, &len, NULL, 0); + data = OBJ_NEW(opal_sysinfo_value_t); + data->key = strdup(OPAL_SYSINFO_MEM_SIZE); + data->type = OPAL_INT64; + data->data.i64 = i64 / (1 << 20); + opal_list_append(values, &data->super); + continue; + } + } + + return OPAL_SUCCESS; +} diff --git a/opal/mca/sysinfo/linux/Makefile.am b/opal/mca/sysinfo/linux/Makefile.am new file mode 100644 index 0000000000..264a2ae0d6 --- /dev/null +++ b/opal/mca/sysinfo/linux/Makefile.am @@ -0,0 +1,35 @@ +# +# Copyright (c) 20049 Cisco Systems, Inc. +# All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +sources = \ + sysinfo_linux.h \ + sysinfo_linux_component.c \ + sysinfo_linux_module.c + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds). + +if OMPI_BUILD_sysinfo_linux_DSO +component_noinst = +component_install = mca_sysinfo_linux.la +else +component_noinst = libmca_sysinfo_linux.la +component_install = +endif + +mcacomponentdir = $(pkglibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_sysinfo_linux_la_SOURCES = $(sources) +mca_sysinfo_linux_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_sysinfo_linux_la_SOURCES =$(sources) +libmca_sysinfo_linux_la_LDFLAGS = -module -avoid-version diff --git a/opal/mca/sysinfo/linux/configure.m4 b/opal/mca/sysinfo/linux/configure.m4 new file mode 100644 index 0000000000..43f1cf59ea --- /dev/null +++ b/opal/mca/sysinfo/linux/configure.m4 @@ -0,0 +1,31 @@ +# -*- shell-script -*- +# +# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. +# +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_sysinfo_linux_CONFIG(action-if-can-compile, +# [action-if-cant-compile]) +# ------------------------------------------------ +AC_DEFUN([MCA_sysinfo_linux_CONFIG],[ + + case "${host}" in + i?86-*|x86_64*|ia64-*|powerpc-*|powerpc64-*|sparc*-*) + AS_IF([test -r "/proc/cpuinfo"], + [sysinfo_linux_happy="yes"], + [sysinfo_linux_happy="no"]) + ;; + *) + sysinfo_linux_happy="no" + ;; + esac + + AS_IF([test "$sysinfo_linux_happy" = "yes"], + [$1], + [$2]) +]) diff --git a/opal/mca/sysinfo/linux/configure.params b/opal/mca/sysinfo/linux/configure.params new file mode 100644 index 0000000000..0d7017d566 --- /dev/null +++ b/opal/mca/sysinfo/linux/configure.params @@ -0,0 +1,18 @@ +# -*- shell-script -*- +# +# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. 
+# +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +PARAM_CONFIG_FILES="Makefile" + +# +# Set the config priority so that, if we can build, +# only this component will build + +PARAM_CONFIG_PRIORITY=60 diff --git a/opal/mca/sysinfo/linux/sysinfo_linux.h b/opal/mca/sysinfo/linux/sysinfo_linux.h new file mode 100644 index 0000000000..1b6852f4cd --- /dev/null +++ b/opal/mca/sysinfo/linux/sysinfo_linux.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/** + * @file + * + * System resource info for Posix systems. + * + */ + + +#ifndef MCA_SYSINFO_LINUX_H +#define MCA_SYSINFO_LINUX_H + +#include "opal_config.h" + +#include "opal/mca/mca.h" +#include "opal/mca/sysinfo/sysinfo.h" + + +BEGIN_C_DECLS + +/** + * Globally exported variable + */ +OPAL_DECLSPEC extern const opal_sysinfo_base_component_t mca_sysinfo_linux_component; + +OPAL_DECLSPEC extern const opal_sysinfo_base_module_t opal_sysinfo_linux_module; + +END_C_DECLS +#endif /* MCA_SYSINFO_LINUX_H */ diff --git a/opal/mca/sysinfo/linux/sysinfo_linux_component.c b/opal/mca/sysinfo/linux/sysinfo_linux_component.c new file mode 100644 index 0000000000..54129cf636 --- /dev/null +++ b/opal/mca/sysinfo/linux/sysinfo_linux_component.c @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. 
+ */
+
+#include "opal_config.h"
+
+#include "opal/constants.h"
+#include "opal/mca/sysinfo/sysinfo.h"
+#include "sysinfo_linux.h"
+
+/*
+ * Public string showing the sysinfo linux component version number
+ */
+const char *opal_sysinfo_linux_component_version_string =
+    "OPAL linux sysinfo MCA component version " OPAL_VERSION;
+
+/*
+ * Local function
+ */
+static int sysinfo_linux_component_query(mca_base_module_t **module, int *priority);
+
+/*
+ * Instantiate the public struct with all of our public information
+ * and pointers to our public functions in it
+ */
+
+const opal_sysinfo_base_component_t mca_sysinfo_linux_component = {
+
+    /* First, the mca_component_t struct containing meta information
+       about the component itself */
+
+    {
+        OPAL_SYSINFO_BASE_VERSION_2_0_0,
+
+        /* Component name and version */
+        "linux",
+        OPAL_MAJOR_VERSION,
+        OPAL_MINOR_VERSION,
+        OPAL_RELEASE_VERSION,
+
+        /* Component open, close and query functions (open/close unused) */
+        NULL,
+        NULL,
+        sysinfo_linux_component_query,
+        NULL,
+    },
+    {
+        /* The component is checkpoint ready */
+        MCA_BASE_METADATA_PARAM_CHECKPOINT
+    }
+};
+
+
+static int sysinfo_linux_component_query(mca_base_module_t **module, int *priority)
+{
+    *priority = 20;
+    *module = (mca_base_module_t *)&opal_sysinfo_linux_module;
+
+    return OPAL_SUCCESS;
+}
+
diff --git a/opal/mca/sysinfo/linux/sysinfo_linux_module.c b/opal/mca/sysinfo/linux/sysinfo_linux_module.c
new file mode 100644
index 0000000000..e483651f48
--- /dev/null
+++ b/opal/mca/sysinfo/linux/sysinfo_linux_module.c
@@ -0,0 +1,270 @@
+/*
+ * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "opal_config.h"
+#include "opal/constants.h"
+
+/* This component will only be compiled on Linux, where we are
+   guaranteed to have <unistd.h> and friends */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/types.h>
+
+#include <sys/param.h> /* for HZ to convert jiffies to actual time */
+
+#include "opal/mca/base/mca_base_param.h"
+#include "opal/dss/dss_types.h"
+#include "opal/util/printf.h"
+
+#include "sysinfo_linux.h"
+
+/*
+ * Module functions
+ */
+static int linux_module_init(void);
+static int query(char **keys, opal_list_t *values);
+static int linux_module_fini(void);
+
+/*
+ * Linux sysinfo module
+ */
+const opal_sysinfo_base_module_t opal_sysinfo_linux_module = {
+    linux_module_init,
+    query,
+    linux_module_fini
+};
+
+
+/* Local functions */
+static char *local_getline(FILE *fp);
+static char *local_stripper(char *data);
+
+/* Local data */
+static char *cpu_type=NULL;
+static char *cpu_model=NULL;
+static int num_cpus=0;
+static int64_t mem_size=0;
+static char input[256];
+
+/*
+ * Scan /proc/cpuinfo and /proc/meminfo and cache the cpu vendor, the cpu
+ * model name, the number of "processor" entries and the total memory in
+ * the local data above. Returns OPAL_ERR_NOT_SUPPORTED when /proc/cpuinfo
+ * cannot be opened; a missing /proc/meminfo is ignored.
+ */
+static int linux_module_init(void)
+{
+    FILE *fp;
+    char *data, *value, *units;
+    long amount;
+
+    /* see if we can open the cpuinfo file */
+    if (NULL == (fp = fopen("/proc/cpuinfo", "r"))) {
+        /* can't access this file - most likely, this means we
+         * aren't really on a supported system, or the proc no
+         * longer exists. Just return an error
+         */
+        return OPAL_ERR_NOT_SUPPORTED;
+    }
+
+    /* read the file one line at a time */
+    while (NULL != (data = local_getline(fp))) {
+        if (NULL == (value = local_stripper(data))) {
+            /* cannot process */
+            continue;
+        }
+        if (NULL == cpu_type && 0 == strcmp(data, "vendor_id")) {
+            cpu_type = strdup(value);
+            continue;
+        }
+        if (NULL == cpu_model && 0 == strcmp(data, "model name")) {
+            cpu_model = strdup(value);
+            continue;
+        }
+        if (0 == strcmp(data, "processor")) {
+            /* increment the num_cpus */
+            ++num_cpus;
+        }
+    }
+    fclose(fp);
+
+    /* see if we can open the meminfo file */
+    if (NULL == (fp = fopen("/proc/meminfo", "r"))) {
+        /* ignore this */
+        return OPAL_SUCCESS;
+    }
+
+    /* read the file one line at a time */
+    while (NULL != (data = local_getline(fp))) {
+        if (NULL == (value = local_stripper(data))) {
+            /* cannot process */
+            continue;
+        }
+        if (0 == strcmp(data, "MemTotal")) {
+            /* compute base value - strtol leaves units just past the
+             * digits, so a short or malformed value is never indexed
+             * out of bounds
+             */
+            amount = strtol(value, &units, 10);
+            if (units == value || amount < 0) {
+                /* no usable number on this line */
+                continue;
+            }
+            mem_size = amount;
+            /* find units */
+            while (' ' == *units || '\t' == *units) {
+                ++units;
+            }
+            /* get the unit multiplier */
+            if (0 == strcmp(units, "kB")) {
+                mem_size = mem_size / 1024;
+            }
+            continue;
+        }
+    }
+    fclose(fp);
+
+    return OPAL_SUCCESS;
+}
+
+/*
+ * Release the strings cached by linux_module_init() and reset the cached
+ * counters so that a later init starts from a clean state.
+ */
+static int linux_module_fini(void)
+{
+    free(cpu_type);
+    cpu_type = NULL;
+    free(cpu_model);
+    cpu_model = NULL;
+    num_cpus = 0;
+    mem_size = 0;
+
+    return OPAL_SUCCESS;
+}
+
+/*
+ * Append a new value object for "key" to the "values" list. The object
+ * carries a copy of "str" (OPAL_STRING) when "str" is non-NULL, and
+ * "i64" (OPAL_INT64) otherwise.
+ */
+static int add_value(opal_list_t *values, const char *key,
+                     const char *str, int64_t i64)
+{
+    opal_sysinfo_value_t *data;
+
+    if (NULL == (data = OBJ_NEW(opal_sysinfo_value_t))) {
+        return OPAL_ERR_OUT_OF_RESOURCE;
+    }
+    data->key = strdup(key);
+    if (NULL != str) {
+        data->type = OPAL_STRING;
+        data->data.str = strdup(str);
+    } else {
+        data->type = OPAL_INT64;
+        data->data.i64 = i64;
+    }
+    opal_list_append(values, &data->super);
+
+    return OPAL_SUCCESS;
+}
+
+/*
+ * Append one value object to "values" for each entry of the
+ * NULL-terminated "keys" array that names a known key whose value was
+ * found during init. Other keys are skipped without error.
+ */
+static int query(char **keys, opal_list_t *values)
+{
+    int i, rc;
+
+    if (NULL == keys || NULL == values) {
+        return OPAL_ERR_BAD_PARAM;
+    }
+
+    /* cycle through the requested keys */
+    for (i=0; NULL != keys[i]; i++) {
+        rc = OPAL_SUCCESS;
+        if (0 == strcmp(keys[i], OPAL_SYSINFO_CPU_TYPE)) {
+            if (NULL != cpu_type) {
+                rc = add_value(values, OPAL_SYSINFO_CPU_TYPE, cpu_type, 0);
+            }
+        } else if (0 == strcmp(keys[i], OPAL_SYSINFO_CPU_MODEL)) {
+            if (NULL != cpu_model) {
+                rc = add_value(values, OPAL_SYSINFO_CPU_MODEL, cpu_model, 0);
+            }
+        } else if (0 == strcmp(keys[i], OPAL_SYSINFO_NUM_CPUS)) {
+            if (num_cpus > 0) {
+                rc = add_value(values, OPAL_SYSINFO_NUM_CPUS, NULL, (int64_t)num_cpus);
+            }
+        } else if (0 == strcmp(keys[i], OPAL_SYSINFO_MEM_SIZE)) {
+            if (mem_size > 0) {
+                rc = add_value(values, OPAL_SYSINFO_MEM_SIZE, NULL, mem_size);
+            }
+        }
+        if (OPAL_SUCCESS != rc) {
+            return rc;
+        }
+    }
+
+    return OPAL_SUCCESS;
+}
+
+/*
+ * Read the next line of "fp" into the static input buffer and strip its
+ * newline. Returns the buffer, or NULL when fgets() returns NULL.
+ */
+static char *local_getline(FILE *fp)
+{
+    if (NULL == fgets(input, sizeof(input), fp)) {
+        return NULL;
+    }
+    /* remove newline - strcspn also copes with an empty string and
+     * with a line that carries no newline at all
+     */
+    input[strcspn(input, "\n")] = '\0';
+
+    return input;
+}
+
+/*
+ * Split a "key : value" line in place. The key is terminated after its
+ * last alphanumeric character, so "data" becomes the key string. Returns
+ * a pointer to the first alphanumeric character of the value, or NULL
+ * when the line has no colon, no key or no value.
+ */
+static char *local_stripper(char *data)
+{
+    char *ptr, *end;
+
+    /* find the colon */
+    if (NULL == (end = strchr(data, ':'))) {
+        return NULL;
+    }
+    ptr = end + 1;
+    /* working backwards, look for first non-whitespace */
+    while (end != data && !isalnum((unsigned char)end[-1])) {
+        --end;
+    }
+    if (end == data) {
+        /* nothing usable in front of the colon */
+        return NULL;
+    }
+    *end = '\0';
+    /* now look for value, never stepping past the terminator */
+    while ('\0' != *ptr && !isalnum((unsigned char)*ptr)) {
+        ++ptr;
+    }
+    return ('\0' == *ptr) ? NULL : ptr;
+}
diff --git a/opal/mca/sysinfo/sysinfo.h b/opal/mca/sysinfo/sysinfo.h
new file mode 100644
index 0000000000..acfe55a673
--- /dev/null
+++ b/opal/mca/sysinfo/sysinfo.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+/**
+ * @file
+ *
+ * system resource info framework component interface.
+ *
+ * Intent
+ *
+ * self-discovery of available local resources.
+ *
+ */
+
+#ifndef OPAL_MCA_SYSINFO_H
+#define OPAL_MCA_SYSINFO_H
+
+#include "opal_config.h"
+
+#include "opal/mca/mca.h"
+#include "opal/mca/base/base.h"
+#include "opal/class/opal_list.h"
+
+#include "opal/mca/sysinfo/sysinfo_types.h"
+
+BEGIN_C_DECLS
+
+/**
+ * Module initialization function. Should return OPAL_SUCCESS.
+ */
+typedef int (*opal_sysinfo_base_module_init_fn_t)(void);
+
+typedef int (*opal_sysinfo_base_module_query_fn_t)(char **keys, opal_list_t *values); /* query: "keys" is a NULL-terminated array; results are appended to "values" as opal_sysinfo_value_t items */
+
+typedef int (*opal_sysinfo_base_module_fini_fn_t)(void); /* module finalization function */
+
+/* Public API module: the query entry point reached through the opal_sysinfo global */
+struct opal_sysinfo_API_module_1_0_0_t {
+    opal_sysinfo_base_module_query_fn_t query;
+};
+typedef struct opal_sysinfo_API_module_1_0_0_t opal_sysinfo_API_module_t;
+
+
+/**
+ * Structure for sysinfo components.
+ */
+struct opal_sysinfo_base_component_2_0_0_t {
+    /** MCA base component */
+    mca_base_component_t base_version;
+    /** MCA base data */
+    mca_base_component_data_t base_data;
+};
+
+/**
+ * Convenience typedefs (versioned name and unversioned alias)
+ */
+typedef struct opal_sysinfo_base_component_2_0_0_t opal_sysinfo_base_component_2_0_0_t;
+typedef struct opal_sysinfo_base_component_2_0_0_t opal_sysinfo_base_component_t;
+
+/**
+ * Structure for sysinfo modules: the init, query and finalize entry points
+ */
+struct opal_sysinfo_base_module_1_0_0_t {
+    opal_sysinfo_base_module_init_fn_t init;
+    opal_sysinfo_base_module_query_fn_t query;
+    opal_sysinfo_base_module_fini_fn_t finalize;
+};
+
+/**
+ * Convenience typedefs (versioned name and unversioned alias)
+ */
+typedef struct opal_sysinfo_base_module_1_0_0_t opal_sysinfo_base_module_1_0_0_t;
+typedef struct opal_sysinfo_base_module_1_0_0_t opal_sysinfo_base_module_t;
+
+typedef struct {
+    opal_list_item_t super; /* list item base, declared first */
+    opal_sysinfo_base_module_t *module; /* the module this list entry refers to */
+} opal_sysinfo_module_t;
+OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_sysinfo_module_t);
+
+
+/**
+ * Macro for use in components that are of type sysinfo
+ */
+#define OPAL_SYSINFO_BASE_VERSION_2_0_0 \
+    MCA_BASE_VERSION_2_0_0, \
+    "sysinfo", 2, 0, 0
+
+/* Global structure for accessing sysinfo functions */
+OPAL_DECLSPEC extern opal_sysinfo_API_module_t opal_sysinfo;
+
+END_C_DECLS
+
+#endif /* OPAL_MCA_SYSINFO_H */
diff --git a/opal/mca/sysinfo/sysinfo_types.h b/opal/mca/sysinfo/sysinfo_types.h
new file mode 100644
index 0000000000..e21e4161d9
--- /dev/null
+++ b/opal/mca/sysinfo/sysinfo_types.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+/**
+ * @file
+ *
+ * Resource keys and the value container used by the sysinfo framework
+ *
+ * Intent
+ *
+ * self-discovery of available local resources.
+ *
+ */
+
+#ifndef OPAL_MCA_SYSINFO_TYPE_H
+#define OPAL_MCA_SYSINFO_TYPE_H
+
+#include "opal_config.h"
+
+#include "opal/class/opal_list.h"
+#include "opal/dss/dss_types.h"
+
+BEGIN_C_DECLS
+
+#define OPAL_SYSINFO_CPU_TYPE "CPU_TYPE" /* key strings passed to the query function */
+#define OPAL_SYSINFO_CPU_MODEL "CPU_MODEL"
+#define OPAL_SYSINFO_NUM_CPUS "NUM_CPUS"
+#define OPAL_SYSINFO_MEM_SIZE "MEMORY"
+
+typedef struct {
+    opal_list_item_t super; /* list item base, declared first */
+    char *key; /* one of the OPAL_SYSINFO_* key strings */
+    opal_data_type_t type; /* OPAL_INT64 or OPAL_STRING; tells which member of "data" is in use */
+    union {
+        int64_t i64;
+        char *str;
+    } data;
+} opal_sysinfo_value_t;
+OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_sysinfo_value_t);
+
+END_C_DECLS
+
+#endif /* OPAL_MCA_SYSINFO_TYPE_H */
diff --git a/orte/config/orte_configure_options.m4 b/orte/config/orte_configure_options.m4 index 1736478038..d33c211d3a 100644 --- a/orte/config/orte_configure_options.m4 +++ b/orte/config/orte_configure_options.m4 @@ -95,5 +95,23 @@ fi AC_DEFINE_UNQUOTED([ORTE_ENABLE_MULTICAST], [$orte_want_multicast], [Enable reliable multicast messaging]) +# +# Do we want bootstrap of daemons enabled?
+# + +AC_MSG_CHECKING([if want bootstrap]) +AC_ARG_ENABLE([bootstrap], + [AC_HELP_STRING([--enable-bootstrap], + [Enable bootstrap of daemons at node startup (default: disabled)])]) +if test "$enable_bootstrap" = "yes"; then + AC_MSG_RESULT([yes]) + orte_want_bootstrap=1 +else + AC_MSG_RESULT([no]) + orte_want_bootstrap=0 +fi +AC_DEFINE_UNQUOTED([ORTE_ENABLE_BOOTSTRAP], [$orte_want_bootstrap], + [Enable bootstrap of daemons at node startup]) + ])dnl diff --git a/orte/mca/ess/base/ess_base_std_orted.c b/orte/mca/ess/base/ess_base_std_orted.c index 41e111a492..1e9413f25d 100644 --- a/orte/mca/ess/base/ess_base_std_orted.c +++ b/orte/mca/ess/base/ess_base_std_orted.c @@ -36,6 +36,9 @@ #include "opal/runtime/opal_cr.h" #include "opal/mca/pstat/base/base.h" #include "opal/mca/paffinity/base/base.h" +#if ORTE_ENABLE_BOOTSTRAP +#include "opal/mca/sysinfo/base/base.h" +#endif #include "orte/mca/rml/base/base.h" #include "orte/mca/routed/base/base.h" @@ -115,6 +118,20 @@ int orte_ess_base_orted_setup(char **hosts) goto error; } +#if ORTE_ENABLE_BOOTSTRAP + /* open and setup the local resource discovery framework */ + if (ORTE_SUCCESS != (ret = opal_sysinfo_base_open())) { + ORTE_ERROR_LOG(ret); + error = "opal_sysinfo_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = opal_sysinfo_base_select())) { + ORTE_ERROR_LOG(ret); + error = "opal_sysinfo_base_select"; + goto error; + } +#endif + /* some environments allow remote launches - e.g., ssh - so * open the PLM and select something -only- if we are given * a specific module to use @@ -440,5 +457,11 @@ int orte_ess_base_orted_finalize(void) /* cleanup any lingering session directories */ orte_session_dir_cleanup(ORTE_JOBID_WILDCARD); + /* handle the orted-specific OPAL stuff */ +#if ORTE_ENABLE_BOOTSTRAP + opal_sysinfo_base_close(); +#endif + opal_pstat_base_close(); + return ORTE_SUCCESS; } diff --git a/orte/mca/ess/cm/ess_cm_module.c b/orte/mca/ess/cm/ess_cm_module.c index 201a88a283..f83070e639 100644 --- 
a/orte/mca/ess/cm/ess_cm_module.c +++ b/orte/mca/ess/cm/ess_cm_module.c @@ -27,6 +27,9 @@ #include "opal/util/argv.h" #include "opal/util/if.h" #include "opal/mca/paffinity/paffinity.h" +#if ORTE_ENABLE_BOOTSTRAP +#include "opal/mca/sysinfo/sysinfo.h" +#endif #include "orte/mca/rmcast/base/base.h" #include "orte/mca/errmgr/errmgr.h" @@ -452,6 +455,48 @@ static int cm_set_name(void) /* always include our node name */ opal_dss.pack(&buf, &orte_process_info.nodename, 1, OPAL_STRING); +#if ORTE_ENABLE_BOOTSTRAP + { + /* get our local resources */ + char *keys[] = { + OPAL_SYSINFO_CPU_TYPE, + OPAL_SYSINFO_CPU_MODEL, + OPAL_SYSINFO_NUM_CPUS, + OPAL_SYSINFO_MEM_SIZE, + NULL + }; + opal_list_t resources; + opal_list_item_t *item; + opal_sysinfo_value_t *info; + int32_t num_values; + + if (ORTE_PROC_IS_DAEMON) { + OBJ_CONSTRUCT(&resources, opal_list_t); + opal_sysinfo.query(keys, &resources); + /* add number of values to the buffer */ + num_values = opal_list_get_size(&resources); + opal_dss.pack(&buf, &num_values, 1, OPAL_INT32); + /* add them to the buffer: key string, data type, then the value itself */ + while (NULL != (item = opal_list_remove_first(&resources))) { + info = (opal_sysinfo_value_t*)item; + opal_dss.pack(&buf, &info->key, 1, OPAL_STRING); + opal_dss.pack(&buf, &info->type, 1, OPAL_DATA_TYPE_T); + if (OPAL_INT64 == info->type) { + opal_dss.pack(&buf, &(info->data.i64), 1, OPAL_INT64); + } else if (OPAL_STRING == info->type) { + opal_dss.pack(&buf, &(info->data.str), 1, OPAL_STRING); + } + /* if this is the cpu model, save it for later use */ + if (0 == strcmp(info->key, OPAL_SYSINFO_CPU_MODEL)) { + orte_local_cpu_model = strdup(info->data.str); + } + OBJ_RELEASE(info); + } + OBJ_DESTRUCT(&resources); + } + } +#endif + /* set the recv to get the answer */ if (ORTE_SUCCESS != (rc = orte_rmcast.recv_buffer_nb(ORTE_RMCAST_SYS_CHANNEL, ORTE_RMCAST_TAG_BOOTSTRAP, diff --git a/orte/mca/ess/hnp/ess_hnp_module.c b/orte/mca/ess/hnp/ess_hnp_module.c index 4b2787db60..72781422e3 100644 ---
a/orte/mca/ess/hnp/ess_hnp_module.c +++ b/orte/mca/ess/hnp/ess_hnp_module.c @@ -39,6 +39,9 @@ #include "opal/util/basename.h" #include "opal/mca/pstat/base/base.h" #include "opal/mca/paffinity/base/base.h" +#if ORTE_ENABLE_BOOTSTRAP +#include "opal/mca/sysinfo/base/base.h" +#endif #include "orte/util/show_help.h" #include "orte/mca/rml/base/base.h" @@ -165,6 +168,20 @@ static int rte_init(void) goto error; } +#if ORTE_ENABLE_BOOTSTRAP + /* open and setup the local resource discovery framework */ + if (ORTE_SUCCESS != (ret = opal_sysinfo_base_open())) { + ORTE_ERROR_LOG(ret); + error = "opal_sysinfo_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = opal_sysinfo_base_select())) { + ORTE_ERROR_LOG(ret); + error = "opal_sysinfo_base_select"; + goto error; + } +#endif + /* Since we are the HNP, then responsibility for * defining the name falls to the PLM component for our * respective environment - hence, we have to open the PLM @@ -405,6 +422,33 @@ static int rte_init(void) node = OBJ_NEW(orte_node_t); node->name = strdup(orte_process_info.nodename); node->index = opal_pointer_array_add(orte_node_pool, node); +#if ORTE_ENABLE_BOOTSTRAP + { + /* get and store our local resources */ + char *keys[] = { + OPAL_SYSINFO_CPU_TYPE, + OPAL_SYSINFO_CPU_MODEL, + OPAL_SYSINFO_NUM_CPUS, + OPAL_SYSINFO_MEM_SIZE, + NULL + }; + opal_list_item_t *item; + opal_sysinfo_value_t *info; + + opal_sysinfo.query(keys, &node->resources); + /* find our cpu model and save it for later */ + for (item = opal_list_get_first(&node->resources); + item != opal_list_get_end(&node->resources); + item = opal_list_get_next(item)) { + info = (opal_sysinfo_value_t*)item; + + if (0 == strcmp(info->key, OPAL_SYSINFO_CPU_MODEL)) { + orte_local_cpu_model = strdup(info->data.str); + break; + } + } + } +#endif /* create and store a proc object for us */ proc = OBJ_NEW(orte_proc_t); @@ -658,6 +702,12 @@ static int rte_finalize(void) } } + /* handle the orted-specific OPAL stuff */ +#if 
ORTE_ENABLE_BOOTSTRAP + opal_sysinfo_base_close(); +#endif + opal_pstat_base_close(); + return ORTE_SUCCESS; } diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c index c3dd9a679a..cea7ed4e9e 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -1096,6 +1096,16 @@ static int odls_base_default_setup_fork(orte_app_context_t *context, free(param); free(param2); + /* pass a param telling the child what model of cpu we are on, + * if we know it + */ + if (NULL != orte_local_cpu_model) { + param = mca_base_param_environ_variable("cpu", NULL,"model"); + /* do not overwrite what the user may have provided */ + opal_setenv(param, orte_local_cpu_model, false, environ_copy); + free(param); + } + /* push data into environment - don't push any single proc * info, though. We are setting the environment up on a * per-context basis, and will add the individual proc diff --git a/orte/orted/orted_main.c b/orte/orted/orted_main.c index 003ccefd52..4aabc990ce 100644 --- a/orte/orted/orted_main.c +++ b/orte/orted/orted_main.c @@ -58,6 +58,9 @@ #include "opal/mca/base/mca_base_param.h" #include "opal/util/daemon_init.h" #include "opal/dss/dss.h" +#if ORTE_ENABLE_BOOTSTRAP +#include "opal/mca/sysinfo/sysinfo.h" +#endif #include "orte/constants.h" #include "orte/util/show_help.h" @@ -719,6 +722,52 @@ int orte_daemon(int argc, char *argv[]) goto DONE; } } else if (orte_daemon_bootstrap) { + /* include our node name */ + opal_dss.pack(buffer, &orte_process_info.nodename, 1, OPAL_STRING); + +#if !ORTE_ENABLE_MULTICAST + /* if we have multicast, then this info was already sent */ +#if ORTE_ENABLE_BOOTSTRAP + { + /* get our local resources */ + char *keys[] = { + OPAL_SYSINFO_CPU_TYPE, + OPAL_SYSINFO_CPU_MODEL, + OPAL_SYSINFO_NUM_CPUS, + OPAL_SYSINFO_MEM_SIZE, + NULL + }; + opal_list_t resources; + opal_list_item_t *item; + opal_sysinfo_value_t *info; + int32_t num_values; + + 
OBJ_CONSTRUCT(&resources, opal_list_t); + opal_sysinfo.query(keys, &resources); + /* add number of values to the buffer */ + num_values = opal_list_get_size(&resources); + opal_dss.pack(buffer, &num_values, 1, OPAL_INT32); + /* add them to the buffer: key string, data type, then the value itself */ + while (NULL != (item = opal_list_remove_first(&resources))) { + info = (opal_sysinfo_value_t*)item; + opal_dss.pack(buffer, &info->key, 1, OPAL_STRING); + opal_dss.pack(buffer, &info->type, 1, OPAL_DATA_TYPE_T); + if (OPAL_INT64 == info->type) { + opal_dss.pack(buffer, &(info->data.i64), 1, OPAL_INT64); + } else if (OPAL_STRING == info->type) { + opal_dss.pack(buffer, &(info->data.str), 1, OPAL_STRING); + } + /* if this is the cpu model, save it for later use */ + if (0 == strcmp(info->key, OPAL_SYSINFO_CPU_MODEL)) { + orte_local_cpu_model = strdup(info->data.str); + } + OBJ_RELEASE(info); + } + OBJ_DESTRUCT(&resources); + } +#endif +#endif + /* send to a different callback location as the * HNP didn't launch us and isn't waiting for a * callback diff --git a/orte/runtime/data_type_support/orte_dt_print_fns.c b/orte/runtime/data_type_support/orte_dt_print_fns.c index ed29230638..8e0a4ef0ac 100644 --- a/orte/runtime/data_type_support/orte_dt_print_fns.c +++ b/orte/runtime/data_type_support/orte_dt_print_fns.c @@ -22,6 +22,9 @@ #include #include "opal/util/argv.h" +#if ORTE_ENABLE_BOOTSTRAP +#include "opal/mca/sysinfo/sysinfo.h" +#endif #include "orte/mca/errmgr/errmgr.h" #include "opal/dss/dss.h" @@ -393,6 +396,32 @@ int orte_dt_print_node(char **output, char *prefix, orte_node_t *src, opal_data_ free(tmp); tmp = tmp2; +#if ORTE_ENABLE_BOOTSTRAP + { + opal_list_item_t *item; + opal_sysinfo_value_t *info; + + asprintf(&tmp2, "%s\n%s\tDetected Resources:", tmp, pfx2); + free(tmp); + tmp = tmp2; + + for (item = opal_list_get_first(&src->resources); + item != opal_list_get_end(&src->resources); + item = opal_list_get_next(item)) { + info = (opal_sysinfo_value_t*)item; + if (OPAL_INT64 == info->type) { + asprintf(&tmp2,
"%s\n%s\t\t%s: %ld", tmp, pfx2, + info->key, (long int)info->data.i64); + } else if (OPAL_STRING == info->type) { + asprintf(&tmp2, "%s\n%s\t\t%s: %s", tmp, pfx2, + info->key, info->data.str); + } + free(tmp); + tmp = tmp2; + } + } +#endif + asprintf(&tmp2, "%s\n%s\tNum procs: %ld\tNext node_rank: %ld", tmp, pfx2, (long)src->num_procs, (long)src->next_node_rank); free(tmp); diff --git a/orte/runtime/orte_globals.c b/orte/runtime/orte_globals.c index bd639e9e94..6af3c1365e 100644 --- a/orte/runtime/orte_globals.c +++ b/orte/runtime/orte_globals.c @@ -56,6 +56,7 @@ bool orte_leave_session_attached; bool orte_do_not_launch = false; bool orted_spin_flag = false; bool orte_daemon_bootstrap = false; +char *orte_local_cpu_model = NULL; /* ORTE OOB port flags */ bool orte_static_ports = false; @@ -708,11 +709,14 @@ static void orte_node_construct(orte_node_t* node) } node->username = NULL; + + OBJ_CONSTRUCT(&node->resources, opal_list_t); } static void orte_node_destruct(orte_node_t* node) { int i; + opal_list_item_t *item; if (NULL != node->name) { free(node->name); @@ -747,6 +751,11 @@ static void orte_node_destruct(orte_node_t* node) free(node->username); node->username = NULL; } + + while (NULL != (item = opal_list_remove_first(&node->resources))) { + OBJ_RELEASE(item); + } + OBJ_DESTRUCT(&node->resources); } diff --git a/orte/runtime/orte_globals.h b/orte/runtime/orte_globals.h index e8c0b35cf9..aa99eec8ca 100644 --- a/orte/runtime/orte_globals.h +++ b/orte/runtime/orte_globals.h @@ -264,6 +264,8 @@ typedef struct { char *cpu_set; /** Username on this node, if specified */ char *username; + /* list of known system resources for this node */ + opal_list_t resources; } orte_node_t; ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_node_t); @@ -524,6 +526,7 @@ ORTE_DECLSPEC extern bool orte_leave_session_attached; ORTE_DECLSPEC extern bool orte_do_not_launch; ORTE_DECLSPEC extern bool orted_spin_flag; ORTE_DECLSPEC extern bool orte_daemon_bootstrap; +ORTE_DECLSPEC extern char 
*orte_local_cpu_model; /* ORTE OOB port flags */ ORTE_DECLSPEC extern bool orte_static_ports; diff --git a/orte/test/system/Makefile b/orte/test/system/Makefile index 977553ede1..9199e50c95 100644 --- a/orte/test/system/Makefile +++ b/orte/test/system/Makefile @@ -1,4 +1,4 @@ -PROGS = no_op sigusr_trap spin orte_nodename orte_spawn orte_loop_spawn orte_loop_child orte_abort get_limits orte_ring spawn_child orte_tool orte_no_op binom oob_stress iof_stress iof_delay radix orte_barrier orte_mcast opal_interface mcast mcast_recv orte_spin segfault +PROGS = no_op sigusr_trap spin orte_nodename orte_spawn orte_loop_spawn orte_loop_child orte_abort get_limits orte_ring spawn_child orte_tool orte_no_op binom oob_stress iof_stress iof_delay radix orte_barrier orte_mcast opal_interface mcast mcast_recv orte_spin segfault sysinfo all: $(PROGS)
diff --git a/orte/test/system/sysinfo.c b/orte/test/system/sysinfo.c
new file mode 100644
index 0000000000..73bc470c8b
--- /dev/null
+++ b/orte/test/system/sysinfo.c
@@ -0,0 +1,76 @@
+/* -*- C -*-
+ *
+ * $HEADER$
+ *
+ * Print the local resources reported by the sysinfo framework
+ */
+
+#include <stdio.h>
+#include "orte/constants.h"
+
+#include <stdlib.h>
+
+#include "opal/mca/sysinfo/sysinfo.h"
+#include "opal/mca/sysinfo/base/base.h"
+
+#include "orte/runtime/runtime.h"
+
+int main(int argc, char* argv[])
+{
+    char *keys[] = {
+        OPAL_SYSINFO_CPU_TYPE,
+        OPAL_SYSINFO_CPU_MODEL,
+        OPAL_SYSINFO_NUM_CPUS,
+        OPAL_SYSINFO_MEM_SIZE,
+        NULL
+    };
+    opal_list_t values;
+    opal_sysinfo_value_t *info;
+    opal_list_item_t *item;
+    char *model;
+
+    if (ORTE_SUCCESS != orte_init(ORTE_PROC_NON_MPI)) {
+        fprintf(stderr, "Failed orte_init\n");
+        exit(1);
+    }
+
+    /* do not call the query function unless the framework came up */
+    if (ORTE_SUCCESS != opal_sysinfo_base_open() ||
+        ORTE_SUCCESS != opal_sysinfo_base_select()) {
+        fprintf(stderr, "Failed to open/select a sysinfo component\n");
+        orte_finalize();
+        exit(1);
+    }
+
+    OBJ_CONSTRUCT(&values, opal_list_t);
+
+    opal_sysinfo.query(keys, &values);
+
+    /* drain the list, printing each value according to its type */
+    while (NULL != (item = opal_list_remove_first(&values))) {
+        info = (opal_sysinfo_value_t*)item;
+        fprintf(stderr, "Key: %s Value: ", info->key);
+        if (OPAL_INT64 == info->type) {
+            fprintf(stderr, "%ld\n", (long int)info->data.i64);
+        } else if (OPAL_STRING == info->type) {
+            fprintf(stderr, "%s\n", info->data.str);
+        } else {
+            /* keep one value per output line even for other types */
+            fprintf(stderr, "\n");
+        }
+        OBJ_RELEASE(info);
+    }
+
+    OBJ_DESTRUCT(&values);
+
+    opal_sysinfo_base_close();
+
+    model = getenv("OMPI_MCA_cpu_model");
+    fprintf(stderr, "Envar cpu_model: %s\n", (NULL == model) ? "NULL" : model);
+
+    if (ORTE_SUCCESS != orte_finalize()) {
+        fprintf(stderr, "Failed orte_finalize\n");
+        exit(1);
+    }
+    return 0;
+}