From 0b9552cd4e872f9d13a967a30c1340f3a8c6ca30 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 23 Mar 2010 20:47:41 +0000 Subject: [PATCH] Expand the ESS framework's API to include a new function "query_sys_info" that allows the caller to retrieve key-value pairs of info on the local system capabilities (e.g., cpu type/model). Have each daemon and the HNP "sense" that information and provide it to their local procs to avoid having every proc querying the system directly. This commit was SVN r22870. --- orte/mca/ess/alps/ess_alps_module.c | 1 + orte/mca/ess/base/Makefile.am | 3 +- orte/mca/ess/base/base.h | 2 + orte/mca/ess/base/ess_base_sysinfo.c | 100 ++++++++++++++++++ orte/mca/ess/bproc/ess_bproc_module.c | 1 + orte/mca/ess/cm/ess_cm_module.c | 1 + orte/mca/ess/cnos/ess_cnos_module.c | 9 +- orte/mca/ess/env/ess_env_module.c | 1 + orte/mca/ess/ess.h | 12 ++- orte/mca/ess/hnp/ess_hnp_module.c | 28 +---- orte/mca/ess/lsf/ess_lsf_module.c | 1 + .../portals_utcp/ess_portals_utcp_module.c | 8 ++ orte/mca/ess/singleton/ess_singleton_module.c | 1 + orte/mca/ess/slave/ess_slave_module.c | 1 + orte/mca/ess/slurm/ess_slurm_module.c | 1 + orte/mca/ess/slurmd/ess_slurmd_module.c | 1 + orte/mca/ess/tm/ess_tm_module.c | 1 + orte/mca/ess/tool/ess_tool_module.c | 7 ++ orte/mca/odls/base/odls_base_close.c | 6 ++ orte/mca/odls/base/odls_base_default_fns.c | 2 + orte/mca/odls/base/odls_base_open.c | 37 ++++++- orte/mca/odls/base/odls_private.h | 2 + orte/orted/orted_main.c | 27 ++--- orte/runtime/orte_globals.c | 5 + orte/runtime/orte_globals.h | 4 +- orte/test/system/orte_nodename.c | 30 +++++- orte/util/nidmap.c | 67 ++++++++++++ orte/util/nidmap.h | 3 + 28 files changed, 305 insertions(+), 57 deletions(-) create mode 100644 orte/mca/ess/base/ess_base_sysinfo.c diff --git a/orte/mca/ess/alps/ess_alps_module.c b/orte/mca/ess/alps/ess_alps_module.c index 580f1b33a5..7d02da2d83 100644 --- a/orte/mca/ess/alps/ess_alps_module.c +++ b/orte/mca/ess/alps/ess_alps_module.c @@ -63,6 
+63,7 @@ orte_ess_base_module_t orte_ess_alps_module = { proc_get_node_rank, update_pidmap, update_nidmap, + orte_ess_base_query_sys_info, NULL /* ft_event */ }; diff --git a/orte/mca/ess/base/Makefile.am b/orte/mca/ess/base/Makefile.am index a15b579b8d..2d53a996a6 100644 --- a/orte/mca/ess/base/Makefile.am +++ b/orte/mca/ess/base/Makefile.am @@ -34,6 +34,7 @@ libmca_ess_la_SOURCES += \ base/ess_base_std_tool.c \ base/ess_base_std_app.c \ base/ess_base_std_orted.c \ - base/ess_base_std_prolog.c + base/ess_base_std_prolog.c \ + base/ess_base_sysinfo.c endif diff --git a/orte/mca/ess/base/base.h b/orte/mca/ess/base/base.h index b5552953f7..9cc90cb72d 100644 --- a/orte/mca/ess/base/base.h +++ b/orte/mca/ess/base/base.h @@ -77,6 +77,8 @@ ORTE_DECLSPEC int orte_ess_base_tool_finalize(void); ORTE_DECLSPEC int orte_ess_base_orted_setup(char **hosts); ORTE_DECLSPEC int orte_ess_base_orted_finalize(void); +ORTE_DECLSPEC int orte_ess_base_query_sys_info(char *node, char **keys, opal_list_t *values); + /* * Put functions */ diff --git a/orte/mca/ess/base/ess_base_sysinfo.c b/orte/mca/ess/base/ess_base_sysinfo.c new file mode 100644 index 0000000000..d9bab36309 --- /dev/null +++ b/orte/mca/ess/base/ess_base_sysinfo.c @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" +#include "orte/constants.h" + +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#include <string.h> + +#include "opal/util/opal_environ.h" +#include "opal/util/if.h" +#include "opal/class/opal_list.h" +#include "opal/mca/sysinfo/sysinfo_types.h" + +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/odls/base/odls_private.h" +#include "orte/util/proc_info.h" +#include "orte/util/nidmap.h" +#include "orte/runtime/orte_globals.h" + +#include "orte/mca/ess/base/base.h" + +/* + * Copy the local node's sysinfo entries whose keys appear in "keys" onto "values". + * + * node - node to query; NULL means the local node. A non-NULL name that differs + * from orte_process_info.nodename, or that opal_ifislocal() rejects, returns + * ORTE_SUCCESS without touching "values". + * keys - NULL-terminated array of key strings to look up. + * values - list that receives one newly created opal_sysinfo_value_t copy per + * matched key. + * + * Returns ORTE_ERR_BAD_PARAM when keys or values is NULL, ORTE_ERR_NOT_FOUND when + * a process that is neither HNP nor daemon cannot look up its own nid, and + * ORTE_SUCCESS otherwise. 
+ */ +int orte_ess_base_query_sys_info(char *node, char **keys, opal_list_t *values) +{ + orte_nid_t *nid; + opal_sysinfo_value_t *sys, *sys2; + opal_list_item_t *item; + int i; + + /* reject missing arguments before they are dereferenced below */ + if (NULL == keys || NULL == values) { + return ORTE_ERR_BAD_PARAM; + } + + /* we currently only support the local node */ + if (NULL != node && + (0 != strcmp(node, orte_process_info.nodename) || !opal_ifislocal(node))) { + return ORTE_SUCCESS; + } + + if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_DAEMON) { + /* cycle through local sysinfo */ + for (i=0; NULL != keys[i]; i++) { + for (item = opal_list_get_first(&orte_odls_globals.sysinfo); + item != opal_list_get_end(&orte_odls_globals.sysinfo); + item = opal_list_get_next(item)) { + sys = (opal_sysinfo_value_t*)item; + if (0 != strcmp(keys[i], sys->key)) { + continue; + } + /* matched - pass the value back */ + sys2 = OBJ_NEW(opal_sysinfo_value_t); + sys2->key = strdup(sys->key); + sys2->type = sys->type; + if (OPAL_STRING == sys->type) { + sys2->data.str = strdup(sys->data.str); + } else { + sys2->data.i64 = sys->data.i64; + } + /* hand the copy to the caller; otherwise it is leaked and never returned */ + opal_list_append(values, &sys2->super); + /* can only be one entry for each key */ + break; + } + } + return ORTE_SUCCESS; + } + + /* get the nid of our local node */ + if (NULL == (nid = orte_util_lookup_nid(ORTE_PROC_MY_NAME))) { + /* can't get it */ + return ORTE_ERR_NOT_FOUND; + } + + /* cycle through this node's attributes looking for the keys */ + for (i=0; NULL != keys[i]; i++) { + for (item = opal_list_get_first(&nid->sysinfo); + item != opal_list_get_end(&nid->sysinfo); + item = opal_list_get_next(item)) { + sys = 
(opal_sysinfo_value_t*)item; + + if (0 != strcmp(keys[i], sys->key)) { + continue; + } + /* matched - pass the value back */ + sys2 = OBJ_NEW(opal_sysinfo_value_t); + sys2->key = strdup(sys->key); + sys2->type = sys->type; + if (OPAL_STRING == sys->type) { + sys2->data.str = strdup(sys->data.str); + } else { + sys2->data.i64 = sys->data.i64; + } + opal_list_append(values, &sys2->super); + /* can only be one entry for each key */ + break; + } + } + + return ORTE_SUCCESS; +} diff --git a/orte/mca/ess/bproc/ess_bproc_module.c b/orte/mca/ess/bproc/ess_bproc_module.c index ebae557266..650ead4ebd 100644 --- a/orte/mca/ess/bproc/ess_bproc_module.c +++ b/orte/mca/ess/bproc/ess_bproc_module.c @@ -55,6 +55,7 @@ orte_ess_base_module_t orte_ess_bproc_module = { proc_get_node_rank, NULL, /* update_pidmap */ NULL, /* update_nidmap */ + orte_ess_base_query_sys_info, NULL /* no FT support for Bproc */ }; diff --git a/orte/mca/ess/cm/ess_cm_module.c b/orte/mca/ess/cm/ess_cm_module.c index a654c4e265..0baf0acf98 100644 --- a/orte/mca/ess/cm/ess_cm_module.c +++ b/orte/mca/ess/cm/ess_cm_module.c @@ -72,6 +72,7 @@ orte_ess_base_module_t orte_ess_cm_module = { proc_get_node_rank, update_pidmap, update_nidmap, + orte_ess_base_query_sys_info, NULL /* ft_event */ }; diff --git a/orte/mca/ess/cnos/ess_cnos_module.c b/orte/mca/ess/cnos/ess_cnos_module.c index 8c51257276..1ea933afbd 100644 --- a/orte/mca/ess/cnos/ess_cnos_module.c +++ b/orte/mca/ess/cnos/ess_cnos_module.c @@ -28,7 +28,7 @@ #include "opal/mca/paffinity/paffinity.h" #include "opal/util/output.h" - +#include "opal/class/opal_list.h" #include "orte/mca/errmgr/base/base.h" #include "orte/util/proc_info.h" @@ -47,6 +47,7 @@ static uint8_t proc_get_locality(orte_process_name_t *proc); static char* proc_get_hostname(orte_process_name_t *proc); static orte_local_rank_t proc_get_local_rank(orte_process_name_t *proc); static orte_node_rank_t proc_get_node_rank(orte_process_name_t *proc); +static int query_sys_info(char *node, char 
**keys, opal_list_t *values); orte_ess_base_module_t orte_ess_cnos_module = { rte_init, @@ -59,6 +60,7 @@ orte_ess_base_module_t orte_ess_cnos_module = { proc_get_node_rank, NULL, /* add_pidmap is only used in ORTE */ NULL, /* update_nidmap is only used in ORTE */ + query_sys_info, NULL /* ft_event */ }; @@ -157,3 +159,8 @@ static orte_node_rank_t proc_get_node_rank(orte_process_name_t *proc) */ return 0; } + +static int query_sys_info(char *node, char **keys, opal_list_t *values) +{ + return ORTE_SUCCESS; +} diff --git a/orte/mca/ess/env/ess_env_module.c b/orte/mca/ess/env/ess_env_module.c index 8899e4067e..6babafea2e 100644 --- a/orte/mca/ess/env/ess_env_module.c +++ b/orte/mca/ess/env/ess_env_module.c @@ -100,6 +100,7 @@ orte_ess_base_module_t orte_ess_env_module = { proc_get_node_rank, update_pidmap, update_nidmap, + orte_ess_base_query_sys_info, #if OPAL_ENABLE_FT_CR == 1 rte_ft_event #else diff --git a/orte/mca/ess/ess.h b/orte/mca/ess/ess.h index d2ea4798d0..5a883118c9 100644 --- a/orte/mca/ess/ess.h +++ b/orte/mca/ess/ess.h @@ -104,7 +104,7 @@ typedef orte_local_rank_t (*orte_ess_base_module_proc_get_local_rank_fn_t)(orte_ typedef orte_node_rank_t (*orte_ess_base_module_proc_get_node_rank_fn_t)(orte_process_name_t *proc); /** - * Update thr pidmap + * Update the pidmap * * When a job is dynamically launched via comm_spawn, the pre-existing daemons need to * update their knowledge of the process map within the job so they can properly do @@ -122,7 +122,14 @@ typedef int (*orte_ess_base_module_update_pidmap_fn_t)(opal_byte_object_t *bo); */ typedef int (*orte_ess_base_module_update_nidmap_fn_t)(opal_byte_object_t *bo); - +/** + * Query node configuration info + * + * Request information on the system capabilities of a specific node. A NULL nodename + * indicates that the local node info is requested. An empty list of results is + * returned on systems that do not support this functionality. 
+ */ +typedef int (*orte_ess_base_module_query_sys_info_t)(char *node, char **keys, opal_list_t *values); /** * Handle fault tolerance updates @@ -148,6 +155,7 @@ struct orte_ess_base_module_1_0_0_t { orte_ess_base_module_proc_get_node_rank_fn_t get_node_rank; orte_ess_base_module_update_pidmap_fn_t update_pidmap; orte_ess_base_module_update_nidmap_fn_t update_nidmap; + orte_ess_base_module_query_sys_info_t query_sys_info; orte_ess_base_module_ft_event_fn_t ft_event; }; typedef struct orte_ess_base_module_1_0_0_t orte_ess_base_module_1_0_0_t; diff --git a/orte/mca/ess/hnp/ess_hnp_module.c b/orte/mca/ess/hnp/ess_hnp_module.c index afc64e763c..0e6293571a 100644 --- a/orte/mca/ess/hnp/ess_hnp_module.c +++ b/orte/mca/ess/hnp/ess_hnp_module.c @@ -29,10 +29,12 @@ #include #endif +#include "opal/class/opal_list.h" #include "opal/event/event.h" #include "opal/runtime/opal.h" #include "opal/runtime/opal_cr.h" +#include "opal/util/if.h" #include "opal/util/os_path.h" #include "opal/util/output.h" #include "opal/util/malloc.h" @@ -88,7 +90,6 @@ static orte_node_rank_t proc_get_node_rank(orte_process_name_t *proc); static int update_pidmap(opal_byte_object_t *bo); static int update_nidmap(opal_byte_object_t *bo); - orte_ess_base_module_t orte_ess_hnp_module = { rte_init, rte_finalize, @@ -100,10 +101,10 @@ orte_ess_base_module_t orte_ess_hnp_module = { proc_get_node_rank, update_pidmap, update_nidmap, + orte_ess_base_query_sys_info, NULL /* ft_event */ }; - static int rte_init(void) { int ret; @@ -113,15 +114,6 @@ static int rte_init(void) orte_node_t *node; orte_proc_t *proc; int value; - char *keys[] = { - OPAL_SYSINFO_CPU_TYPE, - OPAL_SYSINFO_CPU_MODEL, - OPAL_SYSINFO_NUM_CPUS, - OPAL_SYSINFO_MEM_SIZE, - NULL - }; - opal_list_item_t *item; - opal_sysinfo_value_t *info; /* initialize the global list of local children and job data */ OBJ_CONSTRUCT(&orte_local_children, opal_list_t); @@ -428,20 +420,6 @@ static int rte_init(void) node->name = 
strdup(orte_process_info.nodename); node->index = opal_pointer_array_add(orte_node_pool, node); - /* get and store our local resources */ - opal_sysinfo.query(keys, &node->resources); - /* find our cpu model and save it for later */ - for (item = opal_list_get_first(&node->resources); - item != opal_list_get_end(&node->resources); - item = opal_list_get_next(item)) { - info = (opal_sysinfo_value_t*)item; - - if (0 == strcmp(info->key, OPAL_SYSINFO_CPU_MODEL)) { - orte_local_cpu_model = strdup(info->data.str); - break; - } - } - /* create and store a proc object for us */ proc = OBJ_NEW(orte_proc_t); proc->name.jobid = ORTE_PROC_MY_NAME->jobid; diff --git a/orte/mca/ess/lsf/ess_lsf_module.c b/orte/mca/ess/lsf/ess_lsf_module.c index 61ca31b5dd..cbfa3f05ee 100644 --- a/orte/mca/ess/lsf/ess_lsf_module.c +++ b/orte/mca/ess/lsf/ess_lsf_module.c @@ -69,6 +69,7 @@ orte_ess_base_module_t orte_ess_lsf_module = { proc_get_node_rank, update_pidmap, update_nidmap, + orte_ess_base_query_sys_info, NULL /* ft_event */ }; diff --git a/orte/mca/ess/portals_utcp/ess_portals_utcp_module.c b/orte/mca/ess/portals_utcp/ess_portals_utcp_module.c index 530d8fb80b..9e54c389db 100644 --- a/orte/mca/ess/portals_utcp/ess_portals_utcp_module.c +++ b/orte/mca/ess/portals_utcp/ess_portals_utcp_module.c @@ -24,6 +24,7 @@ #include "opal/util/argv.h" #include "opal/mca/paffinity/paffinity.h" +#include "opal/class/opal_list.h" #include "orte/mca/errmgr/base/base.h" #include "orte/util/name_fns.h" @@ -42,6 +43,7 @@ static uint8_t proc_get_locality(orte_process_name_t *proc); static char* proc_get_hostname(orte_process_name_t *proc); static orte_local_rank_t proc_get_local_rank(orte_process_name_t *proc); static orte_node_rank_t proc_get_node_rank(orte_process_name_t *proc); +static int query_sys_info(char *node, char **keys, opal_list_t *values); orte_ess_base_module_t orte_ess_portals_utcp_module = { rte_init, @@ -54,6 +56,7 @@ orte_ess_base_module_t orte_ess_portals_utcp_module = { 
proc_get_node_rank, NULL, /* add_pidmap is only used in ORTE */ NULL, /* update_nidmap is only used in ORTE */ + query_sys_info, NULL /* ft_event */ }; @@ -165,3 +168,8 @@ static orte_node_rank_t proc_get_node_rank(orte_process_name_t *proc) */ return 0; } + +static int query_sys_info(char *node, char **keys, opal_list_t *values) +{ + return ORTE_SUCCESS; +} diff --git a/orte/mca/ess/singleton/ess_singleton_module.c b/orte/mca/ess/singleton/ess_singleton_module.c index 3225fedd1c..ba3d9ee91c 100644 --- a/orte/mca/ess/singleton/ess_singleton_module.c +++ b/orte/mca/ess/singleton/ess_singleton_module.c @@ -86,6 +86,7 @@ orte_ess_base_module_t orte_ess_singleton_module = { proc_get_node_rank, update_pidmap, update_nidmap, + orte_ess_base_query_sys_info, NULL /* ft_event */ }; diff --git a/orte/mca/ess/slave/ess_slave_module.c b/orte/mca/ess/slave/ess_slave_module.c index db982a440b..f34420f7fe 100644 --- a/orte/mca/ess/slave/ess_slave_module.c +++ b/orte/mca/ess/slave/ess_slave_module.c @@ -99,6 +99,7 @@ orte_ess_base_module_t orte_ess_slave_module = { proc_get_node_rank, update_pidmap, update_nidmap, + orte_ess_base_query_sys_info, #if OPAL_ENABLE_FT_CR == 1 rte_ft_event #else diff --git a/orte/mca/ess/slurm/ess_slurm_module.c b/orte/mca/ess/slurm/ess_slurm_module.c index 06d116eb00..172b25465d 100644 --- a/orte/mca/ess/slurm/ess_slurm_module.c +++ b/orte/mca/ess/slurm/ess_slurm_module.c @@ -73,6 +73,7 @@ orte_ess_base_module_t orte_ess_slurm_module = { proc_get_node_rank, update_pidmap, update_nidmap, + orte_ess_base_query_sys_info, NULL /* ft_event */ }; diff --git a/orte/mca/ess/slurmd/ess_slurmd_module.c b/orte/mca/ess/slurmd/ess_slurmd_module.c index 3a78d94e46..dbf7461a14 100644 --- a/orte/mca/ess/slurmd/ess_slurmd_module.c +++ b/orte/mca/ess/slurmd/ess_slurmd_module.c @@ -79,6 +79,7 @@ orte_ess_base_module_t orte_ess_slurmd_module = { proc_get_node_rank, update_pidmap, update_nidmap, + orte_ess_base_query_sys_info, NULL /* ft_event */ }; diff --git 
a/orte/mca/ess/tm/ess_tm_module.c b/orte/mca/ess/tm/ess_tm_module.c index f023b3116c..cd6cafbec8 100644 --- a/orte/mca/ess/tm/ess_tm_module.c +++ b/orte/mca/ess/tm/ess_tm_module.c @@ -72,6 +72,7 @@ orte_ess_base_module_t orte_ess_tm_module = { proc_get_node_rank, update_pidmap, update_nidmap, + orte_ess_base_query_sys_info, NULL /* ft_event */ }; diff --git a/orte/mca/ess/tool/ess_tool_module.c b/orte/mca/ess/tool/ess_tool_module.c index b4a23956de..2e5564a240 100644 --- a/orte/mca/ess/tool/ess_tool_module.c +++ b/orte/mca/ess/tool/ess_tool_module.c @@ -44,6 +44,7 @@ static int rte_init(void); static void rte_abort(int status, bool report) __opal_attribute_noreturn__; static orte_vpid_t proc_get_daemon(orte_process_name_t *proc); +static int query_sys_info(char *node, char **keys, opal_list_t *values); orte_ess_base_module_t orte_ess_tool_module = { @@ -57,6 +58,7 @@ orte_ess_base_module_t orte_ess_tool_module = { NULL, /* don't need a proc_get_node_rank fn */ NULL, /* don't need to update_pidmap */ NULL, /* don't need to update_nidmap */ + query_sys_info, NULL /* ft_event */ }; @@ -153,3 +155,8 @@ static orte_vpid_t proc_get_daemon(orte_process_name_t *proc) { return ORTE_VPID_INVALID; } + +static int query_sys_info(char *node, char **keys, opal_list_t *values) +{ + return ORTE_SUCCESS; +} diff --git a/orte/mca/odls/base/odls_base_close.c b/orte/mca/odls/base/odls_base_close.c index 17e07ed8f7..71ebb6178f 100644 --- a/orte/mca/odls/base/odls_base_close.c +++ b/orte/mca/odls/base/odls_base_close.c @@ -46,6 +46,12 @@ int orte_odls_base_close(void) free(orte_odls_globals.dmap); } + /* cleanup the sysinfo data */ + while (NULL != (item = opal_list_remove_first(&orte_odls_globals.sysinfo))) { + OBJ_RELEASE(item); + } + OBJ_DESTRUCT(&orte_odls_globals.sysinfo); + /* if no components are available, then punt */ if (!orte_odls_base.components_available) { return ORTE_SUCCESS; diff --git a/orte/mca/odls/base/odls_base_default_fns.c 
b/orte/mca/odls/base/odls_base_default_fns.c index 6518437032..bda084995d 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -2381,6 +2381,8 @@ int orte_odls_base_default_require_sync(orte_process_name_t *proc, opal_dss.pack(&buffer, &flag, 1, OPAL_INT8); opal_dss.pack(&buffer, &orte_odls_globals.dmap, 1, OPAL_BYTE_OBJECT); opal_dss.pack(&buffer, &jobdat->pmap, 1, OPAL_BYTE_OBJECT); + /* add the local system info */ + orte_util_encode_sysinfo(&buffer, &orte_odls_globals.sysinfo); } } diff --git a/orte/mca/odls/base/odls_base_open.c b/orte/mca/odls/base/odls_base_open.c index 642091c53a..f807e0693a 100644 --- a/orte/mca/odls/base/odls_base_open.c +++ b/orte/mca/odls/base/odls_base_open.c @@ -29,6 +29,7 @@ #include "opal/mca/base/base.h" #include "opal/mca/base/mca_base_param.h" #include "opal/mca/paffinity/base/base.h" +#include "opal/mca/sysinfo/sysinfo.h" #include "opal/util/output.h" #include "opal/util/path.h" #include "opal/util/argv.h" @@ -181,6 +182,15 @@ int orte_odls_base_open(void) int i, rank, sock, core; orte_namelist_t *nm; bool xterm_hold; + char *keys[] = { + OPAL_SYSINFO_CPU_TYPE, + OPAL_SYSINFO_CPU_MODEL, + OPAL_SYSINFO_NUM_CPUS, + OPAL_SYSINFO_MEM_SIZE, + NULL + }; + opal_list_item_t *item; + opal_sysinfo_value_t *info; /* Debugging / verbose output. Always have stream open, with verbose set by the mca open system... 
*/ @@ -198,6 +208,7 @@ int orte_odls_base_open(void) orte_odls_globals.dmap = NULL; orte_odls_globals.debugger = NULL; orte_odls_globals.debugger_launched = false; + OBJ_CONSTRUCT(&orte_odls_globals.sysinfo, opal_list_t); /* get any external processor bindings */ OPAL_PAFFINITY_CPU_ZERO(orte_odls_globals.my_cores); @@ -282,14 +293,30 @@ int orte_odls_base_open(void) opal_argv_append_nosize(&orte_odls_globals.xtermcmd, "-e"); } + /* collect the system info */ + if (NULL != opal_sysinfo.query) { + /* get and store our local resources */ + opal_sysinfo.query(keys, &orte_odls_globals.sysinfo); + /* find our cpu model and save it for later */ + for (item = opal_list_get_first(&orte_odls_globals.sysinfo); + item != opal_list_get_end(&orte_odls_globals.sysinfo); + item = opal_list_get_next(item)) { + info = (opal_sysinfo_value_t*)item; + + if (0 == strcmp(info->key, OPAL_SYSINFO_CPU_MODEL)) { + orte_local_cpu_model = strdup(info->data.str); + break; + } + } + } + /* Open up all available components */ - if (ORTE_SUCCESS != mca_base_components_open("odls", orte_odls_globals.output, - mca_odls_base_static_components, - &orte_odls_base.available_components, true)) { - return ORTE_ERROR; - } + mca_odls_base_static_components, + &orte_odls_base.available_components, true)) { + return ORTE_ERROR; + } /* are there components available for use ? 
- * orte_odls_base.available_components is always initialized */ diff --git a/orte/mca/odls/base/odls_private.h b/orte/mca/odls/base/odls_private.h index 2a76dafd85..ef68eec7ce 100644 --- a/orte/mca/odls/base/odls_private.h +++ b/orte/mca/odls/base/odls_private.h @@ -78,6 +78,8 @@ typedef struct { opal_bitmap_t sockets; /* number of sockets available to us */ int num_sockets; + /* system capabilities */ + opal_list_t sysinfo; } orte_odls_globals_t; ORTE_DECLSPEC extern orte_odls_globals_t orte_odls_globals; diff --git a/orte/orted/orted_main.c b/orte/orted/orted_main.c index c320cb4cc4..7a13e07a42 100644 --- a/orte/orted/orted_main.c +++ b/orte/orted/orted_main.c @@ -65,6 +65,7 @@ #include "orte/util/proc_info.h" #include "orte/util/session_dir.h" #include "orte/util/name_fns.h" +#include "orte/util/nidmap.h" #include "orte/runtime/orte_locks.h" #include "orte/mca/rml/base/rml_contact.h" @@ -73,6 +74,7 @@ #include "orte/mca/rml/rml.h" #include "orte/mca/rml/rml_types.h" #include "orte/mca/odls/odls.h" +#include "orte/mca/odls/base/odls_private.h" #include "orte/mca/plm/plm.h" #include "orte/mca/ras/ras.h" #include "orte/mca/routed/routed.h" @@ -725,15 +727,6 @@ int orte_daemon(int argc, char *argv[]) goto DONE; } } else { - /* get our local resources */ - char *keys[] = { - OPAL_SYSINFO_CPU_TYPE, - OPAL_SYSINFO_CPU_MODEL, - OPAL_SYSINFO_NUM_CPUS, - OPAL_SYSINFO_MEM_SIZE, - NULL - }; - opal_list_t resources; opal_list_item_t *item; opal_sysinfo_value_t *info; int32_t num_values; @@ -741,13 +734,13 @@ int orte_daemon(int argc, char *argv[]) /* include our node name */ opal_dss.pack(buffer, &orte_process_info.nodename, 1, OPAL_STRING); - OBJ_CONSTRUCT(&resources, opal_list_t); - opal_sysinfo.query(keys, &resources); - /* add number of values to the buffer */ - num_values = opal_list_get_size(&resources); + /* add number of sysinfo values to the buffer */ + num_values = opal_list_get_size(&orte_odls_globals.sysinfo); opal_dss.pack(buffer, &num_values, 1, OPAL_INT32); 
/* add them to the buffer */ - while (NULL != (item = opal_list_remove_first(&resources))) { + for (item = opal_list_get_first(&orte_odls_globals.sysinfo); + item != opal_list_get_end(&orte_odls_globals.sysinfo); + item = opal_list_get_next(item)) { info = (opal_sysinfo_value_t*)item; opal_dss.pack(buffer, &info->key, 1, OPAL_STRING); opal_dss.pack(buffer, &info->type, 1, OPAL_DATA_TYPE_T); @@ -756,13 +749,7 @@ int orte_daemon(int argc, char *argv[]) } else if (OPAL_STRING == info->type) { opal_dss.pack(buffer, &(info->data.str), 1, OPAL_STRING); } - /* if this is the cpu model, save it for later use */ - if (0 == strcmp(info->key, OPAL_SYSINFO_CPU_MODEL)) { - orte_local_cpu_model = strdup(info->data.str); - } - OBJ_RELEASE(info); } - OBJ_DESTRUCT(&resources); if (orte_daemon_bootstrap) { /* send to a different callback location as the diff --git a/orte/runtime/orte_globals.c b/orte/runtime/orte_globals.c index 8811149c47..9d6b1c7881 100644 --- a/orte/runtime/orte_globals.c +++ b/orte/runtime/orte_globals.c @@ -865,6 +865,7 @@ static void orte_nid_construct(orte_nid_t *ptr) ptr->name = NULL; ptr->daemon = ORTE_VPID_INVALID; OBJ_CONSTRUCT(&ptr->attrs, opal_list_t); + OBJ_CONSTRUCT(&ptr->sysinfo, opal_list_t); } static void orte_nid_destruct(orte_nid_t *ptr) @@ -879,6 +880,10 @@ static void orte_nid_destruct(orte_nid_t *ptr) OBJ_RELEASE(item); } OBJ_DESTRUCT(&ptr->attrs); + while (NULL != (item = opal_list_remove_first(&ptr->sysinfo))) { + OBJ_RELEASE(item); + } + OBJ_DESTRUCT(&ptr->sysinfo); } OBJ_CLASS_INSTANCE(orte_nid_t, diff --git a/orte/runtime/orte_globals.h b/orte/runtime/orte_globals.h index 6f31389815..14a0b3b09e 100644 --- a/orte/runtime/orte_globals.h +++ b/orte/runtime/orte_globals.h @@ -469,8 +469,10 @@ typedef struct { char *name; /* vpid of this job family's daemon on this node */ orte_vpid_t daemon; - /* list of attributes */ + /* list of interface attributes */ opal_list_t attrs; + /* list of system info */ + opal_list_t sysinfo; } orte_nid_t; 
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_nid_t); diff --git a/orte/test/system/orte_nodename.c b/orte/test/system/orte_nodename.c index e26552adec..3b8c7c465c 100644 --- a/orte/test/system/orte_nodename.c +++ b/orte/test/system/orte_nodename.c @@ -8,16 +8,30 @@ #include #include +#include "opal/class/opal_list.h" +#include "opal/mca/sysinfo/sysinfo_types.h" + #include "orte/util/proc_info.h" #include "orte/util/name_fns.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/runtime.h" +#include "orte/mca/ess/ess.h" int main(int argc, char* argv[]) { int rc, restart=-1; char hostname[512], *rstrt; pid_t pid; + char *keys[] = { + OPAL_SYSINFO_CPU_TYPE, + OPAL_SYSINFO_CPU_MODEL, + OPAL_SYSINFO_NUM_CPUS, + OPAL_SYSINFO_MEM_SIZE, + NULL + }; + opal_list_t values; + opal_list_item_t *item; + opal_sysinfo_value_t *sys; if (0 > (rc = orte_init(&argc, &argv, ORTE_PROC_NON_MPI))) { fprintf(stderr, "orte_nodename: couldn't init orte - error code %d\n", rc); @@ -31,9 +45,24 @@ int main(int argc, char* argv[]) gethostname(hostname, 512); pid = getpid(); - printf("orte_nodename: Node %s Name %s Pid %ld Restarts: %d\n", - hostname, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (long)pid, restart); + OBJ_CONSTRUCT(&values, opal_list_t); + orte_ess.query_sys_info(NULL, keys, &values); + printf("orte_nodename: Node %s Name %s Pid %ld Restarts: %d Num info %d\n", + hostname, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (long)pid, + restart, (int)opal_list_get_size(&values)); + while (NULL != (item = opal_list_remove_first(&values))) { + sys = (opal_sysinfo_value_t*)item; + if (OPAL_STRING == sys->type) { + printf("\t%s: %s\n", sys->key, sys->data.str); + } else { + printf("\t%s: %d\n", sys->key, (int)sys->data.i64); + + } + /* the entry was removed from the list, so release it here */ + OBJ_RELEASE(item); + } + OBJ_DESTRUCT(&values); orte_finalize(); return 0; } diff --git a/orte/util/nidmap.c b/orte/util/nidmap.c index b5c20f6a94..34be793be9 100644 --- a/orte/util/nidmap.c +++ b/orte/util/nidmap.c @@ -48,8 +48,10 @@ #include "opal/class/opal_pointer_array.h" #include "opal/util/output.h" #include
"opal/util/argv.h" +#include "opal/mca/sysinfo/sysinfo_types.h" #include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/odls/base/odls_private.h" #include "orte/util/show_help.h" #include "orte/util/proc_info.h" #include "orte/util/name_fns.h" @@ -137,6 +139,9 @@ int orte_util_nidmap_init(opal_buffer_t *buffer) } /* the bytes in the object were free'd by the decode */ + /* unpack the system info */ + orte_util_decode_sysinfo(buffer); + return ORTE_SUCCESS; } @@ -1003,3 +1008,85 @@ void orte_jobmap_dump(void) } opal_output(orte_clean_output, "\n\n"); } + +void orte_util_encode_sysinfo(opal_buffer_t *buf, opal_list_t *info) +{ + opal_sysinfo_value_t *sys; + opal_list_item_t *item; + uint8_t flag; + + for (item = opal_list_get_first(info); + item != opal_list_get_end(info); + item = opal_list_get_next(item)) { + sys = (opal_sysinfo_value_t*)item; + /* pack the key */ + opal_dss.pack(buf, &sys->key, 1, OPAL_STRING); + /* pack the value */ + if (OPAL_STRING == sys->type) { + flag = 0; + opal_dss.pack(buf, &flag, 1, OPAL_UINT8); + opal_dss.pack(buf, &sys->data.str, 1, OPAL_STRING); + } else { + flag = 1; + opal_dss.pack(buf, &flag, 1, OPAL_UINT8); + opal_dss.pack(buf, &sys->data.i64, 1, OPAL_INT64); + } + } +} + +/* + * Unpack the sysinfo key/value entries in "buf" onto the local node's nid. + * Entries are read until a key can no longer be unpacked. Nothing is read + * when the local nid cannot be found or already holds sysinfo. + */ +void orte_util_decode_sysinfo(opal_buffer_t *buf) +{ + orte_nid_t *nid; + int32_t n; + char *key, *str; + opal_sysinfo_value_t *sys; + uint8_t flag; + + /* get the nid of our local node */ + if (NULL == (nid = orte_util_lookup_nid(ORTE_PROC_MY_NAME))) { + /* can't get it */ + return ; + } + + /* if it already has sysinfo, then we are receiving + * a repeat copy - so discard it + */ + if (0 < opal_list_get_size(&nid->sysinfo)) { + return; + } + + n=1; + while (ORTE_SUCCESS == opal_dss.unpack(buf, &key, &n, OPAL_STRING)) { + sys = OBJ_NEW(opal_sysinfo_value_t); + sys->key = key; + /* a key without its flag or value means the buffer is truncated - + * drop the partial entry rather than act on an unset flag. + * NOTE(review): assumes releasing an entry that only has its key set is safe - confirm + */ + if (ORTE_SUCCESS != opal_dss.unpack(buf, &flag, &n, OPAL_UINT8)) { + OBJ_RELEASE(sys); + return; + } + if (0 == flag) { + /* type the entry as a string only once it holds one */ + if (ORTE_SUCCESS != opal_dss.unpack(buf, &str, &n, OPAL_STRING)) { + OBJ_RELEASE(sys); + return; + } + sys->type = OPAL_STRING; + sys->data.str = str; + } else { + if (ORTE_SUCCESS != 
opal_dss.unpack(buf, &sys->data.i64, &n, OPAL_INT64)) { + OBJ_RELEASE(sys); + return; + } + sys->type = OPAL_INT64; + } + opal_list_append(&nid->sysinfo, &sys->super); + } +} diff --git a/orte/util/nidmap.h b/orte/util/nidmap.h index e7c13a0ef1..815f60ae88 100644 --- a/orte/util/nidmap.h +++ b/orte/util/nidmap.h @@ -54,6 +54,9 @@ ORTE_DECLSPEC int orte_util_decode_nodemap(opal_byte_object_t *boptr); ORTE_DECLSPEC int orte_util_encode_pidmap(opal_byte_object_t *boptr); ORTE_DECLSPEC int orte_util_decode_pidmap(opal_byte_object_t *boptr); +ORTE_DECLSPEC void orte_util_encode_sysinfo(opal_buffer_t *buf, opal_list_t *info); +ORTE_DECLSPEC void orte_util_decode_sysinfo(opal_buffer_t *buf); + ORTE_DECLSPEC int orte_util_build_daemon_nidmap(char **nodes); ORTE_DECLSPEC void orte_nidmap_dump(void);