From 74a3ece313acf2f2a62b5e2998b4e16d97b39e5e Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 26 Feb 2013 20:58:43 +0000 Subject: [PATCH] Remove unused component This commit was SVN r28117. --- orte/mca/routed/hdmon/Makefile.am | 36 -- orte/mca/routed/hdmon/configure.m4 | 19 - orte/mca/routed/hdmon/routed_hdmon.c | 464 ------------------ orte/mca/routed/hdmon/routed_hdmon.h | 30 -- .../mca/routed/hdmon/routed_hdmon_component.c | 73 --- 5 files changed, 622 deletions(-) delete mode 100644 orte/mca/routed/hdmon/Makefile.am delete mode 100644 orte/mca/routed/hdmon/configure.m4 delete mode 100644 orte/mca/routed/hdmon/routed_hdmon.c delete mode 100644 orte/mca/routed/hdmon/routed_hdmon.h delete mode 100644 orte/mca/routed/hdmon/routed_hdmon_component.c diff --git a/orte/mca/routed/hdmon/Makefile.am b/orte/mca/routed/hdmon/Makefile.am deleted file mode 100644 index 93b2a09391..0000000000 --- a/orte/mca/routed/hdmon/Makefile.am +++ /dev/null @@ -1,36 +0,0 @@ -# -# Copyright (c) 2012 Los Alamos National Security, LLC. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - routed_hdmon.h \ - routed_hdmon.c \ - routed_hdmon_component.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_orte_routed_hdmon_DSO -component_noinst = -component_install = mca_routed_hdmon.la -else -component_noinst = libmca_routed_hdmon.la -component_install = -endif - -mcacomponentdir = $(pkglibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_routed_hdmon_la_SOURCES = $(sources) -mca_routed_hdmon_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_routed_hdmon_la_SOURCES = $(sources) -libmca_routed_hdmon_la_LDFLAGS = -module -avoid-version - diff --git a/orte/mca/routed/hdmon/configure.m4 b/orte/mca/routed/hdmon/configure.m4 deleted file mode 100644 index f3a6e7dfec..0000000000 --- a/orte/mca/routed/hdmon/configure.m4 +++ /dev/null @@ -1,19 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2012 Los Alamos National Security, LLC. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# MCA_routed_hdmon_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_orte_routed_hdmon_CONFIG], [ - AC_CONFIG_FILES([orte/mca/routed/hdmon/Makefile]) - - AS_IF([test "$orte_without_full_support" = 0], - [$1], - [$2]) -]) diff --git a/orte/mca/routed/hdmon/routed_hdmon.c b/orte/mca/routed/hdmon/routed_hdmon.c deleted file mode 100644 index ec3c8f5580..0000000000 --- a/orte/mca/routed/hdmon/routed_hdmon.c +++ /dev/null @@ -1,464 +0,0 @@ -/* - * Copyright (c) 2012 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" - -#include - -#include "opal/dss/dss.h" -#include "opal/class/opal_hash_table.h" -#include "opal/class/opal_bitmap.h" -#include "opal/runtime/opal_progress.h" -#include "opal/util/output.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/ess/ess.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/rml/rml_types.h" -#include "orte/mca/state/state.h" -#include "orte/util/name_fns.h" -#include "orte/runtime/orte_globals.h" -#include "orte/runtime/orte_wait.h" -#include "orte/runtime/runtime.h" -#include "orte/runtime/data_type_support/orte_dt_support.h" - -#include "orte/mca/rml/base/rml_contact.h" - -#include "orte/mca/routed/base/base.h" -#include "routed_hdmon.h" - -/* the hdmon module supports a network of daemons that host - * no local apps - they are simply monitoring the system - * resource usage. Thus, they use static ports to wire - * themselves up to a set of identified masters via a - * radix topology. If a peer dies, the daemon will rewire - * the network by reconnecting to the downstream peer(s). - */ - -static int init(void); -static int finalize(void); -static int delete_route(orte_process_name_t *proc); -static int update_route(orte_process_name_t *target, - orte_process_name_t *route); -static orte_process_name_t get_route(orte_process_name_t *target); -static int init_routes(orte_jobid_t job, opal_buffer_t *ndat); -static int route_lost(const orte_process_name_t *route); -static bool route_is_defined(const orte_process_name_t *target); -static void update_routing_plan(void); -static void get_routing_list(orte_grpcomm_coll_t type, - orte_grpcomm_collective_t *coll); -static int get_wireup_info(opal_buffer_t *buf); -static int set_lifeline(orte_process_name_t *proc); -static size_t num_routes(void); - -#if OPAL_ENABLE_FT_CR == 1 -static int hdmon_ft_event(int state); -#endif - -orte_routed_module_t orte_routed_hdmon_module = { - init, - finalize, - delete_route, - update_route, - get_route, - init_routes, - route_lost, - route_is_defined, - set_lifeline, - update_routing_plan, - get_routing_list, - get_wireup_info, - num_routes, -#if OPAL_ENABLE_FT_CR == 1 - hdmon_ft_event -#else - NULL -#endif -}; - -/* local globals */ -static orte_process_name_t *lifeline=NULL; -static orte_process_name_t local_lifeline; -static int num_children; -static opal_list_t my_children; -static orte_job_t *daemons=NULL; - -static int init(void) -{ - lifeline = NULL; - - /* setup the list of children */ - OBJ_CONSTRUCT(&my_children, opal_list_t); - num_children = 0; - ORTE_PROC_MY_PARENT->jobid = ORTE_PROC_MY_NAME->jobid; - - /* get the daemon object */ - daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid); - - return ORTE_SUCCESS; -} - -static int finalize(void) -{ - opal_list_item_t *item; - - lifeline = NULL; - - /* deconstruct the list of children */ - while (NULL != (item = opal_list_remove_first(&my_children))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&my_children); - num_children = 0; - - return ORTE_SUCCESS; -} - -static int delete_route(orte_process_name_t *proc) -{ - /* There is nothing to do here. The routes will be - * redefined when we update the routing tree - */ - - return ORTE_SUCCESS; -} - -static int update_route(orte_process_name_t *target, - orte_process_name_t *route) -{ - /* There is nothing to do here. The routes will be - * redefined when we update the routing tree - */ - - return ORTE_SUCCESS; -} - - -static orte_process_name_t get_route(orte_process_name_t *target) -{ - orte_process_name_t *ret, step; - opal_list_item_t *item; - orte_routed_tree_t *child; - - if (!orte_routing_is_enabled) { - ret = target; - goto found; - } - step.jobid = ORTE_PROC_MY_NAME->jobid; - - /* if the target is me, then send direct to the target! */ - if (ORTE_PROC_MY_NAME->vpid == target->vpid) { - ret = target; - goto found; - } else { - /* search routing tree for next step to that target */ - for (item = opal_list_get_first(&my_children); - item != opal_list_get_end(&my_children); - item = opal_list_get_next(item)) { - child = (orte_routed_tree_t*)item; - if (child->vpid == target->vpid) { - /* this is the target - just send it there */ - ret = target; - goto found; - } - /* otherwise, see if the target we need is below the child */ - if (opal_bitmap_is_set_bit(&child->relatives, target->vpid)) { - /* yep - we need to step through this child */ - step.vpid = child->vpid; - ret = &step; - goto found; - } - } - } - - /* if we get here, then the target is not beneath - * any of our children, so we have to step up through our parent - */ - step.vpid = ORTE_PROC_MY_PARENT->vpid; - - ret = &step; - -found: - OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output, - "%s routed_hdmon_get(%s) --> %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(target), - ORTE_NAME_PRINT(ret))); - - return *ret; -} - -static int init_routes(orte_jobid_t job, opal_buffer_t *ndat) -{ - OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output, - "%s routed_hdmon: init routes for job %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_JOBID_PRINT(job))); - - /* we use static ports, so set my lifeline to point at my parent */ - lifeline = ORTE_PROC_MY_PARENT; - - OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output, - "%s routed_hdmon: completed init routes", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - return ORTE_SUCCESS; -} - -static int route_lost(const orte_process_name_t *route) -{ - opal_list_item_t *item; - orte_routed_tree_t *child; - orte_proc_t *proc; - orte_vpid_t n; - - OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output, - "%s route to %s lost", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(route))); - - /* if we lose the connection to the lifeline and we are NOT already, - * in finalize, rewire to route to the parent of my lifeline - */ - if (!orte_finalizing && - NULL != lifeline && - OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, route, lifeline)) { - OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output, - "%s routed:hdmon: Connection to lifeline %s lost", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(lifeline))); - /* if we are an app, just die */ - if (ORTE_PROC_IS_APP) { - return ORTE_ERR_FATAL; - } - proc = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, ORTE_PROC_MY_PARENT->vpid); - proc->alive = false; - n=0; - while (!proc->alive && n < orte_process_info.num_procs) { - proc = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, proc->parent); - n++; - } - if (!proc->alive) { - /* couldn't find a new parent - we must be last man alive */ - ORTE_PROC_MY_PARENT->vpid = -1; - } else { - ORTE_PROC_MY_PARENT->vpid = proc->name.vpid; - } - return ORTE_SUCCESS; - } - - /* if we are the HNP or daemon, and the route is a daemon, - * see if it is one of our children. If so, remove it and - * let the children beneath it reconnect to us - */ - if ((ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) && - route->jobid == ORTE_PROC_MY_NAME->jobid) { - for (item = opal_list_get_first(&my_children); - item != opal_list_get_end(&my_children); - item = opal_list_get_next(item)) { - child = (orte_routed_tree_t*)item; - if (child->vpid == route->vpid) { - opal_list_remove_item(&my_children, item); - OBJ_RELEASE(item); - return ORTE_SUCCESS; - } - } - } - - /* we don't care about this one, so return success */ - return ORTE_SUCCESS; -} - -static bool route_is_defined(const orte_process_name_t *target) -{ - /* by definition, we always have a route */ - return true; -} - -static int set_lifeline(orte_process_name_t *proc) -{ - /* we have to copy the proc data because there is no - * guarantee that it will be preserved - */ - local_lifeline.jobid = proc->jobid; - local_lifeline.vpid = proc->vpid; - lifeline = &local_lifeline; - - return ORTE_SUCCESS; -} - -static void radix_tree(int rank, int *num_children, - opal_list_t *children, opal_bitmap_t *relatives) -{ - int i, peer, Sum, NInLevel; - orte_routed_tree_t *child; - opal_bitmap_t *relations; - - /* compute how many procs are at my level */ - Sum=1; - NInLevel=1; - - while ( Sum < (rank+1) ) { - NInLevel *= mca_routed_hdmon_component.radix; - Sum += NInLevel; - } - - /* our children start at our rank + num_in_level */ - peer = rank + NInLevel; - for (i = 0; i < mca_routed_hdmon_component.radix; i++) { - if (peer < (int)orte_process_info.num_procs) { - child = OBJ_NEW(orte_routed_tree_t); - child->vpid = peer; - if (NULL != children) { - /* this is a direct child - add it to my list */ - opal_list_append(children, &child->super); - (*num_children)++; - /* setup the relatives bitmap */ - opal_bitmap_init(&child->relatives, orte_process_info.num_procs); - /* point to the relatives */ - relations = &child->relatives; - } else { - /* we are recording someone's relatives - set the bit */ - if (OPAL_SUCCESS != opal_bitmap_set_bit(relatives, peer)) { - opal_output(0, "%s Error: could not set relations bit!", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - } - /* point to this relations */ - relations = relatives; - } - /* search for this child's relatives */ - radix_tree(peer, NULL, NULL, relations); - } - peer += NInLevel; - } -} - -static void update_routing_plan(void) -{ - orte_routed_tree_t *child; - int j; - opal_list_item_t *item; - int level, nlevel; - orte_proc_t *proc; - orte_vpid_t sum, v, nprev; - - /* clear the list of children if any are already present */ - while (NULL != (item = opal_list_remove_first(&my_children))) { - OBJ_RELEASE(item); - } - num_children = 0; - - /* compute all parents */ - for (v = 1 ; v < orte_process_info.num_procs ; v++) { - level = 0; - sum = 1; - nlevel = 1; - - while (sum < (v+1)) { - level++; - nlevel *= mca_routed_hdmon_component.radix; - sum += nlevel; - } - sum -= nlevel; - nprev = nlevel/mca_routed_hdmon_component.radix; - proc = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, v); - proc->parent = ((v-sum) % nprev) + (sum - nprev); - opal_output_verbose(2, orte_routed_base_output, - "%s proc %s has parent %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_VPID_PRINT(v), ORTE_VPID_PRINT(proc->parent)); - } - /* set the parent for v=0 */ - proc = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, 0); - proc->parent = -1; - - /* now set my parent */ - proc = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, ORTE_PROC_MY_NAME->vpid); - ORTE_PROC_MY_PARENT->vpid = proc->parent; - - /* compute my direct children and the bitmap that shows which vpids - * lie underneath their branch - */ - radix_tree(ORTE_PROC_MY_NAME->vpid, &num_children, &my_children, NULL); - - if (0 < opal_output_get_verbosity(orte_routed_base_output)) { - opal_output(0, "%s: parent %d num_children %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_PROC_MY_PARENT->vpid, num_children); - for (item = opal_list_get_first(&my_children); - item != opal_list_get_end(&my_children); - item = opal_list_get_next(item)) { - child = (orte_routed_tree_t*)item; - opal_output(0, "%s: \tchild %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), child->vpid); - for (j=0; j < (int)orte_process_info.num_procs; j++) { - if (opal_bitmap_is_set_bit(&child->relatives, j)) { - opal_output(0, "%s: \t\trelation %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), j); - } - } - } - } -} - -static void get_routing_list(orte_grpcomm_coll_t type, - orte_grpcomm_collective_t *coll) -{ - if (ORTE_GRPCOMM_XCAST == type) { - orte_routed_base_xcast_routing(coll, &my_children); - } else if (ORTE_GRPCOMM_COLL_RELAY == type) { - orte_routed_base_coll_relay_routing(coll); - } else if (ORTE_GRPCOMM_COLL_COMPLETE == type) { - orte_routed_base_coll_complete_routing(coll); - } else if (ORTE_GRPCOMM_COLL_PEERS == type) { - orte_routed_base_coll_peers(coll, &my_children); - } -} - -static int get_wireup_info(opal_buffer_t *buf) -{ - /* we use static ports, so just return */ - return ORTE_SUCCESS; -} - -static size_t num_routes(void) -{ - return opal_list_get_size(&my_children); -} - -#if OPAL_ENABLE_FT_CR == 1 -static int hdmon_ft_event(int state) -{ - int ret, exit_status = ORTE_SUCCESS; - - /******** Checkpoint Prep ********/ - if(OPAL_CRS_CHECKPOINT == state) { - } - /******** Continue Recovery ********/ - else if (OPAL_CRS_CONTINUE == state ) { - } - /******** Restart Recovery ********/ - else if (OPAL_CRS_RESTART == state ) { - /* - * Re-exchange the routes - */ - if (ORTE_SUCCESS != (ret = orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, NULL))) { - exit_status = ret; - goto cleanup; - } - } - else if (OPAL_CRS_TERM == state ) { - /* Nothing */ - } - else { - /* Error state = Nothing */ - } - - cleanup: - return exit_status; -} -#endif - diff --git a/orte/mca/routed/hdmon/routed_hdmon.h b/orte/mca/routed/hdmon/routed_hdmon.h deleted file mode 100644 index 28ef9d8b08..0000000000 --- a/orte/mca/routed/hdmon/routed_hdmon.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2012 Los Alamos National Security, LLC. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_ROUTED_HDMON_H -#define MCA_ROUTED_HDMON_H - -#include "orte_config.h" - -#include "orte/mca/routed/routed.h" - -BEGIN_C_DECLS - -typedef struct { - orte_routed_component_t super; - int radix; -} orte_routed_hdmon_component_t; -ORTE_MODULE_DECLSPEC extern orte_routed_hdmon_component_t mca_routed_hdmon_component; - -extern orte_routed_module_t orte_routed_hdmon_module; - -END_C_DECLS - -#endif diff --git a/orte/mca/routed/hdmon/routed_hdmon_component.c b/orte/mca/routed/hdmon/routed_hdmon_component.c deleted file mode 100644 index a3594b73f3..0000000000 --- a/orte/mca/routed/hdmon/routed_hdmon_component.c +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2012 Los Alamos National Security, LLC. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" - -#include "opal/mca/base/base.h" -#include "opal/mca/base/mca_base_param.h" - -#include "orte/util/proc_info.h" - -#include "orte/mca/routed/base/base.h" -#include "routed_hdmon.h" - -static int hdmon_open(void); -static int hdmon_query(mca_base_module_t **module, int *priority); - -/** - * component definition - */ -orte_routed_hdmon_component_t mca_routed_hdmon_component = { - { - /* First, the mca_base_component_t struct containing meta - information about the component itself */ - - { - ORTE_ROUTED_BASE_VERSION_2_0_0, - - "hdmon", /* MCA component name */ - ORTE_MAJOR_VERSION, /* MCA component major version */ - ORTE_MINOR_VERSION, /* MCA component minor version */ - ORTE_RELEASE_VERSION, /* MCA component release version */ - hdmon_open, - NULL, - hdmon_query - }, - { - /* This component can be checkpointed */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - } - } -}; - -static int hdmon_open(void) -{ - mca_base_component_t *c = &mca_routed_hdmon_component.super.base_version; - - mca_base_param_reg_int(c, "radix", - "Radix to be used for routed hdmon tree", - false, false, 32, &mca_routed_hdmon_component.radix); - - return ORTE_SUCCESS; -} - -static int hdmon_query(mca_base_module_t **module, int *priority) -{ - if (ORTE_PROC_IS_CM || ORTE_PROC_IS_CMSLAVE) { - *priority = 1000; - *module = (mca_base_module_t *) &orte_routed_hdmon_module; - return ORTE_SUCCESS; - } - - *priority = 0; - *module = NULL; - return ORTE_ERROR; -}