From 05ac8fa71c0833eeeaa878b72a31503d361e145e Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 30 Oct 2018 07:49:41 -0700 Subject: [PATCH] Remove stale defunct tools Signed-off-by: Ralph Castain --- config/orte_config_files.m4 | 2 - ompi/mca/rte/orte/Makefile.am | 18 +- orte/tools/Makefile.am | 4 - orte/tools/orte-ps/Makefile.am | 47 -- orte/tools/orte-ps/help-orte-ps.txt | 46 -- orte/tools/orte-ps/orte-ps.1in | 101 --- orte/tools/orte-ps/orte-ps.c | 986 ----------------------- orte/tools/orte-top/Makefile.am | 47 -- orte/tools/orte-top/help-orte-top.txt | 82 -- orte/tools/orte-top/orte-top.1in | 106 --- orte/tools/orte-top/orte-top.c | 1042 ------------------------- 11 files changed, 1 insertion(+), 2480 deletions(-) delete mode 100644 orte/tools/orte-ps/Makefile.am delete mode 100644 orte/tools/orte-ps/help-orte-ps.txt delete mode 100644 orte/tools/orte-ps/orte-ps.1in delete mode 100644 orte/tools/orte-ps/orte-ps.c delete mode 100644 orte/tools/orte-top/Makefile.am delete mode 100644 orte/tools/orte-top/help-orte-top.txt delete mode 100644 orte/tools/orte-top/orte-top.1in delete mode 100644 orte/tools/orte-top/orte-top.c diff --git a/config/orte_config_files.m4 b/config/orte_config_files.m4 index b0f79dbb66..191d280131 100644 --- a/config/orte_config_files.m4 +++ b/config/orte_config_files.m4 @@ -25,9 +25,7 @@ AC_DEFUN([ORTE_CONFIG_FILES],[ orte/tools/wrappers/Makefile orte/tools/wrappers/ortecc-wrapper-data.txt orte/tools/wrappers/orte.pc - orte/tools/orte-ps/Makefile orte/tools/orte-clean/Makefile - orte/tools/orte-top/Makefile orte/tools/orte-info/Makefile orte/tools/orte-server/Makefile ]) diff --git a/ompi/mca/rte/orte/Makefile.am b/ompi/mca/rte/orte/Makefile.am index 30dd21b14d..34051dcea6 100644 --- a/ompi/mca/rte/orte/Makefile.am +++ b/ompi/mca/rte/orte/Makefile.am @@ -28,7 +28,7 @@ libmca_rte_orte_la_SOURCES =$(sources) $(headers) libmca_rte_orte_la_LDFLAGS = -module -avoid-version libmca_rte_orte_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la -man_pages = mpirun.1 mpiexec.1 ompi-ps.1 ompi-clean.1 ompi-top.1 ompi-server.1 +man_pages = mpirun.1 mpiexec.1 ompi-clean.1 ompi-server.1 if OPAL_INSTALL_BINARIES nodist_man_MANS = $(man_pages) @@ -36,17 +36,13 @@ nodist_man_MANS = $(man_pages) install-exec-hook: (cd $(DESTDIR)$(bindir); rm -f mpirun$(EXEEXT); $(LN_S) orterun$(EXEEXT) mpirun$(EXEEXT)) (cd $(DESTDIR)$(bindir); rm -f mpiexec$(EXEEXT); $(LN_S) orterun$(EXEEXT) mpiexec$(EXEEXT)) - (cd $(DESTDIR)$(bindir); rm -f ompi-ps$(EXEEXT); $(LN_S) orte-ps$(EXEEXT) ompi-ps$(EXEEXT)) (cd $(DESTDIR)$(bindir); rm -f ompi-clean$(EXEEXT); $(LN_S) orte-clean$(EXEEXT) ompi-clean$(EXEEXT)) - (cd $(DESTDIR)$(bindir); rm -f ompi-top$(EXEEXT); $(LN_S) orte-top$(EXEEXT) ompi-top$(EXEEXT)) (cd $(DESTDIR)$(bindir); rm -f ompi-server$(EXEEXT); $(LN_S) orte-server$(EXEEXT) ompi-server$(EXEEXT)) uninstall-local: rm -f $(DESTDIR)$(bindir)/mpirun$(EXEEXT) \ $(DESTDIR)$(bindir)/mpiexec$(EXEEXT) \ - $(DESTDIR)$(bindir)/ompi-ps$(EXEEXT) \ $(DESTDIR)$(bindir)/ompi-clean$(EXEEXT) \ - $(DESTDIR)$(bindir)/ompi-top$(EXEEXT) \ $(DESTDIR)$(bindir)/ompi-server$(EXEEXT) endif # OPAL_INSTALL_BINARIES @@ -60,24 +56,12 @@ mpirun.1: $(top_builddir)/orte/tools/orterun/orterun.1 mpiexec.1: $(top_builddir)/orte/tools/orterun/orterun.1 cp -f $(top_builddir)/orte/tools/orterun/orterun.1 mpiexec.1 -$(top_builddir)/orte/tools/orte-ps/orte-ps.1: - (cd $(top_builddir)/orte/tools/orte-ps && $(MAKE) $(AM_MAKEFLAGS) orte-ps.1) - -ompi-ps.1: $(top_builddir)/orte/tools/orte-ps/orte-ps.1 - cp -f $(top_builddir)/orte/tools/orte-ps/orte-ps.1 ompi-ps.1 - $(top_builddir)/orte/tools/orte-clean/orte-clean.1: (cd $(top_builddir)/orte/tools/orte-clean && $(MAKE) $(AM_MAKEFLAGS) orte-clean.1) ompi-clean.1: $(top_builddir)/orte/tools/orte-clean/orte-clean.1 cp -f $(top_builddir)/orte/tools/orte-clean/orte-clean.1 ompi-clean.1 -$(top_builddir)/orte/tools/orte-top/orte-top.1: - (cd $(top_builddir)/orte/tools/orte-top && $(MAKE) $(AM_MAKEFLAGS) orte-top.1) - -ompi-top.1: $(top_builddir)/orte/tools/orte-top/orte-top.1 - cp -f $(top_builddir)/orte/tools/orte-top/orte-top.1 ompi-top.1 - $(top_builddir)/orte/tools/orte-server/orte-server.1: (cd $(top_builddir)/orte/tools/orte-server && $(MAKE) $(AM_MAKEFLAGS) orte-server.1) diff --git a/orte/tools/Makefile.am b/orte/tools/Makefile.am index a1a3fcd35d..be78bb56ea 100644 --- a/orte/tools/Makefile.am +++ b/orte/tools/Makefile.am @@ -26,20 +26,16 @@ SUBDIRS += \ tools/orte-clean \ - tools/orte-ps \ tools/orted \ tools/orterun \ tools/wrappers \ - tools/orte-top \ tools/orte-info \ tools/orte-server DIST_SUBDIRS += \ tools/orte-clean \ - tools/orte-ps \ tools/orted \ tools/orterun \ tools/wrappers \ - tools/orte-top \ tools/orte-info \ tools/orte-server diff --git a/orte/tools/orte-ps/Makefile.am b/orte/tools/orte-ps/Makefile.am deleted file mode 100644 index 758ea92509..0000000000 --- a/orte/tools/orte-ps/Makefile.am +++ /dev/null @@ -1,47 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -include $(top_srcdir)/Makefile.ompi-rules - -man_pages = orte-ps.1 -EXTRA_DIST = $(man_pages:.1=.1in) - -if OPAL_INSTALL_BINARIES - -bin_PROGRAMS = orte-ps - -nodist_man_MANS = $(man_pages) - -# Ensure that the man pages are rebuilt if the opal_config.h file -# changes; a "good enough" way to know if configure was run again (and -# therefore the release date or version may have changed) -$(nodist_man_MANS): $(top_builddir)/opal/include/opal_config.h - -dist_ortedata_DATA = help-orte-ps.txt - -endif # OPAL_INSTALL_BINARIES - -orte_ps_SOURCES = orte-ps.c -orte_ps_LDADD = \ - $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la \ - $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la - -distclean-local: - rm -f $(man_pages) diff --git a/orte/tools/orte-ps/help-orte-ps.txt b/orte/tools/orte-ps/help-orte-ps.txt deleted file mode 100644 index 875f7cd1b3..0000000000 --- a/orte/tools/orte-ps/help-orte-ps.txt +++ /dev/null @@ -1,46 +0,0 @@ -# -*- text -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# This is the US/English help file for Open MPI PS tool -# -[usage] -ompi-ps [OPTIONS] - Open MPI Job and Process Status Tool - -%s -# -[vpid-usage] -Error: You specified a vpid (%d) without also specifying a jobid. - Use the '-j' option to specify a jobid. -# -[need-vpid] -Error: You specified a jobid (%d) without also specifying a vpid. - Use the '-p' option to specify a vpid. -# -[invalid-vpid] -Error: The specified vpid (%d) is not valid for job %d. -# -[stale-hnp] -An attempt was made to obtain ps information from at least -one non-responsive HNP: - -HNP name: %s - -You may want to cleanup stale session directories in your temporary -directory (e.g., $TMPDIR). diff --git a/orte/tools/orte-ps/orte-ps.1in b/orte/tools/orte-ps/orte-ps.1in deleted file mode 100644 index aa6d3cb7cd..0000000000 --- a/orte/tools/orte-ps/orte-ps.1in +++ /dev/null @@ -1,101 +0,0 @@ -.\" -.\" Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana -.\" University Research and Technology -.\" Corporation. All rights reserved. -.\" Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved. -.\" -.\" Man page for OMPI's ompi-ps command -.\" -.\" .TH name section center-footer left-footer center-header -.TH OMPI-PS 1 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" -.\" ************************** -.\" Name Section -.\" ************************** -.SH NAME -. -ompi-ps, orte-ps \- Displays information about the active jobs and processes -in Open MPI. -. -.PP -. -\fBNOTE:\fP \fIompi-ps\fP, and \fIorte-ps\fP are exact -synonyms for each other. Using any of the names will result in exactly -identical behavior. -. -.\" ************************** -.\" Synopsis Section -.\" ************************** -.SH SYNOPSIS -. -.B ompi-ps -.B [ options ] -. -.\" ************************** -.\" Options Section -.\" ************************** -.SH Options -. -\fIompi-ps\fR will display information about running job(s) in the current -universe. -. -.TP 10 -.B -h | --help -Display help for this command -. -. -.TP -.B -v | --verbose -Enable verbose output for debugging -. -. -.TP -.B --daemons -Display daemon job information. -. -. -.TP -.B -j | --jobid -Display the state of a specific job in the universe. By default all jobs will -be displayed. -. -. -.TP -.B -p | --vpid -Display the state of a specific vpid (process) in the universe. By default all -vpids cooresponding to processes will be displayed. Must be used in conjunction -with the \fB--jobid\fP option. -. -. -.TP -.B -n | --nodes -Display all of the allocated nodes, and their cooresponding states. By default -this is disabled. -. -. -.TP -.B -gmca | --gmca \fR \fP -Pass global MCA parameters that are applicable to all contexts. \fI\fP is -the parameter name; \fI\fP is the parameter value. -. -. -.TP -.B -mca | --mca -Send arguments to various MCA modules. -. -. -.\" ************************** -.\" Description Section -.\" ************************** -.SH DESCRIPTION -. -.PP -\fIompi-ps\fR displays the state of jobs running inside an Open RTE universe. -. -. -.\" ************************** -.\" See Also Section -.\" ************************** -. -.SH SEE ALSO -orterun(1), orte-clean(1) -. diff --git a/orte/tools/orte-ps/orte-ps.c b/orte/tools/orte-ps/orte-ps.c deleted file mode 100644 index 12a4a35785..0000000000 --- a/orte/tools/orte-ps/orte-ps.c +++ /dev/null @@ -1,986 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @fie - * ORTE PS command - * - */ - -#include "orte_config.h" -#include "orte/constants.h" - -#include -#include -#ifdef HAVE_UNISTD_H -#include -#endif /* HAVE_UNISTD_H */ -#include -#ifdef HAVE_SYS_STAT_H -#include -#endif /* HAVE_SYS_STAT_H */ -#ifdef HAVE_SYS_TYPES_H -#include -#endif /* HAVE_SYS_TYPES_H */ -#ifdef HAVE_SYS_WAIT_H -#include -#endif /* HAVE_SYS_WAIT_H */ -#include -#ifdef HAVE_DIRENT_H -#include -#endif /* HAVE_DIRENT_H */ - -#include "opal/util/basename.h" -#include "opal/util/cmd_line.h" -#include "opal/util/output.h" -#include "opal/util/opal_environ.h" -#include "opal/util/printf.h" -#include "opal/util/show_help.h" -#include "opal/mca/base/base.h" -#include "opal/runtime/opal.h" -#if OPAL_ENABLE_FT_CR == 1 -#include "opal/runtime/opal_cr.h" -#endif - -#include "orte/runtime/runtime.h" -#include "orte/util/error_strings.h" -#include "orte/util/hnp_contact.h" -#include "orte/util/name_fns.h" -#include "orte/util/show_help.h" -#include "orte/util/proc_info.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/util/comm/comm.h" -#if OPAL_ENABLE_FT_CR == 1 -#include "orte/mca/snapc/base/base.h" -#endif -#include "orte/runtime/orte_globals.h" - -struct orte_ps_mpirun_info_t { - /** This is an object, so it must have a super */ - opal_list_item_t super; - - /* HNP info */ - orte_hnp_contact_t *hnp; - - /* array of jobs */ - orte_std_cntr_t num_jobs; - orte_job_t **jobs; - - /* array of nodes */ - orte_std_cntr_t num_nodes; - orte_node_t **nodes; -}; -typedef struct orte_ps_mpirun_info_t orte_ps_mpirun_info_t; - -static void orte_ps_mpirun_info_construct(orte_ps_mpirun_info_t *ptr) -{ - ptr->hnp = NULL; - ptr->num_jobs = 0; - ptr->jobs = NULL; - ptr->num_nodes = 0; - ptr->nodes = NULL; -} -static void orte_ps_mpirun_info_destruct(orte_ps_mpirun_info_t *ptr) -{ - orte_std_cntr_t i; - - if (NULL != ptr->hnp) OBJ_RELEASE(ptr->hnp); - if (NULL != ptr->jobs) { - for (i=0; i < ptr->num_jobs; i++) { - OBJ_RELEASE(ptr->jobs[i]); - } - free(ptr->jobs); - } - if (NULL != ptr->nodes) { - for (i=0; i < ptr->num_nodes; i++) { - OBJ_RELEASE(ptr->nodes[i]); - } - free(ptr->nodes); - } -} - -OBJ_CLASS_INSTANCE(orte_ps_mpirun_info_t, - opal_list_item_t, - orte_ps_mpirun_info_construct, - orte_ps_mpirun_info_destruct); - -/****************** - * Local Functions - ******************/ -static int orte_ps_init(int argc, char *argv[]); -static int parse_args(int argc, char *argv[]); - -static int gather_information(orte_ps_mpirun_info_t *hnpinfo); -static int gather_active_jobs(orte_ps_mpirun_info_t *hnpinfo); -static int gather_nodes(orte_ps_mpirun_info_t *hnpinfo); -static int gather_vpid_info(orte_ps_mpirun_info_t *hnpinfo); - -static int pretty_print(orte_ps_mpirun_info_t *hnpinfo); -static int pretty_print_nodes(orte_node_t **nodes, orte_std_cntr_t num_nodes); -static int pretty_print_jobs(orte_job_t **jobs, orte_std_cntr_t num_jobs); -static int pretty_print_vpids(orte_job_t *job); -static void pretty_print_dashed_line(int len); - -static char *pretty_node_state(orte_node_state_t state); - -static int parseable_print(orte_ps_mpirun_info_t *hnpinfo); - -/***************************************** - * Global Vars for Command line Arguments - *****************************************/ -typedef struct { - bool help; - bool verbose; - bool parseable; - orte_jobid_t jobid; - bool nodes; - bool daemons; - int output; - pid_t pid; -} orte_ps_globals_t; - -orte_ps_globals_t orte_ps_globals = {0}; - -opal_cmd_line_init_t cmd_line_opts[] = { - { NULL, - 'h', NULL, "help", - 0, - &orte_ps_globals.help, OPAL_CMD_LINE_TYPE_BOOL, - "This help message" }, - - { NULL, - 'v', NULL, "verbose", - 0, - &orte_ps_globals.verbose, OPAL_CMD_LINE_TYPE_BOOL, - "Be Verbose" }, - - { NULL, - '\0', NULL, "parseable", - 0, - &orte_ps_globals.parseable, OPAL_CMD_LINE_TYPE_BOOL, - "Provide parseable output" }, - - { NULL, - '\0', NULL, "daemons", - 0, - &orte_ps_globals.daemons, OPAL_CMD_LINE_TYPE_INT, - "Display daemon job information" }, - - { NULL, - 'j', NULL, "jobid", - 1, - &orte_ps_globals.jobid, OPAL_CMD_LINE_TYPE_INT, - "Specify a local jobid for the given mpirun - a value from 0 to N" }, - - { NULL, - 'p', NULL, "pid", - 1, - &orte_ps_globals.pid, OPAL_CMD_LINE_TYPE_INT, - "Specify mpirun pid" }, - - { NULL, - 'n', NULL, "nodes", - 0, - &orte_ps_globals.nodes, OPAL_CMD_LINE_TYPE_INT, - "Display Node Information" }, - - /* End of list */ - { NULL, - '\0', NULL, NULL, - 0, - NULL, OPAL_CMD_LINE_TYPE_NULL, - NULL } -}; - -int -main(int argc, char *argv[]) -{ - int ret, exit_status = ORTE_SUCCESS; - opal_list_t hnp_list; - opal_list_item_t* item = NULL; - orte_ps_mpirun_info_t hnpinfo; - bool reported = false; - - /*************** - * Initialize - ***************/ - OBJ_CONSTRUCT(&hnp_list, opal_list_t); - - if (ORTE_SUCCESS != (ret = orte_ps_init(argc, argv))) { - exit_status = ret; - goto cleanup; - } - - /* - * Get the directory listing - */ - opal_output_verbose(10, orte_ps_globals.output, - "orte_ps: Acquiring list of HNPs and setting contact info into RML...\n"); - - if (ORTE_SUCCESS != (ret = orte_list_local_hnps(&hnp_list, true) ) ) { - exit_status = ret; - goto cleanup; - } - - opal_output_verbose(10, orte_ps_globals.output, - "orte_ps: Found %d HNPs\n", - (int)opal_list_get_size(&hnp_list)); - - /* - * For each hnp in the listing - */ - while (NULL != (item = opal_list_remove_first(&hnp_list))) { - orte_hnp_contact_t *hnp = (orte_hnp_contact_t*)item; - hnpinfo.hnp = hnp; - - opal_output_verbose(10, orte_ps_globals.output, - "orte_ps: Processing HNP %lu\n", - (unsigned long)hnpinfo.hnp->pid); - - if (0 < orte_ps_globals.pid && - hnpinfo.hnp->pid != orte_ps_globals.pid) { - continue; - } - - /* - * Gather the information - */ - opal_output_verbose(10, orte_ps_globals.output, - "orte_ps: Gathering Information for HNP: %s:%d\n", - ORTE_NAME_PRINT(&(hnpinfo.hnp->name)), - hnpinfo.hnp->pid); - - if( ORTE_SUCCESS != (ret = gather_information(&hnpinfo)) ) { - /* this could be due to a stale session directory - if so, - * just skip this entry, but don't abort - */ - if (!reported && ORTE_ERR_SILENT == ret) { - orte_show_help("help-orte-ps.txt", "stale-hnp", true, - ORTE_NAME_PRINT(&(hnpinfo.hnp->name))); - reported = true; - continue; - } - goto cleanup; - } - - /* Print the information */ - if (orte_ps_globals.parseable) { - if (ORTE_SUCCESS != (ret = parseable_print(&hnpinfo))) { - exit_status = ret; - goto cleanup; - } - } else { - if(ORTE_SUCCESS != (ret = pretty_print(&hnpinfo)) ) { - exit_status = ret; - goto cleanup; - } - } - } - - /*************** - * Cleanup - ***************/ - cleanup: - orte_finalize(); - - return exit_status; -} - -static int parse_args(int argc, char *argv[]) { - int ret; - opal_cmd_line_t cmd_line; - orte_ps_globals_t tmp = { false, /* help */ - false, /* verbose */ - false, /* parseable */ - ORTE_JOBID_WILDCARD, /* jobid */ - false, /* nodes */ - false, /* daemons */ - -1, /* output */ - 0}; /* pid */ - - orte_ps_globals = tmp; - - /* Parse the command line options */ - opal_cmd_line_create(&cmd_line, cmd_line_opts); - - mca_base_open(); - mca_base_cmd_line_setup(&cmd_line); - ret = opal_cmd_line_parse(&cmd_line, false, false, argc, argv); - - if (OPAL_SUCCESS != ret) { - if (OPAL_ERR_SILENT != ret) { - fprintf(stderr, "%s: command line error (%s)\n", argv[0], - opal_strerror(ret)); - } - return ret; - } - - /** - * Now start parsing our specific arguments - */ - if (orte_ps_globals.help) { - char *str, *args = NULL; - args = opal_cmd_line_get_usage_msg(&cmd_line); - str = opal_show_help_string("help-orte-ps.txt", "usage", true, - args); - if (NULL != str) { - printf("%s", str); - free(str); - } - free(args); - /* If we show the help message, that should be all we do */ - exit(0); - } - - /* if the jobid is given, then we need a pid */ - if (ORTE_JOBID_WILDCARD != orte_ps_globals.jobid && - 0 == orte_ps_globals.pid) { - orte_show_help("help-orte-ps.txt", "need-vpid", true, - orte_ps_globals.jobid); - return ORTE_ERROR; - } - - return ORTE_SUCCESS; -} - -static int orte_ps_init(int argc, char *argv[]) { - int ret; -#if OPAL_ENABLE_FT_CR == 1 - char * tmp_env_var = NULL; -#endif - - /* - * Make sure to init util before parse_args - * to ensure installdirs is setup properly - * before calling mca_base_open(); - */ - if( ORTE_SUCCESS != (ret = opal_init_util(&argc, &argv)) ) { - return ret; - } - - /* - * Parse Command Line Arguments - */ - if (ORTE_SUCCESS != (ret = parse_args(argc, argv))) { - return ret; - } - - /* - * Setup OPAL Output handle from the verbose argument - */ - if( orte_ps_globals.verbose ) { - orte_ps_globals.output = opal_output_open(NULL); - opal_output_set_verbosity(orte_ps_globals.output, 10); - } else { - orte_ps_globals.output = 0; /* Default=STDERR */ - } - -#if OPAL_ENABLE_FT_CR == 1 - /* Disable the checkpoint notification routine for this - * tool. As we will never need to checkpoint this tool. - * Note: This must happen before opal_init(). - */ - opal_cr_set_enabled(false); - - /* Select the none component, since we don't actually use a checkpointer */ - (void) mca_base_var_env_name("crs", &tmp_env_var); - opal_setenv(tmp_env_var, - "none", - true, &environ); - free(tmp_env_var); - tmp_env_var = NULL; - - (void) mca_base_var_env_name("opal_cr_is_tool", &tmp_env_var); - opal_setenv(tmp_env_var, - "1", - true, &environ); - free(tmp_env_var); -#endif - - /* we are never allowed to operate as a distributed tool, - * so insist on the ess/tool component */ - opal_setenv("OMPI_MCA_ess", "tool", true, &environ); - - /*************************** - * We need all of OPAL and the TOOL portion of ORTE - ***************************/ - ret = orte_init(&argc, &argv, ORTE_PROC_TOOL); - - return ret; -} - -static int pretty_print(orte_ps_mpirun_info_t *hnpinfo) { - char *header; - int len_hdr; - - /* - * Print header and remember header length - */ - len_hdr = opal_asprintf(&header, "Information from mpirun %s", ORTE_JOBID_PRINT(hnpinfo->hnp->name.jobid)); - - printf("\n\n%s\n", header); - free(header); - pretty_print_dashed_line(len_hdr); - - /* - * Print Node Information - */ - if( orte_ps_globals.nodes ) - pretty_print_nodes(hnpinfo->nodes, hnpinfo->num_nodes); - - /* - * Print Job Information - */ - pretty_print_jobs(hnpinfo->jobs, hnpinfo->num_jobs); - - return ORTE_SUCCESS; -} - -static int pretty_print_nodes(orte_node_t **nodes, orte_std_cntr_t num_nodes) { - int line_len; - int len_name = 0, - len_state = 0, - len_slots = 0, - len_slots_i = 0, - len_slots_m = 0; - orte_node_t *node; - orte_std_cntr_t i; - - /* - * Caculate segment lengths - */ - len_name = (int) strlen("Node Name"); - len_state = (int) strlen("State"); - len_slots = (int) strlen("Slots"); - len_slots_i = (int) strlen("Slots In Use"); - len_slots_m = (int) strlen("Slots Max"); - - for(i=0; i < num_nodes; i++) { - node = nodes[i]; - - if( NULL != node->name && - (int)strlen(node->name) > len_name) - len_name = (int) strlen(node->name); - - if( (int)strlen(pretty_node_state(node->state)) > len_state ) - len_state = (int)strlen(pretty_node_state(node->state)); - } - - line_len = (len_name + 3 + - len_state + 3 + - len_slots + 3 + - len_slots_i + 3 + - len_slots_m) + 2; - - /* - * Print the header - */ - printf("%*s | ", len_name, "Node Name"); - printf("%*s | ", len_state, "State"); - printf("%*s | ", len_slots, "Slots"); - printf("%*s | ", len_slots_m, "Slots Max"); - printf("%*s | ", len_slots_i, "Slots In Use"); - printf("\n"); - - pretty_print_dashed_line(line_len); - - /* - * Print Info - */ - for(i=0; i < num_nodes; i++) { - node = nodes[i]; - - printf("%*s | ", len_name, node->name); - printf("%*s | ", len_state, pretty_node_state(node->state)); - printf("%*d | ", len_slots, (uint)node->slots); - printf("%*d | ", len_slots_m, (uint)node->slots_max); - printf("%*d | ", len_slots_i, (uint)node->slots_inuse); - printf("\n"); - - } - - return ORTE_SUCCESS; -} - -static int pretty_print_jobs(orte_job_t **jobs, orte_std_cntr_t num_jobs) { - int len_jobid = 0, - len_state = 0, - len_slots = 0, - len_vpid_r = 0, - len_ckpt_s = 0, - len_ckpt_r = 0, - len_ckpt_l = 0; - int line_len; - orte_job_t *job; - orte_std_cntr_t i; - char *jobstr; - orte_jobid_t mask=0x0000ffff; -#if OPAL_ENABLE_FT_CR == 1 - char * state_str = NULL; - size_t ckpt_state; - char *snap_ref = NULL; - char *snap_loc = NULL; -#endif - - for(i=0; i < num_jobs; i++) { - job = jobs[i]; - - /* check the jobid to see if this is the daemons' job */ - if ((0 == (mask & job->jobid)) && !orte_ps_globals.daemons) { - continue; - } - - /* setup the printed name - do -not- free this! */ - jobstr = ORTE_JOBID_PRINT(job->jobid); - - /* - * Caculate segment lengths - */ - len_jobid = strlen(jobstr);; - len_state = (int) (strlen(orte_job_state_to_str(job->state)) < strlen("State") ? - strlen("State") : - strlen(orte_job_state_to_str(job->state))); - len_slots = 6; - len_vpid_r = (int) strlen("Num Procs"); -#if OPAL_ENABLE_FT_CR == 1 - orte_get_attribute(&job->attributes, ORTE_JOB_CKPT_STATE, (void**)&ckpt_state, OPAL_INT32); - orte_get_attribute(&job->attributes, ORTE_JOB_SNAPSHOT_REF, (void**)&snap_ref, OPAL_STRING); - orte_get_attribute(&job->attributes, ORTE_JOB_SNAPSHOT_LOC, (void**)&snap_loc, OPAL_STRING); - orte_snapc_ckpt_state_str(&state_str, ckpt_state); - len_ckpt_s = (int) (strlen(state_str) < strlen("Ckpt State") ? - strlen("Ckpt State") : - strlen(state_str) ); - len_ckpt_r = (int) (NULL == snap_ref ? strlen("Ckpt Ref") : (strlen(snap_ref) < strlen("Ckpt Ref") ? - strlen("Ckpt Ref") : strlen(snap_ref))); - len_ckpt_l = (int) (NULL == snap_loc ? strlen("Ckpt Loc") : (strlen(snap_loc) < strlen("Ckpt Loc") ? - strlen("Ckpt Loc") : strlen(snap_loc))); -#else - len_ckpt_s = -3; - len_ckpt_r = -3; - len_ckpt_l = -3; -#endif - - line_len = (len_jobid + 3 + - len_state + 3 + - len_slots + 3 + - len_vpid_r + 3 + - len_ckpt_s + 3 + - len_ckpt_r + 3 + - len_ckpt_l) - + 2; - - /* - * Print Header - */ - printf("\n"); - printf("%*s | ", len_jobid , "JobID"); - printf("%*s | ", len_state , "State"); - printf("%*s | ", len_slots , "Slots"); - printf("%*s | ", len_vpid_r , "Num Procs"); -#if OPAL_ENABLE_FT_CR == 1 - printf("%*s | ", len_ckpt_s , "Ckpt State"); - printf("%*s | ", len_ckpt_r , "Ckpt Ref"); - printf("%*s |", len_ckpt_l , "Ckpt Loc"); -#endif - printf("\n"); - - pretty_print_dashed_line(line_len); - - /* - * Print Info - */ - printf("%*s | ", len_jobid , ORTE_JOBID_PRINT(job->jobid)); - printf("%*s | ", len_state , orte_job_state_to_str(job->state)); - printf("%*d | ", len_slots , (uint)job->total_slots_alloc); - printf("%*d | ", len_vpid_r, job->num_procs); -#if OPAL_ENABLE_FT_CR == 1 - printf("%*s | ", len_ckpt_s, state_str); - printf("%*s | ", len_ckpt_r, (NULL == snap_ref ? "" : snap_ref)); - printf("%*s |", len_ckpt_l, (NULL == snap_loc ? "" : snap_loc)); -#endif - printf("\n"); - - - pretty_print_vpids(job); - printf("\n\n"); /* give a little room between job outputs */ - } - - return ORTE_SUCCESS; -} - -static int pretty_print_vpids(orte_job_t *job) { - int len_o_proc_name = 0, - len_proc_name = 0, - len_rank = 0, - len_pid = 0, - len_state = 0, - len_node = 0, - len_ckpt_s = 0, - len_ckpt_r = 0, - len_ckpt_l = 0; - int i, line_len; - orte_vpid_t v; - orte_proc_t *vpid; - orte_app_context_t *app; - char *o_proc_name; -#if OPAL_ENABLE_FT_CR == 1 - char *state_str = NULL; - size_t ckpt_state; - char *snap_ref = NULL; - char *snap_loc = NULL; -#endif - char **nodename = NULL; - - if (0 == job->num_procs) { - return ORTE_SUCCESS; - } - - /* - * Caculate segment lengths - */ - len_o_proc_name = (int)strlen("ORTE Name"); - len_proc_name = (int)strlen("Process Name"); - len_rank = (int)strlen("Local Rank"); - len_pid = 6; - len_state = 0; - len_node = 0; -#if OPAL_ENABLE_FT_CR == 1 - len_ckpt_s = strlen("Ckpt State"); - len_ckpt_r = strlen("Ckpt Ref"); - len_ckpt_l = strlen("Ckpt Loc"); -#else - len_ckpt_s = -3; - len_ckpt_r = -3; - len_ckpt_l = -3; -#endif - - nodename = (char **) malloc(job->num_procs * sizeof(char *)); - for(v=0; v < job->num_procs; v++) { - char *rankstr; - vpid = (orte_proc_t*)job->procs->addr[v]; - - /* - * Find my app context - */ - if( 0 >= (int)job->num_apps ) { - if( 0 == vpid->name.vpid ) { - if( (int)strlen("orterun") > len_proc_name) - len_proc_name = strlen("orterun"); - } - else { - if( (int)strlen("orted") > len_proc_name) - len_proc_name = strlen("orted"); - } - } - for( i = 0; i < (int)job->num_apps; ++i) { - app = (orte_app_context_t*)job->apps->addr[i]; - if( app->idx == vpid->app_idx ) { - if( (int)strlen(app->app) > len_proc_name) - len_proc_name = strlen(app->app); - break; - } - } - - o_proc_name = orte_util_print_name_args(&vpid->name); - if ((int)strlen(o_proc_name) > len_o_proc_name) - len_o_proc_name = strlen(o_proc_name); - - opal_asprintf(&rankstr, "%u", (uint)vpid->local_rank); - if ((int)strlen(rankstr) > len_rank) - len_rank = strlen(rankstr); - free(rankstr); - - nodename[v] = NULL; - if( orte_get_attribute(&vpid->attributes, ORTE_PROC_NODENAME, (void**)&nodename[v], OPAL_STRING) && - (int)strlen(nodename[v]) > len_node) { - len_node = strlen(nodename[v]); - } else if ((int)strlen("Unknown") > len_node) { - len_node = strlen("Unknown"); - } - - if( (int)strlen(orte_proc_state_to_str(vpid->state)) > len_state) - len_state = strlen(orte_proc_state_to_str(vpid->state)); - -#if OPAL_ENABLE_FT_CR == 1 - orte_get_attribute(&vpid->attributes, ORTE_PROC_CKPT_STATE, (void**)&ckpt_state, OPAL_INT32); - orte_get_attribute(&vpid->attributes, ORTE_PROC_SNAPSHOT_REF, (void**)&snap_ref, OPAL_STRING); - orte_get_attribute(&vpid->attributes, ORTE_PROC_SNAPSHOT_LOC, (void**)&snap_loc, OPAL_STRING); - orte_snapc_ckpt_state_str(&state_str, ckpt_state); - if( (int)strlen(state_str) > len_ckpt_s) - len_ckpt_s = strlen(state_str); - - if(NULL != snap_ref && (int)strlen(snap_ref) > len_ckpt_r) - len_ckpt_r = strlen(snap_ref); - - if(NULL != snap_loc && (int)strlen(snap_loc) > len_ckpt_l) - len_ckpt_l = strlen(snap_loc); -#endif - } - - line_len = (len_o_proc_name + 3 + - len_proc_name + 3 + - len_rank + 3 + - len_pid + 3 + - len_state + 3 + - len_node + 3 + - len_ckpt_s + 3 + - len_ckpt_r + 3 + - len_ckpt_l) - + 2; - - /* - * Print Header - */ - printf("\t"); - printf("%*s | ", len_proc_name , "Process Name"); - printf("%*s | ", len_o_proc_name , "ORTE Name"); - printf("%*s | ", len_rank , "Local Rank"); - printf("%*s | ", len_pid , "PID"); - printf("%*s | ", len_node , "Node"); - printf("%*s | ", len_state , "State"); -#if OPAL_ENABLE_FT_CR == 1 - printf("%*s | ", len_ckpt_s , "Ckpt State"); - printf("%*s | ", len_ckpt_r , "Ckpt Ref"); - printf("%*s |", len_ckpt_l , "Ckpt Loc"); -#endif - printf("\n"); - - printf("\t"); - pretty_print_dashed_line(line_len); - - /* - * Print Info - */ - for(v=0; v < job->num_procs; v++) { - vpid = (orte_proc_t*)job->procs->addr[v]; - - printf("\t"); - - if( 0 >= (int)job->num_apps ) { - if( 0 == vpid->name.vpid ) { - printf("%*s | ", len_proc_name, "orterun"); - } else { - printf("%*s | ", len_proc_name, "orted"); - } - } - for( i = 0; i < (int)job->num_apps; ++i) { - app = (orte_app_context_t*)job->apps->addr[i]; - if( app->idx == vpid->app_idx ) { - printf("%*s | ", len_proc_name, app->app); - break; - } - } - - o_proc_name = orte_util_print_name_args(&vpid->name); - - printf("%*s | ", len_o_proc_name, o_proc_name); - printf("%*u | ", len_rank , (uint)vpid->local_rank); - printf("%*d | ", len_pid , vpid->pid); - printf("%*s | ", len_node , (NULL == nodename[v]) ? "Unknown" : nodename[v]); - printf("%*s | ", len_state , orte_proc_state_to_str(vpid->state)); - - if (NULL != nodename[v]) { - free(nodename[v]); - } -#if OPAL_ENABLE_FT_CR == 1 - printf("%*s | ", len_ckpt_s, state_str); - printf("%*s | ", len_ckpt_r, (NULL == snap_ref ? "" : snap_ref)); - printf("%*s |", len_ckpt_l, (NULL == snap_loc ? "" : snap_loc)); -#endif - printf("\n"); - - } - if (NULL != nodename) { - free(nodename); - } - return ORTE_SUCCESS; -} - -static void pretty_print_dashed_line(int len) { - static const char dashes[9] = "--------"; - - while (len >= 8) { - printf("%8.8s", dashes); - len -= 8; - } - printf("%*.*s\n", len, len, dashes); -} - -static int gather_information(orte_ps_mpirun_info_t *hnpinfo) { - int ret; - - if( ORTE_SUCCESS != (ret = gather_active_jobs(hnpinfo) )) { - goto cleanup; - } - - if( ORTE_SUCCESS != (ret = gather_nodes(hnpinfo) )) { - goto cleanup; - } - - if( ORTE_SUCCESS != (ret = gather_vpid_info(hnpinfo) )) { - goto cleanup; - } - - cleanup: - return ret; -} - -static int gather_active_jobs(orte_ps_mpirun_info_t *hnpinfo) { - int ret; - - if (ORTE_SUCCESS != (ret = orte_util_comm_query_job_info(&(hnpinfo->hnp->name), orte_ps_globals.jobid, - &hnpinfo->num_jobs, &hnpinfo->jobs))) { - ORTE_ERROR_LOG(ret); - } - - return ret; -} - -static int gather_nodes(orte_ps_mpirun_info_t *hnpinfo) { - int ret; - - if (ORTE_SUCCESS != (ret = orte_util_comm_query_node_info(&(hnpinfo->hnp->name), NULL, - &hnpinfo->num_nodes, &hnpinfo->nodes))) { - ORTE_ERROR_LOG(ret); - } - opal_output(0, "RECEIVED %d NODES", hnpinfo->num_nodes); - return ret; -} - -static int gather_vpid_info(orte_ps_mpirun_info_t *hnpinfo) { - int ret; - orte_std_cntr_t i; - int cnt; - orte_job_t *job; - orte_proc_t **procs; - - /* - * For each Job in the HNP - */ - for(i=0; i < hnpinfo->num_jobs; i++) { - job = hnpinfo->jobs[i]; - - /* - * Skip getting the vpid's for the HNP, unless asked to do so - * The HNP is always the first in the array - */ - if( 0 == i && !orte_ps_globals.daemons) { - continue; - } - - /* query the HNP for info on the procs in this job */ - if (ORTE_SUCCESS != (ret = orte_util_comm_query_proc_info(&(hnpinfo->hnp->name), - job->jobid, - ORTE_VPID_WILDCARD, - &cnt, - &procs))) { - ORTE_ERROR_LOG(ret); - } - job->procs->addr = (void**)procs; - job->procs->size = cnt; - job->num_procs = cnt; - } - - return ORTE_SUCCESS; -} - -static char *pretty_node_state(orte_node_state_t state) { - switch(state) { - case ORTE_NODE_STATE_DOWN: - return strdup("Down"); - break; - case ORTE_NODE_STATE_UP: - return strdup("Up"); - break; - case ORTE_NODE_STATE_REBOOT: - return strdup("Reboot"); - break; - case ORTE_NODE_STATE_UNKNOWN: - default: - return strdup("Unknown"); - break; - } -} - -static int parseable_print(orte_ps_mpirun_info_t *hnpinfo) -{ - orte_job_t **jobs; - orte_node_t **nodes; - orte_proc_t *proc; - orte_app_context_t *app; - char *appname; - int i, j; - char *nodename; - - /* don't include the daemon job in the number of jobs reported */ - printf("mpirun:%lu:num nodes:%d:num jobs:%d\n", - (unsigned long)hnpinfo->hnp->pid, hnpinfo->num_nodes, hnpinfo->num_jobs-1); - - if (orte_ps_globals.nodes) { - nodes = hnpinfo->nodes; - for (i=0; i < hnpinfo->num_nodes; i++) { - printf("node:%s:state:%s:slots:%d:in use:%d\n", - nodes[i]->name, pretty_node_state(nodes[i]->state), - nodes[i]->slots, nodes[i]->slots_inuse); - } - } - - jobs = hnpinfo->jobs; - /* skip job=0 as that's the daemon job */ - for (i=1; i < hnpinfo->num_jobs; i++) { - printf("jobid:%d:state:%s:slots:%d:num procs:%d\n", - ORTE_LOCAL_JOBID(jobs[i]->jobid), - orte_job_state_to_str(jobs[i]->state), - jobs[i]->total_slots_alloc, - jobs[i]->num_procs); - /* print the proc info */ - for (j=0; j < jobs[i]->procs->size; j++) { - if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jobs[i]->procs, j))) { - continue; - } - app = (orte_app_context_t*)opal_pointer_array_get_item(jobs[i]->apps, proc->app_idx); - if (NULL == app) { - appname = strdup("NULL"); - } else { - appname = opal_basename(app->app); - } - nodename = NULL; - orte_get_attribute(&proc->attributes, ORTE_PROC_NODENAME, (void**)&nodename, OPAL_STRING); - printf("process:%s:rank:%s:pid:%lu:node:%s:state:%s\n", - appname, ORTE_VPID_PRINT(proc->name.vpid), - (unsigned long)proc->pid, - (NULL == nodename) ? "unknown" : nodename, - orte_proc_state_to_str(proc->state)); - free(appname); - if (NULL != nodename) { - free(nodename); - } - } - } - - return ORTE_SUCCESS; -} diff --git a/orte/tools/orte-top/Makefile.am b/orte/tools/orte-top/Makefile.am deleted file mode 100644 index ab3b86508a..0000000000 --- a/orte/tools/orte-top/Makefile.am +++ /dev/null @@ -1,47 +0,0 @@ -# -# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -include $(top_srcdir)/Makefile.ompi-rules - -man_pages = orte-top.1 -EXTRA_DIST = orte-top.1in - -if OPAL_INSTALL_BINARIES - -bin_PROGRAMS = orte-top - -nodist_man_MANS = $(man_pages) - -# Ensure that the man pages are rebuilt if the opal_config.h file -# changes; a "good enough" way to know if configure was run again (and -# therefore the release date or version may have changed) -$(nodist_man_MANS): $(top_builddir)/opal/include/opal_config.h - -dist_ortedata_DATA = help-orte-top.txt - -endif # OPAL_INSTALL_BINARIES - -orte_top_SOURCES = orte-top.c -orte_top_LDADD = \ - $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la \ - $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la - -distclean-local: - rm -f $(man_pages) diff --git a/orte/tools/orte-top/help-orte-top.txt b/orte/tools/orte-top/help-orte-top.txt deleted file mode 100644 index 5eae695eed..0000000000 --- a/orte/tools/orte-top/help-orte-top.txt +++ /dev/null @@ -1,82 +0,0 @@ -# -*- text -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2009 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# This is the US/English general help file for Open MPI's orte-top tool. -# -[orte-top:usage] -Return statistics on specified process ranks - -Usage: %s [OPTIONS] - -%s -# -[orte-top:pid-not-found] -We could not find an mpirun matching the provided pid on this machine. - -Pid provided: %d -# -[orte-top:no-contact-given] -This tool requires that you specify contact info for the mpirun executing -the specified rank(s). Please use the --help option for more information. -# -[orte-top:hnp-filename-bad] -We are unable to parse the filename where contact info for the -mpirun to be contacted was to be found. The option we were given was: - ---%s %s - -This appears to be missing the required ':' following the -keyword "file". Please use the --help option for more information on -the correct format for this command line option. -# -[orte-top:hnp-filename-access] -We are unable to access the filename where contact info for the -mpirun to be contacted was to be found. The filename we were given was: - -File: %s - -Please use the --help option for more information on -the correct format for this command line option. -# -[orte-top:hnp-file-bad] -We are unable to read the mpirun's contact info from the -given filename. The filename we were given was: - -FILE: %s - -Please use the --help option for more information on -the correct format for this command line option. -# -[orte-top:hnp-uri-bad] -We are unable to correctly parse the mpirun's contact info. The uri we were given was: - -URI: %s - -Please remember that this is *not* a standard uri, but -a special format used internally by Open MPI for communications. It can -best be generated by simply directing mpirun to put its -uri in a file, and then giving us that filename. -# -[orte-top:cant-open-logfile] -We are unable to open the specified output log file. - -File: %s - -Please use the --help option for more information on -the correct format for this command line option. diff --git a/orte/tools/orte-top/orte-top.1in b/orte/tools/orte-top/orte-top.1in deleted file mode 100644 index c33654a5b4..0000000000 --- a/orte/tools/orte-top/orte-top.1in +++ /dev/null @@ -1,106 +0,0 @@ -.\" -.\" Copyright (c) 2007 Los Alamos National Security, LLC -.\" All rights reserved. -.\" Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved. -.\" -.\" Man page for OMPI's ompi-server command -.\" -.\" .TH name section center-footer left-footer center-header -.TH OMPI-TOP 1 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" -.\" ************************** -.\" Name Section -.\" ************************** -.SH NAME -. -ompi-top, orte-top \- Diagnostic to provide process info similar to the popular "top" program. -. -.PP -. -\fBNOTE:\fP \fIompi-top\fP, and \fIorte-top\fP are exact -synonyms for each other. Using any of the names will result in exactly -identical behavior. -. - -.\" ************************** -.\" Synopsis Section -.\" ************************** -.SH SYNOPSIS -. -.BR ompi-top " [ options ]" -. -.\" ************************** -.\" Options Section -.\" ************************** -.SH Options -. -\fIompi-top\fR collects and displays process information in a manner similar -to that of the popular "top" program. -. -.TP 10 -.B -h | --help -Display help for this command -. -. -.TP -.B -pid | --pid \fR\fP -The pid of the mpirun whose processes you want information about, or the name -of the file (specified as file:filename) that contains that info. Note that -the ompi-top command must be executed on the same node as mpirun to use this option. -. -. -.TP -.B -uri | --uri \fR\fP -Specify the URI of the mpirun whose processes you want information about, or the name -of the file (specified as file:filename) that contains that info. Note that -the ompi-top command does not have to be executed on the same node as mpirun to use this option. -. -. -.TP -.B -rank | --rank \fR\fP -The rank of the processes to be monitored. This can consist of a single rank, or -a comma-separated list of ranks. These can include rank ranges separated by a '-'. -If this option is not provided, or a value of -1 is given, ompi-top will default -to displaying information on all ranks. -. -. -.TP -.B -bynode | --bynode -Display the results grouped by node, with each node's processes reported in rank -order. If this option is not provided, ompi-top will default to displaying all -results in rank order. -. -. -.TP -.B -update-rate | --update-rate \fR\fP -The time (in seconds) between updates of the displayed information. If this option -is not provided, ompi-top will default to executing only once. -. -. -.TP -.B -timestamp | --timestamp -Provide an approximate time when each sample was taken. This time is approximate as it -only shows the time when the sample command was issued. -. -. -.TP -.B -log-file | --log-file \fR\fP -Log the results to the specified file instead of displaying them to stdout. -. -. -.\" ************************** -.\" Description Section -.\" ************************** -.SH DESCRIPTION -. -.PP -\fIompi-top\fR collects and displays process information in a manner similar -to that of the popular "top" program. It doesn't do the fancy screen display, but -does allow you to monitor available process information (to the limits of the underlying -operating system) of processes irrespective of their location. -. -.\" ************************** -.\" See Also Section -.\" ************************** -. -.SH SEE ALSO -. diff --git a/orte/tools/orte-top/orte-top.c b/orte/tools/orte-top/orte-top.c deleted file mode 100644 index f221d24b5e..0000000000 --- a/orte/tools/orte-top/orte-top.c +++ /dev/null @@ -1,1042 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include -#include -#ifdef HAVE_UNISTD_H -#include -#endif -#ifdef HAVE_NETDB_H -#include -#endif -#ifdef HAVE_SYS_PARAM_H -#include -#endif -#include -#include -#include -#include - -#include "opal/util/cmd_line.h" -#include "opal/util/argv.h" -#include "opal/util/show_help.h" -#include "opal/util/opal_environ.h" -#include "opal/util/printf.h" -#include "opal/dss/dss.h" -#include "opal/mca/base/base.h" -#include "opal/mca/pmix/pmix.h" -#include "opal/runtime/opal.h" -#include "opal/mca/event/event.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/iof/base/base.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/odls/odls_types.h" -#include "orte/mca/routed/routed.h" -#include "orte/runtime/orte_globals.h" -#include "orte/util/hnp_contact.h" -#include "orte/util/name_fns.h" -#include "orte/util/show_help.h" -#include "orte/util/proc_info.h" -#include "orte/util/threads.h" -#include "orte/runtime/orte_wait.h" -#include "orte/mca/rml/base/rml_contact.h" -#include "orte/runtime/orte_quit.h" - -/* - * Local variables & functions - */ -static void abort_exit_callback(int fd, short flags, void *arg); -static opal_event_t term_handler; -static opal_event_t int_handler; -static opal_list_t hnp_list; -static bool all_recvd; -static int32_t num_replies; -static int32_t num_recvd; -static opal_buffer_t cmdbuf; -static FILE *fp = NULL; -static bool help; -static char *hnppidstr; -static char *hnpuristr; -static char *ranks; -static orte_hnp_contact_t *target_hnp; -static int update_rate; -static bool timestamp; -static char *logfile; -static bool bynode; -static opal_list_t recvd_stats; -static char *sample_time; -static bool need_header = true; -static int num_lines=0; -static bool fields_set = false; -static int nodefield = 0; -static int rankfield = 0; -static int pidfield = 0; -static int cmdfield = 0; -static int timefield = 6; -static int prifield = 0; -static int thrfield = 0; -static int vsizefield = 0; -static int rssfield = 0; -static int pkvfield = 0; -static int pfield = 0; - -/* flag what fields were actually found */ -static bool pri_found = false; -static bool thr_found = false; -static bool vsize_found = false; -static bool rss_found = false; -static bool pkv_found = false; -static bool p_found = false; - -#define MAX_LINES 20 - -opal_cmd_line_init_t cmd_line_opts[] = { - { NULL, - 'h', NULL, "help", - 0, - &help, OPAL_CMD_LINE_TYPE_BOOL, - "This help message" }, - - { NULL, - '\0', "pid", "pid", - 1, - &hnppidstr, OPAL_CMD_LINE_TYPE_STRING, - "The pid of the mpirun that you wish to query/monitor" }, - - { NULL, - '\0', "uri", "uri", - 1, - &hnpuristr, OPAL_CMD_LINE_TYPE_STRING, - "The uri of the mpirun that you wish to query/monitor" }, - - { NULL, - '\0', "rank", "rank", - 1, - &ranks, OPAL_CMD_LINE_TYPE_STRING, - "Rank whose resource usage is to be displayed/monitored" }, - - { NULL, - '\0', "update-rate", "update-rate", - 1, - &update_rate, OPAL_CMD_LINE_TYPE_INT, - "Number of seconds between updates" }, - - { NULL, - '\0', "timestamp", "timestamp", - 0, - ×tamp, OPAL_CMD_LINE_TYPE_BOOL, - "Time stamp each sample" }, - - { NULL, - '\0', "log-file", "log-file", - 1, - &logfile, OPAL_CMD_LINE_TYPE_STRING, - "Output file for returned statistics" }, - - { NULL, - '\0', "bynode", "bynode", - 0, - &bynode, OPAL_CMD_LINE_TYPE_BOOL, - "Group statistics by node, sorted by rank within each node" }, - - /* End of list */ - { NULL, - '\0', NULL, NULL, - 0, - NULL, OPAL_CMD_LINE_TYPE_NULL, - NULL } -}; - - -static void recv_stats(int status, orte_process_name_t* sender, - opal_buffer_t *buffer, orte_rml_tag_t tag, - void* cbdata); - -static void pretty_print(void); -static void print_headers(void); - -static void send_cmd(int fd, short dummy, void *arg) -{ - int ret; - opal_buffer_t *buf; - - all_recvd = false; - num_replies = INT_MAX; - num_recvd = 0; - buf = OBJ_NEW(opal_buffer_t); - opal_dss.copy_payload(buf, &cmdbuf); - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &(target_hnp->name), buf, - ORTE_RML_TAG_DAEMON, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(ret); - OBJ_RELEASE(buf); - orte_quit(0,0,NULL); - return; - } -} - -int -main(int argc, char *argv[]) -{ - int ret; - opal_cmd_line_t cmd_line; - opal_list_item_t* item = NULL; - orte_daemon_cmd_flag_t command; - pid_t hnppid; - orte_process_name_t proc; - char **r1=NULL, **r2; - int i; - orte_vpid_t vstart, vend; - int vint; - char *rtmod; - opal_value_t val; - - /*************** - * Initialize - ***************/ - - /* - * Make sure to init util before parse_args - * to ensure installdirs is setup properly - * before calling mca_base_open(); - */ - if( ORTE_SUCCESS != (ret = opal_init_util(&argc, &argv)) ) { - return ret; - } - - /* initialize the globals */ - help = false; - hnppidstr = NULL; - ranks = NULL; - target_hnp = NULL; - update_rate = -1; - timestamp = false; - logfile = NULL; - - /* Parse the command line options */ - opal_cmd_line_create(&cmd_line, cmd_line_opts); - - mca_base_open(); - mca_base_cmd_line_setup(&cmd_line); - ret = opal_cmd_line_parse(&cmd_line, false, false, argc, argv); - if (OPAL_SUCCESS != ret) { - if (OPAL_ERR_SILENT != ret) { - fprintf(stderr, "%s: command line error (%s)\n", argv[0], - opal_strerror(ret)); - } - return 1; - } - - /** - * Now start parsing our specific arguments - */ - if (help) { - char *str, *args = NULL; - args = opal_cmd_line_get_usage_msg(&cmd_line); - str = opal_show_help_string("help-orte-top.txt", "orte-top:usage", - true, "orte-top", args); - if (NULL != str) { - printf("%s", str); - free(str); - } - free(args); - /* If we show the help message, that should be all we do */ - return 0; - } - - /* we are never allowed to operate as a distributed tool, - * so insist on the ess/tool component */ - opal_setenv("OMPI_MCA_ess", "tool", true, &environ); - - /*************************** - * We need all of OPAL and the TOOL portion of ORTE - ***************************/ - if (ORTE_SUCCESS != orte_init(&argc, &argv, ORTE_PROC_TOOL)) { - orte_finalize(); - return 1; - } - - /* get our routed module */ - rtmod = orte_rml.get_routed(orte_mgmt_conduit); - - /* setup the list for recvd stats */ - OBJ_CONSTRUCT(&recvd_stats, opal_list_t); - - /** setup callbacks for abort signals - from this point - * forward, we need to abort in a manner that allows us - * to cleanup - */ - opal_event_signal_set(orte_event_base, &term_handler, SIGTERM, - abort_exit_callback, &term_handler); - opal_event_signal_add(&term_handler, NULL); - opal_event_signal_set(orte_event_base, &int_handler, SIGINT, - abort_exit_callback, &int_handler); - opal_event_signal_add(&int_handler, NULL); - - /* - * Must specify the mpirun pid - */ - if (NULL != hnppidstr) { - if (0 == strncmp(hnppidstr, "file", strlen("file")) || - 0 == strncmp(hnppidstr, "FILE", strlen("FILE"))) { - char input[1024], *filename; - FILE *fp; - - /* it is a file - get the filename */ - filename = strchr(hnppidstr, ':'); - if (NULL == filename) { - /* filename is not correctly formatted */ - orte_show_help("help-orte-top.txt", "orte-top:hnp-filename-bad", true, "pid", hnppidstr); - orte_finalize(); - exit(1); - } - ++filename; /* space past the : */ - - if (0 >= strlen(filename)) { - /* they forgot to give us the name! */ - orte_show_help("help-orte-top.txt", "orte-top:hnp-filename-bad", true, "pid", hnppidstr); - orte_finalize(); - exit(1); - } - - /* open the file and extract the pid */ - fp = fopen(filename, "r"); - if (NULL == fp) { /* can't find or read file! */ - orte_show_help("help-orte-top.txt", "orte-top:hnp-filename-access", true, filename); - orte_finalize(); - exit(1); - } - if (NULL == fgets(input, 1024, fp)) { - /* something malformed about file */ - fclose(fp); - orte_show_help("help-orte-top.txt", "orte-top:hnp-file-bad", true, filename); - orte_finalize(); - exit(1); - } - fclose(fp); - input[strlen(input)-1] = '\0'; /* remove newline */ - /* convert the pid */ - hnppid = strtoul(input, NULL, 10); - } else { - /* should just be the pid itself */ - hnppid = strtoul(hnppidstr, NULL, 10); - } - /* - * Get the list of available hnp's and setup contact info - * to them in the RML - */ - OBJ_CONSTRUCT(&hnp_list, opal_list_t); - if (ORTE_SUCCESS != (ret = orte_list_local_hnps(&hnp_list, true) ) ) { - orte_show_help("help-orte-top.txt", "orte-top:pid-not-found", true, hnppid); - orte_finalize(); - exit(1); - } - - /* - * For each hnp in the listing - */ - while (NULL != (item = opal_list_remove_first(&hnp_list))) { - orte_hnp_contact_t *hnp = (orte_hnp_contact_t*)item; - if (hnppid == hnp->pid) { - /* this is the one we want */ - target_hnp = hnp; - /* let it continue to run so we deconstruct the list */ - continue; - } - OBJ_RELEASE(hnp); - } - OBJ_DESTRUCT(&hnp_list); - - /* if we get here without finding the one we wanted, then abort */ - if (NULL == target_hnp) { - orte_show_help("help-orte-top.txt", "orte-top:pid-not-found", true, hnppid); - orte_finalize(); - exit(1); - } - } else if (NULL != hnpuristr) { - if (0 == strncmp(hnpuristr, "file", strlen("file")) || - 0 == strncmp(hnpuristr, "FILE", strlen("FILE"))) { - char input[1024], *filename; - FILE *fp; - - /* it is a file - get the filename */ - filename = strchr(hnpuristr, ':'); - if (NULL == filename) { - /* filename is not correctly formatted */ - orte_show_help("help-orte-top.txt", "orte-top:hnp-filename-bad", true, "uri", hnpuristr); - orte_finalize(); - exit(1); - } - ++filename; /* space past the : */ - - if (0 >= strlen(filename)) { - /* they forgot to give us the name! */ - orte_show_help("help-orte-top.txt", "orte-top:hnp-filename-bad", true, "uri", hnpuristr); - orte_finalize(); - exit(1); - } - - /* open the file and extract the uri */ - fp = fopen(filename, "r"); - if (NULL == fp) { /* can't find or read file! */ - orte_show_help("help-orte-top.txt", "orte-top:hnp-filename-access", true, filename); - orte_finalize(); - exit(1); - } - if (NULL == fgets(input, 1024, fp)) { - /* something malformed about file */ - fclose(fp); - orte_show_help("help-orte-top.txt", "orte-top:hnp-file-bad", true, filename); - orte_finalize(); - exit(1); - } - fclose(fp); - input[strlen(input)-1] = '\0'; /* remove newline */ - /* construct the target hnp info */ - target_hnp = OBJ_NEW(orte_hnp_contact_t); - target_hnp->rml_uri = strdup(input); - } else { - /* should just be the uri itself - construct the target hnp info */ - target_hnp = OBJ_NEW(orte_hnp_contact_t); - target_hnp->rml_uri = strdup(hnpuristr); - } - /* extract the name */ - if (ORTE_SUCCESS != orte_rml_base_parse_uris(target_hnp->rml_uri, &target_hnp->name, NULL)) { - orte_show_help("help-orte-top.txt", "orte-top:hnp-uri-bad", true, target_hnp->rml_uri); - orte_finalize(); - exit(1); - } - /* set the info in our contact table */ - OBJ_CONSTRUCT(&val, opal_value_t); - val.key = OPAL_PMIX_PROC_URI; - val.type = OPAL_STRING; - val.data.string = target_hnp->rml_uri; - if (OPAL_SUCCESS != (ret = opal_pmix.store_local(&target_hnp->name, &val))) { - ORTE_ERROR_LOG(ret); - val.key = NULL; - val.data.string = NULL; - OBJ_DESTRUCT(&val); - orte_show_help("help-orte-top.txt", "orte-top:hnp-uri-bad", true, target_hnp->rml_uri); - orte_finalize(); - exit(1); - } - val.key = NULL; - val.data.string = NULL; - OBJ_DESTRUCT(&val); - - /* set the route to be direct */ - if (ORTE_SUCCESS != orte_routed.update_route(rtmod, &target_hnp->name, &target_hnp->name)) { - orte_show_help("help-orte-top.txt", "orte-top:hnp-uri-bad", true, target_hnp->rml_uri); - orte_finalize(); - exit(1); - } - } else { - orte_show_help("help-orte-top.txt", "orte-top:no-contact-given", true); - orte_finalize(); - exit(1); - } - - /* set the target hnp as our lifeline so we will terminate if it exits */ - orte_routed.set_lifeline(rtmod, &target_hnp->name); - - /* if an output file was specified, open it */ - if (NULL != logfile) { - fp = fopen(logfile, "w"); - if (NULL == fp) { - orte_show_help("help-orte-top.txt", "orte-top:cant-open-logfile", true, logfile); - orte_finalize(); - exit(1); - } - } else { - fp = stdout; - } - - /* setup a non-blocking recv to get answers - we don't know how - * many daemons are going to send replies, so we just have to - * accept whatever comes back - */ - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_TOOL, - ORTE_RML_NON_PERSISTENT, recv_stats, NULL); - - - /* setup the command to get the resource usage */ - OBJ_CONSTRUCT(&cmdbuf, opal_buffer_t); - command = ORTE_DAEMON_TOP_CMD; - if (ORTE_SUCCESS != (ret = opal_dss.pack(&cmdbuf, &command, 1, ORTE_DAEMON_CMD))) { - ORTE_ERROR_LOG(ret); - goto cleanup; - } - - proc.jobid = ORTE_PROC_MY_NAME->jobid+1; /* only support initial launch at this time */ - - /* parse the rank list - this can be a comma-separated list of ranks, - * each element being either a single rank or a range. We also allow - * for a -1 to indicate all ranks. If not rank is given, we assume -1 - */ - if (NULL == ranks) { - /* take all ranks */ - proc.vpid = ORTE_VPID_WILDCARD; - if (ORTE_SUCCESS != (ret = opal_dss.pack(&cmdbuf, &proc, 1, ORTE_NAME))) { - ORTE_ERROR_LOG(ret); - goto cleanup; - } - goto SEND; - } - - /* split on commas */ - r1 = opal_argv_split(ranks, ','); - /* for each resulting element, check for range */ - for (i=0; i < opal_argv_count(r1); i++) { - r2 = opal_argv_split(r1[i], '-'); - if (1 < opal_argv_count(r2)) { - /* given range - get start and end */ - vstart = strtol(r2[0], NULL, 10); - vend = strtol(r2[1], NULL, 10); - } else { - /* check for wildcard - have to do this here because - * the -1 would have been caught in the split - */ - vint = strtol(r1[i], NULL, 10); - if (-1 == vint) { - proc.vpid = ORTE_VPID_WILDCARD; - if (ORTE_SUCCESS != (ret = opal_dss.pack(&cmdbuf, &proc, 1, ORTE_NAME))) { - ORTE_ERROR_LOG(ret); - goto cleanup; - } - opal_argv_free(r2); - goto SEND; - } - vstart = strtol(r2[0], NULL, 10); - vend = vstart + 1; - } - for (proc.vpid = vstart; proc.vpid < vend; proc.vpid++) { - if (ORTE_SUCCESS != (ret = opal_dss.pack(&cmdbuf, &proc, 1, ORTE_NAME))) { - ORTE_ERROR_LOG(ret); - goto cleanup; - } - } - opal_argv_free(r2); - } - -SEND: - if (NULL != r1) { - opal_argv_free(r1); - } - send_cmd(0, 0, NULL); - - /* now wait until the termination event fires */ - while (orte_event_base_active) { - opal_event_loop(orte_event_base, OPAL_EVLOOP_ONCE); - } - ORTE_ACQUIRE_OBJECT(orte_event_base_active); - - /*************** - * Cleanup - ***************/ -cleanup: - /* Remove the TERM and INT signal handlers */ - opal_event_signal_del(&term_handler); - opal_event_signal_del(&int_handler); - - while (NULL != (item = opal_list_remove_first(&recvd_stats))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&recvd_stats); - OBJ_DESTRUCT(&cmdbuf); - if (NULL != fp && fp != stdout) { - fclose(fp); - } - orte_finalize(); - - return ret; -} - -static void abort_exit_callback(int fd, short ign, void *arg) -{ - opal_list_item_t *item; - - /* Remove the TERM and INT signal handlers */ - opal_event_signal_del(&term_handler); - OBJ_DESTRUCT(&term_handler); - opal_event_signal_del(&int_handler); - OBJ_DESTRUCT(&int_handler); - - while (NULL != (item = opal_list_remove_first(&recvd_stats))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&recvd_stats); - OBJ_DESTRUCT(&cmdbuf); - if (NULL != fp && fp != stdout) { - fclose(fp); - } - ORTE_UPDATE_EXIT_STATUS(1); - orte_quit(0,0,NULL); -} - -static void recv_stats(int status, orte_process_name_t* sender, - opal_buffer_t *buffer, orte_rml_tag_t tag, - void* cbdata) -{ - int32_t n; - opal_pstats_t *stats; - orte_process_name_t proc; - int ret; - - /* if the sender is the HNP we contacted, this message - * contains info on the number of responses we should get - */ - if (sender->vpid == 0) { - n = 1; - if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &num_replies, &n, OPAL_INT32))) { - ORTE_ERROR_LOG(ret); - goto cleanup; - } - n = 1; - if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &sample_time, &n, OPAL_STRING))) { - ORTE_ERROR_LOG(ret); - goto cleanup; - } - } - - n = 1; - while (ORTE_SUCCESS == opal_dss.unpack(buffer, &proc, &n, ORTE_NAME)) { - n = 1; - if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &stats, &n, OPAL_PSTAT))) { - ORTE_ERROR_LOG(ret); - goto cleanup; - } - /* if field sizes are not yet set, do so now */ - if (!fields_set) { - int tmp; - char *ctmp; - - tmp = strlen(stats->node); - if (nodefield < tmp) { - nodefield = tmp; - } - - opal_asprintf(&ctmp, "%d", stats->rank); - tmp = strlen(ctmp); - free(ctmp); - if (rankfield < tmp) { - rankfield = tmp; - } - - opal_asprintf(&ctmp, "%lu", (unsigned long)stats->pid); - tmp = strlen(ctmp); - free(ctmp); - if (pidfield < tmp) { - pidfield = tmp; - } - - tmp = strlen(stats->cmd); - if (cmdfield < tmp) { - cmdfield = tmp; - } - - if (0 <= stats->priority) { - pri_found = true; - opal_asprintf(&ctmp, "%d", stats->priority); - tmp = strlen(ctmp); - free(ctmp); - if (prifield < tmp) { - prifield = tmp; - } - } - - if (0 <= stats->num_threads) { - thr_found = true; - opal_asprintf(&ctmp, "%d", stats->num_threads); - tmp = strlen(ctmp); - free(ctmp); - if (thrfield < tmp) { - thrfield = tmp; - } - } - - if (0 < stats->vsize) { - vsize_found = true; - opal_asprintf(&ctmp, "%8.2f", stats->vsize); - tmp = strlen(ctmp); - free(ctmp); - if (vsizefield < tmp) { - vsizefield = tmp; - } - } - - if (0 < stats->rss) { - rss_found = true; - opal_asprintf(&ctmp, "%8.2f", stats->rss); - tmp = strlen(ctmp); - free(ctmp); - if (rssfield < tmp) { - rssfield = tmp; - } - } - - if (0 < stats->peak_vsize) { - pkv_found = true; - opal_asprintf(&ctmp, "%8.2f", stats->peak_vsize); - tmp = strlen(ctmp); - free(ctmp); - if (pkvfield < tmp) { - pkvfield = tmp; - } - } - - if (0 <= stats->processor) { - p_found = true; - opal_asprintf(&ctmp, "%d", stats->processor); - tmp = strlen(ctmp); - free(ctmp); - if (pfield < tmp) { - pfield = tmp; - } - } - } - /* add it to the list */ - opal_list_append(&recvd_stats, &stats->super); - } - - cleanup: - /* check for completion */ - num_recvd++; - if (num_replies <= num_recvd) { - /* flag that field sizes are set */ - fields_set = true; - - /* pretty-print what we got */ - pretty_print(); - - /* see if we want to do it again */ - if (0 < update_rate) { - ORTE_TIMER_EVENT(update_rate, 0, send_cmd, ORTE_SYS_PRI); - } else { - orte_finalize(); - exit(0); - } - } - - /* repost the receive */ - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_TOOL, - ORTE_RML_NON_PERSISTENT, recv_stats, NULL); -} - -/* static values needed for printing */ -static int lennode = 0; -static int lenrank = 0; -static int lenpid = 0; -static int lencmd = 0; -static int lenstate = 0; -static int lentime = 0; -static int lenpri = 0; -static int lenthr = 0; -static int lenvsize = 0; -static int lenrss = 0; -static int lenpkv = 0; -static int lensh = 0; -static int lenp = 0; - -static void print_ranks(opal_list_t *statlist) -{ - opal_list_item_t *item; - opal_pstats_t *stats, *pstats; - int32_t minrank; - char pretty_time[32]; - int i; - - /* sort the results by rank */ - while (0 < opal_list_get_size(statlist)) { - minrank = INT32_MAX; - pstats = NULL; - for (item = opal_list_get_first(statlist); - item != opal_list_get_end(statlist); - item = opal_list_get_next(item)) { - stats = (opal_pstats_t*)item; - if (stats->rank < minrank) { - pstats = stats; - minrank = stats->rank; - } - } - memset(pretty_time, 0, sizeof(pretty_time)); - if (pstats->time.tv_sec >= 3600) { - snprintf(pretty_time, sizeof(pretty_time), "%5.1fH", - (double)pstats->time.tv_sec / (double)(3600)); - } else { - snprintf(pretty_time, sizeof(pretty_time), "%3ld:%02ld", - (unsigned long)pstats->time.tv_sec/60, - (unsigned long)pstats->time.tv_sec % 60); - } - - if (bynode) { - /* print blanks in the nodename field */ - for (i=0; i < lennode; i++) { - fprintf(fp, " "); - } - fprintf(fp, " | "); - /* print fields */ - fprintf(fp, "%*d | ", lenrank, pstats->rank); - } else { - fprintf(fp, "%*d | ", lenrank, pstats->rank); - fprintf(fp, "%*s | ", lennode, pstats->node); - } - fprintf(fp, "%*s | ", lencmd, pstats->cmd); - fprintf(fp, "%*lu | ", lenpid, (unsigned long)pstats->pid); - fprintf(fp, "%*c | ", lenstate, pstats->state[0]); - fprintf(fp, "%*s | ", lentime, pretty_time); - if (pri_found) { - fprintf(fp, "%*d | ", lenpri, pstats->priority); - } - if (thr_found) { - fprintf(fp, "%*d | ", lenthr, pstats->num_threads); - } - if (vsize_found) { - fprintf(fp, "%*lu | ", lenvsize, (unsigned long)pstats->vsize); - } - if (rss_found) { - fprintf(fp, "%*lu | ", lenvsize, (unsigned long)pstats->rss); - } - if (pkv_found) { - fprintf(fp, "%*lu | ", lenpkv, (unsigned long)pstats->peak_vsize); - } - if (p_found) { - fprintf(fp, "%*d | ", lenp, pstats->processor); - } - fprintf(fp, "\n"); - num_lines++; - opal_list_remove_item(statlist, &pstats->super); - OBJ_RELEASE(pstats); - } -} - -static void pretty_print(void) -{ - opal_list_item_t *item, *next; - opal_pstats_t *stats; - opal_list_t tmplist; - char *node; - - if (bynode) { - if (need_header) { - print_headers(); - need_header = false; - } - if (timestamp) { - fprintf(fp, "TIMESTAMP: %s\n", sample_time); - } - if (NULL != sample_time) { - free(sample_time); - sample_time = NULL; - } - /* sort the results by node and then rank */ - while (NULL != (item = opal_list_remove_first(&recvd_stats))) { - OBJ_CONSTRUCT(&tmplist, opal_list_t); - stats = (opal_pstats_t*)item; - node = strdup(stats->node); - opal_list_append(&tmplist, &stats->super); - /* cycle through the rest of the list looking - * for matching nodes - */ - item = opal_list_get_first(&recvd_stats); - while (item != opal_list_get_end(&recvd_stats)) { - stats = (opal_pstats_t*)item; - next = opal_list_get_next(item); - if (0 == strcmp(stats->node, node)) { - opal_list_remove_item(&recvd_stats, item); - opal_list_append(&tmplist, &stats->super); - } - item = next; - } - fprintf(fp, "%*s\n", lennode, node); - free(node); - print_ranks(&tmplist); - OBJ_DESTRUCT(&tmplist); - } - } else { - if (need_header) { - print_headers(); - need_header = false; - } - if (timestamp) { - fprintf(fp, "\n\nTIMESTAMP: %s\n", sample_time); - } - if (NULL != sample_time) { - free(sample_time); - sample_time = NULL; - } - print_ranks(&recvd_stats); - } - - /* provide some separation between iterations */ - fprintf(fp, "\n"); - - /* if we have printed more than MAX_LINES since the last header, - * flag that we need to print the header next time - */ - if (MAX_LINES < num_lines) { - need_header = true; - num_lines = 0; - fprintf(fp, "\n\n"); - } -} - -static void print_headers(void) -{ - int num_fields = 0; - int i; - int linelen; - - lennode = strlen("Nodename"); - if (nodefield > lennode) { - lennode = nodefield; - } - num_fields++; - - lenrank = strlen("Rank"); - if (rankfield > lenrank) { - lenrank = rankfield; - } - num_fields++; - - lenpid = strlen("Pid"); - if (pidfield > lenpid) { - lenpid = pidfield; - } - num_fields++; - - lencmd = strlen("Command"); - if (cmdfield > lencmd) { - lencmd = cmdfield; - } - num_fields++; - - lenstate = strlen("State"); - num_fields++; - - lentime = strlen("Time"); - if (timefield > lentime) { - lentime = timefield; - } - num_fields++; - - if (pri_found) { - lenpri = strlen("Pri"); - if (prifield > lenpri) { - lenpri = prifield; - } - num_fields++; - } - - if (thr_found) { - lenthr = strlen("#threads"); - if (thrfield > lenthr) { - lenthr = thrfield; - } - num_fields++; - } - - if (vsize_found) { - lenvsize = strlen("Vsize"); - if (vsizefield > lenvsize) { - lenvsize = vsizefield; - } - num_fields++; - } - - if (rss_found) { - lenrss = strlen("RSS"); - if (rssfield > lenrss) { - lenrss = rssfield; - } - num_fields++; - } - - if (pkv_found) { - lenpkv = strlen("Peak Vsize"); - if (pkvfield > lenpkv) { - lenpkv = pkvfield; - } - num_fields++; - } - - if (p_found) { - lenp = strlen("Processor"); - if (pfield > lenp) { - lenp = pfield; - } - num_fields++; - } - - linelen = lennode + lenrank + lenpid + lencmd + lenstate + lentime + lenpri + lenthr + lenvsize + lenrss + lenpkv + lensh + lenp; - /* add spacing */ - linelen += num_fields * 3; - - /* print the rip line */ - for(i = 0; i < linelen; ++i) { - fprintf(fp, "="); - } - fprintf(fp, "\n"); - - /* print the header */ - if (bynode) { - fprintf(fp, "%*s | ", lennode , "Nodename"); - fprintf(fp, "%*s | ", lenrank , "Rank"); - } else { - fprintf(fp, "%*s | ", lenrank , "Rank"); - fprintf(fp, "%*s | ", lennode , "Nodename"); - } - fprintf(fp, "%*s | ", lencmd , "Command"); - fprintf(fp, "%*s | ", lenpid , "Pid"); - fprintf(fp, "%*s | ", lenstate , "State"); - fprintf(fp, "%*s | ", lentime , "Time"); - if (pri_found) { - fprintf(fp, "%*s | ", lenpri , "Pri"); - } - if (thr_found) { - fprintf(fp, "%*s | ", lenthr , "#threads"); - } - if (vsize_found) { - fprintf(fp, "%*s | ", lenvsize , "Vsize"); - } - if (rss_found) { - fprintf(fp, "%*s | ", lenrss , "RSS"); - } - if (pkv_found) { - fprintf(fp, "%*s | ", lenpkv , "Peak Vsize"); - } - if (p_found) { - fprintf(fp, "%*s | ", lenp , "Processor"); - } - fprintf(fp, "\n"); - - /* print the separator */ - for(i = 0; i < linelen; ++i) { - fprintf(fp, "-"); - } - fprintf(fp, "\n"); - -}