Remove stale defunct tools
Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
родитель
9c44e8c904
Коммит
05ac8fa71c
@ -25,9 +25,7 @@ AC_DEFUN([ORTE_CONFIG_FILES],[
|
||||
orte/tools/wrappers/Makefile
|
||||
orte/tools/wrappers/ortecc-wrapper-data.txt
|
||||
orte/tools/wrappers/orte.pc
|
||||
orte/tools/orte-ps/Makefile
|
||||
orte/tools/orte-clean/Makefile
|
||||
orte/tools/orte-top/Makefile
|
||||
orte/tools/orte-info/Makefile
|
||||
orte/tools/orte-server/Makefile
|
||||
])
|
||||
|
@ -28,7 +28,7 @@ libmca_rte_orte_la_SOURCES =$(sources) $(headers)
|
||||
libmca_rte_orte_la_LDFLAGS = -module -avoid-version
|
||||
libmca_rte_orte_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la
|
||||
|
||||
man_pages = mpirun.1 mpiexec.1 ompi-ps.1 ompi-clean.1 ompi-top.1 ompi-server.1
|
||||
man_pages = mpirun.1 mpiexec.1 ompi-clean.1 ompi-server.1
|
||||
|
||||
if OPAL_INSTALL_BINARIES
|
||||
nodist_man_MANS = $(man_pages)
|
||||
@ -36,17 +36,13 @@ nodist_man_MANS = $(man_pages)
|
||||
install-exec-hook:
|
||||
(cd $(DESTDIR)$(bindir); rm -f mpirun$(EXEEXT); $(LN_S) orterun$(EXEEXT) mpirun$(EXEEXT))
|
||||
(cd $(DESTDIR)$(bindir); rm -f mpiexec$(EXEEXT); $(LN_S) orterun$(EXEEXT) mpiexec$(EXEEXT))
|
||||
(cd $(DESTDIR)$(bindir); rm -f ompi-ps$(EXEEXT); $(LN_S) orte-ps$(EXEEXT) ompi-ps$(EXEEXT))
|
||||
(cd $(DESTDIR)$(bindir); rm -f ompi-clean$(EXEEXT); $(LN_S) orte-clean$(EXEEXT) ompi-clean$(EXEEXT))
|
||||
(cd $(DESTDIR)$(bindir); rm -f ompi-top$(EXEEXT); $(LN_S) orte-top$(EXEEXT) ompi-top$(EXEEXT))
|
||||
(cd $(DESTDIR)$(bindir); rm -f ompi-server$(EXEEXT); $(LN_S) orte-server$(EXEEXT) ompi-server$(EXEEXT))
|
||||
|
||||
uninstall-local:
|
||||
rm -f $(DESTDIR)$(bindir)/mpirun$(EXEEXT) \
|
||||
$(DESTDIR)$(bindir)/mpiexec$(EXEEXT) \
|
||||
$(DESTDIR)$(bindir)/ompi-ps$(EXEEXT) \
|
||||
$(DESTDIR)$(bindir)/ompi-clean$(EXEEXT) \
|
||||
$(DESTDIR)$(bindir)/ompi-top$(EXEEXT) \
|
||||
$(DESTDIR)$(bindir)/ompi-server$(EXEEXT)
|
||||
|
||||
endif # OPAL_INSTALL_BINARIES
|
||||
@ -60,24 +56,12 @@ mpirun.1: $(top_builddir)/orte/tools/orterun/orterun.1
|
||||
mpiexec.1: $(top_builddir)/orte/tools/orterun/orterun.1
|
||||
cp -f $(top_builddir)/orte/tools/orterun/orterun.1 mpiexec.1
|
||||
|
||||
$(top_builddir)/orte/tools/orte-ps/orte-ps.1:
|
||||
(cd $(top_builddir)/orte/tools/orte-ps && $(MAKE) $(AM_MAKEFLAGS) orte-ps.1)
|
||||
|
||||
ompi-ps.1: $(top_builddir)/orte/tools/orte-ps/orte-ps.1
|
||||
cp -f $(top_builddir)/orte/tools/orte-ps/orte-ps.1 ompi-ps.1
|
||||
|
||||
$(top_builddir)/orte/tools/orte-clean/orte-clean.1:
|
||||
(cd $(top_builddir)/orte/tools/orte-clean && $(MAKE) $(AM_MAKEFLAGS) orte-clean.1)
|
||||
|
||||
ompi-clean.1: $(top_builddir)/orte/tools/orte-clean/orte-clean.1
|
||||
cp -f $(top_builddir)/orte/tools/orte-clean/orte-clean.1 ompi-clean.1
|
||||
|
||||
$(top_builddir)/orte/tools/orte-top/orte-top.1:
|
||||
(cd $(top_builddir)/orte/tools/orte-top && $(MAKE) $(AM_MAKEFLAGS) orte-top.1)
|
||||
|
||||
ompi-top.1: $(top_builddir)/orte/tools/orte-top/orte-top.1
|
||||
cp -f $(top_builddir)/orte/tools/orte-top/orte-top.1 ompi-top.1
|
||||
|
||||
$(top_builddir)/orte/tools/orte-server/orte-server.1:
|
||||
(cd $(top_builddir)/orte/tools/orte-server && $(MAKE) $(AM_MAKEFLAGS) orte-server.1)
|
||||
|
||||
|
@ -26,20 +26,16 @@
|
||||
|
||||
SUBDIRS += \
|
||||
tools/orte-clean \
|
||||
tools/orte-ps \
|
||||
tools/orted \
|
||||
tools/orterun \
|
||||
tools/wrappers \
|
||||
tools/orte-top \
|
||||
tools/orte-info \
|
||||
tools/orte-server
|
||||
|
||||
DIST_SUBDIRS += \
|
||||
tools/orte-clean \
|
||||
tools/orte-ps \
|
||||
tools/orted \
|
||||
tools/orterun \
|
||||
tools/wrappers \
|
||||
tools/orte-top \
|
||||
tools/orte-info \
|
||||
tools/orte-server
|
||||
|
@ -1,47 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
include $(top_srcdir)/Makefile.ompi-rules
|
||||
|
||||
man_pages = orte-ps.1
|
||||
EXTRA_DIST = $(man_pages:.1=.1in)
|
||||
|
||||
if OPAL_INSTALL_BINARIES
|
||||
|
||||
bin_PROGRAMS = orte-ps
|
||||
|
||||
nodist_man_MANS = $(man_pages)
|
||||
|
||||
# Ensure that the man pages are rebuilt if the opal_config.h file
|
||||
# changes; a "good enough" way to know if configure was run again (and
|
||||
# therefore the release date or version may have changed)
|
||||
$(nodist_man_MANS): $(top_builddir)/opal/include/opal_config.h
|
||||
|
||||
dist_ortedata_DATA = help-orte-ps.txt
|
||||
|
||||
endif # OPAL_INSTALL_BINARIES
|
||||
|
||||
orte_ps_SOURCES = orte-ps.c
|
||||
orte_ps_LDADD = \
|
||||
$(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la \
|
||||
$(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la
|
||||
|
||||
distclean-local:
|
||||
rm -f $(man_pages)
|
@ -1,46 +0,0 @@
|
||||
# -*- text -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2012 Cisco Systems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
# This is the US/English help file for Open MPI PS tool
|
||||
#
|
||||
[usage]
|
||||
ompi-ps [OPTIONS]
|
||||
Open MPI Job and Process Status Tool
|
||||
|
||||
%s
|
||||
#
|
||||
[vpid-usage]
|
||||
Error: You specified a vpid (%d) without also specifying a jobid.
|
||||
Use the '-j' option to specify a jobid.
|
||||
#
|
||||
[need-vpid]
|
||||
Error: You specified a jobid (%d) without also specifying a vpid.
|
||||
Use the '-p' option to specify a vpid.
|
||||
#
|
||||
[invalid-vpid]
|
||||
Error: The specified vpid (%d) is not valid for job %d.
|
||||
#
|
||||
[stale-hnp]
|
||||
An attempt was made to obtain ps information from at least
|
||||
one non-responsive HNP:
|
||||
|
||||
HNP name: %s
|
||||
|
||||
You may want to cleanup stale session directories in your temporary
|
||||
directory (e.g., $TMPDIR).
|
@ -1,101 +0,0 @@
|
||||
.\"
|
||||
.\" Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
.\" University Research and Technology
|
||||
.\" Corporation. All rights reserved.
|
||||
.\" Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved.
|
||||
.\"
|
||||
.\" Man page for OMPI's ompi-ps command
|
||||
.\"
|
||||
.\" .TH name section center-footer left-footer center-header
|
||||
.TH OMPI-PS 1 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
|
||||
.\" **************************
|
||||
.\" Name Section
|
||||
.\" **************************
|
||||
.SH NAME
|
||||
.
|
||||
ompi-ps, orte-ps \- Displays information about the active jobs and processes
|
||||
in Open MPI.
|
||||
.
|
||||
.PP
|
||||
.
|
||||
\fBNOTE:\fP \fIompi-ps\fP, and \fIorte-ps\fP are exact
|
||||
synonyms for each other. Using any of the names will result in exactly
|
||||
identical behavior.
|
||||
.
|
||||
.\" **************************
|
||||
.\" Synopsis Section
|
||||
.\" **************************
|
||||
.SH SYNOPSIS
|
||||
.
|
||||
.B ompi-ps
|
||||
.B [ options ]
|
||||
.
|
||||
.\" **************************
|
||||
.\" Options Section
|
||||
.\" **************************
|
||||
.SH Options
|
||||
.
|
||||
\fIompi-ps\fR will display information about running job(s) in the current
|
||||
universe.
|
||||
.
|
||||
.TP 10
|
||||
.B -h | --help
|
||||
Display help for this command
|
||||
.
|
||||
.
|
||||
.TP
|
||||
.B -v | --verbose
|
||||
Enable verbose output for debugging
|
||||
.
|
||||
.
|
||||
.TP
|
||||
.B --daemons
|
||||
Display daemon job information.
|
||||
.
|
||||
.
|
||||
.TP
|
||||
.B -j | --jobid
|
||||
Display the state of a specific job in the universe. By default all jobs will
|
||||
be displayed.
|
||||
.
|
||||
.
|
||||
.TP
|
||||
.B -p | --vpid
|
||||
Display the state of a specific vpid (process) in the universe. By default all
|
||||
vpids cooresponding to processes will be displayed. Must be used in conjunction
|
||||
with the \fB--jobid\fP option.
|
||||
.
|
||||
.
|
||||
.TP
|
||||
.B -n | --nodes
|
||||
Display all of the allocated nodes, and their cooresponding states. By default
|
||||
this is disabled.
|
||||
.
|
||||
.
|
||||
.TP
|
||||
.B -gmca | --gmca \fR<key> <value>\fP
|
||||
Pass global MCA parameters that are applicable to all contexts. \fI<key>\fP is
|
||||
the parameter name; \fI<value>\fP is the parameter value.
|
||||
.
|
||||
.
|
||||
.TP
|
||||
.B -mca | --mca <key> <value>
|
||||
Send arguments to various MCA modules.
|
||||
.
|
||||
.
|
||||
.\" **************************
|
||||
.\" Description Section
|
||||
.\" **************************
|
||||
.SH DESCRIPTION
|
||||
.
|
||||
.PP
|
||||
\fIompi-ps\fR displays the state of jobs running inside an Open RTE universe.
|
||||
.
|
||||
.
|
||||
.\" **************************
|
||||
.\" See Also Section
|
||||
.\" **************************
|
||||
.
|
||||
.SH SEE ALSO
|
||||
orterun(1), orte-clean(1)
|
||||
.
|
@ -1,986 +0,0 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2011 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2013 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
/**
|
||||
* @fie
|
||||
* ORTE PS command
|
||||
*
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif /* HAVE_UNISTD_H */
|
||||
#include <stdlib.h>
|
||||
#ifdef HAVE_SYS_STAT_H
|
||||
#include <sys/stat.h>
|
||||
#endif /* HAVE_SYS_STAT_H */
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif /* HAVE_SYS_TYPES_H */
|
||||
#ifdef HAVE_SYS_WAIT_H
|
||||
#include <sys/wait.h>
|
||||
#endif /* HAVE_SYS_WAIT_H */
|
||||
#include <string.h>
|
||||
#ifdef HAVE_DIRENT_H
|
||||
#include <dirent.h>
|
||||
#endif /* HAVE_DIRENT_H */
|
||||
|
||||
#include "opal/util/basename.h"
|
||||
#include "opal/util/cmd_line.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/opal_environ.h"
|
||||
#include "opal/util/printf.h"
|
||||
#include "opal/util/show_help.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/runtime/opal.h"
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
#include "opal/runtime/opal_cr.h"
|
||||
#endif
|
||||
|
||||
#include "orte/runtime/runtime.h"
|
||||
#include "orte/util/error_strings.h"
|
||||
#include "orte/util/hnp_contact.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/util/comm/comm.h"
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
#include "orte/mca/snapc/base/base.h"
|
||||
#endif
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
|
||||
struct orte_ps_mpirun_info_t {
|
||||
/** This is an object, so it must have a super */
|
||||
opal_list_item_t super;
|
||||
|
||||
/* HNP info */
|
||||
orte_hnp_contact_t *hnp;
|
||||
|
||||
/* array of jobs */
|
||||
orte_std_cntr_t num_jobs;
|
||||
orte_job_t **jobs;
|
||||
|
||||
/* array of nodes */
|
||||
orte_std_cntr_t num_nodes;
|
||||
orte_node_t **nodes;
|
||||
};
|
||||
typedef struct orte_ps_mpirun_info_t orte_ps_mpirun_info_t;
|
||||
|
||||
static void orte_ps_mpirun_info_construct(orte_ps_mpirun_info_t *ptr)
|
||||
{
|
||||
ptr->hnp = NULL;
|
||||
ptr->num_jobs = 0;
|
||||
ptr->jobs = NULL;
|
||||
ptr->num_nodes = 0;
|
||||
ptr->nodes = NULL;
|
||||
}
|
||||
static void orte_ps_mpirun_info_destruct(orte_ps_mpirun_info_t *ptr)
|
||||
{
|
||||
orte_std_cntr_t i;
|
||||
|
||||
if (NULL != ptr->hnp) OBJ_RELEASE(ptr->hnp);
|
||||
if (NULL != ptr->jobs) {
|
||||
for (i=0; i < ptr->num_jobs; i++) {
|
||||
OBJ_RELEASE(ptr->jobs[i]);
|
||||
}
|
||||
free(ptr->jobs);
|
||||
}
|
||||
if (NULL != ptr->nodes) {
|
||||
for (i=0; i < ptr->num_nodes; i++) {
|
||||
OBJ_RELEASE(ptr->nodes[i]);
|
||||
}
|
||||
free(ptr->nodes);
|
||||
}
|
||||
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(orte_ps_mpirun_info_t,
|
||||
opal_list_item_t,
|
||||
orte_ps_mpirun_info_construct,
|
||||
orte_ps_mpirun_info_destruct);
|
||||
|
||||
/******************
|
||||
* Local Functions
|
||||
******************/
|
||||
static int orte_ps_init(int argc, char *argv[]);
|
||||
static int parse_args(int argc, char *argv[]);
|
||||
|
||||
static int gather_information(orte_ps_mpirun_info_t *hnpinfo);
|
||||
static int gather_active_jobs(orte_ps_mpirun_info_t *hnpinfo);
|
||||
static int gather_nodes(orte_ps_mpirun_info_t *hnpinfo);
|
||||
static int gather_vpid_info(orte_ps_mpirun_info_t *hnpinfo);
|
||||
|
||||
static int pretty_print(orte_ps_mpirun_info_t *hnpinfo);
|
||||
static int pretty_print_nodes(orte_node_t **nodes, orte_std_cntr_t num_nodes);
|
||||
static int pretty_print_jobs(orte_job_t **jobs, orte_std_cntr_t num_jobs);
|
||||
static int pretty_print_vpids(orte_job_t *job);
|
||||
static void pretty_print_dashed_line(int len);
|
||||
|
||||
static char *pretty_node_state(orte_node_state_t state);
|
||||
|
||||
static int parseable_print(orte_ps_mpirun_info_t *hnpinfo);
|
||||
|
||||
/*****************************************
|
||||
* Global Vars for Command line Arguments
|
||||
*****************************************/
|
||||
typedef struct {
|
||||
bool help;
|
||||
bool verbose;
|
||||
bool parseable;
|
||||
orte_jobid_t jobid;
|
||||
bool nodes;
|
||||
bool daemons;
|
||||
int output;
|
||||
pid_t pid;
|
||||
} orte_ps_globals_t;
|
||||
|
||||
orte_ps_globals_t orte_ps_globals = {0};
|
||||
|
||||
opal_cmd_line_init_t cmd_line_opts[] = {
|
||||
{ NULL,
|
||||
'h', NULL, "help",
|
||||
0,
|
||||
&orte_ps_globals.help, OPAL_CMD_LINE_TYPE_BOOL,
|
||||
"This help message" },
|
||||
|
||||
{ NULL,
|
||||
'v', NULL, "verbose",
|
||||
0,
|
||||
&orte_ps_globals.verbose, OPAL_CMD_LINE_TYPE_BOOL,
|
||||
"Be Verbose" },
|
||||
|
||||
{ NULL,
|
||||
'\0', NULL, "parseable",
|
||||
0,
|
||||
&orte_ps_globals.parseable, OPAL_CMD_LINE_TYPE_BOOL,
|
||||
"Provide parseable output" },
|
||||
|
||||
{ NULL,
|
||||
'\0', NULL, "daemons",
|
||||
0,
|
||||
&orte_ps_globals.daemons, OPAL_CMD_LINE_TYPE_INT,
|
||||
"Display daemon job information" },
|
||||
|
||||
{ NULL,
|
||||
'j', NULL, "jobid",
|
||||
1,
|
||||
&orte_ps_globals.jobid, OPAL_CMD_LINE_TYPE_INT,
|
||||
"Specify a local jobid for the given mpirun - a value from 0 to N" },
|
||||
|
||||
{ NULL,
|
||||
'p', NULL, "pid",
|
||||
1,
|
||||
&orte_ps_globals.pid, OPAL_CMD_LINE_TYPE_INT,
|
||||
"Specify mpirun pid" },
|
||||
|
||||
{ NULL,
|
||||
'n', NULL, "nodes",
|
||||
0,
|
||||
&orte_ps_globals.nodes, OPAL_CMD_LINE_TYPE_INT,
|
||||
"Display Node Information" },
|
||||
|
||||
/* End of list */
|
||||
{ NULL,
|
||||
'\0', NULL, NULL,
|
||||
0,
|
||||
NULL, OPAL_CMD_LINE_TYPE_NULL,
|
||||
NULL }
|
||||
};
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
int ret, exit_status = ORTE_SUCCESS;
|
||||
opal_list_t hnp_list;
|
||||
opal_list_item_t* item = NULL;
|
||||
orte_ps_mpirun_info_t hnpinfo;
|
||||
bool reported = false;
|
||||
|
||||
/***************
|
||||
* Initialize
|
||||
***************/
|
||||
OBJ_CONSTRUCT(&hnp_list, opal_list_t);
|
||||
|
||||
if (ORTE_SUCCESS != (ret = orte_ps_init(argc, argv))) {
|
||||
exit_status = ret;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get the directory listing
|
||||
*/
|
||||
opal_output_verbose(10, orte_ps_globals.output,
|
||||
"orte_ps: Acquiring list of HNPs and setting contact info into RML...\n");
|
||||
|
||||
if (ORTE_SUCCESS != (ret = orte_list_local_hnps(&hnp_list, true) ) ) {
|
||||
exit_status = ret;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
opal_output_verbose(10, orte_ps_globals.output,
|
||||
"orte_ps: Found %d HNPs\n",
|
||||
(int)opal_list_get_size(&hnp_list));
|
||||
|
||||
/*
|
||||
* For each hnp in the listing
|
||||
*/
|
||||
while (NULL != (item = opal_list_remove_first(&hnp_list))) {
|
||||
orte_hnp_contact_t *hnp = (orte_hnp_contact_t*)item;
|
||||
hnpinfo.hnp = hnp;
|
||||
|
||||
opal_output_verbose(10, orte_ps_globals.output,
|
||||
"orte_ps: Processing HNP %lu\n",
|
||||
(unsigned long)hnpinfo.hnp->pid);
|
||||
|
||||
if (0 < orte_ps_globals.pid &&
|
||||
hnpinfo.hnp->pid != orte_ps_globals.pid) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* Gather the information
|
||||
*/
|
||||
opal_output_verbose(10, orte_ps_globals.output,
|
||||
"orte_ps: Gathering Information for HNP: %s:%d\n",
|
||||
ORTE_NAME_PRINT(&(hnpinfo.hnp->name)),
|
||||
hnpinfo.hnp->pid);
|
||||
|
||||
if( ORTE_SUCCESS != (ret = gather_information(&hnpinfo)) ) {
|
||||
/* this could be due to a stale session directory - if so,
|
||||
* just skip this entry, but don't abort
|
||||
*/
|
||||
if (!reported && ORTE_ERR_SILENT == ret) {
|
||||
orte_show_help("help-orte-ps.txt", "stale-hnp", true,
|
||||
ORTE_NAME_PRINT(&(hnpinfo.hnp->name)));
|
||||
reported = true;
|
||||
continue;
|
||||
}
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* Print the information */
|
||||
if (orte_ps_globals.parseable) {
|
||||
if (ORTE_SUCCESS != (ret = parseable_print(&hnpinfo))) {
|
||||
exit_status = ret;
|
||||
goto cleanup;
|
||||
}
|
||||
} else {
|
||||
if(ORTE_SUCCESS != (ret = pretty_print(&hnpinfo)) ) {
|
||||
exit_status = ret;
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/***************
|
||||
* Cleanup
|
||||
***************/
|
||||
cleanup:
|
||||
orte_finalize();
|
||||
|
||||
return exit_status;
|
||||
}
|
||||
|
||||
static int parse_args(int argc, char *argv[]) {
|
||||
int ret;
|
||||
opal_cmd_line_t cmd_line;
|
||||
orte_ps_globals_t tmp = { false, /* help */
|
||||
false, /* verbose */
|
||||
false, /* parseable */
|
||||
ORTE_JOBID_WILDCARD, /* jobid */
|
||||
false, /* nodes */
|
||||
false, /* daemons */
|
||||
-1, /* output */
|
||||
0}; /* pid */
|
||||
|
||||
orte_ps_globals = tmp;
|
||||
|
||||
/* Parse the command line options */
|
||||
opal_cmd_line_create(&cmd_line, cmd_line_opts);
|
||||
|
||||
mca_base_open();
|
||||
mca_base_cmd_line_setup(&cmd_line);
|
||||
ret = opal_cmd_line_parse(&cmd_line, false, false, argc, argv);
|
||||
|
||||
if (OPAL_SUCCESS != ret) {
|
||||
if (OPAL_ERR_SILENT != ret) {
|
||||
fprintf(stderr, "%s: command line error (%s)\n", argv[0],
|
||||
opal_strerror(ret));
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* Now start parsing our specific arguments
|
||||
*/
|
||||
if (orte_ps_globals.help) {
|
||||
char *str, *args = NULL;
|
||||
args = opal_cmd_line_get_usage_msg(&cmd_line);
|
||||
str = opal_show_help_string("help-orte-ps.txt", "usage", true,
|
||||
args);
|
||||
if (NULL != str) {
|
||||
printf("%s", str);
|
||||
free(str);
|
||||
}
|
||||
free(args);
|
||||
/* If we show the help message, that should be all we do */
|
||||
exit(0);
|
||||
}
|
||||
|
||||
/* if the jobid is given, then we need a pid */
|
||||
if (ORTE_JOBID_WILDCARD != orte_ps_globals.jobid &&
|
||||
0 == orte_ps_globals.pid) {
|
||||
orte_show_help("help-orte-ps.txt", "need-vpid", true,
|
||||
orte_ps_globals.jobid);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static int orte_ps_init(int argc, char *argv[]) {
|
||||
int ret;
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
char * tmp_env_var = NULL;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Make sure to init util before parse_args
|
||||
* to ensure installdirs is setup properly
|
||||
* before calling mca_base_open();
|
||||
*/
|
||||
if( ORTE_SUCCESS != (ret = opal_init_util(&argc, &argv)) ) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Parse Command Line Arguments
|
||||
*/
|
||||
if (ORTE_SUCCESS != (ret = parse_args(argc, argv))) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Setup OPAL Output handle from the verbose argument
|
||||
*/
|
||||
if( orte_ps_globals.verbose ) {
|
||||
orte_ps_globals.output = opal_output_open(NULL);
|
||||
opal_output_set_verbosity(orte_ps_globals.output, 10);
|
||||
} else {
|
||||
orte_ps_globals.output = 0; /* Default=STDERR */
|
||||
}
|
||||
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
/* Disable the checkpoint notification routine for this
|
||||
* tool. As we will never need to checkpoint this tool.
|
||||
* Note: This must happen before opal_init().
|
||||
*/
|
||||
opal_cr_set_enabled(false);
|
||||
|
||||
/* Select the none component, since we don't actually use a checkpointer */
|
||||
(void) mca_base_var_env_name("crs", &tmp_env_var);
|
||||
opal_setenv(tmp_env_var,
|
||||
"none",
|
||||
true, &environ);
|
||||
free(tmp_env_var);
|
||||
tmp_env_var = NULL;
|
||||
|
||||
(void) mca_base_var_env_name("opal_cr_is_tool", &tmp_env_var);
|
||||
opal_setenv(tmp_env_var,
|
||||
"1",
|
||||
true, &environ);
|
||||
free(tmp_env_var);
|
||||
#endif
|
||||
|
||||
/* we are never allowed to operate as a distributed tool,
|
||||
* so insist on the ess/tool component */
|
||||
opal_setenv("OMPI_MCA_ess", "tool", true, &environ);
|
||||
|
||||
/***************************
|
||||
* We need all of OPAL and the TOOL portion of ORTE
|
||||
***************************/
|
||||
ret = orte_init(&argc, &argv, ORTE_PROC_TOOL);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int pretty_print(orte_ps_mpirun_info_t *hnpinfo) {
|
||||
char *header;
|
||||
int len_hdr;
|
||||
|
||||
/*
|
||||
* Print header and remember header length
|
||||
*/
|
||||
len_hdr = opal_asprintf(&header, "Information from mpirun %s", ORTE_JOBID_PRINT(hnpinfo->hnp->name.jobid));
|
||||
|
||||
printf("\n\n%s\n", header);
|
||||
free(header);
|
||||
pretty_print_dashed_line(len_hdr);
|
||||
|
||||
/*
|
||||
* Print Node Information
|
||||
*/
|
||||
if( orte_ps_globals.nodes )
|
||||
pretty_print_nodes(hnpinfo->nodes, hnpinfo->num_nodes);
|
||||
|
||||
/*
|
||||
* Print Job Information
|
||||
*/
|
||||
pretty_print_jobs(hnpinfo->jobs, hnpinfo->num_jobs);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static int pretty_print_nodes(orte_node_t **nodes, orte_std_cntr_t num_nodes) {
|
||||
int line_len;
|
||||
int len_name = 0,
|
||||
len_state = 0,
|
||||
len_slots = 0,
|
||||
len_slots_i = 0,
|
||||
len_slots_m = 0;
|
||||
orte_node_t *node;
|
||||
orte_std_cntr_t i;
|
||||
|
||||
/*
|
||||
* Caculate segment lengths
|
||||
*/
|
||||
len_name = (int) strlen("Node Name");
|
||||
len_state = (int) strlen("State");
|
||||
len_slots = (int) strlen("Slots");
|
||||
len_slots_i = (int) strlen("Slots In Use");
|
||||
len_slots_m = (int) strlen("Slots Max");
|
||||
|
||||
for(i=0; i < num_nodes; i++) {
|
||||
node = nodes[i];
|
||||
|
||||
if( NULL != node->name &&
|
||||
(int)strlen(node->name) > len_name)
|
||||
len_name = (int) strlen(node->name);
|
||||
|
||||
if( (int)strlen(pretty_node_state(node->state)) > len_state )
|
||||
len_state = (int)strlen(pretty_node_state(node->state));
|
||||
}
|
||||
|
||||
line_len = (len_name + 3 +
|
||||
len_state + 3 +
|
||||
len_slots + 3 +
|
||||
len_slots_i + 3 +
|
||||
len_slots_m) + 2;
|
||||
|
||||
/*
|
||||
* Print the header
|
||||
*/
|
||||
printf("%*s | ", len_name, "Node Name");
|
||||
printf("%*s | ", len_state, "State");
|
||||
printf("%*s | ", len_slots, "Slots");
|
||||
printf("%*s | ", len_slots_m, "Slots Max");
|
||||
printf("%*s | ", len_slots_i, "Slots In Use");
|
||||
printf("\n");
|
||||
|
||||
pretty_print_dashed_line(line_len);
|
||||
|
||||
/*
|
||||
* Print Info
|
||||
*/
|
||||
for(i=0; i < num_nodes; i++) {
|
||||
node = nodes[i];
|
||||
|
||||
printf("%*s | ", len_name, node->name);
|
||||
printf("%*s | ", len_state, pretty_node_state(node->state));
|
||||
printf("%*d | ", len_slots, (uint)node->slots);
|
||||
printf("%*d | ", len_slots_m, (uint)node->slots_max);
|
||||
printf("%*d | ", len_slots_i, (uint)node->slots_inuse);
|
||||
printf("\n");
|
||||
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static int pretty_print_jobs(orte_job_t **jobs, orte_std_cntr_t num_jobs) {
|
||||
int len_jobid = 0,
|
||||
len_state = 0,
|
||||
len_slots = 0,
|
||||
len_vpid_r = 0,
|
||||
len_ckpt_s = 0,
|
||||
len_ckpt_r = 0,
|
||||
len_ckpt_l = 0;
|
||||
int line_len;
|
||||
orte_job_t *job;
|
||||
orte_std_cntr_t i;
|
||||
char *jobstr;
|
||||
orte_jobid_t mask=0x0000ffff;
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
char * state_str = NULL;
|
||||
size_t ckpt_state;
|
||||
char *snap_ref = NULL;
|
||||
char *snap_loc = NULL;
|
||||
#endif
|
||||
|
||||
for(i=0; i < num_jobs; i++) {
|
||||
job = jobs[i];
|
||||
|
||||
/* check the jobid to see if this is the daemons' job */
|
||||
if ((0 == (mask & job->jobid)) && !orte_ps_globals.daemons) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* setup the printed name - do -not- free this! */
|
||||
jobstr = ORTE_JOBID_PRINT(job->jobid);
|
||||
|
||||
/*
|
||||
* Caculate segment lengths
|
||||
*/
|
||||
len_jobid = strlen(jobstr);;
|
||||
len_state = (int) (strlen(orte_job_state_to_str(job->state)) < strlen("State") ?
|
||||
strlen("State") :
|
||||
strlen(orte_job_state_to_str(job->state)));
|
||||
len_slots = 6;
|
||||
len_vpid_r = (int) strlen("Num Procs");
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
orte_get_attribute(&job->attributes, ORTE_JOB_CKPT_STATE, (void**)&ckpt_state, OPAL_INT32);
|
||||
orte_get_attribute(&job->attributes, ORTE_JOB_SNAPSHOT_REF, (void**)&snap_ref, OPAL_STRING);
|
||||
orte_get_attribute(&job->attributes, ORTE_JOB_SNAPSHOT_LOC, (void**)&snap_loc, OPAL_STRING);
|
||||
orte_snapc_ckpt_state_str(&state_str, ckpt_state);
|
||||
len_ckpt_s = (int) (strlen(state_str) < strlen("Ckpt State") ?
|
||||
strlen("Ckpt State") :
|
||||
strlen(state_str) );
|
||||
len_ckpt_r = (int) (NULL == snap_ref ? strlen("Ckpt Ref") : (strlen(snap_ref) < strlen("Ckpt Ref") ?
|
||||
strlen("Ckpt Ref") : strlen(snap_ref)));
|
||||
len_ckpt_l = (int) (NULL == snap_loc ? strlen("Ckpt Loc") : (strlen(snap_loc) < strlen("Ckpt Loc") ?
|
||||
strlen("Ckpt Loc") : strlen(snap_loc)));
|
||||
#else
|
||||
len_ckpt_s = -3;
|
||||
len_ckpt_r = -3;
|
||||
len_ckpt_l = -3;
|
||||
#endif
|
||||
|
||||
line_len = (len_jobid + 3 +
|
||||
len_state + 3 +
|
||||
len_slots + 3 +
|
||||
len_vpid_r + 3 +
|
||||
len_ckpt_s + 3 +
|
||||
len_ckpt_r + 3 +
|
||||
len_ckpt_l)
|
||||
+ 2;
|
||||
|
||||
/*
|
||||
* Print Header
|
||||
*/
|
||||
printf("\n");
|
||||
printf("%*s | ", len_jobid , "JobID");
|
||||
printf("%*s | ", len_state , "State");
|
||||
printf("%*s | ", len_slots , "Slots");
|
||||
printf("%*s | ", len_vpid_r , "Num Procs");
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
printf("%*s | ", len_ckpt_s , "Ckpt State");
|
||||
printf("%*s | ", len_ckpt_r , "Ckpt Ref");
|
||||
printf("%*s |", len_ckpt_l , "Ckpt Loc");
|
||||
#endif
|
||||
printf("\n");
|
||||
|
||||
pretty_print_dashed_line(line_len);
|
||||
|
||||
/*
|
||||
* Print Info
|
||||
*/
|
||||
printf("%*s | ", len_jobid , ORTE_JOBID_PRINT(job->jobid));
|
||||
printf("%*s | ", len_state , orte_job_state_to_str(job->state));
|
||||
printf("%*d | ", len_slots , (uint)job->total_slots_alloc);
|
||||
printf("%*d | ", len_vpid_r, job->num_procs);
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
printf("%*s | ", len_ckpt_s, state_str);
|
||||
printf("%*s | ", len_ckpt_r, (NULL == snap_ref ? "" : snap_ref));
|
||||
printf("%*s |", len_ckpt_l, (NULL == snap_loc ? "" : snap_loc));
|
||||
#endif
|
||||
printf("\n");
|
||||
|
||||
|
||||
pretty_print_vpids(job);
|
||||
printf("\n\n"); /* give a little room between job outputs */
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static int pretty_print_vpids(orte_job_t *job) {
|
||||
int len_o_proc_name = 0,
|
||||
len_proc_name = 0,
|
||||
len_rank = 0,
|
||||
len_pid = 0,
|
||||
len_state = 0,
|
||||
len_node = 0,
|
||||
len_ckpt_s = 0,
|
||||
len_ckpt_r = 0,
|
||||
len_ckpt_l = 0;
|
||||
int i, line_len;
|
||||
orte_vpid_t v;
|
||||
orte_proc_t *vpid;
|
||||
orte_app_context_t *app;
|
||||
char *o_proc_name;
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
char *state_str = NULL;
|
||||
size_t ckpt_state;
|
||||
char *snap_ref = NULL;
|
||||
char *snap_loc = NULL;
|
||||
#endif
|
||||
char **nodename = NULL;
|
||||
|
||||
if (0 == job->num_procs) {
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Caculate segment lengths
|
||||
*/
|
||||
len_o_proc_name = (int)strlen("ORTE Name");
|
||||
len_proc_name = (int)strlen("Process Name");
|
||||
len_rank = (int)strlen("Local Rank");
|
||||
len_pid = 6;
|
||||
len_state = 0;
|
||||
len_node = 0;
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
len_ckpt_s = strlen("Ckpt State");
|
||||
len_ckpt_r = strlen("Ckpt Ref");
|
||||
len_ckpt_l = strlen("Ckpt Loc");
|
||||
#else
|
||||
len_ckpt_s = -3;
|
||||
len_ckpt_r = -3;
|
||||
len_ckpt_l = -3;
|
||||
#endif
|
||||
|
||||
nodename = (char **) malloc(job->num_procs * sizeof(char *));
|
||||
for(v=0; v < job->num_procs; v++) {
|
||||
char *rankstr;
|
||||
vpid = (orte_proc_t*)job->procs->addr[v];
|
||||
|
||||
/*
|
||||
* Find my app context
|
||||
*/
|
||||
if( 0 >= (int)job->num_apps ) {
|
||||
if( 0 == vpid->name.vpid ) {
|
||||
if( (int)strlen("orterun") > len_proc_name)
|
||||
len_proc_name = strlen("orterun");
|
||||
}
|
||||
else {
|
||||
if( (int)strlen("orted") > len_proc_name)
|
||||
len_proc_name = strlen("orted");
|
||||
}
|
||||
}
|
||||
for( i = 0; i < (int)job->num_apps; ++i) {
|
||||
app = (orte_app_context_t*)job->apps->addr[i];
|
||||
if( app->idx == vpid->app_idx ) {
|
||||
if( (int)strlen(app->app) > len_proc_name)
|
||||
len_proc_name = strlen(app->app);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
o_proc_name = orte_util_print_name_args(&vpid->name);
|
||||
if ((int)strlen(o_proc_name) > len_o_proc_name)
|
||||
len_o_proc_name = strlen(o_proc_name);
|
||||
|
||||
opal_asprintf(&rankstr, "%u", (uint)vpid->local_rank);
|
||||
if ((int)strlen(rankstr) > len_rank)
|
||||
len_rank = strlen(rankstr);
|
||||
free(rankstr);
|
||||
|
||||
nodename[v] = NULL;
|
||||
if( orte_get_attribute(&vpid->attributes, ORTE_PROC_NODENAME, (void**)&nodename[v], OPAL_STRING) &&
|
||||
(int)strlen(nodename[v]) > len_node) {
|
||||
len_node = strlen(nodename[v]);
|
||||
} else if ((int)strlen("Unknown") > len_node) {
|
||||
len_node = strlen("Unknown");
|
||||
}
|
||||
|
||||
if( (int)strlen(orte_proc_state_to_str(vpid->state)) > len_state)
|
||||
len_state = strlen(orte_proc_state_to_str(vpid->state));
|
||||
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
orte_get_attribute(&vpid->attributes, ORTE_PROC_CKPT_STATE, (void**)&ckpt_state, OPAL_INT32);
|
||||
orte_get_attribute(&vpid->attributes, ORTE_PROC_SNAPSHOT_REF, (void**)&snap_ref, OPAL_STRING);
|
||||
orte_get_attribute(&vpid->attributes, ORTE_PROC_SNAPSHOT_LOC, (void**)&snap_loc, OPAL_STRING);
|
||||
orte_snapc_ckpt_state_str(&state_str, ckpt_state);
|
||||
if( (int)strlen(state_str) > len_ckpt_s)
|
||||
len_ckpt_s = strlen(state_str);
|
||||
|
||||
if(NULL != snap_ref && (int)strlen(snap_ref) > len_ckpt_r)
|
||||
len_ckpt_r = strlen(snap_ref);
|
||||
|
||||
if(NULL != snap_loc && (int)strlen(snap_loc) > len_ckpt_l)
|
||||
len_ckpt_l = strlen(snap_loc);
|
||||
#endif
|
||||
}
|
||||
|
||||
line_len = (len_o_proc_name + 3 +
|
||||
len_proc_name + 3 +
|
||||
len_rank + 3 +
|
||||
len_pid + 3 +
|
||||
len_state + 3 +
|
||||
len_node + 3 +
|
||||
len_ckpt_s + 3 +
|
||||
len_ckpt_r + 3 +
|
||||
len_ckpt_l)
|
||||
+ 2;
|
||||
|
||||
/*
|
||||
* Print Header
|
||||
*/
|
||||
printf("\t");
|
||||
printf("%*s | ", len_proc_name , "Process Name");
|
||||
printf("%*s | ", len_o_proc_name , "ORTE Name");
|
||||
printf("%*s | ", len_rank , "Local Rank");
|
||||
printf("%*s | ", len_pid , "PID");
|
||||
printf("%*s | ", len_node , "Node");
|
||||
printf("%*s | ", len_state , "State");
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
printf("%*s | ", len_ckpt_s , "Ckpt State");
|
||||
printf("%*s | ", len_ckpt_r , "Ckpt Ref");
|
||||
printf("%*s |", len_ckpt_l , "Ckpt Loc");
|
||||
#endif
|
||||
printf("\n");
|
||||
|
||||
printf("\t");
|
||||
pretty_print_dashed_line(line_len);
|
||||
|
||||
/*
|
||||
* Print Info
|
||||
*/
|
||||
for(v=0; v < job->num_procs; v++) {
|
||||
vpid = (orte_proc_t*)job->procs->addr[v];
|
||||
|
||||
printf("\t");
|
||||
|
||||
if( 0 >= (int)job->num_apps ) {
|
||||
if( 0 == vpid->name.vpid ) {
|
||||
printf("%*s | ", len_proc_name, "orterun");
|
||||
} else {
|
||||
printf("%*s | ", len_proc_name, "orted");
|
||||
}
|
||||
}
|
||||
for( i = 0; i < (int)job->num_apps; ++i) {
|
||||
app = (orte_app_context_t*)job->apps->addr[i];
|
||||
if( app->idx == vpid->app_idx ) {
|
||||
printf("%*s | ", len_proc_name, app->app);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
o_proc_name = orte_util_print_name_args(&vpid->name);
|
||||
|
||||
printf("%*s | ", len_o_proc_name, o_proc_name);
|
||||
printf("%*u | ", len_rank , (uint)vpid->local_rank);
|
||||
printf("%*d | ", len_pid , vpid->pid);
|
||||
printf("%*s | ", len_node , (NULL == nodename[v]) ? "Unknown" : nodename[v]);
|
||||
printf("%*s | ", len_state , orte_proc_state_to_str(vpid->state));
|
||||
|
||||
if (NULL != nodename[v]) {
|
||||
free(nodename[v]);
|
||||
}
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
printf("%*s | ", len_ckpt_s, state_str);
|
||||
printf("%*s | ", len_ckpt_r, (NULL == snap_ref ? "" : snap_ref));
|
||||
printf("%*s |", len_ckpt_l, (NULL == snap_loc ? "" : snap_loc));
|
||||
#endif
|
||||
printf("\n");
|
||||
|
||||
}
|
||||
if (NULL != nodename) {
|
||||
free(nodename);
|
||||
}
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static void pretty_print_dashed_line(int len) {
|
||||
static const char dashes[9] = "--------";
|
||||
|
||||
while (len >= 8) {
|
||||
printf("%8.8s", dashes);
|
||||
len -= 8;
|
||||
}
|
||||
printf("%*.*s\n", len, len, dashes);
|
||||
}
|
||||
|
||||
static int gather_information(orte_ps_mpirun_info_t *hnpinfo) {
|
||||
int ret;
|
||||
|
||||
if( ORTE_SUCCESS != (ret = gather_active_jobs(hnpinfo) )) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if( ORTE_SUCCESS != (ret = gather_nodes(hnpinfo) )) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if( ORTE_SUCCESS != (ret = gather_vpid_info(hnpinfo) )) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
cleanup:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int gather_active_jobs(orte_ps_mpirun_info_t *hnpinfo) {
|
||||
int ret;
|
||||
|
||||
if (ORTE_SUCCESS != (ret = orte_util_comm_query_job_info(&(hnpinfo->hnp->name), orte_ps_globals.jobid,
|
||||
&hnpinfo->num_jobs, &hnpinfo->jobs))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int gather_nodes(orte_ps_mpirun_info_t *hnpinfo) {
|
||||
int ret;
|
||||
|
||||
if (ORTE_SUCCESS != (ret = orte_util_comm_query_node_info(&(hnpinfo->hnp->name), NULL,
|
||||
&hnpinfo->num_nodes, &hnpinfo->nodes))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
}
|
||||
opal_output(0, "RECEIVED %d NODES", hnpinfo->num_nodes);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int gather_vpid_info(orte_ps_mpirun_info_t *hnpinfo) {
|
||||
int ret;
|
||||
orte_std_cntr_t i;
|
||||
int cnt;
|
||||
orte_job_t *job;
|
||||
orte_proc_t **procs;
|
||||
|
||||
/*
|
||||
* For each Job in the HNP
|
||||
*/
|
||||
for(i=0; i < hnpinfo->num_jobs; i++) {
|
||||
job = hnpinfo->jobs[i];
|
||||
|
||||
/*
|
||||
* Skip getting the vpid's for the HNP, unless asked to do so
|
||||
* The HNP is always the first in the array
|
||||
*/
|
||||
if( 0 == i && !orte_ps_globals.daemons) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* query the HNP for info on the procs in this job */
|
||||
if (ORTE_SUCCESS != (ret = orte_util_comm_query_proc_info(&(hnpinfo->hnp->name),
|
||||
job->jobid,
|
||||
ORTE_VPID_WILDCARD,
|
||||
&cnt,
|
||||
&procs))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
}
|
||||
job->procs->addr = (void**)procs;
|
||||
job->procs->size = cnt;
|
||||
job->num_procs = cnt;
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static char *pretty_node_state(orte_node_state_t state) {
|
||||
switch(state) {
|
||||
case ORTE_NODE_STATE_DOWN:
|
||||
return strdup("Down");
|
||||
break;
|
||||
case ORTE_NODE_STATE_UP:
|
||||
return strdup("Up");
|
||||
break;
|
||||
case ORTE_NODE_STATE_REBOOT:
|
||||
return strdup("Reboot");
|
||||
break;
|
||||
case ORTE_NODE_STATE_UNKNOWN:
|
||||
default:
|
||||
return strdup("Unknown");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static int parseable_print(orte_ps_mpirun_info_t *hnpinfo)
|
||||
{
|
||||
orte_job_t **jobs;
|
||||
orte_node_t **nodes;
|
||||
orte_proc_t *proc;
|
||||
orte_app_context_t *app;
|
||||
char *appname;
|
||||
int i, j;
|
||||
char *nodename;
|
||||
|
||||
/* don't include the daemon job in the number of jobs reported */
|
||||
printf("mpirun:%lu:num nodes:%d:num jobs:%d\n",
|
||||
(unsigned long)hnpinfo->hnp->pid, hnpinfo->num_nodes, hnpinfo->num_jobs-1);
|
||||
|
||||
if (orte_ps_globals.nodes) {
|
||||
nodes = hnpinfo->nodes;
|
||||
for (i=0; i < hnpinfo->num_nodes; i++) {
|
||||
printf("node:%s:state:%s:slots:%d:in use:%d\n",
|
||||
nodes[i]->name, pretty_node_state(nodes[i]->state),
|
||||
nodes[i]->slots, nodes[i]->slots_inuse);
|
||||
}
|
||||
}
|
||||
|
||||
jobs = hnpinfo->jobs;
|
||||
/* skip job=0 as that's the daemon job */
|
||||
for (i=1; i < hnpinfo->num_jobs; i++) {
|
||||
printf("jobid:%d:state:%s:slots:%d:num procs:%d\n",
|
||||
ORTE_LOCAL_JOBID(jobs[i]->jobid),
|
||||
orte_job_state_to_str(jobs[i]->state),
|
||||
jobs[i]->total_slots_alloc,
|
||||
jobs[i]->num_procs);
|
||||
/* print the proc info */
|
||||
for (j=0; j < jobs[i]->procs->size; j++) {
|
||||
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jobs[i]->procs, j))) {
|
||||
continue;
|
||||
}
|
||||
app = (orte_app_context_t*)opal_pointer_array_get_item(jobs[i]->apps, proc->app_idx);
|
||||
if (NULL == app) {
|
||||
appname = strdup("NULL");
|
||||
} else {
|
||||
appname = opal_basename(app->app);
|
||||
}
|
||||
nodename = NULL;
|
||||
orte_get_attribute(&proc->attributes, ORTE_PROC_NODENAME, (void**)&nodename, OPAL_STRING);
|
||||
printf("process:%s:rank:%s:pid:%lu:node:%s:state:%s\n",
|
||||
appname, ORTE_VPID_PRINT(proc->name.vpid),
|
||||
(unsigned long)proc->pid,
|
||||
(NULL == nodename) ? "unknown" : nodename,
|
||||
orte_proc_state_to_str(proc->state));
|
||||
free(appname);
|
||||
if (NULL != nodename) {
|
||||
free(nodename);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
@ -1,47 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
include $(top_srcdir)/Makefile.ompi-rules
|
||||
|
||||
man_pages = orte-top.1
|
||||
EXTRA_DIST = orte-top.1in
|
||||
|
||||
if OPAL_INSTALL_BINARIES
|
||||
|
||||
bin_PROGRAMS = orte-top
|
||||
|
||||
nodist_man_MANS = $(man_pages)
|
||||
|
||||
# Ensure that the man pages are rebuilt if the opal_config.h file
|
||||
# changes; a "good enough" way to know if configure was run again (and
|
||||
# therefore the release date or version may have changed)
|
||||
$(nodist_man_MANS): $(top_builddir)/opal/include/opal_config.h
|
||||
|
||||
dist_ortedata_DATA = help-orte-top.txt
|
||||
|
||||
endif # OPAL_INSTALL_BINARIES
|
||||
|
||||
orte_top_SOURCES = orte-top.c
|
||||
orte_top_LDADD = \
|
||||
$(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la \
|
||||
$(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la
|
||||
|
||||
distclean-local:
|
||||
rm -f $(man_pages)
|
@ -1,82 +0,0 @@
|
||||
# -*- text -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2009 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2012 Cisco Systems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
# This is the US/English general help file for Open MPI's orte-top tool.
|
||||
#
|
||||
[orte-top:usage]
|
||||
Return statistics on specified process ranks
|
||||
|
||||
Usage: %s [OPTIONS]
|
||||
|
||||
%s
|
||||
#
|
||||
[orte-top:pid-not-found]
|
||||
We could not find an mpirun matching the provided pid on this machine.
|
||||
|
||||
Pid provided: %d
|
||||
#
|
||||
[orte-top:no-contact-given]
|
||||
This tool requires that you specify contact info for the mpirun executing
|
||||
the specified rank(s). Please use the --help option for more information.
|
||||
#
|
||||
[orte-top:hnp-filename-bad]
|
||||
We are unable to parse the filename where contact info for the
|
||||
mpirun to be contacted was to be found. The option we were given was:
|
||||
|
||||
--%s %s
|
||||
|
||||
This appears to be missing the required ':' following the
|
||||
keyword "file". Please use the --help option for more information on
|
||||
the correct format for this command line option.
|
||||
#
|
||||
[orte-top:hnp-filename-access]
|
||||
We are unable to access the filename where contact info for the
|
||||
mpirun to be contacted was to be found. The filename we were given was:
|
||||
|
||||
File: %s
|
||||
|
||||
Please use the --help option for more information on
|
||||
the correct format for this command line option.
|
||||
#
|
||||
[orte-top:hnp-file-bad]
|
||||
We are unable to read the mpirun's contact info from the
|
||||
given filename. The filename we were given was:
|
||||
|
||||
FILE: %s
|
||||
|
||||
Please use the --help option for more information on
|
||||
the correct format for this command line option.
|
||||
#
|
||||
[orte-top:hnp-uri-bad]
|
||||
We are unable to correctly parse the mpirun's contact info. The uri we were given was:
|
||||
|
||||
URI: %s
|
||||
|
||||
Please remember that this is *not* a standard uri, but
|
||||
a special format used internally by Open MPI for communications. It can
|
||||
best be generated by simply directing mpirun to put its
|
||||
uri in a file, and then giving us that filename.
|
||||
#
|
||||
[orte-top:cant-open-logfile]
|
||||
We are unable to open the specified output log file.
|
||||
|
||||
File: %s
|
||||
|
||||
Please use the --help option for more information on
|
||||
the correct format for this command line option.
|
@ -1,106 +0,0 @@
|
||||
.\"
|
||||
.\" Copyright (c) 2007 Los Alamos National Security, LLC
|
||||
.\" All rights reserved.
|
||||
.\" Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved.
|
||||
.\"
|
||||
.\" Man page for OMPI's ompi-server command
|
||||
.\"
|
||||
.\" .TH name section center-footer left-footer center-header
|
||||
.TH OMPI-TOP 1 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
|
||||
.\" **************************
|
||||
.\" Name Section
|
||||
.\" **************************
|
||||
.SH NAME
|
||||
.
|
||||
ompi-top, orte-top \- Diagnostic to provide process info similar to the popular "top" program.
|
||||
.
|
||||
.PP
|
||||
.
|
||||
\fBNOTE:\fP \fIompi-top\fP, and \fIorte-top\fP are exact
|
||||
synonyms for each other. Using any of the names will result in exactly
|
||||
identical behavior.
|
||||
.
|
||||
|
||||
.\" **************************
|
||||
.\" Synopsis Section
|
||||
.\" **************************
|
||||
.SH SYNOPSIS
|
||||
.
|
||||
.BR ompi-top " [ options ]"
|
||||
.
|
||||
.\" **************************
|
||||
.\" Options Section
|
||||
.\" **************************
|
||||
.SH Options
|
||||
.
|
||||
\fIompi-top\fR collects and displays process information in a manner similar
|
||||
to that of the popular "top" program.
|
||||
.
|
||||
.TP 10
|
||||
.B -h | --help
|
||||
Display help for this command
|
||||
.
|
||||
.
|
||||
.TP
|
||||
.B -pid | --pid \fR<value>\fP
|
||||
The pid of the mpirun whose processes you want information about, or the name
|
||||
of the file (specified as file:filename) that contains that info. Note that
|
||||
the ompi-top command must be executed on the same node as mpirun to use this option.
|
||||
.
|
||||
.
|
||||
.TP
|
||||
.B -uri | --uri \fR<value>\fP
|
||||
Specify the URI of the mpirun whose processes you want information about, or the name
|
||||
of the file (specified as file:filename) that contains that info. Note that
|
||||
the ompi-top command does not have to be executed on the same node as mpirun to use this option.
|
||||
.
|
||||
.
|
||||
.TP
|
||||
.B -rank | --rank \fR<value>\fP
|
||||
The rank of the processes to be monitored. This can consist of a single rank, or
|
||||
a comma-separated list of ranks. These can include rank ranges separated by a '-'.
|
||||
If this option is not provided, or a value of -1 is given, ompi-top will default
|
||||
to displaying information on all ranks.
|
||||
.
|
||||
.
|
||||
.TP
|
||||
.B -bynode | --bynode
|
||||
Display the results grouped by node, with each node's processes reported in rank
|
||||
order. If this option is not provided, ompi-top will default to displaying all
|
||||
results in rank order.
|
||||
.
|
||||
.
|
||||
.TP
|
||||
.B -update-rate | --update-rate \fR<value>\fP
|
||||
The time (in seconds) between updates of the displayed information. If this option
|
||||
is not provided, ompi-top will default to executing only once.
|
||||
.
|
||||
.
|
||||
.TP
|
||||
.B -timestamp | --timestamp
|
||||
Provide an approximate time when each sample was taken. This time is approximate as it
|
||||
only shows the time when the sample command was issued.
|
||||
.
|
||||
.
|
||||
.TP
|
||||
.B -log-file | --log-file \fR<value>\fP
|
||||
Log the results to the specified file instead of displaying them to stdout.
|
||||
.
|
||||
.
|
||||
.\" **************************
|
||||
.\" Description Section
|
||||
.\" **************************
|
||||
.SH DESCRIPTION
|
||||
.
|
||||
.PP
|
||||
\fIompi-top\fR collects and displays process information in a manner similar
|
||||
to that of the popular "top" program. It doesn't do the fancy screen display, but
|
||||
does allow you to monitor available process information (to the limits of the underlying
|
||||
operating system) of processes irrespective of their location.
|
||||
.
|
||||
.\" **************************
|
||||
.\" See Also Section
|
||||
.\" **************************
|
||||
.
|
||||
.SH SEE ALSO
|
||||
.
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Загрузка…
Ссылка в новой задаче
Block a user