1
1
openmpi/orte/tools/console/orteconsole.c
Ralph Castain 5dfd54c778 With the branch to 1.2 made....
Clean up the remainder of the size_t references in the runtime itself. Convert to orte_std_cntr_t wherever it makes sense (only avoid those places where the actual memory size is referenced).

Remove the obsolete oob barrier function (we actually obsoleted it a long time ago - just never bothered to clean it up).

I have done my best to go through all the components and catch everything, even if I couldn't test compile them since I wasn't on that type of system. Still, I cannot guarantee that problems won't show up when you test this on specific systems. Usually, these will just show as "warning: comparison between signed and unsigned" notes which are easily fixed (just change a size_t to orte_std_cntr_t).

In some places, people didn't use size_t, but instead used some other variant (e.g., I found several places with uint32_t). I tried to catch all of them, but...

Once we get all the instances caught and fixed, this should once and for all resolve many of the heterogeneity problems.

This commit was SVN r11204.
2006-08-15 19:54:10 +00:00

1007 lines
31 KiB
C

/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file **/
#include "orte_config.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include "orte/orte_constants.h"
#include "opal/util/cmd_line.h"
#include "opal/util/argv.h"
#include "opal/class/opal_list.h"
#include "opal/util/output.h"
#include "opal/util/os_path.h"
#include "opal/util/show_help.h"
#include "opal/mca/base/base.h"
#include "orte/dss/dss.h"
#include "orte/util/sys_info.h"
#include "orte/util/proc_info.h"
#include "orte/util/session_dir.h"
#include "orte/util/universe_setup_file_io.h"
#include "orte/runtime/runtime.h"
#include "orte/mca/rmgr/rmgr.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/ras/ras.h"
#include "orte/mca/rds/base/base.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/pls/base/base.h"
#include "orte/runtime/orte_setup_hnp.h"
#include "orte/tools/orted/orted.h"
#include "orte/tools/console/orteconsole.h"
/*
* Global Variables
*/
/* Set to true by orte_console_exit(); main()'s work loop runs until it is set. */
static bool exit_cmd;
/* Gates every command that talks to a daemon or the registry. main() sets it
 * to true before orte_init() and clears it when this process turns out to be
 * the seed; orte_console_boot_daemons() sets it again after a successful boot. */
static bool daemon_is_active;
/*
* Globals for catching command line options
*/
/* cmd_line_opts stores pointers to the help and hostfile fields of this
 * structure as the destinations of the corresponding options. */
orte_console_globals_t orte_console_globals;
/*
 * Table of command line options accepted by orteconsole. main() hands it to
 * opal_cmd_line_create(); the entry whose type is OPAL_CMD_LINE_TYPE_NULL
 * terminates the table.
 *
 * Note that "hostfile" and "machinefile" are two spellings of the same
 * option: both entries store into orte_console_globals.hostfile.
 */
opal_cmd_line_init_t cmd_line_opts[] = {
{ NULL, NULL, NULL, 'h', NULL, "help", 0,
&orte_console_globals.help, OPAL_CMD_LINE_TYPE_BOOL,
"This help message" },
/* A Hostfile */
{ "rds", "hostfile", "path", '\0', "hostfile", "hostfile", 1,
&orte_console_globals.hostfile, OPAL_CMD_LINE_TYPE_STRING,
"Provide a hostfile" },
{ "rds", "hostfile", "path", '\0', "machinefile", "machinefile", 1,
&orte_console_globals.hostfile, OPAL_CMD_LINE_TYPE_STRING,
"Provide a hostfile" },
{ "orte", "debug", NULL, 'd', NULL, "debug-devel", 0,
NULL, OPAL_CMD_LINE_TYPE_BOOL,
"Enable debugging of OpenRTE" },
{ "orte", "debug", "daemons", '\0', NULL, "debug-daemons", 0,
NULL, OPAL_CMD_LINE_TYPE_INT,
"Enable debugging of any OpenRTE daemons used by this application" },
{ "orte", "debug", "daemons_file", '\0', NULL, "debug-daemons-file", 0,
NULL, OPAL_CMD_LINE_TYPE_BOOL,
"Enable debugging of any OpenRTE daemons used by this application, storing output in files" },
{ "orte", "no_daemonize", NULL, '\0', NULL, "no-daemonize", 0,
NULL, OPAL_CMD_LINE_TYPE_BOOL,
"Do not detach OpenRTE daemons used by this application" },
{ "universe", NULL, NULL, '\0', NULL, "universe", 1,
NULL, OPAL_CMD_LINE_TYPE_STRING,
"Set the universe name as username@hostname:universe_name for this application" },
/* NOTE(review): the help text below says "orterun"; it looks copied from
 * orterun's option table - confirm whether it should say orteconsole. */
{ NULL, NULL, NULL, '\0', NULL, "tmpdir", 1,
&orte_process_info.tmpdir_base, OPAL_CMD_LINE_TYPE_STRING,
"Set the root for the session directory tree for orterun ONLY" },
{ "orte", "universe", "exist", '\0', NULL, NULL, (int)false,
NULL, OPAL_CMD_LINE_TYPE_BOOL,
"Report error if universe does not already exist" },
/* End of list */
{ NULL, NULL, NULL, '\0', NULL, NULL, 0,
NULL, OPAL_CMD_LINE_TYPE_NULL,
NULL }
};
/*
 * Global structure describing valid internal commands.
 *
 * Each entry gives, in order: the full command name, an optional short
 * alias (NULL if none), the minimum number of arguments the command needs
 * (checked against argc by orte_console_execute_command), the command type
 * (ORTE_CONSOLE_TYPE_HIDDEN entries are skipped by the generic "help"
 * listing), the handler function, a usage string and a description.
 *
 * The table is scanned front to back and is terminated by the entry whose
 * type is ORTE_CONSOLE_TYPE_NULL.
 */
orte_console_command_t console_commands[] = {
    { "add", NULL, 1, ORTE_CONSOLE_TYPE_STD,
      orte_console_add_host,
      "add <hostname> [<hostname> <hostname> ...]",
      "Add a host to the current universe" },
    { "alias", NULL, 0, ORTE_CONSOLE_TYPE_HIDDEN,
      orte_console_not_imp,
      "alias [<name> <cmd> <arg>]",
      "Alias command" },
    { "boot", "b", 0, ORTE_CONSOLE_TYPE_STD,
      orte_console_boot_daemons,
      "boot [hostname] [username]",
      "Launch Persistent Daemons. This will use the specified host or the first host added." },
    { "clean", "cl", 0, ORTE_CONSOLE_TYPE_HIDDEN,
      orte_console_not_imp,
      "clean",
      "Kill all jobs in the universe, preserving all daemons" },
    { "conf", NULL, 0, ORTE_CONSOLE_TYPE_STD,
      orte_console_display_configuration,
      "conf [-a]",
      "Display a list of the machines in the current universe" },
    { "contactinfo", "ci", 0, ORTE_CONSOLE_TYPE_STD,
      orte_console_contactinfo,
      "contactinfo",
      "Query Contact Information from Daemons" },
    { "cwd", NULL, 0, ORTE_CONSOLE_TYPE_HIDDEN,
      orte_console_not_imp,
      "cwd [name]",
      "Set or print the current working directory" },
    { "delete", "del", 1, ORTE_CONSOLE_TYPE_STD,
      orte_console_remove_host,
      "delete <hostid> [<hostid> <hostid> ...]",
      "Delete a host from the current universe" },
    { "exit", "e", 0, ORTE_CONSOLE_TYPE_STD,
      orte_console_exit,
      "exit",
      "Exit the console" },
    { "expire", NULL, 1, ORTE_CONSOLE_TYPE_HIDDEN,
      orte_console_not_imp,
      "expire <age (seconds)>",
      "Expire process information" },
    { "halt", NULL, 0, ORTE_CONSOLE_TYPE_STD,
      orte_console_halt,
      "halt",
      "Halt virtual machine" },
    { "haltall", NULL, 0, ORTE_CONSOLE_TYPE_HIDDEN,
      orte_console_not_imp,
      "haltall",
      "Halt virtual machine and stop all services" },
    { "help", "h", 0, ORTE_CONSOLE_TYPE_STD,
      orte_console_help,
      "help [command]",
      "Print this display" },
#ifdef HAVE_READLINE
    { "history", NULL, 0, ORTE_CONSOLE_TYPE_HIDDEN,
      orte_console_not_imp,
      "history",
      "Display list of command history" },
#endif
    { "kill", NULL, 1, ORTE_CONSOLE_TYPE_HIDDEN,
      orte_console_not_imp,
      "kill <GID> [<GID> <GID> ...]",
      "Terminate process(es)" },
    { "killall", NULL, 1, ORTE_CONSOLE_TYPE_HIDDEN,
      orte_console_not_imp,
      "killall <RunID> [<RunID> <RunID> ...]",
      "Terminate all process(es) in runID" },
    { "mpispawn", NULL, 3, ORTE_CONSOLE_TYPE_HIDDEN,
      orte_console_not_imp,
      "mpispawn -np <number of process> [ <-option name> [option argument] ] <process name>",
      "Spawn MPI process(es)" },
    { "ps", NULL, 0, ORTE_CONSOLE_TYPE_STD,
      orte_console_ps,
      "ps",
      "Display process(es) status" },
    { "quit", "q", 0, ORTE_CONSOLE_TYPE_STD,
      orte_console_exit,
      "quit",
      "Quit from console" },
    { "reset", NULL, 0, ORTE_CONSOLE_TYPE_HIDDEN,
      orte_console_not_imp,
      "reset",
      "Kill all tasks" },
    { "service", NULL, 2, ORTE_CONSOLE_TYPE_HIDDEN,
      orte_console_not_imp,
      "service [service_name] [operation]",
      "Service management" },
    { "sig", NULL, 2, ORTE_CONSOLE_TYPE_HIDDEN,
      orte_console_not_imp,
      "sig <signum> <GID> [<GID> <GID> ...]",
      "Send signal to process(es)" },
    { "sigall", NULL, 2, ORTE_CONSOLE_TYPE_HIDDEN,
      orte_console_not_imp,
      "sigall <signum> <RunID> [<RunID> <RunID> ...]",
      "Send signal to all process(es) in runID" },
    { "spawn", NULL, 3, ORTE_CONSOLE_TYPE_HIDDEN,
      orte_console_not_imp,
      "spawn -np <number of process> [ <-option name> [option argument] ] <process name>",
      "Spawn process(es)" },
    { "unalias", NULL, 1, ORTE_CONSOLE_TYPE_HIDDEN,
      orte_console_not_imp,
      "unalias <name>",
      "unalias command" },
    { "vmname", NULL, 0, ORTE_CONSOLE_TYPE_HIDDEN,
      orte_console_not_imp,
      "vmname [name]",
      "Set or print the current virtual machine name" },
    /* The description lists the keywords orte_console_dump() compares
     * argv[1] against; with no argument it dumps everything. */
    { "dump", NULL, 0, ORTE_CONSOLE_TYPE_STD,
      orte_console_dump,
      "dump [arg1 arg2]",
      "Dump registry data - [segment, trigger, subs, callbacks, cells, jobs, tags, datatypes] [segment_name(s) | id]; with no argument, dump everything" },
    /* End of list */
    { NULL, NULL, 0, ORTE_CONSOLE_TYPE_NULL,
      NULL,
      NULL,
      NULL }
};
/* This should be added to opal_list.c ??? JJH */
/*
 * Empty a list: pull items off the front one at a time and drop one
 * reference on each. Always returns ORTE_SUCCESS.
 */
static int opal_list_clear(opal_list_t *list) {
    opal_list_item_t *head;

    for (head = opal_list_remove_first(list);
         NULL != head;
         head = opal_list_remove_first(list)) {
        OBJ_RELEASE(head);
    }
    return ORTE_SUCCESS;
}
int main(int argc, char *argv[])
{
int ret=0;
opal_cmd_line_t *cmd_line;
char *usercmd;
orte_console_input_command_t input_command;
/*
* Setup to check common command line options
*/
memset(&orte_console_globals, 0, sizeof(orte_console_globals_t));
cmd_line = OBJ_NEW(opal_cmd_line_t);
opal_cmd_line_create(cmd_line, cmd_line_opts);
if (ORTE_SUCCESS != (ret = opal_cmd_line_parse(cmd_line, false,
argc, argv))) {
char *args = NULL;
args = opal_cmd_line_get_usage_msg(cmd_line);
opal_show_help("help-orteconsole.txt", "orteconsole:usage", false,
argv[0], args);
free(args);
return ret;
}
/* Check for help request */
if ( orte_console_globals.help ) {
char *args = NULL;
args = opal_cmd_line_get_usage_msg(cmd_line);
opal_show_help("help-orteconsole.txt", "orteconsole:usage", false,
argv[0], args);
free(args);
return 1;
}
opal_show_help("help-orteconsole.txt", "orteconsole:splash-screen", false);
/*
* Intialize the ORTE environment
*/
/* Set the flag telling orte_init that I am NOT a
* singleton, but am "infrastructure" - prevents setting
* up incorrect infrastructure that only a singleton would
* require
*/
daemon_is_active = true;
if (ORTE_SUCCESS != (ret = orte_init(true)) ) {
if (ORTE_ERR_UNREACH == ret) {
opal_output(0, "Specified universe could not be reached - please ensure it has been started\n");
return ret;
} else {
opal_show_help("help-orteconsole.txt", "orteconsole:init-failure", false,
"orte_init()", ret);
return ret;
}
}
if (orte_process_info.seed) {
daemon_is_active = false;
}
/*
* Work Loop
*/
OBJ_CONSTRUCT(&orte_console_hosts, opal_list_t);
if (NULL != orte_ras.node_query) orte_ras.node_query(&orte_console_hosts);
exit_cmd = false;
memset(&input_command, 0, sizeof(orte_console_input_command_t));
while ( !exit_cmd ) {
usercmd = orte_console_get_input_line();
if (NULL == usercmd || 0 >= strlen(usercmd) ) {
continue;
}
orte_console_parse_command(usercmd, &input_command);
orte_console_execute_command(input_command);
}
OBJ_DESTRUCT(&orte_console_hosts);
/*
* Finialize ORTE Environment
*/
if ( ORTE_SUCCESS != (ret = orte_finalize()) ) {
opal_show_help("help-orteconsole.txt", "orteconsole:finalize-failure", false,
"orte_finalize()", ret);
return ret;
}
return ORTE_SUCCESS;
}
/*
 * Decide whether the word typed by the user names the given table entry.
 *
 * The word must equal the entry's full name or its short alias exactly.
 * (A prefix comparison here would let "haltall" select the "halt" entry,
 * because "halt" appears earlier in the table.)
 *
 * @param user_command   word typed by the user; may be NULL
 * @param system_command table entry to compare against
 * @return 0 on a match, -1 otherwise
 */
static int command_cmp(char* user_command, orte_console_command_t system_command) {
    if (NULL == user_command || NULL == system_command.cmd_full_name) {
        return -1;
    }

    /*
     * Check for Full Name Match
     */
    if (0 == strcmp(user_command, system_command.cmd_full_name)) {
        return 0;
    }

    /*
     * Check for Short Name Match
     */
    if (NULL != system_command.cmd_short_name &&
        0 == strcmp(user_command, system_command.cmd_short_name)) {
        return 0;
    }

    return -1;
}
/*
 * Look up the parsed command in console_commands and run its handler.
 *
 * Returns ORTE_ERR_NOT_IMPLEMENTED when no table entry matches, ORTE_ERROR
 * when too few arguments were supplied, the handler's return code when the
 * handler fails, and ORTE_SUCCESS otherwise. Each failure is reported to the
 * user through opal_show_help().
 */
static int orte_console_execute_command(orte_console_input_command_t input_command) {
    orte_console_command_t *match = NULL;
    int idx, rc;

    /* Locate the first table entry that answers to this name */
    for (idx = 0; ORTE_CONSOLE_TYPE_NULL != console_commands[idx].cmd_type; ++idx) {
        if (0 == command_cmp(input_command.cmd_name, console_commands[idx])) {
            match = &console_commands[idx];
            break;
        }
    }

    /*
     * If command was not found :(
     */
    if (NULL == match) {
        opal_show_help("help-orteconsole.txt", "orteconsole:unknown-command", false,
                       input_command.cmd_name);
        return ORTE_ERR_NOT_IMPLEMENTED;
    }

    /* argc counts the command word itself, hence the +1 */
    if (input_command.argc < (match->cmd_args + 1)) {
        opal_show_help("help-orteconsole.txt", "orteconsole:invalid-num-arguments", false,
                       input_command.cmd_name, match->cmd_args,
                       input_command.argc, match->cmd_full_name);
        return ORTE_ERROR;
    }

    rc = match->cmd_function(input_command);
    if (ORTE_ERR_NOT_IMPLEMENTED == rc) {
        opal_show_help("help-orteconsole.txt", "orteconsole:unimplemented-command", false,
                       match->cmd_full_name);
        return rc;
    }
    if (ORTE_SUCCESS != rc) {
        opal_show_help("help-orteconsole.txt", "orteconsole:failed-command", false,
                       match->cmd_full_name, rc);
        return rc;
    }

    return ORTE_SUCCESS;
}
/*
 * Split a raw input line on spaces and fill in input_command.
 *
 * On success argv/argc hold the split words and cmd_name is a copy of the
 * first word. The caller owns argv and cmd_name.
 *
 * A line that contains no words (for example only spaces) is not an error
 * worth crashing over: argv is left NULL, argc is 0, cmd_name is set to an
 * empty string (so that a caller which ignores the return code still has a
 * valid string to compare) and ORTE_ERROR is returned.
 *
 * @return ORTE_SUCCESS, ORTE_ERROR for an empty command, or
 *         ORTE_ERR_OUT_OF_RESOURCE if the name could not be copied.
 */
static int orte_console_parse_command(char * usercmd, orte_console_input_command_t *input_command){
    input_command->argv = (NULL != usercmd) ? opal_argv_split(usercmd, ' ') : NULL;
    input_command->argc = opal_argv_count(input_command->argv);

    if (NULL == input_command->argv ||
        input_command->argc < 1 ||
        NULL == input_command->argv[0]) {
        /* No command word at all */
        opal_argv_free(input_command->argv);
        input_command->argv = NULL;
        input_command->argc = 0;
        input_command->cmd_name = strdup("");
        return ORTE_ERROR;
    }

    input_command->cmd_name = strdup(input_command->argv[0]);
    if (NULL == input_command->cmd_name) {
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    return ORTE_SUCCESS;
}
/* ===========================
* Actual Functionality below
* =========================== */
/*
 * Placeholder handler for commands that are listed in the table but have
 * no implementation yet. Always reports ORTE_ERR_NOT_IMPLEMENTED.
 */
static int orte_console_not_imp(orte_console_input_command_t input_command) {
    (void)input_command; /* arguments are irrelevant here */
    return ORTE_ERR_NOT_IMPLEMENTED;
}
/*
 * "dump" command: print registry / name service contents.
 *
 * argv[1] selects what is dumped:
 *   (none) | all            everything (orte_gpr.dump_all)
 *   segment [name ...]      the named segments, or all segments
 *   trigger[s] [id]         the trigger with the given id, or id 0 if omitted
 *   subs | subscriptions [id]   likewise for subscriptions
 *   callbacks, cells, jobs, tags, datatypes
 *
 * "all", "triggers" and "subscriptions" are accepted because they are the
 * spellings advertised by the command's help text.
 *
 * Always returns ORTE_SUCCESS; an unknown option or a missing daemon is
 * only reported through opal_output().
 */
static int orte_console_dump(orte_console_input_command_t input_command) {
    const char *what;
    int i, j;

    if (!daemon_is_active) {
        /** let user know that this isn't available */
        opal_output(0, "orteconsole: no daemon is active - dump cannot be executed\n");
        return ORTE_SUCCESS;
    }

    if (input_command.argc < 2 || NULL == input_command.argv[1]) {
        /** default to dump_all */
        orte_gpr.dump_all();
        return ORTE_SUCCESS;
    }
    what = input_command.argv[1];

    if (0 == strcmp(what, "all")) {
        orte_gpr.dump_all();
    } else if (0 == strcmp(what, "segment")) {
        if (2 < input_command.argc) { /** specific segment(s) requested */
            for (i = 2; i < input_command.argc; i++) {
                orte_gpr.dump_segment(input_command.argv[i]);
            }
        } else { /** nothing specific - dump them all */
            orte_gpr.dump_segment(NULL);
        }
    } else if (0 == strcmp(what, "trigger") || 0 == strcmp(what, "triggers")) {
        /* a non-numeric id parses as 0, same as giving no id */
        j = (2 < input_command.argc) ? (int)strtol(input_command.argv[2], NULL, 10) : 0;
        orte_gpr.dump_triggers(j);
    } else if (0 == strcmp(what, "subs") || 0 == strcmp(what, "subscriptions")) {
        j = (2 < input_command.argc) ? (int)strtol(input_command.argv[2], NULL, 10) : 0;
        orte_gpr.dump_subscriptions(j);
    } else if (0 == strcmp(what, "callbacks")) {
        orte_gpr.dump_callbacks();
    } else if (0 == strcmp(what, "cells")) {
        orte_ns.dump_cells();
    } else if (0 == strcmp(what, "jobs")) {
        orte_ns.dump_jobs();
    } else if (0 == strcmp(what, "tags")) {
        orte_ns.dump_tags();
    } else if (0 == strcmp(what, "datatypes")) {
        orte_ns.dump_datatypes();
    } else {
        /** let user know that this isn't recognized */
        opal_output(0, "orteconsole: specified dump option not recognized\n");
    }

    return ORTE_SUCCESS;
}
/*
 * "ps" command. With an active daemon this currently just asks the name
 * service to dump its job list; without one it tells the user so.
 * Always returns ORTE_SUCCESS.
 */
static int orte_console_ps(orte_console_input_command_t input_command) {
    if (!daemon_is_active) {
        /** let user know that this isn't available */
        opal_output(0, "orteconsole: no daemon is active - ps cannot be executed\n");
        return ORTE_SUCCESS;
    }

    /** Intended behaviour: find the jobids in the system and, for each
     * one, get its status and output the info. There is no real way to
     * do this right now - need the 2.0 interface - so punt for the
     * moment and dump the job list instead.
     */
    orte_ns.dump_jobs();
    return ORTE_SUCCESS;
}
/*
 * Publish a list of new hosts.
 *
 * For every orte_ras_node_t in 'updates' an orte_rds_cell_desc_t is built
 * (site "Console", name = node name, cellid 0) carrying two attributes: the
 * node name under ORTE_RDS_NAME and the cellid under ORTE_CELLID_KEY. The
 * resulting list is handed to orte_rds.store_resource() and then 'updates'
 * itself is handed to orte_ras.node_insert().
 *
 * As a side effect node_cellid of every item in 'updates' is set to 0.
 *
 * All temporary objects are released on every path, including failures.
 *
 * @param updates list of orte_ras_node_t to add (still owned by the caller)
 * @return ORTE_SUCCESS or the first error encountered
 */
static int add_hosts_to_registry(opal_list_t *updates) {
    orte_rds_cell_desc_t *rds_item = NULL;  /* non-NULL only while not yet on rds_updates */
    orte_rds_cell_attr_t *new_attr = NULL;  /* non-NULL only while not yet on rds_item */
    orte_ras_node_t *ras_item;
    opal_list_item_t *item;
    opal_list_t rds_updates;
    int ret = ORTE_SUCCESS;
    orte_cellid_t local_cellid;

    OBJ_CONSTRUCT(&rds_updates, opal_list_t);

    /* Convert RAS list to RDS list */
    for (item = opal_list_get_first(updates);
         item != opal_list_get_end(updates);
         item = opal_list_get_next(item)) {
        ras_item = (orte_ras_node_t *) item;

        rds_item = OBJ_NEW(orte_rds_cell_desc_t);
        if (NULL == rds_item) {
            ret = ORTE_ERR_OUT_OF_RESOURCE;
            ORTE_ERROR_LOG(ret);
            goto cleanup;
        }
        rds_item->site = strdup("Console");
        rds_item->name = strdup(ras_item->node_name);

        /* JJH Repair when cellid's are fixed: a new cellid should be
         * obtained via orte_ns.create_cellid(&local_cellid, site, name).
         * Until then every host lives in cell 0. */
        local_cellid = 0;
        rds_item->cellid = local_cellid;
        ras_item->node_cellid = local_cellid;

        /* Attribute 1: the node name */
        new_attr = OBJ_NEW(orte_rds_cell_attr_t);
        if (NULL == new_attr) {
            ret = ORTE_ERR_OUT_OF_RESOURCE;
            ORTE_ERROR_LOG(ret);
            goto cleanup;
        }
        new_attr->keyval.value = OBJ_NEW(orte_data_value_t);
        if (NULL == new_attr->keyval.value) {
            ret = ORTE_ERR_OUT_OF_RESOURCE;
            ORTE_ERROR_LOG(ret);
            goto cleanup;
        }
        new_attr->keyval.key = strdup(ORTE_RDS_NAME);
        new_attr->keyval.value->type = ORTE_STRING;
        new_attr->keyval.value->data = strdup(ras_item->node_name);
        opal_list_append(&(rds_item->attributes), &new_attr->super);
        new_attr = NULL; /* now reachable through rds_item */

        /* Attribute 2: the cellid */
        new_attr = OBJ_NEW(orte_rds_cell_attr_t);
        if (NULL == new_attr) {
            ret = ORTE_ERR_OUT_OF_RESOURCE;
            ORTE_ERROR_LOG(ret);
            goto cleanup;
        }
        new_attr->keyval.value = OBJ_NEW(orte_data_value_t);
        if (NULL == new_attr->keyval.value) {
            ret = ORTE_ERR_OUT_OF_RESOURCE;
            ORTE_ERROR_LOG(ret);
            goto cleanup;
        }
        new_attr->keyval.key = strdup(ORTE_CELLID_KEY);
        new_attr->keyval.value->type = ORTE_CELLID;
        ret = orte_dss.copy((void**)&(new_attr->keyval.value->data), &(rds_item->cellid), ORTE_CELLID);
        if (ORTE_SUCCESS != ret) {
            ORTE_ERROR_LOG(ret);
            goto cleanup;
        }
        opal_list_append(&(rds_item->attributes), &new_attr->super);
        new_attr = NULL;

        opal_list_append(&rds_updates, &rds_item->super);
        rds_item = NULL; /* now reachable through rds_updates */
    }

    /* Add the hosts to the registry */
    ret = orte_rds.store_resource(&rds_updates);
    if (ORTE_SUCCESS != ret) {
        goto cleanup;
    }
    ret = orte_ras.node_insert(updates);

cleanup:
    /* Release whatever was under construction when an error hit */
    if (NULL != new_attr) {
        OBJ_RELEASE(new_attr);
    }
    if (NULL != rds_item) {
        OBJ_RELEASE(rds_item);
    }
    opal_list_clear(&rds_updates);
    OBJ_DESTRUCT(&rds_updates);
    return ret;
}
/*
 * Remove hosts from the registry.
 *
 * The actual removal has not been written yet (see the disabled section),
 * so at present this only sets up and tears down an empty work list and
 * reports ORTE_SUCCESS; 'updates' is not touched.
 */
static int remove_hosts_from_registry(opal_list_t *updates) {
    opal_list_t rds_list;
    /* int rc; */

    OBJ_CONSTRUCT(&rds_list, opal_list_t);

    /* Delete the hosts from the registry */
#if 0 /* This functionality needs to be written */
    orte_rds_base_convert_ras_to_rds(updates, &rds_list);
    rc = orte_rds_base_node_delete(&rds_list);
    if (ORTE_SUCCESS != rc) {
        return rc;
    }
    rc = orte_ras.node_delete(updates);
    if (ORTE_SUCCESS != rc) {
        return rc;
    }
#endif

    opal_list_clear(&rds_list);
    OBJ_DESTRUCT(&rds_list);

    return ORTE_SUCCESS;
}
/*
 * "add" command: add each host named in argv[1..] to the universe.
 *
 * Every host gets a fresh orte_ras_node_t with architecture "unknown",
 * cellid 0, one slot and a maximum of one slot. Without an active daemon
 * the nodes go straight onto the console's local host list. With an active
 * daemon they are published through add_hosts_to_registry() and the local
 * list is then rebuilt from orte_ras.node_query().
 *
 * The temporary list is cleared and destructed on every path.
 *
 * @return ORTE_SUCCESS, ORTE_ERR_OUT_OF_RESOURCE, or the error returned by
 *         the registry / node query.
 */
static int orte_console_add_host(orte_console_input_command_t input_command) {
    int i, ret = ORTE_SUCCESS;
    orte_ras_node_t *tmp_host;
    opal_list_t hosts_to_add;

    OBJ_CONSTRUCT(&hosts_to_add, opal_list_t);

    for (i = 1; i < input_command.argc; ++i) {
        tmp_host = OBJ_NEW(orte_ras_node_t);
        if (NULL == tmp_host) {
            ret = ORTE_ERR_OUT_OF_RESOURCE;
            goto cleanup;
        }
        tmp_host->node_name = strdup(input_command.argv[i]);
        tmp_host->node_arch = strdup("unknown");
        tmp_host->node_cellid = 0; /* JJH Repair when cellid's are fixed */
        tmp_host->node_slots_inuse = 0;
        tmp_host->node_slots_max = 1;
        tmp_host->node_slots = 1;

        if (daemon_is_active) {
            opal_list_append(&hosts_to_add, &tmp_host->super);
        } else {
            opal_list_append(&orte_console_hosts, &tmp_host->super);
        }
        printf("Added Host: <%s>\n", input_command.argv[i]);
    }

    if (daemon_is_active && !opal_list_is_empty(&hosts_to_add)) {
        /*
         * If there is an active daemon, then add to the registry
         */
        ret = add_hosts_to_registry(&hosts_to_add);
        if (ORTE_SUCCESS != ret) {
            goto cleanup;
        }

        /* Get a new list of registered hosts */
        opal_list_clear(&orte_console_hosts);
        ret = orte_ras.node_query(&orte_console_hosts);
    }

cleanup:
    opal_list_clear(&hosts_to_add);
    OBJ_DESTRUCT(&hosts_to_add);
    return ret;
}
/*
 * "delete" command: remove each host named in argv[1..] from the console's
 * host list.
 *
 * Hosts are matched by exact node name; the first match is moved from the
 * local list onto a temporary list and a message is printed for names that
 * are not found. With an active daemon the temporary list is passed to
 * remove_hosts_from_registry() and the local list is rebuilt from
 * orte_ras.node_query(). The temporary list (and thus the removed nodes) is
 * cleared and destructed on every path.
 *
 * @return ORTE_SUCCESS or the error returned by the registry / node query.
 */
static int orte_console_remove_host(orte_console_input_command_t input_command) {
    int i, ret = ORTE_SUCCESS;
    orte_ras_node_t *tmp_host;
    opal_list_item_t *item;
    opal_list_t hosts_to_remove;
    bool done;

    OBJ_CONSTRUCT(&hosts_to_remove, opal_list_t);

    for (i = 1; i < input_command.argc; ++i) {
        done = false;
        for (item = opal_list_get_first(&orte_console_hosts);
             item != opal_list_get_end(&orte_console_hosts);
             item = opal_list_get_next(item)) {
            tmp_host = (orte_ras_node_t *)item;
            if (0 == strcmp(tmp_host->node_name, input_command.argv[i])) {
                /* Unlinking is safe because iteration stops right here */
                opal_list_remove_item(&orte_console_hosts, item);
                opal_list_append(&hosts_to_remove, item);
                done = true;
                break;
            }
        }
        if (!done) {
            printf("Could not find host <%s>\n", input_command.argv[i]);
        }
    }

    if (daemon_is_active && !opal_list_is_empty(&hosts_to_remove)) {
        /* Delete hosts from registry */
        ret = remove_hosts_from_registry(&hosts_to_remove);
        if (ORTE_SUCCESS != ret) {
            goto cleanup;
        }

        /* Get a new list of registered hosts */
        opal_list_clear(&orte_console_hosts);
        ret = orte_ras.node_query(&orte_console_hosts);
    }

cleanup:
    opal_list_clear(&hosts_to_remove);
    OBJ_DESTRUCT(&hosts_to_remove);
    return ret;
}
/*
 * "conf" command: print a table of the hosts on the console's host list,
 * one row per host (running index, name, slots, slots in use, architecture).
 * When the list is empty the "no-hosts" help message is shown instead.
 * Always returns ORTE_SUCCESS.
 */
static int orte_console_display_configuration(orte_console_input_command_t input_command) {
    opal_list_item_t *cursor;
    orte_ras_node_t *node;
    int row = 0;

    if (opal_list_is_empty(&orte_console_hosts)) {
        opal_show_help("help-orteconsole.txt", "orteconsole:no-hosts", false);
        return ORTE_SUCCESS;
    }

    /* Column headings */
    printf("%6s %15s %10s %13s %15s\n", "Index",
           "Hostname", "CPU(s)",
           "CPU(s) Used", "Arch");

    cursor = opal_list_get_first(&orte_console_hosts);
    while (cursor != opal_list_get_end(&orte_console_hosts)) {
        node = (orte_ras_node_t *)cursor;
        printf("%6d %15s %10lu %13lu %15s\n", row,
               node->node_name, (unsigned long)node->node_slots,
               (unsigned long)node->node_slots_inuse,
               node->node_arch);
        cursor = opal_list_get_next(cursor);
        ++row;
    }

    return ORTE_SUCCESS;
}
/*
 * "boot" command: launch a persistent daemon.
 *
 * The target host is argv[1] when given, otherwise the first host on the
 * console's host list; an optional argv[2] supplies the username. The
 * "persistent" MCA parameter is registered and set to true before
 * orte_setup_hnp() is called. On success daemon_is_active becomes true.
 *
 * NOTE(review): the duplicated host and user strings are never freed in
 * this function; whether orte_setup_hnp() keeps them is not visible from
 * here - confirm before adding frees.
 *
 * @return ORTE_ERROR when no host is available, the orte_setup_hnp() error
 *         on failure, ORTE_SUCCESS otherwise.
 */
static int orte_console_boot_daemons(orte_console_input_command_t input_command) {
    orte_ras_node_t *first_node;
    char *target;
    char *user = NULL;
    int param_id, status;

    /* Need either an explicit host or at least one known host */
    if (1 >= input_command.argc && opal_list_is_empty(&orte_console_hosts)) {
        opal_show_help("help-orteconsole.txt", "orteconsole:no-hosts", false);
        return ORTE_ERROR;
    }

    if (input_command.argc > 1) {
        /* Hostname supplied on the command line */
        target = strdup(input_command.argv[1]);
    } else {
        /* Fall back to the first known node as the primary daemon */
        first_node = (orte_ras_node_t *)opal_list_get_first(&orte_console_hosts);
        target = strdup(first_node->node_name);
    }
    printf("Launching Remote Daemon on \"%s\"", target);

    /* Without an explicit username the console user's name is assumed */
    if (input_command.argc > 2) {
        user = strdup(input_command.argv[2]);
        printf(" Username \"%s\"\n", user);
    } else {
        printf("\n");
    }

    /* Create the persistent daemon */
    param_id = mca_base_param_register_int("persistent",NULL,NULL,NULL,(int)false);
    mca_base_param_set_int(param_id, (int)true);

    status = orte_setup_hnp(NULL, target, user);
    if (ORTE_SUCCESS != status) {
        printf("Open RTE Boot: Failed!\n");
        return status;
    }

    printf("Open RTE Boot: Successful!\n");
    daemon_is_active = true;
    return ORTE_SUCCESS;
}
/*
 * "halt" command: send ORTE_DAEMON_EXIT_CMD to the daemon.
 * Without an active daemon the user is told so and ORTE_SUCCESS is
 * returned; otherwise the result of the send is returned.
 */
static int orte_console_halt(orte_console_input_command_t input_command) {
    if (daemon_is_active) {
        return orte_console_send_command(ORTE_DAEMON_EXIT_CMD);
    }

    opal_show_help("help-orteconsole.txt", "orteconsole:no-daemon-started", false);
    return ORTE_SUCCESS;
}
/*
 * "exit" / "quit" command: raise the flag that ends main()'s work loop.
 * Always returns ORTE_SUCCESS.
 */
static int orte_console_exit(orte_console_input_command_t input_command) {
    (void)input_command; /* takes no arguments */
    exit_cmd = true;
    return ORTE_SUCCESS;
}
/*
 * "help" command.
 *
 * Without an argument, lists every command that is not hidden together
 * with its short alias and description. With an argument, prints the name,
 * alias, usage and description of the first table entry matching argv[1],
 * or the "unknown-command" help message if nothing matches.
 * Always returns ORTE_SUCCESS.
 */
static int orte_console_help(orte_console_input_command_t input_command) {
    orte_console_command_t *entry;
    orte_console_command_t *hit = NULL;

    /*
     * Generic Help
     */
    if (input_command.argc <= 1) {
        printf("Open RTE Console Commands:\n\n");
        for (entry = console_commands; ORTE_CONSOLE_TYPE_NULL != entry->cmd_type; ++entry) {
            if (ORTE_CONSOLE_TYPE_HIDDEN == entry->cmd_type) {
                continue;
            }
            printf("%15s ", entry->cmd_full_name);
            if (NULL != entry->cmd_short_name) {
                printf(" | %5s ", entry->cmd_short_name);
            } else {
                printf(" ");
            }
            printf("\t%s\n", entry->cmd_description);
        }
        printf("\n");
        return ORTE_SUCCESS;
    }

    /*
     * Specific Help Message for a Command
     */
    for (entry = console_commands; ORTE_CONSOLE_TYPE_NULL != entry->cmd_type; ++entry) {
        if (0 == command_cmp(input_command.argv[1], *entry)) {
            hit = entry;
            break;
        }
    }

    /*
     * Command Not Found
     */
    if (NULL == hit) {
        opal_show_help("help-orteconsole.txt", "orteconsole:unknown-command", false,
                       input_command.argv[1]);
        return ORTE_SUCCESS;
    }

    printf("Command:\n");
    printf("\t%s ", hit->cmd_full_name);
    if (NULL != hit->cmd_short_name) {
        printf(" | %5s", hit->cmd_short_name);
    }
    printf("\n");
    if (NULL != hit->cmd_usage) {
        printf("Usage:\n");
        printf("\t%s\n", hit->cmd_usage);
    }
    printf("Description:\n");
    printf("\t%s\n", hit->cmd_description);
    printf("\n");

    return ORTE_SUCCESS;
}
/*
 * Get the contact information for the remote daemon
 *
 * Sends ORTE_DAEMON_CONTACT_QUERY_CMD, waits for the reply on
 * ORTE_RML_TAG_DAEMON, unpacks one string from it and prints that string
 * followed by a newline.
 *
 * The reply is printed through "%s" and never used as a format string,
 * since its content comes from the remote side. The receive buffer and the
 * unpacked string are released on every path.
 *
 * @return ORTE_SUCCESS (also when no daemon is active), or the error from
 *         the send / receive / unpack step that failed.
 */
static int orte_console_contactinfo(orte_console_input_command_t input_command) {
    char *str_response = NULL;
    orte_buffer_t *buffer = NULL;
    orte_process_name_t seed={0,0,0};
    int ret;
    orte_std_cntr_t n;

    if (!daemon_is_active) {
        opal_show_help("help-orteconsole.txt", "orteconsole:no-daemon-started", false);
        return ORTE_SUCCESS;
    }

    /** initialize the buffer */
    buffer = OBJ_NEW(orte_buffer_t);
    if (NULL == buffer) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    /* Start the exchange */
    ret = orte_console_send_command(ORTE_DAEMON_CONTACT_QUERY_CMD);
    if (ORTE_SUCCESS != ret) {
        ORTE_ERROR_LOG(ret);
        goto cleanup;
    }

    ret = orte_rml.recv_buffer(&seed, buffer, ORTE_RML_TAG_DAEMON);
    if (0 > ret) {
        ORTE_ERROR_LOG(ret);
        goto cleanup;
    }

    n = 1;
    ret = orte_dss.unpack(buffer, &str_response, &n, ORTE_STRING);
    if (ORTE_SUCCESS != ret) {
        ORTE_ERROR_LOG(ret);
        goto cleanup;
    }

    printf("%s\n", (NULL != str_response) ? str_response : "");
    ret = ORTE_SUCCESS;

cleanup:
    free(str_response);
    /** cleanup the buffer */
    OBJ_RELEASE(buffer);
    return ret;
}
/*
 * Send a command to the remote daemon
 *
 * Packs the command flag into a fresh buffer and sends it to process
 * {0,0,0} on ORTE_RML_TAG_DAEMON. The buffer is released before returning.
 *
 * Returns ORTE_SUCCESS when the command was sent (and also when no daemon
 * is active, after telling the user), ORTE_ERROR if the buffer could not be
 * created, the pack error code, or ORTE_ERR_COMM_FAILURE if the send failed.
 */
static int orte_console_send_command(orte_daemon_cmd_flag_t usercmd)
{
    orte_process_name_t target = {0,0,0};
    orte_daemon_cmd_flag_t flag = usercmd;
    orte_buffer_t *msg;
    int status;

    if (!daemon_is_active) {
        opal_show_help("help-orteconsole.txt", "orteconsole:no-daemon-started", false);
        return ORTE_SUCCESS;
    }

    msg = OBJ_NEW(orte_buffer_t);
    if (NULL == msg) {
        ORTE_ERROR_LOG(ORTE_ERROR);
        return ORTE_ERROR;
    }

    status = orte_dss.pack(msg, &flag, 1, ORTE_DAEMON_CMD);
    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
    } else if (0 > orte_rml.send_buffer(&target, msg, ORTE_RML_TAG_DAEMON, 0)) {
        /* any negative send result is reported as a communication failure */
        status = ORTE_ERR_COMM_FAILURE;
        ORTE_ERROR_LOG(status);
    }

    OBJ_RELEASE(msg);
    return status;
}
/*
 * Print the console prompt and read one line from stdin.
 *
 * At most ORTE_CONSOLE_MAX_LINE_LENGTH-1 characters are read. A trailing
 * newline, if present, is stripped; a line that arrives without one (end
 * of input, or a line longer than the buffer) is returned unmodified.
 *
 * @return a newly allocated copy of the line, which the caller must free,
 *         or NULL on end-of-file, read error or allocation failure.
 */
char *orte_console_get_input_line()
{
    char input[ORTE_CONSOLE_MAX_LINE_LENGTH];

    printf("orteconsole> ");
    /* the prompt has no newline, so push it out before blocking on input */
    fflush(stdout);

    if (NULL == fgets(input, ORTE_CONSOLE_MAX_LINE_LENGTH, stdin)) {
        return NULL;
    }

    /* Cut at the newline if there is one; otherwise this rewrites the
     * terminating NUL and leaves the text untouched. */
    input[strcspn(input, "\n")] = '\0';

    return strdup(input);
}