1
1
openmpi/orte/util/error_strings.c

455 строки
15 KiB
C

/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file **/
#include "orte_config.h"
#include "orte/constants.h"
#include <stdio.h>
#ifdef HAVE_SYS_SIGNAL_H
#include <sys/signal.h>
#else
#ifdef HAVE_SIGNAL_H
#include <signal.h>
#endif
#endif
#include "orte/mca/plm/plm_types.h"
#include "orte/util/error_strings.h"
#include "orte/runtime/orte_globals.h"
int orte_err2str(int errnum, const char **errmsg)
{
const char *retval;
switch (errnum) {
case ORTE_SUCCESS:
retval = "Success";
break;
case ORTE_ERR_RECV_LESS_THAN_POSTED:
retval = "Receive was less than posted size";
break;
case ORTE_ERR_RECV_MORE_THAN_POSTED:
retval = "Receive was greater than posted size";
break;
case ORTE_ERR_NO_MATCH_YET:
retval = "No match for receive posted";
break;
case ORTE_ERR_REQUEST:
retval = "Request error";
break;
case ORTE_ERR_NO_CONNECTION_ALLOWED:
retval = "No connection allowed";
break;
case ORTE_ERR_CONNECTION_REFUSED:
retval = "Connection refused";
break;
case ORTE_ERR_TYPE_MISMATCH:
retval = "Type mismatch";
break;
case ORTE_ERR_COMPARE_FAILURE:
retval = "Data comparison failure";
break;
case ORTE_ERR_COPY_FAILURE:
retval = "Data copy failure";
break;
case ORTE_ERR_PROC_STATE_MISSING:
retval = "The process state information is missing on the registry";
break;
case ORTE_ERR_PROC_EXIT_STATUS_MISSING:
retval = "The process exit status is missing on the registry";
break;
case ORTE_ERR_INDETERMINATE_STATE_INFO:
retval = "Request for state returned multiple responses";
break;
case ORTE_ERR_NODE_FULLY_USED:
retval = "All the slots on a given node have been used";
break;
case ORTE_ERR_INVALID_NUM_PROCS:
retval = "Multiple applications were specified, but at least one failed to specify the number of processes to run";
break;
case ORTE_ERR_SILENT:
if (orte_report_silent_errors) {
retval = "Silent error";
} else {
retval = NULL;
}
break;
case ORTE_ERR_ADDRESSEE_UNKNOWN:
retval = "A message is attempting to be sent to a process whose contact information is unknown";
break;
case ORTE_ERR_SYS_LIMITS_PIPES:
retval = "The system limit on number of pipes a process can open was reached";
break;
case ORTE_ERR_PIPE_SETUP_FAILURE:
retval = "A pipe could not be setup between a daemon and one of its local processes";
break;
case ORTE_ERR_SYS_LIMITS_CHILDREN:
retval = "The system limit on number of children a process can have was reached";
break;
case ORTE_ERR_FAILED_GET_TERM_ATTRS:
retval = "The I/O forwarding system was unable to get the attributes of your terminal";
break;
case ORTE_ERR_WDIR_NOT_FOUND:
retval = "The specified working directory could not be found";
break;
case ORTE_ERR_EXE_NOT_FOUND:
retval = "The specified executable could not be found";
break;
case ORTE_ERR_PIPE_READ_FAILURE:
retval = "A pipe could not be read";
break;
case ORTE_ERR_EXE_NOT_ACCESSIBLE:
retval = "The specified executable could not be executed";
break;
case ORTE_ERR_FAILED_TO_START:
retval = "The specified application failed to start";
break;
case ORTE_ERR_FILE_NOT_EXECUTABLE:
retval = "A system-required executable either could not be found or was not executable by this user";
break;
case ORTE_ERR_HNP_COULD_NOT_START:
retval = "Unable to start a daemon on the local node";
break;
case ORTE_ERR_SYS_LIMITS_SOCKETS:
retval = "The system limit on number of network connections a process can open was reached";
break;
case ORTE_ERR_SOCKET_NOT_AVAILABLE:
retval = "Unable to open a TCP socket for out-of-band communications";
break;
case ORTE_ERR_SYSTEM_WILL_BOOTSTRAP:
retval = "System will determine resources during bootstrap of daemons";
break;
case ORTE_ERR_RESTART_LIMIT_EXCEEDED:
retval = "Limit on number of process restarts was exceeded";
break;
case ORTE_ERR_INVALID_NODE_RANK:
retval = "Invalid node rank";
break;
case ORTE_ERR_INVALID_LOCAL_RANK:
retval = "Invalid local rank";
break;
case ORTE_ERR_UNRECOVERABLE:
retval = "Unrecoverable error";
break;
case ORTE_ERR_MEM_LIMIT_EXCEEDED:
retval = "Memory limit exceeded";
break;
case ORTE_ERR_HEARTBEAT_LOST:
retval = "Heartbeat lost";
break;
case ORTE_ERR_PROC_STALLED:
retval = "Proc appears to be stalled";
break;
case ORTE_ERR_NO_APP_SPECIFIED:
retval = "No application specified";
break;
case ORTE_ERR_NO_EXE_SPECIFIED:
retval = "No executable specified";
break;
case ORTE_ERR_COMM_DISABLED:
retval = "Communications have been disabled";
break;
case ORTE_ERR_FAILED_TO_MAP:
retval = "Unable to map job";
break;
case ORTE_ERR_TAKE_NEXT_OPTION:
if (orte_report_silent_errors) {
retval = "Next option";
} else {
retval = NULL;
}
break;
case ORTE_ERR_SENSOR_LIMIT_EXCEEDED:
retval = "Sensor limit exceeded";
break;
case ORTE_ERR_PROC_ENTRY_NOT_FOUND:
retval = "Proc entry not found";
break;
case ORTE_ERR_DATA_VALUE_NOT_FOUND:
retval = "Data not found";
break;
case ORTE_ERR_ALLOCATION_PENDING:
retval = "Allocation pending";
break;
case ORTE_ERR_NO_PATH_TO_TARGET:
retval = "No OOB path to target";
break;
case ORTE_ERR_OP_IN_PROGRESS:
retval = "Operation in progress";
break;
case ORTE_ERR_OPEN_CHANNEL_PEER_FAIL:
retval = "Open channel to peer failed";
break;
case ORTE_ERR_OPEN_CHANNEL_PEER_REJECT:
retval = "Open channel to peer was rejected";
break;
case ORTE_ERR_QOS_TYPE_UNSUPPORTED:
retval = "QoS type unsupported";
break;
case ORTE_ERR_QOS_ACK_WINDOW_FULL:
retval = "QoS ack window full";
break;
case ORTE_ERR_ACK_TIMEOUT_SENDER:
retval = "Send ack timed out";
break;
case ORTE_ERR_ACK_TIMEOUT_RECEIVER:
retval = "Recv ack timed out";
break;
case ORTE_ERR_LOST_MSG_IN_WINDOW:
retval = "Msg lost in window";
break;
case ORTE_ERR_CHANNEL_BUSY:
retval = "Channel busy";
break;
case ORTE_ERR_DUPLICATE_MSG:
retval = "Duplicate message";
break;
case ORTE_ERR_OUT_OF_ORDER_MSG:
retval = "Out of order message";
break;
case ORTE_ERR_OPEN_CHANNEL_DUPLICATE:
retval = "Duplicate channel open request";
break;
case ORTE_ERR_FORCE_SELECT:
retval = "Force select";
break;
default:
if (orte_report_silent_errors) {
retval = "Unknown error";
} else {
retval = NULL;
}
}
*errmsg = retval;
return ORTE_SUCCESS;
}
/*
 * Return a printable name for a job state.
 *
 * The result always points to a string literal (never NULL); a value
 * that matches none of the listed states yields "UNKNOWN STATE!".
 */
const char *orte_job_state_to_str(orte_job_state_t state)
{
    const char *label;

    switch (state) {
    case ORTE_JOB_STATE_UNDEF:
        label = "UNDEFINED";
        break;
    case ORTE_JOB_STATE_INIT:
        label = "PENDING INIT";
        break;
    case ORTE_JOB_STATE_INIT_COMPLETE:
        label = "INIT_COMPLETE";
        break;
    case ORTE_JOB_STATE_ALLOCATE:
        label = "PENDING ALLOCATION";
        break;
    case ORTE_JOB_STATE_ALLOCATION_COMPLETE:
        label = "ALLOCATION COMPLETE";
        break;
    case ORTE_JOB_STATE_MAP:
        label = "PENDING MAPPING";
        break;
    case ORTE_JOB_STATE_MAP_COMPLETE:
        label = "MAP COMPLETE";
        break;
    case ORTE_JOB_STATE_SYSTEM_PREP:
        label = "PENDING FINAL SYSTEM PREP";
        break;
    case ORTE_JOB_STATE_LAUNCH_DAEMONS:
        label = "PENDING DAEMON LAUNCH";
        break;
    case ORTE_JOB_STATE_DAEMONS_LAUNCHED:
        label = "DAEMONS LAUNCHED";
        break;
    case ORTE_JOB_STATE_DAEMONS_REPORTED:
        label = "ALL DAEMONS REPORTED";
        break;
    case ORTE_JOB_STATE_VM_READY:
        label = "VM READY";
        break;
    case ORTE_JOB_STATE_LAUNCH_APPS:
        label = "PENDING APP LAUNCH";
        break;
    case ORTE_JOB_STATE_RUNNING:
        label = "RUNNING";
        break;
    case ORTE_JOB_STATE_SUSPENDED:
        label = "SUSPENDED";
        break;
    case ORTE_JOB_STATE_REGISTERED:
        label = "SYNC REGISTERED";
        break;
    case ORTE_JOB_STATE_READY_FOR_DEBUGGERS:
        label = "READY FOR DEBUGGERS";
        break;
    case ORTE_JOB_STATE_LOCAL_LAUNCH_COMPLETE:
        label = "LOCAL LAUNCH COMPLETE";
        break;
    case ORTE_JOB_STATE_UNTERMINATED:
        label = "UNTERMINATED";
        break;
    case ORTE_JOB_STATE_TERMINATED:
        label = "NORMALLY TERMINATED";
        break;
    case ORTE_JOB_STATE_NOTIFY_COMPLETED:
        label = "NOTIFY COMPLETED";
        break;
    case ORTE_JOB_STATE_NOTIFIED:
        label = "NOTIFIED";
        break;
    case ORTE_JOB_STATE_ALL_JOBS_COMPLETE:
        label = "ALL JOBS COMPLETE";
        break;
    case ORTE_JOB_STATE_ERROR:
        label = "ARTIFICIAL BOUNDARY - ERROR";
        break;
    case ORTE_JOB_STATE_KILLED_BY_CMD:
        label = "KILLED BY INTERNAL COMMAND";
        break;
    case ORTE_JOB_STATE_ABORTED:
        label = "ABORTED";
        break;
    case ORTE_JOB_STATE_FAILED_TO_START:
        label = "FAILED TO START";
        break;
    case ORTE_JOB_STATE_ABORTED_BY_SIG:
        label = "ABORTED BY SIGNAL";
        break;
    case ORTE_JOB_STATE_ABORTED_WO_SYNC:
        label = "TERMINATED WITHOUT SYNC";
        break;
    case ORTE_JOB_STATE_COMM_FAILED:
        label = "COMMUNICATION FAILURE";
        break;
    case ORTE_JOB_STATE_SENSOR_BOUND_EXCEEDED:
        label = "SENSOR BOUND EXCEEDED";
        break;
    case ORTE_JOB_STATE_CALLED_ABORT:
        label = "PROC CALLED ABORT";
        break;
    case ORTE_JOB_STATE_HEARTBEAT_FAILED:
        label = "HEARTBEAT FAILED";
        break;
    case ORTE_JOB_STATE_NEVER_LAUNCHED:
        label = "NEVER LAUNCHED";
        break;
    case ORTE_JOB_STATE_ABORT_ORDERED:
        label = "ABORT IN PROGRESS";
        break;
    case ORTE_JOB_STATE_NON_ZERO_TERM:
        label = "AT LEAST ONE PROCESS EXITED WITH NON-ZERO STATUS";
        break;
    case ORTE_JOB_STATE_FAILED_TO_LAUNCH:
        label = "FAILED TO LAUNCH";
        break;
    case ORTE_JOB_STATE_FORCED_EXIT:
        label = "FORCED EXIT";
        break;
    case ORTE_JOB_STATE_DAEMONS_TERMINATED:
        label = "DAEMONS TERMINATED";
        break;
    case ORTE_JOB_STATE_SILENT_ABORT:
        label = "ERROR REPORTED ELSEWHERE";
        break;
    case ORTE_JOB_STATE_REPORT_PROGRESS:
        label = "REPORT PROGRESS";
        break;
    case ORTE_JOB_STATE_ALLOC_FAILED:
        label = "ALLOCATION FAILED";
        break;
    case ORTE_JOB_STATE_MAP_FAILED:
        label = "MAP FAILED";
        break;
    case ORTE_JOB_STATE_CANNOT_LAUNCH:
        label = "CANNOT LAUNCH";
        break;
    case ORTE_JOB_STATE_FT_CHECKPOINT:
        label = "FAULT TOLERANCE CHECKPOINT";
        break;
    case ORTE_JOB_STATE_FT_CONTINUE:
        label = "FAULT TOLERANCE CONTINUE";
        break;
    case ORTE_JOB_STATE_FT_RESTART:
        label = "FAULT TOLERANCE RESTART";
        break;
    case ORTE_JOB_STATE_ANY:
        label = "ANY";
        break;
    default:
        label = "UNKNOWN STATE!";
        break;
    }

    return label;
}
/*
 * Return a printable name for an application-context state.
 *
 * The result always points to a string literal (never NULL); a value
 * that matches none of the listed states yields "UNKNOWN STATE!".
 */
const char *orte_app_ctx_state_to_str(orte_app_state_t state)
{
    const char *label;

    switch (state) {
    case ORTE_APP_STATE_UNDEF:
        label = "UNDEFINED";
        break;
    case ORTE_APP_STATE_INIT:
        label = "PENDING INIT";
        break;
    case ORTE_APP_STATE_ALL_MAPPED:
        label = "ALL MAPPED";
        break;
    case ORTE_APP_STATE_RUNNING:
        label = "RUNNING";
        break;
    case ORTE_APP_STATE_COMPLETED:
        label = "COMPLETED";
        break;
    default:
        label = "UNKNOWN STATE!";
        break;
    }

    return label;
}
/*
 * Return a printable name for a process state.
 *
 * The result always points to a string literal (never NULL); a value
 * that matches none of the listed states yields "UNKNOWN STATE!".
 */
const char *orte_proc_state_to_str(orte_proc_state_t state)
{
    const char *label;

    switch (state) {
    case ORTE_PROC_STATE_UNDEF:
        label = "UNDEFINED";
        break;
    case ORTE_PROC_STATE_INIT:
        label = "INITIALIZED";
        break;
    case ORTE_PROC_STATE_RESTART:
        label = "RESTARTING";
        break;
    case ORTE_PROC_STATE_TERMINATE:
        label = "MARKED FOR TERMINATION";
        break;
    case ORTE_PROC_STATE_RUNNING:
        label = "RUNNING";
        break;
    case ORTE_PROC_STATE_REGISTERED:
        label = "SYNC REGISTERED";
        break;
    case ORTE_PROC_STATE_IOF_COMPLETE:
        label = "IOF COMPLETE";
        break;
    case ORTE_PROC_STATE_WAITPID_FIRED:
        label = "WAITPID FIRED";
        break;
    case ORTE_PROC_STATE_UNTERMINATED:
        label = "UNTERMINATED";
        break;
    case ORTE_PROC_STATE_TERMINATED:
        label = "NORMALLY TERMINATED";
        break;
    case ORTE_PROC_STATE_ERROR:
        label = "ARTIFICIAL BOUNDARY - ERROR";
        break;
    case ORTE_PROC_STATE_KILLED_BY_CMD:
        label = "KILLED BY INTERNAL COMMAND";
        break;
    case ORTE_PROC_STATE_ABORTED:
        label = "ABORTED";
        break;
    case ORTE_PROC_STATE_FAILED_TO_START:
        label = "FAILED TO START";
        break;
    case ORTE_PROC_STATE_ABORTED_BY_SIG:
        label = "ABORTED BY SIGNAL";
        break;
    case ORTE_PROC_STATE_TERM_WO_SYNC:
        label = "TERMINATED WITHOUT SYNC";
        break;
    case ORTE_PROC_STATE_COMM_FAILED:
        label = "COMMUNICATION FAILURE";
        break;
    case ORTE_PROC_STATE_SENSOR_BOUND_EXCEEDED:
        label = "SENSOR BOUND EXCEEDED";
        break;
    case ORTE_PROC_STATE_CALLED_ABORT:
        label = "CALLED ABORT";
        break;
    case ORTE_PROC_STATE_HEARTBEAT_FAILED:
        label = "HEARTBEAT FAILED";
        break;
    case ORTE_PROC_STATE_MIGRATING:
        label = "MIGRATING";
        break;
    case ORTE_PROC_STATE_CANNOT_RESTART:
        label = "CANNOT BE RESTARTED";
        break;
    case ORTE_PROC_STATE_TERM_NON_ZERO:
        label = "EXITED WITH NON-ZERO STATUS";
        break;
    case ORTE_PROC_STATE_FAILED_TO_LAUNCH:
        label = "FAILED TO LAUNCH";
        break;
    case ORTE_PROC_STATE_UNABLE_TO_SEND_MSG:
        label = "UNABLE TO SEND MSG";
        break;
    case ORTE_PROC_STATE_LIFELINE_LOST:
        label = "LIFELINE LOST";
        break;
    case ORTE_PROC_STATE_ANY:
        label = "ANY";
        break;
    default:
        label = "UNKNOWN STATE!";
        break;
    }

    return label;
}
/*
 * Return a printable name for a node state.
 *
 * The result always points to a string literal (never NULL); a value
 * that matches none of the listed states yields "UNKNOWN STATE!".
 * Note that ORTE_NODE_STATE_UNKNOWN is itself a listed state and maps
 * to "UNKNOWN".
 */
const char *orte_node_state_to_str(orte_node_state_t state)
{
    const char *label;

    switch (state) {
    case ORTE_NODE_STATE_UNDEF:
        label = "UNDEF";
        break;
    case ORTE_NODE_STATE_UNKNOWN:
        label = "UNKNOWN";
        break;
    case ORTE_NODE_STATE_DOWN:
        label = "DOWN";
        break;
    case ORTE_NODE_STATE_UP:
        label = "UP";
        break;
    case ORTE_NODE_STATE_REBOOT:
        label = "REBOOT";
        break;
    case ORTE_NODE_STATE_DO_NOT_USE:
        label = "DO_NOT_USE";
        break;
    case ORTE_NODE_STATE_NOT_INCLUDED:
        label = "NOT_INCLUDED";
        break;
    case ORTE_NODE_STATE_ADDED:
        label = "ADDED";
        break;
    default:
        label = "UNKNOWN STATE!";
        break;
    }

    return label;
}