Extend the process and job states by adding values for exceeding sensor bounds. This changes the job state field to 32-bit to also provide room for future expansion.
This commit was SVN r23036.
Этот коммит содержится в:
родитель
aa5b9eb2ed
Коммит
43a89bbace
@ -40,63 +40,65 @@ typedef int32_t orte_exit_code_t;
|
||||
typedef uint16_t orte_proc_state_t;
|
||||
#define ORTE_PROC_STATE_T OPAL_UINT16
|
||||
|
||||
#define ORTE_PROC_STATE_UNDEF 0x0000 /* undefined process state */
|
||||
#define ORTE_PROC_STATE_INIT 0x0001 /* process entry has been created by rmaps */
|
||||
#define ORTE_PROC_STATE_RESTART 0x0002 /* the proc is ready for restart */
|
||||
#define ORTE_PROC_STATE_LAUNCHED 0x0004 /* process has been launched */
|
||||
#define ORTE_PROC_STATE_RUNNING 0x0010 /* daemon has locally fork'd process */
|
||||
#define ORTE_PROC_STATE_REGISTERED 0x0020 /* process has registered for sync */
|
||||
#define ORTE_PROC_STATE_UNDEF 0x0000 /* undefined process state */
|
||||
#define ORTE_PROC_STATE_INIT 0x0001 /* process entry has been created by rmaps */
|
||||
#define ORTE_PROC_STATE_RESTART 0x0002 /* the proc is ready for restart */
|
||||
#define ORTE_PROC_STATE_LAUNCHED 0x0004 /* process has been launched */
|
||||
#define ORTE_PROC_STATE_RUNNING 0x0010 /* daemon has locally fork'd process */
|
||||
#define ORTE_PROC_STATE_REGISTERED 0x0020 /* process has registered for sync */
|
||||
/*
|
||||
* Define a "boundary" so we can easily and quickly determine
|
||||
* if a proc is still running or not - any value less than
|
||||
* this one means that we are not terminated
|
||||
*/
|
||||
#define ORTE_PROC_STATE_UNTERMINATED 0x0040
|
||||
#define ORTE_PROC_STATE_UNTERMINATED 0x0040
|
||||
|
||||
#define ORTE_PROC_STATE_TERMINATED 0x0080 /* process has terminated and is no longer running */
|
||||
#define ORTE_PROC_STATE_KILLED_BY_CMD 0x0100 /* process was killed by ORTE cmd */
|
||||
#define ORTE_PROC_STATE_ABORTED 0x0200 /* process aborted */
|
||||
#define ORTE_PROC_STATE_FAILED_TO_START 0x0400 /* process failed to start */
|
||||
#define ORTE_PROC_STATE_ABORTED_BY_SIG 0x0800 /* process aborted by signal */
|
||||
#define ORTE_PROC_STATE_TERM_WO_SYNC 0x1000 /* process exit'd w/o required sync */
|
||||
#define ORTE_PROC_STATE_COMM_FAILED 0x2000 /* process communication has failed */
|
||||
#define ORTE_PROC_STATE_TERMINATED 0x0080 /* process has terminated and is no longer running */
|
||||
#define ORTE_PROC_STATE_KILLED_BY_CMD 0x0100 /* process was killed by ORTE cmd */
|
||||
#define ORTE_PROC_STATE_ABORTED 0x0200 /* process aborted */
|
||||
#define ORTE_PROC_STATE_FAILED_TO_START 0x0400 /* process failed to start */
|
||||
#define ORTE_PROC_STATE_ABORTED_BY_SIG 0x0800 /* process aborted by signal */
|
||||
#define ORTE_PROC_STATE_TERM_WO_SYNC 0x1000 /* process exit'd w/o required sync */
|
||||
#define ORTE_PROC_STATE_COMM_FAILED 0x2000 /* process communication has failed */
|
||||
#define ORTE_PROC_STATE_SENSOR_BOUND_EXCEEDED 0x4000 /* process exceeded a sensor limit */
|
||||
|
||||
/*
|
||||
* Job state codes
|
||||
*/
|
||||
|
||||
typedef uint16_t orte_job_state_t;
|
||||
#define ORTE_JOB_STATE_T OPAL_UINT16
|
||||
/* Job state is a 32-bit value: ORTE_JOB_STATE_ABORT_ORDERED (0x00010000) does not fit in 16 bits. */
typedef uint32_t orte_job_state_t;
|
||||
#define ORTE_JOB_STATE_T OPAL_UINT32
|
||||
|
||||
#define ORTE_JOB_STATE_UNDEF 0x0000
|
||||
#define ORTE_JOB_STATE_INIT 0x0001 /* job entry has been created by rmaps */
|
||||
#define ORTE_JOB_STATE_RESTART 0x0002 /* the job is ready for restart after one or more procs failed */
|
||||
#define ORTE_JOB_STATE_LAUNCHED 0x0004 /* job has been launched by plm */
|
||||
#define ORTE_JOB_STATE_RUNNING 0x0008 /* all processes have been fork'd */
|
||||
#define ORTE_JOB_STATE_SUSPENDED 0x0010 /* job has been suspended */
|
||||
#define ORTE_JOB_STATE_REGISTERED 0x0020 /* all procs registered for sync */
|
||||
#define ORTE_JOB_STATE_UNDEF 0x00000000
|
||||
#define ORTE_JOB_STATE_INIT 0x00000001 /* job entry has been created by rmaps */
|
||||
#define ORTE_JOB_STATE_RESTART 0x00000002 /* the job is ready for restart after one or more procs failed */
|
||||
#define ORTE_JOB_STATE_LAUNCHED 0x00000004 /* job has been launched by plm */
|
||||
#define ORTE_JOB_STATE_RUNNING 0x00000008 /* all processes have been fork'd */
|
||||
#define ORTE_JOB_STATE_SUSPENDED 0x00000010 /* job has been suspended */
|
||||
#define ORTE_JOB_STATE_REGISTERED 0x00000020 /* all procs registered for sync */
|
||||
/*
|
||||
* Define a "boundary" so we can easily and quickly determine
|
||||
* if a job is still running or not - any value less than
|
||||
* this one means that we are not terminated
|
||||
*/
|
||||
#define ORTE_JOB_STATE_UNTERMINATED 0x0040
|
||||
#define ORTE_JOB_STATE_UNTERMINATED 0x00000040
|
||||
|
||||
#define ORTE_JOB_STATE_TERMINATED 0x0080 /* all processes have terminated and the job is no longer running */
|
||||
#define ORTE_JOB_STATE_ABORTED 0x0100 /* at least one process aborted, causing job to abort */
|
||||
#define ORTE_JOB_STATE_FAILED_TO_START 0x0200 /* at least one process failed to start */
|
||||
#define ORTE_JOB_STATE_ABORTED_BY_SIG 0x0400 /* job was killed by a signal */
|
||||
#define ORTE_JOB_STATE_ABORTED_WO_SYNC 0x0800 /* job was aborted because proc exit'd w/o required sync */
|
||||
#define ORTE_JOB_STATE_KILLED_BY_CMD 0x1000 /* job was killed by ORTE cmd */
|
||||
#define ORTE_JOB_STATE_COMM_FAILED 0x2000 /* communication has failed */
|
||||
#define ORTE_JOB_STATE_TERMINATED 0x00000080 /* all processes have terminated and the job is no longer running */
|
||||
#define ORTE_JOB_STATE_ABORTED 0x00000100 /* at least one process aborted, causing job to abort */
|
||||
#define ORTE_JOB_STATE_FAILED_TO_START 0x00000200 /* at least one process failed to start */
|
||||
#define ORTE_JOB_STATE_ABORTED_BY_SIG 0x00000400 /* job was killed by a signal */
|
||||
#define ORTE_JOB_STATE_ABORTED_WO_SYNC 0x00000800 /* job was aborted because proc exit'd w/o required sync */
|
||||
#define ORTE_JOB_STATE_KILLED_BY_CMD 0x00001000 /* job was killed by ORTE cmd */
|
||||
#define ORTE_JOB_STATE_COMM_FAILED 0x00002000 /* communication has failed */
|
||||
#define ORTE_JOB_STATE_SENSOR_BOUND_EXCEEDED 0x00004000 /* job had a process that exceeded a sensor limit */
|
||||
|
||||
/* the job never even attempted to launch due to an error earlier in the
|
||||
* launch procedure
|
||||
*/
|
||||
#define ORTE_JOB_STATE_NEVER_LAUNCHED 0x4000
|
||||
#define ORTE_JOB_STATE_NEVER_LAUNCHED 0x00008000
|
||||
|
||||
/* the processes in this job have been ordered to "die", but may not have completed it yet. Don't order it again */
|
||||
#define ORTE_JOB_STATE_ABORT_ORDERED 0x8000
|
||||
#define ORTE_JOB_STATE_ABORT_ORDERED 0x00010000
|
||||
|
||||
|
||||
/**
|
||||
|
@ -170,6 +170,10 @@ const char *orte_job_state_to_str(orte_job_state_t state)
|
||||
return strdup("KILLED BY INTERNAL COMMAND");
|
||||
case ORTE_JOB_STATE_COMM_FAILED:
|
||||
return strdup("COMMUNICATION FAILURE");
|
||||
case ORTE_JOB_STATE_SENSOR_BOUND_EXCEEDED:
|
||||
return strdup("SENSOR BOUND EXCEEDED");
|
||||
break;
|
||||
|
||||
case ORTE_JOB_STATE_NEVER_LAUNCHED:
|
||||
return strdup("NEVER LAUNCHED");
|
||||
case ORTE_JOB_STATE_ABORT_ORDERED:
|
||||
@ -210,6 +214,10 @@ const char *orte_proc_state_to_str(orte_proc_state_t state)
|
||||
return strdup("KILLED BY INTERNAL COMMAND");
|
||||
case ORTE_PROC_STATE_COMM_FAILED:
|
||||
return strdup("COMMUNICATION FAILURE");
|
||||
case ORTE_PROC_STATE_SENSOR_BOUND_EXCEEDED:
|
||||
return strdup("SENSOR BOUND EXCEEDED");
|
||||
break;
|
||||
|
||||
default:
|
||||
return strdup("UNKNOWN STATE!");
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user