1
1

Extend the process and job states by adding values for exceeding sensor bounds. This changes the job state field to 32-bit to also provide room for future expansion.

This commit was SVN r23036.
Этот коммит содержится в:
Ralph Castain 2010-04-26 12:36:40 +00:00
родитель aa5b9eb2ed
Коммит 43a89bbace
2 изменённых файлов: 43 добавлений и 33 удалений

Просмотреть файл

@ -40,63 +40,65 @@ typedef int32_t orte_exit_code_t;
/*
 * Process state codes: the 16-bit state type, its OPAL data-type tag, and
 * the named state constants.
 * NOTE(review): most constants below are listed twice with identical values;
 * this listing appears to show the removed and the added side of a diff
 * together - confirm against the real header before editing.
 */
typedef uint16_t orte_proc_state_t;
#define ORTE_PROC_STATE_T OPAL_UINT16
#define ORTE_PROC_STATE_UNDEF 0x0000 /* undefined process state */
#define ORTE_PROC_STATE_INIT 0x0001 /* process entry has been created by rmaps */
#define ORTE_PROC_STATE_RESTART 0x0002 /* the proc is ready for restart */
#define ORTE_PROC_STATE_LAUNCHED 0x0004 /* process has been launched */
#define ORTE_PROC_STATE_RUNNING 0x0010 /* daemon has locally fork'd process */
#define ORTE_PROC_STATE_REGISTERED 0x0020 /* process has registered for sync */
#define ORTE_PROC_STATE_UNDEF 0x0000 /* undefined process state */
#define ORTE_PROC_STATE_INIT 0x0001 /* process entry has been created by rmaps */
#define ORTE_PROC_STATE_RESTART 0x0002 /* the proc is ready for restart */
#define ORTE_PROC_STATE_LAUNCHED 0x0004 /* process has been launched */
#define ORTE_PROC_STATE_RUNNING 0x0010 /* daemon has locally fork'd process */
#define ORTE_PROC_STATE_REGISTERED 0x0020 /* process has registered for sync */
/*
* Define a "boundary" so we can easily and quickly determine
* if a proc is still running or not - any value less than
* this one means that we are not terminated
*/
#define ORTE_PROC_STATE_UNTERMINATED 0x0040
#define ORTE_PROC_STATE_UNTERMINATED 0x0040
/* Every state from here on has a value above the UNTERMINATED boundary. */
#define ORTE_PROC_STATE_TERMINATED 0x0080 /* process has terminated and is no longer running */
#define ORTE_PROC_STATE_KILLED_BY_CMD 0x0100 /* process was killed by ORTE cmd */
#define ORTE_PROC_STATE_ABORTED 0x0200 /* process aborted */
#define ORTE_PROC_STATE_FAILED_TO_START 0x0400 /* process failed to start */
#define ORTE_PROC_STATE_ABORTED_BY_SIG 0x0800 /* process aborted by signal */
#define ORTE_PROC_STATE_TERM_WO_SYNC 0x1000 /* process exit'd w/o required sync */
#define ORTE_PROC_STATE_COMM_FAILED 0x2000 /* process communication has failed */
#define ORTE_PROC_STATE_TERMINATED 0x0080 /* process has terminated and is no longer running */
#define ORTE_PROC_STATE_KILLED_BY_CMD 0x0100 /* process was killed by ORTE cmd */
#define ORTE_PROC_STATE_ABORTED 0x0200 /* process aborted */
#define ORTE_PROC_STATE_FAILED_TO_START 0x0400 /* process failed to start */
#define ORTE_PROC_STATE_ABORTED_BY_SIG 0x0800 /* process aborted by signal */
#define ORTE_PROC_STATE_TERM_WO_SYNC 0x1000 /* process exit'd w/o required sync */
#define ORTE_PROC_STATE_COMM_FAILED 0x2000 /* process communication has failed */
/* Listed only once: the new state this change adds for processes. */
#define ORTE_PROC_STATE_SENSOR_BOUND_EXCEEDED 0x4000 /* process exceeded a sensor limit */
/*
* Job state codes
*
* NOTE(review): the type, its data-type tag and most constants are listed
* twice, once in a 16-bit form and once in a 32-bit form; this listing
* appears to show the removed and the added side of a diff together, with
* the 32-bit form being the one the change introduces - confirm against
* the real header before editing.
*/
typedef uint16_t orte_job_state_t;
#define ORTE_JOB_STATE_T OPAL_UINT16
typedef uint32_t orte_job_state_t;
#define ORTE_JOB_STATE_T OPAL_UINT32
#define ORTE_JOB_STATE_UNDEF 0x0000
#define ORTE_JOB_STATE_INIT 0x0001 /* job entry has been created by rmaps */
#define ORTE_JOB_STATE_RESTART 0x0002 /* the job is ready for restart after one or more procs failed */
#define ORTE_JOB_STATE_LAUNCHED 0x0004 /* job has been launched by plm */
#define ORTE_JOB_STATE_RUNNING 0x0008 /* all processes have been fork'd */
#define ORTE_JOB_STATE_SUSPENDED 0x0010 /* job has been suspended */
#define ORTE_JOB_STATE_REGISTERED 0x0020 /* all procs registered for sync */
#define ORTE_JOB_STATE_UNDEF 0x00000000
#define ORTE_JOB_STATE_INIT 0x00000001 /* job entry has been created by rmaps */
#define ORTE_JOB_STATE_RESTART 0x00000002 /* the job is ready for restart after one or more procs failed */
#define ORTE_JOB_STATE_LAUNCHED 0x00000004 /* job has been launched by plm */
#define ORTE_JOB_STATE_RUNNING 0x00000008 /* all processes have been fork'd */
#define ORTE_JOB_STATE_SUSPENDED 0x00000010 /* job has been suspended */
#define ORTE_JOB_STATE_REGISTERED 0x00000020 /* all procs registered for sync */
/*
* Define a "boundary" so we can easily and quickly determine
* if a job is still running or not - any value less than
* this one means that we are not terminated
*/
#define ORTE_JOB_STATE_UNTERMINATED 0x0040
#define ORTE_JOB_STATE_UNTERMINATED 0x00000040
/* Every state from here on has a value above the UNTERMINATED boundary. */
#define ORTE_JOB_STATE_TERMINATED 0x0080 /* all processes have terminated and the job is no longer running */
#define ORTE_JOB_STATE_ABORTED 0x0100 /* at least one process aborted, causing job to abort */
#define ORTE_JOB_STATE_FAILED_TO_START 0x0200 /* at least one process failed to start */
#define ORTE_JOB_STATE_ABORTED_BY_SIG 0x0400 /* job was killed by a signal */
#define ORTE_JOB_STATE_ABORTED_WO_SYNC 0x0800 /* job was aborted because proc exit'd w/o required sync */
#define ORTE_JOB_STATE_KILLED_BY_CMD 0x1000 /* job was killed by ORTE cmd */
#define ORTE_JOB_STATE_COMM_FAILED 0x2000 /* communication has failed */
#define ORTE_JOB_STATE_TERMINATED 0x00000080 /* all processes have terminated and the job is no longer running */
#define ORTE_JOB_STATE_ABORTED 0x00000100 /* at least one process aborted, causing job to abort */
#define ORTE_JOB_STATE_FAILED_TO_START 0x00000200 /* at least one process failed to start */
#define ORTE_JOB_STATE_ABORTED_BY_SIG 0x00000400 /* job was killed by a signal */
#define ORTE_JOB_STATE_ABORTED_WO_SYNC 0x00000800 /* job was aborted because proc exit'd w/o required sync */
#define ORTE_JOB_STATE_KILLED_BY_CMD 0x00001000 /* job was killed by ORTE cmd */
#define ORTE_JOB_STATE_COMM_FAILED 0x00002000 /* communication has failed */
/* Listed only once: the new state; it takes the value 0x4000 that the 16-bit form of NEVER_LAUNCHED uses below. */
#define ORTE_JOB_STATE_SENSOR_BOUND_EXCEEDED 0x00004000 /* job had a process that exceeded a sensor limit */
/* the job never even attempted to launch due to an error earlier in the
* launch procedure
*/
/* The two listed values differ: 0x4000 in the 16-bit form, 0x00008000 in the 32-bit form. */
#define ORTE_JOB_STATE_NEVER_LAUNCHED 0x4000
#define ORTE_JOB_STATE_NEVER_LAUNCHED 0x00008000
/* the processes in this job have been ordered to "die", but may not have completed it yet. Don't order it again */
/* The two listed values differ: 0x8000 vs 0x00010000; the latter does not fit in 16 bits. */
#define ORTE_JOB_STATE_ABORT_ORDERED 0x8000
#define ORTE_JOB_STATE_ABORT_ORDERED 0x00010000
/**

Просмотреть файл

@ -170,6 +170,10 @@ const char *orte_job_state_to_str(orte_job_state_t state)
return strdup("KILLED BY INTERNAL COMMAND");
case ORTE_JOB_STATE_COMM_FAILED:
return strdup("COMMUNICATION FAILURE");
case ORTE_JOB_STATE_SENSOR_BOUND_EXCEEDED:
return strdup("SENSOR BOUND EXCEEDED");
break;
case ORTE_JOB_STATE_NEVER_LAUNCHED:
return strdup("NEVER LAUNCHED");
case ORTE_JOB_STATE_ABORT_ORDERED:
@ -210,6 +214,10 @@ const char *orte_proc_state_to_str(orte_proc_state_t state)
return strdup("KILLED BY INTERNAL COMMAND");
case ORTE_PROC_STATE_COMM_FAILED:
return strdup("COMMUNICATION FAILURE");
case ORTE_PROC_STATE_SENSOR_BOUND_EXCEEDED:
return strdup("SENSOR BOUND EXCEEDED");
break;
default:
return strdup("UNKNOWN STATE!");
}