2014-06-01 20:14:10 +04:00
/*
2017-01-22 01:03:19 +03:00
* Copyright ( c ) 2014 - 2017 Intel , Inc . All rights reserved .
2016-10-16 05:58:05 +03:00
* Copyright ( c ) 2016 Research Organization for Information Science
* and Technology ( RIST ) . All rights reserved .
2014-06-01 20:14:10 +04:00
* $ COPYRIGHT $
2015-04-30 02:17:55 +03:00
*
2014-06-01 20:14:10 +04:00
* Additional copyrights may follow
2015-04-30 02:17:55 +03:00
*
2014-06-01 20:14:10 +04:00
* $ HEADER $
*/
# ifndef ORTE_ATTRS_H
# define ORTE_ATTRS_H
# include "orte_config.h"
# include "orte/types.h"
/*
 * Flag used when setting an attribute: indicates whether the attribute
 * is kept for local use or is to be shared with remote procs.
 */
#define ORTE_ATTR_LOCAL     true     // for local use only
#define ORTE_ATTR_GLOBAL    false    // include when sending this object
2014-11-19 20:36:47 +03:00
/*
 * Minimum value of the ORTE keys, defined just in case
 * someone someday puts a layer underneath us.
 */
#define ORTE_ATTR_KEY_BASE    0
2014-06-01 20:14:10 +04:00
/*** APP_CONTEXT FLAGS - never sent anywhere ***/
typedef uint8_t orte_app_context_flags_t;
#define ORTE_APP_FLAG_USED_ON_NODE    0x01    // app is being used on the local node
/*
 * APP_CONTEXT ATTRIBUTE KEYS
 *
 * Each key is annotated with the data type of its value followed by a
 * short description. ORTE_APP_MAX_KEY closes this key group; the node
 * attribute keys start from it.
 */
#define ORTE_APP_HOSTFILE          1     // string - hostfile
#define ORTE_APP_ADD_HOSTFILE      2     // string - hostfile to be added
#define ORTE_APP_DASH_HOST         3     // string - hosts specified with -host option
#define ORTE_APP_ADD_HOST          4     // string - hosts to be added
#define ORTE_APP_USER_CWD          5     // bool - user specified cwd
#define ORTE_APP_SSNDIR_CWD        6     // bool - use session dir as cwd
#define ORTE_APP_PRELOAD_BIN       7     // bool - move binaries to remote nodes prior to exec
#define ORTE_APP_PRELOAD_FILES     8     // string - files to be moved to remote nodes prior to exec
#define ORTE_APP_SSTORE_LOAD       9     // string - (meaning not documented in this header)
#define ORTE_APP_RECOV_DEF         10    // bool - whether or not a recovery policy was defined
#define ORTE_APP_MAX_RESTARTS      11    // int32 - max number of times a process can be restarted
#define ORTE_APP_MIN_NODES         12    // int64 - min number of nodes required
#define ORTE_APP_MANDATORY         13    // bool - flag if nodes requested in -host are "mandatory" vs "optional"
#define ORTE_APP_MAX_PPN           14    // uint32 - maximum number of procs/node for this app
#define ORTE_APP_PREFIX_DIR        15    // string - prefix directory for this app, if override necessary
#define ORTE_APP_NO_CACHEDIR       16    // bool - flag that a cache dir is not to be specified for a Singularity container

#define ORTE_APP_MAX_KEY           100
/*** NODE FLAGS - never sent anywhere ***/
/* One bit per flag, stored in an 8-bit mask. */
typedef uint8_t orte_node_flags_t;
#define ORTE_NODE_FLAG_DAEMON_LAUNCHED    0x01    // whether or not the daemon on this node has been launched
#define ORTE_NODE_FLAG_LOC_VERIFIED       0x02    // whether or not the location has been verified - used for
                                                  // environments where the daemon's final destination is uncertain
#define ORTE_NODE_FLAG_OVERSUBSCRIBED     0x04    // whether or not this node is oversubscribed
#define ORTE_NODE_FLAG_MAPPED             0x08    // whether we have been added to the current map
#define ORTE_NODE_FLAG_SLOTS_GIVEN        0x10    // the number of slots was specified - used only in non-managed environments
/*** NODE ATTRIBUTE KEYS - never sent anywhere ***/
/* Values are offsets from ORTE_NODE_START_KEY, which is the end of the
 * app_context key group; ORTE_NODE_MAX_KEY closes this group. */
#define ORTE_NODE_START_KEY       ORTE_APP_MAX_KEY

// NOTE(review): value type of ORTE_NODE_USERNAME is not documented here; presumably a string - confirm
#define ORTE_NODE_USERNAME        (ORTE_NODE_START_KEY + 1)
#define ORTE_NODE_LAUNCH_ID       (ORTE_NODE_START_KEY + 2)    // int32 - Launch id needed by some systems to launch a proc on this node
#define ORTE_NODE_HOSTID          (ORTE_NODE_START_KEY + 3)    // orte_vpid_t - if this "node" is a coprocessor being hosted on a different node, then
                                                               // we need to know the id of our "host" to help any procs on us to determine locality
#define ORTE_NODE_ALIAS           (ORTE_NODE_START_KEY + 4)    // comma-separated list of alternate names for the node
#define ORTE_NODE_SERIAL_NUMBER   (ORTE_NODE_START_KEY + 5)    // string - serial number: used if node is a coprocessor
#define ORTE_NODE_PORT            (ORTE_NODE_START_KEY + 6)    // int32 - Alternate port to be passed to plm

#define ORTE_NODE_MAX_KEY         200
/*** JOB FLAGS - included in orte_job_t transmissions ***/
/* One bit per flag, stored in a 16-bit mask. ORTE_JOB_FLAGS_T names the
 * OPAL data type that corresponds to orte_job_flags_t. */
typedef uint16_t orte_job_flags_t;
#define ORTE_JOB_FLAGS_T                  OPAL_UINT16
#define ORTE_JOB_FLAG_UPDATED             0x0001    // job has been updated and needs to be included in the pidmap message
#define ORTE_JOB_FLAG_GANG_LAUNCHED       0x0002    // MPI is allowed on this job - i.e., all members of the job were simultaneously launched
#define ORTE_JOB_FLAG_RESTARTED           0x0004    // some procs in this job are being restarted
#define ORTE_JOB_FLAG_ABORTED             0x0008    // did this job abort?
#define ORTE_JOB_FLAG_DEBUGGER_DAEMON     0x0010    // job is launching debugger daemons
#define ORTE_JOB_FLAG_FORWARD_OUTPUT      0x0020    // forward output from the apps
#define ORTE_JOB_FLAG_DO_NOT_MONITOR      0x0040    // do not monitor apps for termination
#define ORTE_JOB_FLAG_FORWARD_COMM        0x0080    // (undocumented in this header)
#define ORTE_JOB_FLAG_RECOVERABLE         0x0100    // job is recoverable
#define ORTE_JOB_FLAG_RESTART             0x0200    // (undocumented in this header)
#define ORTE_JOB_FLAG_PROCS_MIGRATING     0x0400    // some procs in job are migrating from one node to another
#define ORTE_JOB_FLAG_OVERSUBSCRIBED      0x0800    // at least one node in the job is oversubscribed
2014-06-01 20:14:10 +04:00
/*** JOB ATTRIBUTE KEYS ***/
/* Values are offsets from ORTE_JOB_START_KEY, which is the end of the node
 * key group; ORTE_JOB_MAX_KEY closes this group. Each key is annotated with
 * the data type of its value followed by a short description. */
#define ORTE_JOB_START_KEY              ORTE_NODE_MAX_KEY

#define ORTE_JOB_LAUNCH_MSG_SENT        (ORTE_JOB_START_KEY + 1)     // timeval - time launch message was sent
#define ORTE_JOB_LAUNCH_MSG_RECVD       (ORTE_JOB_START_KEY + 2)     // timeval - time launch message was recvd
#define ORTE_JOB_MAX_LAUNCH_MSG_RECVD   (ORTE_JOB_START_KEY + 3)     // timeval - max time for launch msg to be received
#define ORTE_JOB_FILE_MAPS              (ORTE_JOB_START_KEY + 4)     // opal_buffer_t - file maps associated with this job
#define ORTE_JOB_CKPT_STATE             (ORTE_JOB_START_KEY + 5)     // size_t - ckpt state
#define ORTE_JOB_SNAPSHOT_REF           (ORTE_JOB_START_KEY + 6)     // string - snapshot reference
#define ORTE_JOB_SNAPSHOT_LOC           (ORTE_JOB_START_KEY + 7)     // string - snapshot location
#define ORTE_JOB_SNAPC_INIT_BAR         (ORTE_JOB_START_KEY + 8)     // orte_grpcomm_coll_id_t - collective id
#define ORTE_JOB_SNAPC_FINI_BAR         (ORTE_JOB_START_KEY + 9)     // orte_grpcomm_coll_id_t - collective id
#define ORTE_JOB_NUM_NONZERO_EXIT       (ORTE_JOB_START_KEY + 10)    // int32 - number of procs with non-zero exit codes
#define ORTE_JOB_FAILURE_TIMER_EVENT    (ORTE_JOB_START_KEY + 11)    // opal_ptr (orte_timer_t*) - timer event for failure detect/response if fails to launch
#define ORTE_JOB_ABORTED_PROC           (ORTE_JOB_START_KEY + 12)    // opal_ptr (orte_proc_t*) - proc that caused abort to happen
#define ORTE_JOB_MAPPER                 (ORTE_JOB_START_KEY + 13)    // bool - job consists of MapReduce mappers
#define ORTE_JOB_REDUCER                (ORTE_JOB_START_KEY + 14)    // bool - job consists of MapReduce reducers
#define ORTE_JOB_COMBINER               (ORTE_JOB_START_KEY + 15)    // bool - job consists of MapReduce combiners
#define ORTE_JOB_INDEX_ARGV             (ORTE_JOB_START_KEY + 16)    // bool - automatically index argvs
#define ORTE_JOB_NO_VM                  (ORTE_JOB_START_KEY + 17)    // bool - do not use VM launch
// NOTE(review): the descriptions of the next two keys were shifted by one entry
// ("continuously operating apps" was attached to SPIN_FOR_DEBUG and "recovery
// policy defined" to CONTINUOUS_OP). They are re-aligned here by key name;
// the SPIN_FOR_DEBUG wording is inferred from its name only - confirm.
#define ORTE_JOB_SPIN_FOR_DEBUG         (ORTE_JOB_START_KEY + 18)    // bool - presumably: procs spin waiting for a debugger
#define ORTE_JOB_CONTINUOUS_OP          (ORTE_JOB_START_KEY + 19)    // bool - job consists of continuously operating apps
#define ORTE_JOB_RECOVER_DEFINED        (ORTE_JOB_START_KEY + 20)    // bool - recovery policy has been defined
// (ORTE_JOB_START_KEY + 21) is not assigned in this header
#define ORTE_JOB_NON_ORTE_JOB           (ORTE_JOB_START_KEY + 22)    // bool - non-orte job
#define ORTE_JOB_STDOUT_TARGET          (ORTE_JOB_START_KEY + 23)    // orte_jobid_t - job that is to receive the stdout (on its stdin) from this one
#define ORTE_JOB_POWER                  (ORTE_JOB_START_KEY + 24)    // string - power setting for nodes in job
#define ORTE_JOB_MAX_FREQ               (ORTE_JOB_START_KEY + 25)    // string - max freq setting for nodes in job
#define ORTE_JOB_MIN_FREQ               (ORTE_JOB_START_KEY + 26)    // string - min freq setting for nodes in job
#define ORTE_JOB_GOVERNOR               (ORTE_JOB_START_KEY + 27)    // string - governor used for nodes in job
#define ORTE_JOB_FAIL_NOTIFIED          (ORTE_JOB_START_KEY + 28)    // bool - abnormal term of proc within job has been reported
#define ORTE_JOB_TERM_NOTIFIED          (ORTE_JOB_START_KEY + 29)    // bool - normal term of job has been reported
#define ORTE_JOB_PEER_MODX_ID           (ORTE_JOB_START_KEY + 30)    // orte_grpcomm_coll_id_t - collective id
#define ORTE_JOB_INIT_BAR_ID            (ORTE_JOB_START_KEY + 31)    // orte_grpcomm_coll_id_t - collective id
#define ORTE_JOB_FINI_BAR_ID            (ORTE_JOB_START_KEY + 32)    // orte_grpcomm_coll_id_t - collective id
#define ORTE_JOB_FWDIO_TO_TOOL          (ORTE_JOB_START_KEY + 33)    // Forward IO for this job to the tool requesting its spawn
// NOTE(review): "jobids" below looks like a slip for cpu ids, given the key name - confirm
#define ORTE_JOB_PHYSICAL_CPUIDS        (ORTE_JOB_START_KEY + 34)    // bool - Hostfile contains physical jobids in cpuset
#define ORTE_JOB_LAUNCHED_DAEMONS       (ORTE_JOB_START_KEY + 35)    // bool - Job caused new daemons to be spawned
#define ORTE_JOB_REPORT_BINDINGS        (ORTE_JOB_START_KEY + 36)    // bool - Report process bindings
#define ORTE_JOB_CPU_LIST               (ORTE_JOB_START_KEY + 37)    // string - cpus to which procs are to be bound
#define ORTE_JOB_NOTIFICATIONS          (ORTE_JOB_START_KEY + 38)    // string - comma-separated list of desired notifications+methods
#define ORTE_JOB_ROOM_NUM               (ORTE_JOB_START_KEY + 39)    // int - number of remote request's hotel room
#define ORTE_JOB_LAUNCH_PROXY           (ORTE_JOB_START_KEY + 40)    // opal_process_name_t - name of spawn requestor
#define ORTE_JOB_NSPACE_REGISTERED      (ORTE_JOB_START_KEY + 41)    // bool - job has been registered with embedded PMIx server
#define ORTE_JOB_FIXED_DVM              (ORTE_JOB_START_KEY + 42)    // bool - do not change the size of the DVM for this job
#define ORTE_JOB_DVM_JOB                (ORTE_JOB_START_KEY + 43)    // bool - job is using a DVM
#define ORTE_JOB_CANCELLED              (ORTE_JOB_START_KEY + 44)    // bool - job was cancelled
#define ORTE_JOB_OUTPUT_TO_FILE         (ORTE_JOB_START_KEY + 45)    // string - name of directory to which stdout/err is to be directed
#define ORTE_JOB_MERGE_STDERR_STDOUT    (ORTE_JOB_START_KEY + 46)    // bool - merge stderr into stdout stream
#define ORTE_JOB_TAG_OUTPUT             (ORTE_JOB_START_KEY + 47)    // bool - tag stdout/stderr
#define ORTE_JOB_TIMESTAMP_OUTPUT       (ORTE_JOB_START_KEY + 48)    // bool - timestamp stdout/stderr
#define ORTE_JOB_MULTI_DAEMON_SIM       (ORTE_JOB_START_KEY + 49)    // bool - multiple daemons/node to simulate large cluster
#define ORTE_JOB_NOTIFY_COMPLETION      (ORTE_JOB_START_KEY + 50)    // bool - notify parent proc when spawned job terminates
#define ORTE_JOB_TRANSPORT_KEY          (ORTE_JOB_START_KEY + 51)    // string - transport keys assigned to this job
#define ORTE_JOB_INFO_CACHE             (ORTE_JOB_START_KEY + 52)    // opal_list_t - list of opal_value_t to be included in job_info

#define ORTE_JOB_MAX_KEY                300
/*** PROC FLAGS - never sent anywhere ***/
/* One bit per flag, stored in a 16-bit mask. */
typedef uint16_t orte_proc_flags_t;
#define ORTE_PROC_FLAG_ALIVE           0x0001    // proc has been launched and has not yet terminated
#define ORTE_PROC_FLAG_ABORT           0x0002    // proc called abort
#define ORTE_PROC_FLAG_UPDATED         0x0004    // proc has been updated and needs to be included in the next pidmap message
#define ORTE_PROC_FLAG_LOCAL           0x0008    // indicate that this proc is local
#define ORTE_PROC_FLAG_REPORTED        0x0010    // indicate proc has reported in
#define ORTE_PROC_FLAG_REG             0x0020    // proc has registered
#define ORTE_PROC_FLAG_HAS_DEREG       0x0040    // proc has deregistered
#define ORTE_PROC_FLAG_AS_MPI          0x0080    // proc is MPI process
#define ORTE_PROC_FLAG_IOF_COMPLETE    0x0100    // IOF has completed
#define ORTE_PROC_FLAG_WAITPID         0x0200    // waitpid fired
#define ORTE_PROC_FLAG_RECORDED        0x0400    // termination has been recorded
#define ORTE_PROC_FLAG_DATA_IN_SM      0x0800    // modex data has been stored in the local shared memory region
#define ORTE_PROC_FLAG_DATA_RECVD      0x1000    // modex data for this proc has been received
#define ORTE_PROC_FLAG_SM_ACCESS       0x2000    // indicate if process can read modex data from shared memory region
2014-06-01 20:14:10 +04:00
/*** PROCESS ATTRIBUTE KEYS ***/
/* Values are offsets from ORTE_PROC_START_KEY, which is the end of the job
 * key group; ORTE_PROC_MAX_KEY closes this group. */
#define ORTE_PROC_START_KEY       ORTE_JOB_MAX_KEY

#define ORTE_PROC_NOBARRIER       (ORTE_PROC_START_KEY + 1)     // bool - indicates proc should not barrier in orte_init
#define ORTE_PROC_CPU_BITMAP      (ORTE_PROC_START_KEY + 2)     // string - string representation of cpu bindings
#define ORTE_PROC_HWLOC_LOCALE    (ORTE_PROC_START_KEY + 3)     // opal_ptr (hwloc_obj_t) = pointer to object where proc was mapped
#define ORTE_PROC_HWLOC_BOUND     (ORTE_PROC_START_KEY + 4)     // opal_ptr (hwloc_obj_t) = pointer to object where proc was bound
#define ORTE_PROC_PRIOR_NODE      (ORTE_PROC_START_KEY + 5)     // void* - pointer to orte_node_t where this proc last executed
#define ORTE_PROC_NRESTARTS       (ORTE_PROC_START_KEY + 6)     // int32 - number of times this process has been restarted
#define ORTE_PROC_RESTART_TIME    (ORTE_PROC_START_KEY + 7)     // timeval - time of last restart
#define ORTE_PROC_FAST_FAILS      (ORTE_PROC_START_KEY + 8)     // int32 - number of failures in "fast" window
#define ORTE_PROC_CKPT_STATE      (ORTE_PROC_START_KEY + 9)     // size_t - ckpt state
#define ORTE_PROC_SNAPSHOT_REF    (ORTE_PROC_START_KEY + 10)    // string - snapshot reference
#define ORTE_PROC_SNAPSHOT_LOC    (ORTE_PROC_START_KEY + 11)    // string - snapshot location
#define ORTE_PROC_NODENAME        (ORTE_PROC_START_KEY + 12)    // string - node where proc is located, used only by tools
#define ORTE_PROC_CGROUP          (ORTE_PROC_START_KEY + 13)    // string - name of cgroup this proc shall be assigned to
#define ORTE_PROC_NBEATS          (ORTE_PROC_START_KEY + 14)    // int32 - number of heartbeats in current window

#define ORTE_PROC_MAX_KEY         400
2016-09-28 23:25:55 +03:00
/*** RML ATTRIBUTE KEYS ***/
/* Values are offsets from ORTE_RML_START_KEY, which is the end of the proc
 * key group. */
#define ORTE_RML_START_KEY              ORTE_PROC_MAX_KEY

#define ORTE_RML_TRANSPORT_TYPE         (ORTE_RML_START_KEY + 1)     // string - null terminated string containing transport type
#define ORTE_RML_PROTOCOL_TYPE          (ORTE_RML_START_KEY + 2)     // string - protocol type (e.g., as returned by fi_info)
#define ORTE_RML_CONDUIT_ID             (ORTE_RML_START_KEY + 3)     // orte_rml_conduit_t - conduit_id for this transport
#define ORTE_RML_INCLUDE_COMP_ATTRIB    (ORTE_RML_START_KEY + 4)     // string - comma delimited list of RML component names to be considered
#define ORTE_RML_EXCLUDE_COMP_ATTRIB    (ORTE_RML_START_KEY + 5)     // string - comma delimited list of RML component names to be excluded
#define ORTE_RML_TRANSPORT_ATTRIB       (ORTE_RML_START_KEY + 6)     // string - comma delimited list of transport types to be considered (e.g., "fabric,ethernet")
#define ORTE_RML_QUALIFIER_ATTRIB       (ORTE_RML_START_KEY + 7)     // string - comma delimited list of qualifiers (e.g., routed=direct,bandwidth=xxx)
#define ORTE_RML_PROVIDER_ATTRIB        (ORTE_RML_START_KEY + 8)     // string - comma delimited list of provider names to be considered
#define ORTE_RML_PROTOCOL_ATTRIB        (ORTE_RML_START_KEY + 9)     // string - comma delimited list of protocols to be considered (e.g., tcp,udp)
#define ORTE_RML_ROUTED_ATTRIB          (ORTE_RML_START_KEY + 10)    // string - comma delimited list of routed modules to be considered

// NOTE(review): presumably the top of the ORTE key range (counterpart of ORTE_ATTR_KEY_BASE) - confirm
#define ORTE_ATTR_KEY_MAX               1000
2014-11-18 19:47:55 +03:00
2014-06-01 20:14:10 +04:00
/*** FLAG OPS ***/
/*
 * Bit-mask helpers for any object that has a "flags" member.
 *   p - pointer to the object
 *   f - mask of the flag bit(s) to operate on
 * Each argument appears exactly once in the expansion.
 *
 *   ORTE_FLAG_SET   - OR the mask into p->flags
 *   ORTE_FLAG_UNSET - clear the mask bits in p->flags
 *   ORTE_FLAG_TEST  - yield (p->flags & f): the masked bits themselves,
 *                     non-zero when any bit of the mask is set (not a
 *                     normalized 0/1 value)
 */
#define ORTE_FLAG_SET(p, f)      ((p)->flags |= (f))
#define ORTE_FLAG_UNSET(p, f)    ((p)->flags &= ~(f))
#define ORTE_FLAG_TEST(p, f)     ((p)->flags & (f))
/*
 * Convert an attribute key to a string. Converters registered through
 * orte_attr_register are invoked by this function to produce the value.
 */
ORTE_DECLSPEC const char *orte_attr_key_to_str(orte_attribute_key_t key);

/*
 * Retrieve the named attribute from a list.
 *
 *   attributes - list holding the attributes
 *   key        - key of the attribute to retrieve
 *   data       - where the value is handed back
 *                (NOTE(review): copy/ownership semantics are not visible here - confirm)
 *   type       - data type of the value
 *
 * NOTE(review): presumably returns true when the key is present - confirm.
 */
ORTE_DECLSPEC bool orte_get_attribute(opal_list_t *attributes,
                                      orte_attribute_key_t key,
                                      void **data,
                                      opal_data_type_t type);

/*
 * Set the named attribute in a list, overwriting any prior entry.
 *
 *   attributes - list holding the attributes
 *   key        - key of the attribute to set
 *   local      - ORTE_ATTR_LOCAL to keep the attribute for local use only,
 *                ORTE_ATTR_GLOBAL to include it when the object is sent
 *   data       - value to store
 *   type       - data type of the value
 *
 * NOTE(review): the int result is presumably an ORTE status code - confirm.
 */
ORTE_DECLSPEC int orte_set_attribute(opal_list_t *attributes,
                                     orte_attribute_key_t key,
                                     bool local,
                                     void *data,
                                     opal_data_type_t type);

/* Remove the named attribute from a list. */
ORTE_DECLSPEC void orte_remove_attribute(opal_list_t *attributes,
                                         orte_attribute_key_t key);
2014-11-19 20:36:47 +03:00
/*
* Register a handler for converting attr keys to strings
*
* Handlers will be invoked by orte_attr_key_to_str to return the appropriate value .
*/
2014-12-09 12:04:31 +03:00
typedef char * ( * orte_attr2str_fn_t ) ( orte_attribute_key_t key ) ;
2014-11-19 20:36:47 +03:00
ORTE_DECLSPEC int orte_attr_register ( const char * project ,
orte_attribute_key_t key_base ,
orte_attribute_key_t key_max ,
orte_attr2str_fn_t converter ) ;
2014-06-01 20:14:10 +04:00
# endif