/*
 * Commit 3e55fe6f6d
 * Update the rsh tree spawn capability so we spawn the next wave of daemons
 * before launching our own local procs. Add an ability to encode nodenames
 * for large clusters with contiguous node name numbering schemes - this
 * allows communication of all node names in a few bytes instead of
 * tens-of-bytes/node. This commit was SVN r18338.
 *
 * 349 lines, 12 KiB, C
 */
/*
|
|
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
* University Research and Technology
|
|
* Corporation. All rights reserved.
|
|
* Copyright (c) 2004-2008 The University of Tennessee and The University
|
|
* of Tennessee Research Foundation. All rights
|
|
* reserved.
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
* University of Stuttgart. All rights reserved.
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
* All rights reserved.
|
|
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
|
|
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
/**
|
|
* @file
|
|
*
|
|
* Global params for OpenRTE
|
|
*/
|
|
#ifndef ORTE_RUNTIME_ORTE_GLOBALS_H
|
|
#define ORTE_RUNTIME_ORTE_GLOBALS_H
|
|
|
|
#include "orte_config.h"
#include "orte/types.h"

#include <limits.h>   /* INT_MAX, used by ORTE_GLOBAL_ARRAY_MAX_SIZE */
#include <sys/types.h>
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif

#include "opal/threads/mutex.h"
#include "opal/threads/condition.h"
#include "opal/class/opal_pointer_array.h"

#include "orte/mca/plm/plm_types.h"
#include "orte/mca/rmaps/rmaps_types.h"
#include "orte/util/proc_info.h"
|
|
|
|
|
|
/* sizing constants for the global arrays
 * NOTE(review): presumably these apply to the global pointer arrays
 * declared in this header (orte_job_data, orte_node_pool) - the
 * consumers are not visible here, confirm in orte_globals.c.
 * ORTE_GLOBAL_ARRAY_MAX_SIZE expands to INT_MAX, which requires <limits.h>.
 */
#define ORTE_GLOBAL_ARRAY_BLOCK_SIZE    64
#define ORTE_GLOBAL_ARRAY_MAX_SIZE      INT_MAX

/* define a default error return code for ORTE */
#define ORTE_ERROR_DEFAULT_EXIT_CODE    1
|
|
|
|
BEGIN_C_DECLS
|
|
|
|
/* global type definitions used by RTE - instanced in orte_globals.c */
|
|
|
|
/************
 * Forward-declare struct orte_proc_t so that it can be referenced by
 * pointer before it is fully defined - this resolves the circular
 * reference between the node/job objects and the proc object.
 ************/
struct orte_proc_t;
|
|
|
|
/**
|
|
* Information about a specific application to be launched in the RTE.
|
|
*/
|
|
typedef struct {
|
|
/** Parent object */
|
|
opal_object_t super;
|
|
/** Unique index when multiple apps per job */
|
|
int8_t idx;
|
|
/** Absolute pathname of argv[0] */
|
|
char *app;
|
|
/** Number of copies of this process that are to be launched */
|
|
orte_std_cntr_t num_procs;
|
|
/** Standard argv-style array, including a final NULL pointer */
|
|
char **argv;
|
|
/** Standard environ-style array, including a final NULL pointer */
|
|
char **env;
|
|
/** Current working directory for this app */
|
|
char *cwd;
|
|
/** Whether the cwd was set by the user or by the system */
|
|
bool user_specified_cwd;
|
|
/* Any hostfile that was specified */
|
|
char *hostfile;
|
|
/* Hostfile for adding hosts to an existing allocation */
|
|
char *add_hostfile;
|
|
/** argv of hosts passed in to -host */
|
|
char ** dash_host;
|
|
/** Prefix directory for this app (or NULL if no override necessary) */
|
|
char *prefix_dir;
|
|
/** Preload the binary on the remote machine (in PLS via FileM) */
|
|
bool preload_binary;
|
|
/** Preload the comma separated list of files to the remote machines cwd */
|
|
char * preload_files;
|
|
/** Destination directory for the preloaded files
|
|
* If NULL then the absolute and relative paths are obeyed */
|
|
char * preload_files_dest_dir;
|
|
} orte_app_context_t;
|
|
|
|
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_app_context_t);
|
|
|
|
|
|
typedef struct {
|
|
/** Base object so this can be put on a list */
|
|
opal_list_item_t super;
|
|
/* index of this node object in global array */
|
|
orte_std_cntr_t index;
|
|
/** String node name */
|
|
char *name;
|
|
/* whether or not this node is available for allocation */
|
|
bool allocate;
|
|
/* daemon on this node */
|
|
struct orte_proc_t *daemon;
|
|
/* whether or not this daemon has been launched */
|
|
bool daemon_launched;
|
|
/** Launch id - needed by some systems to launch a proc on this node */
|
|
int32_t launch_id;
|
|
/** number of procs on this node */
|
|
orte_vpid_t num_procs;
|
|
/* array of pointers to procs on this node */
|
|
opal_pointer_array_t *procs;
|
|
/* next node rank on this node */
|
|
uint8_t next_node_rank;
|
|
/* whether or not we are oversubscribed */
|
|
bool oversubscribed;
|
|
/** The node architecture, as reported by the remote node. This
|
|
* value is a bit-map that identifies whether or not the node
|
|
* is big/little endian, etc.
|
|
*/
|
|
int32_t arch;
|
|
/** State of this node */
|
|
orte_node_state_t state;
|
|
/** A "soft" limit on the number of slots available on the node.
|
|
This will typically correspond to the number of physical CPUs
|
|
that we have been allocated on this note and would be the
|
|
"ideal" number of processes for us to launch. */
|
|
orte_std_cntr_t slots;
|
|
/** How many processes have already been launched, used by one or
|
|
more jobs on this node. */
|
|
orte_std_cntr_t slots_inuse;
|
|
/** This represents the number of slots we (the allocator) are
|
|
attempting to allocate to the current job - or the number of
|
|
slots allocated to a specific job on a query for the jobs
|
|
allocations */
|
|
orte_std_cntr_t slots_alloc;
|
|
/** A "hard" limit (if set -- a value of 0 implies no hard limit)
|
|
on the number of slots that can be allocated on a given
|
|
node. This is for some environments (e.g. grid) there may be
|
|
fixed limits on the number of slots that can be used.
|
|
|
|
This value also could have been a boolean - but we may want to
|
|
allow the hard limit be different than the soft limit - in
|
|
other words allow the node to be oversubscribed up to a
|
|
specified limit. For example, if we have two processors, we
|
|
may want to allow up to four processes but no more. */
|
|
orte_std_cntr_t slots_max;
|
|
/** Username on this node, if specified */
|
|
char *username;
|
|
char *slot_list;
|
|
} orte_node_t;
|
|
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_node_t);
|
|
|
|
/* define a set of flags to control the launch of a job - these are the
 * bit values carried in the "controls" field of orte_job_t.
 * Each expansion is fully parenthesized so that the cast cannot be
 * separated from its operand by a neighboring operator (e.g. sizeof
 * or a postfix operator).
 */
#define ORTE_JOB_CONTROL_LOCAL_SPAWN    ((uint16_t) 0x01)
#define ORTE_JOB_CONTROL_NON_ORTE_JOB   ((uint16_t) 0x02)
|
|
|
|
typedef struct {
|
|
/** Base object so this can be put on a list */
|
|
opal_list_item_t super;
|
|
/* jobid for this job */
|
|
orte_jobid_t jobid;
|
|
/* app_context array for this job */
|
|
opal_pointer_array_t *apps;
|
|
/* number of app_contexts in the array */
|
|
orte_std_cntr_t num_apps;
|
|
/* flags to control the launch of this job - see above
|
|
* for description of supported flags
|
|
*/
|
|
uint16_t controls;
|
|
/* total slots allocated to this job */
|
|
orte_std_cntr_t total_slots_alloc;
|
|
/* number of procs in this job */
|
|
orte_vpid_t num_procs;
|
|
/* array of pointers to procs in this job */
|
|
opal_pointer_array_t *procs;
|
|
/* map of the job */
|
|
orte_job_map_t *map;
|
|
/* bookmark for where we are in mapping - this
|
|
* indicates the node where we stopped
|
|
*/
|
|
orte_node_t *bookmark;
|
|
/** Whether or not to override oversubscription based on local
|
|
* hardware - used to indicate uncertainty in number of
|
|
* actual processors available on this node
|
|
*/
|
|
bool oversubscribe_override;
|
|
/* state of the overall job */
|
|
orte_job_state_t state;
|
|
/* number of procs launched */
|
|
orte_vpid_t num_launched;
|
|
/* number of procs reporting contact info */
|
|
orte_vpid_t num_reported;
|
|
/* number of procs terminated */
|
|
orte_vpid_t num_terminated;
|
|
/* did this job abort? */
|
|
bool abort;
|
|
/* proc that caused that to happen */
|
|
struct orte_proc_t *aborted_proc;
|
|
#if OPAL_ENABLE_FT == 1
|
|
/* ckpt state */
|
|
size_t ckpt_state;
|
|
/* snapshot reference */
|
|
char *ckpt_snapshot_ref;
|
|
/* snapshot location */
|
|
char *ckpt_snapshot_loc;
|
|
#endif
|
|
} orte_job_t;
|
|
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_job_t);
|
|
|
|
struct orte_proc_t {
|
|
/** Base object so this can be put on a list */
|
|
opal_list_item_t super;
|
|
/* process name */
|
|
orte_process_name_t name;
|
|
/* pid */
|
|
pid_t pid;
|
|
/* local rank amongst my peers on the node
|
|
* where this is running - this value is
|
|
* needed by MPI procs so that the lowest
|
|
* rank on a node can perform certain fns -
|
|
* e.g., open an sm backing file
|
|
*/
|
|
uint8_t local_rank;
|
|
/* local rank on the node across all procs
|
|
* and jobs known to this HNP - this is
|
|
* needed so that procs can do things like
|
|
* know which static IP port to use
|
|
*/
|
|
uint8_t node_rank;
|
|
/* process state */
|
|
orte_proc_state_t state;
|
|
/* exit code */
|
|
orte_exit_code_t exit_code;
|
|
/* the app_context that generated this proc */
|
|
int8_t app_idx;
|
|
/* a cpu list, if specified by the user */
|
|
char *slot_list;
|
|
/* pointer to the node where this proc is executing */
|
|
orte_node_t *node;
|
|
/* name of the node where this proc is executing - this
|
|
* is used simply to pass that info to a calling
|
|
* tool since it may not have a node array available
|
|
*/
|
|
char *nodename;
|
|
/* RML contact info */
|
|
char *rml_uri;
|
|
#if OPAL_ENABLE_FT == 1
|
|
/* ckpt state */
|
|
size_t ckpt_state;
|
|
/* snapshot reference */
|
|
char *ckpt_snapshot_ref;
|
|
/* snapshot location */
|
|
char *ckpt_snapshot_loc;
|
|
#endif
|
|
};
|
|
typedef struct orte_proc_t orte_proc_t;
|
|
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_proc_t);
|
|
|
|
|
|
/**
 * Compact per-node record holding just the node's name and its
 * architecture value. This is a plain struct - unlike orte_node_t it
 * carries no base object and has no class declaration in this header.
 */
typedef struct {
    /** nodename */
    char *name;
    /** arch of node */
    uint32_t arch;
} orte_nid_t;
|
|
|
|
/**
 * Compact per-process record: the index of the node the process is on
 * plus its two ranks on that node. This is a plain struct with no base
 * object and no class declaration in this header.
 */
typedef struct {
    /** index to node */
    int32_t node;
    /** local rank */
    uint8_t local_rank;
    /** node rank */
    uint8_t node_rank;
} orte_pmap_t;
|
|
|
|
/**
|
|
* Get a job data object
|
|
* We cannot just reference a job data object with its jobid as
|
|
* the jobid is no longer an index into the array. This change
|
|
* was necessitated by modification of the jobid to include
|
|
* an mpirun-unique qualifer to eliminate any global name
|
|
* service
|
|
*/
|
|
ORTE_DECLSPEC orte_job_t* orte_get_job_data_object(orte_jobid_t job);
|
|
|
|
/*
|
|
* Shortcut for some commonly used names
|
|
*/
|
|
#define ORTE_NAME_WILDCARD (&orte_globals_name_wildcard)
|
|
ORTE_DECLSPEC extern orte_process_name_t orte_globals_name_wildcard; /** instantiated in orte/runtime/orte_globals.c */
|
|
|
|
#define ORTE_NAME_INVALID (&orte_globals_name_invalid)
|
|
ORTE_DECLSPEC extern orte_process_name_t orte_globals_name_invalid; /** instantiated in orte/runtime/orte_globals.c */
|
|
|
|
#define ORTE_PROC_MY_NAME (&orte_process_info.my_name)
|
|
|
|
/* define a special name that belongs to orterun */
|
|
#define ORTE_PROC_MY_HNP (&orte_process_info.my_hnp)
|
|
|
|
/* define the name of my daemon */
|
|
#define ORTE_PROC_MY_DAEMON (&orte_process_info.my_daemon)
|
|
|
|
/* global variables used by RTE - instanced in orte_globals.c */
|
|
ORTE_DECLSPEC extern bool orte_debug_flag, orte_reuse_daemons, orte_timing;
|
|
ORTE_DECLSPEC extern bool orte_debug_daemons_flag, orte_debug_daemons_file_flag;
|
|
ORTE_DECLSPEC extern bool orte_do_not_launch;
|
|
ORTE_DECLSPEC extern bool orted_spin_flag;
|
|
ORTE_DECLSPEC extern bool orte_static_ports;
|
|
ORTE_DECLSPEC extern int32_t orte_contiguous_nodes;
|
|
ORTE_DECLSPEC extern int orte_debug_output;
|
|
ORTE_DECLSPEC extern bool orte_keep_fqdn_hostnames;
|
|
|
|
ORTE_DECLSPEC extern char **orte_launch_environ;
|
|
ORTE_DECLSPEC extern opal_pointer_array_t orte_daemonmap;
|
|
|
|
ORTE_DECLSPEC extern char **orted_cmd_line;
|
|
ORTE_DECLSPEC extern int orte_exit, orteds_exit;
|
|
ORTE_DECLSPEC extern int orte_exit_status;
|
|
ORTE_DECLSPEC extern bool orte_abnormal_term_ordered;
|
|
|
|
ORTE_DECLSPEC extern int orte_timeout_usec_per_proc;
|
|
ORTE_DECLSPEC extern float orte_max_timeout;
|
|
|
|
ORTE_DECLSPEC extern char *orte_default_hostfile;
|
|
|
|
/* global arrays for data storage */
|
|
ORTE_DECLSPEC extern opal_pointer_array_t *orte_job_data;
|
|
ORTE_DECLSPEC extern opal_pointer_array_t *orte_node_pool;
|
|
|
|
/**
|
|
* Whether ORTE is initialized or we are in orte_finalize
|
|
*/
|
|
ORTE_DECLSPEC extern bool orte_initialized;
|
|
ORTE_DECLSPEC extern bool orte_finalizing;
|
|
|
|
END_C_DECLS
|
|
|
|
#endif /* ORTE_RUNTIME_ORTE_GLOBALS_H */
|