2004-01-15 04:47:20 +00:00
|
|
|
/*
|
|
|
|
* $HEADER$
|
|
|
|
*/
|
|
|
|
|
2004-08-05 14:35:38 +00:00
|
|
|
/**
|
|
|
|
* @file
|
|
|
|
*
|
|
|
|
* Interface into the Open MPI Run Time Environment
|
|
|
|
*/
|
2004-06-07 15:33:53 +00:00
|
|
|
#ifndef OMPI_RUNTIME_H
|
|
|
|
#define OMPI_RUNTIME_H
|
2004-01-15 04:47:20 +00:00
|
|
|
|
2004-06-07 15:33:53 +00:00
|
|
|
#include "ompi_config.h"
|
2004-01-31 21:47:59 +00:00
|
|
|
|
2004-09-03 19:26:49 +00:00
|
|
|
#include <sys/types.h>
|
|
|
|
|
2004-08-10 03:48:41 +00:00
|
|
|
#include "runtime/runtime_types.h"
|
2004-09-03 19:19:59 +00:00
|
|
|
#include "mca/ns/ns.h"
|
2004-08-10 03:48:41 +00:00
|
|
|
|
2004-08-07 00:53:56 +00:00
|
|
|
/* For backwards compatibility. If you only need MPI stuff, please include
|
|
|
|
mpi/runtime/mpiruntime.h directly */
|
2004-08-08 05:20:32 +00:00
|
|
|
#include "mpi/runtime/mpiruntime.h"
|
2004-03-17 20:00:24 +00:00
|
|
|
|
2004-09-20 18:25:00 +00:00
|
|
|
/* constants for spawn constraints */
|
2004-09-10 04:54:17 +00:00
|
|
|
#define OMPI_RTE_SPAWN_MULTI_CELL 0x0001
|
2004-09-20 18:25:00 +00:00
|
|
|
#define OMPI_RTE_SPAWN_DAEMON 0x0002
|
|
|
|
#define OMPI_RTE_SPAWN_HIGH_QOS 0x0004
|
|
|
|
#define OMPI_RTE_SPAWN_FROM_MPI 0x0008
|
2004-09-10 04:54:17 +00:00
|
|
|
|
2004-01-15 04:47:20 +00:00
|
|
|
#ifdef __cplusplus
|
|
|
|
extern "C" {
|
|
|
|
#endif
|
|
|
|
|
2004-09-02 18:39:42 +00:00
|
|
|
/* globals used by RTE - instanced in ompi_rte_init.c */
|
|
|
|
|
|
|
|
extern int ompi_rte_debug_flag;
|
|
|
|
|
2004-09-03 16:26:15 +00:00
|
|
|
/* Define the info structure underlying the Open MPI universe system
|
|
|
|
* instanced in ompi_rte_init.c */
|
|
|
|
|
|
|
|
struct ompi_universe_t {
|
|
|
|
char *name; /**< NOTE(review): presumably the universe name -- confirm */
|
|
|
|
char *host; /**< NOTE(review): presumably the host the universe resides on -- confirm */
|
|
|
|
char *uid; /**< NOTE(review): presumably the owning user's id, held as a string -- confirm */
|
2004-09-03 19:26:49 +00:00
|
|
|
pid_t pid; /**< NOTE(review): presumably a process id tied to the universe; which process is not stated here -- confirm */
|
2004-09-03 16:26:15 +00:00
|
|
|
bool persistence; /**< NOTE(review): presumably true when the universe is persistent -- confirm */
|
|
|
|
char *scope; /**< NOTE(review): presumably the universe's connection scope (e.g. local) -- confirm allowed values */
|
2004-09-07 02:58:49 +00:00
|
|
|
bool probe; /**< NOTE(review): meaning not evident from this header -- needs documentation */
|
2004-09-11 12:56:52 +00:00
|
|
|
bool console; /**< NOTE(review): console-related flag, separate from console_connected -- confirm meaning */
|
|
|
|
char *ns_replica; /**< OOB contact info for name server */
|
|
|
|
char *gpr_replica; /**< OOB contact info for GPR */
|
|
|
|
char *seed_contact_info; /**< OOB contact info for universe seed */
|
|
|
|
bool console_connected; /**< Indicates if console is connected */
|
|
|
|
char *scriptfile; /**< Name of file containing commands to be executed */
|
|
|
|
char *hostfile; /**< Name of file containing list of hosts to be built into virtual machine */
|
2004-09-03 16:26:15 +00:00
|
|
|
};
|
|
|
|
typedef struct ompi_universe_t ompi_universe_t;
|
|
|
|
|
|
|
|
extern ompi_universe_t ompi_universe_info;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Initialize the Open MPI support code
|
|
|
|
*
|
|
|
|
* This function initializes the Open MPI support code, including
|
|
|
|
* malloc debugging and threads. It should be called exactly once
|
|
|
|
* by every application that utilizes any of the Open MPI support
|
|
|
|
* libraries (including MPI applications, mpirun, and mpicc).
|
|
|
|
*
|
|
|
|
* This function should be called before \c ompi_rte_init, if
|
|
|
|
* \c ompi_rte_init is to be called.
|
|
|
|
*/
|
|
|
|
int ompi_init(int argc, char* argv[]);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Finalize the Open MPI support code
|
|
|
|
*
|
|
|
|
* Finalize the Open MPI support code. Any function calling \c
|
|
|
|
* ompi_init should call \c ompi_finalize. This function should
|
|
|
|
* be called after \c ompi_rte_finalize, if \c
|
|
|
|
* ompi_rte_finalize is called.
|
|
|
|
*/
|
|
|
|
int ompi_finalize(void);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Abort the current application with a pretty-print error message
|
|
|
|
*
|
|
|
|
* Aborts currently running application with \c abort(), pretty
|
|
|
|
* printing an error message if possible. Error message should be
|
|
|
|
* specified using the standard \c printf() format.
|
|
|
|
*/
|
|
|
|
int ompi_abort(int status, char *fmt, ...);
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Initialize the Open MPI run time environment
|
|
|
|
*
|
|
|
|
* Initialize the Open MPI run time environment, including process
|
|
|
|
* control and out of band messaging. This function should be
|
|
|
|
* called exactly once, after \c ompi_init. This function should
|
|
|
|
* be called by every application using the RTE interface, including
|
|
|
|
* MPI applications and mpirun.
|
|
|
|
*/
|
2004-09-03 21:17:33 +00:00
|
|
|
int ompi_rte_init_stage1(bool *allow_multi_user_threads, bool *have_hidden_threads);
|
|
|
|
int ompi_rte_init_stage2(bool *allow_multi_user_threads, bool *have_hidden_threads);
|
2004-09-10 03:21:03 +00:00
|
|
|
int ompi_rte_init_finalstage(bool *allow_multi_user_threads, bool *have_hidden_threads);
|
2004-09-03 16:26:15 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Finalize the Open MPI run time environment
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
int ompi_rte_finalize(void);
|
2004-01-15 04:47:20 +00:00
|
|
|
|
2004-09-21 20:27:41 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Request a handle for spawning jobs
|
|
|
|
*
|
|
|
|
* Request a handle for allocating resources and spawning a job.
|
|
|
|
* This is the first step in starting a new set of processes. It
|
|
|
|
* will load the best available set of pcm components for starting
|
|
|
|
* a job according to the \c criteria provided.
|
|
|
|
*
|
|
|
|
* The returned job handle should be OBJ_RELEASE()'ed when no
|
|
|
|
* further use of the particular job handle is needed. It is
|
|
|
|
* possible that consecutive calls to this function with the same
|
|
|
|
* \c criteria will return a pointer to the same object. In these
|
|
|
|
* situations, the reference count on the object will be adjusted
|
|
|
|
* as appropriate.
|
|
|
|
*
|
|
|
|
* The returned handle can be used to call the process startup
|
|
|
|
* related functions multiple times, both in the same job and in
|
|
|
|
* different jobs.
|
|
|
|
*
|
|
|
|
* @param criteria (IN) Selection criteria. A bitmask of the
|
|
|
|
* constants defined in \c runtime.h starting
|
|
|
|
* with \c OMPI_RTE_SPAWN_*
|
|
|
|
* @param have_threads (IN) Whether the current running process is
|
|
|
|
* multi-threaded or not. true means there
|
|
|
|
* may be concurrent access into the
|
|
|
|
* underlying components *and* that the
|
|
|
|
* components may launch new threads.
|
|
|
|
* @return jobhandle (OUT) Pointer to an \c ompi_rte_spawn_handle_t.
|
|
|
|
* If no available pcm components are capable
|
|
|
|
* of meeting criteria, \c NULL is returned.
|
|
|
|
*/
|
|
|
|
ompi_rte_spawn_handle_t* ompi_rte_get_spawn_handle(int criteria,
|
|
|
|
bool have_threads);
|
|
|
|
|
|
|
|
|
2004-08-10 03:48:41 +00:00
|
|
|
/**
|
|
|
|
* Allocate requested resources
|
|
|
|
*
|
|
|
|
* Allocate the specified nodes / processes for use in a new job.
|
2004-09-21 20:27:41 +00:00
|
|
|
* This function should be called exactly once per call to \c
|
|
|
|
* ompi_rte_spawn_procs.
|
2004-08-10 03:48:41 +00:00
|
|
|
*
|
2004-09-21 20:27:41 +00:00
|
|
|
* @param handle (IN) Handle from \c ompi_rte_get_spawn_handle
|
2004-08-10 03:48:41 +00:00
|
|
|
* @param jobid (IN) Jobid with which to associate the given resources.
|
2004-09-03 19:19:59 +00:00
|
|
|
* @param nodes (IN) Number of nodes to try to allocate. If 0, the
|
|
|
|
* allocator will try to allocate \c procs processes
|
2004-09-21 20:27:41 +00:00
|
|
|
* on as many nodes as are needed. If positive,
|
2004-09-03 19:19:59 +00:00
|
|
|
* will try to allocate \c procs process slots
|
2004-09-21 20:27:41 +00:00
|
|
|
* per node. If both nodes and procs are 0,
|
|
|
|
* will attempt to return as many resources as
|
|
|
|
* possible
|
2004-08-10 03:48:41 +00:00
|
|
|
* @param procs (IN) Number of processors to try to allocate. See the note
|
2004-09-21 20:27:41 +00:00
|
|
|
* for \c nodes for usage.
|
2004-08-13 19:39:06 +00:00
|
|
|
* @return List of <code>ompi_rte_node_allocation_t</code>s
|
2004-09-21 20:27:41 +00:00
|
|
|
* describing the allocated resources or NULL on
|
2004-09-22 22:27:40 +00:00
|
|
|
* error (error will be in errno). If the
|
|
|
|
* number of requested resources is not
|
|
|
|
* available, errno will be set to \c
|
|
|
|
* OMPI_ERR_OUT_OF_RESOURCE. This is not a
|
|
|
|
* fatal error - \c ompi_rte_allocate_resources
|
|
|
|
* can be called again, but with a smaller
|
|
|
|
* resource request.
|
2004-08-10 03:48:41 +00:00
|
|
|
*
|
2004-09-03 19:19:59 +00:00
|
|
|
* @note In the future, a more complex resource allocation
|
|
|
|
* function may be added, which allows for complicated
|
|
|
|
* resource requests. This function will continue to exist
|
|
|
|
* as a special case of that function.
|
2004-08-10 03:48:41 +00:00
|
|
|
*
|
2004-09-21 20:27:41 +00:00
|
|
|
* Some systems are not capable of providing a maximum
|
|
|
|
* available resource count and there is an inherent race
|
|
|
|
* condition to do so in many other systems. On these
|
|
|
|
* systems, errno will be set to \c OMPI_ERR_NOT_SUPPORTED.
|
|
|
|
* This is not a fatal error - \c
|
|
|
|
* ompi_rte_allocate_resources can be called again, but
|
|
|
|
* without nodes = 0, procs = 0.
|
2004-08-10 03:48:41 +00:00
|
|
|
*/
|
2004-09-21 20:27:41 +00:00
|
|
|
ompi_list_t* ompi_rte_allocate_resources(ompi_rte_spawn_handle_t* handle,
|
|
|
|
mca_ns_base_jobid_t jobid,
|
|
|
|
int nodes, int procs);
|
2004-08-10 03:48:41 +00:00
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Spawn a job
|
|
|
|
*
|
|
|
|
* Start a job with given jobid and starting vpid (should probably be
|
|
|
|
* 0 for the foreseeable future). The job is specified using an array
|
|
|
|
* of \c mca_pcm_base_schedule_t structures, which give both process
|
|
|
|
* and location information.
|
|
|
|
*
|
2004-09-21 20:27:41 +00:00
|
|
|
* @param handle (IN) Handle from \c ompi_rte_get_spawn_handle
|
2004-08-10 03:48:41 +00:00
|
|
|
*/
|
2004-09-21 20:27:41 +00:00
|
|
|
int ompi_rte_spawn_procs(ompi_rte_spawn_handle_t* handle,
|
|
|
|
mca_ns_base_jobid_t jobid,
|
2004-09-03 19:19:59 +00:00
|
|
|
ompi_list_t *schedule_list);
|
2004-08-10 03:48:41 +00:00
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Get my name
|
|
|
|
*
|
|
|
|
* @return my name
|
|
|
|
*/
|
|
|
|
ompi_process_name_t* ompi_rte_get_self(void);
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Get names of peer processes which have been launched
|
|
|
|
*
|
2004-09-22 16:05:33 +00:00
|
|
|
* @param peers (OUT) Pointer to a pointer of
|
|
|
|
* ompi_process_name_t. \c *peers will be set
|
|
|
|
* to point to a statically allocated buffer
|
|
|
|
* containing the array of peer processes
|
|
|
|
* started with the current process. If \c
|
|
|
|
* peers is NULL, then only \c npeers is
|
|
|
|
* updated.
|
|
|
|
* @param npeers (OUT) pointer to an integer that will be updated
|
|
|
|
* with the total number of peers started with
|
|
|
|
* the current process. Also the length of \c
|
|
|
|
* *peers array if \c peers is not \c NULL
|
|
|
|
*
|
|
|
|
* @return OMPI_SUCCESS on success
|
|
|
|
* OMPI_ERR_NOT_IMPLEMENTED if the underlying module is
|
|
|
|
* not properly loaded.
|
|
|
|
*
|
2004-08-10 03:48:41 +00:00
|
|
|
*/
|
|
|
|
int ompi_rte_get_peers(ompi_process_name_t **peers, size_t *npeers);
|
|
|
|
|
2004-08-28 01:15:19 +00:00
|
|
|
/**
|
|
|
|
* Setup process info in the registry.
|
2004-09-03 16:26:15 +00:00
|
|
|
*/
|
2004-08-28 01:15:19 +00:00
|
|
|
|
|
|
|
int ompi_rte_register(void);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Monitor a job - currently implemented by monitoring process
|
|
|
|
* registration/deregistration to/from the GPR.
|
2004-09-03 16:26:15 +00:00
|
|
|
*/
|
2004-08-28 01:15:19 +00:00
|
|
|
|
|
|
|
int ompi_rte_notify(mca_ns_base_jobid_t job, int num_procs);
|
|
|
|
int ompi_rte_monitor(void);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Remove process registration.
|
|
|
|
*/
|
|
|
|
|
|
|
|
int ompi_rte_unregister(void);
|
2004-08-10 03:48:41 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Kill a specific process in this cell
|
|
|
|
*
|
|
|
|
* @param name Which process needs to be killed.
|
|
|
|
* @return Error code
|
|
|
|
*
|
|
|
|
* @warning flags is currently ignored, but should be set to 0 for
|
|
|
|
* future compatibility. Will be used to specify how to kill
|
|
|
|
* processes (0 will be same as a "kill <pid>")
|
|
|
|
*/
|
|
|
|
int ompi_rte_kill_proc(ompi_process_name_t *name, int flags);
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Kill all the processes in a job. This will probably find out all
|
|
|
|
* the processes in the job by contacting the registry and then call
|
|
|
|
* mca_pcm_kill_process for each process in the job (for a cell)
|
|
|
|
*
|
|
|
|
* @param jobid Job id
|
|
|
|
* @return Error code
|
|
|
|
*
|
|
|
|
* @warning flags is currently ignored, but should be set to 0 for
|
|
|
|
* future compatibility. Will be used to specify how to kill
|
|
|
|
* processes (0 will be same as a "kill <pid>")
|
|
|
|
*/
|
2004-09-03 19:19:59 +00:00
|
|
|
int ompi_rte_kill_job(mca_ns_base_jobid_t jobid, int flags);
|
2004-08-10 03:48:41 +00:00
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Deallocate requested resources
|
|
|
|
*
|
|
|
|
* Return the resources for the given jobid to the system.
|
|
|
|
*
|
2004-09-21 20:27:41 +00:00
|
|
|
* @param handle (IN) Handle from \c ompi_rte_get_spawn_handle
|
2004-08-10 03:48:41 +00:00
|
|
|
* @param jobid (IN) Jobid associated with the resources to be freed.
|
|
|
|
* @param nodelist (IN) Nodelist from the associated \c ompi_rte_allocate_resources call.
|
|
|
|
* All associated memory will be freed as appropriate.
|
|
|
|
*/
|
2004-09-21 20:27:41 +00:00
|
|
|
int ompi_rte_deallocate_resources(ompi_rte_spawn_handle_t *handle,
|
|
|
|
mca_ns_base_jobid_t jobid,
|
2004-09-03 19:19:59 +00:00
|
|
|
ompi_list_t *nodelist);
|
2004-08-10 03:48:41 +00:00
|
|
|
|
|
|
|
|
2004-09-02 18:39:42 +00:00
|
|
|
/**
|
|
|
|
* Setup rte command line options
|
|
|
|
*
|
|
|
|
* Defines the command line options specific to the rte/seed daemon
|
|
|
|
*
|
|
|
|
* @param cmd_line Pointer to an ompi_cmd_line_t object
|
|
|
|
* @retval None
|
|
|
|
*/
|
|
|
|
void ompi_rte_cmd_line_setup(ompi_cmd_line_t *cmd_line);
|
|
|
|
|
2004-09-03 16:26:15 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Parse the rte command line for options
|
|
|
|
*
|
2004-09-07 02:58:49 +00:00
|
|
|
* Parses the specified command line for rte specific options.
|
2004-09-03 16:26:15 +00:00
|
|
|
* Fills the relevant global structures with the information obtained.
|
|
|
|
*
|
|
|
|
* @param cmd_line Command line to be parsed.
|
|
|
|
* @retval None
|
|
|
|
*/
|
2004-09-07 02:58:49 +00:00
|
|
|
void ompi_rte_parse_cmd_line(ompi_cmd_line_t *cmd_line);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Parse the rte command line for daemon-specific options
|
|
|
|
*
|
|
|
|
* Parses the specified command line for rte daemon-specific options.
|
|
|
|
* Fills the relevant global structures with the information obtained.
|
|
|
|
*
|
|
|
|
* @param cmd_line Command line to be parsed.
|
|
|
|
* @retval None
|
|
|
|
*/
|
|
|
|
void ompi_rte_parse_daemon_cmd_line(ompi_cmd_line_t *cmd_line);
|
2004-09-03 16:26:15 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Check for universe existence
|
|
|
|
*
|
2004-09-11 12:56:52 +00:00
|
|
|
* Checks to see if a specified universe exists. If so, attempts
|
2004-09-03 16:26:15 +00:00
|
|
|
* to connect to verify that the universe is accepting connections.
|
2004-09-11 12:56:52 +00:00
|
|
|
* If both ns and gpr replicas provided, first checks for those
|
|
|
|
* connections. Gets any missing info from the universe contact.
|
2004-09-03 16:26:15 +00:00
|
|
|
*
|
|
|
|
* @param None Reads everything from the process_info and system_info
|
|
|
|
* structures
|
|
|
|
*
|
|
|
|
* @retval OMPI_SUCCESS Universe found and connection accepted
|
|
|
|
* @retval OMPI_NO_CONNECTION_ALLOWED Universe found, but not persistent or
|
|
|
|
* restricted to local scope
|
|
|
|
* @retval OMPI_CONNECTION_FAILED Universe found, but connection attempt
|
|
|
|
* failed. Probably caused by unclean termination of the universe seed
|
|
|
|
* daemon.
|
|
|
|
* @retval OMPI_CONNECTION_REFUSED Universe found and contact made, but
|
|
|
|
* universe refused to allow connection.
|
|
|
|
*/
|
2004-09-11 12:56:52 +00:00
|
|
|
int ompi_rte_universe_exists(void);
|
2004-09-03 16:26:15 +00:00
|
|
|
|
2004-09-10 03:21:03 +00:00
|
|
|
/**
|
|
|
|
* Parse the RTE environmental variables
|
|
|
|
*
|
|
|
|
* Checks the environmental variables and passes their info (where
|
2004-09-11 12:56:52 +00:00
|
|
|
* set) into the respective info structures. Sets ALL Open MPI
|
|
|
|
* default values in universe, process, and system structures.
|
2004-09-10 03:21:03 +00:00
|
|
|
*
|
|
|
|
* @param None
|
|
|
|
*
|
|
|
|
* @retval None
|
|
|
|
*/
|
|
|
|
void ompi_rte_parse_environ(void);
|
|
|
|
|
2004-09-14 14:21:04 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Register a daemon on the virtual machine segment.
|
|
|
|
*/
|
|
|
|
int ompi_vm_register(void);
|
|
|
|
|
|
|
|
|
2004-01-15 04:47:20 +00:00
|
|
|
#ifdef __cplusplus
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2004-06-07 15:33:53 +00:00
|
|
|
#endif /* OMPI_RUNTIME_H */
|