openmpi/src/mca/pcm/pcm.h
Jeff Squyres 251f68b94f Re-separate out the always-installable headers from the LAM-development headers.

This commit was SVN r925.
2004-03-18 21:35:28 +00:00


/* -*- C -*-
 *
 * $HEADER$
 */
/** @file **/
/**
 * \brief LAM/MPI Interface for Parallel Job & Process Control (pcm)
 *
 * LAM/MPI assumes it is running under a fully operational parallel
 * run-time environment (RTE).  This environment may be provided by
 * batch schedulers such as PBS and LSF, single system image tools
 * such as bproc, or specially designed MPI control daemons (the MPICH
 * mpd or the included LAM daemon).  The functionality provided
 * through the process control interface is dependent on the support
 * of the underlying infrastructure.  For example, lam_pcm_spawn
 * (essentially, the "go do it" part of MPI_COMM_SPAWN) is not
 * available for jobs running under the Quadrics/RMS RTE.  The LAM
 * daemons will always provide the complete pcm interface.
 *
 * Like the other LAM run-time interfaces, the pcm interface is
 * implemented through mca modules (pcm).  For details on the
 * capabilities of a particular module, please see the individual
 * module's documentation.
 *
 * A run-time environment suitable for use by LAM/MPI must provide the
 * following capabilities:
 *
 * - Remote process startup at job-start time, with the ability to:
 *   - push an environment (or a large chunk of an environment) to
 *     the started process
 *   - redirect the stdout and stderr of the process to either a file
 *     (batch schedulers) or the mpirun application (LAM daemons)
 *     without interaction from the started process
 * - A working registry interface
 * - A "unique" job id for each parallel job
 * - The ability to "clean up" after a job when provided with that job id
 * - The ability to receive UNIX wait-style notification of parallel
 *   job termination
 *
 * A run-time environment should provide the following capabilities
 * if supported:
 *
 * - Remote process spawning for MPI_SPAWN and friends
 * - Fine-grained control over process cleanup (for example, only do
 *   final cleanup of resources when all applications are unavailable,
 *   kill on first death, kill on the 3rd process exit, etc.)
 *
 * The pcm interface is responsible for ensuring that each process
 * started is capable of performing peer discovery during MPI_INIT.
 * It is intended that mpirun will be actively calling into the pcm
 * interface so that mpirun can be used as a rendezvous point.  Using
 * mpirun is certainly not required, but it is anticipated that this
 * will be a common module design.
 */
#ifndef MCA_PCM_H_
#define MCA_PCM_H_
#include "lam_config.h"
#include "mca/mca.h"
#include "include/types.h"
#include <sys/param.h>
/*
 * "PCM" global types
 */

#define LAM_PCM_PROC_MPIRUN 0
#define LAM_PCM_PROC_MPIAPP 1
#define LAM_PCM_PROC_OTHER  2

struct mca_pcm_rte_node_t {
    char name[MAXHOSTNAMELEN];
    int32_t node_num;
    int32_t num_procs;
};
typedef struct mca_pcm_rte_node_t mca_pcm_rte_node_t;

struct mca_pcm_control_args_t {
    char* request;
    char* value;
};
typedef struct mca_pcm_control_args_t mca_pcm_control_args_t;

struct mca_pcm_proc_t {
    lam_job_handle_t job_handle;
    int vpid;
};
typedef struct mca_pcm_proc_t mca_pcm_proc_t;
/*
 * functions every module must provide
 */

typedef struct mca_pcm_1_0_0_t*
    (*mca_pcm_base_init_fn_t)(int *priority, bool *allow_multi_user_threads,
                              bool *have_hidden_threads);
/**
 * Get list of nodes available for execution
 *
 * @param nodes Pointer that will contain array of nodes available
 * @param nodes_len Length of nodes array
 * @param available_procs Number of available processors in the RTE
 *
 * @retval LAM_SUCCESS success
 * @retval LAM_NOT_SUPPORTED Not available
 *
 * Obtain a list of nodes available for execution.  No promises are
 * made that such information is available - for some environments
 * (Quadrics/RMS, etc.) nothing is really known about the environment
 * until you ask for it.  If a node/host mapping is unavailable,
 * *nodes will be NULL and nodes_len will be 0.  If the total number
 * of available processors for MPI applications is not available, it
 * will be set to -1 and the function will return "Not available".
 * In the case where both are available, available_procs will be
 * equal to the sum of nodes[0...n].num_procs.
 */
typedef int (*mca_pcm_base_query_get_nodes_fn_t)(mca_pcm_rte_node_t **nodes,
                                                 size_t *nodes_len,
                                                 int *available_procs);
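/*
 * Example (a minimal sketch, not part of the original documentation):
 * how mpirun might query the node list through the selected module,
 * using the global mca_pcm declared at the bottom of this file.  Error
 * handling and includes are abbreviated.
 *
 *   mca_pcm_rte_node_t *nodes;
 *   size_t nodes_len, i;
 *   int avail_procs;
 *
 *   if (LAM_SUCCESS == mca_pcm.pcm_query_get_nodes(&nodes, &nodes_len,
 *                                                  &avail_procs)) {
 *       for (i = 0; i < nodes_len; ++i) {
 *           printf("node %d (%s): %d procs\n", (int) nodes[i].node_num,
 *                  nodes[i].name, (int) nodes[i].num_procs);
 *       }
 *   }
 */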
/**
 * Get new parallel job handle
 *
 * @param parent Parent job handle (NULL if not in parallel job)
 *
 * @retval NULL failure
 * @retval non-NULL success
 *
 * The run-time environment tracks MPI applications through a
 * parallel job handle, which is a char* string.  This string can be
 * used to request information about the status of a currently
 * running job, kill the job, etc.
 *
 * The parent parameter allows the run-time system to provide a
 * process tree (spawn, etc.) if the user really wants such
 * information.  For mpirun, it should just be NULL.
 *
 * \warning The handle must be released using mca_pcm_handle_free
 */
typedef lam_job_handle_t (*mca_pcm_base_handle_new_fn_t)(lam_job_handle_t parent);
/**
 * Get my parallel job handle
 *
 * @retval NULL failure - environment not properly initialized
 * @retval non-NULL success
 *
 * Return the parallel job handle for the currently running process.
 *
 * \warning The handle must be released using mca_pcm_handle_free
 */
typedef lam_job_handle_t (*mca_pcm_base_handle_get_fn_t)(void);
/**
 * Free a job handle
 *
 * @param job_handle Pointer to a lam_job_handle_t
 *
 * Free a job handle returned by mca_pcm_handle_new or
 * mca_pcm_handle_get.
 */
typedef void (*mca_pcm_base_handle_free_fn_t)(lam_job_handle_t *job_handle);
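/*
 * Example (a minimal sketch, not part of the original documentation):
 * the job handle lifecycle as mpirun might use it.  mpirun has no
 * parent job, so NULL is passed to pcm_handle_new, and the handle must
 * eventually be released with pcm_handle_free.
 *
 *   lam_job_handle_t job;
 *
 *   job = mca_pcm.pcm_handle_new(NULL);
 *   if (NULL == job) abort();
 *   ... configure, launch, rendezvous, and wait on the job ...
 *   mca_pcm.pcm_handle_free(&job);
 */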
/**
 * Ask if the mca module can spawn processes
 *
 * @param job_handle Parallel job handle of running process
 *
 * @retval LAM_SUCCESS LAM can spawn more jobs
 * @retval LAM_NOT_SUPPORTED LAM can not spawn more jobs
 *
 * Ask the mca module currently providing the run-time environment
 * whether it supports spawning more processes.  This question should
 * always return LAM_SUCCESS (yes) if called from mpirun.  Useful for
 * asking whether MPI_SPAWN and friends can run.
 */
typedef int (*mca_pcm_base_job_can_spawn_fn_t)(lam_job_handle_t job_handle);
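/*
 * Example (a brief sketch, not from the original documentation): an MPI
 * process asking whether MPI_COMM_SPAWN and friends are usable, using
 * its own handle obtained from pcm_handle_get.
 *
 *   lam_job_handle_t me = mca_pcm.pcm_handle_get();
 *
 *   if (LAM_SUCCESS != mca_pcm.pcm_job_can_spawn(me)) {
 *       ... spawning is unavailable under this RTE ...
 *   }
 *   mca_pcm.pcm_handle_free(&me);
 */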
/**
 * Configure arguments for the parallel job to be started
 *
 * @param job_handle Parallel job handle to configure
 * @param opts Array of key=value structures requesting job behaviour
 * @param opts_len Length of opts array
 *
 * @retval LAM_SUCCESS Success
 * @retval LAM_ERROR Unknown failure
 *
 * Configure the job using key=value arguments.  The meanings of the
 * arguments are up to the specific mca module providing run-time
 * support.
 *
 * Common key values will be provided here once MCAs begin to use
 * this function.  The only existing module no-ops this entire
 * function.
 *
 * \warning It is an error to call this function more than once on a
 * single job handle.
 */
typedef int (*mca_pcm_base_job_set_arguments_fn_t)(lam_job_handle_t job_handle,
                                                   mca_pcm_control_args_t* opts,
                                                   size_t opts_len);
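/*
 * Example (a sketch; the "io_redirect" key is purely hypothetical,
 * since no common keys are defined yet): requesting job behaviour with
 * key=value pairs before launch.
 *
 *   mca_pcm_control_args_t opts[1];
 *
 *   opts[0].request = "io_redirect";
 *   opts[0].value   = "mpirun";
 *   if (LAM_SUCCESS != mca_pcm.pcm_job_set_arguments(job, opts, 1)) {
 *       ... the module rejected or ignored the request ...
 *   }
 */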
/**
 * Launch processes across parallel job
 *
 * @param job_handle Parallel job handle within which to launch processes
 * @param nodes Array of node structures describing targets of launch
 * @param nodes_len Length of nodes array
 * @param file Process to launch (does not have to be equal to argv[0])
 * @param argc Length of argv
 * @param argv Argv array for launched processes
 * @param env Environment array for launched process.  See note below
 *
 * @retval LAM_SUCCESS Success
 * @retval LAM_ERR_RESOURCE_BUSY Try again real soon now
 * @retval LAM_ERR_NOT_SUPPORTED non-MPIRUN process can not spawn jobs
 * @retval LAM_FAILURE Unknown failure
 *
 * Launch nodes[i].num_procs processes on nodes[i].node_num for each
 * entry in nodes, as part of job_handle's job.  The env array should
 * contain any environment variables that should be pushed to the
 * remote processes.  The mca may provide a more detailed
 * environment if necessary (bproc, etc.).
 *
 * LAM_ERR_NOT_SUPPORTED will be returned if the mca module does not
 * support spawning of new applications from non-mpirun processes.
 */
typedef int (*mca_pcm_base_job_launch_procs_fn_t)(lam_job_handle_t job_handle,
                                                  mca_pcm_rte_node_t *nodes,
                                                  size_t nodes_len, const char* file,
                                                  int argc, const char* argv[],
                                                  const char *env[]);
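/*
 * Example (a minimal sketch; the host name, counts, and environment
 * variable are illustrative only): launching two processes of
 * "./a.out" on a single node as part of an existing job.
 *
 *   mca_pcm_rte_node_t node;
 *   const char *argv[] = { "a.out", NULL };
 *   const char *env[]  = { "LAM_EXAMPLE_VAR=42", NULL };
 *
 *   strncpy(node.name, "node0", MAXHOSTNAMELEN);
 *   node.node_num  = 0;
 *   node.num_procs = 2;
 *   if (LAM_SUCCESS != mca_pcm.pcm_job_launch_procs(job, &node, 1,
 *                                                   "./a.out", 1, argv,
 *                                                   env)) {
 *       ... launch failed or resource busy - check the return code ...
 *   }
 */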
/**
 * Do rendezvous duties after launching parallel job
 *
 * @param job_handle Parallel job handle to run through startup
 *
 * @retval LAM_SUCCESS Success
 * @retval LAM_FAILURE Unknown failure
 *
 * Do the civic duties required to complete the rendezvous part of
 * the startup protocol.  After this, the MPI application should
 * know who all its neighbors are.  It is, of course, completely
 * possible that the MCA module has been in the background doing
 * this all along and didn't bother to tell you.  When this function
 * returns, it is safe to assume that all rendezvous is complete
 * (i.e., you can exit and not mess anything up).
 *
 * This function only needs to be called by the launching processes.
 */
typedef int (*mca_pcm_base_job_rendezvous_fn_t)(lam_job_handle_t job_handle);
/**
 * Wait for job completion
 *
 * @param job_handle Parallel job handle to wait on
 *
 * @retval LAM_SUCCESS Success
 * @retval LAM_ERR_INTERUPTED Interrupted (due to signal, etc.)
 *
 * The LAM parallel version of "wait".  It is not required to wait
 * on a job at termination, as job results will be expunged over
 * time as resource limits dictate.
 */
typedef int (*mca_pcm_base_job_wait_fn_t)(lam_job_handle_t job_handle);
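/*
 * Example (a sketch, not from the original documentation): the order
 * of operations mpirun would follow after a successful launch -
 * complete the startup rendezvous, then wait for the job to terminate
 * before releasing the handle.
 *
 *   if (LAM_SUCCESS == mca_pcm.pcm_job_rendezvous(job)) {
 *       mca_pcm.pcm_job_wait(job);
 *   }
 *   mca_pcm.pcm_handle_free(&job);
 */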
/**
 * Request job status
 *
 * @param job_handle Parallel job handle to query
 * @param running Job is running, if true
 *
 * @retval LAM_SUCCESS Success
 * @retval LAM_ERR_BAD_PARAM Invalid job handle
 *
 * Ask if the job is running.  If the job has recently finished, this
 * does not imply a wait; the pcm interface will not call wait for you.
 */
typedef int (*mca_pcm_base_job_running_fn_t)(lam_job_handle_t job_handle,
                                             int* running);
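/*
 * Example (a sketch; the one-second poll interval is arbitrary):
 * polling a job's status, e.g. from a monitoring tool.
 *
 *   int running = 1;
 *
 *   while (running) {
 *       if (LAM_SUCCESS != mca_pcm.pcm_job_running(job, &running)) break;
 *       sleep(1);
 *   }
 */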
/**
 * Request list of job handles running in current environment
 *
 * @param handles Pointer to job handles array
 * @param handles_len Length of handles array
 *
 * @retval LAM_ERR_NOT_IMPLEMENTED Not implemented
 *
 * Query the environment about currently running jobs.  Intended for
 * applications outside MPI and mpirun, to be user friendly and all
 * those things.  mca modules are not required to support this
 * function.
 *
 * \warning This function is not yet implemented.
 */
typedef int (*mca_pcm_base_job_list_running_fn_t)(lam_job_handle_t **handles,
                                                  size_t handles_len);
/**
 * Do process startup code
 *
 * @retval LAM_SUCCESS Success
 * @retval LAM_ERR_FATAL Fatal error occurred
 * @retval LAM_ERROR Unknown failure
 *
 * Do all communication work required to get the peer list and
 * establish the out-of-band communication mechanism.  If a pcm
 * interface uses fork()/exec() to start other processes on the
 * current node, it should do so and complete all rendezvous before
 * returning from this function.
 *
 * The mca module is free to start the oob interface as soon as it
 * has provided the oob interface enough information to do so (tight
 * integration with the oob mca module is probably required to meet
 * this constraint).
 */
typedef int (*mca_pcm_base_proc_startup_fn_t)(void);
/**
 * Get peers list
 *
 * @param procs Ordered array of lam_proc_t entries describing the job peers
 *
 * @retval LAM_SUCCESS success
 * @retval LAM_ERROR Unknown error
 *
 * Get the list of peers in the parallel job.  This should not require
 * any communication with other nodes (communication with processes on
 * this node is allowed).
 *
 * \warning This function is not implemented and its argument list
 * may change in the very near future.
 */
typedef int (*mca_pcm_base_proc_get_peers_fn_t)(mca_pcm_proc_t **procs, size_t *nprocs);
/**
 * Get my entry in the peers list
 *
 * @retval LAM_ERR_NOT_IMPLEMENTED Function not implemented
 *
 * Get my entry in the peers list.
 */
typedef mca_pcm_proc_t* (*mca_pcm_base_proc_get_me_fn_t)(void);
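/*
 * Example (a sketch, not from the original documentation): the
 * MPI_INIT-time sequence an application process would run - complete
 * the startup protocol, then discover its peers and its own entry.
 *
 *   mca_pcm_proc_t *procs, *me;
 *   size_t nprocs;
 *
 *   if (LAM_SUCCESS != mca_pcm.pcm_proc_startup()) abort();
 *   if (LAM_SUCCESS != mca_pcm.pcm_proc_get_peers(&procs, &nprocs)) abort();
 *   me = mca_pcm.pcm_proc_get_me();
 */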
/**
 * Get my parent's entry in the peers list
 *
 * @retval LAM_ERR_NOT_IMPLEMENTED Function not implemented
 *
 * Get the peers-list entry for the process that spawned this job.
 *
 * \warning This function is not implemented and its argument list
 * will obviously change in the very near future.
 */
typedef int (*mca_pcm_base_proc_get_parent_fn_t)(void);
typedef int (*mca_pcm_base_finalize_fn_t)(void);
/*
 * Ver 1.0.0
 */

struct mca_pcm_base_module_1_0_0_t {
    mca_base_module_t pcmm_version;
    mca_base_module_data_1_0_0_t pcmm_data;
    mca_pcm_base_init_fn_t pcmm_init;
    mca_pcm_base_finalize_fn_t pcmm_finalize;
};
typedef struct mca_pcm_base_module_1_0_0_t mca_pcm_base_module_1_0_0_t;
struct mca_pcm_1_0_0_t {
    mca_pcm_base_query_get_nodes_fn_t pcm_query_get_nodes;
    mca_pcm_base_handle_new_fn_t pcm_handle_new;
    mca_pcm_base_handle_get_fn_t pcm_handle_get;
    mca_pcm_base_handle_free_fn_t pcm_handle_free;
    mca_pcm_base_job_can_spawn_fn_t pcm_job_can_spawn;
    mca_pcm_base_job_set_arguments_fn_t pcm_job_set_arguments;
    mca_pcm_base_job_launch_procs_fn_t pcm_job_launch_procs;
    mca_pcm_base_job_rendezvous_fn_t pcm_job_rendezvous;
    mca_pcm_base_job_wait_fn_t pcm_job_wait;
    mca_pcm_base_job_running_fn_t pcm_job_running;
    mca_pcm_base_job_list_running_fn_t pcm_job_list_running;
    mca_pcm_base_proc_startup_fn_t pcm_proc_startup;
    mca_pcm_base_proc_get_peers_fn_t pcm_proc_get_peers;
    mca_pcm_base_proc_get_me_fn_t pcm_proc_get_me;
    mca_pcm_base_proc_get_parent_fn_t pcm_proc_get_parent;
};
typedef struct mca_pcm_1_0_0_t mca_pcm_1_0_0_t;
/*
 * Macro for use in modules that are of type pcm v1.0.0
 */
#define MCA_PCM_BASE_VERSION_1_0_0 \
    /* pcm v1.0 is chained to MCA v1.0 */ \
    MCA_BASE_VERSION_1_0_0, \
    /* pcm v1.0 */ \
    "pcm", 1, 0, 0
typedef mca_pcm_base_module_1_0_0_t mca_pcm_base_module_t;
typedef mca_pcm_1_0_0_t mca_pcm_t;
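/*
 * Example (a sketch; all "example_" names are hypothetical): how a pcm
 * module might export its entry points through mca_pcm_1_0_0_t.  The
 * function pointers must appear in the order declared above, and the
 * module's pcmm_init function would return a pointer to this
 * structure.
 *
 *   static mca_pcm_1_0_0_t example_pcm = {
 *       example_query_get_nodes,
 *       example_handle_new,
 *       example_handle_get,
 *       example_handle_free,
 *       example_job_can_spawn,
 *       example_job_set_arguments,
 *       example_job_launch_procs,
 *       example_job_rendezvous,
 *       example_job_wait,
 *       example_job_running,
 *       example_job_list_running,
 *       example_proc_startup,
 *       example_proc_get_peers,
 *       example_proc_get_me,
 *       example_proc_get_parent
 *   };
 */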
/*
 * Global functions for MCA overall collective open and close
 */
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif

int mca_pcm_base_open(void);
int mca_pcm_base_select(bool *allow_multi_user_threads,
                        bool *have_hidden_threads);
int mca_pcm_base_close(void);
bool mca_pcm_base_is_checkpointable(void);
int mca_pcm_base_checkpoint(void);
int mca_pcm_base_continue(void);
int mca_pcm_base_restart(void);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
/*
 * Macros
 */

/*
 * Globals
 */
extern int mca_pcm_base_output;
extern lam_list_t mca_pcm_base_modules_available;
extern mca_pcm_base_module_t mca_pcm_base_selected_module;
extern mca_pcm_t mca_pcm;
#endif /* MCA_PCM_H_ */