1
1

* Remove the lam/runtime code, moving instead to singleton mca modules

directly
* Update pcm, oob, and registry mca module headers to match changes Jeff
  and I talked about tonight to do checkpoint/restart and all that

This commit was SVN r197.
Этот коммит содержится в:
Brian Barrett 2004-01-10 08:09:54 +00:00
родитель 8094a048a2
Коммит 0d6e729e0b
5 изменённых файлов: 574 добавлений и 15 удалений

Просмотреть файл

@ -397,7 +397,6 @@ AC_CONFIG_FILES([
src/lam/ctnetwork/Makefile
src/lam/lfc/Makefile
src/lam/mem/Makefile
src/lam/runtime/Makefile
src/lam/os/Makefile
src/lam/os/cygwin/Makefile

Просмотреть файл

@ -5,7 +5,7 @@
include $(top_srcdir)/config/Makefile.options
SUBDIRS = ctnetwork lfc mem os threads util runtime
SUBDIRS = ctnetwork lfc mem os threads util
# If the --enable-single-library flag was given to configure, then the
# user wants to merge liblam and libmpi into a single big, honkin'

Просмотреть файл

@ -3,16 +3,61 @@
* $HEADER$
*/
/**
* \brief Out of Band Messaging Interface
*
* LAM/MPI provides a simple point-to-point tagged messaging system
* intended for out-of-band communication. This interface should be
* used minimally in general LAM code and should not be used
* explicitly in the MPI layer. Not all run-time environments provide
* a sufficient out-of-band messaging system, so some environments may
* choose not to implement this interface, at the cost of reduced
* functionality.
*
* This interface can be brought up as soon as the process control
* interface is initiated. The process control interface is not
* strictly required, but it is unclear how one could determine the
* processes' parallel_job_id and vpid without asking the process
* control interface. It should not be a requirement of this
* interface that MPI exist or be properly initialized for a send to
* complete. One can possibly envision using a ptl progression engine
* for out-of-band messaging, but it must not put any MPI requirements
* on the interface.
*
* The out-of-band messaging interface is actually implemented through
* the lam/oob mca module - details of a particular implementation
* will be found there.
*/
#ifndef MCA_OOB_H_
#define MCA_OOB_H_
#include "lam_config.h"
#include "lam/runtime/oob.h"
#include "mca/mca.h"
#include <sys/types.h>
/*
* Global constants / types
*/
/* "Special" tags */
#define MCA_OOB_ANY_TAG -1
#define MCA_OOB_REGISTRY_TAG -2
/* "Special" vpids */
#define MCA_OOB_MPIRUN -1
typedef void (*mca_oob_recv_cb_t)(char* parallel_job_id, int tag,
int vpid, void* data, size_t data_len, int status);
/*
* Functions every module instance will have to provide
*/
typedef int (*mca_oob_query_fn_t)(int *priority);
typedef int (*mca_oob_init_fn_t)(char* parallel_job_id, int vpid);
typedef struct mca_oob_1_0_0* (*mca_oob_init_fn_t)(void);
typedef int (*mca_oob_send_fn_t)(char* parallel_job_id, int vpid, int tag,
void* data, size_t data_len);
typedef int (*mca_oob_recv_fn_t)(char* parallel_job_id, int* tag, int* vpid,
@ -20,26 +65,56 @@ typedef int (*mca_oob_recv_fn_t)(char* parallel_job_id, int* tag, int* vpid,
typedef int (*mca_oob_recv_nb_fn_t)(char* parallel_job_id, int* tag, int* vpid,
void** data, size_t* data_len);
typedef int (*mca_oob_recv_cb_fn_t)(char* parallel_job_id, int tag,
lam_oob_recv_cb_t callback);
mca_oob_recv_cb_t callback);
typedef int (*mca_oob_finalize_fn_t)(void);
/*
* Ver 1.0.0
*/
typedef struct mca_oob_module_1_0_0 {
mca_1_0_0_t super;
mca_module_1_0_0_t super;
mca_oob_query_fn_t oobm_query;
mca_oob_init_fn_t oobm_init;
mca_oob_finalize_fn_t oob_finalize;
} mca_oob_module_1_0_0_t;
typedef struct mca_oob_1_0_0 {
mca_1_0_0_t super;
mca_oob_send_fn_t oob_send;
mca_oob_recv_fn_t oob_recv;
mca_oob_recv_nb_fn_t oob_recv_nb;
mca_oob_recv_cb_fn_t oob_recv_cb;
} mca_oob_module_1_0_0_t;
} mca_oob_1_0_0_t;
typedef mca_oob_module_1_0_0_t mca_oob_module_t;
typedef mca_oob_1_0_0_t mca_oob_t;
/*
* Global functions for MCA overall collective open and close
*/
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
int mca_oob_base_open(lam_cmd_line_t *cmd);
int mca_oob_base_close(void);
bool mca_oob_base_is_checkpointable(void)
int mca_oob_base_checkpoint(void);
int mca_oob_base_continue(void);
int mca_oob_base_restart(void);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
/*
* Global struct holding the selected module's function pointers
*/
extern mca_oob_t mca_oob;
#endif

Просмотреть файл

@ -3,6 +3,53 @@
* $HEADER$
*/
/**
* \brief LAM/MPI Interface for Parallel Job & Process Control (pcm)
*
* LAM/MPI assumes it is running under a fully operational parallel
* run-time environment (RTE). This environment may be provided by
* batch schedulers such as PBS and LSF, single system image tools
* such as bproc, or specially designed MPI control daemons (the MPICH
* mpd or the included LAM daemon). The functionality provided
* through the process control interface is dependent on the support
* of the underlying infrastructure. For example, lam_pcm_spawn
* (essentially, the "go do it" part of MPI_COMM_SPAWN) is not
* available for jobs running under the Quadrics/RMS RTE. The LAM
* daemons will always provide the complete pcm interface.
*
* Like the other LAM run-time interfaces, the pcm interface is
* implemented through mca modules (lam/pcm). For details on the
* capabilities of a particular module, please see the individual
* module's documentation.
*
* A run-time environment suitable for use by LAM/MPI must provide the
* following capabilities:
*
* - Remote process startup at job-start time with the ability to:
* - push an environment (or a large chunk of an environment) to the started process
* - redirect the stdout and stderr of the process to either a file (batch
* schedulers) or the mpirun application (LAM daemons) without interaction from the
* started process
* - A working registry interface
* - A "unique" job id for each parallel job
* - The ability to "clean up" after a job when provided with that job id
* - The ability to receive UNIX wait-style notification of parallel job termination
*
* A run-time environment should provide the following capabilities if supported:
* - Remote process spawning for MPI_SPAWN and friends
* - Fine-grained control over process cleanup (for example, only do final cleanup
* of resources when all applications are unavailable, kill on first death, kill on
* the 3rd process exit, etc.)
*
* The pcm interface is responsible for ensuring that each process
* started is capable of performing peer discovery during MPI_INIT.
* It is intended that mpirun will be actively calling into the pcm
* interface so that mpirun can be used as a rendezvous point. Using
* mpirun is certainly not required, but it is anticipated this will
* be a common module design.
*
*/
#ifndef MCA_PCM_H_
#define MCA_PCM_H_
@ -10,27 +57,367 @@
#include "mca/mca.h"
/*
* "PCM" global types
*/
#define LAM_PCM_PROC_MPIRUN 0
#define LAM_PCM_PROC_MPIAPP 1
#define LAM_PCM_PROC_OTHER 2
typedef struct lam_pcm_node {
int32_t node_num;
int32_t num_procs;
} lam_pcm_node_t;
typedef struct lam_pcm_control_args {
char* request;
char* value;
} lam_pcm_control_args_t;
typedef char* lam_pcm_job_handle_t;
/*
* functions every module must provide
*/
typedef int (*mca_pcm_query_fn_t)(int *priority);
typedef int (*mca_pcm_init_fn_t)(char* parallel_job_id, int vpid);
typedef struct mca_pcm_1_0_0* (*mca_pcm_init_fn_t)(void);
/**
* \func lam_pcm_query_get_nodes
*
* Get list of nodes available for execution
*
* @param nodes Pointer that will contain array of nodes available
* @param nodes_len Length of nodes array
* @param available_procs Number of available processors in the RTE
*
* @retval LAM_SUCCESS success
* @retval LAM_NOT_SUPPORTED Not available
*
* Obtain a list of nodes available for execution. No promises are
* made that such information is available - for some environments
* (Quadrics/RMS, etc) nothing is really known about the environment
* until you ask for it. If a node/host mapping is unavailable,
* *nodes will be NULL and nodes_len will be 0. If the total number
* of available processors for MPI applications is not available, it
* will be set to -1 and the function will return "Not available".
* In the case where both are available, available_procs will be
* equal to the sum of nodes[0...n].num_procs.
*/
typedef int (*mca_pcm_query_get_nodes)(lam_pcm_node_t **nodes, size_t *nodes_len,
int available_procs);
/**
* Get new parallel job handle
*
* @param parent Parent job handle (NULL if not in parallel job)
*
* @retval NULL failure
* @retval non-NULL success
*
* The run-time environment tracks MPI applications through a
* parallel job handle, which is a char* string. This string can be
* used to request information about the status of a currently
* running job, kill the job, etc.
*
* The parent parameter allows the run-time system to provide a
* process tree (spawn, etc.) if the user really wants such
* information. For mpirun, it should just be NULL.
*
* \warning The handle must be released using lam_pcm_handle_free
*/
typedef lam_pcm_job_handle_t (*mca_pcm_handle_new_fn_t)(lam_pcm_job_handle_t parent);
/**
* Get my parallel job handle
*
* @retval NULL failure - environment not properly initialized
* @retval non-NULL success
*
* Return the parallel job handle for the currently running process
*
* \warning The handle must be released using lam_pcm_handle_free
*/
typedef lam_pcm_job_handle_t (*mca_pcm_handle_get_fn_t)(void);
/**
* Free a job handle
*
* @param job_handle Pointer to a lam_pcm_job_handle_t
*
* Free a job handle returned by lam_pcm_handle_new or
* lam_pcm_handle_get.
*/
typedef void (*mca_pcm_handle_free_fn_t)(lam_pcm_job_handle_t *job_handle);
/**
* Ask if mca module can spawn processes
*
* @param job_handle Parallel job handle of running process
*
* @retval LAM_SUCCESS LAM can spawn more jobs
* @retval LAM_NOT_SUPPORTED LAM can not spawn more jobs
*
* Ask the mca module currently providing the run-time environment
* whether it supports spawning more processes. This question should
* always return LAM_SUCCESS (yes) if called from mpirun. Useful
* for asking if MPI_SPAWN and friends can run.
*/
typedef int (*mca_pcm_job_can_sapwn_fn_t)(lam_pcm_job_handle_t job_handle);
/**
* Configure arguments for the parallel job to be started
*
* @param job_handle Parallel job handle to configure
* @param opts Array of key=value structures requesting job behaviour
* @param opts_len Length of opts array
*
* @retval LAM_SUCCESS Success
* @retval LAM_ERROR Unknown failure
*
* Configure the job using key=value arguments. The meanings of the
* arguments are up to the specific mca module providing run-time support.
*
* Common key values will be provided here once MCAs begin to use
* this function. The only existing module no-ops this entire
* function.
*
* \warning It is an error to call this function more than once on a single
* job handle.
*/
typedef int (*mca_pcm_job_set_arguments_fn_t)(lam_pcm_job_handle_t job_handle,
lam_pcm_control_args_t* opts,
size_t opts_len);
/**
* Launch processes across parallel job
*
* @param job_handle Parallel job handle within which to launch processes
* @param nodes Array of nodes structures describing targets of launch
* @param nodes_len Length of nodes
* @param file Process to launch (does not have to be equal to argv[0])
* @param argc Length of argv
* @param argv Argv array for launched processes
* @param env Environment array for launched process. See note below
*
* @retval LAM_SUCCESS Success
* @retval LAM_ERR_RESOURCE_BUSY Try again real soon now
* @retval LAM_ERR_NOT_SUPPORTED non-MPIRUN process can not spawn jobs
* @retval LAM_FAILURE Unknown failure
*
* Launch num_procs nodes[?].processes on nodes[?].node_num for each
* nodes entry, as part of job_handle's job. The env array should
* contain any environment variables that should be pushed to the
* remote processes. The mca may provide a more detailed
* environment if necessary (bproc, etc.).
*
* LAM_ERR_NOT_SUPPORTED will be returned if the mca module does not
* support spawning of new applications from
*/
typedef int (*mca_pcm_job_launch_procs_fn_t)(lam_pcm_job_handle_t job_handle,
lam_pcm_node_t *nodes,
size_t nodes_len, const char* file,
int argc, const char* argv[],
const char *env[]);
/**
* Do rendezvous duties after launching parallel job
*
* @param job_handle Parallel job handle to run through startup
*
* @retval LAM_SUCCESS Success
* @retval LAM_FAILURE Unknown failure
*
* Do the civic duties required to complete the rendezvous part of
* the startup protocol. After this, the MPI application should
* know who all its neighbors are. It is, of course, completely
* possible that the MCA module has been in the background doing
* this all along and didn't bother to tell you. When this function
* returns, it is safe to assume that all rendezvous is complete
* (i.e., you can exit and not mess anything up).
*/
typedef int (*mca_pcm_job_rendezvous_fn_t)(lam_pcm_job_handle_t job_handle);
/**
* Wait for job completion
*
* @param job_handle Parallel job handle to wait on
*
* @retval LAM_SUCCESS Success
* @retval LAM_ERR_INTERUPTED Interrupted (due to signal, etc.)
*
* The LAM parallel version of "wait". It is not required to wait
* on a job at termination, as job results will be expunged over
* time as resource limits dictate.
*/
typedef int (*mca_pcm_job_wait_fn_t)(lam_pcm_job_handle_t job_handle);
/**
* Request job status
*
* @param job_handle Parallel job handle to query
* @param running Job is running, if true
*
* @retval LAM_SUCCESS Success
* @retval LAM_ERR_BAD_PARAM Invalid job handle
*
* Ask if the job is running. If the job has recently finished, this does
* not imply wait; the pcm interface will call wait for you.
*/
typedef int (*mca_pcm_job_running_fn_t)(lam_pcm_job_handle_t job_handle,
int* running);
/**
* Request list of job handles running in current environment
*
* @param handles Pointer to job handles array
* @param handles_len length of handles array
*
* @retval LAM_ERR_NOT_IMPLEMENTED Not implemented
*
* Query the environment about currently running jobs. Intended for
* applications outside MPI and mpirun, to be user friendly and all
* those things. mca modules are not required to support this function.
*
* \warning This function is not yet implemented.
*/
typedef int (*mca_pcm_job_list_running_fn_t)(lam_pcm_job_handle_t **handles,
size_t handles_len);
/**
* Do process startup code
*
* @retval LAM_SUCCESS Success
* @retval LAM_ERR_FATAL Fatal error occurred
* @retval LAM_ERROR Unknown failure
*
* Do all communication work required to get peer list and establish
* the out of band communication mechanism. If a pcm interface uses
* fork()/exec() to start other processes on the current node, it
* should do so and complete all rendezvous before returning from
* this function.
*
* The mca module is free to start the oob interface as soon as it
* has provided the oob interface enough information to do so (tight
* integration with the oob mca module is probably required to meet
* this constraint).
*/
typedef int (*mca_pcm_proc_startup_fn_t)(void);
/**
* Get peers list
*
* @retval LAM_ERR_NOT_IMPLEMENTED Function not implemented
*
* Get list of peers in the parallel job. Should not require any
* communication with other nodes (communication with processes on
* this node are allowed).
*
* \warning This function is not implemented and its argument list
* will obviously change in the very near future.
*/
typedef int (*mca_pcm_proc_get_peers_fn_t)(void);
/**
* Get my entry in the peers list
*
* @retval LAM_ERR_NOT_IMPLEMENTED Function not implemented
*
* Get my entry in the peers list
*
* \warning This function is not implemented and its argument list
* will obviously change in the very near future.
*/
typedef int (*mca_pcm_proc_get_me_fn_t)(void);
/**
* Get my entry in the peers list
*
* @retval LAM_ERR_NOT_IMPLEMENTED Function not implemented
*
* Get my entry in the peers list
*
* \warning This function is not implemented and its argument list
* will obviously change in the very near future.
*/
typedef int (*mca_pcm_proc_get_parent_fn_t)(void);
typedef int (*mca_pcm_finalize_fn_t)(void);
/*
* Ver 1.0.0
*/
typedef struct mca_pcm_module_1_0_0 {
mca_1_0_0_t super;
mca_module_1_0_0_t super;
mca_pcm_query_fn_t pcmm_query;
mca_pcm_init_fn_t pcmm_init;
mca_pcm_finalize_fn_t pcmm_finalize;
} mca_pcm_module_1_0_0_t;
typedef struct mca_pcm_1_0_0 {
mca_pcm_publish_fn_t pcm_publish;
mca_pcm_lookup_fn_t pcm_lookup;
mca_pcm_finalize_fn_t pcm_finalize;
mca_1_0_0_t super;
mca_pcm_query_get_nodes_fn_t pcm_query_get_nodes;
mca_pcm_handle_new_fn_t pcm_handle_new;
mca_pcm_handle_get_fn_t pcm_handle_get;
mca_pcm_handle_free_fn_t pcm_handle_free;
mca_pcm_job_can_spwan_fn_t pcm_job_can_spawn;
mca_pcm_job_set_arguments_fn_t pcm_job_set_arguments;
mca_pcm_job_launch_procs_fn_t pcm_job_launch_procs;
mca_pcm_job_rendezvous_fn_t pcm_job_rendezvous;
mca_pcm_job_wait_fn_t pcm_job_wait;
mca_pcm_job_running_fn_t pcm_job_running;
mca_pcm_job_list_running_fn_t pcm_job_list_running;
mca_pcm_proc_startup_fn_t pcm_proc_startup;
mca_pcm_proc_get_peers_fn_t pcm_proc_get_peers;
mca_pcm_proc_get_me_fn_t pcm_proc_get_me;
mca_pcm_proc_get_parent_fn_t pcm_proc_get_parent;
} mca_pcm_module_1_0_0_t;
typedef mca_pcm_module_1_0_0_t mca_pcm_module_t;
typedef mca_pcm_1_0_0_t mca_pcm_t;
/*
* Global functions for MCA overall collective open and close
*/
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
int mca_pcm_base_open(lam_cmd_line_t *cmd);
int mca_pcm_base_close(void);
bool mca_pcm_base_is_checkpointable(void)
int mca_pcm_base_checkpoint(void);
int mca_pcm_base_continue(void);
int mca_pcm_base_restart(void);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
/*
* Global struct holding the selected module's function pointers
*/
extern mca_pcm_t mca_pcm;
#endif

Просмотреть файл

@ -3,6 +3,27 @@
* $HEADER$
*/
/**
* \brief Publish/Subscribe-style global registry database infrastructure
*
* LAM/MPI provides a global publish/subscribe-style registry database
* for use in both LAM and MPI layers. Data is stored in a flat
* key=value style database; keys are of type char* and value of type
* void*. No endian correction is performed on the data.
*
* The registry is implemented as an mca module, using the services
* provided by the current run-time environment. In environments with
* limited native out of band message passing and global registry
* infrastructure, this data may be stored in the MPI applications
* themselves. Care should therefore be used when storing potentially
* large amounts of data in the global registry.
*
* Locality of stored data is unspecified and unknown to the calling
* application. The underlying mca module is free to store data
* wherever practical. A high quality implementation will provide
* replication in case of process failure.
*/
#ifndef MCA_REGISTRY_H_
#define MCA_REGISTRY_H_
@ -10,29 +31,106 @@
#include "mca/mca.h"
/*
* Functions every module instance will have to provide
*/
typedef int (*mca_registry_query_fn_t)(int *priority);
typedef int (*mca_registry_init_fn_t)(char* parallel_job_id, int vpid);
typedef struct mca_registry_1_0_0* (*mca_registry_init_fn_t)(void);
/**
* Publish a key=value piece of information
*
* @param key Character string containing the key
* @param data Pointer to value data
* @param data_len Length of data buffer
*
* @retval 0 Update succeeded
* @retval
*
* Add key=value pair to the global registry. Will overwrite any
* existing data for the specified key (if it is already in use).
* Atomicity of the publish is guaranteed - the registry does not
* have to be locked for a publish to occur safely.
*
* \warning May block if the registry entry for key is currently
* locked by another process.
*/
typedef int (*mca_registry_publish_fn_t)(char* key, void* data, size_t data_len);
/**
* Get the value for given key
*
* @param key String containing key to search on
* @param data Pointer to a void* pointer to store data
* @param datalen Pointer to size_t containing length of data
*
* @retval 0 Key was found and data successfully obtained
* @retval ENOMATCH No such key was found in the database
* @retval ENOMEM Could not allocate enough memory for data
*
* Search for the given key, downloading the corresponding value
* into the pointer *data if the key exists. *data will point to
* the data buffer and data_len will contain the buffer length if
* the value could be obtained. On error, *data will be NULL and
* data_len will be 0.
*
* \warning Returned buffer was allocated via lam_malloc and must be
* freed by the caller using lam_free.
*
* \warning May block if the registry entry for key is currently
* locked by another process.
*/
typedef int (*mca_registry_lookup_fn_t)(char* key, void** data, size_t* data_len);
typedef int (*mca_registry_finalize_fn_t)(void);
/*
* Ver 1.0.0
*/
typedef struct mca_registry_module_1_0_0 {
mca_1_0_0_t super;
mca_module_1_0_0_t super;
mca_registry_query_fn_t registry_m_query;
mca_registry_init_fn_t registry_m_init;
mca_registry_finalize_fn_t registry_m_finalize;
} mca_registry_module_1_0_0_t;
typedef struct mca_registry_1_0_0 {
mca_1_0_0_t super;
mca_registry_publish_fn_t registry_publish;
mca_registry_lookup_fn_t registry_lookup;
mca_registry_finalize_fn_t registry_finalize;
} mca_registry_module_1_0_0_t;
typedef mca_registry_module_1_0_0_t mca_registry_module_t;
typedef mca_registry_1_0_0_t mca_registry_t;
/*
* Global functions for MCA overall collective open and close
*/
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
int mca_registry_base_open(lam_cmd_line_t *cmd);
int mca_registry_base_close(void);
bool mca_registry_base_is_checkpointable(void)
int mca_registry_base_checkpoint(void);
int mca_registry_base_continue(void);
int mca_registry_base_restart(void);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
/*
* Global struct holding the selected module's function pointers
*/
extern mca_registry_t mca_registry;
#endif