/* -*- C -*-
*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* The Open RTE Process Launch Subsystem
*
* The process launch subsystem (PLS) is responsible for actually
* launching a specified application's processes across the indicated
* resource. The PLS is invoked by the controlling program (mpirun or
* whatever) after the resource discovery, allocation, and mapping
* subsystems have performed their work. Thus, the PLS can assume that
* certain data structures have been created, and that some data MAY
* be present - the PLS must also be capable of appropriately dealing
* with situations where earlier subsystems may not have access to
* complete information. For example, while the discovery subsystem
* (RDS) will provide information on the launcher used by a particular
* resource, that information may NOT have been provided and hence may
* not be available when the PLS is invoked. Thus, the PLS components
* must include the ability to sense their environment where
* necessary.
*
* The PLS obtains its input information from several sources:
*
* - the ORTE_JOB_SEGMENT of the registry. Information on this segment
* includes: the application to be executed; the number of processes
* of each application to be run; the context (argv and enviro arrays)
* for each process.
*
* - the ORTE_RESOURCE_SEGMENT of the registry. This includes:
* identification of the launcher to be used on the indicated
* resource; location of temporary directory and other filesystem
* directory locations;
*
* - MCA parameters. This includes any directive from the user as to
* the launcher to be used and/or its configuration.
*
* The PLS uses this information to launch the processes upon the
* indicated resource(s). PLS components are free to ignore
* information that is not pertinent to their operation. For example,
* although the user may have specified a particular mapping of
* process to nodename, a PLS launching the application on a resource
* that does not permit such specifications would ignore the
* corresponding information that the mapper placed on the registry -
* it is irrelevant to that launcher's operation (although a warning
* to the user, in this case, might be appropriate).
*
* The PLS is tightly coupled to the PLSNDS - the PLS name discovery
* service - that each process uses to "discover" its official
* name. Each PLS MUST:
*
* - set the MCA parameter "pls_base_nds" to indicate which name
* discovery service should be used on the remote side to discover
* the process' name. The contents of the MCA parameter should be one
* of the string names in the PLSNDS (currently, this is hard-coded in
* plsnds_open_close.c -- see below -- but someday it will likely turn
* into another framework/set of components).
*
* - have a corresponding entry in the orte_plsnds table (defined in
* src/plsnds/plsnds_open_close.c) that identifies the NDS and its
* associated function for obtaining the process name.
*
* - where necessary, provide a function in the orte_plsnds directory
* that can define the process name from whatever info that
* corresponding launcher provided
*
* More information on the requirements for the PLSNDS can be found in
* the header file src/plsnds/plsnds.h.
*
* Unless otherwise directed by the user and/or the system
* configuration, the PLS will utilize a daemon-based launch to
* maximize the availability of ORTE services. To accomplish this, the
* resource manager (RMGR) subsystem must support both the detection
* of daemon existence and the ability to execute a two-step launch
* sequence (with the first step being daemon launch, followed by the
* secondary application launch). In turn, the PLS must provide a
* component with the ability to launch via an existing daemon.
*
* NOTE: The RMGR may override local launcher specification to utilize
* the daemon-based launch component - it is expected that the daemons
* in the local environment will know how to launch in that
* environment. It is vital, therefore, that the PLS components NOT be
* directly called by any ORTE function - instead, all PLS
* functionality is to be accessed via the RMGR.
*
* As part of the launch procedure, PLS components must provide the
* following capabilities:
*
* - set the "pls_base_nds" MCA parameter indicating which NDS is to
* be used. This information is subsequently used by the name
* discovery service to determine a process' official name, as
* described above.
*
* - setup I/O forwarding for all processes (where possible). Some
* environments will, of course, not support this capability or will
* provide it natively. Those respective PLS components should behave
* accordingly. In other cases, however, the PLS component should
* establish the I/O forwarding interconnects and enable that
* subsystem.
*
*
* Since I/O forwarding is still under development, this is not yet
* well-defined.
*
*
* - pass context info to each process. The argv and enviro arrays are
* stored on the registry by the resource allocation subsystem (RAS) -
* this includes any process-specific deviations from the
* application's general overall context. The PLS should obtain this
* information from the registry and pass the context along to each
* process.
*
* - utilize scalable launch methods (where possible). In environments
* that allow it, PLS components should utilize methods that support
* scalable launch of applications involving large numbers of
* processes.
*
* - detect that required libraries are present on involved compute
* nodes. This is a secondary feature for future implementations.
*
* - preposition files and libraries where required and possible. This
* is a secondary feature for future implementations.
*
* When launching an application, the PLS shall update the registry
* with information on batch jobid, assigned jobname, etc. that may
* have been provided by the local resource's launcher. This
* information is stored on the registry's ORTE_JOB_SEGMENT in the
* "global" container. In addition, any information relevant to
* state-of-health monitoring (e.g., sockets opened to an application
* process by a spawning daemon to detect completion of process
* startup) should be stored on the ORTE_JOB_SEGMENT in the respective
* process' container.
*
* Once a process is launched, two options exist for subsequent
* operations:
*
* - if it is an ORTE process (i.e., one that calls orte_init), the
* process will register itself on the ORTE_JOB_SEGMENT of the
* registry. This includes providing information on the nodename where
* the process is located, contact information for the runtime message
* library (RML) and other subsystems, local pid, etc.
*
* - if it is NOT an ORTE process, then registration will not take
* place. In this case, the ability to subsequently monitor the
* progress/state-of-health of the process and/or provide other
* services *may* be limited. The PLS has no further responsibilities
* for such processes.
*
* Once the PLS has completed launch of the application, it notifies
* the state-of-health (SOH) monitor that a jobid has been launched
* and is now available for monitoring. It is the SOH's
* responsibility to determine the level of monitoring that can be
* provided, and to notify the rest of the ORTE system of process
* failures/problems.
*
*
* Still to be defined:
*
* - Need to add a "kill process" module API function
*
* - If a PLS fails during a job launch, it should call the errmanager
* which will tell it what to do (abort, kill all those already
* launched and abort, continue, etc.).
*
*/
#ifndef ORTE_MCA_PLS_H
#define ORTE_MCA_PLS_H
#include "orte_config.h"
#include "opal/mca/mca.h"
#include "orte/mca/ns/ns_types.h"
#include "opal/class/opal_list.h"
/*
* pls module functions
*/
/**
* Launch the indicated jobid
*/
typedef int (*orte_pls_base_module_launch_job_fn_t)(orte_jobid_t);
/**
* Terminate any processes launched for the respective jobid by
* this component.
*/
typedef int (*orte_pls_base_module_terminate_job_fn_t)(orte_jobid_t, struct timeval *timeout, opal_list_t *attrs);
/**
 * Module entry point that terminates the daemons (orteds).
 *
 * Unlike the job-level calls, no jobid is passed to this function.
 * NOTE(review): presumably it acts on every daemon the component is
 * responsible for - confirm against the implementations.
 *
 * @param timeout  NOTE(review): presumably bounds how long the call may
 *                 wait - confirm.
 * @param attrs    List of attributes; their meaning is not defined in
 *                 this header.
 * @return int status. NOTE(review): presumably an ORTE error code - confirm.
 */
typedef int (*orte_pls_base_module_terminate_orteds_fn_t)(
    struct timeval *timeout,
    opal_list_t *attrs);
/**
 * Module entry point that terminates one specific process.
 *
 * @param proc  Name of the process to terminate (read-only).
 * @return int status. NOTE(review): presumably an ORTE error code - confirm.
 */
typedef int (*orte_pls_base_module_terminate_proc_fn_t)(
    const orte_process_name_t *proc);
/**
* Signal any processes launched for the respective jobid by
* this component.
*/
typedef int (*orte_pls_base_module_signal_job_fn_t)(orte_jobid_t, int32_t, opal_list_t *attrs);
/**
 * Module entry point that signals one specific process.
 *
 * @param proc    Name of the process to signal (read-only).
 * @param signum  NOTE(review): presumably the signal number to deliver -
 *                confirm against the implementations.
 * @return int status. NOTE(review): presumably an ORTE error code - confirm.
 */
typedef int (*orte_pls_base_module_signal_proc_fn_t)(
    const orte_process_name_t *proc,
    int32_t signum);
/**
 * Module entry point that cancels an ongoing operation involving
 * communication to the orteds.
 *
 * Takes no arguments.
 *
 * @return int status. NOTE(review): presumably an ORTE error code - confirm.
 */
typedef int
(*orte_pls_base_module_cancel_operation_fn_t)(void);
/**
 * Module entry point that cleans up all resources held by the module.
 *
 * Takes no arguments.
 *
 * @return int status. NOTE(review): presumably an ORTE error code - confirm.
 */
typedef int
(*orte_pls_base_module_finalize_fn_t)(void);
/**
 * pls module version 1.3.0
 *
 * Table of function pointers that makes up a pls module. Member order
 * is part of the layout and must not be changed.
 */
struct orte_pls_base_module_1_3_0_t {
    /** Launch the indicated jobid */
    orte_pls_base_module_launch_job_fn_t        launch_job;
    /** Terminate the processes launched for a jobid by this component */
    orte_pls_base_module_terminate_job_fn_t     terminate_job;
    /** Terminate the daemons (orteds) */
    orte_pls_base_module_terminate_orteds_fn_t  terminate_orteds;
    /** Terminate a specific process */
    orte_pls_base_module_terminate_proc_fn_t    terminate_proc;
    /** Signal the processes launched for a jobid by this component */
    orte_pls_base_module_signal_job_fn_t        signal_job;
    /** Signal a specific process */
    orte_pls_base_module_signal_proc_fn_t       signal_proc;
    /** Cancel an ongoing operation involving communication to the orteds */
    orte_pls_base_module_cancel_operation_fn_t  cancel_operation;
    /** Cleanup all resources held by the module */
    orte_pls_base_module_finalize_fn_t          finalize;
};
/** Versioned shorthand for struct orte_pls_base_module_1_3_0_t. */
typedef struct orte_pls_base_module_1_3_0_t orte_pls_base_module_1_3_0_t;
/** Unversioned name for the pls module type; currently the v1.3.0 module. */
typedef orte_pls_base_module_1_3_0_t orte_pls_base_module_t;
/**
 * pls component initialization function.
 *
 * The MCA framework calls this to initialize the component; it is
 * invoked exactly once per process.
 *
 * @param priority (OUT) Relative priority or ranking used by MCA to
 *                 select a module.
 * @return Pointer to a v1.3.0 pls module. NOTE(review): presumably NULL
 *         when the component cannot be used - confirm against the
 *         implementations.
 */
typedef struct orte_pls_base_module_1_3_0_t *(*orte_pls_base_component_init_fn_t)(int *priority);
/**
 * pls component v1.3.0
 *
 * Descriptor of a pls component: the MCA base component and data
 * fields followed by the pls-specific initialization hook. Member
 * order is part of the layout and must not be changed.
 */
struct orte_pls_base_component_1_3_0_t {
    /** component version */
    mca_base_component_t             pls_version;
    /** component data */
    mca_base_component_data_1_0_0_t  pls_data;
    /** Function called when component is initialized */
    orte_pls_base_component_init_fn_t pls_init;
};
/** Versioned shorthand for struct orte_pls_base_component_1_3_0_t. */
typedef struct orte_pls_base_component_1_3_0_t orte_pls_base_component_1_3_0_t;
/** Unversioned name for the pls component type; currently the v1.3.0 component. */
typedef struct orte_pls_base_component_1_3_0_t orte_pls_base_component_t;
/**
 * Macro for use in components that are of type pls v1.3.0.
 *
 * pls v1.3 is chained to MCA v1.0, so the expansion is the
 * MCA_BASE_VERSION_1_0_0 fields followed by the pls v1.3 fields: the
 * string "pls" and the numbers 1, 3, 0. The expansion is a bare
 * comma-separated list meant to be placed inside an initializer.
 */
#define ORTE_PLS_BASE_VERSION_1_3_0 \
    MCA_BASE_VERSION_1_0_0, "pls", 1, 3, 0
/**
 * Global structure for accessing PLS functions.
 *
 * Holds the selected module's function pointers. This is only the
 * declaration; the object is defined elsewhere.
 */
ORTE_DECLSPEC extern orte_pls_base_module_t orte_pls;
#endif /* ORTE_MCA_PLS_H */