1
1
openmpi/opal/mca/pmix/pmix_server.h
Ralph Castain 0434b615b5 Update ORTE to support PMIx v3
This is a point-in-time update that includes support for several new PMIx features, mostly focused on debuggers and "instant on":

* initial prototype support for PMIx-based debuggers. For the moment, this is restricted to using the DVM. Supports direct launch of apps under debugger control, and indirect launch using prun as the intermediate launcher. Includes ability for debuggers to control the environment of both the launcher and the spawned app procs. Work continues on completing support for indirect launch

* IO forwarding for tools. Output of apps launched under tool control is directed to the tool and output there - includes support for XML formatting and output to files. Stdin can be forwarded from the tool to apps, but this hasn't been implemented in ORTE yet.

* Fabric integration for "instant on". Enable collection of network "blobs" to be delivered to network libraries on compute nodes prior to local proc spawn. Infrastructure is in place - implementation will come later.

* Harvesting and forwarding of envars. Enable network plugins to harvest envars and include them in the launch msg for setting the environment prior to local proc spawn. Currently, only OmniPath is supported. PMIx MCA params control which envars are included, and also allows envars to be excluded.

Signed-off-by: Ralph Castain <rhc@open-mpi.org>
2018-03-02 02:00:31 -08:00

287 строки
16 KiB
C

/*
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OPAL_PMIX_SERVER_H
#define OPAL_PMIX_SERVER_H
#include "opal_config.h"
#include "opal/types.h"
#include "opal/mca/pmix/pmix_types.h"
BEGIN_C_DECLS
/**** SERVER FUNCTION-SHIPPED APIs ****/
/* NOTE: for performance purposes, the host server is required to
* return as quickly as possible from all functions. Execution of
* the function is thus to be done asynchronously so as to allow
* the server support library to handle multiple client requests
* as quickly and scalably as possible.
*
* ALL data passed to the host server functions is "owned" by the
* server support library and MUST NOT be free'd. Data returned
* by the host server via callback function is owned by the host
* server, which is free to release it upon return from the callback */
/* Notify the host server that a client connected to us */
typedef int (*opal_pmix_server_client_connected_fn_t)(opal_process_name_t *proc,
void* server_object,
opal_pmix_op_cbfunc_t cbfunc,
void *cbdata);
/* Notify the host server that a client called pmix.finalize - note
* that the client will be in a blocked state until the host server
* executes the callback function, thus allowing the server support
* library to release the client */
typedef int (*opal_pmix_server_client_finalized_fn_t)(opal_process_name_t *proc, void* server_object,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
/* A local client called pmix.abort - note that the client will be in a blocked
* state until the host server executes the callback function, thus
* allowing the server support library to release the client. The
* list of procs_to_abort indicates which processes are to be terminated. A NULL
* indicates that all procs in the client's nspace are to be terminated */
typedef int (*opal_pmix_server_abort_fn_t)(opal_process_name_t *proc, void *server_object,
int status, const char msg[],
opal_list_t *procs_to_abort,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
/* At least one client called either pmix.fence or pmix.fence_nb. In either case,
* the host server will be called via a non-blocking function to execute
* the specified operation once all participating local procs have
* contributed. All processes in the specified list are required to participate
* in the fence[_nb] operation. The callback is to be executed once each daemon
* hosting at least one participant has called the host server's fencenb function.
*
* The list of opal_value_t includes any directives from the user regarding
* how the fence is to be executed (e.g., timeout limits).
*
* The provided data is to be collectively shared with all host
* servers involved in the fence operation, and returned in the modex
* cbfunc. A _NULL_ data value indicates that the local procs had
* no data to contribute */
typedef int (*opal_pmix_server_fencenb_fn_t)(opal_list_t *procs, opal_list_t *info,
char *data, size_t ndata,
opal_pmix_modex_cbfunc_t cbfunc, void *cbdata);
/* Used by the PMIx server to request its local host contact the
* PMIx server on the remote node that hosts the specified proc to
* obtain and return a direct modex blob for that proc
*
* The list of opal_value_t includes any directives from the user regarding
* how the operation is to be executed (e.g., timeout limits).
*/
typedef int (*opal_pmix_server_dmodex_req_fn_t)(opal_process_name_t *proc, opal_list_t *info,
opal_pmix_modex_cbfunc_t cbfunc, void *cbdata);
/* Publish data per the PMIx API specification. The callback is to be executed
* upon completion of the operation. The host server is not required to guarantee
* support for the requested scope - i.e., the server does not need to return an
* error if the data store doesn't support scope-based isolation. However, the
* server must return an error (a) if the key is duplicative within the storage
* scope, and (b) if the server does not allow overwriting of published info by
* the original publisher - it is left to the discretion of the host server to
* allow info-key-based flags to modify this behavior. The persist flag indicates
* how long the server should retain the data. The nspace/rank of the publishing
* process is also provided and is expected to be returned on any subsequent
* lookup request */
typedef int (*opal_pmix_server_publish_fn_t)(opal_process_name_t *proc,
opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
/* Lookup published data. The host server will be passed a NULL-terminated array
* of string keys along with the scope within which the data is expected to have
* been published. The host server is not required to guarantee support for all
* PMIx-defined scopes, but should only search data stores within the specified
* scope within the context of the corresponding "publish" API. The wait flag
* indicates whether the server should wait for all data to become available
* before executing the callback function, or should callback with whatever
* data is immediately available.
*
* The list of opal_value_t includes any directives from the user regarding
* how the operation is to be executed (e.g., timeout limits, whether the
* lookup should wait until data appears).
*/
typedef int (*opal_pmix_server_lookup_fn_t)(opal_process_name_t *proc, char **keys,
opal_list_t *info,
opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata);
/* Delete data from the data store. The host server will be passed a NULL-terminated array
* of string keys along with the scope within which the data is expected to have
* been published. The callback is to be executed upon completion of the delete
* procedure */
typedef int (*opal_pmix_server_unpublish_fn_t)(opal_process_name_t *proc, char **keys,
opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
/* Spawn a set of applications/processes as per the PMIx API. Note that
* applications are not required to be MPI or any other programming model.
* Thus, the host server cannot make any assumptions as to their required
* support. The callback function is to be executed once all processes have
* been started. An error in starting any application or process in this
* request shall cause all applications and processes in the request to
* be terminated, and an error returned to the originating caller */
typedef int (*opal_pmix_server_spawn_fn_t)(opal_process_name_t *requestor,
opal_list_t *job_info, opal_list_t *apps,
opal_pmix_spawn_cbfunc_t cbfunc, void *cbdata);
/* Record the specified processes as "connected". This means that the resource
* manager should treat the failure of any process in the specified group as
* a reportable event, and take appropriate action. The callback function is
* to be called once all participating processes have called connect. Note that
* a process can only engage in *one* connect operation involving the identical
* set of procs at a time. However, a process *can* be simultaneously engaged
* in multiple connect operations, each involving a different set of procs
*
* The list of opal_value_t includes any directives from the user regarding
* how the operation is to be executed (e.g., timeout limits).
*/
typedef int (*opal_pmix_server_connect_fn_t)(opal_list_t *procs, opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
/* Disconnect a previously connected set of processes. An error should be returned
* if the specified set of procs was not previously "connected". As above, a process
* may be involved in multiple simultaneous disconnect operations. However, a process
* is not allowed to reconnect to a set of ranges that has not fully completed
* disconnect - i.e., you have to fully disconnect before you can reconnect to the
* same group of processes.
*
* The list of opal_value_t includes any directives from the user regarding
* how the operation is to be executed (e.g., timeout limits).
*/
typedef int (*opal_pmix_server_disconnect_fn_t)(opal_list_t *procs, opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
/* Register to receive notifications for the specified events. The resource
* manager may have access to events beyond process failure. In cases where
* the client application requests to be notified of such events, the request
* will be passed to the PMIx server, which in turn shall pass the request to
* the resource manager. The list of opal_value_t will provide the OPAL
* error codes corresponding to the desired events */
typedef int (*opal_pmix_server_register_events_fn_t)(opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc,
void *cbdata);
/* Deregister from the specified events. The list of opal_value_t will provide the OPAL
* error codes corresponding to the desired events */
typedef int (*opal_pmix_server_deregister_events_fn_t)(opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc,
void *cbdata);
/* Notify the specified processes of an event generated either by
* the PMIx server itself, or by one of its local clients. The RTE
* is requested to pass the notification to each PMIx server that
* hosts one or more of the specified processes */
typedef int (*opal_pmix_server_notify_fn_t)(int code, opal_process_name_t *source,
opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
/* Query the RTE for information - the list is composed of opal_pmix_query_t items */
typedef int (*opal_pmix_server_query_fn_t)(opal_process_name_t *requestor,
opal_list_t *queries,
opal_pmix_info_cbfunc_t cbfunc, void *cbdata);
/* Register that a tool has connected to the server, and request
* that the tool be assigned a jobid for further interactions.
* The optional opal_value_t list can be used to pass qualifiers for
* the connection request:
*
* (a) OPAL_PMIX_USERID - effective userid of the tool
* (b) OPAL_PMIX_GRPID - effective groupid of the tool
* (c) OPAL_PMIX_FWD_STDOUT - forward any stdout to this tool
* (d) OPAL_PMIX_FWD_STDERR - forward any stderr to this tool
* (e) OPAL_PMIX_FWD_STDIN - forward stdin from this tool to any
* processes spawned on its behalf
*/
typedef void (*opal_pmix_server_tool_connection_fn_t)(opal_list_t *info,
opal_pmix_tool_connection_cbfunc_t cbfunc,
void *cbdata);
/* Log data on behalf of the client */
typedef void (*opal_pmix_server_log_fn_t)(opal_process_name_t *requestor,
opal_list_t *info,
opal_list_t *directives,
opal_pmix_op_cbfunc_t cbfunc,
void *cbdata);
/* Callback function for incoming connection requests from
* local clients */
typedef void (*opal_pmix_connection_cbfunc_t)(int incoming_sd);
/* Register a socket the host server can monitor for connection
* requests, harvest them, and then call our internal callback
* function for further processing. A listener thread is essential
* to efficiently harvesting connection requests from large
* numbers of local clients such as occur when running on large
* SMPs. The host server listener is required to call accept
* on the incoming connection request, and then passing the
* resulting socket to the provided cbfunc. A NULL for this function
* will cause the internal PMIx server to spawn its own listener
* thread */
typedef int (*opal_pmix_server_listener_fn_t)(int listening_sd,
opal_pmix_connection_cbfunc_t cbfunc);
/* Request allocation modifications on behalf of a client */
typedef int (*opal_pmix_server_alloc_fn_t)(const opal_process_name_t *client,
opal_pmix_alloc_directive_t directive,
opal_list_t *data,
opal_pmix_info_cbfunc_t cbfunc, void *cbdata);
/* Execute a job control action on behalf of a client */
typedef int (*opal_pmix_server_job_control_fn_t)(const opal_process_name_t *requestor,
opal_list_t *targets, opal_list_t *directives,
opal_pmix_info_cbfunc_t cbfunc, void *cbdata);
/* we do not provide a monitoring capability */
/* Request forwarding of specified IO channels to the local PMIx server
* for distribution to local clients */
typedef int (*opal_pmix_server_iof_pull_fn_t)(opal_list_t *sources,
opal_list_t *directives,
opal_pmix_iof_channel_t channels,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
/* Entry point for pushing forwarded IO to clients/tools */
typedef int (*opal_pmix_server_iof_push_fn_t)(const opal_process_name_t *source,
opal_pmix_iof_channel_t channel,
unsigned char *data, size_t nbytes);
typedef struct opal_pmix_server_module_1_0_0_t {
opal_pmix_server_client_connected_fn_t client_connected;
opal_pmix_server_client_finalized_fn_t client_finalized;
opal_pmix_server_abort_fn_t abort;
opal_pmix_server_fencenb_fn_t fence_nb;
opal_pmix_server_dmodex_req_fn_t direct_modex;
opal_pmix_server_publish_fn_t publish;
opal_pmix_server_lookup_fn_t lookup;
opal_pmix_server_unpublish_fn_t unpublish;
opal_pmix_server_spawn_fn_t spawn;
opal_pmix_server_connect_fn_t connect;
opal_pmix_server_disconnect_fn_t disconnect;
opal_pmix_server_register_events_fn_t register_events;
opal_pmix_server_deregister_events_fn_t deregister_events;
opal_pmix_server_notify_fn_t notify_event;
opal_pmix_server_query_fn_t query;
opal_pmix_server_tool_connection_fn_t tool_connected;
opal_pmix_server_log_fn_t log;
opal_pmix_server_listener_fn_t listener;
opal_pmix_server_alloc_fn_t allocate;
opal_pmix_server_job_control_fn_t job_control;
opal_pmix_server_iof_pull_fn_t iof_pull;
opal_pmix_server_iof_push_fn_t iof_push;
} opal_pmix_server_module_t;
END_C_DECLS
#endif