3c914a7a97
Still in the "needs to be done" category: * mapping/ranking/binding options aren't correctly supported * if the DVM encounters some errors (e.g., not enough resources for the job), the resulting error is globally set and impacts any subsequent job submission Signed-off-by: Ralph Castain <rhc@open-mpi.org>
947 строки
49 KiB
C
947 строки
49 KiB
C
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
|
/*
|
|
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
|
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
|
* reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
#ifndef OPAL_PMIX_H
|
|
#define OPAL_PMIX_H
|
|
|
|
#include "opal_config.h"
|
|
#include "opal/types.h"
|
|
|
|
#ifdef HAVE_SYS_UN_H
|
|
#include <sys/un.h>
|
|
#endif
|
|
|
|
#include "opal/mca/mca.h"
|
|
#include "opal/mca/event/event.h"
|
|
#include "opal/dss/dss.h"
|
|
#include "opal/runtime/opal.h"
|
|
#include "opal/dss/dss.h"
|
|
#include "opal/util/error.h"
|
|
#include "opal/util/proc.h"
|
|
#include "opal/hash_string.h"
|
|
|
|
#include "opal/mca/pmix/pmix_types.h"
|
|
#include "opal/mca/pmix/pmix_server.h"
|
|
|
|
BEGIN_C_DECLS
|
|
|
|
/* provide access to the framework verbose output without
|
|
* exposing the entire base */
|
|
extern int opal_pmix_verbose_output;
|
|
extern bool opal_pmix_collect_all_data;
|
|
extern bool opal_pmix_base_async_modex;
|
|
extern int opal_pmix_base_exchange(opal_value_t *info,
|
|
opal_pmix_pdata_t *pdat,
|
|
int timeout);
|
|
|
|
/*
|
|
* Count the fash for the the external RM
|
|
*/
|
|
#define OPAL_HASH_JOBID( str, hash ){ \
|
|
OPAL_HASH_STR( str, hash ); \
|
|
hash &= ~(0x8000); \
|
|
}
|
|
|
|
/**
|
|
* Provide a simplified macro for sending data via modex
|
|
* to other processes. The macro requires four arguments:
|
|
*
|
|
* r - the integer return status from the modex op
|
|
* sc - the PMIX scope of the data
|
|
* s - the key to tag the data being posted
|
|
* d - pointer to the data object being posted
|
|
* t - the type of the data
|
|
*/
|
|
#define OPAL_MODEX_SEND_VALUE(r, sc, s, d, t) \
|
|
do { \
|
|
opal_value_t _kv; \
|
|
OBJ_CONSTRUCT(&(_kv), opal_value_t); \
|
|
_kv.key = (s); \
|
|
if (OPAL_SUCCESS != ((r) = opal_value_load(&(_kv), (d), (t)))) { \
|
|
OPAL_ERROR_LOG((r)); \
|
|
} else { \
|
|
if (OPAL_SUCCESS != ((r) = opal_pmix.put(sc, &(_kv)))) { \
|
|
OPAL_ERROR_LOG((r)); \
|
|
} \
|
|
} \
|
|
/* opal_value_load makes a copy of the data, so release it */ \
|
|
_kv.key = NULL; \
|
|
OBJ_DESTRUCT(&(_kv)); \
|
|
} while(0);
|
|
|
|
/**
|
|
* Provide a simplified macro for sending data via modex
|
|
* to other processes. The macro requires four arguments:
|
|
*
|
|
* r - the integer return status from the modex op
|
|
* sc - the PMIX scope of the data
|
|
* s - the key to tag the data being posted
|
|
* d - the data object being posted
|
|
* sz - the number of bytes in the data object
|
|
*/
|
|
#define OPAL_MODEX_SEND_STRING(r, sc, s, d, sz) \
|
|
do { \
|
|
opal_value_t _kv; \
|
|
OBJ_CONSTRUCT(&(_kv), opal_value_t); \
|
|
_kv.key = (s); \
|
|
_kv.type = OPAL_BYTE_OBJECT; \
|
|
_kv.data.bo.bytes = (uint8_t*)(d); \
|
|
_kv.data.bo.size = (sz); \
|
|
if (OPAL_SUCCESS != ((r) = opal_pmix.put(sc, &(_kv)))) { \
|
|
OPAL_ERROR_LOG((r)); \
|
|
} \
|
|
_kv.data.bo.bytes = NULL; /* protect the data */ \
|
|
_kv.key = NULL; /* protect the key */ \
|
|
OBJ_DESTRUCT(&(_kv)); \
|
|
} while(0);
|
|
|
|
/**
|
|
* Provide a simplified macro for sending data via modex
|
|
* to other processes. The macro requires four arguments:
|
|
*
|
|
* r - the integer return status from the modex op
|
|
* sc - the PMIX scope of the data
|
|
* s - the MCA component that is posting the data
|
|
* d - the data object being posted
|
|
* sz - the number of bytes in the data object
|
|
*/
|
|
#define OPAL_MODEX_SEND(r, sc, s, d, sz) \
|
|
do { \
|
|
char *_key; \
|
|
_key = mca_base_component_to_string((s)); \
|
|
OPAL_MODEX_SEND_STRING((r), (sc), _key, (d), (sz)); \
|
|
free(_key); \
|
|
} while(0);
|
|
|
|
/**
|
|
* Provide a simplified macro for retrieving modex data
|
|
* from another process when we don't want the PMIx module
|
|
* to request it from the server if not found:
|
|
*
|
|
* r - the integer return status from the modex op (int)
|
|
* s - string key (char*)
|
|
* p - pointer to the opal_process_name_t of the proc that posted
|
|
* the data (opal_process_name_t*)
|
|
* d - pointer to a location wherein the data object
|
|
* is to be returned
|
|
* t - the expected data type
|
|
*/
|
|
#define OPAL_MODEX_RECV_VALUE_OPTIONAL(r, s, p, d, t) \
|
|
do { \
|
|
opal_value_t *_kv, *_info; \
|
|
opal_list_t _ilist; \
|
|
OPAL_OUTPUT_VERBOSE((1, opal_pmix_verbose_output, \
|
|
"%s[%s:%d] MODEX RECV VALUE OPTIONAL FOR PROC %s KEY %s", \
|
|
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
|
|
__FILE__, __LINE__, \
|
|
OPAL_NAME_PRINT(*(p)), (s))); \
|
|
OBJ_CONSTRUCT(&(_ilist), opal_list_t); \
|
|
_info = OBJ_NEW(opal_value_t); \
|
|
_info->key = strdup(OPAL_PMIX_OPTIONAL); \
|
|
_info->type = OPAL_BOOL; \
|
|
_info->data.flag = true; \
|
|
opal_list_append(&(_ilist), &(_info)->super); \
|
|
if (OPAL_SUCCESS == ((r) = opal_pmix.get((p), (s), &(_ilist), &(_kv)))) { \
|
|
if (NULL == _kv) { \
|
|
(r) = OPAL_ERR_NOT_FOUND; \
|
|
} else { \
|
|
(r) = opal_value_unload(_kv, (void**)(d), (t)); \
|
|
OBJ_RELEASE(_kv); \
|
|
} \
|
|
} \
|
|
OPAL_LIST_DESTRUCT(&(_ilist)); \
|
|
} while(0);
|
|
|
|
/**
|
|
* Provide a simplified macro for retrieving modex data
|
|
* from another process when we want the PMIx module
|
|
* to request it from the server if not found, but do not
|
|
* want the server to go find it if the server doesn't
|
|
* already have it:
|
|
*
|
|
* r - the integer return status from the modex op (int)
|
|
* s - string key (char*)
|
|
* p - pointer to the opal_process_name_t of the proc that posted
|
|
* the data (opal_process_name_t*)
|
|
* d - pointer to a location wherein the data object
|
|
* is to be returned
|
|
* t - the expected data type
|
|
*/
|
|
#define OPAL_MODEX_RECV_VALUE_IMMEDIATE(r, s, p, d, t) \
|
|
do { \
|
|
opal_value_t *_kv, *_info; \
|
|
opal_list_t _ilist; \
|
|
opal_output_verbose(1, opal_pmix_verbose_output, \
|
|
"%s[%s:%d] MODEX RECV VALUE IMMEDIATE FOR PROC %s KEY %s", \
|
|
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
|
|
__FILE__, __LINE__, \
|
|
OPAL_NAME_PRINT(*(p)), (s)); \
|
|
OBJ_CONSTRUCT(&(_ilist), opal_list_t); \
|
|
_info = OBJ_NEW(opal_value_t); \
|
|
_info->key = strdup(OPAL_PMIX_IMMEDIATE); \
|
|
_info->type = OPAL_BOOL; \
|
|
_info->data.flag = true; \
|
|
opal_list_append(&(_ilist), &(_info)->super); \
|
|
if (OPAL_SUCCESS == ((r) = opal_pmix.get((p), (s), &(_ilist), &(_kv)))) { \
|
|
if (NULL == _kv) { \
|
|
(r) = OPAL_ERR_NOT_FOUND; \
|
|
} else { \
|
|
(r) = opal_value_unload(_kv, (void**)(d), (t)); \
|
|
OBJ_RELEASE(_kv); \
|
|
} \
|
|
} \
|
|
OPAL_LIST_DESTRUCT(&(_ilist)); \
|
|
} while(0);
|
|
|
|
/**
|
|
* Provide a simplified macro for retrieving modex data
|
|
* from another process:
|
|
*
|
|
* r - the integer return status from the modex op (int)
|
|
* s - string key (char*)
|
|
* p - pointer to the opal_process_name_t of the proc that posted
|
|
* the data (opal_process_name_t*)
|
|
* d - pointer to a location wherein the data object
|
|
* is to be returned
|
|
* t - the expected data type
|
|
*/
|
|
#define OPAL_MODEX_RECV_VALUE(r, s, p, d, t) \
|
|
do { \
|
|
opal_value_t *_kv; \
|
|
OPAL_OUTPUT_VERBOSE((1, opal_pmix_verbose_output, \
|
|
"%s[%s:%d] MODEX RECV VALUE FOR PROC %s KEY %s", \
|
|
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
|
|
__FILE__, __LINE__, \
|
|
OPAL_NAME_PRINT(*(p)), (s))); \
|
|
if (OPAL_SUCCESS == ((r) = opal_pmix.get((p), (s), NULL, &(_kv)))) { \
|
|
if (NULL == _kv) { \
|
|
(r) = OPAL_ERR_NOT_FOUND; \
|
|
} else { \
|
|
(r) = opal_value_unload(_kv, (void**)(d), (t)); \
|
|
OBJ_RELEASE(_kv); \
|
|
} \
|
|
} \
|
|
} while(0);
|
|
|
|
/**
|
|
* Provide a simplified macro for retrieving modex data
|
|
* from another process:
|
|
*
|
|
* r - the integer return status from the modex op (int)
|
|
* s - string key (char*)
|
|
* p - pointer to the opal_process_name_t of the proc that posted
|
|
* the data (opal_process_name_t*)
|
|
* d - pointer to a location wherein the data object
|
|
* it to be returned (char**)
|
|
* sz - pointer to a location wherein the number of bytes
|
|
* in the data object can be returned (size_t)
|
|
*/
|
|
#define OPAL_MODEX_RECV_STRING(r, s, p, d, sz) \
|
|
do { \
|
|
opal_value_t *_kv; \
|
|
OPAL_OUTPUT_VERBOSE((1, opal_pmix_verbose_output, \
|
|
"%s[%s:%d] MODEX RECV STRING FOR PROC %s KEY %s", \
|
|
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
|
|
__FILE__, __LINE__, \
|
|
OPAL_NAME_PRINT(*(p)), (s))); \
|
|
if (OPAL_SUCCESS == ((r) = opal_pmix.get((p), (s), NULL, &(_kv)))) { \
|
|
if (NULL == _kv) { \
|
|
*(sz) = 0; \
|
|
(r) = OPAL_ERR_NOT_FOUND; \
|
|
} else { \
|
|
*(d) = _kv->data.bo.bytes; \
|
|
*(sz) = _kv->data.bo.size; \
|
|
_kv->data.bo.bytes = NULL; /* protect the data */ \
|
|
OBJ_RELEASE(_kv); \
|
|
} \
|
|
} else { \
|
|
*(sz) = 0; \
|
|
(r) = OPAL_ERR_NOT_FOUND; \
|
|
} \
|
|
} while(0);
|
|
|
|
/**
|
|
* Provide a simplified macro for retrieving modex data
|
|
* from another process:
|
|
*
|
|
* r - the integer return status from the modex op (int)
|
|
* s - the MCA component that posted the data (mca_base_component_t*)
|
|
* p - pointer to the opal_process_name_t of the proc that posted
|
|
* the data (opal_process_name_t*)
|
|
* d - pointer to a location wherein the data object
|
|
* it to be returned (char**)
|
|
* sz - pointer to a location wherein the number of bytes
|
|
* in the data object can be returned (size_t)
|
|
*/
|
|
#define OPAL_MODEX_RECV(r, s, p, d, sz) \
|
|
do { \
|
|
char *_key; \
|
|
_key = mca_base_component_to_string((s)); \
|
|
OPAL_OUTPUT_VERBOSE((1, opal_pmix_verbose_output, \
|
|
"%s[%s:%d] MODEX RECV FOR PROC %s KEY %s", \
|
|
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
|
|
__FILE__, __LINE__, \
|
|
OPAL_NAME_PRINT(*(p)), _key)); \
|
|
if (NULL == _key) { \
|
|
OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE); \
|
|
(r) = OPAL_ERR_OUT_OF_RESOURCE; \
|
|
} else { \
|
|
OPAL_MODEX_RECV_STRING((r), _key, (p), (d), (sz)); \
|
|
free(_key); \
|
|
} \
|
|
} while(0);
|
|
|
|
/**
|
|
* Provide a macro for accessing a base function that exchanges
|
|
* data values between two procs using the PMIx Publish/Lookup
|
|
* APIs */
|
|
#define OPAL_PMIX_EXCHANGE(r, i, p, t) \
|
|
do { \
|
|
OPAL_OUTPUT_VERBOSE((1, opal_pmix_verbose_output, \
|
|
"%s[%s:%d] EXCHANGE %s WITH %s", \
|
|
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
|
|
__FILE__, __LINE__, \
|
|
(i)->key, (p)->value.key)); \
|
|
(r) = opal_pmix_base_exchange((i), (p), (t)); \
|
|
} while(0);
|
|
|
|
|
|
/************************************************************
|
|
* CLIENT APIs *
|
|
************************************************************/
|
|
|
|
/* Initialize the PMIx client
|
|
* When called the client will check for the required connection
|
|
* information of the local server and will establish the connection.
|
|
* If the information is not found, or the server connection fails, then
|
|
* an appropriate error constant will be returned.
|
|
*/
|
|
typedef int (*opal_pmix_base_module_init_fn_t)(opal_list_t *ilist);
|
|
|
|
/* Finalize the PMIx client, closing the connection to the local server.
|
|
* An error code will be returned if, for some reason, the connection
|
|
* cannot be closed. */
|
|
typedef int (*opal_pmix_base_module_fini_fn_t)(void);
|
|
|
|
/* Returns _true_ if the PMIx client has been successfully initialized,
|
|
* returns _false_ otherwise. Note that the function only reports the
|
|
* internal state of the PMIx client - it does not verify an active
|
|
* connection with the server, nor that the server is functional. */
|
|
typedef int (*opal_pmix_base_module_initialized_fn_t)(void);
|
|
|
|
/* Request that the provided list of opal_namelist_t procs be aborted, returning the
|
|
* provided _status_ and printing the provided message. A _NULL_
|
|
* for the proc list indicates that all processes in the caller's
|
|
* nspace are to be aborted.
|
|
*
|
|
* The response to this request is somewhat dependent on the specific resource
|
|
* manager and its configuration (e.g., some resource managers will
|
|
* not abort the application if the provided _status_ is zero unless
|
|
* specifically configured to do so), and thus lies outside the control
|
|
* of PMIx itself. However, the client will inform the RM of
|
|
* the request that the application be aborted, regardless of the
|
|
* value of the provided _status_.
|
|
*
|
|
* Passing a _NULL_ msg parameter is allowed. Note that race conditions
|
|
* caused by multiple processes calling PMIx_Abort are left to the
|
|
* server implementation to resolve with regard to which status is
|
|
* returned and what messages (if any) are printed.
|
|
*/
|
|
typedef int (*opal_pmix_base_module_abort_fn_t)(int status, const char *msg,
|
|
opal_list_t *procs);
|
|
|
|
/* Push all previously _PMIx_Put_ values to the local PMIx server.
|
|
* This is an asynchronous operation - the library will immediately
|
|
* return to the caller while the data is transmitted to the local
|
|
* server in the background */
|
|
typedef int (*opal_pmix_base_module_commit_fn_t)(void);
|
|
|
|
/* Execute a blocking barrier across the processes identified in the
|
|
* specified list of opal_namelist_t. Passing a _NULL_ pointer
|
|
* indicates that the barrier is to span all processes in the client's
|
|
* namespace. Each provided opal_namelist_t can pass PMIX_RANK_WILDCARD to
|
|
* indicate that all processes in the given jobid are
|
|
* participating.
|
|
*
|
|
* The _collect_data_ parameter is passed to the server to indicate whether
|
|
* or not the barrier operation is to return the _put_ data from all
|
|
* participating processes. A value of _false_ indicates that the callback
|
|
* is just used as a release and no data is to be returned at that time. A
|
|
* value of _true_ indicates that all _put_ data is to be collected by the
|
|
* barrier. Returned data is locally cached so that subsequent calls to _PMIx_Get_
|
|
* can be serviced without communicating to/from the server, but at the cost
|
|
* of increased memory footprint
|
|
*/
|
|
typedef int (*opal_pmix_base_module_fence_fn_t)(opal_list_t *procs, int collect_data);
|
|
|
|
/* Fence_nb */
|
|
/* Non-blocking version of PMIx_Fence. Note that the function will return
|
|
* an error if a _NULL_ callback function is given. */
|
|
typedef int (*opal_pmix_base_module_fence_nb_fn_t)(opal_list_t *procs, int collect_data,
|
|
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
|
|
|
|
/* Push a value into the client's namespace. The client library will cache
|
|
* the information locally until _PMIx_Commit_ is called. The provided scope
|
|
* value is passed to the local PMIx server, which will distribute the data
|
|
* as directed. */
|
|
typedef int (*opal_pmix_base_module_put_fn_t)(opal_pmix_scope_t scope,
|
|
opal_value_t *val);
|
|
|
|
/* Retrieve information for the specified _key_ as published by the rank
|
|
* and jobid i the provided opal_process_name, and subject to any provided
|
|
* constraints, returning a pointer to the value in the given address.
|
|
*
|
|
* This is a blocking operation - the caller will block until
|
|
* the specified data has been _PMIx_Put_ by the specified rank. The caller is
|
|
* responsible for freeing all memory associated with the returned value when
|
|
* no longer required. */
|
|
typedef int (*opal_pmix_base_module_get_fn_t)(const opal_process_name_t *proc,
|
|
const char *key, opal_list_t *info,
|
|
opal_value_t **val);
|
|
|
|
/* Retrieve information for the specified _key_ as published by the given rank
|
|
* and jobid in the opal_process_name_t, and subject to any provided
|
|
* constraints. This is a non-blocking operation - the
|
|
* callback function will be executed once the specified data has been _PMIx_Put_
|
|
* by the specified proc and retrieved by the local server. */
|
|
typedef int (*opal_pmix_base_module_get_nb_fn_t)(const opal_process_name_t *proc,
|
|
const char *key, opal_list_t *info,
|
|
opal_pmix_value_cbfunc_t cbfunc, void *cbdata);
|
|
|
|
/* Publish the given data to the "universal" nspace
|
|
* for lookup by others subject to the provided scope.
|
|
* Note that the keys must be unique within the specified
|
|
* scope or else an error will be returned (first published
|
|
* wins). Attempts to access the data by procs outside of
|
|
* the provided scope will be rejected.
|
|
*
|
|
* Note: Some host environments may support user/group level
|
|
* access controls on the information in addition to the scope.
|
|
* These can be specified in the info array using the appropriately
|
|
* defined keys.
|
|
*
|
|
* The persistence parameter instructs the server as to how long
|
|
* the data is to be retained, within the context of the scope.
|
|
* For example, data published within _PMIX_NAMESPACE_ will be
|
|
* deleted along with the namespace regardless of the persistence.
|
|
* However, data published within PMIX_USER would be retained if
|
|
* the persistence was set to _PMIX_PERSIST_SESSION_ until the
|
|
* allocation terminates.
|
|
*
|
|
* The blocking form will block until the server confirms that the
|
|
* data has been posted and is available. The non-blocking form will
|
|
* return immediately, executing the callback when the server confirms
|
|
* availability of the data */
|
|
typedef int (*opal_pmix_base_module_publish_fn_t)(opal_list_t *info);
|
|
typedef int (*opal_pmix_base_module_publish_nb_fn_t)(opal_list_t *info,
|
|
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
|
|
|
|
/* Lookup information published by another process within the
|
|
* specified scope. A scope of _PMIX_SCOPE_UNDEF_ requests that
|
|
* the search be conducted across _all_ namespaces. The "data"
|
|
* parameter consists of an array of pmix_pdata_t struct with the
|
|
* keys specifying the requested information. Data will be returned
|
|
* for each key in the associated info struct - any key that cannot
|
|
* be found will return with a data type of "PMIX_UNDEF". The function
|
|
* will return SUCCESS if _any_ values can be found, so the caller
|
|
* must check each data element to ensure it was returned.
|
|
*
|
|
* The proc field in each pmix_pdata_t struct will contain the
|
|
* nspace/rank of the process that published the data.
|
|
*
|
|
* Note: although this is a blocking function, it will _not_ wait
|
|
* for the requested data to be published. Instead, it will block
|
|
* for the time required by the server to lookup its current data
|
|
* and return any found items. Thus, the caller is responsible for
|
|
* ensuring that data is published prior to executing a lookup, or
|
|
* for retrying until the requested data is found */
|
|
typedef int (*opal_pmix_base_module_lookup_fn_t)(opal_list_t *data,
|
|
opal_list_t *info);
|
|
|
|
/* Non-blocking form of the _PMIx_Lookup_ function. Data for
|
|
* the provided NULL-terminated keys array will be returned
|
|
* in the provided callback function. The _wait_ parameter
|
|
* is used to indicate if the caller wishes the callback to
|
|
* wait for _all_ requested data before executing the callback
|
|
* (_true_), or to callback once the server returns whatever
|
|
* data is immediately available (_false_) */
|
|
typedef int (*opal_pmix_base_module_lookup_nb_fn_t)(char **keys, opal_list_t *info,
|
|
opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata);
|
|
|
|
/* Unpublish data posted by this process using the given keys
|
|
* within the specified scope. The function will block until
|
|
* the data has been removed by the server. A value of _NULL_
|
|
* for the keys parameter instructs the server to remove
|
|
* _all_ data published by this process within the given scope */
|
|
typedef int (*opal_pmix_base_module_unpublish_fn_t)(char **keys, opal_list_t *info);
|
|
|
|
/* Non-blocking form of the _PMIx_Unpublish_ function. The
|
|
* callback function will be executed once the server confirms
|
|
* removal of the specified data. A value of _NULL_
|
|
* for the keys parameter instructs the server to remove
|
|
* _all_ data published by this process within the given scope */
|
|
typedef int (*opal_pmix_base_module_unpublish_nb_fn_t)(char **keys, opal_list_t *info,
|
|
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
|
|
|
|
/* Spawn a new job. The spawned applications are automatically
|
|
* connected to the calling process, and their assigned namespace
|
|
* is returned in the nspace parameter - a _NULL_ value in that
|
|
* location indicates that the caller doesn't wish to have the
|
|
* namespace returned. Behavior of individual resource managers
|
|
* may differ, but it is expected that failure of any application
|
|
* process to start will result in termination/cleanup of _all_
|
|
* processes in the newly spawned job and return of an error
|
|
* code to the caller */
|
|
typedef int (*opal_pmix_base_module_spawn_fn_t)(opal_list_t *job_info,
|
|
opal_list_t *apps,
|
|
opal_jobid_t *jobid);
|
|
|
|
/* Non-blocking form of the _PMIx_Spawn_ function. The callback
|
|
* will be executed upon launch of the specified applications,
|
|
* or upon failure to launch any of them. */
|
|
typedef int (*opal_pmix_base_module_spawn_nb_fn_t)(opal_list_t *job_info,
|
|
opal_list_t *apps,
|
|
opal_pmix_spawn_cbfunc_t cbfunc,
|
|
void *cbdata);
|
|
|
|
/* Record the specified processes as "connected". Both blocking and non-blocking
|
|
* versions are provided. This means that the resource manager should treat the
|
|
* failure of any process in the specified group as a reportable event, and take
|
|
* appropriate action. Note that different resource managers may respond to
|
|
* failures in different manners.
|
|
*
|
|
* The list is to be provided as opal_namelist_t objects
|
|
*
|
|
* The callback function is to be called once all participating processes have
|
|
* called connect. The server is required to return any job-level info for the
|
|
* connecting processes that might not already have - i.e., if the connect
|
|
* request involves procs from different nspaces, then each proc shall receive
|
|
* the job-level info from those nspaces other than their own.
|
|
*
|
|
* Note: a process can only engage in _one_ connect operation involving the identical
|
|
* set of ranges at a time. However, a process _can_ be simultaneously engaged
|
|
* in multiple connect operations, each involving a different set of ranges */
|
|
typedef int (*opal_pmix_base_module_connect_fn_t)(opal_list_t *procs);
|
|
|
|
typedef int (*opal_pmix_base_module_connect_nb_fn_t)(opal_list_t *procs,
|
|
opal_pmix_op_cbfunc_t cbfunc,
|
|
void *cbdata);
|
|
|
|
/* Disconnect a previously connected set of processes. An error will be returned
|
|
* if the specified set of procs was not previously "connected". As above, a process
|
|
* may be involved in multiple simultaneous disconnect operations. However, a process
|
|
* is not allowed to reconnect to a set of procs that has not fully completed
|
|
* disconnect - i.e., you have to fully disconnect before you can reconnect to the
|
|
* _same_ group of processes. */
|
|
typedef int (*opal_pmix_base_module_disconnect_fn_t)(opal_list_t *procs);
|
|
|
|
typedef int (*opal_pmix_base_module_disconnect_nb_fn_t)(opal_list_t *procs,
|
|
opal_pmix_op_cbfunc_t cbfunc,
|
|
void *cbdata);
|
|
|
|
/* Given a node name, return an array of processes within the specified jobid
|
|
* on that node. If the jobid is OPAL_JOBID_WILDCARD, then all processes on the node will
|
|
* be returned. If the specified node does not currently host any processes,
|
|
* then the returned list will be empty.
|
|
*/
|
|
typedef int (*opal_pmix_base_module_resolve_peers_fn_t)(const char *nodename,
|
|
opal_jobid_t jobid,
|
|
opal_list_t *procs);
|
|
|
|
|
|
/* Given a jobid, return the list of nodes hosting processes within
|
|
* that jobid. The returned string will contain a comma-delimited list
|
|
* of nodenames. The caller is responsible for releasing the string
|
|
* when done with it */
|
|
typedef int (*opal_pmix_base_module_resolve_nodes_fn_t)(opal_jobid_t jobid, char **nodelist);
|
|
|
|
|
|
/************************************************************
|
|
* SERVER APIs *
|
|
* *
|
|
* These are calls that go down (or "south") from the ORTE *
|
|
* daemon into the PMIx server library *
|
|
************************************************************/
|
|
|
|
/* Initialize the server support library - must pass the callback
|
|
* module for the server to use, plus any attributes we want to
|
|
* pass down to it */
|
|
typedef int (*opal_pmix_base_module_server_init_fn_t)(opal_pmix_server_module_t *module,
|
|
opal_list_t *info);
|
|
|
|
/* Finalize the server support library */
|
|
typedef int (*opal_pmix_base_module_server_finalize_fn_t)(void);
|
|
|
|
/* given a semicolon-separated list of input values, generate
|
|
* a regex that can be passed down to the client for parsing.
|
|
* The caller is responsible for free'ing the resulting
|
|
* string
|
|
*
|
|
* If values have leading zero's, then that is preserved. You
|
|
* have to add back any prefix/suffix for node names
|
|
* odin[009-015,017-023,076-086]
|
|
*
|
|
* "pmix:odin[009-015,017-023,076-086]"
|
|
*
|
|
* Note that the "pmix" at the beginning of each regex indicates
|
|
* that the PMIx native parser is to be used by the client for
|
|
* parsing the provided regex. Other parsers may be supported - see
|
|
* the pmix_client.h header for a list.
|
|
*/
|
|
typedef int (*opal_pmix_base_module_generate_regex_fn_t)(const char *input, char **regex);
|
|
|
|
/* The input is expected to consist of a comma-separated list
|
|
* of ranges. Thus, an input of:
|
|
* "1-4;2-5;8,10,11,12;6,7,9"
|
|
* would generate a regex of
|
|
* "[pmix:2x(3);8,10-12;6-7,9]"
|
|
*
|
|
* Note that the "pmix" at the beginning of each regex indicates
|
|
* that the PMIx native parser is to be used by the client for
|
|
* parsing the provided regex. Other parsers may be supported - see
|
|
* the pmix_client.h header for a list.
|
|
*/
|
|
typedef int (*opal_pmix_base_module_generate_ppn_fn_t)(const char *input, char **ppn);
|
|
|
|
/* Setup the data about a particular nspace so it can
|
|
* be passed to any child process upon startup. The PMIx
|
|
* connection procedure provides an opportunity for the
|
|
* host PMIx server to pass job-related info down to a
|
|
* child process. This might include the number of
|
|
* processes in the job, relative local ranks of the
|
|
* processes within the job, and other information of
|
|
* use to the process. The server is free to determine
|
|
* which, if any, of the supported elements it will
|
|
* provide - defined values are provided in pmix_common.h.
|
|
*
|
|
* NOTE: the server must register ALL nspaces that will
|
|
* participate in collective operations with local processes.
|
|
* This means that the server must register an nspace even
|
|
* if it will not host any local procs from within that
|
|
* nspace IF any local proc might at some point perform
|
|
* a collective operation involving one or more procs from
|
|
* that nspace. This is necessary so that the collective
|
|
* operation can know when it is locally complete.
|
|
*
|
|
* The caller must also provide the number of local procs
|
|
* that will be launched within this nspace. This is required
|
|
* for the PMIx server library to correctly handle collectives
|
|
* as a collective operation call can occur before all the
|
|
* procs have been started */
|
|
typedef int (*opal_pmix_base_module_server_register_nspace_fn_t)(opal_jobid_t jobid,
|
|
int nlocalprocs,
|
|
opal_list_t *info,
|
|
opal_pmix_op_cbfunc_t cbfunc,
|
|
void *cbdata);
|
|
|
|
/* Deregister an nspace. Instruct the PMIx server to purge
|
|
* all info relating to the provided jobid so that memory
|
|
* can be freed. Note that the server will automatically
|
|
* purge all info relating to any clients it has from
|
|
* this nspace */
|
|
typedef void (*opal_pmix_base_module_server_deregister_nspace_fn_t)(opal_jobid_t jobid,
|
|
opal_pmix_op_cbfunc_t cbfunc,
|
|
void *cbdata);
|
|
|
|
/* Register a client process with the PMIx server library. The
|
|
* expected user ID and group ID of the child process helps the
|
|
* server library to properly authenticate clients as they connect
|
|
* by requiring the two values to match.
|
|
*
|
|
* The host server can also, if it desires, provide an object
|
|
* it wishes to be returned when a server function is called
|
|
* that relates to a specific process. For example, the host
|
|
* server may have an object that tracks the specific client.
|
|
* Passing the object to the library allows the library to
|
|
* return that object when the client calls "finalize", thus
|
|
* allowing the host server to access the object without
|
|
* performing a lookup. */
|
|
typedef int (*opal_pmix_base_module_server_register_client_fn_t)(const opal_process_name_t *proc,
|
|
uid_t uid, gid_t gid,
|
|
void *server_object,
|
|
opal_pmix_op_cbfunc_t cbfunc,
|
|
void *cbdata);
|
|
|
|
/* Deregister a client. Instruct the PMIx server to purge
|
|
* all info relating to the provided client so that memory
|
|
* can be freed. As per above note, the server will automatically
|
|
* free all client-related data when the nspace is deregistered,
|
|
* so there is no need to call this function during normal
|
|
* finalize operations. Instead, this is provided for use
|
|
* during exception operations */
|
|
typedef void (*opal_pmix_base_module_server_deregister_client_fn_t)(const opal_process_name_t *proc,
|
|
opal_pmix_op_cbfunc_t cbfunc,
|
|
void *cbdata);
|
|
|
|
/* Setup the environment of a child process to be forked
|
|
* by the host so it can correctly interact with the PMIx
|
|
* server. The PMIx client needs some setup information
|
|
* so it can properly connect back to the server. This function
|
|
* will set appropriate environmental variables for this purpose. */
|
|
typedef int (*opal_pmix_base_module_server_setup_fork_fn_t)(const opal_process_name_t *proc, char ***env);
|
|
|
|
/* Define a function by which the host server can request modex data
|
|
* from the local PMIx server. This is used to support the direct modex
|
|
* operation - i.e., where data is cached locally on each PMIx
|
|
* server for its own local clients, and is obtained on-demand
|
|
* for remote requests. Upon receiving a request from a remote
|
|
* server, the host server will call this function to pass the
|
|
* request into the PMIx server. The PMIx server will return a blob
|
|
* (once it becomes available) via the cbfunc - the host
|
|
* server shall send the blob back to the original requestor */
|
|
typedef int (*opal_pmix_base_module_server_dmodex_request_fn_t)(const opal_process_name_t *proc,
|
|
opal_pmix_modex_cbfunc_t cbfunc,
|
|
void *cbdata);
|
|
|
|
/* Report an event to a process for notification via any
|
|
* registered event handler. The handler registration can be
|
|
* called by both the server and the client application. On the
|
|
* server side, the handler is used to report events detected
|
|
* by PMIx to the host server for handling. On the client side,
|
|
* the handler is used to notify the process of events
|
|
* reported by the server - e.g., the failure of another process.
|
|
*
|
|
* This function allows the host server to direct the server
|
|
* convenience library to notify all registered local procs of
|
|
* an event. The event can be local, or anywhere in the cluster.
|
|
* The status indicates the event being reported.
|
|
*
|
|
* The source parameter informs the handler of the source that
|
|
* generated the event. This will be NULL if the event came
|
|
* from the external resource manager.
|
|
*
|
|
* The info array contains any further info the RM can and/or chooses
|
|
* to provide.
|
|
*
|
|
* The callback function will be called upon completion of the
|
|
* notify_event function's actions. Note that any messages will
|
|
* have been queued, but may not have been transmitted by this
|
|
* time. Note that the caller is required to maintain the input
|
|
* data until the callback function has been executed if this
|
|
* function returns OPAL_SUCCESS! */
|
|
typedef int (*opal_pmix_base_module_server_notify_event_fn_t)(int status,
|
|
const opal_process_name_t *source,
|
|
opal_list_t *info,
|
|
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
|
|
|
|
|
|
/************************************************************
|
|
* TOOL APIs *
|
|
************************************************************/
|
|
/* Initialize the PMIx tool support
|
|
* When called the library will check for the required connection
|
|
* information of the local server and will establish the connection.
|
|
* The connection info can be provided either in the environment or
|
|
* in the list of attributes. If the information is not found, or the
|
|
* server connection fails, then an appropriate error constant will
|
|
* be returned.
|
|
*/
|
|
typedef int (*opal_pmix_base_module_tool_init_fn_t)(opal_list_t *ilist);
|
|
|
|
/* Finalize the PMIx tool support */
|
|
typedef int (*opal_pmix_base_module_tool_fini_fn_t)(void);
|
|
|
|
|
|
/************************************************************
|
|
* UTILITY APIs *
|
|
************************************************************/
|
|
|
|
/* get the version of the embedded library */
|
|
typedef const char* (*opal_pmix_base_module_get_version_fn_t)(void);
|
|
|
|
/* Register an event handler to report event. Three types of events
|
|
* can be reported:
|
|
*
|
|
* (a) those that occur within the client library, but are not
|
|
* reportable via the API itself (e.g., loss of connection to
|
|
* the server). These events typically occur during behind-the-scenes
|
|
* non-blocking operations.
|
|
*
|
|
* (b) job-related events such as the failure of another process in
|
|
* the job or in any connected job, impending failure of hardware
|
|
* within the job's usage footprint, etc.
|
|
*
|
|
* (c) system notifications that are made available by the local
|
|
* administrators
|
|
*
|
|
* By default, only events that directly affect the process and/or
|
|
* any process to which it is connected (via the PMIx_Connect call)
|
|
* will be reported. Options to modify that behavior can be provided
|
|
* in the info array
|
|
*
|
|
* Both the client application and the resource manager can register
|
|
* event handlers for specific events. PMIx client/server calls the registered
|
|
* event handler upon receiving event notify notification (via PMIx_Notify_event)
|
|
* from the other end (Resource Manager/Client application).
|
|
*
|
|
* Multiple event handlers can be registered for different events. PMIX returns
|
|
* a size_t reference to each register handler in the callback fn. The caller
|
|
* must retain the reference in order to deregister the evhandler.
|
|
* Modification of the notification behavior can be accomplished by
|
|
* deregistering the current evhandler, and then registering it
|
|
* using a new set of info values.
|
|
*
|
|
* A NULL for event_codes indicates registration as a default event handler
|
|
*
|
|
* See pmix_types.h for a description of the notification function */
|
|
typedef void (*opal_pmix_base_module_register_fn_t)(opal_list_t *event_codes,
|
|
opal_list_t *info,
|
|
opal_pmix_notification_fn_t evhandler,
|
|
opal_pmix_evhandler_reg_cbfunc_t cbfunc,
|
|
void *cbdata);
|
|
|
|
/* deregister the evhandler
|
|
* evhandler_ref is the reference returned by PMIx for the evhandler
|
|
* to pmix_evhandler_reg_cbfunc_t */
|
|
typedef void (*opal_pmix_base_module_deregister_fn_t)(size_t evhandler,
|
|
opal_pmix_op_cbfunc_t cbfunc,
|
|
void *cbdata);
|
|
|
|
/* Report an event for notification via any
|
|
* registered evhandler. On the PMIx
|
|
* server side, this is used to report events detected
|
|
* by PMIx to the host server for handling and/or distribution.
|
|
*
|
|
* The client application can also call this function to notify the
|
|
* resource manager of an event it detected. It can specify the
|
|
* range over which that notification should occur.
|
|
*
|
|
* The info array contains any further info the caller can and/or chooses
|
|
* to provide.
|
|
*
|
|
* The callback function will be called upon completion of the
|
|
* notify_event function's actions. Note that any messages will
|
|
* have been queued, but may not have been transmitted by this
|
|
* time. Note that the caller is required to maintain the input
|
|
* data until the callback function has been executed if it
|
|
* returns OPAL_SUCCESS!
|
|
*/
|
|
typedef int (*opal_pmix_base_module_notify_event_fn_t)(int status,
|
|
const opal_process_name_t *source,
|
|
opal_pmix_data_range_t range,
|
|
opal_list_t *info,
|
|
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
|
|
|
|
/* store data internally, but don't push it out to be shared - this is
|
|
* intended solely for storage of info on other procs that comes thru
|
|
* a non-PMIx channel (e.g., may be computed locally) but is desired
|
|
* to be available via a PMIx_Get call */
|
|
typedef int (*opal_pmix_base_module_store_fn_t)(const opal_process_name_t *proc,
|
|
opal_value_t *val);
|
|
|
|
/* retrieve the nspace corresponding to a given jobid */
|
|
typedef const char* (*opal_pmix_base_module_get_nspace_fn_t)(opal_jobid_t jobid);
|
|
|
|
/* register a jobid-to-nspace pair */
|
|
typedef void (*opal_pmix_base_module_register_jobid_fn_t)(opal_jobid_t jobid, const char *nspace);
|
|
|
|
/* query information from the system */
|
|
typedef void (*opal_pmix_base_module_query_fn_t)(opal_list_t *queries,
|
|
opal_pmix_info_cbfunc_t cbfunc, void *cbdata);
|
|
|
|
/* log data to the system */
|
|
typedef void (*opal_pmix_base_log_fn_t)(opal_list_t *info,
|
|
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
|
|
|
|
/* allocation */
|
|
typedef int (*opal_pmix_base_alloc_fn_t)(opal_pmix_alloc_directive_t directive,
|
|
opal_list_t *info,
|
|
opal_pmix_info_cbfunc_t cbfunc, void *cbdata);
|
|
|
|
/* job control */
|
|
typedef int (*opal_pmix_base_job_control_fn_t)(opal_list_t *targets,
|
|
opal_list_t *directives,
|
|
opal_pmix_info_cbfunc_t cbfunc, void *cbdata);
|
|
|
|
/* monitoring */
|
|
typedef int (*opal_pmix_base_process_monitor_fn_t)(opal_list_t *monitor,
|
|
opal_list_t *directives,
|
|
opal_pmix_info_cbfunc_t cbfunc, void *cbdata);
|
|
|
|
/*
|
|
* the standard public API data structure
|
|
*/
|
|
typedef struct {
|
|
/* client APIs */
|
|
opal_pmix_base_module_init_fn_t init;
|
|
opal_pmix_base_module_fini_fn_t finalize;
|
|
opal_pmix_base_module_initialized_fn_t initialized;
|
|
opal_pmix_base_module_abort_fn_t abort;
|
|
opal_pmix_base_module_commit_fn_t commit;
|
|
opal_pmix_base_module_fence_fn_t fence;
|
|
opal_pmix_base_module_fence_nb_fn_t fence_nb;
|
|
opal_pmix_base_module_put_fn_t put;
|
|
opal_pmix_base_module_get_fn_t get;
|
|
opal_pmix_base_module_get_nb_fn_t get_nb;
|
|
opal_pmix_base_module_publish_fn_t publish;
|
|
opal_pmix_base_module_publish_nb_fn_t publish_nb;
|
|
opal_pmix_base_module_lookup_fn_t lookup;
|
|
opal_pmix_base_module_lookup_nb_fn_t lookup_nb;
|
|
opal_pmix_base_module_unpublish_fn_t unpublish;
|
|
opal_pmix_base_module_unpublish_nb_fn_t unpublish_nb;
|
|
opal_pmix_base_module_spawn_fn_t spawn;
|
|
opal_pmix_base_module_spawn_nb_fn_t spawn_nb;
|
|
opal_pmix_base_module_connect_fn_t connect;
|
|
opal_pmix_base_module_connect_nb_fn_t connect_nb;
|
|
opal_pmix_base_module_disconnect_fn_t disconnect;
|
|
opal_pmix_base_module_disconnect_nb_fn_t disconnect_nb;
|
|
opal_pmix_base_module_resolve_peers_fn_t resolve_peers;
|
|
opal_pmix_base_module_resolve_nodes_fn_t resolve_nodes;
|
|
opal_pmix_base_module_query_fn_t query;
|
|
opal_pmix_base_log_fn_t log;
|
|
opal_pmix_base_alloc_fn_t allocate;
|
|
opal_pmix_base_job_control_fn_t job_control;
|
|
opal_pmix_base_process_monitor_fn_t monitor;
|
|
/* server APIs */
|
|
opal_pmix_base_module_server_init_fn_t server_init;
|
|
opal_pmix_base_module_server_finalize_fn_t server_finalize;
|
|
opal_pmix_base_module_generate_regex_fn_t generate_regex;
|
|
opal_pmix_base_module_generate_ppn_fn_t generate_ppn;
|
|
opal_pmix_base_module_server_register_nspace_fn_t server_register_nspace;
|
|
opal_pmix_base_module_server_deregister_nspace_fn_t server_deregister_nspace;
|
|
opal_pmix_base_module_server_register_client_fn_t server_register_client;
|
|
opal_pmix_base_module_server_deregister_client_fn_t server_deregister_client;
|
|
opal_pmix_base_module_server_setup_fork_fn_t server_setup_fork;
|
|
opal_pmix_base_module_server_dmodex_request_fn_t server_dmodex_request;
|
|
opal_pmix_base_module_server_notify_event_fn_t server_notify_event;
|
|
/* tool APIs */
|
|
opal_pmix_base_module_tool_init_fn_t tool_init;
|
|
opal_pmix_base_module_tool_fini_fn_t tool_finalize;
|
|
/* Utility APIs */
|
|
opal_pmix_base_module_get_version_fn_t get_version;
|
|
opal_pmix_base_module_register_fn_t register_evhandler;
|
|
opal_pmix_base_module_deregister_fn_t deregister_evhandler;
|
|
opal_pmix_base_module_notify_event_fn_t notify_event;
|
|
opal_pmix_base_module_store_fn_t store_local;
|
|
opal_pmix_base_module_get_nspace_fn_t get_nspace;
|
|
opal_pmix_base_module_register_jobid_fn_t register_jobid;
|
|
} opal_pmix_base_module_t;
|
|
|
|
typedef struct {
|
|
mca_base_component_t base_version;
|
|
mca_base_component_data_t base_data;
|
|
int priority;
|
|
} opal_pmix_base_component_t;
|
|
|
|
/*
|
|
* Macro for use in components that are of type pmix
|
|
*/
|
|
#define OPAL_PMIX_BASE_VERSION_2_0_0 \
|
|
OPAL_MCA_BASE_VERSION_2_1_0("pmix", 2, 0, 0)
|
|
|
|
/* Global structure for accessing store functions */
|
|
OPAL_DECLSPEC extern opal_pmix_base_module_t opal_pmix; /* holds base function pointers */
|
|
|
|
END_C_DECLS
|
|
|
|
#endif
|