Update to PMIx v3.0 PR for cleanup registration
If available, have apps use the registration capability to clean up their session directories. Set up the capability for vader to register its shared memory file location - let someone familiar with that code do so. Final cleanup to track uid/gid, and update the opal/pmix API to pass flags for ignore and for leaving the top directory alone. Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
родитель
a863c26d6f
Коммит
07427c6d89
@ -15,7 +15,7 @@
|
||||
* Copyright (c) 2010-2015 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
|
@ -65,6 +65,7 @@ typedef struct {
|
||||
opal_mutex_t mutex;
|
||||
opal_pmix_condition_t cond;
|
||||
volatile bool active;
|
||||
int status;
|
||||
} opal_pmix_lock_t;
|
||||
|
||||
|
||||
|
@ -867,6 +867,9 @@ typedef int (*opal_pmix_base_process_monitor_fn_t)(opal_list_t *monitor,
|
||||
opal_list_t *directives,
|
||||
opal_pmix_info_cbfunc_t cbfunc, void *cbdata);
|
||||
|
||||
/* register cleanup */
|
||||
typedef int (*opal_pmix_base_register_cleanup_fn_t)(char *path, bool ignore, bool jobscope);
|
||||
|
||||
/*
|
||||
* the standard public API data structure
|
||||
*/
|
||||
@ -901,6 +904,7 @@ typedef struct {
|
||||
opal_pmix_base_alloc_fn_t allocate;
|
||||
opal_pmix_base_job_control_fn_t job_control;
|
||||
opal_pmix_base_process_monitor_fn_t monitor;
|
||||
opal_pmix_base_register_cleanup_fn_t register_cleanup;
|
||||
/* server APIs */
|
||||
opal_pmix_base_module_server_init_fn_t server_init;
|
||||
opal_pmix_base_module_server_finalize_fn_t server_finalize;
|
||||
|
@ -30,7 +30,7 @@ greek=
|
||||
# command, or with the date (if "git describe" fails) in the form of
|
||||
# "date<date>".
|
||||
|
||||
repo_rev=gitf56d30e
|
||||
repo_rev=git5c0b64b
|
||||
|
||||
# If tarball_version is not empty, it is used as the version string in
|
||||
# the tarball filename, regardless of all other versions listed in
|
||||
@ -44,7 +44,7 @@ tarball_version=
|
||||
|
||||
# The date when this release was created
|
||||
|
||||
date="Nov 11, 2017"
|
||||
date="Dec 11, 2017"
|
||||
|
||||
# The shared library version of each of PMIx's public libraries.
|
||||
# These versions are maintained in accordance with the "Library
|
||||
|
@ -462,6 +462,16 @@ typedef uint32_t pmix_rank_t;
|
||||
#define PMIX_JOB_CTRL_PROVISION_IMAGE "pmix.jctrl.pvnimg" // (char*) name of the image that is to be provisioned
|
||||
#define PMIX_JOB_CTRL_PREEMPTIBLE "pmix.jctrl.preempt" // (bool) job can be pre-empted
|
||||
#define PMIX_JOB_CTRL_TERMINATE "pmix.jctrl.term" // (bool) politely terminate the specified procs
|
||||
#define PMIX_REGISTER_CLEANUP "pmix.reg.cleanup" // (char*) comma-delimited list of files/directories to
|
||||
// be removed upon process termination
|
||||
#define PMIX_CLEANUP_RECURSIVE "pmix.clnup.recurse" // (bool) recursively cleanup all subdirectories under the
|
||||
// specified one(s)
|
||||
#define PMIX_CLEANUP_EMPTY "pmix.clnup.empty" // (bool) only remove empty subdirectories
|
||||
#define PMIX_CLEANUP_IGNORE "pmix.clnup.ignore" // (char*) comma-delimited list of filenames that are not
|
||||
// to be removed
|
||||
#define PMIX_CLEANUP_LEAVE_TOPDIR "pmix.clnup.lvtop" // (bool) when recursively cleaning subdirs, do not remove
|
||||
// the top-level directory (the one given in the
|
||||
// cleanup request)
|
||||
|
||||
/* monitoring attributes */
|
||||
#define PMIX_MONITOR_ID "pmix.monitor.id" // (char*) provide a string identifier for this request
|
||||
@ -584,6 +594,7 @@ typedef int pmix_status_t;
|
||||
#define PMIX_ERR_NOT_IMPLEMENTED -48
|
||||
#define PMIX_ERR_COMM_FAILURE -49
|
||||
#define PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER -50 // internal-only
|
||||
#define PMIX_ERR_CONFLICTING_CLEANUP_DIRECTIVES -51
|
||||
|
||||
/* define a starting point for v2.x error values */
|
||||
#define PMIX_ERR_V2X_BASE -100
|
||||
|
@ -84,7 +84,7 @@ void pmix_atomic_rmb(void)
|
||||
static inline
|
||||
void pmix_atomic_wmb(void)
|
||||
{
|
||||
PMIXRMB();
|
||||
PMIXWMB();
|
||||
}
|
||||
|
||||
static inline
|
||||
@ -110,7 +110,7 @@ void pmix_atomic_isync(void)
|
||||
#pragma mc_func pmix_atomic_rmb { "7c2004ac" } /* lwsync */
|
||||
#pragma reg_killed_by pmix_atomic_rmb /* none */
|
||||
|
||||
#pragma mc_func pmix_atomic_wmb { "7c0006ac" } /* eieio */
|
||||
#pragma mc_func pmix_atomic_wmb { "7c2004ac" } /* lwsync */
|
||||
#pragma reg_killed_by pmix_atomic_wmb /* none */
|
||||
|
||||
#endif
|
||||
|
@ -36,11 +36,27 @@
|
||||
#endif
|
||||
#include <ctype.h>
|
||||
#include PMIX_EVENT_HEADER
|
||||
#include <errno.h>
|
||||
#if HAVE_SYS_STAT_H
|
||||
#include <sys/stat.h>
|
||||
#endif /* HAVE_SYS_STAT_H */
|
||||
#ifdef HAVE_DIRENT_H
|
||||
#include <dirent.h>
|
||||
#endif /* HAVE_DIRENT_H */
|
||||
|
||||
#include <pmix_common.h>
|
||||
|
||||
#include "src/mca/bfrops/bfrops_types.h"
|
||||
#include "src/class/pmix_hash_table.h"
|
||||
#include "src/class/pmix_list.h"
|
||||
#include "src/threads/threads.h"
|
||||
#include "src/util/argv.h"
|
||||
#include "src/util/error.h"
|
||||
#include "src/util/os_path.h"
|
||||
|
||||
static void cleanup(pmix_epilog_t *epi);
|
||||
static void dirpath_destroy(char *path, pmix_cleanup_dir_t *cd,
|
||||
pmix_epilog_t *epi);
|
||||
static bool dirpath_is_empty(const char *path);
|
||||
|
||||
PMIX_EXPORT pmix_lock_t pmix_global_lock = {
|
||||
.mutex = PMIX_MUTEX_STATIC_INIT,
|
||||
@ -52,6 +68,36 @@ PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_namelist_t,
|
||||
pmix_list_item_t,
|
||||
NULL, NULL);
|
||||
|
||||
static void cfcon(pmix_cleanup_file_t *p)
|
||||
{
|
||||
p->path = NULL;
|
||||
}
|
||||
static void cfdes(pmix_cleanup_file_t *p)
|
||||
{
|
||||
if (NULL != p->path) {
|
||||
free(p->path);
|
||||
}
|
||||
}
|
||||
PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_cleanup_file_t,
|
||||
pmix_list_item_t,
|
||||
cfcon, cfdes);
|
||||
|
||||
static void cdcon(pmix_cleanup_dir_t *p)
|
||||
{
|
||||
p->path = NULL;
|
||||
p->recurse = false;
|
||||
p->leave_topdir = false;
|
||||
}
|
||||
static void cddes(pmix_cleanup_dir_t *p)
|
||||
{
|
||||
if (NULL != p->path) {
|
||||
free(p->path);
|
||||
}
|
||||
}
|
||||
PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_cleanup_dir_t,
|
||||
pmix_list_item_t,
|
||||
cdcon, cddes);
|
||||
|
||||
static void nscon(pmix_nspace_t *p)
|
||||
{
|
||||
p->nspace = NULL;
|
||||
@ -61,6 +107,9 @@ static void nscon(pmix_nspace_t *p)
|
||||
p->ndelivered = 0;
|
||||
PMIX_CONSTRUCT(&p->ranks, pmix_list_t);
|
||||
memset(&p->compat, 0, sizeof(p->compat));
|
||||
PMIX_CONSTRUCT(&p->epilog.cleanup_dirs, pmix_list_t);
|
||||
PMIX_CONSTRUCT(&p->epilog.cleanup_files, pmix_list_t);
|
||||
PMIX_CONSTRUCT(&p->epilog.ignores, pmix_list_t);
|
||||
}
|
||||
static void nsdes(pmix_nspace_t *p)
|
||||
{
|
||||
@ -71,6 +120,12 @@ static void nsdes(pmix_nspace_t *p)
|
||||
PMIX_RELEASE(p->jobbkt);
|
||||
}
|
||||
PMIX_LIST_DESTRUCT(&p->ranks);
|
||||
/* perform any epilog */
|
||||
cleanup(&p->epilog);
|
||||
/* cleanup the epilog */
|
||||
PMIX_LIST_DESTRUCT(&p->epilog.cleanup_dirs);
|
||||
PMIX_LIST_DESTRUCT(&p->epilog.cleanup_files);
|
||||
PMIX_LIST_DESTRUCT(&p->epilog.ignores);
|
||||
}
|
||||
PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_nspace_t,
|
||||
pmix_list_item_t,
|
||||
@ -124,7 +179,11 @@ static void pcon(pmix_peer_t *p)
|
||||
PMIX_CONSTRUCT(&p->send_queue, pmix_list_t);
|
||||
p->send_msg = NULL;
|
||||
p->recv_msg = NULL;
|
||||
PMIX_CONSTRUCT(&p->epilog.cleanup_dirs, pmix_list_t);
|
||||
PMIX_CONSTRUCT(&p->epilog.cleanup_files, pmix_list_t);
|
||||
PMIX_CONSTRUCT(&p->epilog.ignores, pmix_list_t);
|
||||
}
|
||||
|
||||
static void pdes(pmix_peer_t *p)
|
||||
{
|
||||
if (0 <= p->sd) {
|
||||
@ -148,6 +207,12 @@ static void pdes(pmix_peer_t *p)
|
||||
if (NULL != p->recv_msg) {
|
||||
PMIX_RELEASE(p->recv_msg);
|
||||
}
|
||||
/* perform any epilog */
|
||||
cleanup(&p->epilog);
|
||||
/* cleanup the epilog */
|
||||
PMIX_LIST_DESTRUCT(&p->epilog.cleanup_dirs);
|
||||
PMIX_LIST_DESTRUCT(&p->epilog.cleanup_files);
|
||||
PMIX_LIST_DESTRUCT(&p->epilog.ignores);
|
||||
}
|
||||
PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_peer_t,
|
||||
pmix_object_t,
|
||||
@ -252,3 +317,200 @@ static void qdes(pmix_query_caddy_t *p)
|
||||
PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_query_caddy_t,
|
||||
pmix_object_t,
|
||||
qcon, qdes);
|
||||
|
||||
/*
 * Execute an epilog: remove every registered file, then every registered
 * directory.
 *
 * @param epi  epilog holding the lists of files and directories to remove,
 *             plus the uid/gid that each target must be owned by
 *
 * An entry is skipped (and the reason logged at verbosity 10) when it
 * cannot be stat'ed, when its owner or group differs from the uid/gid
 * recorded in the epilog, or - for directories - when the owner lacks
 * full rwx permission. Removal is best effort: failures are only logged.
 *
 * NOTE(review): stat() follows symbolic links, so the ownership check is
 * applied to the link target - confirm that this is the intended policy.
 */
static void cleanup(pmix_epilog_t *epi)
{
    pmix_cleanup_file_t *cf;
    pmix_cleanup_dir_t *cd;
    struct stat statbuf;

    /* start with any specified files */
    PMIX_LIST_FOREACH(cf, &epi->cleanup_files, pmix_cleanup_file_t) {
        /* check the effective uid/gid of the file and ensure it
         * matches that of the peer - we do this to provide at least
         * some minimum level of protection */
        if (0 != stat(cf->path, &statbuf)) {
            /* the failure reason is in errno - stat() itself only returns -1 */
            pmix_output_verbose(10, pmix_globals.debug_output,
                                "File %s failed to stat: %s", cf->path, strerror(errno));
            continue;
        }
        if (statbuf.st_uid != epi->uid ||
            statbuf.st_gid != epi->gid) {
            pmix_output_verbose(10, pmix_globals.debug_output,
                                "File %s uid/gid doesn't match: uid %lu(%lu) gid %lu(%lu)",
                                cf->path,
                                (unsigned long)statbuf.st_uid, (unsigned long)epi->uid,
                                (unsigned long)statbuf.st_gid, (unsigned long)epi->gid);
            continue;
        }
        if (0 != unlink(cf->path)) {
            /* as above, unlink() reports the cause through errno */
            pmix_output_verbose(10, pmix_globals.debug_output,
                                "File %s failed to unlink: %s", cf->path, strerror(errno));
        }
    }

    /* now cleanup the directories */
    PMIX_LIST_FOREACH(cd, &epi->cleanup_dirs, pmix_cleanup_dir_t) {
        /* same ownership check as for files */
        if (0 != stat(cd->path, &statbuf)) {
            pmix_output_verbose(10, pmix_globals.debug_output,
                                "Directory %s failed to stat: %s", cd->path, strerror(errno));
            continue;
        }
        if (statbuf.st_uid != epi->uid ||
            statbuf.st_gid != epi->gid) {
            pmix_output_verbose(10, pmix_globals.debug_output,
                                "Directory %s uid/gid doesn't match: uid %lu(%lu) gid %lu(%lu)",
                                cd->path,
                                (unsigned long)statbuf.st_uid, (unsigned long)epi->uid,
                                (unsigned long)statbuf.st_gid, (unsigned long)epi->gid);
            continue;
        }
        /* only descend when the owner has full rwx on the directory */
        if ((statbuf.st_mode & S_IRWXU) == S_IRWXU) {
            dirpath_destroy(cd->path, cd, epi);
        } else {
            pmix_output_verbose(10, pmix_globals.debug_output,
                                "Directory %s lacks permissions", cd->path);
        }
    }
}
|
||||
|
||||
/*
 * Remove the contents of a directory and, when it ends up empty, the
 * directory itself.
 *
 * @param path  directory to clean; NULL is tolerated and does nothing
 * @param cd    the cleanup request being processed: cd->recurse controls
 *              descent into subdirectories, cd->leave_topdir protects
 *              cd->path itself from removal
 * @param epi   epilog supplying the ignore list and the uid/gid that
 *              every removed entry must be owned by
 *
 * Entries are skipped when they appear on the ignore list, cannot be
 * stat'ed (e.g. another process removed them first), or are owned by a
 * different uid/gid. Subdirectories are only entered when recursion was
 * requested and the owner has full rwx permission; otherwise they are
 * left in place. All removals are best effort.
 */
static void dirpath_destroy(char *path, pmix_cleanup_dir_t *cd, pmix_epilog_t *epi)
{
    bool ignore;
    DIR *dp;
    struct dirent *ep;
    char *filenm;
    struct stat buf;
    pmix_cleanup_file_t *cf;

    if (NULL == path) {  /* protect against error */
        return;
    }

    /* if this path is to be ignored, then do so */
    PMIX_LIST_FOREACH(cf, &epi->ignores, pmix_cleanup_file_t) {
        if (0 == strcmp(cf->path, path)) {
            return;
        }
    }

    /* Open up the directory */
    dp = opendir(path);
    if (NULL == dp) {
        return;
    }

    while (NULL != (ep = readdir(dp))) {
        /* skip "." and ".." */
        if ((0 == strcmp(ep->d_name, ".")) ||
            (0 == strcmp(ep->d_name, ".."))) {
            continue;
        }

        /* Build the full pathname. This allocates memory, so every
         * path out of this iteration must free it exactly once. */
        filenm = pmix_os_path(false, path, ep->d_name, NULL);
        if (NULL == filenm) {
            continue;
        }

        /* Check the ignore list. The decision is recorded in a flag and
         * acted upon after the scan: a "continue" inside the list loop
         * would only advance that inner loop, leaving the freed name in
         * use by the code below. */
        ignore = false;
        PMIX_LIST_FOREACH(cf, &epi->ignores, pmix_cleanup_file_t) {
            if (0 == strcmp(cf->path, filenm)) {
                ignore = true;
                break;
            }
        }
        if (ignore) {
            free(filenm);
            continue;
        }

        if (0 > stat(filenm, &buf)) {
            /* Handle a race condition. filenm might have been deleted by
             * another process running on the same node. That typically
             * occurs when one task is removing the job_session_dir while
             * another task is still removing its proc_session_dir. */
            free(filenm);
            continue;
        }

        /* if the uid/gid don't match, then leave it alone */
        if (buf.st_uid != epi->uid ||
            buf.st_gid != epi->gid) {
            free(filenm);
            continue;
        }

        if (S_ISDIR(buf.st_mode)) {
            /* Subdirectories are destroyed recursively, but only when we
             * were told to recurse and have full owner access. Otherwise
             * they are left alone - unlink() is never applied to them. */
            if (cd->recurse && ((buf.st_mode & S_IRWXU) == S_IRWXU)) {
                dirpath_destroy(filenm, cd, epi);
            }
        } else {
            /* Files are removed right here */
            unlink(filenm);
        }
        free(filenm);
    }

    /* Done with this directory */
    closedir(dp);

    /* If the directory is empty, then remove it unless we
     * were told to leave it */
    if (0 == strcmp(path, cd->path) && cd->leave_topdir) {
        return;
    }
    if (dirpath_is_empty(path)) {
        rmdir(path);
    }
}
|
||||
|
||||
/*
 * Report whether a directory holds no entries other than "." and "..".
 *
 * @param path  directory to inspect
 * @return true when path is NULL or the directory has no real entries;
 *         false when an entry is found or the directory cannot be opened
 */
static bool dirpath_is_empty(const char *path)
{
    DIR *dir;
    struct dirent *entry;
    bool empty = true;

    if (NULL == path) {  /* protect against error */
        return true;
    }

    dir = opendir(path);
    if (NULL == dir) {
        return false;
    }

    while (NULL != (entry = readdir(dir))) {
        if (0 == strcmp(entry->d_name, ".") ||
            0 == strcmp(entry->d_name, "..")) {
            continue;
        }
        /* found a real entry - no need to look any further */
        empty = false;
        break;
    }
    closedir(dir);

    return empty;
}
|
||||
|
@ -119,6 +119,29 @@ typedef struct pmix_personality_t {
|
||||
pmix_gds_base_module_t *gds;
|
||||
} pmix_personality_t;
|
||||
|
||||
/* define a set of structs for tracking post-termination cleanup */
|
||||
/* Post-termination cleanup state: the owner identity that cleanup targets
 * must match, plus the lists of paths to remove or to leave alone. */
typedef struct pmix_epilog_t {
|
||||
uid_t uid; /* owner uid a path must have before it is removed */
|
||||
||||
gid_t gid; /* owner gid a path must have before it is removed */
|
||||
pmix_list_t cleanup_dirs; /* list of pmix_cleanup_dir_t to remove */
|
||||
pmix_list_t cleanup_files; /* list of pmix_cleanup_file_t to remove */
|
||||
pmix_list_t ignores; /* list of pmix_cleanup_file_t whose paths must not be removed */
|
||||
} pmix_epilog_t;
|
||||
|
||||
/* List item naming a single path; used both for files to be removed and
 * for entries on an epilog's ignore list. */
typedef struct {
|
||||
pmix_list_item_t super; /* list linkage */
|
||||
char *path; /* heap-allocated path, freed by the class destructor */
|
||||
} pmix_cleanup_file_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_cleanup_file_t);
|
||||
|
||||
/* List item describing one directory to be cleaned up, together with the
 * flags controlling how the cleanup is carried out. */
typedef struct {
|
||||
pmix_list_item_t super; /* list linkage */
|
||||
char *path; /* heap-allocated directory path, freed by the class destructor */
|
||||
bool recurse; /* descend into and clean subdirectories */
|
||||
bool leave_topdir; /* clean the contents but do not remove `path` itself */
|
||||
} pmix_cleanup_dir_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_cleanup_dir_t);
|
||||
|
||||
/* objects used by servers for tracking active nspaces */
|
||||
typedef struct {
|
||||
pmix_list_item_t super;
|
||||
@ -133,6 +156,8 @@ typedef struct {
|
||||
* Since servers may support clients from multiple nspaces,
|
||||
* track their respective compatibility modules here */
|
||||
pmix_personality_t compat;
|
||||
pmix_epilog_t epilog; // things to do upon termination of all local clients
|
||||
// from this nspace
|
||||
} pmix_nspace_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_nspace_t);
|
||||
|
||||
@ -156,6 +181,17 @@ typedef struct pmix_rank_info_t {
|
||||
} pmix_rank_info_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_rank_info_t);
|
||||
|
||||
|
||||
/* define a very simple caddy for dealing with pmix_info_t
|
||||
* objects when transferring portions of arrays */
|
||||
typedef struct {
|
||||
pmix_list_item_t super;
|
||||
pmix_info_t *info;
|
||||
size_t ninfo;
|
||||
} pmix_info_caddy_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_info_caddy_t);
|
||||
|
||||
|
||||
/* object for tracking peers - each peer can have multiple
|
||||
* connections. This can occur if the initial app executes
|
||||
* a fork/exec, and the child initiates its own connection
|
||||
@ -177,6 +213,8 @@ typedef struct pmix_peer_t {
|
||||
pmix_list_t send_queue; /**< list of messages to send */
|
||||
pmix_ptl_send_t *send_msg; /**< current send in progress */
|
||||
pmix_ptl_recv_t *recv_msg; /**< current recv in progress */
|
||||
pmix_epilog_t epilog; /**< things to be performed upon
|
||||
termination of this peer */
|
||||
} pmix_peer_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_peer_t);
|
||||
|
||||
@ -305,14 +343,6 @@ typedef struct {
|
||||
} pmix_cb_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_cb_t);
|
||||
|
||||
/* define a very simple caddy for dealing with pmix_info_t
|
||||
* objects when transferring portions of arrays */
|
||||
typedef struct {
|
||||
pmix_list_item_t super;
|
||||
pmix_info_t *info;
|
||||
} pmix_info_caddy_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_info_caddy_t);
|
||||
|
||||
#define PMIX_THREADSHIFT(r, c) \
|
||||
do { \
|
||||
pmix_event_assign(&((r)->ev), pmix_globals.evbase, \
|
||||
|
@ -426,6 +426,7 @@ pmix_status_t hash_cache_job_info(struct pmix_nspace_t *ns,
|
||||
/* an array of data pertaining to a specific proc */
|
||||
if (PMIX_DATA_ARRAY != info[n].value.type) {
|
||||
PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
|
||||
rc = PMIX_ERR_TYPE_MISMATCH;
|
||||
goto release;
|
||||
}
|
||||
size = info[n].value.data.darray->size;
|
||||
@ -433,6 +434,7 @@ pmix_status_t hash_cache_job_info(struct pmix_nspace_t *ns,
|
||||
/* first element of the array must be the rank */
|
||||
if (0 != strcmp(iptr[0].key, PMIX_RANK) ||
|
||||
PMIX_PROC_RANK != iptr[0].value.type) {
|
||||
rc = PMIX_ERR_TYPE_MISMATCH;
|
||||
PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
|
||||
goto release;
|
||||
}
|
||||
@ -458,7 +460,7 @@ pmix_status_t hash_cache_job_info(struct pmix_nspace_t *ns,
|
||||
if (NULL == tmp) {
|
||||
PMIX_ERROR_LOG(PMIX_ERR_NOMEM);
|
||||
rc = PMIX_ERR_NOMEM;
|
||||
return rc;
|
||||
goto release;
|
||||
}
|
||||
kp2->value->type = PMIX_COMPRESSED_STRING;
|
||||
free(kp2->value->data.string);
|
||||
@ -493,10 +495,10 @@ pmix_status_t hash_cache_job_info(struct pmix_nspace_t *ns,
|
||||
if (PMIX_STRING_SIZE_CHECK(kp2->value)) {
|
||||
if (pmix_util_compress_string(kp2->value->data.string, &tmp, &len)) {
|
||||
if (NULL == tmp) {
|
||||
PMIX_ERROR_LOG(PMIX_ERR_NOMEM);
|
||||
PMIX_RELEASE(kp2);
|
||||
rc = PMIX_ERR_NOMEM;
|
||||
return rc;
|
||||
PMIX_ERROR_LOG(rc);
|
||||
PMIX_RELEASE(kp2);
|
||||
goto release;
|
||||
}
|
||||
kp2->value->type = PMIX_COMPRESSED_STRING;
|
||||
free(kp2->value->data.string);
|
||||
|
@ -1161,6 +1161,12 @@ static void connection_handler(int sd, short args, void *cbdata)
|
||||
peer->nptr = nptr;
|
||||
PMIX_RETAIN(info);
|
||||
peer->info = info;
|
||||
/* update the epilog fields */
|
||||
peer->epilog.uid = info->uid;
|
||||
peer->epilog.gid = info->gid;
|
||||
/* ensure the nspace epilog is updated too */
|
||||
nptr->epilog.uid = info->uid;
|
||||
nptr->epilog.gid = info->gid;
|
||||
info->proc_cnt++; /* increase number of processes on this rank */
|
||||
peer->sd = pnd->sd;
|
||||
if (0 > (peer->index = pmix_pointer_array_add(&pmix_server_globals.clients, peer))) {
|
||||
@ -1399,6 +1405,11 @@ static void process_cbfunc(int sd, short args, void *cbdata)
|
||||
peer->nptr = nptr;
|
||||
PMIX_RETAIN(info);
|
||||
peer->info = info;
|
||||
/* save the uid/gid */
|
||||
peer->epilog.uid = info->uid;
|
||||
peer->epilog.gid = info->gid;
|
||||
nptr->epilog.uid = info->uid;
|
||||
nptr->epilog.gid = info->gid;
|
||||
peer->proc_cnt = 1;
|
||||
peer->sd = pnd->sd;
|
||||
|
||||
|
@ -601,6 +601,11 @@ static void connection_handler(int sd, short args, void *cbdata)
|
||||
psave->nptr = nptr;
|
||||
PMIX_RETAIN(info);
|
||||
psave->info = info;
|
||||
/* save the epilog info */
|
||||
psave->epilog.uid = info->uid;
|
||||
psave->epilog.gid = info->gid;
|
||||
nptr->epilog.uid = info->uid;
|
||||
nptr->epilog.gid = info->gid;
|
||||
info->proc_cnt++; /* increase number of processes on this rank */
|
||||
psave->sd = pnd->sd;
|
||||
if (0 > (psave->index = pmix_pointer_array_add(&pmix_server_globals.clients, psave))) {
|
||||
|
@ -382,6 +382,7 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf,
|
||||
}
|
||||
if (PMIX_ERR_NOT_FOUND != rc || NULL == lcd) {
|
||||
/* we have a problem - e.g., out of memory */
|
||||
cbfunc(PMIX_ERR_NOT_FOUND, NULL, 0, cbdata, NULL, NULL);
|
||||
PMIX_INFO_FREE(info, ninfo);
|
||||
return rc;
|
||||
}
|
||||
|
@ -2015,6 +2015,13 @@ pmix_status_t pmix_server_job_ctrl(pmix_peer_t *peer,
|
||||
pmix_status_t rc;
|
||||
pmix_query_caddy_t *cd;
|
||||
pmix_proc_t proc;
|
||||
size_t n;
|
||||
bool recurse, leave_topdir, duplicate;
|
||||
pmix_list_t cachedirs, cachefiles;
|
||||
pmix_epilog_t *epi;
|
||||
pmix_cleanup_file_t *cf, *cf2;
|
||||
pmix_cleanup_dir_t *cdir, *cdir2;
|
||||
struct stat statbuf;
|
||||
|
||||
pmix_output_verbose(2, pmix_server_globals.base_output,
|
||||
"recvd job control request from client");
|
||||
@ -2045,6 +2052,22 @@ pmix_status_t pmix_server_job_ctrl(pmix_peer_t *peer,
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
/* check targets to find proper place to put any epilog requests */
|
||||
if (NULL == cd->targets) {
|
||||
epi = &peer->nptr->epilog;
|
||||
} else if (1 == cd->ntargets) {
|
||||
if (0 == strncmp(cd->targets[0].nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN)) {
|
||||
if (PMIX_RANK_WILDCARD == cd->targets[0].rank) {
|
||||
epi = &peer->nptr->epilog;
|
||||
} else {
|
||||
epi = &peer->epilog;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
epi = NULL; // do not allow epilog requests
|
||||
}
|
||||
|
||||
/* unpack the number of info objects */
|
||||
cnt = 1;
|
||||
PMIX_BFROPS_UNPACK(rc, peer, buf, &cd->ninfo, &cnt, PMIX_SIZE);
|
||||
@ -2063,6 +2086,173 @@ pmix_status_t pmix_server_job_ctrl(pmix_peer_t *peer,
|
||||
}
|
||||
}
|
||||
|
||||
/* if this includes a request for post-termination cleanup, we handle
|
||||
* that request ourselves */
|
||||
PMIX_CONSTRUCT(&cachedirs, pmix_list_t);
|
||||
PMIX_CONSTRUCT(&cachefiles, pmix_list_t);
|
||||
cnt = 0; // track how many infos are cleanup related
|
||||
for (n=0; n < cd->ninfo; n++) {
|
||||
if (0 == strncmp(cd->info[n].key, PMIX_REGISTER_CLEANUP, PMIX_MAX_KEYLEN)) {
|
||||
++cnt;
|
||||
/* see if we allow epilog requests */
|
||||
if (NULL == epi) {
|
||||
/* return an error */
|
||||
rc = PMIX_ERR_BAD_PARAM;
|
||||
goto exit;
|
||||
}
|
||||
if (PMIX_STRING != cd->info[n].value.type ||
|
||||
NULL == cd->info[n].value.data.string) {
|
||||
/* return an error */
|
||||
rc = PMIX_ERR_BAD_PARAM;
|
||||
goto exit;
|
||||
}
|
||||
if (0 != stat(cd->info[n].value.data.string, &statbuf)) {
|
||||
/* return an error */
|
||||
rc = PMIX_ERR_BAD_PARAM;
|
||||
goto exit;
|
||||
}
|
||||
if (S_ISDIR(statbuf.st_mode)) {
|
||||
cdir = PMIX_NEW(pmix_cleanup_dir_t);
|
||||
if (NULL == cdir) {
|
||||
/* return an error */
|
||||
rc = PMIX_ERR_NOMEM;
|
||||
goto exit;
|
||||
}
|
||||
cdir->path = strdup(cd->info[n].value.data.string);
|
||||
pmix_list_append(&cachedirs, &cdir->super);
|
||||
} else {
|
||||
cf = PMIX_NEW(pmix_cleanup_file_t);
|
||||
if (NULL == cf) {
|
||||
/* return an error */
|
||||
rc = PMIX_ERR_NOMEM;
|
||||
goto exit;
|
||||
}
|
||||
cf->path = strdup(cd->info[n].value.data.string);
|
||||
pmix_list_append(&cachefiles, &cf->super);
|
||||
}
|
||||
} else if (0 == strncmp(cd->info[n].key, PMIX_CLEANUP_RECURSIVE, PMIX_MAX_KEYLEN)) {
|
||||
/* see if we allow epilog requests */
|
||||
if (NULL == epi) {
|
||||
/* return an error */
|
||||
rc = PMIX_ERR_BAD_PARAM;
|
||||
goto exit;
|
||||
}
|
||||
recurse = PMIX_INFO_TRUE(&cd->info[n]);
|
||||
++cnt;
|
||||
} else if (0 == strncmp(cd->info[n].key, PMIX_CLEANUP_IGNORE, PMIX_MAX_KEYLEN)) {
|
||||
if (PMIX_STRING != cd->info[n].value.type ||
|
||||
NULL == cd->info[n].value.data.string) {
|
||||
/* return an error */
|
||||
rc = PMIX_ERR_BAD_PARAM;
|
||||
goto exit;
|
||||
}
|
||||
/* see if we allow epilog requests */
|
||||
if (NULL == epi) {
|
||||
/* return an error */
|
||||
rc = PMIX_ERR_BAD_PARAM;
|
||||
goto exit;
|
||||
}
|
||||
/* scan the list of ignores for any duplicate */
|
||||
duplicate = false;
|
||||
PMIX_LIST_FOREACH(cf, &epi->ignores, pmix_cleanup_file_t) {
|
||||
if (0 == strcmp(cf->path, cd->info[n].value.data.string)) {
|
||||
/* we can drop this request */
|
||||
duplicate = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!duplicate) {
|
||||
cf = PMIX_NEW(pmix_cleanup_file_t);
|
||||
if (NULL == cf) {
|
||||
/* return an error */
|
||||
rc = PMIX_ERR_NOMEM;
|
||||
goto exit;
|
||||
}
|
||||
cf->path = strdup(cd->info[n].value.data.string);
|
||||
pmix_list_append(&epi->ignores, &cf->super);
|
||||
}
|
||||
++cnt;
|
||||
} else if (0 == strncmp(cd->info[n].key, PMIX_CLEANUP_LEAVE_TOPDIR, PMIX_MAX_KEYLEN)) {
|
||||
/* see if we allow epilog requests */
|
||||
if (NULL == epi) {
|
||||
/* return an error */
|
||||
rc = PMIX_ERR_BAD_PARAM;
|
||||
goto exit;
|
||||
}
|
||||
leave_topdir = PMIX_INFO_TRUE(&cd->info[n]);
|
||||
++cnt;
|
||||
}
|
||||
}
|
||||
if (0 < cnt) {
|
||||
while (NULL != (cdir = (pmix_cleanup_dir_t*)pmix_list_remove_first(&cachedirs))) {
|
||||
/* scan the existing list of directories for any duplicate */
|
||||
PMIX_LIST_FOREACH(cdir2, &epi->cleanup_dirs, pmix_cleanup_dir_t) {
|
||||
if (0 == strcmp(cdir2->path, cdir->path)) {
|
||||
/* duplicate - check for difference in flags per RFC
|
||||
* precedence rules */
|
||||
if (!cdir->recurse && recurse) {
|
||||
cdir->recurse = recurse;
|
||||
}
|
||||
if (!cdir->leave_topdir && leave_topdir) {
|
||||
cdir->leave_topdir = leave_topdir;
|
||||
}
|
||||
PMIX_RELEASE(cdir);
|
||||
cdir = NULL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (NULL != cdir) {
|
||||
/* check for conflict with ignore */
|
||||
PMIX_LIST_FOREACH(cf, &epi->ignores, pmix_cleanup_file_t) {
|
||||
if (0 == strcmp(cf->path, cdir->path)) {
|
||||
/* return an error */
|
||||
rc = PMIX_ERR_CONFLICTING_CLEANUP_DIRECTIVES;
|
||||
PMIX_LIST_DESTRUCT(&cachedirs);
|
||||
PMIX_LIST_DESTRUCT(&cachefiles);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
cdir->recurse = recurse;
|
||||
cdir->leave_topdir = leave_topdir;
|
||||
/* just append it to the end of the list */
|
||||
pmix_list_append(&epi->cleanup_dirs, &cdir->super);
|
||||
}
|
||||
}
|
||||
PMIX_DESTRUCT(&cachedirs);
|
||||
while (NULL != (cf = (pmix_cleanup_file_t*)pmix_list_remove_first(&cachefiles))) {
|
||||
/* scan the existing list of files for any duplicate */
|
||||
PMIX_LIST_FOREACH(cf2, &epi->cleanup_files, pmix_cleanup_file_t) {
|
||||
if (0 == strcmp(cf2->path, cf->path)) {
|
||||
PMIX_RELEASE(cf);
|
||||
cf = NULL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (NULL != cf) {
|
||||
/* check for conflict with ignore */
|
||||
PMIX_LIST_FOREACH(cf2, &epi->ignores, pmix_cleanup_file_t) {
|
||||
if (0 == strcmp(cf->path, cf2->path)) {
|
||||
/* return an error */
|
||||
rc = PMIX_ERR_CONFLICTING_CLEANUP_DIRECTIVES;
|
||||
PMIX_LIST_DESTRUCT(&cachedirs);
|
||||
PMIX_LIST_DESTRUCT(&cachefiles);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
/* just append it to the end of the list */
|
||||
pmix_list_append(&epi->cleanup_files, &cf->super);
|
||||
}
|
||||
}
|
||||
PMIX_DESTRUCT(&cachefiles);
|
||||
if (cnt == cd->ninfo) {
|
||||
/* nothing more to do */
|
||||
if (NULL != cbfunc) {
|
||||
cbfunc(PMIX_SUCCESS, NULL, 0, cd, NULL, NULL);
|
||||
}
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
/* setup the requesting peer name */
|
||||
(void)strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN);
|
||||
proc.rank = peer->info->pname.rank;
|
||||
|
@ -171,6 +171,8 @@ PMIX_EXPORT const char* PMIx_Error_string(pmix_status_t errnum)
|
||||
return "PMIX MODEL DECLARED";
|
||||
case PMIX_ERR_TEMP_UNAVAILABLE:
|
||||
return "PMIX TEMPORARILY UNAVAILABLE";
|
||||
case PMIX_ERR_CONFLICTING_CLEANUP_DIRECTIVES:
|
||||
return "PMIX CONFLICTING CLEANUP DIRECTIVES";
|
||||
case PMIX_SUCCESS:
|
||||
return "SUCCESS";
|
||||
default:
|
||||
|
@ -25,6 +25,9 @@
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_STAT_H
|
||||
#include <sys/stat.h>
|
||||
#endif
|
||||
|
||||
#include "opal/dss/dss.h"
|
||||
#include "opal/mca/event/event.h"
|
||||
@ -71,6 +74,8 @@ static void pmix3x_query(opal_list_t *queries,
|
||||
static void pmix3x_log(opal_list_t *info,
|
||||
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
|
||||
|
||||
static int pmix3x_register_cleanup(char *path, bool ignore, bool jobscope);
|
||||
|
||||
const opal_pmix_base_module_t opal_pmix_pmix3x_module = {
|
||||
/* client APIs */
|
||||
.init = pmix3x_client_init,
|
||||
@ -101,6 +106,7 @@ const opal_pmix_base_module_t opal_pmix_pmix3x_module = {
|
||||
.log = pmix3x_log,
|
||||
.allocate = pmix3x_allocate,
|
||||
.job_control = pmix3x_job_control,
|
||||
.register_cleanup = pmix3x_register_cleanup,
|
||||
/* server APIs */
|
||||
.server_init = pmix3x_server_init,
|
||||
.server_finalize = pmix3x_server_finalize,
|
||||
@ -333,6 +339,78 @@ void pmix3x_event_hdlr(size_t evhdlr_registration_id,
|
||||
return;
|
||||
}
|
||||
|
||||
static void cleanup_cbfunc(pmix_status_t status,
|
||||
pmix_info_t *info, size_t ninfo,
|
||||
void *cbdata,
|
||||
pmix_release_cbfunc_t release_fn,
|
||||
void *release_cbdata)
|
||||
{
|
||||
opal_pmix_lock_t *lk = (opal_pmix_lock_t*)cbdata;
|
||||
|
||||
OPAL_POST_OBJECT(lk);
|
||||
|
||||
/* let the library release the data and cleanup from
|
||||
* the operation */
|
||||
if (NULL != release_fn) {
|
||||
release_fn(release_cbdata);
|
||||
}
|
||||
|
||||
/* release the block */
|
||||
lk->status = pmix3x_convert_rc(status);
|
||||
OPAL_PMIX_WAKEUP_THREAD(lk);
|
||||
}
|
||||
|
||||
static int pmix3x_register_cleanup(char *path, bool ignore, bool jobscope)
|
||||
{
|
||||
opal_pmix_lock_t lk;
|
||||
pmix_info_t pinfo[3];
|
||||
size_t n, ninfo=0;
|
||||
pmix_status_t rc;
|
||||
int ret;
|
||||
struct stat statbuf;
|
||||
|
||||
OPAL_PMIX_CONSTRUCT_LOCK(&lk);
|
||||
|
||||
if (ignore) {
|
||||
/* they want this path ignored */
|
||||
PMIX_INFO_LOAD(&pinfo[ninfo], PMIX_CLEANUP_IGNORE, path, PMIX_STRING);
|
||||
++ninfo;
|
||||
} else {
|
||||
/* order cleanup of the provided path */
|
||||
PMIX_INFO_LOAD(&pinfo[ninfo], PMIX_REGISTER_CLEANUP, path, PMIX_STRING);
|
||||
++ninfo;
|
||||
/* if the path is a directory, then we need to tell the server
|
||||
* to recursively clean up */
|
||||
if (stat(path, &statbuf) != 0) {
|
||||
return OPAL_ERR_NOT_FOUND;
|
||||
}
|
||||
if (S_ISDIR(statbuf.st_mode)) {
|
||||
/* recursively cleanup directories */
|
||||
PMIX_INFO_LOAD(&pinfo[ninfo], PMIX_CLEANUP_RECURSIVE, NULL, PMIX_BOOL);
|
||||
++ninfo;
|
||||
}
|
||||
}
|
||||
|
||||
/* if they want this applied to the job, then indicate so */
|
||||
if (jobscope) {
|
||||
rc = PMIx_Job_control_nb(NULL, 0, pinfo, ninfo, cleanup_cbfunc, (void*)&lk);
|
||||
} else {
|
||||
/* only applies to us */
|
||||
rc = PMIx_Job_control_nb(&mca_pmix_pmix3x_component.myproc, 1, pinfo, ninfo, cleanup_cbfunc, (void*)&lk);
|
||||
}
|
||||
if (PMIX_SUCCESS != rc) {
|
||||
ret = pmix3x_convert_rc(rc);
|
||||
} else {
|
||||
OPAL_PMIX_WAIT_THREAD(&lk);
|
||||
ret = lk.status;
|
||||
}
|
||||
OPAL_PMIX_DESTRUCT_LOCK(&lk);
|
||||
for (n=0; n < ninfo; n++) {
|
||||
PMIX_INFO_DESTRUCT(&pinfo[n]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
opal_vpid_t pmix3x_convert_rank(pmix_rank_t rank)
|
||||
{
|
||||
switch(rank) {
|
||||
|
@ -39,6 +39,7 @@ BEGIN_C_DECLS
|
||||
|
||||
typedef struct {
|
||||
opal_pmix_base_component_t super;
|
||||
pmix_proc_t myproc;
|
||||
opal_list_t jobids;
|
||||
bool native_launch;
|
||||
size_t evindex;
|
||||
|
@ -38,7 +38,6 @@
|
||||
#include "pmix.h"
|
||||
#include "pmix_tool.h"
|
||||
|
||||
static pmix_proc_t my_proc;
|
||||
static char *dbgvalue=NULL;
|
||||
|
||||
static void errreg_cbfunc (pmix_status_t status,
|
||||
@ -105,7 +104,7 @@ int pmix3x_client_init(opal_list_t *ilist)
|
||||
}
|
||||
|
||||
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
|
||||
rc = PMIx_Init(&my_proc, pinfo, ninfo);
|
||||
rc = PMIx_Init(&mca_pmix_pmix3x_component.myproc, pinfo, ninfo);
|
||||
if (NULL != pinfo) {
|
||||
PMIX_INFO_FREE(pinfo, ninfo);
|
||||
}
|
||||
@ -127,20 +126,20 @@ int pmix3x_client_init(opal_list_t *ilist)
|
||||
/* if we were launched by the OMPI RTE, then
|
||||
* the jobid is in a special format - so get it */
|
||||
mca_pmix_pmix3x_component.native_launch = true;
|
||||
opal_convert_string_to_jobid(&pname.jobid, my_proc.nspace);
|
||||
opal_convert_string_to_jobid(&pname.jobid, mca_pmix_pmix3x_component.myproc.nspace);
|
||||
} else {
|
||||
/* we were launched by someone else, so make the
|
||||
* jobid just be the hash of the nspace */
|
||||
OPAL_HASH_JOBID(my_proc.nspace, pname.jobid);
|
||||
OPAL_HASH_JOBID(mca_pmix_pmix3x_component.myproc.nspace, pname.jobid);
|
||||
}
|
||||
/* insert this into our list of jobids - it will be the
|
||||
* first, and so we'll check it first */
|
||||
job = OBJ_NEW(opal_pmix3x_jobid_trkr_t);
|
||||
(void)strncpy(job->nspace, my_proc.nspace, PMIX_MAX_NSLEN);
|
||||
(void)strncpy(job->nspace, mca_pmix_pmix3x_component.myproc.nspace, PMIX_MAX_NSLEN);
|
||||
job->jobid = pname.jobid;
|
||||
opal_list_append(&mca_pmix_pmix3x_component.jobids, &job->super);
|
||||
|
||||
pname.vpid = pmix3x_convert_rank(my_proc.rank);
|
||||
pname.vpid = pmix3x_convert_rank(mca_pmix_pmix3x_component.myproc.rank);
|
||||
opal_proc_set_name(&pname);
|
||||
|
||||
/* release the thread in case the event handler fires when
|
||||
@ -221,10 +220,10 @@ int pmix3x_tool_init(opal_list_t *info)
|
||||
/* check to see if our name is being given from above */
|
||||
if (0 == strcmp(val->key, OPAL_PMIX_TOOL_NSPACE)) {
|
||||
opal_convert_string_to_jobid(&pname.jobid, val->data.string);
|
||||
(void)strncpy(my_proc.nspace, val->data.string, PMIX_MAX_NSLEN);
|
||||
(void)strncpy(mca_pmix_pmix3x_component.myproc.nspace, val->data.string, PMIX_MAX_NSLEN);
|
||||
} else if (0 == strcmp(val->key, OPAL_PMIX_TOOL_RANK)) {
|
||||
pname.vpid = val->data.name.vpid;
|
||||
my_proc.rank = pname.vpid;
|
||||
mca_pmix_pmix3x_component.myproc.rank = pname.vpid;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@ -236,7 +235,7 @@ int pmix3x_tool_init(opal_list_t *info)
|
||||
mca_pmix_pmix3x_component.native_launch = true;
|
||||
|
||||
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
|
||||
rc = PMIx_tool_init(&my_proc, pinfo, ninfo);
|
||||
rc = PMIx_tool_init(&mca_pmix_pmix3x_component.myproc, pinfo, ninfo);
|
||||
if (NULL != pinfo) {
|
||||
PMIX_INFO_FREE(pinfo, ninfo);
|
||||
}
|
||||
@ -254,13 +253,13 @@ int pmix3x_tool_init(opal_list_t *info)
|
||||
}
|
||||
|
||||
/* store our jobid and rank */
|
||||
opal_convert_string_to_jobid(&pname.jobid, my_proc.nspace);
|
||||
pname.vpid = pmix3x_convert_rank(my_proc.rank);
|
||||
opal_convert_string_to_jobid(&pname.jobid, mca_pmix_pmix3x_component.myproc.nspace);
|
||||
pname.vpid = pmix3x_convert_rank(mca_pmix_pmix3x_component.myproc.rank);
|
||||
|
||||
/* insert this into our list of jobids - it will be the
|
||||
* first, and so we'll check it first */
|
||||
job = OBJ_NEW(opal_pmix3x_jobid_trkr_t);
|
||||
(void)strncpy(job->nspace, my_proc.nspace, PMIX_MAX_NSLEN);
|
||||
(void)strncpy(job->nspace, mca_pmix_pmix3x_component.myproc.nspace, PMIX_MAX_NSLEN);
|
||||
job->jobid = pname.jobid;
|
||||
opal_list_append(&mca_pmix_pmix3x_component.jobids, &job->super);
|
||||
|
||||
@ -399,7 +398,7 @@ int pmix3x_store_local(const opal_process_name_t *proc, opal_value_t *val)
|
||||
p.rank = pmix3x_convert_opalrank(proc->vpid);
|
||||
} else {
|
||||
/* use our name */
|
||||
(void)strncpy(p.nspace, my_proc.nspace, PMIX_MAX_NSLEN);
|
||||
(void)strncpy(p.nspace, mca_pmix_pmix3x_component.myproc.nspace, PMIX_MAX_NSLEN);
|
||||
p.rank = pmix3x_convert_opalrank(OPAL_PROC_MY_NAME.vpid);
|
||||
}
|
||||
|
||||
@ -614,7 +613,7 @@ int pmix3x_get(const opal_process_name_t *proc, const char *key,
|
||||
if (0 == strcmp(key, OPAL_PMIX_RANK)) {
|
||||
(*val) = OBJ_NEW(opal_value_t);
|
||||
(*val)->type = OPAL_INT;
|
||||
(*val)->data.integer = pmix3x_convert_rank(my_proc.rank);
|
||||
(*val)->data.integer = pmix3x_convert_rank(mca_pmix_pmix3x_component.myproc.rank);
|
||||
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
@ -622,7 +621,7 @@ int pmix3x_get(const opal_process_name_t *proc, const char *key,
|
||||
*val = NULL;
|
||||
|
||||
if (NULL == proc) {
|
||||
(void)strncpy(p.nspace, my_proc.nspace, PMIX_MAX_NSLEN);
|
||||
(void)strncpy(p.nspace, mca_pmix_pmix3x_component.myproc.nspace, PMIX_MAX_NSLEN);
|
||||
p.rank = pmix3x_convert_rank(PMIX_RANK_WILDCARD);
|
||||
} else {
|
||||
if (NULL == (nsptr = pmix3x_convert_jobid(proc->jobid))) {
|
||||
@ -719,7 +718,7 @@ int pmix3x_getnb(const opal_process_name_t *proc, const char *key,
|
||||
if (NULL != cbfunc) {
|
||||
val = OBJ_NEW(opal_value_t);
|
||||
val->type = OPAL_INT;
|
||||
val->data.integer = pmix3x_convert_rank(my_proc.rank);
|
||||
val->data.integer = pmix3x_convert_rank(mca_pmix_pmix3x_component.myproc.rank);
|
||||
cbfunc(OPAL_SUCCESS, val, cbdata);
|
||||
}
|
||||
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
|
||||
@ -733,7 +732,7 @@ int pmix3x_getnb(const opal_process_name_t *proc, const char *key,
|
||||
op->cbdata = cbdata;
|
||||
|
||||
if (NULL == proc) {
|
||||
(void)strncpy(op->p.nspace, my_proc.nspace, PMIX_MAX_NSLEN);
|
||||
(void)strncpy(op->p.nspace, mca_pmix_pmix3x_component.myproc.nspace, PMIX_MAX_NSLEN);
|
||||
op->p.rank = pmix3x_convert_rank(PMIX_RANK_WILDCARD);
|
||||
} else {
|
||||
if (NULL == (nsptr = pmix3x_convert_jobid(proc->jobid))) {
|
||||
|
@ -118,6 +118,7 @@ BEGIN_C_DECLS
|
||||
|
||||
|
||||
/* information about relative ranks as assigned by the RM */
|
||||
#define OPAL_PMIX_CLUSTER_ID "pmix.clid" // (char*) a string name for the cluster this proc is executing on
|
||||
#define OPAL_PMIX_PROCID "pmix.procid" // (opal_process_name_t) process identifier
|
||||
#define OPAL_PMIX_NSPACE "pmix.nspace" // (char*) nspace of a job
|
||||
#define OPAL_PMIX_JOBID "pmix.jobid" // (uint32_t) jobid assigned by scheduler
|
||||
@ -189,6 +190,7 @@ BEGIN_C_DECLS
|
||||
#define OPAL_PMIX_NOTIFY_COMPLETION "pmix.notecomp" // (bool) notify parent process upon termination of child job
|
||||
#define OPAL_PMIX_RANGE "pmix.range" // (int) opal_pmix_data_range_t value for calls to publish/lookup/unpublish
|
||||
#define OPAL_PMIX_PERSISTENCE "pmix.persist" // (int) opal_pmix_persistence_t value for calls to publish
|
||||
#define OPAL_PMIX_DATA_SCOPE "pmix.scope" // (pmix_scope_t) scope of the data to be found in a PMIx_Get call
|
||||
#define OPAL_PMIX_OPTIONAL "pmix.optional" // (bool) look only in the immediate data store for the requested value - do
|
||||
// not request data from the server if not found
|
||||
#define OPAL_PMIX_EMBED_BARRIER "pmix.embed.barrier" // (bool) execute a blocking fence operation before executing the
|
||||
@ -364,6 +366,16 @@ BEGIN_C_DECLS
|
||||
#define OPAL_PMIX_JOB_CTRL_PROVISION_IMAGE "pmix.jctrl.pvnimg" // (char*) name of the image that is to be provisioned
|
||||
#define OPAL_PMIX_JOB_CTRL_PREEMPTIBLE "pmix.jctrl.preempt" // (bool) job can be pre-empted
|
||||
#define OPAL_PMIX_JOB_CTRL_TERMINATE "pmix.jctrl.term" // (bool) politely terminate the specified procs
|
||||
#define OPAL_PMIX_REGISTER_CLEANUP "pmix.reg.cleanup" // (char*) comma-delimited list of files/directories to
|
||||
// be removed upon process termination
|
||||
#define OPAL_PMIX_CLEANUP_RECURSIVE "pmix.clnup.recurse" // (bool) recursively cleanup all subdirectories under the
|
||||
// specified one(s)
|
||||
#define OPAL_PMIX_CLEANUP_EMPTY "pmix.clnup.empty" // (bool) only remove empty subdirectories
|
||||
#define OPAL_PMIX_CLEANUP_IGNORE "pmix.clnup.ignore" // (char*) comma-delimited list of filenames that are not
|
||||
// to be removed
|
||||
#define OPAL_PMIX_CLEANUP_LEAVE_TOPDIR "pmix.clnup.lvtop" // (bool) when recursively cleaning subdirs, do not remove
|
||||
// the top-level directory (the one given in the
|
||||
// cleanup request)
|
||||
|
||||
|
||||
/* monitoring attributes */
|
||||
|
@ -16,6 +16,7 @@
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2017 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2017 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -44,6 +45,7 @@
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/threads/mutex.h"
|
||||
#include "opal/constants.h"
|
||||
#include "opal/mca/pmix/pmix.h"
|
||||
|
||||
/*
|
||||
* Private data
|
||||
@ -785,18 +787,24 @@ static int open_file(int i)
|
||||
|
||||
/* Actually open the file */
|
||||
info[i].ldi_fd = open(filename, flags, 0644);
|
||||
free(filename); /* release the filename in all cases */
|
||||
if (-1 == info[i].ldi_fd) {
|
||||
info[i].ldi_used = false;
|
||||
free(filename); /* release the filename in all cases */
|
||||
return OPAL_ERR_IN_ERRNO;
|
||||
}
|
||||
|
||||
/* Make the file be close-on-exec to prevent child inheritance
|
||||
* problems */
|
||||
if (-1 == fcntl(info[i].ldi_fd, F_SETFD, 1)) {
|
||||
free(filename); /* release the filename in all cases */
|
||||
return OPAL_ERR_IN_ERRNO;
|
||||
}
|
||||
|
||||
/* register it to be ignored */
|
||||
if (NULL != opal_pmix.register_cleanup) {
|
||||
opal_pmix.register_cleanup(filename, true, false);
|
||||
}
|
||||
free(filename); /* release the filename in all cases */
|
||||
}
|
||||
|
||||
/* Return successfully even if the session dir did not exist yet;
|
||||
|
@ -129,7 +129,6 @@ int orte_ess_base_app_setup(bool db_restrict_local)
|
||||
error = "orte_errmgr_base_open";
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* setup my session directory */
|
||||
if (orte_create_session_dirs) {
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_framework.framework_output,
|
||||
@ -147,6 +146,22 @@ int orte_ess_base_app_setup(bool db_restrict_local)
|
||||
proc-specific session directory. */
|
||||
opal_output_set_output_file_info(orte_process_info.proc_session_dir,
|
||||
"output-", NULL, NULL);
|
||||
/* register the directory for cleanup */
|
||||
if (NULL != opal_pmix.register_cleanup) {
|
||||
if (orte_standalone_operation) {
|
||||
if (OPAL_SUCCESS != (ret = opal_pmix.register_cleanup(orte_process_info.top_session_dir, false, true))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "register cleanup";
|
||||
goto error;
|
||||
}
|
||||
} else {
|
||||
if (OPAL_SUCCESS != (ret = opal_pmix.register_cleanup(orte_process_info.jobfam_session_dir, false, false))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "register cleanup";
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/* Setup the communication infrastructure */
|
||||
/* Routed system */
|
||||
@ -357,7 +372,9 @@ int orte_ess_base_app_finalize(void)
|
||||
(void) mca_base_framework_close(&orte_oob_base_framework);
|
||||
(void) mca_base_framework_close(&orte_state_base_framework);
|
||||
|
||||
if (NULL == opal_pmix.register_cleanup) {
|
||||
orte_session_dir_finalize(ORTE_PROC_MY_NAME);
|
||||
}
|
||||
/* cleanup the process info */
|
||||
orte_proc_info_finalize();
|
||||
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user