Merge pull request #4606 from rhc54/topic/register
Update to PMIx v3.0 PR for cleanup registration
This commit is contained in:
commit
e9f4e93800
@ -12,7 +12,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2007 Voltaire. All rights reserved.
|
||||
* Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2010-2015 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2010-2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2015 Mellanox Technologies. All rights reserved.
|
||||
*
|
||||
@ -136,6 +136,8 @@ struct mca_btl_vader_component_t {
|
||||
opal_list_t pending_endpoints; /**< list of endpoints with pending fragments */
|
||||
opal_list_t pending_fragments; /**< fragments pending remote completion */
|
||||
|
||||
char *backing_directory; /**< directory to place shared memory backing files */
|
||||
|
||||
/* knem stuff */
|
||||
#if OPAL_BTL_VADER_HAVE_KNEM
|
||||
unsigned int knem_dma_min; /**< minimum size to enable DMA for knem transfers (0 disables) */
|
||||
|
@ -12,10 +12,10 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2007 Voltaire. All rights reserved.
|
||||
* Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2010-2015 Los Alamos National Security, LLC.
|
||||
* Copyright (c) 2010-2017 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -211,6 +211,19 @@ static int mca_btl_vader_component_register (void)
|
||||
OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_GROUP, &mca_btl_vader_component.single_copy_mechanism);
|
||||
OBJ_RELEASE(new_enum);
|
||||
|
||||
if (0 == access ("/dev/shm", W_OK)) {
|
||||
mca_btl_vader_component.backing_directory = "/dev/shm";
|
||||
} else {
|
||||
mca_btl_vader_component.backing_directory = opal_process_info.proc_session_dir;
|
||||
}
|
||||
(void) mca_base_component_var_register (&mca_btl_vader_component.super.btl_version, "backing_directory",
|
||||
"Directory to place backing files for shared memory communication. "
|
||||
"This directory should be on a local filesystem such as /tmp or "
|
||||
"/dev/shm (default: (linux) /dev/shm, (others) session directory)",
|
||||
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_3,
|
||||
MCA_BASE_VAR_SCOPE_READONLY, &mca_btl_vader_component.backing_directory);
|
||||
|
||||
|
||||
#if OPAL_BTL_VADER_HAVE_KNEM
|
||||
/* Currently disabling DMA mode by default; it's not clear that this is useful in all applications and architectures. */
|
||||
mca_btl_vader_component.knem_dma_min = 0;
|
||||
@ -491,13 +504,17 @@ static mca_btl_base_module_t **mca_btl_vader_component_init (int *num_btls,
|
||||
if (MCA_BTL_VADER_XPMEM != mca_btl_vader_component.single_copy_mechanism) {
|
||||
char *sm_file;
|
||||
|
||||
rc = asprintf(&sm_file, "%s" OPAL_PATH_SEP "vader_segment.%s.%d", opal_process_info.proc_session_dir,
|
||||
rc = asprintf(&sm_file, "%s" OPAL_PATH_SEP "vader_segment.%s.%d", mca_btl_vader_component.backing_directory,
|
||||
opal_process_info.nodename, MCA_BTL_VADER_LOCAL_RANK);
|
||||
if (0 > rc) {
|
||||
free (btls);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (NULL != opal_pmix.register_cleanup) {
|
||||
opal_pmix.register_cleanup (sm_file, false, false);
|
||||
}
|
||||
|
||||
rc = opal_shmem_segment_create (&component->seg_ds, sm_file, component->segment_size);
|
||||
free (sm_file);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
|
@ -65,6 +65,7 @@ typedef struct {
|
||||
opal_mutex_t mutex;
|
||||
opal_pmix_condition_t cond;
|
||||
volatile bool active;
|
||||
int status;
|
||||
} opal_pmix_lock_t;
|
||||
|
||||
|
||||
|
@ -867,6 +867,9 @@ typedef int (*opal_pmix_base_process_monitor_fn_t)(opal_list_t *monitor,
|
||||
opal_list_t *directives,
|
||||
opal_pmix_info_cbfunc_t cbfunc, void *cbdata);
|
||||
|
||||
/* register cleanup */
|
||||
typedef int (*opal_pmix_base_register_cleanup_fn_t)(char *path, bool ignore, bool jobscope);
|
||||
|
||||
/*
|
||||
* the standard public API data structure
|
||||
*/
|
||||
@ -901,6 +904,7 @@ typedef struct {
|
||||
opal_pmix_base_alloc_fn_t allocate;
|
||||
opal_pmix_base_job_control_fn_t job_control;
|
||||
opal_pmix_base_process_monitor_fn_t monitor;
|
||||
opal_pmix_base_register_cleanup_fn_t register_cleanup;
|
||||
/* server APIs */
|
||||
opal_pmix_base_module_server_init_fn_t server_init;
|
||||
opal_pmix_base_module_server_finalize_fn_t server_finalize;
|
||||
|
@ -30,7 +30,7 @@ greek=
|
||||
# command, or with the date (if "git describe" fails) in the form of
|
||||
# "date<date>".
|
||||
|
||||
repo_rev=gitf56d30e
|
||||
repo_rev=git5c0b64b
|
||||
|
||||
# If tarball_version is not empty, it is used as the version string in
|
||||
# the tarball filename, regardless of all other versions listed in
|
||||
@ -44,7 +44,7 @@ tarball_version=
|
||||
|
||||
# The date when this release was created
|
||||
|
||||
date="Nov 11, 2017"
|
||||
date="Dec 11, 2017"
|
||||
|
||||
# The shared library version of each of PMIx's public libraries.
|
||||
# These versions are maintained in accordance with the "Library
|
||||
|
@ -462,6 +462,16 @@ typedef uint32_t pmix_rank_t;
|
||||
#define PMIX_JOB_CTRL_PROVISION_IMAGE "pmix.jctrl.pvnimg" // (char*) name of the image that is to be provisioned
|
||||
#define PMIX_JOB_CTRL_PREEMPTIBLE "pmix.jctrl.preempt" // (bool) job can be pre-empted
|
||||
#define PMIX_JOB_CTRL_TERMINATE "pmix.jctrl.term" // (bool) politely terminate the specified procs
|
||||
#define PMIX_REGISTER_CLEANUP "pmix.reg.cleanup" // (char*) comma-delimited list of files/directories to
|
||||
// be removed upon process termination
|
||||
#define PMIX_CLEANUP_RECURSIVE "pmix.clnup.recurse" // (bool) recursively cleanup all subdirectories under the
|
||||
// specified one(s)
|
||||
#define PMIX_CLEANUP_EMPTY "pmix.clnup.empty" // (bool) only remove empty subdirectories
|
||||
#define PMIX_CLEANUP_IGNORE "pmix.clnup.ignore" // (char*) comma-delimited list of filenames that are not
|
||||
// to be removed
|
||||
#define PMIX_CLEANUP_LEAVE_TOPDIR "pmix.clnup.lvtop" // (bool) when recursively cleaning subdirs, do not remove
|
||||
// the top-level directory (the one given in the
|
||||
// cleanup request)
|
||||
|
||||
/* monitoring attributes */
|
||||
#define PMIX_MONITOR_ID "pmix.monitor.id" // (char*) provide a string identifier for this request
|
||||
@ -584,6 +594,7 @@ typedef int pmix_status_t;
|
||||
#define PMIX_ERR_NOT_IMPLEMENTED -48
|
||||
#define PMIX_ERR_COMM_FAILURE -49
|
||||
#define PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER -50 // internal-only
|
||||
#define PMIX_ERR_CONFLICTING_CLEANUP_DIRECTIVES -51
|
||||
|
||||
/* define a starting point for v2.x error values */
|
||||
#define PMIX_ERR_V2X_BASE -100
|
||||
|
@ -84,7 +84,7 @@ void pmix_atomic_rmb(void)
|
||||
/* Write memory barrier.
 *
 * Issues only the write-barrier macro. The read-barrier macro PMIXRMB()
 * belongs to pmix_atomic_rmb() and was being issued here by mistake.
 */
static inline
void pmix_atomic_wmb(void)
{
    PMIXWMB();
}
|
||||
|
||||
static inline
|
||||
@ -110,7 +110,7 @@ void pmix_atomic_isync(void)
|
||||
#pragma mc_func pmix_atomic_rmb { "7c2004ac" } /* lwsync */
|
||||
#pragma reg_killed_by pmix_atomic_rmb /* none */
|
||||
|
||||
#pragma mc_func pmix_atomic_wmb { "7c0006ac" } /* eieio */
|
||||
#pragma mc_func pmix_atomic_wmb { "7c2004ac" } /* lwsync */
|
||||
#pragma reg_killed_by pmix_atomic_wmb /* none */
|
||||
|
||||
#endif
|
||||
|
@ -36,11 +36,27 @@
|
||||
#endif
|
||||
#include <ctype.h>
#include <errno.h>
|
||||
#include PMIX_EVENT_HEADER
|
||||
#if HAVE_SYS_STAT_H
|
||||
#include <sys/stat.h>
|
||||
#endif /* HAVE_SYS_STAT_H */
|
||||
#ifdef HAVE_DIRENT_H
|
||||
#include <dirent.h>
|
||||
#endif /* HAVE_DIRENT_H */
|
||||
|
||||
#include <pmix_common.h>
|
||||
|
||||
#include "src/mca/bfrops/bfrops_types.h"
|
||||
#include "src/class/pmix_hash_table.h"
|
||||
#include "src/class/pmix_list.h"
|
||||
#include "src/threads/threads.h"
|
||||
#include "src/util/argv.h"
|
||||
#include "src/util/error.h"
|
||||
#include "src/util/os_path.h"
|
||||
|
||||
static void cleanup(pmix_epilog_t *epi);
|
||||
static void dirpath_destroy(char *path, pmix_cleanup_dir_t *cd,
|
||||
pmix_epilog_t *epi);
|
||||
static bool dirpath_is_empty(const char *path);
|
||||
|
||||
PMIX_EXPORT pmix_lock_t pmix_global_lock = {
|
||||
.mutex = PMIX_MUTEX_STATIC_INIT,
|
||||
@ -52,6 +68,36 @@ PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_namelist_t,
|
||||
pmix_list_item_t,
|
||||
NULL, NULL);
|
||||
|
||||
/* Constructor for pmix_cleanup_file_t: the tracker starts with no path. */
static void cfcon(pmix_cleanup_file_t *p)
{
    p->path = NULL;
}
|
||||
/* Destructor for pmix_cleanup_file_t: release the path string owned by
 * the tracker. free(NULL) is a no-op, so an unset path is handled too. */
static void cfdes(pmix_cleanup_file_t *p)
{
    free(p->path);
}
|
||||
PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_cleanup_file_t,
|
||||
pmix_list_item_t,
|
||||
cfcon, cfdes);
|
||||
|
||||
/* Constructor for pmix_cleanup_dir_t: no path, and both behaviour flags
 * (recursive descent, keep the top-level directory) switched off. */
static void cdcon(pmix_cleanup_dir_t *p)
{
    p->leave_topdir = false;
    p->recurse = false;
    p->path = NULL;
}
|
||||
/* Destructor for pmix_cleanup_dir_t: release the path string owned by
 * the tracker. free(NULL) is a no-op, so an unset path is handled too. */
static void cddes(pmix_cleanup_dir_t *p)
{
    free(p->path);
}
|
||||
PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_cleanup_dir_t,
|
||||
pmix_list_item_t,
|
||||
cdcon, cddes);
|
||||
|
||||
static void nscon(pmix_nspace_t *p)
|
||||
{
|
||||
p->nspace = NULL;
|
||||
@ -61,6 +107,9 @@ static void nscon(pmix_nspace_t *p)
|
||||
p->ndelivered = 0;
|
||||
PMIX_CONSTRUCT(&p->ranks, pmix_list_t);
|
||||
memset(&p->compat, 0, sizeof(p->compat));
|
||||
PMIX_CONSTRUCT(&p->epilog.cleanup_dirs, pmix_list_t);
|
||||
PMIX_CONSTRUCT(&p->epilog.cleanup_files, pmix_list_t);
|
||||
PMIX_CONSTRUCT(&p->epilog.ignores, pmix_list_t);
|
||||
}
|
||||
static void nsdes(pmix_nspace_t *p)
|
||||
{
|
||||
@ -71,6 +120,12 @@ static void nsdes(pmix_nspace_t *p)
|
||||
PMIX_RELEASE(p->jobbkt);
|
||||
}
|
||||
PMIX_LIST_DESTRUCT(&p->ranks);
|
||||
/* perform any epilog */
|
||||
cleanup(&p->epilog);
|
||||
/* cleanup the epilog */
|
||||
PMIX_LIST_DESTRUCT(&p->epilog.cleanup_dirs);
|
||||
PMIX_LIST_DESTRUCT(&p->epilog.cleanup_files);
|
||||
PMIX_LIST_DESTRUCT(&p->epilog.ignores);
|
||||
}
|
||||
PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_nspace_t,
|
||||
pmix_list_item_t,
|
||||
@ -124,7 +179,11 @@ static void pcon(pmix_peer_t *p)
|
||||
PMIX_CONSTRUCT(&p->send_queue, pmix_list_t);
|
||||
p->send_msg = NULL;
|
||||
p->recv_msg = NULL;
|
||||
PMIX_CONSTRUCT(&p->epilog.cleanup_dirs, pmix_list_t);
|
||||
PMIX_CONSTRUCT(&p->epilog.cleanup_files, pmix_list_t);
|
||||
PMIX_CONSTRUCT(&p->epilog.ignores, pmix_list_t);
|
||||
}
|
||||
|
||||
static void pdes(pmix_peer_t *p)
|
||||
{
|
||||
if (0 <= p->sd) {
|
||||
@ -148,6 +207,12 @@ static void pdes(pmix_peer_t *p)
|
||||
if (NULL != p->recv_msg) {
|
||||
PMIX_RELEASE(p->recv_msg);
|
||||
}
|
||||
/* perform any epilog */
|
||||
cleanup(&p->epilog);
|
||||
/* cleanup the epilog */
|
||||
PMIX_LIST_DESTRUCT(&p->epilog.cleanup_dirs);
|
||||
PMIX_LIST_DESTRUCT(&p->epilog.cleanup_files);
|
||||
PMIX_LIST_DESTRUCT(&p->epilog.ignores);
|
||||
}
|
||||
PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_peer_t,
|
||||
pmix_object_t,
|
||||
@ -252,3 +317,200 @@ static void qdes(pmix_query_caddy_t *p)
|
||||
PMIX_EXPORT PMIX_CLASS_INSTANCE(pmix_query_caddy_t,
|
||||
pmix_object_t,
|
||||
qcon, qdes);
|
||||
|
||||
/* Execute an epilog: remove every registered file, then process every
 * registered directory.
 *
 * An entry is only touched when stat() succeeds and its owner uid AND gid
 * both equal the uid/gid recorded in the epilog; anything else is logged
 * at verbosity 10 and skipped. Directories must additionally grant the
 * owner full rwx permission before dirpath_destroy() is invoked on them.
 *
 * All failures are best-effort: they are logged and the walk continues.
 *
 * @param epi  epilog holding the uid/gid to match plus the cleanup_files
 *             and cleanup_dirs lists (the ignores list is consulted by
 *             dirpath_destroy())
 */
static void cleanup(pmix_epilog_t *epi)
{
    pmix_cleanup_file_t *cf;
    pmix_cleanup_dir_t *cd;
    struct stat statbuf;

    /* start with any specified files */
    PMIX_LIST_FOREACH(cf, &epi->cleanup_files, pmix_cleanup_file_t) {
        /* check the effective uid/gid of the file and ensure it
         * matches that of the peer - we do this to provide at least
         * some minimum level of protection */
        if (0 != stat(cf->path, &statbuf)) {
            /* stat() only returns -1; the actual cause is in errno */
            pmix_output_verbose(10, pmix_globals.debug_output,
                                "File %s failed to stat: %s", cf->path, strerror(errno));
            continue;
        }
        if (statbuf.st_uid != epi->uid ||
            statbuf.st_gid != epi->gid) {
            pmix_output_verbose(10, pmix_globals.debug_output,
                                "File %s uid/gid doesn't match: uid %lu(%lu) gid %lu(%lu)",
                                cf->path,
                                (unsigned long)statbuf.st_uid, (unsigned long)epi->uid,
                                (unsigned long)statbuf.st_gid, (unsigned long)epi->gid);
            continue;
        }
        if (0 != unlink(cf->path)) {
            /* as with stat(), the reason for the failure is in errno */
            pmix_output_verbose(10, pmix_globals.debug_output,
                                "File %s failed to unlink: %s", cf->path, strerror(errno));
        }
    }

    /* now cleanup the directories */
    PMIX_LIST_FOREACH(cd, &epi->cleanup_dirs, pmix_cleanup_dir_t) {
        /* same ownership check as for files */
        if (0 != stat(cd->path, &statbuf)) {
            pmix_output_verbose(10, pmix_globals.debug_output,
                                "Directory %s failed to stat: %s", cd->path, strerror(errno));
            continue;
        }
        if (statbuf.st_uid != epi->uid ||
            statbuf.st_gid != epi->gid) {
            pmix_output_verbose(10, pmix_globals.debug_output,
                                "Directory %s uid/gid doesn't match: uid %lu(%lu) gid %lu(%lu)",
                                cd->path,
                                (unsigned long)statbuf.st_uid, (unsigned long)epi->uid,
                                (unsigned long)statbuf.st_gid, (unsigned long)epi->gid);
            continue;
        }
        /* the owner must hold read, write and execute on the directory */
        if ((statbuf.st_mode & S_IRWXU) == S_IRWXU) {
            dirpath_destroy(cd->path, cd, epi);
        } else {
            pmix_output_verbose(10, pmix_globals.debug_output,
                                "Directory %s lacks permissions", cd->path);
        }
    }
}
|
||||
|
||||
/* Remove the contents of a directory and, when it ends up empty, the
 * directory itself.
 *
 * Rules applied to each entry (other than "." and ".."):
 *   - entries whose full path appears in epi->ignores are left alone;
 *   - entries that cannot be stat'ed (e.g. removed concurrently) are skipped;
 *   - entries whose uid/gid differ from the epilog's are left alone;
 *   - regular entries are unlinked;
 *   - sub-directories are descended into only when cd->recurse is set and
 *     the owner holds rwx on them; otherwise they are left in place
 *     (unlink() is never attempted on a directory).
 *
 * After the walk, the directory is removed if it is empty, unless it is
 * the top-level directory of the request (path equals cd->path) and
 * cd->leave_topdir is set.
 *
 * @param path  directory to process; NULL is tolerated and ignored
 * @param cd    the cleanup request this walk belongs to (top-level path
 *              and the recurse / leave_topdir flags)
 * @param epi   epilog providing the uid/gid to match and the ignore list
 */
static void dirpath_destroy(char *path, pmix_cleanup_dir_t *cd, pmix_epilog_t *epi)
{
    bool ignore;
    DIR *dp;
    struct dirent *ep;
    char *filenm;
    struct stat buf;
    pmix_cleanup_file_t *cf;

    if (NULL == path) {  /* protect against error */
        return;
    }

    /* if this path is to be ignored, then do so */
    PMIX_LIST_FOREACH(cf, &epi->ignores, pmix_cleanup_file_t) {
        if (0 == strcmp(cf->path, path)) {
            return;
        }
    }

    /* Open up the directory */
    dp = opendir(path);
    if (NULL == dp) {
        return;
    }

    while (NULL != (ep = readdir(dp))) {
        /* skip . and .. */
        if ((0 == strcmp(ep->d_name, ".")) ||
            (0 == strcmp(ep->d_name, ".."))) {
            continue;
        }

        /* Build the full pathname of the entry. This allocates memory,
         * so every path through the loop body below must free it. */
        filenm = pmix_os_path(false, path, ep->d_name, NULL);
        if (NULL == filenm) {
            /* defensive: nothing we can do without a pathname */
            continue;
        }

        /* If this entry is to be ignored, then do so. The decision is
         * recorded in a flag because a "continue" inside the list
         * iteration would only advance the inner loop, leaving filenm
         * freed but still in use below. */
        ignore = false;
        PMIX_LIST_FOREACH(cf, &epi->ignores, pmix_cleanup_file_t) {
            if (0 == strcmp(cf->path, filenm)) {
                ignore = true;
                break;
            }
        }
        if (ignore) {
            free(filenm);
            continue;
        }

        if (0 > stat(filenm, &buf)) {
            /* Handle a race condition. filenm might have been deleted by
             * another process running on the same node. That typically
             * occurs when one task is removing the job_session_dir and
             * another task is still removing its proc_session_dir. */
            free(filenm);
            continue;
        }

        /* if the uid/gid don't match, then leave it alone */
        if (buf.st_uid != epi->uid ||
            buf.st_gid != epi->gid) {
            free(filenm);
            continue;
        }

        if (S_ISDIR(buf.st_mode)) {
            /* Directories are only handled by recursive descent, and only
             * when we were told to recurse and the owner has full access.
             * Otherwise leave the directory in place and keep removing
             * the remaining entries. */
            if (cd->recurse && ((buf.st_mode & S_IRWXU) == S_IRWXU)) {
                dirpath_destroy(filenm, cd, epi);
            }
        } else {
            /* Files are removed right here */
            unlink(filenm);
        }
        free(filenm);
    }

    /* Done with this directory */
    closedir(dp);

    /* If the directory is empty, then remove it unless we
     * were told to leave it */
    if (0 == strcmp(path, cd->path) && cd->leave_topdir) {
        return;
    }
    if (dirpath_is_empty(path)) {
        rmdir(path);
    }
}
|
||||
|
||||
/* Report whether a directory contains no entries besides "." and "..".
 *
 * @param path  directory to inspect
 * @return true  when path is NULL, or the directory holds nothing but
 *               "." and "..";
 *         false when the directory cannot be opened or holds at least
 *               one other entry
 */
static bool dirpath_is_empty(const char *path)
{
    DIR *dir;
    struct dirent *entry;
    bool empty = true;

    if (NULL == path) {  /* protect against error */
        return true;
    }

    dir = opendir(path);
    if (NULL == dir) {
        return false;
    }

    /* stop scanning as soon as one real entry has been seen */
    while (empty && NULL != (entry = readdir(dir))) {
        if (0 == strcmp(entry->d_name, ".") ||
            0 == strcmp(entry->d_name, "..")) {
            continue;
        }
        empty = false;
    }

    closedir(dir);
    return empty;
}
|
||||
|
@ -119,6 +119,29 @@ typedef struct pmix_personality_t {
|
||||
pmix_gds_base_module_t *gds;
|
||||
} pmix_personality_t;
|
||||
|
||||
/* define a set of structs for tracking post-termination cleanup */
|
||||
typedef struct pmix_epilog_t {
|
||||
uid_t uid;
|
||||
gid_t gid;
|
||||
pmix_list_t cleanup_dirs;
|
||||
pmix_list_t cleanup_files;
|
||||
pmix_list_t ignores;
|
||||
} pmix_epilog_t;
|
||||
|
||||
typedef struct {
|
||||
pmix_list_item_t super;
|
||||
char *path;
|
||||
} pmix_cleanup_file_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_cleanup_file_t);
|
||||
|
||||
typedef struct {
|
||||
pmix_list_item_t super;
|
||||
char *path;
|
||||
bool recurse;
|
||||
bool leave_topdir;
|
||||
} pmix_cleanup_dir_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_cleanup_dir_t);
|
||||
|
||||
/* objects used by servers for tracking active nspaces */
|
||||
typedef struct {
|
||||
pmix_list_item_t super;
|
||||
@ -133,6 +156,8 @@ typedef struct {
|
||||
* Since servers may support clients from multiple nspaces,
|
||||
* track their respective compatibility modules here */
|
||||
pmix_personality_t compat;
|
||||
pmix_epilog_t epilog; // things to do upon termination of all local clients
|
||||
// from this nspace
|
||||
} pmix_nspace_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_nspace_t);
|
||||
|
||||
@ -156,6 +181,17 @@ typedef struct pmix_rank_info_t {
|
||||
} pmix_rank_info_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_rank_info_t);
|
||||
|
||||
|
||||
/* define a very simple caddy for dealing with pmix_info_t
|
||||
* objects when transferring portions of arrays */
|
||||
typedef struct {
|
||||
pmix_list_item_t super;
|
||||
pmix_info_t *info;
|
||||
size_t ninfo;
|
||||
} pmix_info_caddy_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_info_caddy_t);
|
||||
|
||||
|
||||
/* object for tracking peers - each peer can have multiple
|
||||
* connections. This can occur if the initial app executes
|
||||
* a fork/exec, and the child initiates its own connection
|
||||
@ -177,6 +213,8 @@ typedef struct pmix_peer_t {
|
||||
pmix_list_t send_queue; /**< list of messages to send */
|
||||
pmix_ptl_send_t *send_msg; /**< current send in progress */
|
||||
pmix_ptl_recv_t *recv_msg; /**< current recv in progress */
|
||||
pmix_epilog_t epilog; /**< things to be performed upon
|
||||
termination of this peer */
|
||||
} pmix_peer_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_peer_t);
|
||||
|
||||
@ -305,14 +343,6 @@ typedef struct {
|
||||
} pmix_cb_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_cb_t);
|
||||
|
||||
/* define a very simple caddy for dealing with pmix_info_t
|
||||
* objects when transferring portions of arrays */
|
||||
typedef struct {
|
||||
pmix_list_item_t super;
|
||||
pmix_info_t *info;
|
||||
} pmix_info_caddy_t;
|
||||
PMIX_CLASS_DECLARATION(pmix_info_caddy_t);
|
||||
|
||||
#define PMIX_THREADSHIFT(r, c) \
|
||||
do { \
|
||||
pmix_event_assign(&((r)->ev), pmix_globals.evbase, \
|
||||
|
@ -426,6 +426,7 @@ pmix_status_t hash_cache_job_info(struct pmix_nspace_t *ns,
|
||||
/* an array of data pertaining to a specific proc */
|
||||
if (PMIX_DATA_ARRAY != info[n].value.type) {
|
||||
PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
|
||||
rc = PMIX_ERR_TYPE_MISMATCH;
|
||||
goto release;
|
||||
}
|
||||
size = info[n].value.data.darray->size;
|
||||
@ -433,6 +434,7 @@ pmix_status_t hash_cache_job_info(struct pmix_nspace_t *ns,
|
||||
/* first element of the array must be the rank */
|
||||
if (0 != strcmp(iptr[0].key, PMIX_RANK) ||
|
||||
PMIX_PROC_RANK != iptr[0].value.type) {
|
||||
rc = PMIX_ERR_TYPE_MISMATCH;
|
||||
PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
|
||||
goto release;
|
||||
}
|
||||
@ -458,7 +460,7 @@ pmix_status_t hash_cache_job_info(struct pmix_nspace_t *ns,
|
||||
if (NULL == tmp) {
|
||||
PMIX_ERROR_LOG(PMIX_ERR_NOMEM);
|
||||
rc = PMIX_ERR_NOMEM;
|
||||
return rc;
|
||||
goto release;
|
||||
}
|
||||
kp2->value->type = PMIX_COMPRESSED_STRING;
|
||||
free(kp2->value->data.string);
|
||||
@ -493,10 +495,10 @@ pmix_status_t hash_cache_job_info(struct pmix_nspace_t *ns,
|
||||
if (PMIX_STRING_SIZE_CHECK(kp2->value)) {
|
||||
if (pmix_util_compress_string(kp2->value->data.string, &tmp, &len)) {
|
||||
if (NULL == tmp) {
|
||||
PMIX_ERROR_LOG(PMIX_ERR_NOMEM);
|
||||
PMIX_RELEASE(kp2);
|
||||
rc = PMIX_ERR_NOMEM;
|
||||
return rc;
|
||||
PMIX_ERROR_LOG(rc);
|
||||
PMIX_RELEASE(kp2);
|
||||
goto release;
|
||||
}
|
||||
kp2->value->type = PMIX_COMPRESSED_STRING;
|
||||
free(kp2->value->data.string);
|
||||
|
@ -1161,6 +1161,12 @@ static void connection_handler(int sd, short args, void *cbdata)
|
||||
peer->nptr = nptr;
|
||||
PMIX_RETAIN(info);
|
||||
peer->info = info;
|
||||
/* update the epilog fields */
|
||||
peer->epilog.uid = info->uid;
|
||||
peer->epilog.gid = info->gid;
|
||||
/* ensure the nspace epilog is updated too */
|
||||
nptr->epilog.uid = info->uid;
|
||||
nptr->epilog.gid = info->gid;
|
||||
info->proc_cnt++; /* increase number of processes on this rank */
|
||||
peer->sd = pnd->sd;
|
||||
if (0 > (peer->index = pmix_pointer_array_add(&pmix_server_globals.clients, peer))) {
|
||||
@ -1399,6 +1405,11 @@ static void process_cbfunc(int sd, short args, void *cbdata)
|
||||
peer->nptr = nptr;
|
||||
PMIX_RETAIN(info);
|
||||
peer->info = info;
|
||||
/* save the uid/gid */
|
||||
peer->epilog.uid = info->uid;
|
||||
peer->epilog.gid = info->gid;
|
||||
nptr->epilog.uid = info->uid;
|
||||
nptr->epilog.gid = info->gid;
|
||||
peer->proc_cnt = 1;
|
||||
peer->sd = pnd->sd;
|
||||
|
||||
|
@ -601,6 +601,11 @@ static void connection_handler(int sd, short args, void *cbdata)
|
||||
psave->nptr = nptr;
|
||||
PMIX_RETAIN(info);
|
||||
psave->info = info;
|
||||
/* save the epilog info */
|
||||
psave->epilog.uid = info->uid;
|
||||
psave->epilog.gid = info->gid;
|
||||
nptr->epilog.uid = info->uid;
|
||||
nptr->epilog.gid = info->gid;
|
||||
info->proc_cnt++; /* increase number of processes on this rank */
|
||||
psave->sd = pnd->sd;
|
||||
if (0 > (psave->index = pmix_pointer_array_add(&pmix_server_globals.clients, psave))) {
|
||||
|
@ -382,6 +382,7 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf,
|
||||
}
|
||||
if (PMIX_ERR_NOT_FOUND != rc || NULL == lcd) {
|
||||
/* we have a problem - e.g., out of memory */
|
||||
cbfunc(PMIX_ERR_NOT_FOUND, NULL, 0, cbdata, NULL, NULL);
|
||||
PMIX_INFO_FREE(info, ninfo);
|
||||
return rc;
|
||||
}
|
||||
|
@ -2015,6 +2015,13 @@ pmix_status_t pmix_server_job_ctrl(pmix_peer_t *peer,
|
||||
pmix_status_t rc;
|
||||
pmix_query_caddy_t *cd;
|
||||
pmix_proc_t proc;
|
||||
size_t n;
|
||||
bool recurse, leave_topdir, duplicate;
|
||||
pmix_list_t cachedirs, cachefiles;
|
||||
pmix_epilog_t *epi;
|
||||
pmix_cleanup_file_t *cf, *cf2;
|
||||
pmix_cleanup_dir_t *cdir, *cdir2;
|
||||
struct stat statbuf;
|
||||
|
||||
pmix_output_verbose(2, pmix_server_globals.base_output,
|
||||
"recvd job control request from client");
|
||||
@ -2045,6 +2052,22 @@ pmix_status_t pmix_server_job_ctrl(pmix_peer_t *peer,
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
/* check targets to find proper place to put any epilog requests */
|
||||
if (NULL == cd->targets) {
|
||||
epi = &peer->nptr->epilog;
|
||||
} else if (1 == cd->ntargets) {
|
||||
if (0 == strncmp(cd->targets[0].nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN)) {
|
||||
if (PMIX_RANK_WILDCARD == cd->targets[0].rank) {
|
||||
epi = &peer->nptr->epilog;
|
||||
} else {
|
||||
epi = &peer->epilog;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
epi = NULL; // do not allow epilog requests
|
||||
}
|
||||
|
||||
/* unpack the number of info objects */
|
||||
cnt = 1;
|
||||
PMIX_BFROPS_UNPACK(rc, peer, buf, &cd->ninfo, &cnt, PMIX_SIZE);
|
||||
@ -2063,6 +2086,173 @@ pmix_status_t pmix_server_job_ctrl(pmix_peer_t *peer,
|
||||
}
|
||||
}
|
||||
|
||||
/* if this includes a request for post-termination cleanup, we handle
|
||||
* that request ourselves */
|
||||
PMIX_CONSTRUCT(&cachedirs, pmix_list_t);
|
||||
PMIX_CONSTRUCT(&cachefiles, pmix_list_t);
|
||||
cnt = 0; // track how many infos are cleanup related
|
||||
for (n=0; n < cd->ninfo; n++) {
|
||||
if (0 == strncmp(cd->info[n].key, PMIX_REGISTER_CLEANUP, PMIX_MAX_KEYLEN)) {
|
||||
++cnt;
|
||||
/* see if we allow epilog requests */
|
||||
if (NULL == epi) {
|
||||
/* return an error */
|
||||
rc = PMIX_ERR_BAD_PARAM;
|
||||
goto exit;
|
||||
}
|
||||
if (PMIX_STRING != cd->info[n].value.type ||
|
||||
NULL == cd->info[n].value.data.string) {
|
||||
/* return an error */
|
||||
rc = PMIX_ERR_BAD_PARAM;
|
||||
goto exit;
|
||||
}
|
||||
if (0 != stat(cd->info[n].value.data.string, &statbuf)) {
|
||||
/* return an error */
|
||||
rc = PMIX_ERR_BAD_PARAM;
|
||||
goto exit;
|
||||
}
|
||||
if (S_ISDIR(statbuf.st_mode)) {
|
||||
cdir = PMIX_NEW(pmix_cleanup_dir_t);
|
||||
if (NULL == cdir) {
|
||||
/* return an error */
|
||||
rc = PMIX_ERR_NOMEM;
|
||||
goto exit;
|
||||
}
|
||||
cdir->path = strdup(cd->info[n].value.data.string);
|
||||
pmix_list_append(&cachedirs, &cdir->super);
|
||||
} else {
|
||||
cf = PMIX_NEW(pmix_cleanup_file_t);
|
||||
if (NULL == cf) {
|
||||
/* return an error */
|
||||
rc = PMIX_ERR_NOMEM;
|
||||
goto exit;
|
||||
}
|
||||
cf->path = strdup(cd->info[n].value.data.string);
|
||||
pmix_list_append(&cachefiles, &cf->super);
|
||||
}
|
||||
} else if (0 == strncmp(cd->info[n].key, PMIX_CLEANUP_RECURSIVE, PMIX_MAX_KEYLEN)) {
|
||||
/* see if we allow epilog requests */
|
||||
if (NULL == epi) {
|
||||
/* return an error */
|
||||
rc = PMIX_ERR_BAD_PARAM;
|
||||
goto exit;
|
||||
}
|
||||
recurse = PMIX_INFO_TRUE(&cd->info[n]);
|
||||
++cnt;
|
||||
} else if (0 == strncmp(cd->info[n].key, PMIX_CLEANUP_IGNORE, PMIX_MAX_KEYLEN)) {
|
||||
if (PMIX_STRING != cd->info[n].value.type ||
|
||||
NULL == cd->info[n].value.data.string) {
|
||||
/* return an error */
|
||||
rc = PMIX_ERR_BAD_PARAM;
|
||||
goto exit;
|
||||
}
|
||||
/* see if we allow epilog requests */
|
||||
if (NULL == epi) {
|
||||
/* return an error */
|
||||
rc = PMIX_ERR_BAD_PARAM;
|
||||
goto exit;
|
||||
}
|
||||
/* scan the list of ignores for any duplicate */
|
||||
duplicate = false;
|
||||
PMIX_LIST_FOREACH(cf, &epi->ignores, pmix_cleanup_file_t) {
|
||||
if (0 == strcmp(cf->path, cd->info[n].value.data.string)) {
|
||||
/* we can drop this request */
|
||||
duplicate = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!duplicate) {
|
||||
cf = PMIX_NEW(pmix_cleanup_file_t);
|
||||
if (NULL == cf) {
|
||||
/* return an error */
|
||||
rc = PMIX_ERR_NOMEM;
|
||||
goto exit;
|
||||
}
|
||||
cf->path = strdup(cd->info[n].value.data.string);
|
||||
pmix_list_append(&epi->ignores, &cf->super);
|
||||
}
|
||||
++cnt;
|
||||
} else if (0 == strncmp(cd->info[n].key, PMIX_CLEANUP_LEAVE_TOPDIR, PMIX_MAX_KEYLEN)) {
|
||||
/* see if we allow epilog requests */
|
||||
if (NULL == epi) {
|
||||
/* return an error */
|
||||
rc = PMIX_ERR_BAD_PARAM;
|
||||
goto exit;
|
||||
}
|
||||
leave_topdir = PMIX_INFO_TRUE(&cd->info[n]);
|
||||
++cnt;
|
||||
}
|
||||
}
|
||||
if (0 < cnt) {
|
||||
while (NULL != (cdir = (pmix_cleanup_dir_t*)pmix_list_remove_first(&cachedirs))) {
|
||||
/* scan the existing list of directories for any duplicate */
|
||||
PMIX_LIST_FOREACH(cdir2, &epi->cleanup_dirs, pmix_cleanup_dir_t) {
|
||||
if (0 == strcmp(cdir2->path, cdir->path)) {
|
||||
/* duplicate - check for difference in flags per RFC
|
||||
* precedence rules */
|
||||
if (!cdir->recurse && recurse) {
|
||||
cdir->recurse = recurse;
|
||||
}
|
||||
if (!cdir->leave_topdir && leave_topdir) {
|
||||
cdir->leave_topdir = leave_topdir;
|
||||
}
|
||||
PMIX_RELEASE(cdir);
|
||||
cdir = NULL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (NULL != cdir) {
|
||||
/* check for conflict with ignore */
|
||||
PMIX_LIST_FOREACH(cf, &epi->ignores, pmix_cleanup_file_t) {
|
||||
if (0 == strcmp(cf->path, cdir->path)) {
|
||||
/* return an error */
|
||||
rc = PMIX_ERR_CONFLICTING_CLEANUP_DIRECTIVES;
|
||||
PMIX_LIST_DESTRUCT(&cachedirs);
|
||||
PMIX_LIST_DESTRUCT(&cachefiles);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
cdir->recurse = recurse;
|
||||
cdir->leave_topdir = leave_topdir;
|
||||
/* just append it to the end of the list */
|
||||
pmix_list_append(&epi->cleanup_dirs, &cdir->super);
|
||||
}
|
||||
}
|
||||
PMIX_DESTRUCT(&cachedirs);
|
||||
while (NULL != (cf = (pmix_cleanup_file_t*)pmix_list_remove_first(&cachefiles))) {
|
||||
/* scan the existing list of files for any duplicate */
|
||||
PMIX_LIST_FOREACH(cf2, &epi->cleanup_files, pmix_cleanup_file_t) {
|
||||
if (0 == strcmp(cf2->path, cf->path)) {
|
||||
PMIX_RELEASE(cf);
|
||||
cf = NULL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (NULL != cf) {
|
||||
/* check for conflict with ignore */
|
||||
PMIX_LIST_FOREACH(cf2, &epi->ignores, pmix_cleanup_file_t) {
|
||||
if (0 == strcmp(cf->path, cf2->path)) {
|
||||
/* return an error */
|
||||
rc = PMIX_ERR_CONFLICTING_CLEANUP_DIRECTIVES;
|
||||
PMIX_LIST_DESTRUCT(&cachedirs);
|
||||
PMIX_LIST_DESTRUCT(&cachefiles);
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
/* just append it to the end of the list */
|
||||
pmix_list_append(&epi->cleanup_files, &cf->super);
|
||||
}
|
||||
}
|
||||
PMIX_DESTRUCT(&cachefiles);
|
||||
if (cnt == cd->ninfo) {
|
||||
/* nothing more to do */
|
||||
if (NULL != cbfunc) {
|
||||
cbfunc(PMIX_SUCCESS, NULL, 0, cd, NULL, NULL);
|
||||
}
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
/* setup the requesting peer name */
|
||||
(void)strncpy(proc.nspace, peer->info->pname.nspace, PMIX_MAX_NSLEN);
|
||||
proc.rank = peer->info->pname.rank;
|
||||
|
@ -171,6 +171,8 @@ PMIX_EXPORT const char* PMIx_Error_string(pmix_status_t errnum)
|
||||
return "PMIX MODEL DECLARED";
|
||||
case PMIX_ERR_TEMP_UNAVAILABLE:
|
||||
return "PMIX TEMPORARILY UNAVAILABLE";
|
||||
case PMIX_ERR_CONFLICTING_CLEANUP_DIRECTIVES:
|
||||
return "PMIX CONFLICTING CLEANUP DIRECTIVES";
|
||||
case PMIX_SUCCESS:
|
||||
return "SUCCESS";
|
||||
default:
|
||||
|
@ -25,6 +25,9 @@
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_STAT_H
|
||||
#include <sys/stat.h>
|
||||
#endif
|
||||
|
||||
#include "opal/dss/dss.h"
|
||||
#include "opal/mca/event/event.h"
|
||||
@ -71,6 +74,8 @@ static void pmix3x_query(opal_list_t *queries,
|
||||
static void pmix3x_log(opal_list_t *info,
|
||||
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
|
||||
|
||||
static int pmix3x_register_cleanup(char *path, bool ignore, bool jobscope);
|
||||
|
||||
const opal_pmix_base_module_t opal_pmix_pmix3x_module = {
|
||||
/* client APIs */
|
||||
.init = pmix3x_client_init,
|
||||
@ -101,6 +106,7 @@ const opal_pmix_base_module_t opal_pmix_pmix3x_module = {
|
||||
.log = pmix3x_log,
|
||||
.allocate = pmix3x_allocate,
|
||||
.job_control = pmix3x_job_control,
|
||||
.register_cleanup = pmix3x_register_cleanup,
|
||||
/* server APIs */
|
||||
.server_init = pmix3x_server_init,
|
||||
.server_finalize = pmix3x_server_finalize,
|
||||
@ -333,6 +339,78 @@ void pmix3x_event_hdlr(size_t evhdlr_registration_id,
|
||||
return;
|
||||
}
|
||||
|
||||
static void cleanup_cbfunc(pmix_status_t status,
|
||||
pmix_info_t *info, size_t ninfo,
|
||||
void *cbdata,
|
||||
pmix_release_cbfunc_t release_fn,
|
||||
void *release_cbdata)
|
||||
{
|
||||
opal_pmix_lock_t *lk = (opal_pmix_lock_t*)cbdata;
|
||||
|
||||
OPAL_POST_OBJECT(lk);
|
||||
|
||||
/* let the library release the data and cleanup from
|
||||
* the operation */
|
||||
if (NULL != release_fn) {
|
||||
release_fn(release_cbdata);
|
||||
}
|
||||
|
||||
/* release the block */
|
||||
lk->status = pmix3x_convert_rc(status);
|
||||
OPAL_PMIX_WAKEUP_THREAD(lk);
|
||||
}
|
||||
|
||||
static int pmix3x_register_cleanup(char *path, bool ignore, bool jobscope)
|
||||
{
|
||||
opal_pmix_lock_t lk;
|
||||
pmix_info_t pinfo[3];
|
||||
size_t n, ninfo=0;
|
||||
pmix_status_t rc;
|
||||
int ret;
|
||||
struct stat statbuf;
|
||||
|
||||
OPAL_PMIX_CONSTRUCT_LOCK(&lk);
|
||||
|
||||
if (ignore) {
|
||||
/* they want this path ignored */
|
||||
PMIX_INFO_LOAD(&pinfo[ninfo], PMIX_CLEANUP_IGNORE, path, PMIX_STRING);
|
||||
++ninfo;
|
||||
} else {
|
||||
/* order cleanup of the provided path */
|
||||
PMIX_INFO_LOAD(&pinfo[ninfo], PMIX_REGISTER_CLEANUP, path, PMIX_STRING);
|
||||
++ninfo;
|
||||
/* if the path is a directory, then we need to tell the server
|
||||
* to recursively clean up */
|
||||
if (stat(path, &statbuf) != 0) {
|
||||
return OPAL_ERR_NOT_FOUND;
|
||||
}
|
||||
if (S_ISDIR(statbuf.st_mode)) {
|
||||
/* recursively cleanup directories */
|
||||
PMIX_INFO_LOAD(&pinfo[ninfo], PMIX_CLEANUP_RECURSIVE, NULL, PMIX_BOOL);
|
||||
++ninfo;
|
||||
}
|
||||
}
|
||||
|
||||
/* if they want this applied to the job, then indicate so */
|
||||
if (jobscope) {
|
||||
rc = PMIx_Job_control_nb(NULL, 0, pinfo, ninfo, cleanup_cbfunc, (void*)&lk);
|
||||
} else {
|
||||
/* only applies to us */
|
||||
rc = PMIx_Job_control_nb(&mca_pmix_pmix3x_component.myproc, 1, pinfo, ninfo, cleanup_cbfunc, (void*)&lk);
|
||||
}
|
||||
if (PMIX_SUCCESS != rc) {
|
||||
ret = pmix3x_convert_rc(rc);
|
||||
} else {
|
||||
OPAL_PMIX_WAIT_THREAD(&lk);
|
||||
ret = lk.status;
|
||||
}
|
||||
OPAL_PMIX_DESTRUCT_LOCK(&lk);
|
||||
for (n=0; n < ninfo; n++) {
|
||||
PMIX_INFO_DESTRUCT(&pinfo[n]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
opal_vpid_t pmix3x_convert_rank(pmix_rank_t rank)
|
||||
{
|
||||
switch(rank) {
|
||||
|
@ -38,15 +38,16 @@
|
||||
BEGIN_C_DECLS
|
||||
|
||||
typedef struct {
|
||||
opal_pmix_base_component_t super;
|
||||
opal_list_t jobids;
|
||||
bool native_launch;
|
||||
size_t evindex;
|
||||
opal_list_t events;
|
||||
int cache_size;
|
||||
opal_list_t cache;
|
||||
opal_list_t dmdx;
|
||||
bool silence_warning;
|
||||
opal_pmix_base_component_t super;
|
||||
pmix_proc_t myproc;
|
||||
opal_list_t jobids;
|
||||
bool native_launch;
|
||||
size_t evindex;
|
||||
opal_list_t events;
|
||||
int cache_size;
|
||||
opal_list_t cache;
|
||||
opal_list_t dmdx;
|
||||
bool silence_warning;
|
||||
} mca_pmix_pmix3x_component_t;
|
||||
|
||||
OPAL_DECLSPEC extern mca_pmix_pmix3x_component_t mca_pmix_pmix3x_component;
|
||||
|
@ -5,7 +5,7 @@
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2014-2017 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2016 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2016 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -38,7 +38,6 @@
|
||||
#include "pmix.h"
|
||||
#include "pmix_tool.h"
|
||||
|
||||
static pmix_proc_t my_proc;
|
||||
static char *dbgvalue=NULL;
|
||||
|
||||
static void errreg_cbfunc (pmix_status_t status,
|
||||
@ -105,7 +104,7 @@ int pmix3x_client_init(opal_list_t *ilist)
|
||||
}
|
||||
|
||||
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
|
||||
rc = PMIx_Init(&my_proc, pinfo, ninfo);
|
||||
rc = PMIx_Init(&mca_pmix_pmix3x_component.myproc, pinfo, ninfo);
|
||||
if (NULL != pinfo) {
|
||||
PMIX_INFO_FREE(pinfo, ninfo);
|
||||
}
|
||||
@ -127,20 +126,20 @@ int pmix3x_client_init(opal_list_t *ilist)
|
||||
/* if we were launched by the OMPI RTE, then
|
||||
* the jobid is in a special format - so get it */
|
||||
mca_pmix_pmix3x_component.native_launch = true;
|
||||
opal_convert_string_to_jobid(&pname.jobid, my_proc.nspace);
|
||||
opal_convert_string_to_jobid(&pname.jobid, mca_pmix_pmix3x_component.myproc.nspace);
|
||||
} else {
|
||||
/* we were launched by someone else, so make the
|
||||
* jobid just be the hash of the nspace */
|
||||
OPAL_HASH_JOBID(my_proc.nspace, pname.jobid);
|
||||
OPAL_HASH_JOBID(mca_pmix_pmix3x_component.myproc.nspace, pname.jobid);
|
||||
}
|
||||
/* insert this into our list of jobids - it will be the
|
||||
* first, and so we'll check it first */
|
||||
job = OBJ_NEW(opal_pmix3x_jobid_trkr_t);
|
||||
(void)strncpy(job->nspace, my_proc.nspace, PMIX_MAX_NSLEN);
|
||||
(void)strncpy(job->nspace, mca_pmix_pmix3x_component.myproc.nspace, PMIX_MAX_NSLEN);
|
||||
job->jobid = pname.jobid;
|
||||
opal_list_append(&mca_pmix_pmix3x_component.jobids, &job->super);
|
||||
|
||||
pname.vpid = pmix3x_convert_rank(my_proc.rank);
|
||||
pname.vpid = pmix3x_convert_rank(mca_pmix_pmix3x_component.myproc.rank);
|
||||
opal_proc_set_name(&pname);
|
||||
|
||||
/* release the thread in case the event handler fires when
|
||||
@ -221,10 +220,10 @@ int pmix3x_tool_init(opal_list_t *info)
|
||||
/* check to see if our name is being given from above */
|
||||
if (0 == strcmp(val->key, OPAL_PMIX_TOOL_NSPACE)) {
|
||||
opal_convert_string_to_jobid(&pname.jobid, val->data.string);
|
||||
(void)strncpy(my_proc.nspace, val->data.string, PMIX_MAX_NSLEN);
|
||||
(void)strncpy(mca_pmix_pmix3x_component.myproc.nspace, val->data.string, PMIX_MAX_NSLEN);
|
||||
} else if (0 == strcmp(val->key, OPAL_PMIX_TOOL_RANK)) {
|
||||
pname.vpid = val->data.name.vpid;
|
||||
my_proc.rank = pname.vpid;
|
||||
mca_pmix_pmix3x_component.myproc.rank = pname.vpid;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@ -236,7 +235,7 @@ int pmix3x_tool_init(opal_list_t *info)
|
||||
mca_pmix_pmix3x_component.native_launch = true;
|
||||
|
||||
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
|
||||
rc = PMIx_tool_init(&my_proc, pinfo, ninfo);
|
||||
rc = PMIx_tool_init(&mca_pmix_pmix3x_component.myproc, pinfo, ninfo);
|
||||
if (NULL != pinfo) {
|
||||
PMIX_INFO_FREE(pinfo, ninfo);
|
||||
}
|
||||
@ -254,13 +253,13 @@ int pmix3x_tool_init(opal_list_t *info)
|
||||
}
|
||||
|
||||
/* store our jobid and rank */
|
||||
opal_convert_string_to_jobid(&pname.jobid, my_proc.nspace);
|
||||
pname.vpid = pmix3x_convert_rank(my_proc.rank);
|
||||
opal_convert_string_to_jobid(&pname.jobid, mca_pmix_pmix3x_component.myproc.nspace);
|
||||
pname.vpid = pmix3x_convert_rank(mca_pmix_pmix3x_component.myproc.rank);
|
||||
|
||||
/* insert this into our list of jobids - it will be the
|
||||
* first, and so we'll check it first */
|
||||
job = OBJ_NEW(opal_pmix3x_jobid_trkr_t);
|
||||
(void)strncpy(job->nspace, my_proc.nspace, PMIX_MAX_NSLEN);
|
||||
(void)strncpy(job->nspace, mca_pmix_pmix3x_component.myproc.nspace, PMIX_MAX_NSLEN);
|
||||
job->jobid = pname.jobid;
|
||||
opal_list_append(&mca_pmix_pmix3x_component.jobids, &job->super);
|
||||
|
||||
@ -399,7 +398,7 @@ int pmix3x_store_local(const opal_process_name_t *proc, opal_value_t *val)
|
||||
p.rank = pmix3x_convert_opalrank(proc->vpid);
|
||||
} else {
|
||||
/* use our name */
|
||||
(void)strncpy(p.nspace, my_proc.nspace, PMIX_MAX_NSLEN);
|
||||
(void)strncpy(p.nspace, mca_pmix_pmix3x_component.myproc.nspace, PMIX_MAX_NSLEN);
|
||||
p.rank = pmix3x_convert_opalrank(OPAL_PROC_MY_NAME.vpid);
|
||||
}
|
||||
|
||||
@ -614,7 +613,7 @@ int pmix3x_get(const opal_process_name_t *proc, const char *key,
|
||||
if (0 == strcmp(key, OPAL_PMIX_RANK)) {
|
||||
(*val) = OBJ_NEW(opal_value_t);
|
||||
(*val)->type = OPAL_INT;
|
||||
(*val)->data.integer = pmix3x_convert_rank(my_proc.rank);
|
||||
(*val)->data.integer = pmix3x_convert_rank(mca_pmix_pmix3x_component.myproc.rank);
|
||||
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
@ -622,7 +621,7 @@ int pmix3x_get(const opal_process_name_t *proc, const char *key,
|
||||
*val = NULL;
|
||||
|
||||
if (NULL == proc) {
|
||||
(void)strncpy(p.nspace, my_proc.nspace, PMIX_MAX_NSLEN);
|
||||
(void)strncpy(p.nspace, mca_pmix_pmix3x_component.myproc.nspace, PMIX_MAX_NSLEN);
|
||||
p.rank = pmix3x_convert_rank(PMIX_RANK_WILDCARD);
|
||||
} else {
|
||||
if (NULL == (nsptr = pmix3x_convert_jobid(proc->jobid))) {
|
||||
@ -719,7 +718,7 @@ int pmix3x_getnb(const opal_process_name_t *proc, const char *key,
|
||||
if (NULL != cbfunc) {
|
||||
val = OBJ_NEW(opal_value_t);
|
||||
val->type = OPAL_INT;
|
||||
val->data.integer = pmix3x_convert_rank(my_proc.rank);
|
||||
val->data.integer = pmix3x_convert_rank(mca_pmix_pmix3x_component.myproc.rank);
|
||||
cbfunc(OPAL_SUCCESS, val, cbdata);
|
||||
}
|
||||
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
|
||||
@ -733,7 +732,7 @@ int pmix3x_getnb(const opal_process_name_t *proc, const char *key,
|
||||
op->cbdata = cbdata;
|
||||
|
||||
if (NULL == proc) {
|
||||
(void)strncpy(op->p.nspace, my_proc.nspace, PMIX_MAX_NSLEN);
|
||||
(void)strncpy(op->p.nspace, mca_pmix_pmix3x_component.myproc.nspace, PMIX_MAX_NSLEN);
|
||||
op->p.rank = pmix3x_convert_rank(PMIX_RANK_WILDCARD);
|
||||
} else {
|
||||
if (NULL == (nsptr = pmix3x_convert_jobid(proc->jobid))) {
|
||||
|
@ -118,6 +118,7 @@ BEGIN_C_DECLS
|
||||
|
||||
|
||||
/* information about relative ranks as assigned by the RM */
|
||||
#define OPAL_PMIX_CLUSTER_ID "pmix.clid" // (char*) a string name for the cluster this proc is executing on
|
||||
#define OPAL_PMIX_PROCID "pmix.procid" // (opal_process_name_t) process identifier
|
||||
#define OPAL_PMIX_NSPACE "pmix.nspace" // (char*) nspace of a job
|
||||
#define OPAL_PMIX_JOBID "pmix.jobid" // (uint32_t) jobid assigned by scheduler
|
||||
@ -189,6 +190,7 @@ BEGIN_C_DECLS
|
||||
#define OPAL_PMIX_NOTIFY_COMPLETION "pmix.notecomp" // (bool) notify parent process upon termination of child job
|
||||
#define OPAL_PMIX_RANGE "pmix.range" // (int) opal_pmix_data_range_t value for calls to publish/lookup/unpublish
|
||||
#define OPAL_PMIX_PERSISTENCE "pmix.persist" // (int) opal_pmix_persistence_t value for calls to publish
|
||||
#define OPAL_PMIX_DATA_SCOPE "pmix.scope" // (pmix_scope_t) scope of the data to be found in a PMIx_Get call
|
||||
#define OPAL_PMIX_OPTIONAL "pmix.optional" // (bool) look only in the immediate data store for the requested value - do
|
||||
// not request data from the server if not found
|
||||
#define OPAL_PMIX_EMBED_BARRIER "pmix.embed.barrier" // (bool) execute a blocking fence operation before executing the
|
||||
@ -364,6 +366,16 @@ BEGIN_C_DECLS
|
||||
#define OPAL_PMIX_JOB_CTRL_PROVISION_IMAGE "pmix.jctrl.pvnimg" // (char*) name of the image that is to be provisioned
|
||||
#define OPAL_PMIX_JOB_CTRL_PREEMPTIBLE "pmix.jctrl.preempt" // (bool) job can be pre-empted
|
||||
#define OPAL_PMIX_JOB_CTRL_TERMINATE "pmix.jctrl.term" // (bool) politely terminate the specified procs
|
||||
#define OPAL_PMIX_REGISTER_CLEANUP "pmix.reg.cleanup" // (char*) comma-delimited list of files/directories to
|
||||
// be removed upon process termination
|
||||
#define OPAL_PMIX_CLEANUP_RECURSIVE "pmix.clnup.recurse" // (bool) recursively cleanup all subdirectories under the
|
||||
// specified one(s)
|
||||
#define OPAL_PMIX_CLEANUP_EMPTY "pmix.clnup.empty" // (bool) only remove empty subdirectories
|
||||
#define OPAL_PMIX_CLEANUP_IGNORE "pmix.clnup.ignore" // (char*) comma-delimited list of filenames that are not
|
||||
// to be removed
|
||||
#define OPAL_PMIX_CLEANUP_LEAVE_TOPDIR "pmix.clnup.lvtop" // (bool) when recursively cleaning subdirs, do not remove
|
||||
// the top-level directory (the one given in the
|
||||
// cleanup request)
|
||||
|
||||
|
||||
/* monitoring attributes */
|
||||
|
@ -16,6 +16,7 @@
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2017 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2017 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -44,6 +45,7 @@
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/threads/mutex.h"
|
||||
#include "opal/constants.h"
|
||||
#include "opal/mca/pmix/pmix.h"
|
||||
|
||||
/*
|
||||
* Private data
|
||||
@ -505,10 +507,10 @@ void opal_output_finalize(void)
|
||||
output_dir = NULL;
|
||||
|
||||
if(NULL != temp_str) {
|
||||
free(temp_str);
|
||||
temp_str = NULL;
|
||||
temp_str_len = 0;
|
||||
}
|
||||
free(temp_str);
|
||||
temp_str = NULL;
|
||||
temp_str_len = 0;
|
||||
}
|
||||
OBJ_DESTRUCT(&verbose);
|
||||
OBJ_DESTRUCT(&mutex);
|
||||
}
|
||||
@ -785,18 +787,24 @@ static int open_file(int i)
|
||||
|
||||
/* Actually open the file */
|
||||
info[i].ldi_fd = open(filename, flags, 0644);
|
||||
free(filename); /* release the filename in all cases */
|
||||
if (-1 == info[i].ldi_fd) {
|
||||
info[i].ldi_used = false;
|
||||
free(filename); /* release the filename in all cases */
|
||||
return OPAL_ERR_IN_ERRNO;
|
||||
}
|
||||
|
||||
/* Make the file be close-on-exec to prevent child inheritance
|
||||
* problems */
|
||||
if (-1 == fcntl(info[i].ldi_fd, F_SETFD, 1)) {
|
||||
return OPAL_ERR_IN_ERRNO;
|
||||
free(filename); /* release the filename in all cases */
|
||||
return OPAL_ERR_IN_ERRNO;
|
||||
}
|
||||
|
||||
/* register it to be ignored */
|
||||
if (NULL != opal_pmix.register_cleanup) {
|
||||
opal_pmix.register_cleanup(filename, true, false);
|
||||
}
|
||||
free(filename); /* release the filename in all cases */
|
||||
}
|
||||
|
||||
/* Return successfully even if the session dir did not exist yet;
|
||||
@ -814,20 +822,20 @@ static void free_descriptor(int output_id)
|
||||
output_desc_t *ldi;
|
||||
|
||||
if (output_id >= 0 && output_id < OPAL_OUTPUT_MAX_STREAMS &&
|
||||
info[output_id].ldi_used && info[output_id |