Ensure cleanup of registered files/dirs
Resolve a race condition between registering a file for removal upon termination and the actual creation of that file, by providing attributes that identify whether the path is a file or a directory. This removes the need for PMIx to detect the difference.

Refs #4686

Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
родитель
614696f03c
Коммит
6216225bda
@ -15,7 +15,7 @@
|
||||
* Copyright (c) 2010-2017 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -510,9 +510,8 @@ static mca_btl_base_module_t **mca_btl_vader_component_init (int *num_btls,
|
||||
free (btls);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (NULL != opal_pmix.register_cleanup) {
|
||||
opal_pmix.register_cleanup (sm_file, false, false);
|
||||
opal_pmix.register_cleanup (sm_file, false, false, false);
|
||||
}
|
||||
|
||||
rc = opal_shmem_segment_create (&component->seg_ds, sm_file, component->segment_size);
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -868,7 +868,7 @@ typedef int (*opal_pmix_base_process_monitor_fn_t)(opal_list_t *monitor,
|
||||
opal_pmix_info_cbfunc_t cbfunc, void *cbdata);
|
||||
|
||||
/* register cleanup */
|
||||
typedef int (*opal_pmix_base_register_cleanup_fn_t)(char *path, bool ignore, bool jobscope);
|
||||
typedef int (*opal_pmix_base_register_cleanup_fn_t)(char *path, bool directory, bool ignore, bool jobscope);
|
||||
|
||||
/*
|
||||
* the standard public API data structure
|
||||
|
@ -462,7 +462,9 @@ typedef uint32_t pmix_rank_t;
|
||||
#define PMIX_JOB_CTRL_PROVISION_IMAGE "pmix.jctrl.pvnimg" // (char*) name of the image that is to be provisioned
|
||||
#define PMIX_JOB_CTRL_PREEMPTIBLE "pmix.jctrl.preempt" // (bool) job can be pre-empted
|
||||
#define PMIX_JOB_CTRL_TERMINATE "pmix.jctrl.term" // (bool) politely terminate the specified procs
|
||||
#define PMIX_REGISTER_CLEANUP "pmix.reg.cleanup" // (char*) comma-delimited list of files/directories to
|
||||
#define PMIX_REGISTER_CLEANUP "pmix.reg.cleanup" // (char*) comma-delimited list of files to
|
||||
// be removed upon process termination
|
||||
#define PMIX_REGISTER_CLEANUP_DIR "pmix.reg.cleanupdir" // (char*) comma-delimited list of directories to
|
||||
// be removed upon process termination
|
||||
#define PMIX_CLEANUP_RECURSIVE "pmix.clnup.recurse" // (bool) recursively cleanup all subdirectories under the
|
||||
// specified one(s)
|
||||
|
@ -2106,30 +2106,36 @@ pmix_status_t pmix_server_job_ctrl(pmix_peer_t *peer,
|
||||
rc = PMIX_ERR_BAD_PARAM;
|
||||
goto exit;
|
||||
}
|
||||
if (0 != stat(cd->info[n].value.data.string, &statbuf)) {
|
||||
cf = PMIX_NEW(pmix_cleanup_file_t);
|
||||
if (NULL == cf) {
|
||||
/* return an error */
|
||||
rc = PMIX_ERR_NOMEM;
|
||||
goto exit;
|
||||
}
|
||||
cf->path = strdup(cd->info[n].value.data.string);
|
||||
pmix_list_append(&cachefiles, &cf->super);
|
||||
} else if (0 == strncmp(cd->info[n].key, PMIX_REGISTER_CLEANUP_DIR, PMIX_MAX_KEYLEN)) {
|
||||
++cnt;
|
||||
/* see if we allow epilog requests */
|
||||
if (NULL == epi) {
|
||||
/* return an error */
|
||||
rc = PMIX_ERR_BAD_PARAM;
|
||||
goto exit;
|
||||
}
|
||||
if (S_ISDIR(statbuf.st_mode)) {
|
||||
cdir = PMIX_NEW(pmix_cleanup_dir_t);
|
||||
if (NULL == cdir) {
|
||||
/* return an error */
|
||||
rc = PMIX_ERR_NOMEM;
|
||||
goto exit;
|
||||
}
|
||||
cdir->path = strdup(cd->info[n].value.data.string);
|
||||
pmix_list_append(&cachedirs, &cdir->super);
|
||||
} else {
|
||||
cf = PMIX_NEW(pmix_cleanup_file_t);
|
||||
if (NULL == cf) {
|
||||
/* return an error */
|
||||
rc = PMIX_ERR_NOMEM;
|
||||
goto exit;
|
||||
}
|
||||
cf->path = strdup(cd->info[n].value.data.string);
|
||||
pmix_list_append(&cachefiles, &cf->super);
|
||||
if (PMIX_STRING != cd->info[n].value.type ||
|
||||
NULL == cd->info[n].value.data.string) {
|
||||
/* return an error */
|
||||
rc = PMIX_ERR_BAD_PARAM;
|
||||
goto exit;
|
||||
}
|
||||
cdir = PMIX_NEW(pmix_cleanup_dir_t);
|
||||
if (NULL == cdir) {
|
||||
/* return an error */
|
||||
rc = PMIX_ERR_NOMEM;
|
||||
goto exit;
|
||||
}
|
||||
cdir->path = strdup(cd->info[n].value.data.string);
|
||||
pmix_list_append(&cachedirs, &cdir->super);
|
||||
} else if (0 == strncmp(cd->info[n].key, PMIX_CLEANUP_RECURSIVE, PMIX_MAX_KEYLEN)) {
|
||||
/* see if we allow epilog requests */
|
||||
if (NULL == epi) {
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2017 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2014-2015 Mellanox Technologies, Inc.
|
||||
@ -74,7 +74,7 @@ static void pmix3x_query(opal_list_t *queries,
|
||||
static void pmix3x_log(opal_list_t *info,
|
||||
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
|
||||
|
||||
static int pmix3x_register_cleanup(char *path, bool ignore, bool jobscope);
|
||||
static int pmix3x_register_cleanup(char *path, bool directory, bool ignore, bool jobscope);
|
||||
|
||||
const opal_pmix_base_module_t opal_pmix_pmix3x_module = {
|
||||
/* client APIs */
|
||||
@ -360,14 +360,13 @@ static void cleanup_cbfunc(pmix_status_t status,
|
||||
OPAL_PMIX_WAKEUP_THREAD(lk);
|
||||
}
|
||||
|
||||
static int pmix3x_register_cleanup(char *path, bool ignore, bool jobscope)
|
||||
static int pmix3x_register_cleanup(char *path, bool directory, bool ignore, bool jobscope)
|
||||
{
|
||||
opal_pmix_lock_t lk;
|
||||
pmix_info_t pinfo[3];
|
||||
size_t n, ninfo=0;
|
||||
pmix_status_t rc;
|
||||
int ret;
|
||||
struct stat statbuf;
|
||||
|
||||
OPAL_PMIX_CONSTRUCT_LOCK(&lk);
|
||||
|
||||
@ -376,18 +375,16 @@ static int pmix3x_register_cleanup(char *path, bool ignore, bool jobscope)
|
||||
PMIX_INFO_LOAD(&pinfo[ninfo], PMIX_CLEANUP_IGNORE, path, PMIX_STRING);
|
||||
++ninfo;
|
||||
} else {
|
||||
/* order cleanup of the provided path */
|
||||
PMIX_INFO_LOAD(&pinfo[ninfo], PMIX_REGISTER_CLEANUP, path, PMIX_STRING);
|
||||
++ninfo;
|
||||
/* if the path is a directory, then we need to tell the server
|
||||
* to recursively clean up */
|
||||
if (stat(path, &statbuf) != 0) {
|
||||
return OPAL_ERR_NOT_FOUND;
|
||||
}
|
||||
if (S_ISDIR(statbuf.st_mode)) {
|
||||
if (directory) {
|
||||
PMIX_INFO_LOAD(&pinfo[ninfo], PMIX_REGISTER_CLEANUP_DIR, path, PMIX_STRING);
|
||||
++ninfo;
|
||||
/* recursively cleanup directories */
|
||||
PMIX_INFO_LOAD(&pinfo[ninfo], PMIX_CLEANUP_RECURSIVE, NULL, PMIX_BOOL);
|
||||
++ninfo;
|
||||
} else {
|
||||
/* order cleanup of the provided path */
|
||||
PMIX_INFO_LOAD(&pinfo[ninfo], PMIX_REGISTER_CLEANUP, path, PMIX_STRING);
|
||||
++ninfo;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2017 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
@ -201,7 +201,6 @@ int pmix3x_server_finalize(void)
|
||||
}
|
||||
}
|
||||
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
|
||||
|
||||
rc = PMIx_server_finalize();
|
||||
return pmix3x_convert_rc(rc);
|
||||
}
|
||||
|
@ -16,7 +16,7 @@
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2017 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2017-2018 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -802,7 +802,7 @@ static int open_file(int i)
|
||||
|
||||
/* register it to be ignored */
|
||||
if (NULL != opal_pmix.register_cleanup) {
|
||||
opal_pmix.register_cleanup(filename, true, false);
|
||||
opal_pmix.register_cleanup(filename, false, true, false);
|
||||
}
|
||||
free(filename); /* release the filename in all cases */
|
||||
}
|
||||
|
@ -12,7 +12,7 @@
|
||||
* Copyright (c) 2010-2012 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
|
||||
@ -149,13 +149,13 @@ int orte_ess_base_app_setup(bool db_restrict_local)
|
||||
/* register the directory for cleanup */
|
||||
if (NULL != opal_pmix.register_cleanup) {
|
||||
if (orte_standalone_operation) {
|
||||
if (OPAL_SUCCESS != (ret = opal_pmix.register_cleanup(orte_process_info.top_session_dir, false, true))) {
|
||||
if (OPAL_SUCCESS != (ret = opal_pmix.register_cleanup(orte_process_info.top_session_dir, true, false, true))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "register cleanup";
|
||||
goto error;
|
||||
}
|
||||
} else {
|
||||
if (OPAL_SUCCESS != (ret = opal_pmix.register_cleanup(orte_process_info.jobfam_session_dir, false, false))) {
|
||||
if (OPAL_SUCCESS != (ret = opal_pmix.register_cleanup(orte_process_info.jobfam_session_dir, true, false, false))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "register cleanup";
|
||||
goto error;
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user