2004-10-15 16:31:54 +00:00
|
|
|
/*
|
2005-11-05 19:57:48 +00:00
|
|
|
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
|
|
* University Research and Technology
|
|
|
|
* Corporation. All rights reserved.
|
2006-08-23 03:32:36 +00:00
|
|
|
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
2005-11-05 19:57:48 +00:00
|
|
|
* of Tennessee Research Foundation. All rights
|
|
|
|
* reserved.
|
2015-06-23 20:59:57 -07:00
|
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
2004-11-28 20:09:25 +00:00
|
|
|
* University of Stuttgart. All rights reserved.
|
2005-03-24 12:43:37 +00:00
|
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
|
|
* All rights reserved.
|
2015-03-19 21:24:13 -07:00
|
|
|
* Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
|
2015-02-17 12:27:23 +09:00
|
|
|
* Copyright (c) 2015 Research Organization for Information Science
|
|
|
|
* and Technology (RIST). All rights reserved.
|
2018-08-25 07:45:28 -07:00
|
|
|
* Copyright (c) 2015-2018 Intel, Inc. All rights reserved.
|
2004-11-22 01:38:40 +00:00
|
|
|
* $COPYRIGHT$
|
2015-06-23 20:59:57 -07:00
|
|
|
*
|
2004-11-22 01:38:40 +00:00
|
|
|
* Additional copyrights may follow
|
2015-06-23 20:59:57 -07:00
|
|
|
*
|
2004-10-15 16:31:54 +00:00
|
|
|
* $HEADER$
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2005-03-14 20:57:21 +00:00
|
|
|
#include "orte_config.h"
|
2008-02-28 01:57:57 +00:00
|
|
|
#include "orte/constants.h"
|
2004-10-15 16:31:54 +00:00
|
|
|
|
|
|
|
#include <stdio.h>
|
2004-10-20 01:03:09 +00:00
|
|
|
#ifdef HAVE_PWD_H
|
2004-10-15 16:31:54 +00:00
|
|
|
#include <pwd.h>
|
2004-10-20 01:03:09 +00:00
|
|
|
#endif
|
2004-10-15 16:31:54 +00:00
|
|
|
#include <stddef.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
2004-10-20 01:03:09 +00:00
|
|
|
#ifdef HAVE_SYS_PARAM_H
|
2004-10-15 16:31:54 +00:00
|
|
|
#include <sys/param.h>
|
2007-04-01 16:16:54 +00:00
|
|
|
#endif /* HAVE_SYS_PARAM_H */
|
2004-10-20 01:03:09 +00:00
|
|
|
#ifdef HAVE_SYS_TYPES_H
|
2004-10-15 16:31:54 +00:00
|
|
|
#include <sys/types.h>
|
2007-04-01 16:16:54 +00:00
|
|
|
#endif /* HAVE_SYS_TYPES_H */
|
2004-10-15 16:31:54 +00:00
|
|
|
#include <sys/stat.h>
|
2004-10-20 01:03:09 +00:00
|
|
|
#ifdef HAVE_UNISTD_H
|
2004-10-15 16:31:54 +00:00
|
|
|
#include <unistd.h>
|
2007-04-01 16:16:54 +00:00
|
|
|
#endif /* HAVE_UNISTD_H */
|
2004-10-15 16:31:54 +00:00
|
|
|
#include <errno.h>
|
2004-10-20 01:03:09 +00:00
|
|
|
#ifdef HAVE_DIRENT_H
|
2004-10-15 16:31:54 +00:00
|
|
|
#include <dirent.h>
|
2007-04-01 16:16:54 +00:00
|
|
|
#endif /* HAVE_DIRENT_H */
|
2008-03-23 23:10:15 +00:00
|
|
|
#ifdef HAVE_PWD_H
|
|
|
|
#include <pwd.h>
|
|
|
|
#endif /* HAVE_PWD_H */
|
2004-10-15 16:31:54 +00:00
|
|
|
|
2008-06-24 17:50:56 +00:00
|
|
|
#include "opal/util/argv.h"
|
2009-02-14 02:26:12 +00:00
|
|
|
#include "opal/util/output.h"
|
2005-07-04 01:59:52 +00:00
|
|
|
#include "opal/util/os_path.h"
|
2006-07-03 22:23:07 +00:00
|
|
|
#include "opal/util/os_dirpath.h"
|
2006-08-23 03:32:36 +00:00
|
|
|
#include "opal/util/basename.h"
|
2008-03-31 17:10:08 +00:00
|
|
|
#include "opal/util/opal_environ.h"
|
2018-10-06 19:32:37 +00:00
|
|
|
#include "opal/util/printf.h"
|
2004-10-15 16:31:54 +00:00
|
|
|
|
2008-02-28 01:57:57 +00:00
|
|
|
#include "orte/util/proc_info.h"
|
|
|
|
#include "orte/util/name_fns.h"
|
2008-06-24 17:50:56 +00:00
|
|
|
#include "orte/util/show_help.h"
|
2008-02-28 01:57:57 +00:00
|
|
|
|
2006-02-12 01:33:29 +00:00
|
|
|
#include "orte/mca/errmgr/errmgr.h"
|
2018-08-25 07:45:28 -07:00
|
|
|
#include "orte/mca/ras/base/base.h"
|
2006-02-12 01:33:29 +00:00
|
|
|
#include "orte/runtime/runtime.h"
|
2008-02-28 01:57:57 +00:00
|
|
|
#include "orte/runtime/orte_globals.h"
|
2004-10-15 16:31:54 +00:00
|
|
|
|
2006-02-12 01:33:29 +00:00
|
|
|
#include "orte/util/session_dir.h"
|
2004-10-15 16:31:54 +00:00
|
|
|
|
2006-06-28 21:03:31 +00:00
|
|
|
/*******************************
|
|
|
|
* Local function Declarations
|
|
|
|
*******************************/
|
|
|
|
/* Create "directory" with owner-only rwx permissions unless an accessible
 * directory is already present; returns an ORTE status code. */
static int orte_create_dir(char *directory);
|
2004-10-15 16:31:54 +00:00
|
|
|
|
2006-07-03 22:23:07 +00:00
|
|
|
/* Filter callback handed to opal_os_dirpath_destroy(): returns false for
 * non-empty files whose name starts with "output-", true for everything else. */
static bool orte_dir_check_file(const char *root, const char *path);
|
2004-10-15 16:31:54 +00:00
|
|
|
|
2005-06-26 23:11:37 +00:00
|
|
|
/* Substitute the literal "(null)" for a NULL string so it can be handed to a
 * "%s" conversion.  The argument is parenthesized so that expression
 * arguments (e.g. a ternary) are evaluated as a unit; note that it is still
 * evaluated twice, so it must not have side effects. */
#define OMPI_PRINTF_FIX_STRING(a) ((NULL == (a)) ? "(null)" : (a))
|
2004-10-15 16:31:54 +00:00
|
|
|
|
2006-06-28 21:03:31 +00:00
|
|
|
/****************************
|
|
|
|
* Functionality
|
|
|
|
****************************/
|
|
|
|
/*
|
|
|
|
* Check and create the directory requested
|
|
|
|
*/
|
|
|
|
static int orte_create_dir(char *directory)
|
|
|
|
{
|
2007-04-01 16:16:54 +00:00
|
|
|
mode_t my_mode = S_IRWXU; /* I'm looking for full rights */
|
2006-06-28 21:03:31 +00:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
/* Sanity check before creating the directory with the proper mode,
|
|
|
|
* Make sure it doesn't exist already */
|
2010-05-17 23:08:56 +00:00
|
|
|
if( ORTE_ERR_NOT_FOUND !=
|
2011-11-22 21:24:35 +00:00
|
|
|
(ret = opal_os_dirpath_access(directory, my_mode)) ) {
|
2006-07-03 22:23:07 +00:00
|
|
|
/* Failure because opal_os_dirpath_access() indicated that either:
|
2015-06-23 20:59:57 -07:00
|
|
|
* - The directory exists and we can access it (no need to create it again),
|
2006-07-03 22:23:07 +00:00
|
|
|
* return OPAL_SUCCESS, or
|
|
|
|
* - don't have access rights, return OPAL_ERROR
|
2006-06-28 21:03:31 +00:00
|
|
|
*/
|
2007-07-18 19:50:54 +00:00
|
|
|
if (ORTE_SUCCESS != ret) {
|
|
|
|
ORTE_ERROR_LOG(ret);
|
|
|
|
}
|
2006-06-28 21:03:31 +00:00
|
|
|
return(ret);
|
|
|
|
}
|
2015-06-23 20:59:57 -07:00
|
|
|
|
2007-07-18 19:50:54 +00:00
|
|
|
/* Get here if the directory doesn't exist, so create it */
|
|
|
|
if (ORTE_SUCCESS != (ret = opal_os_dirpath_create(directory, my_mode))) {
|
|
|
|
ORTE_ERROR_LOG(ret);
|
2004-10-15 16:31:54 +00:00
|
|
|
}
|
2007-07-18 19:50:54 +00:00
|
|
|
return ret;
|
2004-10-15 16:31:54 +00:00
|
|
|
}
|
|
|
|
|
2015-06-23 20:59:57 -07:00
|
|
|
|
2016-09-15 15:18:42 -07:00
|
|
|
static int _setup_tmpdir_base(void)
|
2016-09-01 16:03:10 +07:00
|
|
|
{
|
|
|
|
int rc = ORTE_SUCCESS;
|
2016-08-15 22:46:46 -05:00
|
|
|
|
2016-09-01 16:03:10 +07:00
|
|
|
/* make sure that we have tmpdir_base set
|
|
|
|
* if we need it
|
2006-06-28 21:03:31 +00:00
|
|
|
*/
|
2016-09-01 16:03:10 +07:00
|
|
|
if (NULL == orte_process_info.tmpdir_base) {
|
|
|
|
orte_process_info.tmpdir_base =
|
|
|
|
strdup(opal_tmp_directory());
|
|
|
|
if (NULL == orte_process_info.tmpdir_base) {
|
|
|
|
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
|
|
|
goto exit;
|
2006-06-28 21:03:31 +00:00
|
|
|
}
|
2004-10-15 16:31:54 +00:00
|
|
|
}
|
2016-09-01 16:03:10 +07:00
|
|
|
exit:
|
|
|
|
if( ORTE_SUCCESS != rc ){
|
|
|
|
ORTE_ERROR_LOG(rc);
|
|
|
|
}
|
|
|
|
return rc;
|
|
|
|
}
|
2015-06-23 20:59:57 -07:00
|
|
|
|
2017-03-11 12:50:38 -08:00
|
|
|
int orte_setup_top_session_dir(void)
|
2016-09-01 16:03:10 +07:00
|
|
|
{
|
|
|
|
int rc = ORTE_SUCCESS;
|
|
|
|
/* get the effective uid */
|
|
|
|
uid_t uid = geteuid();
|
|
|
|
|
|
|
|
/* construct the top_session_dir if we need */
|
|
|
|
if (NULL == orte_process_info.top_session_dir) {
|
|
|
|
if (ORTE_SUCCESS != (rc = _setup_tmpdir_base())) {
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
if( NULL == orte_process_info.nodename ||
|
|
|
|
NULL == orte_process_info.tmpdir_base ){
|
|
|
|
/* we can't setup top session dir */
|
|
|
|
rc = ORTE_ERR_BAD_PARAM;
|
|
|
|
goto exit;
|
|
|
|
}
|
|
|
|
|
2018-10-06 19:32:37 +00:00
|
|
|
if (0 > opal_asprintf(&orte_process_info.top_session_dir,
|
2016-09-01 16:03:10 +07:00
|
|
|
"%s/ompi.%s.%lu", orte_process_info.tmpdir_base,
|
|
|
|
orte_process_info.nodename, (unsigned long)uid)) {
|
|
|
|
orte_process_info.top_session_dir = NULL;
|
|
|
|
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
|
|
|
goto exit;
|
2016-08-15 22:46:46 -05:00
|
|
|
}
|
2006-06-28 21:03:31 +00:00
|
|
|
}
|
2016-09-01 16:03:10 +07:00
|
|
|
exit:
|
|
|
|
if( ORTE_SUCCESS != rc ){
|
|
|
|
ORTE_ERROR_LOG(rc);
|
|
|
|
}
|
|
|
|
return rc;
|
|
|
|
}
|
2016-08-15 22:46:46 -05:00
|
|
|
|
2016-09-01 16:03:10 +07:00
|
|
|
/*
 * Build orte_process_info.jobfam_session_dir beneath the top session
 * directory, unless it has already been set.  The leaf name depends on the
 * caller's role:
 *   - master:                         <top>/dvm
 *   - HNP:                            <top>/pid.<our pid>
 *   - proc NULL or jobid invalid:     <top>/jobfam
 *   - otherwise:                      <top>/jf.<job family of proc->jobid>
 *
 * proc may be NULL.
 *
 * Returns ORTE_SUCCESS, the error from orte_setup_top_session_dir(), or
 * ORTE_ERR_OUT_OF_RESOURCE (logged) when the string cannot be built.  On
 * that failure jobfam_session_dir is reset to NULL for every variant, so
 * later NULL checks on it stay meaningful (previously only the jf.<n>
 * variant did this).
 */
static int _setup_jobfam_session_dir(orte_process_name_t *proc)
{
    int rc = ORTE_SUCCESS;
    int nbytes;

    /* construct the jobfam_session_dir only if we need to */
    if (NULL != orte_process_info.jobfam_session_dir) {
        return ORTE_SUCCESS;
    }

    if (ORTE_SUCCESS != (rc = orte_setup_top_session_dir())) {
        return rc;
    }

    if (ORTE_PROC_IS_MASTER) {
        nbytes = opal_asprintf(&orte_process_info.jobfam_session_dir,
                               "%s/dvm", orte_process_info.top_session_dir);
    } else if (ORTE_PROC_IS_HNP) {
        nbytes = opal_asprintf(&orte_process_info.jobfam_session_dir,
                               "%s/pid.%lu", orte_process_info.top_session_dir,
                               (unsigned long)orte_process_info.pid);
    } else if (NULL == proc || (ORTE_JOBID_INVALID == proc->jobid)) {
        /* we were not given a usable name, so fall back to a fixed leaf */
        nbytes = opal_asprintf(&orte_process_info.jobfam_session_dir,
                               "%s/jobfam", orte_process_info.top_session_dir);
    } else {
        nbytes = opal_asprintf(&orte_process_info.jobfam_session_dir,
                               "%s/jf.%d", orte_process_info.top_session_dir,
                               ORTE_JOB_FAMILY(proc->jobid));
    }

    if (0 > nbytes) {
        /* do not leave a possibly-garbage pointer behind */
        orte_process_info.jobfam_session_dir = NULL;
        rc = ORTE_ERR_OUT_OF_RESOURCE;
        ORTE_ERROR_LOG(rc);
    }
    return rc;
}
|
2004-10-15 16:31:54 +00:00
|
|
|
|
2016-09-01 16:03:10 +07:00
|
|
|
static int
|
|
|
|
_setup_job_session_dir(orte_process_name_t *proc)
|
|
|
|
{
|
|
|
|
int rc = ORTE_SUCCESS;
|
2015-06-23 20:59:57 -07:00
|
|
|
|
2016-09-01 16:03:10 +07:00
|
|
|
/* construct the top_session_dir if we need */
|
|
|
|
if( NULL == orte_process_info.job_session_dir ){
|
|
|
|
if( ORTE_SUCCESS != (rc = _setup_jobfam_session_dir(proc)) ){
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
if (ORTE_JOBID_INVALID != proc->jobid) {
|
2018-10-06 19:32:37 +00:00
|
|
|
if (0 > opal_asprintf(&orte_process_info.job_session_dir,
|
2016-09-01 16:03:10 +07:00
|
|
|
"%s/%d", orte_process_info.jobfam_session_dir,
|
|
|
|
ORTE_LOCAL_JOBID(proc->jobid))) {
|
|
|
|
orte_process_info.job_session_dir = NULL;
|
|
|
|
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
|
|
|
goto exit;
|
2008-07-29 18:58:35 +00:00
|
|
|
}
|
2016-09-01 16:03:10 +07:00
|
|
|
} else {
|
|
|
|
orte_process_info.job_session_dir = NULL;
|
|
|
|
}
|
|
|
|
}
|
2015-06-23 20:59:57 -07:00
|
|
|
|
2016-09-01 16:03:10 +07:00
|
|
|
exit:
|
|
|
|
if( ORTE_SUCCESS != rc ){
|
|
|
|
ORTE_ERROR_LOG(rc);
|
|
|
|
}
|
|
|
|
return rc;
|
|
|
|
}
|
2015-06-23 20:59:57 -07:00
|
|
|
|
2016-09-01 16:03:10 +07:00
|
|
|
static int
|
|
|
|
_setup_proc_session_dir(orte_process_name_t *proc)
|
|
|
|
{
|
|
|
|
int rc = ORTE_SUCCESS;
|
|
|
|
|
|
|
|
/* construct the top_session_dir if we need */
|
|
|
|
if( NULL == orte_process_info.proc_session_dir ){
|
|
|
|
if( ORTE_SUCCESS != (rc = _setup_job_session_dir(proc)) ){
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
if (ORTE_VPID_INVALID != proc->vpid) {
|
2018-10-06 19:32:37 +00:00
|
|
|
if (0 > opal_asprintf(&orte_process_info.proc_session_dir,
|
2016-09-01 16:03:10 +07:00
|
|
|
"%s/%d", orte_process_info.job_session_dir,
|
|
|
|
proc->vpid)) {
|
|
|
|
orte_process_info.proc_session_dir = NULL;
|
|
|
|
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
|
|
|
goto exit;
|
2008-07-29 18:58:35 +00:00
|
|
|
}
|
2016-08-15 22:46:46 -05:00
|
|
|
} else {
|
2016-09-01 16:03:10 +07:00
|
|
|
orte_process_info.proc_session_dir = NULL;
|
2006-06-28 21:03:31 +00:00
|
|
|
}
|
2006-07-11 16:54:07 +00:00
|
|
|
}
|
2015-06-23 20:59:57 -07:00
|
|
|
|
2016-09-01 16:03:10 +07:00
|
|
|
exit:
|
|
|
|
if( ORTE_SUCCESS != rc ){
|
|
|
|
ORTE_ERROR_LOG(rc);
|
2004-10-15 16:31:54 +00:00
|
|
|
}
|
2016-09-01 16:03:10 +07:00
|
|
|
return rc;
|
|
|
|
}
|
2004-10-15 16:31:54 +00:00
|
|
|
|
2016-09-01 16:03:10 +07:00
|
|
|
/*
 * Compute (but do not create) the session directory names for proc and
 * verify that the tmpdir base is not in a prohibited location.
 *
 * orte_prohibited_session_dirs is a comma-separated list of path prefixes.
 * If tmpdir_base starts with any of them, a help message is shown and
 * ORTE_ERR_FATAL is returned.
 *
 * Returns ORTE_SUCCESS, ORTE_ERR_FATAL as described above, or the error
 * from the job/proc directory setup steps.
 */
int orte_session_setup_base(orte_process_name_t *proc)
{
    int rc;

    /* Ensure that system info is set */
    orte_proc_info();

    /* setup job and proc session directories */
    if (ORTE_SUCCESS != (rc = _setup_job_session_dir(proc))) {
        return rc;
    }

    if (ORTE_SUCCESS != (rc = _setup_proc_session_dir(proc))) {
        return rc;
    }

    /* Check whether this prefix is allowed by the system.  BOTH values are
     * required: without a prohibited list there is nothing to match, and
     * without a tmpdir_base the strncmp() below would be handed NULL.
     */
    if (NULL != orte_prohibited_session_dirs &&
        NULL != orte_process_info.tmpdir_base) {
        char **list;
        int i, len;

        /* break the string into tokens - it should be
         * separated by ','
         */
        list = opal_argv_split(orte_prohibited_session_dirs, ',');
        len = opal_argv_count(list);

        /* cycle through the list */
        for (i = 0; i < len; i++) {
            /* check if prefix matches */
            if (0 == strncmp(orte_process_info.tmpdir_base, list[i],
                             strlen(list[i]))) {
                /* this is a prohibited location */
                orte_show_help("help-orte-runtime.txt",
                               "orte:session:dir:prohibited",
                               true, orte_process_info.tmpdir_base,
                               orte_prohibited_session_dirs);
                opal_argv_free(list);
                return ORTE_ERR_FATAL;
            }
        }
        opal_argv_free(list);  /* done with this */
    }

    return ORTE_SUCCESS;
}
|
2005-03-18 23:58:36 +00:00
|
|
|
|
2006-06-28 21:03:31 +00:00
|
|
|
/*
|
|
|
|
* Construct the session directory and create it if necessary
|
|
|
|
*/
|
2016-09-01 16:03:10 +07:00
|
|
|
int orte_session_dir(bool create, orte_process_name_t *proc)
|
2006-06-28 21:03:31 +00:00
|
|
|
{
|
2009-12-03 01:57:35 +00:00
|
|
|
int rc = ORTE_SUCCESS;
|
2015-06-23 20:59:57 -07:00
|
|
|
|
2006-06-28 21:03:31 +00:00
|
|
|
/*
|
|
|
|
* Get the session directory full name
|
|
|
|
*/
|
2016-09-01 16:03:10 +07:00
|
|
|
if (ORTE_SUCCESS != (rc = orte_session_setup_base(proc))) {
|
2011-11-22 21:24:35 +00:00
|
|
|
if (ORTE_ERR_FATAL == rc) {
|
2009-12-03 01:57:35 +00:00
|
|
|
/* this indicates we should abort quietly */
|
|
|
|
rc = ORTE_ERR_SILENT;
|
2006-06-28 21:03:31 +00:00
|
|
|
}
|
2009-12-03 01:57:35 +00:00
|
|
|
goto cleanup;
|
2006-06-28 21:03:31 +00:00
|
|
|
}
|
2015-06-23 20:59:57 -07:00
|
|
|
|
2006-06-28 21:03:31 +00:00
|
|
|
/*
|
|
|
|
* Now that we have the full path, go ahead and create it if necessary
|
|
|
|
*/
|
|
|
|
if( create ) {
|
2016-09-01 16:03:10 +07:00
|
|
|
if( ORTE_SUCCESS != (rc = orte_create_dir(orte_process_info.proc_session_dir)) ) {
|
2009-12-03 01:57:35 +00:00
|
|
|
ORTE_ERROR_LOG(rc);
|
|
|
|
goto cleanup;
|
2006-06-28 21:03:31 +00:00
|
|
|
}
|
|
|
|
}
|
2015-06-23 20:59:57 -07:00
|
|
|
|
2005-03-14 20:57:21 +00:00
|
|
|
if (orte_debug_flag) {
|
2016-09-15 15:18:42 -07:00
|
|
|
opal_output(0, "procdir: %s",
|
2009-03-05 21:56:03 +00:00
|
|
|
OMPI_PRINTF_FIX_STRING(orte_process_info.proc_session_dir));
|
2016-09-15 15:18:42 -07:00
|
|
|
opal_output(0, "jobdir: %s",
|
2009-03-05 21:56:03 +00:00
|
|
|
OMPI_PRINTF_FIX_STRING(orte_process_info.job_session_dir));
|
2016-09-01 16:03:10 +07:00
|
|
|
opal_output(0, "top: %s",
|
|
|
|
OMPI_PRINTF_FIX_STRING(orte_process_info.jobfam_session_dir));
|
|
|
|
opal_output(0, "top: %s",
|
2009-03-05 21:56:03 +00:00
|
|
|
OMPI_PRINTF_FIX_STRING(orte_process_info.top_session_dir));
|
2016-09-15 15:18:42 -07:00
|
|
|
opal_output(0, "tmp: %s",
|
2009-03-05 21:56:03 +00:00
|
|
|
OMPI_PRINTF_FIX_STRING(orte_process_info.tmpdir_base));
|
2004-10-15 16:31:54 +00:00
|
|
|
}
|
2015-06-23 20:59:57 -07:00
|
|
|
|
2009-12-03 01:57:35 +00:00
|
|
|
cleanup:
|
|
|
|
return rc;
|
2004-10-15 16:31:54 +00:00
|
|
|
}
|
|
|
|
|
2006-02-16 00:16:22 +00:00
|
|
|
/*
|
2006-06-28 21:03:31 +00:00
|
|
|
* A job has aborted - so force cleanup of the session directory
|
2006-02-16 00:16:22 +00:00
|
|
|
*/
|
|
|
|
int
|
|
|
|
orte_session_dir_cleanup(orte_jobid_t jobid)
|
|
|
|
{
|
2018-08-25 07:45:28 -07:00
|
|
|
/* special case - if a daemon is colocated with mpirun,
|
|
|
|
* then we let mpirun do the rest to avoid a race
|
|
|
|
* condition. this scenario always results in the rank=1
|
|
|
|
* daemon colocated with mpirun */
|
|
|
|
if (orte_ras_base.launch_orted_on_hn &&
|
|
|
|
ORTE_PROC_IS_DAEMON &&
|
|
|
|
1 == ORTE_PROC_MY_NAME->vpid) {
|
|
|
|
return ORTE_SUCCESS;
|
|
|
|
}
|
|
|
|
|
2016-09-02 00:23:30 +07:00
|
|
|
if (!orte_create_session_dirs || orte_process_info.rm_session_dirs ) {
|
|
|
|
/* we haven't created them or RM will clean them up for us*/
|
2010-03-02 15:18:33 +00:00
|
|
|
return ORTE_SUCCESS;
|
|
|
|
}
|
2015-06-23 20:59:57 -07:00
|
|
|
|
2017-06-21 06:33:37 -07:00
|
|
|
if (NULL == orte_process_info.jobfam_session_dir ||
|
2016-09-01 16:03:10 +07:00
|
|
|
NULL == orte_process_info.proc_session_dir) {
|
2014-04-18 16:21:55 +00:00
|
|
|
/* this should never happen - it means we are calling
|
|
|
|
* cleanup *before* properly setting up the session
|
|
|
|
* dir system. This leaves open the possibility of
|
|
|
|
* accidentally removing directories we shouldn't
|
|
|
|
* touch
|
|
|
|
*/
|
2017-06-21 06:33:37 -07:00
|
|
|
return ORTE_ERR_NOT_INITIALIZED;
|
2014-04-18 16:21:55 +00:00
|
|
|
}
|
|
|
|
|
2018-08-25 07:45:28 -07:00
|
|
|
|
2014-05-08 17:22:43 +00:00
|
|
|
/* recursively blow the whole session away for our job family,
|
|
|
|
* saving only output files
|
|
|
|
*/
|
2017-06-21 06:33:37 -07:00
|
|
|
opal_os_dirpath_destroy(orte_process_info.jobfam_session_dir,
|
2016-09-01 16:03:10 +07:00
|
|
|
true, orte_dir_check_file);
|
2015-06-23 20:59:57 -07:00
|
|
|
|
2017-06-21 06:33:37 -07:00
|
|
|
if (opal_os_dirpath_is_empty(orte_process_info.jobfam_session_dir)) {
|
2016-09-15 15:18:42 -07:00
|
|
|
if (orte_debug_flag) {
|
2017-06-21 06:33:37 -07:00
|
|
|
opal_output(0, "sess_dir_cleanup: found jobfam session dir empty - deleting");
|
2016-09-15 15:18:42 -07:00
|
|
|
}
|
2017-06-21 06:33:37 -07:00
|
|
|
rmdir(orte_process_info.jobfam_session_dir);
|
2006-02-16 00:16:22 +00:00
|
|
|
} else {
|
2016-09-15 15:18:42 -07:00
|
|
|
if (orte_debug_flag) {
|
2016-09-01 16:03:10 +07:00
|
|
|
if (OPAL_ERR_NOT_FOUND ==
|
|
|
|
opal_os_dirpath_access(orte_process_info.job_session_dir, 0)) {
|
2014-04-18 14:25:48 +00:00
|
|
|
opal_output(0, "sess_dir_cleanup: job session dir does not exist");
|
|
|
|
} else {
|
|
|
|
opal_output(0, "sess_dir_cleanup: job session dir not empty - leaving");
|
|
|
|
}
|
2016-09-15 15:18:42 -07:00
|
|
|
}
|
2006-02-16 00:16:22 +00:00
|
|
|
}
|
|
|
|
|
2017-06-21 06:33:37 -07:00
|
|
|
if (NULL != orte_process_info.top_session_dir) {
|
|
|
|
if (opal_os_dirpath_is_empty(orte_process_info.top_session_dir)) {
|
2016-09-01 16:03:10 +07:00
|
|
|
if (orte_debug_flag) {
|
|
|
|
opal_output(0, "sess_dir_cleanup: found top session dir empty - deleting");
|
2014-04-18 14:25:48 +00:00
|
|
|
}
|
2016-09-01 16:03:10 +07:00
|
|
|
rmdir(orte_process_info.top_session_dir);
|
|
|
|
} else {
|
|
|
|
if (orte_debug_flag) {
|
|
|
|
if (OPAL_ERR_NOT_FOUND ==
|
|
|
|
opal_os_dirpath_access(orte_process_info.top_session_dir, 0)) {
|
|
|
|
opal_output(0, "sess_dir_cleanup: top session dir does not exist");
|
|
|
|
} else {
|
|
|
|
opal_output(0, "sess_dir_cleanup: top session dir not empty - leaving");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2006-02-16 00:16:22 +00:00
|
|
|
}
|
|
|
|
|
2017-06-21 06:33:37 -07:00
|
|
|
/* now attempt to eliminate the top level directory itself - this
|
|
|
|
* will fail if anything is present, but ensures we cleanup if
|
|
|
|
* we are the last one out
|
|
|
|
*/
|
|
|
|
if( NULL != orte_process_info.top_session_dir ){
|
|
|
|
opal_os_dirpath_destroy(orte_process_info.top_session_dir,
|
|
|
|
false, orte_dir_check_file);
|
|
|
|
}
|
2016-09-01 16:03:10 +07:00
|
|
|
|
2017-06-21 06:33:37 -07:00
|
|
|
|
|
|
|
return ORTE_SUCCESS;
|
2006-02-16 00:16:22 +00:00
|
|
|
}
|
|
|
|
|
2004-10-15 16:31:54 +00:00
|
|
|
|
|
|
|
int
|
2005-04-14 01:04:26 +00:00
|
|
|
orte_session_dir_finalize(orte_process_name_t *proc)
|
2004-10-15 16:31:54 +00:00
|
|
|
{
|
2016-09-02 00:23:30 +07:00
|
|
|
if (!orte_create_session_dirs || orte_process_info.rm_session_dirs ) {
|
|
|
|
/* we haven't created them or RM will clean them up for us*/
|
2010-03-02 15:18:33 +00:00
|
|
|
return ORTE_SUCCESS;
|
|
|
|
}
|
2015-06-23 20:59:57 -07:00
|
|
|
|
2016-09-01 16:03:10 +07:00
|
|
|
if (NULL == orte_process_info.job_session_dir ||
|
|
|
|
NULL == orte_process_info.proc_session_dir) {
|
2014-04-18 16:21:55 +00:00
|
|
|
/* this should never happen - it means we are calling
|
|
|
|
* cleanup *before* properly setting up the session
|
2016-09-01 16:03:10 +07:00
|
|
|
* dir system. This leaves open the possibility of
|
2014-04-18 16:21:55 +00:00
|
|
|
* accidentally removing directories we shouldn't
|
2016-09-01 16:03:10 +07:00
|
|
|
* touch
|
2014-04-18 16:21:55 +00:00
|
|
|
*/
|
2016-11-29 20:51:28 -08:00
|
|
|
return ORTE_ERR_NOT_INITIALIZED;
|
2005-04-14 01:04:26 +00:00
|
|
|
}
|
2015-06-23 20:59:57 -07:00
|
|
|
|
2016-09-01 16:03:10 +07:00
|
|
|
opal_os_dirpath_destroy(orte_process_info.proc_session_dir,
|
2006-07-03 22:23:07 +00:00
|
|
|
false, orte_dir_check_file);
|
|
|
|
|
2016-09-01 16:03:10 +07:00
|
|
|
if (opal_os_dirpath_is_empty(orte_process_info.proc_session_dir)) {
|
2016-09-15 15:18:42 -07:00
|
|
|
if (orte_debug_flag) {
|
|
|
|
opal_output(0, "sess_dir_finalize: found proc session dir empty - deleting");
|
|
|
|
}
|
2016-09-01 16:03:10 +07:00
|
|
|
rmdir(orte_process_info.proc_session_dir);
|
2004-10-15 16:31:54 +00:00
|
|
|
} else {
|
2016-09-15 15:18:42 -07:00
|
|
|
if (orte_debug_flag) {
|
2016-09-01 16:03:10 +07:00
|
|
|
if (OPAL_ERR_NOT_FOUND ==
|
|
|
|
opal_os_dirpath_access(orte_process_info.proc_session_dir, 0)) {
|
2014-04-18 14:25:48 +00:00
|
|
|
opal_output(0, "sess_dir_finalize: proc session dir does not exist");
|
|
|
|
} else {
|
|
|
|
opal_output(0, "sess_dir_finalize: proc session dir not empty - leaving");
|
|
|
|
}
|
2016-09-15 15:18:42 -07:00
|
|
|
}
|
2004-10-15 16:31:54 +00:00
|
|
|
}
|
|
|
|
|
2018-08-25 07:45:28 -07:00
|
|
|
/* special case - if a daemon is colocated with mpirun,
|
|
|
|
* then we let mpirun do the rest to avoid a race
|
|
|
|
* condition. this scenario always results in the rank=1
|
|
|
|
* daemon colocated with mpirun */
|
|
|
|
if (orte_ras_base.launch_orted_on_hn &&
|
|
|
|
ORTE_PROC_IS_DAEMON &&
|
|
|
|
1 == ORTE_PROC_MY_NAME->vpid) {
|
|
|
|
return ORTE_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
opal_os_dirpath_destroy(orte_process_info.job_session_dir,
|
|
|
|
false, orte_dir_check_file);
|
|
|
|
|
|
|
|
/* only remove the jobfam session dir if we are the
|
|
|
|
* local daemon and we are finalizing our own session dir */
|
|
|
|
if ((ORTE_PROC_IS_HNP || ORTE_PROC_IS_DAEMON) &&
|
|
|
|
(ORTE_PROC_MY_NAME == proc)) {
|
|
|
|
opal_os_dirpath_destroy(orte_process_info.jobfam_session_dir,
|
|
|
|
false, orte_dir_check_file);
|
|
|
|
}
|
|
|
|
|
|
|
|
if( NULL != orte_process_info.top_session_dir ){
|
|
|
|
opal_os_dirpath_destroy(orte_process_info.top_session_dir,
|
|
|
|
false, orte_dir_check_file);
|
|
|
|
}
|
|
|
|
|
2016-09-01 16:03:10 +07:00
|
|
|
if (opal_os_dirpath_is_empty(orte_process_info.job_session_dir)) {
|
2016-09-15 15:18:42 -07:00
|
|
|
if (orte_debug_flag) {
|
|
|
|
opal_output(0, "sess_dir_finalize: found job session dir empty - deleting");
|
|
|
|
}
|
2016-09-01 16:03:10 +07:00
|
|
|
rmdir(orte_process_info.job_session_dir);
|
2004-10-15 16:31:54 +00:00
|
|
|
} else {
|
2016-09-15 15:18:42 -07:00
|
|
|
if (orte_debug_flag) {
|
2016-09-01 16:03:10 +07:00
|
|
|
if (OPAL_ERR_NOT_FOUND ==
|
|
|
|
opal_os_dirpath_access(orte_process_info.job_session_dir, 0)) {
|
2014-04-18 14:25:48 +00:00
|
|
|
opal_output(0, "sess_dir_finalize: job session dir does not exist");
|
|
|
|
} else {
|
|
|
|
opal_output(0, "sess_dir_finalize: job session dir not empty - leaving");
|
|
|
|
}
|
2016-09-15 15:18:42 -07:00
|
|
|
}
|
2016-11-29 20:51:28 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (opal_os_dirpath_is_empty(orte_process_info.jobfam_session_dir)) {
|
|
|
|
if (orte_debug_flag) {
|
|
|
|
opal_output(0, "sess_dir_finalize: found jobfam session dir empty - deleting");
|
|
|
|
}
|
|
|
|
rmdir(orte_process_info.jobfam_session_dir);
|
|
|
|
} else {
|
|
|
|
if (orte_debug_flag) {
|
|
|
|
if (OPAL_ERR_NOT_FOUND ==
|
|
|
|
opal_os_dirpath_access(orte_process_info.jobfam_session_dir, 0)) {
|
|
|
|
opal_output(0, "sess_dir_finalize: jobfam session dir does not exist");
|
|
|
|
} else {
|
|
|
|
opal_output(0, "sess_dir_finalize: jobfam session dir not empty - leaving");
|
|
|
|
}
|
|
|
|
}
|
2004-10-15 16:31:54 +00:00
|
|
|
}
|
|
|
|
|
2017-03-11 12:50:38 -08:00
|
|
|
if (opal_os_dirpath_is_empty(orte_process_info.jobfam_session_dir)) {
|
|
|
|
if (orte_debug_flag) {
|
|
|
|
opal_output(0, "sess_dir_finalize: found jobfam session dir empty - deleting");
|
|
|
|
}
|
|
|
|
rmdir(orte_process_info.jobfam_session_dir);
|
|
|
|
} else {
|
|
|
|
if (orte_debug_flag) {
|
|
|
|
if (OPAL_ERR_NOT_FOUND ==
|
|
|
|
opal_os_dirpath_access(orte_process_info.jobfam_session_dir, 0)) {
|
|
|
|
opal_output(0, "sess_dir_finalize: jobfam session dir does not exist");
|
|
|
|
} else {
|
|
|
|
opal_output(0, "sess_dir_finalize: jobfam session dir not empty - leaving");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (NULL != orte_process_info.top_session_dir) {
|
2016-09-01 16:03:10 +07:00
|
|
|
if (opal_os_dirpath_is_empty(orte_process_info.top_session_dir)) {
|
|
|
|
if (orte_debug_flag) {
|
|
|
|
opal_output(0, "sess_dir_finalize: found top session dir empty - deleting");
|
2014-04-18 14:25:48 +00:00
|
|
|
}
|
2016-09-02 00:25:40 +07:00
|
|
|
rmdir(orte_process_info.top_session_dir);
|
2016-09-01 16:03:10 +07:00
|
|
|
} else {
|
|
|
|
if (orte_debug_flag) {
|
|
|
|
if (OPAL_ERR_NOT_FOUND ==
|
|
|
|
opal_os_dirpath_access(orte_process_info.top_session_dir, 0)) {
|
|
|
|
opal_output(0, "sess_dir_finalize: top session dir does not exist");
|
|
|
|
} else {
|
|
|
|
opal_output(0, "sess_dir_finalize: top session dir not empty - leaving");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2004-10-15 16:31:54 +00:00
|
|
|
}
|
|
|
|
|
2016-11-29 20:51:28 -08:00
|
|
|
return ORTE_SUCCESS;
|
2004-10-15 16:31:54 +00:00
|
|
|
}
|
|
|
|
|
2015-06-23 20:59:57 -07:00
|
|
|
/*
 * Filter used while destroying session directories: decide whether the
 * entry "path" inside directory "root" may be removed.
 *
 * Keep (return false):
 *   - non-zero-length files whose name starts with "output-"
 *   - "output-" files whose size cannot be determined (path allocation or
 *     stat() failed) - when in doubt, do not delete
 * Remove (return true):
 *   - everything else, including zero-length "output-" files
 *
 * Fixes relative to the previous version: the address of the uninitialized
 * result pointer is no longer passed as a path element to opal_os_path(),
 * and st.st_size is only read after stat() succeeded.
 */
static bool
orte_dir_check_file(const char *root, const char *path)
{
    struct stat st;
    char *fullpath;
    int stat_rc;

    if (0 != strncmp(path, "output-", strlen("output-"))) {
        /* not an output file - fine to remove */
        return true;
    }

    /* NOTE(review): assumes opal_os_path() takes a NULL-terminated list of
     * path elements after the "relative" flag and returns a malloc'ed
     * string (NULL on failure) - confirm against opal/util/os_path.h */
    fullpath = opal_os_path(false, root, path, NULL);
    if (NULL == fullpath) {
        return false;
    }

    stat_rc = stat(fullpath, &st);
    free(fullpath);
    if (0 != stat_rc) {
        /* size unknown - err on the side of keeping the file */
        return false;
    }

    /* only empty output files may go */
    return (0 == st.st_size);
}
|