1
1
openmpi/orte/util/session_dir.h

173 строки
8.2 KiB
C
Исходник Обычный вид История

/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file:
*
* Find and/or create Open MPI session directory.
*
* The orte_session_dir() function searches for a temporary directory
* that is used by the Open MPI system for storing system-critical
* information. For a given system and user, the function attempts to
* find (or create, if not found and create is requested) a directory
* that will be used to independently house information for multiple
* universes, as the user creates them. Thus, the function pursues a
* directory tree of the form:
*
* \par \em [prefix-dir] An absolute path that identifies a temporary
* directory that is read-write-execute accessible to everyone. The
* function first checks to see if the user has specified the [prefix]
* directory on the command line. If so, then the function will use
* that [prefix] if the access permissions are correct, or will return
* an error condition if not - the function will not search for
* alternative locations if the user provides the [prefix] name.
*
* \par If the [prefix] is not provided by the user, the function
* searches for a suitable directory in a specific order, taking the
* first option that meets the access permission requirement, using:
* (a) the "OMPI_PREFIX_ENV" environment variable; (b) the "TMPDIR"
* environment variable; and (c) the "TMP" environment variable. If
* none of those environmental variables have been defined and/or the
* function was unable to create a suitable directory within any of
* them, then the function tries to use a default location of "/tmp",
* where the "/" represents the top-level directory of the local
* system. If none of these options are successful, the function
* returns an error code.
*
* \par \em [openmpi-sessions]-[user-id]@[host]:[batchid] This serves
* as a concentrator for all Open MPI session directories for this
* user on the local system. If it doesn't already exist, this
* directory is created with read-write-execute permissions
* exclusively restricted to the user. If it does exist, the access
* permissions are checked to ensure they are correct - if not, the
* program attempts to correct them. If they can't be changed to the
* correct values, an error condition is returned. The [host] and
* [batchid] fields are included to provide uniqueness on shared file
* systems and batch schedulers, respectively.
*
* \par Note: The [prefix]/openmpi-sessions-[user-id]@[host]:[batchid]
* directory is left on the system upon termination of an application
* and/or an Open MPI universe for future use by the user. Thus, when
* checking a potential location for the directory, the
* orte_session_tree_init() function first checks to see if an
* appropriate directory already exists, and uses it if it does.
*
* \par \em [universe-name] A directory is created for the specified
* universe name. This is the directory that will be used to house all
* information relating to the specific universe. If the directory
* already exists (indicating that the user is joining an existing
* universe), then the function ensures that the user has exclusive
* read-write-execute permissions on the directory.
*
* \par \em [job] A directory is created for the specified job
* name. This will house all information relating to that specific
* job, including directories for each process within that job on this
* host.
*
* \par \em [process] A directory for the specific process, will house
* all information for that process.
*
* \par If \c create is \c true, the directory will be created and the
* proc_info structure will be updated. If \c create is \c false, the
* function only checks whether an existing directory can be found.
*/
#ifndef ORTE_SESSION_DIR_H_HAS_BEEN_INCLUDED
#define ORTE_SESSION_DIR_H_HAS_BEEN_INCLUDED
#include "orte_config.h"
#include "orte/types.h"
BEGIN_C_DECLS
/**
* Find and/or create the Open MPI session directory tree for the given
* host, batch job, job and process.
*
* @param create A boolean variable that indicates whether or not to
* create the specified directory. If set to "false",
* the function only checks to see if an existing
* directory can be found. This is typically used to
* locate an already existing universe for reconnection
* purposes. If set to "true", then the function
* creates the directory, if possible.
* @param prefix A string variable indicating where the user
* stipulated the directory should be found or
* placed. A value of "NULL" indicates that the user
* specified no location - hence, the function explores
* a range of "standard" locations.
* @param hostid Name of the host on which the session directory is
* being built. Used to build the name of the
* "openmpi-sessions-[user]@[host]:[batch]" branch of
* the directory tree. NULL indicates that the nodename
* found in orte_process_info is to be used.
* @param batchid Batch job name, used in batch scheduling
* systems. NULL indicates that the default of "0" is
* to be used.
* @param job String version of the jobid for which a session
* directory is to be created/found. NULL indicates
* that only the universe directory is to be
* created/found.
* @param vpid String version of the vpid for which a session
* directory is to be created/found. NULL indicates
* that only the job directory is to be created/found.
*
* @retval ORTE_SUCCESS The directory was found and/or created with
* the proper permissions.
* @retval OMPI_ERROR The directory cannot be found (if create is
* "false") or created (if create is "true").
*
* NOTE(review): the success code is ORTE_-prefixed while the failure
* code is OMPI_-prefixed - confirm which constant the implementation
* actually returns.
*/
ORTE_DECLSPEC int orte_session_dir(bool create, char *prefix, char *hostid,
char *batchid, char *job, char *vpid);
/*
* Change history (SVN r10550, 2006-06-29 01:03:31 +04:00):
*
* Bringing over the session directory and universe changes from the
* tmp/jjhursey-ft-cr branch. In this commit we change the way universe
* names are created. Before, we by default first created
* "default-universe"; then, if there was a conflict, we created
* "default-universe-PID", where PID is the PID of the HNP. Now we create
* "default-universe-PID" all the time (when a default universe name is
* used). This makes it much easier when trying to find an HNP from an
* outside app (e.g. orte-ps, orteconsole, ...).
*
* This also adds a "search" function to find all of the universes on the
* machine. This is useful in many contexts when trying to find a
* persistent daemon or when trying to connect to an HNP.
*
* This commit also makes orte_universe_t an opal_object_t, which is
* something that needed to happen, and only affected the SDS in one of
* its base functions.
*
* I was asked to bring this over to aid in fixing orteconsole and
* orteprobe. Due to the change of orte_universe_t to an object, orteprobe
* may need to be updated to reflect this change. Since orteprobe needs to
* be looked at anyway, I'll leave this to Ralph to take care of.
*
* Note: These changes do not depend upon any of the FT work (but the FT
* work does depend upon them). These were brought over to help in fixing
* some of the ORTE tool set that requires the functionality laid out in
* this patch.
*
* Testing: Ran the 'ibm' tests before and after this change, and all was
* as well as before the change. If anyone notices additional
* irregularities in the system let me know. But none are expected.
*/
/**
* Construct the session directory name from the input parameters.
* This function does no checking that the directory exists, or can be used.
*
* @param fulldirpath NOTE(review): presumably receives the full session
*                    directory path - confirm against the implementation.
* @param prfx        NOTE(review): presumably receives the prefix
*                    directory component - confirm.
* @param frontend    NOTE(review): presumably receives the
*                    "openmpi-sessions-..." portion of the path - confirm.
* @param hostid      Host name used when building the directory name;
*                    presumably interpreted as for orte_session_dir().
* @param batchid     Batch job name used when building the directory name;
*                    presumably interpreted as for orte_session_dir().
* @param job         String job identifier - TODO confirm NULL handling.
* @param proc        String process identifier - TODO confirm NULL handling.
*
* @return An int status code; presumably ORTE_SUCCESS on success -
*         verify against the implementation.
*/
ORTE_DECLSPEC int orte_session_dir_get_name(char **fulldirpath,
                                            char **prfx,
                                            char **frontend,
                                            char *hostid,
                                            char *batchid,
                                            char *job, char *proc);
/** The orte_session_dir_finalize() function performs a cleanup of the
* session directory tree. It first removes the session directory for
* the calling process. It then checks to see if the job-level session
* directory is now empty - if so, it removes that level as
* well. Finally, it checks to see if the universe-level session
* directory is now empty - if so, it also removes that level. This
* three-part "last-one-out" procedure ensures that the directory tree
* is properly removed if all processes and applications within a
* universe have completed.
*
* @param proc Name of the process whose session directory is removed.
* NOTE(review): the description above says "the calling
* process" - confirm whether any process name may be
* passed here.
* @retval ORTE_SUCCESS If the directory tree is properly cleaned up.
* @retval OMPI_ERROR If something prevents the tree from being
* properly cleaned up.
*
* NOTE(review): the failure code is OMPI_-prefixed while the success
* code is ORTE_-prefixed - confirm which constant is actually returned.
*/
ORTE_DECLSPEC int orte_session_dir_finalize(orte_process_name_t *proc);
/** The orte_session_dir_cleanup() function performs a cleanup of the
* session directory tree when a job is aborted. It cleans up all
* process directories for a given job and then backs up the tree.
*
* @param jobid Identifier of the job whose process directories are
* cleaned up.
* @retval OMPI_SUCCESS If the directory tree is properly cleaned up.
* @retval OMPI_ERROR If something prevents the tree from being
* properly cleaned up.
*
* NOTE(review): the other functions in this header document
* ORTE_SUCCESS rather than OMPI_SUCCESS - confirm which constants the
* implementation actually returns.
*/
ORTE_DECLSPEC int orte_session_dir_cleanup(orte_jobid_t jobid);
END_C_DECLS
#endif /* ORTE_SESSION_DIR_H_HAS_BEEN_INCLUDED */