From b118779c08c91806c92492ff4cf3305dd9ba8d8c Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 24 Jun 2008 17:50:56 +0000 Subject: [PATCH] It is okay for us to init the ORTE mca params multiple times. Indeed, it is absolutely required by orterun as the first time has to be done prior to parsing the command line, which means that the mca values haven't been parsed yet! Add ability for sys admins to prohibit putting session directories under specified locations. Thus, they can now protect parallel file systems from foolish user mistakes. This commit was SVN r18721. --- orte/runtime/help-orte-runtime.txt | 12 +++++++++++ orte/runtime/orte_globals.c | 2 ++ orte/runtime/orte_globals.h | 3 +++ orte/runtime/orte_init.c | 9 +++++--- orte/runtime/orte_mca_params.c | 19 +++++++++-------- orte/tools/orterun/orterun.c | 7 +++++- orte/util/session_dir.c | 34 +++++++++++++++++++++++++++++- 7 files changed, 72 insertions(+), 14 deletions(-) diff --git a/orte/runtime/help-orte-runtime.txt b/orte/runtime/help-orte-runtime.txt index bafa6dd59d..c1cf29ce56 100644 --- a/orte/runtime/help-orte-runtime.txt +++ b/orte/runtime/help-orte-runtime.txt @@ -28,4 +28,16 @@ Open MPI developer): %s failed --> Returned value %s (%d) instead of ORTE_SUCCESS +# +# +[orte:session:dir:prohibited] +The specified location for the temporary directories required by Open MPI +is on the list of prohibited locations: +Location given: %s +Prohibited locations: %s + +If you believe this is in error, please contact your system administrator +to have the list of prohibited locations changed. Otherwise, please identify +a different location to be used (use -h to see the cmd line option), or +simply let the system pick a default location. diff --git a/orte/runtime/orte_globals.c b/orte/runtime/orte_globals.c index f06b9aafe1..e07db7e405 100644 --- a/orte/runtime/orte_globals.c +++ b/orte/runtime/orte_globals.c @@ -55,6 +55,8 @@ bool orte_keep_fqdn_hostnames = false; bool orte_xml_output; int orted_debug_failure; int orted_debug_failure_delay; +bool orte_homogeneous_nodes = false; +bool orte_hetero_apps = false; int32_t orte_contiguous_nodes; int orte_debug_output = -1; diff --git a/orte/runtime/orte_globals.h b/orte/runtime/orte_globals.h index 8b38fb55a7..7bfa640220 100644 --- a/orte/runtime/orte_globals.h +++ b/orte/runtime/orte_globals.h @@ -52,6 +52,7 @@ BEGIN_C_DECLS ORTE_DECLSPEC extern bool orte_help_want_aggregate; +ORTE_DECLSPEC extern char *orte_prohibited_session_dirs; #define ORTE_PROC_MY_NAME (&orte_process_info.my_name) @@ -330,6 +331,8 @@ ORTE_DECLSPEC extern bool orte_xml_output; ORTE_DECLSPEC extern int orte_debug_verbosity; ORTE_DECLSPEC extern int orted_debug_failure; ORTE_DECLSPEC extern int orted_debug_failure_delay; +ORTE_DECLSPEC extern bool orte_homogeneous_nodes; +ORTE_DECLSPEC extern bool orte_hetero_apps; ORTE_DECLSPEC extern char **orte_launch_environ; ORTE_DECLSPEC extern opal_pointer_array_t orte_daemonmap; diff --git a/orte/runtime/orte_init.c b/orte/runtime/orte_init.c index caef79ec5d..effd01c141 100644 --- a/orte/runtime/orte_init.c +++ b/orte/runtime/orte_init.c @@ -49,6 +49,7 @@ bool orte_initialized = false; bool orte_finalizing = false; bool orte_debug_flag = false; int orte_debug_verbosity; +char *orte_prohibited_session_dirs = NULL; orte_process_name_t orte_name_wildcard = {ORTE_JOBID_WILDCARD, ORTE_VPID_WILDCARD}; orte_process_name_t orte_name_invalid = {ORTE_JOBID_INVALID, ORTE_VPID_INVALID}; @@ -131,9 +132,11 @@ int orte_init(char flags) return ORTE_SUCCESS; error: - orte_show_help("help-orte-runtime", - "orte_init:startup:internal-failure", - true, error, ORTE_ERROR_NAME(ret), ret); + if (ORTE_ERR_SILENT != ret) { + orte_show_help("help-orte-runtime", + "orte_init:startup:internal-failure", + true, error, ORTE_ERROR_NAME(ret), ret); + } return ret; } diff --git a/orte/runtime/orte_mca_params.c b/orte/runtime/orte_mca_params.c index e59d19ed37..e1dce6c4bf 100644 --- a/orte/runtime/orte_mca_params.c +++ b/orte/runtime/orte_mca_params.c @@ -33,16 +33,10 @@ #include "orte/runtime/runtime.h" #include "orte/runtime/orte_globals.h" -static bool orte_params_set=false; - int orte_register_params(void) { int value; - if (orte_params_set) { - return ORTE_SUCCESS; - } - mca_base_param_reg_int_name("orte", "base_help_aggregate", "If orte_base_help_aggregate is true, duplicate help messages will be aggregated rather than displayed individually. This can be helpful for parallel jobs that experience multiple identical failures; rather than print out the same help/failure message N times, display it once with a count of how many processes sent the same message.", false, false, @@ -52,7 +46,11 @@ int orte_register_params(void) mca_base_param_reg_string_name("orte", "tmpdir_base", "Base of the session directory tree", false, false, NULL, &(orte_process_info.tmpdir_base)); - + + mca_base_param_reg_string_name("orte", "no_session_dirs", + "Prohibited locations for session directories (multiple locations separated by ',', default=NULL)", + false, false, NULL, &orte_prohibited_session_dirs); + #if !ORTE_DISABLE_FULL_SUPPORT mca_base_param_reg_int_name("orte", "debug", "Top-level ORTE debug switch (default verbosity: 1)", @@ -148,9 +146,12 @@ int orte_register_params(void) false, false, (int) false, &value); orte_xml_output = OPAL_INT_TO_BOOL(value); + mca_base_param_reg_int_name("orte", "hetero_apps", + "Indicates that multiple app_contexts are being provided that are a mix of 32/64 bit binaries (default: false)", + false, false, (int) false, &value); + orte_hetero_apps = OPAL_INT_TO_BOOL(value); + #endif /* ORTE_DISABLE_FULL_SUPPORT */ - /* All done */ - orte_params_set = true; return ORTE_SUCCESS; } diff --git a/orte/tools/orterun/orterun.c b/orte/tools/orterun/orterun.c index dae6aebfe9..31dc3e4ea3 100644 --- a/orte/tools/orterun/orterun.c +++ b/orte/tools/orterun/orterun.c @@ -137,6 +137,11 @@ static opal_cmd_line_init_t cmd_line_init[] = { &orterun_globals.quiet, OPAL_CMD_LINE_TYPE_BOOL, "Suppress helpful messages" }, + /* hetero apps */ + { "orte", "hetero", "apps", '\0', NULL, "hetero", 0, + NULL, OPAL_CMD_LINE_TYPE_BOOL, + "Indicates that multiple app_contexts are being provided that are a mix of 32/64 bit binaries" }, + /* select XML output */ { "orte", "xml", "output", '\0', NULL, "xml", 0, NULL, OPAL_CMD_LINE_TYPE_BOOL, @@ -346,7 +351,7 @@ int orterun(int argc, char *argv[]) /* Setup MCA params */ orte_register_params(); - + /* Check for some "global" command line params */ parse_globals(argc, argv, &cmd_line); OBJ_DESTRUCT(&cmd_line); diff --git a/orte/util/session_dir.c b/orte/util/session_dir.c index 53ca2ca166..dca53d4f4a 100644 --- a/orte/util/session_dir.c +++ b/orte/util/session_dir.c @@ -45,7 +45,7 @@ #include #endif /* HAVE_PWD_H */ -#include "orte/util/show_help.h" +#include "opal/util/argv.h" #include "opal/util/os_path.h" #include "opal/util/os_dirpath.h" #include "opal/util/basename.h" @@ -53,6 +53,7 @@ #include "orte/util/proc_info.h" #include "orte/util/name_fns.h" +#include "orte/util/show_help.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/runtime/runtime.h" @@ -261,6 +262,30 @@ orte_session_dir_get_name(char **fulldirpath, prefix = strdup(opal_tmp_directory()); } + /* BEFORE doing anything else, check to see if this prefix is + * allowed by the system + */ + if (NULL != orte_prohibited_session_dirs) { + char **list; + int i, len; + /* break the string into tokens - it should be + * separated by ',' + */ + list = opal_argv_split(orte_prohibited_session_dirs, ','); + len = opal_argv_count(list); + /* cycle through the list */ + for (i=0; i < len; i++) { + /* check if prefix matches */ + if (0 == strncmp(prefix, list[i], strlen(list[i]))) { + /* this is a prohibited location */ + orte_show_help("help-orte-runtime.txt", + "orte:session:dir:prohibited", + true, prefix, orte_prohibited_session_dirs); + return ORTE_ERR_FATAL; + } + } + opal_argv_free(list); /* done with this */ + } /* * Construct the absolute final path, if requested */ @@ -332,6 +357,13 @@ int orte_session_dir(bool create, hostid, batchid, job, proc) ) ) { + if (ORTE_ERR_FATAL == rtn) { + /* this indicates we definitely need to abort, so + * don't try the NULL prefix + */ + return_code = ORTE_ERR_SILENT; + goto cleanup; + } return_code = rtn; /* * If the first attempt at the path creation failed, try with a null