It is okay for us to initialize the ORTE MCA params multiple times. Indeed, orterun absolutely requires it: the first initialization has to be done prior to parsing the command line, which means that any MCA values given on the command line haven't been parsed yet, so the params must be initialized again afterwards to pick them up.
Add the ability for system administrators to prohibit placing session directories under specified locations, so that they can protect parallel file systems from foolish user mistakes. This commit was SVN r18721.
Этот коммит содержится в:
родитель
e0545460ff
Коммит
b118779c08
@ -28,4 +28,16 @@ Open MPI developer):
|
|||||||
|
|
||||||
%s failed
|
%s failed
|
||||||
--> Returned value %s (%d) instead of ORTE_SUCCESS
|
--> Returned value %s (%d) instead of ORTE_SUCCESS
|
||||||
|
#
|
||||||
|
#
|
||||||
|
[orte:session:dir:prohibited]
|
||||||
|
The specified location for the temporary directories required by Open MPI
|
||||||
|
is on the list of prohibited locations:
|
||||||
|
|
||||||
|
Location given: %s
|
||||||
|
Prohibited locations: %s
|
||||||
|
|
||||||
|
If you believe this is in error, please contact your system administrator
|
||||||
|
to have the list of prohibited locations changed. Otherwise, please identify
|
||||||
|
a different location to be used (use -h to see the cmd line option), or
|
||||||
|
simply let the system pick a default location.
|
||||||
|
@ -55,6 +55,8 @@ bool orte_keep_fqdn_hostnames = false;
|
|||||||
bool orte_xml_output;
|
bool orte_xml_output;
|
||||||
int orted_debug_failure;
|
int orted_debug_failure;
|
||||||
int orted_debug_failure_delay;
|
int orted_debug_failure_delay;
|
||||||
|
bool orte_homogeneous_nodes = false;
|
||||||
|
bool orte_hetero_apps = false;
|
||||||
|
|
||||||
int32_t orte_contiguous_nodes;
|
int32_t orte_contiguous_nodes;
|
||||||
int orte_debug_output = -1;
|
int orte_debug_output = -1;
|
||||||
|
@ -52,6 +52,7 @@
|
|||||||
BEGIN_C_DECLS
|
BEGIN_C_DECLS
|
||||||
|
|
||||||
ORTE_DECLSPEC extern bool orte_help_want_aggregate;
|
ORTE_DECLSPEC extern bool orte_help_want_aggregate;
|
||||||
|
ORTE_DECLSPEC extern char *orte_prohibited_session_dirs;
|
||||||
|
|
||||||
#define ORTE_PROC_MY_NAME (&orte_process_info.my_name)
|
#define ORTE_PROC_MY_NAME (&orte_process_info.my_name)
|
||||||
|
|
||||||
@ -330,6 +331,8 @@ ORTE_DECLSPEC extern bool orte_xml_output;
|
|||||||
ORTE_DECLSPEC extern int orte_debug_verbosity;
|
ORTE_DECLSPEC extern int orte_debug_verbosity;
|
||||||
ORTE_DECLSPEC extern int orted_debug_failure;
|
ORTE_DECLSPEC extern int orted_debug_failure;
|
||||||
ORTE_DECLSPEC extern int orted_debug_failure_delay;
|
ORTE_DECLSPEC extern int orted_debug_failure_delay;
|
||||||
|
ORTE_DECLSPEC extern bool orte_homogeneous_nodes;
|
||||||
|
ORTE_DECLSPEC extern bool orte_hetero_apps;
|
||||||
|
|
||||||
ORTE_DECLSPEC extern char **orte_launch_environ;
|
ORTE_DECLSPEC extern char **orte_launch_environ;
|
||||||
ORTE_DECLSPEC extern opal_pointer_array_t orte_daemonmap;
|
ORTE_DECLSPEC extern opal_pointer_array_t orte_daemonmap;
|
||||||
|
@ -49,6 +49,7 @@ bool orte_initialized = false;
|
|||||||
bool orte_finalizing = false;
|
bool orte_finalizing = false;
|
||||||
bool orte_debug_flag = false;
|
bool orte_debug_flag = false;
|
||||||
int orte_debug_verbosity;
|
int orte_debug_verbosity;
|
||||||
|
char *orte_prohibited_session_dirs = NULL;
|
||||||
|
|
||||||
orte_process_name_t orte_name_wildcard = {ORTE_JOBID_WILDCARD, ORTE_VPID_WILDCARD};
|
orte_process_name_t orte_name_wildcard = {ORTE_JOBID_WILDCARD, ORTE_VPID_WILDCARD};
|
||||||
orte_process_name_t orte_name_invalid = {ORTE_JOBID_INVALID, ORTE_VPID_INVALID};
|
orte_process_name_t orte_name_invalid = {ORTE_JOBID_INVALID, ORTE_VPID_INVALID};
|
||||||
@ -131,9 +132,11 @@ int orte_init(char flags)
|
|||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
|
|
||||||
error:
|
error:
|
||||||
orte_show_help("help-orte-runtime",
|
if (ORTE_ERR_SILENT != ret) {
|
||||||
"orte_init:startup:internal-failure",
|
orte_show_help("help-orte-runtime",
|
||||||
true, error, ORTE_ERROR_NAME(ret), ret);
|
"orte_init:startup:internal-failure",
|
||||||
|
true, error, ORTE_ERROR_NAME(ret), ret);
|
||||||
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -33,16 +33,10 @@
|
|||||||
#include "orte/runtime/runtime.h"
|
#include "orte/runtime/runtime.h"
|
||||||
#include "orte/runtime/orte_globals.h"
|
#include "orte/runtime/orte_globals.h"
|
||||||
|
|
||||||
static bool orte_params_set=false;
|
|
||||||
|
|
||||||
int orte_register_params(void)
|
int orte_register_params(void)
|
||||||
{
|
{
|
||||||
int value;
|
int value;
|
||||||
|
|
||||||
if (orte_params_set) {
|
|
||||||
return ORTE_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
mca_base_param_reg_int_name("orte", "base_help_aggregate",
|
mca_base_param_reg_int_name("orte", "base_help_aggregate",
|
||||||
"If orte_base_help_aggregate is true, duplicate help messages will be aggregated rather than displayed individually. This can be helpful for parallel jobs that experience multiple identical failures; rather than print out the same help/failure message N times, display it once with a count of how many processes sent the same message.",
|
"If orte_base_help_aggregate is true, duplicate help messages will be aggregated rather than displayed individually. This can be helpful for parallel jobs that experience multiple identical failures; rather than print out the same help/failure message N times, display it once with a count of how many processes sent the same message.",
|
||||||
false, false,
|
false, false,
|
||||||
@ -53,6 +47,10 @@ int orte_register_params(void)
|
|||||||
"Base of the session directory tree",
|
"Base of the session directory tree",
|
||||||
false, false, NULL, &(orte_process_info.tmpdir_base));
|
false, false, NULL, &(orte_process_info.tmpdir_base));
|
||||||
|
|
||||||
|
mca_base_param_reg_string_name("orte", "no_session_dirs",
|
||||||
|
"Prohibited locations for session directories (multiple locations separated by ',', default=NULL)",
|
||||||
|
false, false, NULL, &orte_prohibited_session_dirs);
|
||||||
|
|
||||||
#if !ORTE_DISABLE_FULL_SUPPORT
|
#if !ORTE_DISABLE_FULL_SUPPORT
|
||||||
mca_base_param_reg_int_name("orte", "debug",
|
mca_base_param_reg_int_name("orte", "debug",
|
||||||
"Top-level ORTE debug switch (default verbosity: 1)",
|
"Top-level ORTE debug switch (default verbosity: 1)",
|
||||||
@ -148,9 +146,12 @@ int orte_register_params(void)
|
|||||||
false, false, (int) false, &value);
|
false, false, (int) false, &value);
|
||||||
orte_xml_output = OPAL_INT_TO_BOOL(value);
|
orte_xml_output = OPAL_INT_TO_BOOL(value);
|
||||||
|
|
||||||
|
mca_base_param_reg_int_name("orte", "hetero_apps",
|
||||||
|
"Indicates that multiple app_contexts are being provided that are a mix of 32/64 bit binaries (default: false)",
|
||||||
|
false, false, (int) false, &value);
|
||||||
|
orte_hetero_apps = OPAL_INT_TO_BOOL(value);
|
||||||
|
|
||||||
#endif /* ORTE_DISABLE_FULL_SUPPORT */
|
#endif /* ORTE_DISABLE_FULL_SUPPORT */
|
||||||
|
|
||||||
/* All done */
|
|
||||||
orte_params_set = true;
|
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
@ -137,6 +137,11 @@ static opal_cmd_line_init_t cmd_line_init[] = {
|
|||||||
&orterun_globals.quiet, OPAL_CMD_LINE_TYPE_BOOL,
|
&orterun_globals.quiet, OPAL_CMD_LINE_TYPE_BOOL,
|
||||||
"Suppress helpful messages" },
|
"Suppress helpful messages" },
|
||||||
|
|
||||||
|
/* hetero apps */
|
||||||
|
{ "orte", "hetero", "apps", '\0', NULL, "hetero", 0,
|
||||||
|
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
||||||
|
"Indicates that multiple app_contexts are being provided that are a mix of 32/64 bit binaries" },
|
||||||
|
|
||||||
/* select XML output */
|
/* select XML output */
|
||||||
{ "orte", "xml", "output", '\0', NULL, "xml", 0,
|
{ "orte", "xml", "output", '\0', NULL, "xml", 0,
|
||||||
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
||||||
|
@ -45,7 +45,7 @@
|
|||||||
#include <pwd.h>
|
#include <pwd.h>
|
||||||
#endif /* HAVE_PWD_H */
|
#endif /* HAVE_PWD_H */
|
||||||
|
|
||||||
#include "orte/util/show_help.h"
|
#include "opal/util/argv.h"
|
||||||
#include "opal/util/os_path.h"
|
#include "opal/util/os_path.h"
|
||||||
#include "opal/util/os_dirpath.h"
|
#include "opal/util/os_dirpath.h"
|
||||||
#include "opal/util/basename.h"
|
#include "opal/util/basename.h"
|
||||||
@ -53,6 +53,7 @@
|
|||||||
|
|
||||||
#include "orte/util/proc_info.h"
|
#include "orte/util/proc_info.h"
|
||||||
#include "orte/util/name_fns.h"
|
#include "orte/util/name_fns.h"
|
||||||
|
#include "orte/util/show_help.h"
|
||||||
|
|
||||||
#include "orte/mca/errmgr/errmgr.h"
|
#include "orte/mca/errmgr/errmgr.h"
|
||||||
#include "orte/runtime/runtime.h"
|
#include "orte/runtime/runtime.h"
|
||||||
@ -261,6 +262,30 @@ orte_session_dir_get_name(char **fulldirpath,
|
|||||||
prefix = strdup(opal_tmp_directory());
|
prefix = strdup(opal_tmp_directory());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* BEFORE doing anything else, check to see if this prefix is
|
||||||
|
* allowed by the system
|
||||||
|
*/
|
||||||
|
if (NULL != orte_prohibited_session_dirs) {
|
||||||
|
char **list;
|
||||||
|
int i, len;
|
||||||
|
/* break the string into tokens - it should be
|
||||||
|
* separated by ','
|
||||||
|
*/
|
||||||
|
list = opal_argv_split(orte_prohibited_session_dirs, ',');
|
||||||
|
len = opal_argv_count(list);
|
||||||
|
/* cycle through the list */
|
||||||
|
for (i=0; i < len; i++) {
|
||||||
|
/* check if prefix matches */
|
||||||
|
if (0 == strncmp(prefix, list[i], strlen(list[i]))) {
|
||||||
|
/* this is a prohibited location */
|
||||||
|
orte_show_help("help-orte-runtime.txt",
|
||||||
|
"orte:session:dir:prohibited",
|
||||||
|
true, prefix, orte_prohibited_session_dirs);
|
||||||
|
return ORTE_ERR_FATAL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
opal_argv_free(list); /* done with this */
|
||||||
|
}
|
||||||
/*
|
/*
|
||||||
* Construct the absolute final path, if requested
|
* Construct the absolute final path, if requested
|
||||||
*/
|
*/
|
||||||
@ -332,6 +357,13 @@ int orte_session_dir(bool create,
|
|||||||
hostid,
|
hostid,
|
||||||
batchid, job,
|
batchid, job,
|
||||||
proc) ) ) {
|
proc) ) ) {
|
||||||
|
if (ORTE_ERR_FATAL == rtn) {
|
||||||
|
/* this indicates we definitely need to abort, so
|
||||||
|
* don't try the NULL prefix
|
||||||
|
*/
|
||||||
|
return_code = ORTE_ERR_SILENT;
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
return_code = rtn;
|
return_code = rtn;
|
||||||
/*
|
/*
|
||||||
* If the first attempt at the path creation failed, try with a null
|
* If the first attempt at the path creation failed, try with a null
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user