Fix a problem where orterun itself would not receive MCA parameters
that were set on the command line. This was technically exactly the way the code was designed, but it certainly violated the Law of Least Astonishment (even to its designer ;-) ). So now if you execute something like this:

    mpirun -mca pls_rsh_debug 1 -np 4 hello

You'll see debugging output from the rsh pls component, as you would expect (this was not previously the case -- the MCA pls_rsh_debug parameter would be set to 1 in the 4 spawned hello processes, but *not* in the orterun process).

More specifically, MCA parameters will be set in the orterun process in the following cases:

- The new command line switch "--gmca" (or "-gmca") is used, indicating that the MCA parameter is "global". --gmca also means that the MCA parameter will be applied to all context apps. For example:

      mpirun -gmca foo bar -np 1 hello : -np 2 goodbye

  The foo MCA param will be set in both the hello and goodbye processes.

- If there is only one context app. For example:

      mpirun -mca pls_rsh_debug 1 -np 4 hello

  will set pls_rsh_debug to 1 in both the orterun process and the 4 spawned hello processes.

Also added a few more comments inside orterun to document a somewhat confusing use of a state variable in a recursive case.

This commit was SVN r6764.
Этот коммит содержится в:
родитель
473720c22f
Коммит
32e71e5c6c
@ -71,7 +71,7 @@ int main(int argc, char *argv[])
|
||||
bool cmd_error = false;
|
||||
bool acted = false;
|
||||
bool want_all = false;
|
||||
char **env = NULL;
|
||||
char **app_env = NULL, **global_env = NULL;
|
||||
int i, len;
|
||||
|
||||
// Initialize the argv parsing handle
|
||||
@ -145,16 +145,20 @@ int main(int argc, char *argv[])
|
||||
exit(cmd_error ? 1 : 0);
|
||||
}
|
||||
|
||||
mca_base_cmd_line_process_args(cmd_line, &env);
|
||||
mca_base_cmd_line_process_args(cmd_line, &app_env, &global_env);
|
||||
|
||||
// putenv() all the stuff that we got back from env (in case the
|
||||
// user specified some --mca params on the command line). This
|
||||
// creates a memory leak, but that's unfortunately how putenv()
|
||||
// works. :-(
|
||||
|
||||
len = opal_argv_count(env);
|
||||
len = opal_argv_count(app_env);
|
||||
for (i = 0; i < len; ++i) {
|
||||
putenv(env[i]);
|
||||
putenv(app_env[i]);
|
||||
}
|
||||
len = opal_argv_count(global_env);
|
||||
for (i = 0; i < len; ++i) {
|
||||
putenv(global_env[i]);
|
||||
}
|
||||
|
||||
ompi_info::mca_types.push_back("mca");
|
||||
@ -233,8 +237,11 @@ int main(int argc, char *argv[])
|
||||
|
||||
// All done
|
||||
|
||||
if (NULL != env) {
|
||||
opal_argv_free(env);
|
||||
if (NULL != app_env) {
|
||||
opal_argv_free(app_env);
|
||||
}
|
||||
if (NULL != global_env) {
|
||||
opal_argv_free(global_env);
|
||||
}
|
||||
ompi_info::close_components();
|
||||
OBJ_RELEASE(cmd_line);
|
||||
|
@ -105,8 +105,8 @@ OMPI_DECLSPEC int mca_base_close(void);
|
||||
|
||||
OMPI_DECLSPEC int mca_base_cmd_line_setup(opal_cmd_line_t *cmd);
|
||||
OMPI_DECLSPEC int mca_base_cmd_line_process_args(opal_cmd_line_t *cmd,
|
||||
char ***env);
|
||||
OMPI_DECLSPEC int mca_base_cmd_line_process_arg(const char *param, const char *value);
|
||||
char ***app_env,
|
||||
char ***global_env);
|
||||
|
||||
/* mca_base_component_compare.c */
|
||||
|
||||
|
@ -29,10 +29,13 @@
|
||||
/*
|
||||
* Private variables
|
||||
*/
|
||||
static int mca_param_argc = 0;
|
||||
static char **mca_param_argv = NULL;
|
||||
static int mca_value_argc = 0;
|
||||
static char **mca_value_argv = NULL;
|
||||
|
||||
/*
|
||||
* Private functions
|
||||
*/
|
||||
static int process_arg(const char *param, const char *value,
|
||||
char ***params, char ***values);
|
||||
static void add_to_env(char **params, char **values, char ***env);
|
||||
|
||||
|
||||
/*
|
||||
@ -40,8 +43,16 @@ static char **mca_value_argv = NULL;
|
||||
*/
|
||||
int mca_base_cmd_line_setup(opal_cmd_line_t *cmd)
|
||||
{
|
||||
return opal_cmd_line_make_opt3(cmd, '\0', "mca", "mca", 2,
|
||||
"Pass MCA parameters (arg0 is the parameter name; arg1 is the parameter value)");
|
||||
int ret;
|
||||
ret = opal_cmd_line_make_opt3(cmd, '\0', "mca", "mca", 2,
|
||||
"Pass context-specific MCA parameters; they are considered global if --gmca is not used and only one context is specified (arg0 is the parameter name; arg1 is the parameter value)");
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = opal_cmd_line_make_opt3(cmd, '\0', "gmca", "gmca", 2,
|
||||
"Pass global MCA parameters that are applicable to all contexts (arg0 is the parameter name; arg1 is the parameter value)");
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
@ -49,83 +60,99 @@ int mca_base_cmd_line_setup(opal_cmd_line_t *cmd)
|
||||
* Look for and handle any -mca options on the command line
|
||||
*/
|
||||
int mca_base_cmd_line_process_args(opal_cmd_line_t *cmd,
|
||||
char ***env)
|
||||
char ***context_env, char ***global_env)
|
||||
{
|
||||
int i, num_insts;
|
||||
char *name;
|
||||
char **params;
|
||||
char **values;
|
||||
|
||||
/* First, wipe out any previous results */
|
||||
/* If no relevant parameters were given, just return */
|
||||
|
||||
if (mca_param_argc > 0) {
|
||||
opal_argv_free(mca_param_argv);
|
||||
opal_argv_free(mca_value_argv);
|
||||
mca_param_argv = mca_value_argv = NULL;
|
||||
mca_param_argc = mca_value_argc = 0;
|
||||
}
|
||||
|
||||
/* If no "-mca" parameters were given, just return */
|
||||
|
||||
if (!opal_cmd_line_is_taken(cmd, "mca")) {
|
||||
if (!opal_cmd_line_is_taken(cmd, "mca") &&
|
||||
!opal_cmd_line_is_taken(cmd, "gmca")) {
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* Otherwise, assemble them into an argc/argv */
|
||||
/* Handle app context-specific parameters */
|
||||
|
||||
num_insts = opal_cmd_line_get_ninsts(cmd, "mca");
|
||||
params = values = NULL;
|
||||
for (i = 0; i < num_insts; ++i) {
|
||||
mca_base_cmd_line_process_arg(opal_cmd_line_get_param(cmd, "mca", i, 0),
|
||||
opal_cmd_line_get_param(cmd, "mca", i, 1));
|
||||
process_arg(opal_cmd_line_get_param(cmd, "mca", i, 0),
|
||||
opal_cmd_line_get_param(cmd, "mca", i, 1),
|
||||
&params, &values);
|
||||
}
|
||||
if (NULL != params) {
|
||||
add_to_env(params, values, context_env);
|
||||
opal_argv_free(params);
|
||||
opal_argv_free(values);
|
||||
}
|
||||
|
||||
/* Now put that argc/argv in the environment */
|
||||
/* Handle global parameters */
|
||||
|
||||
if (NULL == mca_param_argv) {
|
||||
return OMPI_SUCCESS;
|
||||
num_insts = opal_cmd_line_get_ninsts(cmd, "gmca");
|
||||
params = values = NULL;
|
||||
for (i = 0; i < num_insts; ++i) {
|
||||
process_arg(opal_cmd_line_get_param(cmd, "gmca", i, 0),
|
||||
opal_cmd_line_get_param(cmd, "gmca", i, 1),
|
||||
&params, &values);
|
||||
}
|
||||
if (NULL != params) {
|
||||
add_to_env(params, values, global_env);
|
||||
opal_argv_free(params);
|
||||
opal_argv_free(values);
|
||||
}
|
||||
|
||||
/* Loop through all the -mca args that we've gotten and make env
|
||||
vars of the form OMPI_MCA_*=value. This is a memory leak, but
|
||||
that's how putenv works. :-( */
|
||||
|
||||
for (i = 0; NULL != mca_param_argv[i]; ++i) {
|
||||
name = mca_base_param_environ_variable(mca_param_argv[i], NULL, NULL);
|
||||
opal_setenv(name, mca_value_argv[i], true, env);
|
||||
free(name);
|
||||
}
|
||||
/* All done */
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Process a single MCA argument. Done as a separate function so that
|
||||
* top-level applications can directly invoke this to effect MCA
|
||||
* command line arguments.
|
||||
* Process a single MCA argument.
|
||||
*/
|
||||
int mca_base_cmd_line_process_arg(const char *param, const char *value)
|
||||
int static process_arg(const char *param, const char *value,
|
||||
char ***params, char ***values)
|
||||
{
|
||||
int i;
|
||||
char *new_str;
|
||||
int i;
|
||||
char *new_str;
|
||||
|
||||
/* Look to see if we've already got an -mca argument for the same
|
||||
param. Check against the list of MCA param's that we've already
|
||||
saved arguments for. */
|
||||
/* Look to see if we've already got an -mca argument for the same
|
||||
param. Check against the list of MCA param's that we've
|
||||
already saved arguments for. */
|
||||
|
||||
for (i = 0; NULL != mca_param_argv && NULL != mca_param_argv[i]; ++i) {
|
||||
if (0 == strcmp(param, mca_param_argv[i])) {
|
||||
asprintf(&new_str, "%s,%s", mca_value_argv[i], value);
|
||||
free(mca_value_argv[i]);
|
||||
mca_value_argv[i] = new_str;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
for (i = 0; NULL != *params && NULL != (*params)[i]; ++i) {
|
||||
if (0 == strcmp(param, (*params)[i])) {
|
||||
asprintf(&new_str, "%s,%s", (*values)[i], value);
|
||||
free((*values)[i]);
|
||||
(*values)[i] = new_str;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* If we didn't already have a value for the same param, save this
|
||||
one away */
|
||||
/* If we didn't already have a value for the same param, save
|
||||
this one away */
|
||||
|
||||
opal_argv_append(&mca_param_argc, &mca_param_argv, param);
|
||||
opal_argv_append(&mca_value_argc, &mca_value_argv, value);
|
||||
opal_argv_append_nosize(params, param);
|
||||
opal_argv_append_nosize(values, value);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static void add_to_env(char **params, char **values, char ***env)
|
||||
{
|
||||
int i;
|
||||
char *name;
|
||||
|
||||
/* Loop through all the args that we've gotten and make env
|
||||
vars of the form OMPI_MCA_*=value. */
|
||||
|
||||
for (i = 0; NULL != params && NULL != params[i]; ++i) {
|
||||
name = mca_base_param_environ_variable(params[i], NULL, NULL);
|
||||
opal_setenv(name, values[i], true, env);
|
||||
free(name);
|
||||
}
|
||||
}
|
||||
|
@ -76,6 +76,7 @@ static char *orterun_basename = NULL;
|
||||
static int max_display_aborted = 1;
|
||||
static int num_aborted = 0;
|
||||
static int num_killed = 0;
|
||||
static char **global_mca_env = NULL;
|
||||
|
||||
/*
|
||||
* setup globals for catching orterun command line options
|
||||
@ -211,7 +212,7 @@ opal_cmd_line_init_t cmd_line_init[] = {
|
||||
static void exit_callback(int fd, short event, void *arg);
|
||||
static void signal_callback(int fd, short flags, void *arg);
|
||||
static int create_app(int argc, char* argv[], orte_app_context_t **app,
|
||||
bool *made_app, char ***env);
|
||||
bool *made_app, char ***app_env);
|
||||
static int init_globals(void);
|
||||
static int parse_globals(int argc, char* argv[]);
|
||||
static int parse_locals(int argc, char* argv[]);
|
||||
@ -252,7 +253,7 @@ int main(int argc, char *argv[])
|
||||
for (j = i = 0; i < array_size; ++i) {
|
||||
apps[num_apps] = (orte_app_context_t *)
|
||||
orte_pointer_array_get_item(apps_pa, i);
|
||||
if(NULL != apps[num_apps]) {
|
||||
if (NULL != apps[num_apps]) {
|
||||
j += apps[num_apps]->num_procs;
|
||||
num_apps++;
|
||||
}
|
||||
@ -284,8 +285,7 @@ int main(int argc, char *argv[])
|
||||
|
||||
id = mca_base_param_reg_int_name("orte_base", "infrastructure",
|
||||
"Whether we are ORTE infrastructure or an ORTE application",
|
||||
false, false, (int)false, NULL);
|
||||
mca_base_param_set_int(id, (int)true);
|
||||
false, false, (int)true, NULL);
|
||||
|
||||
/* now call orte_init and setup the RTE */
|
||||
if (ORTE_SUCCESS != (rc = orte_init())) {
|
||||
@ -645,7 +645,7 @@ static int init_globals(void)
|
||||
static int parse_globals(int argc, char* argv[])
|
||||
{
|
||||
opal_cmd_line_t cmd_line;
|
||||
int ras, ret;
|
||||
int id, ret;
|
||||
|
||||
/* Setup and parse the command line */
|
||||
|
||||
@ -674,14 +674,14 @@ static int parse_globals(int argc, char* argv[])
|
||||
MCA param. */
|
||||
|
||||
/* JMS To be changed post-beta to LAM's C/N command line notation */
|
||||
ras = mca_base_param_register_string("ras", "base", "schedule_policy",
|
||||
id = mca_base_param_register_string("ras", "base", "schedule_policy",
|
||||
NULL, "slot");
|
||||
if (orterun_globals.by_node) {
|
||||
orterun_globals.by_slot = false;
|
||||
mca_base_param_set_string(ras, "node");
|
||||
mca_base_param_set_string(id, "node");
|
||||
} else {
|
||||
orterun_globals.by_slot = true;
|
||||
mca_base_param_set_string(ras, "slot");
|
||||
mca_base_param_set_string(id, "slot");
|
||||
}
|
||||
|
||||
/* If we don't want to wait, we don't want to wait */
|
||||
@ -699,10 +699,10 @@ static int parse_locals(int argc, char* argv[])
|
||||
{
|
||||
int i, rc, app_num;
|
||||
int temp_argc;
|
||||
char **temp_argv;
|
||||
char **temp_argv, **env;
|
||||
orte_app_context_t *app;
|
||||
bool made_app;
|
||||
char **env;
|
||||
size_t j, size1;
|
||||
|
||||
/* Make the apps */
|
||||
|
||||
@ -711,6 +711,10 @@ static int parse_locals(int argc, char* argv[])
|
||||
opal_argv_append(&temp_argc, &temp_argv, argv[0]);
|
||||
orte_pointer_array_init(&apps_pa, 1, argc + 1, 2);
|
||||
|
||||
/* NOTE: This bogus env variable is necessary in the calls to
|
||||
create_app(), below. See comment immediately before the
|
||||
create_app() function for an explanation. */
|
||||
|
||||
env = NULL;
|
||||
for (app_num = 0, i = 1; i < argc; ++i) {
|
||||
if (0 == strcmp(argv[i], ":")) {
|
||||
@ -769,14 +773,95 @@ static int parse_locals(int argc, char* argv[])
|
||||
}
|
||||
opal_argv_free(temp_argv);
|
||||
|
||||
/* Once we've created all the apps, add the global MCA params to
|
||||
each app's environment (checking for duplicates, of
|
||||
course -- yay opal_environ_merge()). */
|
||||
|
||||
if (NULL != global_mca_env) {
|
||||
size1 = orte_pointer_array_get_size(apps_pa);
|
||||
/* Iterate through all the apps */
|
||||
for (j = 0; j < size1; ++j) {
|
||||
app = (orte_app_context_t *)
|
||||
orte_pointer_array_get_item(apps_pa, j);
|
||||
if (NULL != app) {
|
||||
/* Use handy utility function */
|
||||
env = opal_environ_merge(global_mca_env, app->env);
|
||||
opal_argv_free(app->env);
|
||||
app->env = env;
|
||||
app->num_env = opal_argv_count(app->env);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Now take a subset of the MCA params and set them as MCA
|
||||
overrides here in orterun (so that when we orte_init() later,
|
||||
all the components see these MCA params). Here's how we decide
|
||||
which subset of the MCA params we set here in orterun:
|
||||
|
||||
1. If any global MCA params were set, use those
|
||||
2. If no global MCA params were set and there was only one app,
|
||||
then use its app MCA params
|
||||
3. Otherwise, don't set any
|
||||
*/
|
||||
|
||||
env = NULL;
|
||||
if (NULL != global_mca_env) {
|
||||
env = global_mca_env;
|
||||
} else {
|
||||
if (orte_pointer_array_get_size(apps_pa) >= 1) {
|
||||
/* Remember that pointer_array's can be padded with NULL
|
||||
entries; so only use the app's env if there is exactly
|
||||
1 non-NULL entry */
|
||||
app = (orte_app_context_t *)
|
||||
orte_pointer_array_get_item(apps_pa, 0);
|
||||
if (NULL != app) {
|
||||
env = app->env;
|
||||
for (j = 1; j < orte_pointer_array_get_size(apps_pa); ++j) {
|
||||
if (NULL != orte_pointer_array_get_item(apps_pa, j)) {
|
||||
env = NULL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (NULL != env) {
|
||||
size1 = opal_argv_count(env);
|
||||
for (j = 0; j < size1; ++j) {
|
||||
putenv(env[j]);
|
||||
}
|
||||
}
|
||||
|
||||
/* All done */
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* This function takes a "char ***app_env" parameter to handle the
|
||||
* specific case:
|
||||
*
|
||||
* orterun --mca foo bar -app appfile
|
||||
*
|
||||
* That is, we'll need to keep foo=bar, but the presence of the app
|
||||
* file will cause an invocation of parse_appfile(), which will cause
|
||||
* one or more recursive calls back to create_app(). Since the
|
||||
* foo=bar value applies globally to all apps in the appfile, we need
|
||||
* to pass in the "base" environment (that contains the foo=bar value)
|
||||
* when we parse each line in the appfile.
|
||||
*
|
||||
* This is really just a special case -- when we have a simple case like:
|
||||
*
|
||||
* orterun --mca foo bar -np 4 hostname
|
||||
*
|
||||
* Then the upper-level function (parse_locals()) calls create_app()
|
||||
* with a NULL value for app_env, meaning that there is no "base"
|
||||
* environment that the app needs to be created from.
|
||||
*/
|
||||
static int create_app(int argc, char* argv[], orte_app_context_t **app_ptr,
|
||||
bool *made_app, char ***env)
|
||||
bool *made_app, char ***app_env)
|
||||
{
|
||||
opal_cmd_line_t cmd_line;
|
||||
char cwd[OMPI_PATH_MAX];
|
||||
@ -881,13 +966,13 @@ static int create_app(int argc, char* argv[], orte_app_context_t **app_ptr,
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
goto cleanup;
|
||||
}
|
||||
mca_base_cmd_line_process_args(&cmd_line, env);
|
||||
mca_base_cmd_line_process_args(&cmd_line, app_env, &global_mca_env);
|
||||
|
||||
/* Is there an appfile in here? */
|
||||
|
||||
if (NULL != orterun_globals.appfile) {
|
||||
OBJ_DESTRUCT(&cmd_line);
|
||||
return parse_appfile(strdup(orterun_globals.appfile), env);
|
||||
return parse_appfile(strdup(orterun_globals.appfile), app_env);
|
||||
}
|
||||
|
||||
/* Setup application context */
|
||||
@ -906,8 +991,7 @@ static int create_app(int argc, char* argv[], orte_app_context_t **app_ptr,
|
||||
|
||||
/* Grab all OMPI_* environment variables */
|
||||
|
||||
app->env = opal_argv_copy(*env);
|
||||
app->num_env = opal_argv_count(*env);
|
||||
app->env = opal_argv_copy(*app_env);
|
||||
for (i = 0; NULL != environ[i]; ++i) {
|
||||
if (0 == strncmp("OMPI_", environ[i], 5)) {
|
||||
opal_argv_append_nosize(&app->env, environ[i]);
|
||||
@ -1118,9 +1202,17 @@ static int parse_appfile(char *filename, char ***env)
|
||||
argc = opal_argv_count(argv);
|
||||
if (argc > 0) {
|
||||
|
||||
/* Create a temporary env to play with in the recursive
|
||||
call -- that is: don't disturb the original env so that
|
||||
we can have a consistent global env */
|
||||
/* Create a temporary env to use in the recursive call --
|
||||
that is: don't disturb the original env so that we can
|
||||
have a consistent global env. This allows for the
|
||||
case:
|
||||
|
||||
orterun --mca foo bar --appfile file
|
||||
|
||||
where the "file" contains multiple apps. In this case,
|
||||
each app in "file" will get *only* foo=bar as the base
|
||||
environment from which its specific environment is
|
||||
constructed. */
|
||||
|
||||
if (NULL != *env) {
|
||||
tmp_env = opal_argv_copy(*env);
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user