1
1

Fix a problem where orterun itself would not receive MCA parameters

that were set on the command line.  This was technically exactly the
way the code was designed, but it certainly violated the Law of Least
Astonishment (even to its designer ;-) ).  So now if you execute
something like this:

   mpirun -mca pls_rsh_debug 1 -np 4 hello

You'll see debugging output from the rsh pls component, as you would
expect (this was not previously the case -- the MCA pls_rsh_debug
param would be set to 1 in the 4 spawned hello processes, but *not*
in the orterun process).

More specifically, MCA parameters will be set in the orterun process
in the following cases:

- The new command line switch "--gmca" (or "-gmca") is used,
  indicating that the MCA parameter is "global".  --gmca also means
  that the MCA parameter will be applied to all context apps.  For
  example:

      mpirun -gmca foo bar -np 1 hello : -np 2 goodbye

  The foo MCA param will be set in both the hello and goodbye
  processes.

- If there is only one context app.  For example:

      mpirun -mca pls_rsh_debug 1 -np 4 hello

  will set pls_rsh_debug to 1 in both the orterun process and the 4
  spawned hello processes.

Also added a few more comments inside orterun to document a somewhat
confusing use of a state variable in a recursive case.

This commit was SVN r6764.
Этот коммит содержится в:
Jeff Squyres 2005-08-08 16:42:28 +00:00
родитель 473720c22f
Коммит 32e71e5c6c
4 изменённых файлов: 208 добавлений и 82 удалений
ompi/tools/ompi_info
opal/mca/base
orte/tools/orterun

Просмотреть файл

@ -71,7 +71,7 @@ int main(int argc, char *argv[])
bool cmd_error = false;
bool acted = false;
bool want_all = false;
char **env = NULL;
char **app_env = NULL, **global_env = NULL;
int i, len;
// Initialize the argv parsing handle
@ -145,16 +145,20 @@ int main(int argc, char *argv[])
exit(cmd_error ? 1 : 0);
}
mca_base_cmd_line_process_args(cmd_line, &env);
mca_base_cmd_line_process_args(cmd_line, &app_env, &global_env);
// putenv() all the stuff that we got back from env (in case the
// user specified some --mca params on the command line). This
// creates a memory leak, but that's unfortunately how putenv()
// works. :-(
len = opal_argv_count(env);
len = opal_argv_count(app_env);
for (i = 0; i < len; ++i) {
putenv(env[i]);
putenv(app_env[i]);
}
len = opal_argv_count(global_env);
for (i = 0; i < len; ++i) {
putenv(global_env[i]);
}
ompi_info::mca_types.push_back("mca");
@ -233,8 +237,11 @@ int main(int argc, char *argv[])
// All done
if (NULL != env) {
opal_argv_free(env);
if (NULL != app_env) {
opal_argv_free(app_env);
}
if (NULL != global_env) {
opal_argv_free(global_env);
}
ompi_info::close_components();
OBJ_RELEASE(cmd_line);

Просмотреть файл

@ -105,8 +105,8 @@ OMPI_DECLSPEC int mca_base_close(void);
OMPI_DECLSPEC int mca_base_cmd_line_setup(opal_cmd_line_t *cmd);
OMPI_DECLSPEC int mca_base_cmd_line_process_args(opal_cmd_line_t *cmd,
char ***env);
OMPI_DECLSPEC int mca_base_cmd_line_process_arg(const char *param, const char *value);
char ***app_env,
char ***global_env);
/* mca_base_component_compare.c */

Просмотреть файл

@ -29,10 +29,13 @@
/*
* Private variables
*/
static int mca_param_argc = 0;
static char **mca_param_argv = NULL;
static int mca_value_argc = 0;
static char **mca_value_argv = NULL;
/*
* Private functions
*/
static int process_arg(const char *param, const char *value,
char ***params, char ***values);
static void add_to_env(char **params, char **values, char ***env);
/*
@ -40,8 +43,16 @@ static char **mca_value_argv = NULL;
*/
int mca_base_cmd_line_setup(opal_cmd_line_t *cmd)
{
return opal_cmd_line_make_opt3(cmd, '\0', "mca", "mca", 2,
"Pass MCA parameters (arg0 is the parameter name; arg1 is the parameter value)");
int ret;
ret = opal_cmd_line_make_opt3(cmd, '\0', "mca", "mca", 2,
"Pass context-specific MCA parameters; they are considered global if --gmca is not used and only one context is specified (arg0 is the parameter name; arg1 is the parameter value)");
if (OMPI_SUCCESS != ret) {
return ret;
}
ret = opal_cmd_line_make_opt3(cmd, '\0', "gmca", "gmca", 2,
"Pass global MCA parameters that are applicable to all contexts (arg0 is the parameter name; arg1 is the parameter value)");
return ret;
}
@ -49,83 +60,99 @@ int mca_base_cmd_line_setup(opal_cmd_line_t *cmd)
* Look for and handle any -mca options on the command line
*/
int mca_base_cmd_line_process_args(opal_cmd_line_t *cmd,
char ***env)
char ***context_env, char ***global_env)
{
int i, num_insts;
char *name;
char **params;
char **values;
/* First, wipe out any previous results */
/* If no relevant parameters were given, just return */
if (mca_param_argc > 0) {
opal_argv_free(mca_param_argv);
opal_argv_free(mca_value_argv);
mca_param_argv = mca_value_argv = NULL;
mca_param_argc = mca_value_argc = 0;
}
/* If no "-mca" parameters were given, just return */
if (!opal_cmd_line_is_taken(cmd, "mca")) {
if (!opal_cmd_line_is_taken(cmd, "mca") &&
!opal_cmd_line_is_taken(cmd, "gmca")) {
return OMPI_SUCCESS;
}
/* Otherwise, assemble them into an argc/argv */
/* Handle app context-specific parameters */
num_insts = opal_cmd_line_get_ninsts(cmd, "mca");
params = values = NULL;
for (i = 0; i < num_insts; ++i) {
mca_base_cmd_line_process_arg(opal_cmd_line_get_param(cmd, "mca", i, 0),
opal_cmd_line_get_param(cmd, "mca", i, 1));
process_arg(opal_cmd_line_get_param(cmd, "mca", i, 0),
opal_cmd_line_get_param(cmd, "mca", i, 1),
&params, &values);
}
if (NULL != params) {
add_to_env(params, values, context_env);
opal_argv_free(params);
opal_argv_free(values);
}
/* Now put that argc/argv in the environment */
/* Handle global parameters */
if (NULL == mca_param_argv) {
return OMPI_SUCCESS;
num_insts = opal_cmd_line_get_ninsts(cmd, "gmca");
params = values = NULL;
for (i = 0; i < num_insts; ++i) {
process_arg(opal_cmd_line_get_param(cmd, "gmca", i, 0),
opal_cmd_line_get_param(cmd, "gmca", i, 1),
&params, &values);
}
if (NULL != params) {
add_to_env(params, values, global_env);
opal_argv_free(params);
opal_argv_free(values);
}
/* Loop through all the -mca args that we've gotten and make env
vars of the form OMPI_MCA_*=value. This is a memory leak, but
that's how putenv works. :-( */
for (i = 0; NULL != mca_param_argv[i]; ++i) {
name = mca_base_param_environ_variable(mca_param_argv[i], NULL, NULL);
opal_setenv(name, mca_value_argv[i], true, env);
free(name);
}
/* All done */
return OMPI_SUCCESS;
}
/*
* Process a single MCA argument. Done as a separate function so that
* top-level applications can directly invoke this to effect MCA
* command line arguments.
* Process a single MCA argument.
*/
int mca_base_cmd_line_process_arg(const char *param, const char *value)
int static process_arg(const char *param, const char *value,
char ***params, char ***values)
{
int i;
char *new_str;
int i;
char *new_str;
/* Look to see if we've already got an -mca argument for the same
param. Check against the list of MCA param's that we've already
saved arguments for. */
/* Look to see if we've already got an -mca argument for the same
param. Check against the list of MCA param's that we've
already saved arguments for. */
for (i = 0; NULL != mca_param_argv && NULL != mca_param_argv[i]; ++i) {
if (0 == strcmp(param, mca_param_argv[i])) {
asprintf(&new_str, "%s,%s", mca_value_argv[i], value);
free(mca_value_argv[i]);
mca_value_argv[i] = new_str;
return OMPI_SUCCESS;
for (i = 0; NULL != *params && NULL != (*params)[i]; ++i) {
if (0 == strcmp(param, (*params)[i])) {
asprintf(&new_str, "%s,%s", (*values)[i], value);
free((*values)[i]);
(*values)[i] = new_str;
return OMPI_SUCCESS;
}
}
}
/* If we didn't already have an value for the same param, save this
one away */
/* If we didn't already have an value for the same param, save
this one away */
opal_argv_append(&mca_param_argc, &mca_param_argv, param);
opal_argv_append(&mca_value_argc, &mca_value_argv, value);
opal_argv_append_nosize(params, param);
opal_argv_append_nosize(values, value);
return OMPI_SUCCESS;
return OMPI_SUCCESS;
}
static void add_to_env(char **params, char **values, char ***env)
{
int i;
char *name;
/* Loop through all the args that we've gotten and make env
vars of the form OMPI_MCA_*=value. */
for (i = 0; NULL != params && NULL != params[i]; ++i) {
name = mca_base_param_environ_variable(params[i], NULL, NULL);
opal_setenv(name, values[i], true, env);
free(name);
}
}

Просмотреть файл

@ -76,6 +76,7 @@ static char *orterun_basename = NULL;
static int max_display_aborted = 1;
static int num_aborted = 0;
static int num_killed = 0;
static char **global_mca_env = NULL;
/*
* setup globals for catching orterun command line options
@ -211,7 +212,7 @@ opal_cmd_line_init_t cmd_line_init[] = {
static void exit_callback(int fd, short event, void *arg);
static void signal_callback(int fd, short flags, void *arg);
static int create_app(int argc, char* argv[], orte_app_context_t **app,
bool *made_app, char ***env);
bool *made_app, char ***app_env);
static int init_globals(void);
static int parse_globals(int argc, char* argv[]);
static int parse_locals(int argc, char* argv[]);
@ -252,7 +253,7 @@ int main(int argc, char *argv[])
for (j = i = 0; i < array_size; ++i) {
apps[num_apps] = (orte_app_context_t *)
orte_pointer_array_get_item(apps_pa, i);
if(NULL != apps[num_apps]) {
if (NULL != apps[num_apps]) {
j += apps[num_apps]->num_procs;
num_apps++;
}
@ -284,8 +285,7 @@ int main(int argc, char *argv[])
id = mca_base_param_reg_int_name("orte_base", "infrastructure",
"Whether we are ORTE infrastructure or an ORTE application",
false, false, (int)false, NULL);
mca_base_param_set_int(id, (int)true);
false, false, (int)true, NULL);
/* now call orte_init and setup the RTE */
if (ORTE_SUCCESS != (rc = orte_init())) {
@ -645,7 +645,7 @@ static int init_globals(void)
static int parse_globals(int argc, char* argv[])
{
opal_cmd_line_t cmd_line;
int ras, ret;
int id, ret;
/* Setup and parse the command line */
@ -674,14 +674,14 @@ static int parse_globals(int argc, char* argv[])
MCA param. */
/* JMS To be changed post-beta to LAM's C/N command line notation */
ras = mca_base_param_register_string("ras", "base", "schedule_policy",
id = mca_base_param_register_string("ras", "base", "schedule_policy",
NULL, "slot");
if (orterun_globals.by_node) {
orterun_globals.by_slot = false;
mca_base_param_set_string(ras, "node");
mca_base_param_set_string(id, "node");
} else {
orterun_globals.by_slot = true;
mca_base_param_set_string(ras, "slot");
mca_base_param_set_string(id, "slot");
}
/* If we don't want to wait, we don't want to wait */
@ -699,10 +699,10 @@ static int parse_locals(int argc, char* argv[])
{
int i, rc, app_num;
int temp_argc;
char **temp_argv;
char **temp_argv, **env;
orte_app_context_t *app;
bool made_app;
char **env;
size_t j, size1;
/* Make the apps */
@ -711,6 +711,10 @@ static int parse_locals(int argc, char* argv[])
opal_argv_append(&temp_argc, &temp_argv, argv[0]);
orte_pointer_array_init(&apps_pa, 1, argc + 1, 2);
/* NOTE: This bogus env variable is necessary in the calls to
create_app(), below. See comment immediately before the
create_app() function for an explanation. */
env = NULL;
for (app_num = 0, i = 1; i < argc; ++i) {
if (0 == strcmp(argv[i], ":")) {
@ -769,14 +773,95 @@ static int parse_locals(int argc, char* argv[])
}
opal_argv_free(temp_argv);
/* Once we've created all the apps, add the global MCA params to
each app's environment (checking for duplicates, of
course -- yay opal_environ_merge()). */
if (NULL != global_mca_env) {
size1 = orte_pointer_array_get_size(apps_pa);
/* Iterate through all the apps */
for (j = 0; j < size1; ++j) {
app = (orte_app_context_t *)
orte_pointer_array_get_item(apps_pa, j);
if (NULL != app) {
/* Use handy utility function */
env = opal_environ_merge(global_mca_env, app->env);
opal_argv_free(app->env);
app->env = env;
app->num_env = opal_argv_count(app->env);
}
}
}
/* Now take a subset of the MCA params and set them as MCA
overrides here in orterun (so that when we orte_init() later,
all the components see these MCA params). Here's how we decide
which subset of the MCA params we set here in orterun:
1. If any global MCA params were set, use those
2. If no global MCA params were set and there was only one app,
then use its app MCA params
3. Otherwise, don't set any
*/
env = NULL;
if (NULL != global_mca_env) {
env = global_mca_env;
} else {
if (orte_pointer_array_get_size(apps_pa) >= 1) {
/* Remember that pointer_array's can be padded with NULL
entries; so only use the app's env if there is exactly
1 non-NULL entry */
app = (orte_app_context_t *)
orte_pointer_array_get_item(apps_pa, 0);
if (NULL != app) {
env = app->env;
for (j = 1; j < orte_pointer_array_get_size(apps_pa); ++j) {
if (NULL != orte_pointer_array_get_item(apps_pa, j)) {
env = NULL;
break;
}
}
}
}
}
if (NULL != env) {
size1 = opal_argv_count(env);
for (j = 0; j < size1; ++j) {
putenv(env[j]);
}
}
/* All done */
return ORTE_SUCCESS;
}
/*
* This function takes a "char ***app_env" parameter to handle the
* specific case:
*
* orterun --mca foo bar -app appfile
*
* That is, we'll need to keep foo=bar, but the presence of the app
* file will cause an invocation of parse_appfile(), which will cause
* one or more recursive calls back to create_app(). Since the
* foo=bar value applies globally to all apps in the appfile, we need
* to pass in the "base" environment (that contains the foo=bar value)
* when we parse each line in the appfile.
*
* This is really just a special case -- when we have a simple case like:
*
* orterun --mca foo bar -np 4 hostname
*
* Then the upper-level function (parse_locals()) calls create_app()
* with a NULL value for app_env, meaning that there is no "base"
* environment that the app needs to be created from.
*/
static int create_app(int argc, char* argv[], orte_app_context_t **app_ptr,
bool *made_app, char ***env)
bool *made_app, char ***app_env)
{
opal_cmd_line_t cmd_line;
char cwd[OMPI_PATH_MAX];
@ -881,13 +966,13 @@ static int create_app(int argc, char* argv[], orte_app_context_t **app_ptr,
if (OMPI_SUCCESS != rc) {
goto cleanup;
}
mca_base_cmd_line_process_args(&cmd_line, env);
mca_base_cmd_line_process_args(&cmd_line, app_env, &global_mca_env);
/* Is there an appfile in here? */
if (NULL != orterun_globals.appfile) {
OBJ_DESTRUCT(&cmd_line);
return parse_appfile(strdup(orterun_globals.appfile), env);
return parse_appfile(strdup(orterun_globals.appfile), app_env);
}
/* Setup application context */
@ -906,8 +991,7 @@ static int create_app(int argc, char* argv[], orte_app_context_t **app_ptr,
/* Grab all OMPI_* environment variables */
app->env = opal_argv_copy(*env);
app->num_env = opal_argv_count(*env);
app->env = opal_argv_copy(*app_env);
for (i = 0; NULL != environ[i]; ++i) {
if (0 == strncmp("OMPI_", environ[i], 5)) {
opal_argv_append_nosize(&app->env, environ[i]);
@ -1118,9 +1202,17 @@ static int parse_appfile(char *filename, char ***env)
argc = opal_argv_count(argv);
if (argc > 0) {
/* Create a temporary env to play with in the recursive
call -- that is: don't disturb the original env so that
we can have a consistent global env */
/* Create a temporary env to use in the recursive call --
that is: don't disturb the original env so that we can
have a consistent global env. This allows for the
case:
orterun --mca foo bar --appfile file
where the "file" contains multiple apps. In this case,
each app in "file" will get *only* foo=bar as the base
environment from which its specific environment is
constructed. */
if (NULL != *env) {
tmp_env = opal_argv_copy(*env);