1
1

Cleaner rendition now extended to other environments. Remove MCA params for backend procs that can cause trouble. Specifically, any directives on the selection of components for RDS, RAS, RMAPS, PLS, and RMGR can be bad mojo on the backend.

This patch will cause a problem for cnos, however, as there we want to specifically tell the backends to be "null". I'm working on that issue.

This commit was SVN r12225.
Этот коммит содержится в:
Ralph Castain 2006-10-20 16:50:13 +00:00
родитель 02efd07b60
Коммит c07d4e2510
14 изменённых файлов: 179 добавлений и 207 удалений

Просмотреть файл

@ -24,7 +24,6 @@ libmca_odls_la_SOURCES += \
base/odls_base_close.c \
base/odls_base_open.c \
base/odls_base_select.c \
base/odls_base_support_fns.c \
base/data_type_support/odls_compare_fns.c \
base/data_type_support/odls_copy_fns.c \
base/data_type_support/odls_packing_fns.c \

Просмотреть файл

@ -1,61 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/util/opal_environ.h"
#include "orte/mca/odls/base/odls_private.h"
/*
 * Purge component-selection directives from a process environment.
 *
 * For each of the rds, ras, rmaps, pls and rmgr frameworks (in that
 * order) the matching MCA environment variable name is looked up and
 * passed to opal_unsetenv() for the supplied environment.
 *
 * Returns ORTE_SUCCESS after all five frameworks have been handled, or
 * ORTE_ERR_OUT_OF_RESOURCE as soon as a variable name cannot be obtained
 * (frameworks already processed stay purged).
 */
int orte_odls_base_purge_environment(char ***environ)
{
    /* NULL-terminated list of frameworks whose directives are dropped */
    static const char *const frameworks[] = {
        "rds", "ras", "rmaps", "pls", "rmgr", NULL
    };
    const char *const *fw;

    for (fw = frameworks; NULL != *fw; ++fw) {
        char *name = mca_base_param_environ_variable(*fw, NULL, NULL);

        if (NULL == name) {
            return ORTE_ERR_OUT_OF_RESOURCE;
        }
        opal_unsetenv(name, environ);
        /* the name string is owned by us once returned */
        free(name);
    }

    return ORTE_SUCCESS;
}

Просмотреть файл

@ -461,12 +461,6 @@ static int orte_odls_process_fork_local_proc(
} else {
environ_copy = opal_argv_copy(base_environ);
}
/* purge any disallowed component directives */
if (ORTE_SUCCESS != orte_odls_base_purge_environment(&environ_copy)) {
/* Tell the parent that Badness happened */
return ORTE_ERR_FATAL;
}
/* special case handling for --prefix: this is somewhat icky,
but at least some users do this. :-\ It is possible that

Просмотреть файл

@ -21,6 +21,7 @@
#include "orte/orte_constants.h"
#include "opal/util/argv.h"
#include "opal/util/opal_environ.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/mca/pls/base/pls_private.h"
@ -55,3 +56,23 @@ int orte_pls_base_mca_argv(int *argc, char ***argv)
return ORTE_SUCCESS;
}
/*
 * Neutralize MCA component-selection directives that must not reach
 * remotely launched daemons.
 *
 * For each of the rds, ras, rmaps, pls and rmgr frameworks, the matching
 * MCA environment variable in *env is set (overwriting any existing
 * value) to "proxy".
 *
 * env: address of the environment array to modify in place.
 *
 * The function returns nothing, so a framework whose variable name cannot
 * be obtained is skipped rather than reported; the remaining frameworks
 * are still processed.
 */
void orte_pls_base_purge_mca_params(char ***env)
{
    /* NULL-terminated list of frameworks forced to the proxy component */
    static const char *const frameworks[] = {
        "rds", "ras", "rmaps", "pls", "rmgr", NULL
    };
    const char *const *fw;

    for (fw = frameworks; NULL != *fw; ++fw) {
        char *var = mca_base_param_environ_variable(*fw, NULL, NULL);

        if (NULL == var) {
            /* lookup failed (e.g. out of memory): never hand a NULL
             * variable name to opal_setenv */
            continue;
        }
        opal_setenv(var, "proxy", true, env);
        free(var);
    }
}

Просмотреть файл

@ -96,6 +96,7 @@ extern "C" {
* general utilities
*/
int orte_pls_base_mca_argv(int *argc, char ***argv);
/*
 * Set the rds, ras, rmaps, pls and rmgr MCA component-selection
 * variables in *env to "proxy", overwriting any existing values.
 * The environment array is modified in place.
 */
void orte_pls_base_purge_mca_params(char ***env);
#if defined(c_plusplus) || defined(__cplusplus)
}

Просмотреть файл

@ -514,6 +514,11 @@ static int orte_pls_bproc_launch_daemons(orte_cellid_t cellid, char *** envp,
OPAL_TRACE(1);
/* clean out any MCA component selection directives that
* won't work on remote nodes
*/
orte_pls_base_purge_mca_params(envp);
/* setup a list that will contain the info for all the daemons
* so we can store it on the registry when done
*/

Просмотреть файл

@ -203,6 +203,7 @@ int orte_pls_gridengine_launch_job(orte_jobid_t jobid)
char *prefix_dir;
char *uri, *param;
char **argv;
char **env;
int argc;
int rc;
sigset_t sigs;
@ -333,7 +334,19 @@ int orte_pls_gridengine_launch_job(orte_jobid_t jobid)
free(uri);
free(param);
if (mca_pls_gridengine_component.debug) {
/* setup environment. The environment is common to all the daemons
* so we only need to do this once
*/
env = opal_argv_copy(environ);
param = mca_base_param_environ_variable("seed",NULL,NULL);
opal_setenv(param, "0", true, &env);
/* clean out any MCA component selection directives that
* won't work on remote nodes
*/
orte_pls_base_purge_mca_params(&env);
if (mca_pls_gridengine_component.debug) {
param = opal_argv_join(argv, ' ');
if (NULL != param) {
opal_output(0, "pls:gridengine: final template argv:");
@ -356,7 +369,45 @@ int orte_pls_gridengine_launch_job(orte_jobid_t jobid)
*/
prefix_dir = map->apps[0]->prefix_dir;
/*
/* If we have a prefix, then modify the PATH and
LD_LIBRARY_PATH environment variables.
*/
if (NULL != prefix_dir) {
char *oldenv, *newenv;
/* Reset PATH */
newenv = opal_os_path( false, prefix_dir, bin_base, NULL );
oldenv = getenv("PATH");
if (NULL != oldenv) {
char *temp;
asprintf(&temp, "%s:%s", newenv, oldenv);
free( newenv );
newenv = temp;
}
opal_setenv("PATH", newenv, true, &env);
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: reset PATH: %s", newenv);
}
free(newenv);
/* Reset LD_LIBRARY_PATH */
newenv = opal_os_path( false, prefix_dir, lib_base, NULL );
oldenv = getenv("LD_LIBRARY_PATH");
if (NULL != oldenv) {
char* temp;
asprintf(&temp, "%s:%s", newenv, oldenv);
free(newenv);
newenv = temp;
}
opal_setenv("LD_LIBRARY_PATH", newenv, true, &env);
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: reset LD_LIBRARY_PATH: %s",
newenv);
}
free(newenv);
}
/*
* Iterate through the nodes.
*/
for(n_item = opal_list_get_first(&map->nodes);
@ -442,7 +493,6 @@ int orte_pls_gridengine_launch_job(orte_jobid_t jobid)
/* child */
if (pid == 0) {
char* name_string;
char** env;
char* var;
long fd, fdmax = sysconf(_SC_OPEN_MAX);
@ -494,45 +544,6 @@ int orte_pls_gridengine_launch_job(orte_jobid_t jobid)
opal_output(0, "pls:gridengine: orted_path=%s", orted_path);
}
/* If we have a prefix, then modify the PATH and
LD_LIBRARY_PATH environment variables. We're
already in the child process, so it's ok to modify
environ. */
if (NULL != prefix_dir) {
char *oldenv, *newenv;
/* Reset PATH */
newenv = opal_os_path( false, prefix_dir, bin_base, NULL );
oldenv = getenv("PATH");
if (NULL != oldenv) {
char *temp;
asprintf(&temp, "%s:%s", newenv, oldenv);
free( newenv );
newenv = temp;
}
opal_setenv("PATH", newenv, true, &environ);
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: reset PATH: %s", newenv);
}
free(newenv);
/* Reset LD_LIBRARY_PATH */
newenv = opal_os_path( false, prefix_dir, lib_base, NULL );
oldenv = getenv("LD_LIBRARY_PATH");
if (NULL != oldenv) {
char* temp;
asprintf(&temp, "%s:%s", newenv, oldenv);
free(newenv);
newenv = temp;
}
opal_setenv("LD_LIBRARY_PATH", newenv, true, &environ);
if (mca_pls_gridengine_component.debug) {
opal_output(0, "pls:gridengine: reset LD_LIBRARY_PATH: %s",
newenv);
}
free(newenv);
}
var = getenv("HOME");
if (NULL != var) {
if (mca_pls_gridengine_component.debug) {
@ -592,11 +603,6 @@ int orte_pls_gridengine_launch_job(orte_jobid_t jobid)
sigprocmask(SIG_UNBLOCK, &sigs, 0);
#endif
/* setup environment */
env = opal_argv_copy(environ);
var = mca_base_param_environ_variable("seed",NULL,NULL);
opal_setenv(var, "0", true, &env);
/* exec the daemon */
if (mca_pls_gridengine_component.debug) {
param = opal_argv_join(exec_argv, ' ');
@ -641,6 +647,7 @@ int orte_pls_gridengine_launch_job(orte_jobid_t jobid)
free(jobid_string); /* done with this variable */
opal_argv_free(argv);
opal_argv_free(env);
return rc;
}

Просмотреть файл

@ -387,6 +387,7 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
char *uri, *param;
char **argv, **tmp;
char *prefix_dir;
char **env;
int argc;
int rc;
sigset_t sigs;
@ -635,6 +636,60 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
lib_base = opal_basename(OPAL_LIBDIR);
bin_base = opal_basename(OPAL_BINDIR);
/* copy the environment so we can modify it with opal functions. The
* environment is the same for all daemons, so we only need to do
* this once
*/
env = opal_argv_copy(environ);
/* If we have a prefix, then modify the PATH and
LD_LIBRARY_PATH environment variables
*/
if (NULL != prefix_dir) {
char *oldenv, *newenv;
/* Reset PATH */
newenv = opal_os_path( false, prefix_dir, bin_base, NULL );
oldenv = getenv("PATH");
if (NULL != oldenv) {
char *temp;
asprintf(&temp, "%s:%s", newenv, oldenv );
free( newenv );
newenv = temp;
}
opal_setenv("PATH", newenv, true, &env);
if (mca_pls_rsh_component.debug) {
opal_output(0, "pls:rsh: reset PATH: %s", newenv);
}
free(newenv);
/* Reset LD_LIBRARY_PATH */
newenv = opal_os_path( false, prefix_dir, lib_base, NULL );
oldenv = getenv("LD_LIBRARY_PATH");
if (NULL != oldenv) {
char* temp;
asprintf(&temp, "%s:%s", newenv, oldenv);
free(newenv);
newenv = temp;
}
opal_setenv("LD_LIBRARY_PATH", newenv, true, &env);
if (mca_pls_rsh_component.debug) {
opal_output(0, "pls:rsh: reset LD_LIBRARY_PATH: %s",
newenv);
}
free(newenv);
}
/* ensure we aren't the seed */
param = mca_base_param_environ_variable("seed",NULL,NULL);
opal_setenv(param, "0", true, &env);
free(param);
/* clean out any MCA component selection directives that
* won't work on remote nodes
*/
orte_pls_base_purge_mca_params(&env);
/*
* Iterate through each of the nodes
*/
@ -700,7 +755,6 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
/* child */
if (pid == 0) {
char* name_string;
char** env;
char* var;
long fd, fdmax = sysconf(_SC_OPEN_MAX);
@ -709,9 +763,6 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
rmaps_node->nodename);
}
/* copy the environment so we can modify it with opal functions */
env = opal_argv_copy(environ);
/* We don't need to sense an oversubscribed condition and set the sched_yield
* for the node as we are only launching the daemons at this time. The daemons
* are now smart enough to set the oversubscribed condition themselves when
@ -754,44 +805,6 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
}
/* If we have a prefix, then modify the PATH and
LD_LIBRARY_PATH environment variables
*/
if (NULL != prefix_dir) {
char *oldenv, *newenv;
/* Reset PATH */
newenv = opal_os_path( false, prefix_dir, bin_base, NULL );
oldenv = getenv("PATH");
if (NULL != oldenv) {
char *temp;
asprintf(&temp, "%s:%s", newenv, oldenv );
free( newenv );
newenv = temp;
}
opal_setenv("PATH", newenv, true, &env);
if (mca_pls_rsh_component.debug) {
opal_output(0, "pls:rsh: reset PATH: %s", newenv);
}
free(newenv);
/* Reset LD_LIBRARY_PATH */
newenv = opal_os_path( false, prefix_dir, lib_base, NULL );
oldenv = getenv("LD_LIBRARY_PATH");
if (NULL != oldenv) {
char* temp;
asprintf(&temp, "%s:%s", newenv, oldenv);
free(newenv);
newenv = temp;
}
opal_setenv("LD_LIBRARY_PATH", newenv, true, &env);
if (mca_pls_rsh_component.debug) {
opal_output(0, "pls:rsh: reset LD_LIBRARY_PATH: %s",
newenv);
}
free(newenv);
}
/* Since this is a local execution, we need to
potentially whack the final ")" in the argv (if
sh/csh conditionals, from above). Note that we're
@ -913,30 +926,6 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
sigprocmask(0, 0, &sigs);
sigprocmask(SIG_UNBLOCK, &sigs, 0);
/* setup environment */
var = mca_base_param_environ_variable("seed",NULL,NULL);
opal_setenv(var, "0", true, &env);
free(var);
/* clean out any MCA component selection directives that
* won't work on remote nodes
*/
var = mca_base_param_environ_variable("rds",NULL,NULL);
opal_setenv(var, "0", true, &env);
free(var);
var = mca_base_param_environ_variable("ras",NULL,NULL);
opal_setenv(var, "0", true, &env);
free(var);
var = mca_base_param_environ_variable("rmaps",NULL,NULL);
opal_setenv(var, "0", true, &env);
free(var);
var = mca_base_param_environ_variable("pls",NULL,NULL);
opal_setenv(var, "0", true, &env);
free(var);
var = mca_base_param_environ_variable("rmgr",NULL,NULL);
opal_setenv(var, "0", true, &env);
free(var);
/* exec the daemon */
if (mca_pls_rsh_component.debug) {
param = opal_argv_join(exec_argv, ' ');
@ -945,6 +934,7 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
free(param);
}
}
execve(exec_path, exec_argv, env);
opal_output(0, "pls:rsh: execv failed with errno=%d\n", errno);
exit(-1);
@ -999,6 +989,7 @@ cleanup:
free(jobid_string); /* done with this variable */
opal_argv_free(argv);
opal_argv_free(env);
return rc;
}

Просмотреть файл

@ -367,6 +367,11 @@ static int pls_slurm_launch_job(orte_jobid_t jobid)
var = mca_base_param_environ_variable("seed", NULL, NULL);
opal_setenv(var, "0", true, &env);
/* clean out any MCA component selection directives that
* won't work on remote nodes
*/
orte_pls_base_purge_mca_params(&env);
/* exec the daemon */
rc = pls_slurm_start_proc(argc, argv, env, cur_prefix);
if (ORTE_SUCCESS != rc) {

Просмотреть файл

@ -284,6 +284,11 @@ static int pls_tm_launch_job(orte_jobid_t jobid)
var = mca_base_param_environ_variable("seed",NULL,NULL);
opal_setenv(var, "0", true, &env);
/* clean out any MCA component selection directives that
* won't work on remote nodes
*/
orte_pls_base_purge_mca_params(&env);
/* If we have a prefix, then modify the PATH and
LD_LIBRARY_PATH environment variables. We only allow
a single prefix to be specified. Since there will

Просмотреть файл

@ -39,6 +39,8 @@
#include "opal/util/show_help.h"
#include "opal/util/basename.h"
#include "opal/util/path.h"
#include "opal/util/opal_environ.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/mca/errmgr/errmgr.h"
@ -172,3 +174,27 @@ int orte_rmgr_base_check_context_app(orte_app_context_t *context)
return ORTE_SUCCESS;
}
/*
 * Override the MCA component-selection directives in an application
 * environment.
 *
 * The MCA environment variables selecting the rds, ras, rmaps, pls and
 * rmgr components are each set in *env to "proxy", replacing any value
 * already present.
 *
 * env: address of the environment array to modify in place.
 *
 * There is no return value; if the variable name for a framework cannot
 * be obtained, that framework is skipped and the others are still
 * handled.
 */
void orte_rmgr_base_purge_mca_params(char ***env)
{
    static const char *const types[] = { "rds", "ras", "rmaps", "pls", "rmgr" };
    const int ntypes = (int)(sizeof(types) / sizeof(types[0]));
    int i;

    for (i = 0; i < ntypes; i++) {
        char *param = mca_base_param_environ_variable(types[i], NULL, NULL);

        /* guard against a failed lookup before using the name */
        if (NULL != param) {
            opal_setenv(param, "proxy", true, env);
            free(param);
        }
    }
}

Просмотреть файл

@ -125,6 +125,8 @@ int orte_rmgr_base_finalize_not_available(void);
/*
* Support functions
*/
/*
 * Set the rds, ras, rmaps, pls and rmgr MCA component-selection
 * variables in *env to "proxy", overwriting any existing values.
 * The environment array is modified in place.
 */
void orte_rmgr_base_purge_mca_params(char ***env);
ORTE_DECLSPEC int orte_rmgr_base_proc_stage_gate_init(orte_jobid_t job);
ORTE_DECLSPEC int orte_rmgr_base_proc_stage_gate_mgr(orte_gpr_notify_message_t *msg);

Просмотреть файл

@ -116,7 +116,6 @@ static int orte_rmgr_urm_setup_job(
orte_jobid_t* jobid)
{
int rc;
char *param;
orte_std_cntr_t i;
OPAL_TRACE(1);
@ -130,31 +129,9 @@ static int orte_rmgr_urm_setup_job(
/* for each app_context, we need to purge their environment of HNP
* MCA component selection directives
*/
param = mca_base_param_environ_variable("rds",NULL,NULL);
for (i=0; i < num_context; i++) {
opal_setenv(param, "proxy", true, &app_context[i]->env);
orte_rmgr_base_purge_mca_params(&app_context[i]->env);
}
free(param);
param = mca_base_param_environ_variable("ras",NULL,NULL);
for (i=0; i < num_context; i++) {
opal_setenv(param, "proxy", true, &app_context[i]->env);
}
free(param);
param = mca_base_param_environ_variable("rmaps",NULL,NULL);
for (i=0; i < num_context; i++) {
opal_setenv(param, "proxy", true, &app_context[i]->env);
}
free(param);
param = mca_base_param_environ_variable("pls",NULL,NULL);
for (i=0; i < num_context; i++) {
opal_setenv(param, "proxy", true, &app_context[i]->env);
}
free(param);
param = mca_base_param_environ_variable("rmgr",NULL,NULL);
for (i=0; i < num_context; i++) {
opal_setenv(param, "proxy", true, &app_context[i]->env);
}
free(param);
/* create and initialize job segment */ /* JJH C/N mapping before this */
if (ORTE_SUCCESS !=