From 515b99b3575d313e6e97f02590f038080209e08c Mon Sep 17 00:00:00 2001 From: Rolf vandeVaart Date: Wed, 18 Feb 2009 18:02:38 +0000 Subject: [PATCH] Under SGE, the orted should not daemonize by default. Also create an MCA parameter to force daemonization (the previous behavior), which might be needed on larger clusters or to make use of the -notify flag with qsub. This fixes trac:1783. This commit was SVN r20582. The following Trac tickets were found above: Ticket 1783 --> https://svn.open-mpi.org/trac/ompi/ticket/1783 --- orte/mca/plm/rsh/plm_rsh.h | 2 ++ orte/mca/plm/rsh/plm_rsh_component.c | 9 ++++++++- orte/mca/plm/rsh/plm_rsh_module.c | 8 ++++++-- orte/mca/ras/gridengine/ras_gridengine_module.c | 5 ++++- 4 files changed, 20 insertions(+), 4 deletions(-) diff --git a/orte/mca/plm/rsh/plm_rsh.h b/orte/mca/plm/rsh/plm_rsh.h index 685dcabae3..e8bf6d7049 100644 --- a/orte/mca/plm/rsh/plm_rsh.h +++ b/orte/mca/plm/rsh/plm_rsh.h @@ -67,6 +67,8 @@ struct orte_plm_rsh_component_t { bool assume_same_shell; bool force_rsh; bool disable_qrsh; + bool using_qrsh; + bool daemonize_qrsh; int delay; int priority; bool tree_spawn; diff --git a/orte/mca/plm/rsh/plm_rsh_component.c b/orte/mca/plm/rsh/plm_rsh_component.c index c896dcaf8a..0fb565dfb1 100644 --- a/orte/mca/plm/rsh/plm_rsh_component.c +++ b/orte/mca/plm/rsh/plm_rsh_component.c @@ -101,6 +101,7 @@ int orte_plm_rsh_component_open(void) OBJ_CONSTRUCT(&mca_plm_rsh_component.cond, opal_condition_t); mca_plm_rsh_component.num_children = 0; OBJ_CONSTRUCT(&mca_plm_rsh_component.children, opal_list_t); + mca_plm_rsh_component.using_qrsh = false; /* lookup parameters */ mca_base_param_reg_int(c, "num_concurrent", @@ -120,7 +121,12 @@ int orte_plm_rsh_component_open(void) mca_base_param_reg_int(c, "disable_qrsh", "Disable the launcher to use qrsh when under the SGE parallel environment", false, false, false, &tmp); - mca_plm_rsh_component.disable_qrsh = OPAL_INT_TO_BOOL(tmp); + mca_plm_rsh_component.disable_qrsh = OPAL_INT_TO_BOOL(tmp); 
+ + mca_base_param_reg_int(c, "daemonize_qrsh", + "Daemonize the orted under the SGE parallel environment", + false, false, false, &tmp); + mca_plm_rsh_component.daemonize_qrsh = OPAL_INT_TO_BOOL(tmp); mca_base_param_reg_int(c, "priority", "Priority of the rsh plm component", @@ -184,6 +190,7 @@ int orte_plm_rsh_component_query(mca_base_module_t **module, int *priority) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tmp); free(tmp); } + mca_plm_rsh_component.using_qrsh = true; *priority = mca_plm_rsh_component.priority; *module = (mca_base_module_t *) &orte_plm_rsh_module; return ORTE_SUCCESS; diff --git a/orte/mca/plm/rsh/plm_rsh_module.c b/orte/mca/plm/rsh/plm_rsh_module.c index f77ae406a9..6e6ddbebe1 100644 --- a/orte/mca/plm/rsh/plm_rsh_module.c +++ b/orte/mca/plm/rsh/plm_rsh_module.c @@ -12,7 +12,7 @@ * Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -651,7 +651,11 @@ static int setup_launch(int *argcptr, char ***argvptr, !orte_debug_flag && !orte_debug_daemons_flag && !orte_debug_daemons_file_flag && - !orte_leave_session_attached) { + !orte_leave_session_attached && + /* Daemonize when not using qrsh. Or, if using qrsh, only + * daemonize if told to by user with daemonize_qrsh flag. */ + ((!mca_plm_rsh_component.using_qrsh) || + (mca_plm_rsh_component.using_qrsh && mca_plm_rsh_component.daemonize_qrsh))) { opal_argv_append(&argc, &argv, "--daemonize"); } diff --git a/orte/mca/ras/gridengine/ras_gridengine_module.c b/orte/mca/ras/gridengine/ras_gridengine_module.c index 234299bad7..25392dd25f 100644 --- a/orte/mca/ras/gridengine/ras_gridengine_module.c +++ b/orte/mca/ras/gridengine/ras_gridengine_module.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2007 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2006-2009 Sun Microsystems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -84,6 +84,9 @@ static int orte_ras_gridengine_allocate(opal_list_t *nodelist) /* parse the pe_hostfile for hostname, slots, etc, then compare the * current node with a list of hosts in the nodelist, if the current * node is not found in nodelist, add it in */ + opal_output(mca_ras_gridengine_component.verbose, + "ras:gridengine: PE_HOSTFILE: %s", pe_hostfile); + while (fgets(buf, sizeof(buf), fp)) { ptr = strtok_r(buf, " \n", &tok); num = strtok_r(NULL, " \n", &tok);