From 93d61d01fb02169db204c561a2da816fba73bd13 Mon Sep 17 00:00:00 2001 From: Tim Prins Date: Tue, 24 Oct 2006 01:41:28 +0000 Subject: [PATCH] Fix for a problem on SLURM we have been having since r12243 where mpirun would hang after the process had finished. It turns out that we were always reporting the name of the daemon wrong, but we simply never noticed as we never used it, until r12243. This makes it so we report the name of the daemon correctly. This commit was SVN r12274. The following SVN revision numbers were found above: r12243 --> open-mpi/ompi@153e38ffc9ca2d4284b60187cf52c89540458a27 --- orte/mca/pls/slurm/pls_slurm_component.c | 5 +++++ orte/mca/pls/slurm/pls_slurm_module.c | 5 ++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/orte/mca/pls/slurm/pls_slurm_component.c b/orte/mca/pls/slurm/pls_slurm_component.c index 8b757d8c05..c0d5854299 100644 --- a/orte/mca/pls/slurm/pls_slurm_component.c +++ b/orte/mca/pls/slurm/pls_slurm_component.c @@ -107,6 +107,11 @@ static int pls_slurm_open(void) mca_base_param_reg_int(comp, "debug", "Enable debugging of slurm pls", false, false, 0, &mca_pls_slurm_component.debug); + if (mca_pls_slurm_component.debug == 0) { + mca_base_param_reg_int_name("orte", "debug", + "Whether or not to enable debugging output for all ORTE components (0 or 1)", + false, false, false, &mca_pls_slurm_component.debug); + } mca_base_param_reg_int(comp, "priority", "Default selection priority", false, false, 75, diff --git a/orte/mca/pls/slurm/pls_slurm_module.c b/orte/mca/pls/slurm/pls_slurm_module.c index 1eebc76abf..c9e9e76d1e 100644 --- a/orte/mca/pls/slurm/pls_slurm_module.c +++ b/orte/mca/pls/slurm/pls_slurm_module.c @@ -113,6 +113,7 @@ static int pls_slurm_launch_job(orte_jobid_t jobid) opal_list_item_t *item; size_t num_nodes; orte_vpid_t vpid; + orte_vpid_t start_vpid; char *jobid_string; char *uri, *param; char **argv; @@ -159,6 +160,7 @@ static int pls_slurm_launch_job(orte_jobid_t jobid) if (ORTE_SUCCESS != rc) { 
goto cleanup; } + start_vpid = vpid; /* setup the orted triggers for passing their launch info */ if (ORTE_SUCCESS != (rc = orte_smr.init_orted_stage_gates(jobid, num_nodes, NULL, NULL))) { @@ -338,7 +340,7 @@ static int pls_slurm_launch_job(orte_jobid_t jobid) } /* setup the daemon info for each node */ - vpid = 0; + vpid = start_vpid; for (item = opal_list_get_first(&map->nodes); item != opal_list_get_end(&map->nodes); item = opal_list_get_next(item)) { @@ -559,6 +561,7 @@ static int pls_slurm_start_proc(int argc, char **argv, char **env, /* When not in debug mode, tie stdout/stderr to dev null so we don't see messages from orted */ + /* XXX: this prevents --debug-daemons from working */ if (!mca_pls_slurm_component.debug) { fd = open("/dev/null", O_CREAT|O_WRONLY|O_TRUNC, 0666); if (fd >= 0) {