diff --git a/orte/runtime/Makefile.am b/orte/runtime/Makefile.am index bd7d8428d5..c2ececb2cc 100644 --- a/orte/runtime/Makefile.am +++ b/orte/runtime/Makefile.am @@ -22,13 +22,14 @@ dist_pkgdata_DATA += runtime/help-orte-runtime.txt headers += \ - runtime/orte_wait.h \ + runtime/orte_wait.h \ + runtime/orte_wakeup.h \ runtime/runtime.h \ runtime/runtime_internal.h \ runtime/runtime_types.h \ runtime/params.h \ - runtime/orte_setup_hnp.h \ - runtime/orte_cr.h + runtime/orte_setup_hnp.h \ + runtime/orte_cr.h libopen_rte_la_SOURCES += \ runtime/orte_abort.c \ @@ -44,4 +45,5 @@ libopen_rte_la_SOURCES += \ runtime/orte_system_init.c \ runtime/orte_universe_exists.c \ runtime/orte_wait.c \ - runtime/orte_cr.c + runtime/orte_wakeup.c \ + runtime/orte_cr.c diff --git a/orte/runtime/orte_wakeup.c b/orte/runtime/orte_wakeup.c new file mode 100644 index 0000000000..6f4f4f051e --- /dev/null +++ b/orte/runtime/orte_wakeup.c @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include "orte_config.h" +#include "orte/orte_constants.h" + +#include <stdlib.h> +#include + +#include "opal/util/output.h" + +#include "orte/dss/dss.h" +#include "orte/mca/ns/ns.h" +#include "orte/mca/gpr/gpr.h" +#include "orte/mca/errmgr/errmgr.h" + +#include "orte/runtime/orte_wakeup.h" + +/* orterun will only wakeup when all procs IN THE ROOT JOB report terminated.
In some cases,
+* such as when an orted fails to start, we have a situation where the root job's processes
+* cannot report back as terminated or aborted. For those situations, we force the issue by
+* deliberately causing the TERMINATE trigger on the root job to fire
+*/
+/**
+ * Force the root job's termination counter to its full process count.
+ *
+ * Looks up the root job of @p job, reads that root job's vpid range and
+ * overwrites the ORTE_PROC_NUM_TERMINATED entry in the root job's
+ * ORTE_JOB_GLOBALS container with that range.
+ *
+ * Every lookup must succeed before the registry is touched: continuing
+ * after a failed lookup would use an uninitialized jobid or segment name.
+ *
+ * @param job Jobid whose root job is to be reported as fully terminated.
+ *
+ * @return ORTE_SUCCESS if the registry was updated, otherwise the error
+ *         code of the first step that failed (already logged through
+ *         ORTE_ERROR_LOG).
+ */
+int orte_wakeup(orte_jobid_t job) {
+    int rc;
+    orte_jobid_t root;      /* the root job is a jobid, not a vpid */
+    orte_vpid_t range;
+    char *segment = NULL;
+    orte_std_cntr_t num;
+    char *tokens[] = {
+        ORTE_JOB_GLOBALS,
+        NULL
+    };
+    orte_data_value_t dval = ORTE_DATA_VALUE_EMPTY;
+
+    if (ORTE_SUCCESS != (rc = orte_ns.get_root_job(&root, job))) {
+        ORTE_ERROR_LOG(rc);
+        return rc;
+    }
+    if (ORTE_SUCCESS != (rc = orte_ns.get_vpid_range(root, &range))) {
+        ORTE_ERROR_LOG(rc);
+        return rc;
+    }
+    if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, root))) {
+        ORTE_ERROR_LOG(rc);
+        return rc;
+    }
+
+    /* report every process in the root job as terminated */
+    num = range;
+    if (ORTE_SUCCESS != (rc = orte_dss.set(&dval, (void*)&num, ORTE_STD_CNTR))) {
+        ORTE_ERROR_LOG(rc);
+        goto cleanup;
+    }
+
+    if (ORTE_SUCCESS != (rc = orte_gpr.put_1(ORTE_GPR_OVERWRITE | ORTE_GPR_TOKENS_AND | ORTE_GPR_KEYS_OR,
+                                             segment, tokens, ORTE_PROC_NUM_TERMINATED, &dval))) {
+        ORTE_ERROR_LOG(rc);
+    }
+
+cleanup:
+    /* NOTE(review): assumes get_job_segment_name() returns a heap-allocated
+     * string owned by the caller - confirm against the schema framework */
+    free(segment);
+    return rc;
+}
diff --git a/orte/runtime/orte_wakeup.h b/orte/runtime/orte_wakeup.h
new file mode 100644
index 0000000000..87030b11e1
--- /dev/null
+++ b/orte/runtime/orte_wakeup.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ * University Research and Technology
+ * Corporation. All rights reserved.
+ * Copyright (c) 2004-2006 The University of Tennessee and The University
+ * of Tennessee Research Foundation. All rights
+ * reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ * All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+/**
+ * @file
+ *
+ * Interface for forcibly waking up orterun.
+ */ +#ifndef ORTE_WAKEUP_H +#define ORTE_WAKEUP_H + +#include "orte_config.h" + +#include "orte/mca/ns/ns_types.h" + +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif + +/** + * Wakeup orterun by reporting the termination of all processes + * + * Looks up the root job of the given job and overwrites that root job's + * ORTE_PROC_NUM_TERMINATED counter with the root job's vpid range, so that + * the TERMINATE trigger on the root job fires. + * + * @param job Jobid whose root job is to be reported as terminated. + * + * @return ORTE_SUCCESS when the wakeup was issued; callers should treat + * any other value as an ORTE error code. + */ +ORTE_DECLSPEC int orte_wakeup(orte_jobid_t job); + +#if defined(c_plusplus) || defined(__cplusplus) +} +#endif + +#endif /* #ifndef ORTE_WAKEUP_H */