Take a step back on the slurm and tm launchers: problems were occurring in MTT runs, although not under non-MTT scenarios. Preserve the modified plm versions in new components that are ompi_ignored until the problems can be resolved.
This will allow for better MTT coverage until the problem is understood. This commit was SVN r20083.
This commit is contained in:

parent 89792bbc72
commit ce4018efeb
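For context on what differs between the retained and preserved launchers: the retained slurm module (first diff below) cancels the srun waitpid callback via orte_wait_cb_cancel() before ordering the daemons to exit, so a deliberate shutdown is not misreported as an srun failure; the preserved slurmd copy instead keeps the waitpid callback active and fires a trigger when the primary srun pid completes. A minimal, self-contained sketch of that cancel-before-kill pattern follows - illustrative only, not ORTE code; watched_pid and child_wait_cb here are hypothetical stand-ins for the orte_wait_cb()/orte_wait_cb_cancel() machinery:

    #include <signal.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/types.h>
    #include <sys/wait.h>
    #include <unistd.h>

    static pid_t watched_pid = 0;   /* 0 = no active watch (hypothetical stand-in
                                       for the orte_wait_cb registration) */

    /* stand-in for srun_wait_cb(): report an unexpected child exit */
    static void child_wait_cb(pid_t pid, int status)
    {
        fprintf(stderr, "child %d exited with status %d - treated as failure\n",
                (int) pid, status);
    }

    int main(void)
    {
        pid_t pid = fork();
        if (0 == pid) {             /* child: stand-in for srun */
            pause();                /* idle until signaled */
            exit(0);
        }
        if (0 > pid) {
            perror("fork");
            return 1;
        }
        watched_pid = pid;          /* analogous to orte_wait_cb(pid, cb, NULL) */

        /* ... job runs; now shut down deliberately.  Cancel the watch BEFORE
         * killing the child - analogous to orte_wait_cb_cancel() - so the
         * deliberate termination is not reported as a failure */
        watched_pid = 0;
        kill(pid, SIGTERM);

        int status;
        waitpid(pid, &status, 0);
        if (watched_pid == pid) {   /* watch was cancelled, so nothing fires */
            child_wait_cb(pid, status);
        }
        return 0;
    }

The design point is purely the ordering: deregister the observer first, then terminate, so shutdown and failure stay distinguishable.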
orte/mca/plm/slurm/plm_slurm_module.c

@@ -101,10 +101,9 @@ orte_plm_base_module_1_0_0_t orte_plm_slurm_module = {
 /*
  * Local variables
  */
-static pid_t primary_srun_pid = 0;
-static bool primary_pid_set = false;
+static pid_t srun_pid = 0;
 static orte_jobid_t active_job = ORTE_JOBID_INVALID;
-static bool launching_daemons;
+static bool failed_launch;
 
 
 /**
@@ -148,8 +147,6 @@ static int plm_slurm_launch_job(orte_job_t *jdata)
     struct timeval launchstart, launchstop;
     int proc_vpid_index;
     orte_jobid_t failed_job;
-    orte_job_state_t job_state = ORTE_JOB_NEVER_LAUNCHED;
-    bool failed_launch=false;
 
     /* flag the daemons as failing by default */
     failed_job = ORTE_PROC_MY_NAME->jobid;
@@ -163,7 +160,7 @@ static int plm_slurm_launch_job(orte_job_t *jdata)
     }
 
     /* indicate the state of the launch */
-    launching_daemons = true;
+    failed_launch = true;
 
     /* create a jobid for this job */
     if (ORTE_SUCCESS != (rc = orte_plm_base_create_jobid(&jdata->jobid))) {
@@ -337,9 +334,6 @@ static int plm_slurm_launch_job(orte_job_t *jdata)
         }
     }
 
-    /* set the job state to indicate we attempted to launch */
-    job_state = ORTE_JOB_STATE_FAILED_TO_START;
-
     /* setup environment */
     env = opal_argv_copy(orte_launch_environ);
 
@@ -370,10 +364,7 @@ static int plm_slurm_launch_job(orte_job_t *jdata)
     }
 
 launch_apps:
-    /* get here if daemons launch okay, or no daemons need to be launched - any
-     * failures now are from launching apps
-     */
-    launching_daemons = false;
+    /* get here if daemons launch okay - any failures now by apps */
     failed_job = active_job;
     if (ORTE_SUCCESS != (rc = orte_plm_base_launch_apps(active_job))) {
         OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
@@ -415,7 +406,7 @@ cleanup:
 
     /* check for failed launch - if so, force terminate */
     if (failed_launch) {
-        orte_plm_base_launch_failed(failed_job, -1, ORTE_ERROR_DEFAULT_EXIT_CODE, job_state);
+        orte_plm_base_launch_failed(failed_job, -1, ORTE_ERROR_DEFAULT_EXIT_CODE, ORTE_JOB_STATE_FAILED_TO_START);
     }
 
     return rc;
@@ -442,10 +433,15 @@ static int plm_slurm_terminate_orteds(void)
 {
     int rc;
 
-    /* tell them to die without sending a reply - we will rely on the
-     * waitpid to tell us when they have exited!
-     */
-    if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_NO_REPLY_CMD))) {
+    /* deregister the waitpid callback to ensure we don't make it look like
+     * srun failed when it didn't. Since the srun may have already completed,
+     * do NOT ERROR_LOG any return code to avoid confusing, duplicate error
+     * messages
+     */
+    orte_wait_cb_cancel(srun_pid);
+
+    /* tell them to die! */
+    if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_WITH_REPLY_CMD))) {
         ORTE_ERROR_LOG(rc);
     }
 
@@ -483,8 +479,6 @@ static int plm_slurm_finalize(void)
 
 
 static void srun_wait_cb(pid_t pid, int status, void* cbdata){
-    orte_job_t *jdata;
-
     /* According to the SLURM folks, srun always returns the highest exit
        code of our remote processes. Thus, a non-zero exit status doesn't
        necessarily mean that srun failed - it could be that an orted returned
@@ -505,41 +499,20 @@ static void srun_wait_cb(pid_t pid, int status, void* cbdata){
        pid so nobody thinks this is real
     */
 
-    /* if we are in the launch phase, then any termination is bad */
-    if (launching_daemons) {
-        /* report that one or more daemons failed to launch so we can exit */
-        OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
-                             "%s plm:slurm: daemon failed during launch",
-                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
-        orte_plm_base_launch_failed(ORTE_PROC_MY_NAME->jobid, -1, status, ORTE_JOB_STATE_FAILED_TO_START);
-    } else {
-        /* if this is after launch, then we need to abort only if the status
-         * returned is non-zero - i.e., if the orteds exited with an error
-         */
-        if (0 != status) {
-            /* an orted must have died unexpectedly after launch - report
-             * that the daemon has failed so we exit
-             */
-            OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
-                                 "%s plm:slurm: daemon failed while running",
-                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
-            orte_plm_base_launch_failed(ORTE_PROC_MY_NAME->jobid, -1, status, ORTE_JOB_STATE_ABORTED);
-        }
-        /* otherwise, check to see if this is the primary pid */
-        if (primary_srun_pid == pid) {
-            /* in this case, we just want to fire the proper trigger so
-             * mpirun can exit
-             */
-            OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
-                                 "%s plm:slurm: primary daemons complete!",
-                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
-            jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
-            jdata->state = ORTE_JOB_STATE_TERMINATED;
-            /* need to set the #terminated value to avoid an incorrect error msg */
-            jdata->num_terminated = jdata->num_procs;
-            orte_trigger_event(&orteds_exit);
-        }
+    if (0 != status) {
+        if (failed_launch) {
+            /* report that the daemon has failed so we can exit
+             */
+            orte_plm_base_launch_failed(ORTE_PROC_MY_NAME->jobid, -1, status, ORTE_JOB_STATE_FAILED_TO_START);
+        } else {
+            /* an orted must have died unexpectedly after launch - report
+             * that the daemon has failed so we exit
+             */
+            orte_plm_base_launch_failed(ORTE_PROC_MY_NAME->jobid, -1, status, ORTE_JOB_STATE_ABORTED);
+        }
     }
 }
 
@@ -547,7 +520,6 @@ static int plm_slurm_start_proc(int argc, char **argv, char **env,
                                 char *prefix)
 {
     int fd;
-    int srun_pid;
     char *exec_argv = opal_path_findv(argv[0], 0, env, NULL);
 
     if (NULL == exec_argv) {
@@ -651,14 +623,6 @@ static int plm_slurm_start_proc(int argc, char **argv, char **env,
         /* setup the waitpid so we can find out if srun succeeds! */
         orte_wait_cb(srun_pid, srun_wait_cb, NULL);
         free(exec_argv);
-
-        /* if this is the primary launch - i.e., not a comm_spawn of a
-         * child job - then save the pid
-         */
-        if (!primary_pid_set) {
-            primary_srun_pid = srun_pid;
-            primary_pid_set = true;
-        }
     }
 
     return ORTE_SUCCESS;
orte/mca/plm/slurmd/.ompi_ignore (new file, empty)

orte/mca/plm/slurmd/.ompi_unignore (new file, 1 line)

rhc
orte/mca/plm/slurmd/Makefile.am (new file, 45 lines)

#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
#                         University Research and Technology
#                         Corporation.  All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
#                         of Tennessee Research Foundation.  All rights
#                         reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
#                         University of Stuttgart.  All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
#                         All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#

sources = \
        plm_slurmd.h \
        plm_slurmd_component.c \
        plm_slurmd_module.c

dist_pkgdata_DATA = help-plm-slurmd.txt

# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).

if OMPI_BUILD_plm_slurmd_DSO
component_noinst =
component_install = mca_plm_slurmd.la
else
component_noinst = libmca_plm_slurmd.la
component_install =
endif

mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_plm_slurmd_la_SOURCES = $(sources)
mca_plm_slurmd_la_LDFLAGS = -module -avoid-version

noinst_LTLIBRARIES = $(component_noinst)
libmca_plm_slurmd_la_SOURCES =$(sources)
libmca_plm_slurmd_la_LDFLAGS = -module -avoid-version
orte/mca/plm/slurmd/configure.m4 (new file, 37 lines)

# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
#                         University Research and Technology
#                         Corporation.  All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
#                         of Tennessee Research Foundation.  All rights
#                         reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
#                         University of Stuttgart.  All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
#                         All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#

# MCA_plm_slurmd_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
AC_DEFUN([MCA_plm_slurmd_CONFIG],[
    OMPI_CHECK_SLURM([plm_slurmd], [plm_slurmd_good=1], [plm_slurmd_good=0])

    # if check worked, set wrapper flags if so.
    # Evaluate succeed / fail
    AS_IF([test "$plm_slurmd_good" = "1"],
          [plm_slurmd_WRAPPER_EXTRA_LDFLAGS="$plm_slurmd_LDFLAGS"
           plm_slurmd_WRAPPER_EXTRA_LIBS="$plm_slurmd_LIBS"
           $1],
          [$2])

    # set build flags to use in makefile
    AC_SUBST([plm_slurmd_CPPFLAGS])
    AC_SUBST([plm_slurmd_LDFLAGS])
    AC_SUBST([plm_slurmd_LIBS])
])dnl
orte/mca/plm/slurmd/configure.params (new file, 22 lines)

# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
#                         University Research and Technology
#                         Corporation.  All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
#                         of Tennessee Research Foundation.  All rights
#                         reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
#                         University of Stuttgart.  All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
#                         All rights reserved.
# Copyright (c) 2007      Los Alamos National Security, LLC.  All rights
#                         reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#

PARAM_CONFIG_FILES="Makefile"
orte/mca/plm/slurmd/help-plm-slurmd.txt (new file, 41 lines)

# -*- text -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
#                         University Research and Technology
#                         Corporation.  All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
#                         of Tennessee Research Foundation.  All rights
#                         reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
#                         University of Stuttgart.  All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
#                         All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
[multiple-prefixes]
The SLURM process starter for Open MPI does not support multiple
different --prefix options to mpirun.  You can specify at most one
unique value for the --prefix option (in any of the application
contexts); it will be applied to all the application contexts of your
parallel job.

Put simply, you must have Open MPI installed in the same location on
all of your SLURM nodes.

Multiple different --prefix options were specified to mpirun.  This is
a fatal error for the SLURM process starter in Open MPI.

The first two prefix values supplied were:
    %s
and %s
#
[no-hosts-in-list]
The SLURM process starter for Open MPI didn't find any hosts in
the map for this application. This can be caused by a lack of
an allocation, or by an error in the Open MPI code. Please check
to ensure you have a SLURM allocation. If you do, then please pass
the error to the Open MPI user's mailing list for assistance.
orte/mca/plm/slurmd/plm_slurmd.h (new file, 44 lines)

/*
 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2005 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

#ifndef ORTE_PLM_SLURMD_EXPORT_H
#define ORTE_PLM_SLURMD_EXPORT_H

#include "orte_config.h"

#include "opal/mca/mca.h"
#include "orte/mca/plm/plm.h"

BEGIN_C_DECLS

struct orte_plm_slurmd_component_t {
    orte_plm_base_component_t super;
    char *custom_args;
};
typedef struct orte_plm_slurmd_component_t orte_plm_slurmd_component_t;

/*
 * Globally exported variable
 */

ORTE_MODULE_DECLSPEC extern orte_plm_slurmd_component_t mca_plm_slurmd_component;
ORTE_DECLSPEC extern orte_plm_base_module_t orte_plm_slurmd_module;

END_C_DECLS

#endif /* ORTE_PLM_SLURMD_EXPORT_H */
orte/mca/plm/slurmd/plm_slurmd_component.c (new file, 130 lines)

/*
 * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2005 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 *
 * These symbols are in a file by themselves to provide nice linker
 * semantics.  Since linkers generally pull in symbols by object
 * files, keeping these symbols as the only symbols in this file
 * prevents utility programs such as "ompi_info" from having to import
 * entire components just to query their version and parameters.
 */

#include "orte_config.h"
#include "orte/constants.h"

#include "orte/util/show_help.h"
#include "opal/mca/base/mca_base_param.h"

#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"

#include "orte/mca/plm/plm.h"
#include "orte/mca/plm/base/plm_private.h"
#include "plm_slurmd.h"


/*
 * Public string showing the plm ompi_slurmd component version number
 */
const char *mca_plm_slurmd_component_version_string =
  "Open MPI slurmd plm MCA component version " ORTE_VERSION;


/*
 * Local functions
 */
static int plm_slurmd_open(void);
static int plm_slurmd_close(void);
static int orte_plm_slurmd_component_query(mca_base_module_t **module, int *priority);


/*
 * Instantiate the public struct with all of our public information
 * and pointers to our public functions in it
 */

orte_plm_slurmd_component_t mca_plm_slurmd_component = {

    {
        /* First, the mca_component_t struct containing meta
           information about the component itself */

        {
            ORTE_PLM_BASE_VERSION_2_0_0,

            /* Component name and version */
            "slurmd",
            ORTE_MAJOR_VERSION,
            ORTE_MINOR_VERSION,
            ORTE_RELEASE_VERSION,

            /* Component open and close functions */
            plm_slurmd_open,
            plm_slurmd_close,
            orte_plm_slurmd_component_query
        },
        {
            /* The component is checkpoint ready */
            MCA_BASE_METADATA_PARAM_CHECKPOINT
        }
    }

    /* Other orte_plm_slurmd_component_t items -- left uninitialized
       here; will be initialized in plm_slurmd_open() */
};


static int plm_slurmd_open(void)
{
    mca_base_component_t *comp = &mca_plm_slurmd_component.super.base_version;

    mca_base_param_reg_string(comp, "args",
                              "Custom arguments to srun",
                              false, false, NULL,
                              &mca_plm_slurmd_component.custom_args);

    return ORTE_SUCCESS;
}


static int orte_plm_slurmd_component_query(mca_base_module_t **module, int *priority)
{
    /* Are we running under a SLURM job? */

    if (NULL != getenv("SLURM_JOBID")) {
        *priority = 2;

        OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                             "%s plm:slurmd: available for selection",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

        *module = (mca_base_module_t *)&orte_plm_slurmd_module;
        return ORTE_SUCCESS;
    }

    /* Sadly, no */
    *module = NULL;
    return ORTE_ERROR;
}


static int plm_slurmd_close(void)
{
    if (NULL != mca_plm_slurmd_component.custom_args) {
        free(mca_plm_slurmd_component.custom_args);
    }

    return ORTE_SUCCESS;
}
orte/mca/plm/slurmd/plm_slurmd_module.c (new file, 665 lines)

/*
 * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2006 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2006-2007 Cisco Systems, Inc.  All rights reserved.
 * Copyright (c) 2007      Los Alamos National Security, LLC.  All rights
 *                         reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 *
 * These symbols are in a file by themselves to provide nice linker
 * semantics.  Since linkers generally pull in symbols by object
 * files, keeping these symbols as the only symbols in this file
 * prevents utility programs such as "ompi_info" from having to import
 * entire components just to query their version and parameters.
 */

#include "orte_config.h"
#include "orte/constants.h"
#include "orte/types.h"

#include <sys/types.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <signal.h>
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif

#include "opal/mca/installdirs/installdirs.h"
#include "opal/util/argv.h"
#include "opal/util/opal_environ.h"
#include "opal/util/path.h"
#include "opal/util/basename.h"
#include "opal/mca/base/mca_base_param.h"

#include "orte/util/show_help.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "orte/runtime/runtime.h"
#include "orte/runtime/orte_wait.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rmaps/rmaps.h"

#include "orte/mca/plm/plm.h"
#include "orte/mca/plm/base/plm_private.h"
#include "plm_slurmd.h"


/*
 * Local functions
 */
static int plm_slurmd_init(void);
static int plm_slurmd_launch_job(orte_job_t *jdata);
static int plm_slurmd_terminate_job(orte_jobid_t jobid);
static int plm_slurmd_terminate_orteds(void);
static int plm_slurmd_signal_job(orte_jobid_t jobid, int32_t signal);
static int plm_slurmd_finalize(void);

static int plm_slurmd_start_proc(int argc, char **argv, char **env,
                                 char *prefix);


/*
 * Global variable
 */
orte_plm_base_module_1_0_0_t orte_plm_slurmd_module = {
    plm_slurmd_init,
    orte_plm_base_set_hnp_name,
    plm_slurmd_launch_job,
    NULL,
    plm_slurmd_terminate_job,
    plm_slurmd_terminate_orteds,
    plm_slurmd_signal_job,
    plm_slurmd_finalize
};

/*
 * Local variables
 */
static pid_t primary_srun_pid = 0;
static bool primary_pid_set = false;
static orte_jobid_t active_job = ORTE_JOBID_INVALID;
static bool launching_daemons;


/**
 * Init the module
 */
static int plm_slurmd_init(void)
{
    int rc;

    if (ORTE_SUCCESS != (rc = orte_plm_base_comm_start())) {
        ORTE_ERROR_LOG(rc);
    }
    return rc;
}

/* When working in this function, ALWAYS jump to "cleanup" if
 * you encounter an error so that orterun will be woken up and
 * the job can cleanly terminate
 */
static int plm_slurmd_launch_job(orte_job_t *jdata)
{
    orte_app_context_t **apps;
    orte_node_t **nodes;
    orte_std_cntr_t n;
    orte_job_map_t *map;
    char *jobid_string = NULL;
    char *param;
    char **argv = NULL;
    int argc;
    int rc;
    char *tmp;
    char** env = NULL;
    char* var;
    char *nodelist_flat;
    char **nodelist_argv;
    int nodelist_argc;
    char *name_string;
    char **custom_strings;
    int num_args, i;
    char *cur_prefix;
    struct timeval launchstart, launchstop;
    int proc_vpid_index;
    orte_jobid_t failed_job;
    orte_job_state_t job_state = ORTE_JOB_NEVER_LAUNCHED;
    bool failed_launch=false;

    /* flag the daemons as failing by default */
    failed_job = ORTE_PROC_MY_NAME->jobid;

    if (orte_timing) {
        if (0 != gettimeofday(&launchstart, NULL)) {
            opal_output(0, "plm_slurmd: could not obtain job start time");
            launchstart.tv_sec = 0;
            launchstart.tv_usec = 0;
        }
    }

    /* indicate the state of the launch */
    launching_daemons = true;

    /* create a jobid for this job */
    if (ORTE_SUCCESS != (rc = orte_plm_base_create_jobid(&jdata->jobid))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                         "%s plm:slurmd: launching job %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(jdata->jobid)));

    /* setup the job */
    if (ORTE_SUCCESS != (rc = orte_plm_base_setup_job(jdata))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* set the active jobid */
    active_job = jdata->jobid;

    /* Get the map for this job */
    if (NULL == (map = orte_rmaps.get_job_map(active_job))) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        rc = ORTE_ERR_NOT_FOUND;
        goto cleanup;
    }
    apps = (orte_app_context_t**)jdata->apps->addr;
    nodes = (orte_node_t**)map->nodes->addr;

    if (0 == map->num_new_daemons) {
        /* no new daemons required - just launch apps */
        OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                             "%s plm:slurmd: no new daemons to launch",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        goto launch_apps;
    }

    /* need integer value for command line parameter */
    asprintf(&jobid_string, "%lu", (unsigned long) jdata->jobid);

    /*
     * start building argv array
     */
    argv = NULL;
    argc = 0;

    /*
     * SLURM srun OPTIONS
     */

    /* add the srun command */
    opal_argv_append(&argc, &argv, "srun");

    /* Append user defined arguments to srun */
    if ( NULL != mca_plm_slurmd_component.custom_args ) {
        custom_strings = opal_argv_split(mca_plm_slurmd_component.custom_args, ' ');
        num_args = opal_argv_count(custom_strings);
        for (i = 0; i < num_args; ++i) {
            opal_argv_append(&argc, &argv, custom_strings[i]);
        }
        opal_argv_free(custom_strings);
    }

    asprintf(&tmp, "--nodes=%lu", (unsigned long) map->num_new_daemons);
    opal_argv_append(&argc, &argv, tmp);
    free(tmp);

    asprintf(&tmp, "--ntasks=%lu", (unsigned long) map->num_new_daemons);
    opal_argv_append(&argc, &argv, tmp);
    free(tmp);

    /* alert us if any orteds die during startup */
    opal_argv_append(&argc, &argv, "--kill-on-bad-exit");

    /* create nodelist */
    nodelist_argv = NULL;
    nodelist_argc = 0;

    for (n=0; n < map->num_nodes; n++ ) {
        /* if the daemon already exists on this node, then
         * don't include it
         */
        if (nodes[n]->daemon_launched) {
            continue;
        }

        /* otherwise, add it to the list of nodes upon which
         * we need to launch a daemon
         */
        opal_argv_append(&nodelist_argc, &nodelist_argv, nodes[n]->name);
    }
    if (0 == opal_argv_count(nodelist_argv)) {
        orte_show_help("help-plm-slurmd.txt", "no-hosts-in-list", true);
        rc = ORTE_ERR_FAILED_TO_START;
        goto cleanup;
    }
    nodelist_flat = opal_argv_join(nodelist_argv, ',');
    opal_argv_free(nodelist_argv);
    asprintf(&tmp, "--nodelist=%s", nodelist_flat);
    opal_argv_append(&argc, &argv, tmp);
    free(tmp);

    OPAL_OUTPUT_VERBOSE((2, orte_plm_globals.output,
                         "%s plm:slurmd: launching on nodes %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), nodelist_flat));

    /*
     * ORTED OPTIONS
     */

    /* add the daemon command (as specified by user) */
    orte_plm_base_setup_orted_cmd(&argc, &argv);

    /* Add basic orted command line options, including debug flags */
    orte_plm_base_orted_append_basic_args(&argc, &argv,
                                          "slurmd",
                                          &proc_vpid_index,
                                          false);

    /* tell the new daemons the base of the name list so they can compute
     * their own name on the other end
     */
    rc = orte_util_convert_vpid_to_string(&name_string, map->daemon_vpid_start);
    if (ORTE_SUCCESS != rc) {
        opal_output(0, "plm_slurmd: unable to get daemon vpid as string");
        goto cleanup;
    }

    free(argv[proc_vpid_index]);
    argv[proc_vpid_index] = strdup(name_string);
    free(name_string);

    if (0 < opal_output_get_verbosity(orte_plm_globals.output)) {
        param = opal_argv_join(argv, ' ');
        OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                             "%s plm:slurmd: final top-level argv:\n\t%s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             (NULL == param) ? "NULL" : param));
        if (NULL != param) free(param);
    }

    /* Copy the prefix-directory specified in the
       corresponding app_context.  If there are multiple,
       different prefix's in the app context, complain (i.e., only
       allow one --prefix option for the entire slurmd run -- we
       don't support different --prefix'es for different nodes in
       the SLURM plm) */
    cur_prefix = NULL;
    for (n=0; n < jdata->num_apps; n++) {
        char * app_prefix_dir = apps[n]->prefix_dir;
        /* Check for already set cur_prefix_dir -- if different,
           complain */
        if (NULL != app_prefix_dir) {
            if (NULL != cur_prefix &&
                0 != strcmp (cur_prefix, app_prefix_dir)) {
                orte_show_help("help-plm-slurmd.txt", "multiple-prefixes",
                               true, cur_prefix, app_prefix_dir);
                return ORTE_ERR_FATAL;
            }

            /* If not yet set, copy it; iff set, then it's the
               same anyway */
            if (NULL == cur_prefix) {
                cur_prefix = strdup(app_prefix_dir);
                OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                                     "%s plm:slurmd: Set prefix:%s",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     cur_prefix));
            }
        }
    }

    /* set the job state to indicate we attempted to launch */
    job_state = ORTE_JOB_STATE_FAILED_TO_START;

    /* setup environment */
    env = opal_argv_copy(orte_launch_environ);

    /* add the nodelist */
    var = mca_base_param_environ_variable("orte", "slurmd", "nodelist");
    opal_setenv(var, nodelist_flat, true, &env);
    free(nodelist_flat);
    free(var);

    /* exec the daemon(s) */
    if (ORTE_SUCCESS != (rc = plm_slurmd_start_proc(argc, argv, env, cur_prefix))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* do NOT wait for srun to complete. Srun only completes when the processes
     * it starts - in this case, the orteds - complete. Instead, we'll catch
     * any srun failures and deal with them elsewhere
     */

    /* wait for daemons to callback */
    if (ORTE_SUCCESS != (rc = orte_plm_base_daemon_callback(map->num_new_daemons))) {
        OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                             "%s plm:slurmd: daemon launch failed for job %s on error %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_JOBID_PRINT(active_job), ORTE_ERROR_NAME(rc)));
        goto cleanup;
    }

launch_apps:
    /* get here if daemons launch okay, or no daemons need to be launched - any
     * failures now are from launching apps
     */
    launching_daemons = false;
    failed_job = active_job;
    if (ORTE_SUCCESS != (rc = orte_plm_base_launch_apps(active_job))) {
        OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                             "%s plm:slurmd: launch of apps failed for job %s on error %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_JOBID_PRINT(active_job), ORTE_ERROR_NAME(rc)));
        goto cleanup;
    }

    /* declare the launch a success */
    failed_launch = false;

    if (orte_timing) {
        if (0 != gettimeofday(&launchstop, NULL)) {
            opal_output(0, "plm_slurmd: could not obtain stop time");
        } else {
            opal_output(0, "plm_slurmd: total job launch time is %ld usec",
                        (launchstop.tv_sec - launchstart.tv_sec)*1000000 +
                        (launchstop.tv_usec - launchstart.tv_usec));
        }
    }

    if (ORTE_SUCCESS != rc) {
        opal_output(0, "plm:slurmd: start_procs returned error %d", rc);
        goto cleanup;
    }

cleanup:
    if (NULL != argv) {
        opal_argv_free(argv);
    }
    if (NULL != env) {
        opal_argv_free(env);
    }

    if(NULL != jobid_string) {
        free(jobid_string);
    }

    /* check for failed launch - if so, force terminate */
    if (failed_launch) {
        orte_plm_base_launch_failed(failed_job, -1, ORTE_ERROR_DEFAULT_EXIT_CODE, job_state);
    }

    return rc;
}


static int plm_slurmd_terminate_job(orte_jobid_t jobid)
{
    int rc;

    /* order them to kill their local procs for this job */
    if (ORTE_SUCCESS != (rc = orte_plm_base_orted_kill_local_procs(jobid))) {
        ORTE_ERROR_LOG(rc);
    }

    return rc;
}


/**
 * Terminate the orteds for a given job
 */
static int plm_slurmd_terminate_orteds(void)
{
    int rc;

    /* tell them to die without sending a reply - we will rely on the
     * waitpid to tell us when they have exited!
     */
    if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_NO_REPLY_CMD))) {
        ORTE_ERROR_LOG(rc);
    }

    return rc;
}


/**
 * Signal all the processes in the child srun by sending the signal directly to it
 */
static int plm_slurmd_signal_job(orte_jobid_t jobid, int32_t signal)
{
    int rc = ORTE_SUCCESS;

    /* order them to pass this signal to their local procs */
    if (ORTE_SUCCESS != (rc = orte_plm_base_orted_signal_local_procs(jobid, signal))) {
        ORTE_ERROR_LOG(rc);
    }

    return rc;
}


static int plm_slurmd_finalize(void)
{
    int rc;

    /* cleanup any pending recvs */
    if (ORTE_SUCCESS != (rc = orte_plm_base_comm_stop())) {
        ORTE_ERROR_LOG(rc);
    }

    return ORTE_SUCCESS;
}


static void srun_wait_cb(pid_t pid, int status, void* cbdata){
    orte_job_t *jdata;

    /* According to the SLURM folks, srun always returns the highest exit
       code of our remote processes. Thus, a non-zero exit status doesn't
       necessarily mean that srun failed - it could be that an orted returned
       a non-zero exit status. Of course, that means the orted failed(!), so
       the end result is the same - the job didn't start.

       As a result, we really can't do much with the exit status itself - it
       could be something in errno (if srun itself failed), or it could be
       something returned by an orted, or it could be something returned by
       the OS (e.g., couldn't find the orted binary). Somebody is welcome
       to sort out all the options and pretty-print a better error message. For
       now, though, the only thing that really matters is that
       srun failed. Report the error and make sure that orterun
       wakes up - otherwise, do nothing!

       Unfortunately, the pid returned here is the srun pid, not the pid of
       the proc that actually died! So, to avoid confusion, just use -1 as the
       pid so nobody thinks this is real
    */

    /* if we are in the launch phase, then any termination is bad */
    if (launching_daemons) {
        /* report that one or more daemons failed to launch so we can exit */
        OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                             "%s plm:slurmd: daemon failed during launch",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        orte_plm_base_launch_failed(ORTE_PROC_MY_NAME->jobid, -1, status, ORTE_JOB_STATE_FAILED_TO_START);
    } else {
        /* if this is after launch, then we need to abort only if the status
         * returned is non-zero - i.e., if the orteds exited with an error
         */
        if (0 != status) {
            /* an orted must have died unexpectedly after launch - report
             * that the daemon has failed so we exit
             */
            OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                                 "%s plm:slurmd: daemon failed while running",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
            orte_plm_base_launch_failed(ORTE_PROC_MY_NAME->jobid, -1, status, ORTE_JOB_STATE_ABORTED);
        }
        /* otherwise, check to see if this is the primary pid */
        if (primary_srun_pid == pid) {
            /* in this case, we just want to fire the proper trigger so
             * mpirun can exit
             */
            OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                                 "%s plm:slurmd: primary daemons complete!",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
            jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
            jdata->state = ORTE_JOB_STATE_TERMINATED;
            /* need to set the #terminated value to avoid an incorrect error msg */
            jdata->num_terminated = jdata->num_procs;
            orte_trigger_event(&orteds_exit);
        }
    }
}


static int plm_slurmd_start_proc(int argc, char **argv, char **env,
                                 char *prefix)
{
    int fd;
    int srun_pid;
    char *exec_argv = opal_path_findv(argv[0], 0, env, NULL);

    if (NULL == exec_argv) {
        return ORTE_ERR_NOT_FOUND;
    }

    srun_pid = fork();
    if (-1 == srun_pid) {
        ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_CHILDREN);
        free(exec_argv);
        return ORTE_ERR_SYS_LIMITS_CHILDREN;
    }

    if (0 == srun_pid) {  /* child */
        char *bin_base = NULL, *lib_base = NULL;

        /* Figure out the basenames for the libdir and bindir.  There
           is a lengthy comment about this in plm_rsh_module.c
           explaining all the rationale for how / why we're doing
           this. */

        lib_base = opal_basename(opal_install_dirs.libdir);
        bin_base = opal_basename(opal_install_dirs.bindir);

        /* If we have a prefix, then modify the PATH and
           LD_LIBRARY_PATH environment variables. */
        if (NULL != prefix) {
            char *oldenv, *newenv;

            /* Reset PATH */
            oldenv = getenv("PATH");
            if (NULL != oldenv) {
                asprintf(&newenv, "%s/%s:%s", prefix, bin_base, oldenv);
            } else {
                asprintf(&newenv, "%s/%s", prefix, bin_base);
            }
            opal_setenv("PATH", newenv, true, &env);
            OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                                 "%s plm:slurmd: reset PATH: %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 newenv));
            free(newenv);

            /* Reset LD_LIBRARY_PATH */
            oldenv = getenv("LD_LIBRARY_PATH");
            if (NULL != oldenv) {
                asprintf(&newenv, "%s/%s:%s", prefix, lib_base, oldenv);
            } else {
                asprintf(&newenv, "%s/%s", prefix, lib_base);
            }
            opal_setenv("LD_LIBRARY_PATH", newenv, true, &env);
            OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                                 "%s plm:slurmd: reset LD_LIBRARY_PATH: %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 newenv));
            free(newenv);
        }

        fd = open("/dev/null", O_CREAT|O_WRONLY|O_TRUNC, 0666);
        if(fd > 0) {
            dup2(fd, 0);
        }

        /* When not in debug mode and --debug-daemons was not passed,
         * tie stdout/stderr to dev null so we don't see messages from orted
         * EXCEPT if the user has requested that we leave sessions attached
         */
        if (0 >= opal_output_get_verbosity(orte_plm_globals.output) &&
            !orte_debug_daemons_flag && !orte_leave_session_attached) {
            if (fd >= 0) {
                if (fd != 1) {
                    dup2(fd,1);
                }
                if (fd != 2) {
                    dup2(fd,2);
                }
            }
        }

        if (fd > 2) {
            close(fd);
        }

        /* get the srun process out of orterun's process group so that
           signals sent from the shell (like those resulting from
           cntl-c) don't get sent to srun */
        setpgid(0, 0);

        execve(exec_argv, argv, env);

        opal_output(0, "plm:slurmd:start_proc: exec failed");
        /* don't return - need to exit - returning would be bad -
           we're not in the calling process anymore */
        exit(1);
    } else {  /* parent */
        /* just in case, make sure that the srun process is not in our
           process group any more.  Stevens says always do this on both
           sides of the fork... */
        setpgid(srun_pid, srun_pid);

        /* setup the waitpid so we can find out if srun succeeds! */
        orte_wait_cb(srun_pid, srun_wait_cb, NULL);
        free(exec_argv);

        /* if this is the primary launch - i.e., not a comm_spawn of a
         * child job - then save the pid
         */
        if (!primary_pid_set) {
            primary_srun_pid = srun_pid;
            primary_pid_set = true;
        }
    }

    return ORTE_SUCCESS;
}
@ -85,19 +85,13 @@ static int plm_tm_signal_job(orte_jobid_t jobid, int32_t signal);
|
|||||||
static int plm_tm_finalize(void);
|
static int plm_tm_finalize(void);
|
||||||
|
|
||||||
static int plm_tm_connect(void);
|
static int plm_tm_connect(void);
|
||||||
|
static int plm_tm_disconnect(void);
|
||||||
static void failed_start(int fd, short event, void *arg);
|
static void failed_start(int fd, short event, void *arg);
|
||||||
static int obit_submit(int tid);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Local "global" variables
|
* Local "global" variables
|
||||||
*/
|
*/
|
||||||
static opal_event_t *ev=NULL;
|
static opal_event_t *ev=NULL;
|
||||||
static bool connected;
|
|
||||||
static tm_event_t *events_spawn = NULL;
|
|
||||||
static tm_event_t *events_obit = NULL;
|
|
||||||
static tm_task_id *tm_task_ids = NULL;
|
|
||||||
static int *evs = NULL;
|
|
||||||
static bool time_is_up;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Global variable
|
* Global variable
|
||||||
@ -113,20 +107,6 @@ orte_plm_base_module_t orte_plm_tm_module = {
|
|||||||
plm_tm_finalize
|
plm_tm_finalize
|
||||||
};
|
};
|
||||||
|
|
||||||
/* catch timeout to allow cmds to progress */
|
|
||||||
static void timer_cb(int fd, short event, void *cbdata)
|
|
||||||
{
|
|
||||||
opal_event_t *ev = (opal_event_t*)cbdata;
|
|
||||||
|
|
||||||
/* free event */
|
|
||||||
if (NULL != ev) {
|
|
||||||
free(ev);
|
|
||||||
}
|
|
||||||
/* declare time is up */
|
|
||||||
time_is_up = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Init the module
|
* Init the module
|
||||||
*/
|
*/
|
||||||
@ -147,7 +127,6 @@ static int plm_tm_init(void)
|
|||||||
*/
|
*/
|
||||||
static int plm_tm_launch_job(orte_job_t *jdata)
|
static int plm_tm_launch_job(orte_job_t *jdata)
|
||||||
{
|
{
|
||||||
orte_job_t *jdatorted;
|
|
||||||
orte_job_map_t *map = NULL;
|
orte_job_map_t *map = NULL;
|
||||||
orte_app_context_t **apps;
|
orte_app_context_t **apps;
|
||||||
orte_node_t **nodes;
|
orte_node_t **nodes;
|
||||||
@ -158,23 +137,20 @@ static int plm_tm_launch_job(orte_job_t *jdata)
|
|||||||
char **argv = NULL;
|
char **argv = NULL;
|
||||||
int argc = 0;
|
int argc = 0;
|
||||||
int rc;
|
int rc;
|
||||||
|
bool connected = false;
|
||||||
orte_std_cntr_t launched = 0, i;
|
orte_std_cntr_t launched = 0, i;
|
||||||
char *bin_base = NULL, *lib_base = NULL;
|
char *bin_base = NULL, *lib_base = NULL;
|
||||||
|
tm_event_t *tm_events = NULL;
|
||||||
|
tm_task_id *tm_task_ids = NULL;
|
||||||
int local_err;
|
int local_err;
|
||||||
|
tm_event_t event;
|
||||||
bool failed_launch = true;
|
bool failed_launch = true;
|
||||||
mode_t current_umask;
|
mode_t current_umask;
|
||||||
orte_jobid_t failed_job;
|
orte_jobid_t failed_job;
|
||||||
orte_job_state_t job_state = ORTE_JOB_NEVER_LAUNCHED;
|
|
||||||
int offset;
|
|
||||||
tm_event_t eventpolled;
|
|
||||||
orte_std_cntr_t num_daemons;
|
|
||||||
opal_event_t *timerev;
|
|
||||||
int j;
|
|
||||||
|
|
||||||
/* default to declaring the daemons as failed */
|
/* default to declaring the daemons as failed */
|
||||||
failed_job = ORTE_PROC_MY_NAME->jobid;
|
failed_job = ORTE_PROC_MY_NAME->jobid;
|
||||||
connected = false;
|
|
||||||
|
|
||||||
/* create a jobid for this job */
|
/* create a jobid for this job */
|
||||||
if (ORTE_SUCCESS != (rc = orte_plm_base_create_jobid(&jdata->jobid))) {
|
if (ORTE_SUCCESS != (rc = orte_plm_base_create_jobid(&jdata->jobid))) {
|
||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
@ -206,107 +182,20 @@ static int plm_tm_launch_job(orte_job_t *jdata)
|
|||||||
goto launch_apps;
|
goto launch_apps;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* lookup the daemon job object - must do this -after- the job is
|
/* Allocate a bunch of TM events to use for tm_spawn()ing */
|
||||||
* setup so the number of required daemons has been updated
|
tm_events = malloc(sizeof(tm_event_t) * map->num_new_daemons);
|
||||||
*/
|
if (NULL == tm_events) {
|
||||||
if (NULL == (jdatorted = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
|
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
||||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
ORTE_ERROR_LOG(rc);
|
||||||
rc = ORTE_ERR_NOT_FOUND;
|
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
num_daemons = jdatorted->num_procs - 1; /* do not include myself as I am already here! */
|
tm_task_ids = malloc(sizeof(tm_task_id) * map->num_new_daemons);
|
||||||
if (0 >= num_daemons) {
|
|
||||||
/* this won't work */
|
|
||||||
rc = ORTE_ERR_BAD_PARAM;
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Allocate a bunch of TM events to use */
|
|
||||||
if (NULL == events_spawn) {
|
|
||||||
/* spawn events for first launch */
|
|
||||||
events_spawn = (tm_event_t*)malloc(num_daemons * sizeof(tm_event_t));
|
|
||||||
if (NULL == events_spawn) {
|
|
||||||
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
|
||||||
ORTE_ERROR_LOG(rc);
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
/* comm_spawn launch */
|
|
||||||
events_spawn = (tm_event_t*)realloc(events_spawn, sizeof(tm_event_t) * num_daemons);
|
|
||||||
if (NULL == events_spawn) {
|
|
||||||
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
|
||||||
ORTE_ERROR_LOG(rc);
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
if (NULL == events_obit) {
|
|
||||||
/* obit events for first launch */
|
|
||||||
events_obit = (tm_event_t*)malloc(num_daemons * sizeof(tm_event_t));
|
|
||||||
if (NULL == events_obit) {
|
|
||||||
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
|
||||||
ORTE_ERROR_LOG(rc);
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
/* comm_spawn launch */
|
|
||||||
events_obit = (tm_event_t*)realloc(events_obit, sizeof(tm_event_t) * num_daemons);
|
|
||||||
if (NULL == events_obit) {
|
|
||||||
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
|
||||||
ORTE_ERROR_LOG(rc);
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
if (NULL == evs) {
|
|
||||||
/* evs for first launch */
|
|
||||||
evs = (int*)malloc(num_daemons * sizeof(tm_event_t));
|
|
||||||
if (NULL == evs) {
|
|
||||||
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
|
||||||
ORTE_ERROR_LOG(rc);
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
/* comm_spawn launch */
|
|
||||||
evs = (int*)realloc(evs, sizeof(int) * num_daemons);
|
|
||||||
if (NULL == evs) {
|
|
||||||
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
|
||||||
ORTE_ERROR_LOG(rc);
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/* allocate task ids for the orteds */
|
|
||||||
if (NULL == tm_task_ids) {
|
if (NULL == tm_task_ids) {
|
||||||
/* first launch */
|
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
||||||
tm_task_ids = (tm_task_id*)malloc(num_daemons * sizeof(tm_task_id));
|
ORTE_ERROR_LOG(rc);
|
||||||
if (NULL == tm_task_ids) {
|
goto cleanup;
|
||||||
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
|
||||||
ORTE_ERROR_LOG(rc);
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
/* comm_spawn launch */
|
|
||||||
tm_task_ids = (tm_task_id*)realloc(tm_task_ids, sizeof(tm_task_id) * num_daemons);
|
|
||||||
if (NULL == tm_task_ids) {
|
|
||||||
rc = ORTE_ERR_OUT_OF_RESOURCE;
|
|
||||||
ORTE_ERROR_LOG(rc);
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* compute the offset into the event/task arrays */
|
|
||||||
offset = num_daemons - map->num_new_daemons;
|
|
||||||
|
|
||||||
/* initialize them */
|
|
||||||
for (i=0; i < map->num_new_daemons; i++) {
|
|
||||||
*(tm_task_ids + offset + i) = TM_NULL_TASK;
|
|
||||||
*(events_spawn + offset + i) = TM_NULL_EVENT;
|
|
||||||
*(events_obit + offset + i) = TM_NULL_EVENT;
|
|
||||||
*(evs + offset + i) = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* add the daemon command (as specified by user) */
|
/* add the daemon command (as specified by user) */
|
||||||
orte_plm_base_setup_orted_cmd(&argc, &argv);
|
orte_plm_base_setup_orted_cmd(&argc, &argv);
|
||||||
|
|
||||||
@ -382,9 +271,6 @@ static int plm_tm_launch_job(orte_job_t *jdata)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* set the job state to indicate we attempted to launch */
|
|
||||||
job_state = ORTE_JOB_STATE_FAILED_TO_START;
|
|
||||||
|
|
||||||
/* Iterate through each of the nodes and spin
|
/* Iterate through each of the nodes and spin
|
||||||
* up a daemon.
|
* up a daemon.
|
||||||
*/
|
*/
|
||||||
@ -406,7 +292,7 @@ static int plm_tm_launch_job(orte_job_t *jdata)
|
|||||||
rc = orte_util_convert_vpid_to_string(&vpid_string, nodes[i]->daemon->name.vpid);
|
rc = orte_util_convert_vpid_to_string(&vpid_string, nodes[i]->daemon->name.vpid);
|
||||||
if (ORTE_SUCCESS != rc) {
|
if (ORTE_SUCCESS != rc) {
|
||||||
opal_output(0, "plm:tm: unable to get daemon vpid as string");
|
opal_output(0, "plm:tm: unable to get daemon vpid as string");
|
||||||
goto cleanup;
|
exit(-1);
|
||||||
}
|
}
|
||||||
free(argv[proc_vpid_index]);
|
free(argv[proc_vpid_index]);
|
||||||
argv[proc_vpid_index] = strdup(vpid_string);
|
argv[proc_vpid_index] = strdup(vpid_string);
|
@ -422,7 +308,7 @@ static int plm_tm_launch_job(orte_job_t *jdata)
             if (NULL != param) free(param);
         }
 
-        rc = tm_spawn(argc, argv, env, node->launch_id, tm_task_ids + offset + launched, events_spawn + offset + launched);
+        rc = tm_spawn(argc, argv, env, node->launch_id, tm_task_ids + launched, tm_events + launched);
         if (TM_SUCCESS != rc) {
             orte_show_help("help-plm-tm.txt", "tm-spawn-failed",
                            true, argv[0], node->name, node->launch_id);
@ -440,54 +326,14 @@ static int plm_tm_launch_job(orte_job_t *jdata)
                         "%s plm:tm:launch: finished spawning orteds",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
 
-    /* setup a timer to give the cmd a chance to be sent */
-    time_is_up = false;
-    ORTE_DETECT_TIMEOUT(&timerev, launched,
-                        100, -1, timer_cb);
-
-    ORTE_PROGRESSED_WAIT(time_is_up, 0, 1);
-
     /* TM poll for all the spawns */
-    while (0 < launched) {
-        rc = tm_poll(TM_NULL_EVENT, &eventpolled, (int)false, &local_err);
+    for (i = 0; i < launched; ++i) {
+        rc = tm_poll(TM_NULL_EVENT, &event, 1, &local_err);
         if (TM_SUCCESS != rc) {
-            opal_output(0, "plm:tm: event poll for spawned daemon failed, return status = %d", rc);
-            rc = ORTE_ERROR;
+            errno = local_err;
+            opal_output(0, "plm:tm: failed to poll for a spawned daemon, return status = %d", rc);
             goto cleanup;
         }
-        /* if we get back the NULL event, then just continue */
-        if (eventpolled == TM_NULL_EVENT) {
-            continue;
-        }
-        /* look for the spawned event */
-        for (j=0; j < map->num_new_daemons; j++) {
-            if (eventpolled == *(events_spawn + offset + j)) {
-                /* got the event - check returned code */
-                if (local_err) {
-                    /* this orted failed to launch! */
-                    orte_show_help("help-plm-tm.txt", "tm-spawn-failed",
-                                   true, argv[0], nodes[j]->name, nodes[j]->launch_id);
-                    rc = ORTE_ERROR;
-                    goto cleanup;
-                }
-                /* register the corresponding obit so we can detect when this
-                 * orted terminates
-                 */
-                if (ORTE_SUCCESS != (rc = obit_submit(offset+j))) {
-                    ORTE_ERROR_LOG(rc);
-                    goto cleanup;
-                }
-                /* all done with this event */
-                goto MOVEON;
-            }
-        }
-        /* if we get here, then we failed to find the event */
-        opal_output(0, "TM FAILED TO FIND SPAWN EVENT WHEN LAUNCHING");
-        rc = ORTE_ERROR;
-        goto cleanup;
-
-    MOVEON:
-        launched--;
     }
 
     /* set a timer to tell us if one or more daemon's fails to start - use the
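
The two sides of this hunk drive tm_poll() in different modes: the restored loop polls blocking (wait flag 1), once per spawned task, while the dropped loop polled non-blocking and had to match each returned event against its table of spawn events. A minimal standalone sketch of the two styles (not the committed code, assuming the standard PBS TM prototype int tm_poll(tm_event_t poll_event, tm_event_t *result_event, int wait, int *tm_errno) and illustrative helper names):

    #include <tm.h>

    /* Blocking style (restored code): wait=1 returns one completed event
     * per call, so one call per spawned task suffices. */
    static int wait_spawns_blocking(int launched)
    {
        tm_event_t event;
        int i, local_err;

        for (i = 0; i < launched; ++i) {
            if (TM_SUCCESS != tm_poll(TM_NULL_EVENT, &event, 1, &local_err)) {
                return -1;
            }
        }
        return 0;
    }

    /* Non-blocking style (dropped code): wait=0 may hand back TM_NULL_EVENT,
     * so the caller loops until every outstanding spawn event is matched. */
    static int wait_spawns_nonblocking(int launched, tm_event_t *spawn_events)
    {
        tm_event_t event;
        int j, local_err;

        while (0 < launched) {
            if (TM_SUCCESS != tm_poll(TM_NULL_EVENT, &event, 0, &local_err)) {
                return -1;
            }
            if (TM_NULL_EVENT == event) {
                continue;                /* nothing has completed yet */
            }
            for (j = 0; j < launched; ++j) {
                if (event == spawn_events[j]) {
                    spawn_events[j] = spawn_events[launched - 1]; /* compact */
                    --launched;
                    break;
                }
            }
        }
        return 0;
    }

The non-blocking variant never sleeps between empty polls, which is why the dropped code armed a short ORTE timer before polling, giving the spawn commands a chance to be sent first.
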
@ -542,6 +388,16 @@ launch_apps:
         opal_argv_free(env);
     }
 
+    if (connected) {
+        plm_tm_disconnect();
+    }
+    if (NULL != tm_events) {
+        free(tm_events);
+    }
+    if (NULL != tm_task_ids) {
+        free(tm_task_ids);
+    }
+
     if (NULL != lib_base) {
         free(lib_base);
     }
@ -551,7 +407,7 @@ launch_apps:
 
     /* check for failed launch - if so, force terminate */
     if (failed_launch) {
-        orte_plm_base_launch_failed(failed_job, -1, ORTE_ERROR_DEFAULT_EXIT_CODE, job_state);
+        orte_plm_base_launch_failed(failed_job, -1, ORTE_ERROR_DEFAULT_EXIT_CODE, ORTE_JOB_STATE_FAILED_TO_START);
     }
 
     /* setup a "heartbeat" timer to periodically check on
@ -582,14 +438,6 @@ static int plm_tm_terminate_job(orte_jobid_t jobid)
     return rc;
 }
 
-/* quick timeout loop */
-static bool timer_fired;
-
-static void quicktime_cb(int fd, short event, void *cbdata)
-{
-    /* declare it fired */
-    timer_fired = true;
-}
-
 /**
  * Terminate the orteds for a given job
@ -597,143 +445,12 @@ static void quicktime_cb(int fd, short event, void *cbdata)
 int plm_tm_terminate_orteds(void)
 {
     int rc;
-    orte_job_t *jdata;
-    orte_proc_t **daemons;
-    tm_event_t eventpolled;
-    orte_vpid_t j, alive;
-    int local_err;
-    opal_event_t *timerev=NULL;
-    opal_event_t *quicktime=NULL;
-    struct timeval quicktimeval;
-    bool aborted;
 
-    OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
-                         "%s plm:tm: terminating orteds",
-                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
-
-    /* lookup the daemon job object */
-    if (NULL == (jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
-        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
-    }
-    alive = jdata->num_procs - 1; /* do not include myself! */
-    daemons = (orte_proc_t**)jdata->procs->addr;
-    aborted = false;
-
-    /* tell them to die! */
-    if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_NO_REPLY_CMD))) {
+    /* now tell them to die! */
+    if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit())) {
         ORTE_ERROR_LOG(rc);
     }
-
-    /* if there are more than just me... */
-    if (0 < alive) {
-        /* setup a max time for the daemons to die */
-        time_is_up = false;
-        ORTE_DETECT_TIMEOUT(&timerev, alive,
-                            1000000, 60000000, timer_cb);
-
-        /* give the cmds a chance to get out */
-        quicktimeval.tv_sec = 0;
-        quicktimeval.tv_usec = 100;
-        timer_fired = false;
-        ORTE_DETECT_TIMEOUT(&quicktime, alive, 1000, 10000, quicktime_cb);
-        ORTE_PROGRESSED_WAIT(timer_fired, 0, 1);
-
-        /* now begin polling to see if daemons have terminated */
-        while (!time_is_up && 0 < alive) {
-            OPAL_OUTPUT_VERBOSE((10, orte_plm_globals.output,
-                                 "%s plm:tm: polling for daemon termination",
-                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
-            rc = tm_poll(TM_NULL_EVENT, &eventpolled, (int)false, &local_err);
-            if (TM_SUCCESS != rc) {
-                errno = local_err;
-                opal_output(0, "plm:tm: event poll for daemon termination failed, return status = %d", rc);
-                continue; /* we will wait for timeout to tell us to quit */
-            }
-            /* if we get back the NULL event, then just continue */
-            if (eventpolled == TM_NULL_EVENT) {
-                OPAL_OUTPUT_VERBOSE((10, orte_plm_globals.output,
-                                     "%s plm:tm: got null event",
-                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
-                /* give system a little time to progress */
-                timer_fired = false;
-                opal_evtimer_add(quicktime, &quicktimeval);
-                ORTE_PROGRESSED_WAIT(timer_fired, 0, 1);
-                continue;
-            }
-            /* look for the obit event */
-            for (j=0; j < jdata->num_procs-1; j++) {
-                if (eventpolled == *(events_obit + j)) {
-                    /* got the event - check returned code */
-                    if (local_err == TM_ESYSTEM) {
-                        OPAL_OUTPUT_VERBOSE((10, orte_plm_globals.output,
-                                             "%s plm:tm: got TM_ESYSTEM on obit - resubmitting",
-                                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
-                        if (ORTE_SUCCESS != (rc = obit_submit(j))) {
-                            ORTE_ERROR_LOG(rc);
-                            goto MOVEON;
-                        }
-                        /* give system a little time to progress */
-                        timer_fired = false;
-                        opal_evtimer_add(quicktime, &quicktimeval);
-                        ORTE_PROGRESSED_WAIT(timer_fired, 0, 1);
-                    }
-                    if (0 != local_err) {
-                        OPAL_OUTPUT_VERBOSE((10, orte_plm_globals.output,
-                                             "%s plm:tm: got error %d on obit for task %d",
-                                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_err, j));
-                        rc = ORTE_ERROR;
-                        goto MOVEON;
-                    }
-                    /* this daemon has terminated */
-                    *(tm_task_ids+j) = TM_NULL_TASK;
-                    *(events_obit+j) = TM_NULL_EVENT;
-                    OPAL_OUTPUT_VERBOSE((10, orte_plm_globals.output,
-                                         "%s plm:tm: task %d exited with status %d",
-                                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), j, *(evs+j)));
-                    /* update the termination status for this daemon */
-                    daemons[j+1]->exit_code = *(evs+j);
-                    if (0 != daemons[j+1]->exit_code) {
-                        daemons[j+1]->state = ORTE_PROC_STATE_ABORTED;
-                        aborted = true;
-                    } else {
-                        daemons[j+1]->state = ORTE_PROC_STATE_TERMINATED;
-                    }
-                    jdata->num_terminated++;
-                    /* all done with this event */
-                    goto MOVEON;
-                }
-            }
-            /* if we get here, then we failed to find the event */
-            opal_output(0, "TM FAILED TO FIND OBIT EVENT");
-
-        MOVEON:
-            alive--;
-        }
-
-        /* release event if not already done */
-        if (NULL != quicktime) {
-            free(quicktime);
-        }
-        if (NULL != timerev) {
-            opal_event_del(timerev);
-            free(timerev);
-        }
-    } else {
-        /* still need to give the cmds a chance to get out so I can process
-         * them myself!
-         */
-        timer_fired = false;
-        ORTE_DETECT_TIMEOUT(&quicktime, 1, 1000, 10000, quicktime_cb);
-        ORTE_PROGRESSED_WAIT(timer_fired, 0, 1);
-    }
-
-    /* declare the daemons done */
-    if (aborted || 0 < alive) {
-        jdata->state = ORTE_JOB_STATE_ABORTED;
-    } else {
-        jdata->state = ORTE_JOB_STATE_TERMINATED;
-    }
-    orte_trigger_event(&orteds_exit);
-
     return rc;
 }
@ -762,24 +479,6 @@ static int plm_tm_finalize(void)
         ORTE_ERROR_LOG(rc);
     }
 
-    if (connected) {
-        tm_finalize();
-    }
-
-    /* cleanup data arrays */
-    if (NULL != events_spawn) {
-        free(events_spawn);
-    }
-    if (NULL != events_obit) {
-        free(events_obit);
-    }
-    if (NULL != tm_task_ids) {
-        free(tm_task_ids);
-    }
-    if (NULL != evs) {
-        free(evs);
-    }
-
     return ORTE_SUCCESS;
 }
@ -810,6 +509,13 @@ static int plm_tm_connect(void)
 }
 
 
+static int plm_tm_disconnect(void)
+{
+    tm_finalize();
+
+    return ORTE_SUCCESS;
+}
+
 /* call this function if the timer fires indicating that one
  * or more daemons failed to start
  */
@ -830,21 +536,3 @@ static void failed_start(int fd, short dummy, void *arg)
     orte_plm_base_launch_failed(ORTE_PROC_MY_NAME->jobid, -1,
                                 ORTE_ERROR_DEFAULT_EXIT_CODE, ORTE_JOB_STATE_FAILED_TO_START);
 }
-
-static int obit_submit(int tid)
-{
-    int rc;
-
-    if (TM_SUCCESS != (rc = tm_obit(*(tm_task_ids+tid), evs+tid, events_obit+tid))) {
-        opal_output(0, "failed to register termination notice for task %d", tid);
-        rc = ORTE_ERROR;
-        return rc;
-    }
-    if (*(events_obit+tid) == TM_NULL_EVENT) {
-        opal_output(0, "task %d is already dead", tid);
-    } else if (*(events_obit+tid) == TM_ERROR_EVENT) {
-        opal_output(0, "Error on obit return - got error event for task %d", tid);
-    }
-
-    return ORTE_SUCCESS;
-}
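
The removed obit_submit() was the second half of a spawn/obit lifecycle: once a task has been spawned, tm_obit() registers an "obituary" event that a later tm_poll() returns when the task exits, with TM writing the exit status into the caller-supplied slot. A self-contained sketch of that pattern, assuming the standard prototype int tm_obit(tm_task_id tid, int *obitval, tm_event_t *event); the struct and function names here are illustrative, not ORTE's:

    #include <tm.h>

    typedef struct {
        tm_task_id task;        /* filled in when tm_spawn() completes     */
        tm_event_t obit_ev;     /* completes when the task exits           */
        int        exit_status; /* written by TM once the obit event fires */
    } daemon_track_t;

    /* Register for a task's obituary so a later tm_poll() reports its exit. */
    static int track_termination(daemon_track_t *d)
    {
        if (TM_SUCCESS != tm_obit(d->task, &d->exit_status, &d->obit_ev)) {
            return -1;                /* registration itself failed */
        }
        if (TM_NULL_EVENT == d->obit_ev) {
            return 0;                 /* task already dead; status is valid */
        }
        if (TM_ERROR_EVENT == d->obit_ev) {
            return -1;                /* e.g. no such task */
        }
        return 0;    /* obit_ev will eventually come back from tm_poll() */
    }
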
@ -30,7 +30,6 @@ BEGIN_C_DECLS
 struct orte_plm_tmd_component_t {
     orte_plm_base_component_t super;
     bool want_path_check;
-    char *orted;
     char **checked_paths;
 };
 typedef struct orte_plm_tmd_component_t orte_plm_tmd_component_t;
@ -41,4 +40,4 @@ extern orte_plm_base_module_t orte_plm_tmd_module;
 
 END_C_DECLS
 
-#endif /* ORTE_PLM_TM_EXPORT_H */
+#endif /* ORTE_PLM_TMD_EXPORT_H */
@ -92,12 +92,8 @@ static int plm_tmd_open(void)
     int tmp;
     mca_base_component_t *comp = &mca_plm_tmd_component.super.base_version;
 
-    mca_base_param_reg_string(comp, "orted",
-                              "Command to use to start proxy orted",
-                              false, false, "orted",
-                              &mca_plm_tmd_component.orted);
     mca_base_param_reg_int(comp, "want_path_check",
-                           "Whether the launching process should check for the plm_tmd_orted executable in the PATH before launching (the TM API does not give an idication of failure; this is a somewhat-lame workaround; non-zero values enable this check)",
+                           "Whether the launching process should check for the plm_tmd_orted executable in the PATH before launching (the TM API does not give an indication of failure; this is a somewhat-lame workaround; non-zero values enable this check)",
                            false, false, (int) true, &tmp);
     mca_plm_tmd_component.want_path_check = OPAL_INT_TO_BOOL(tmp);
 
@ -124,7 +120,7 @@ static int orte_plm_tmd_component_query(mca_base_module_t **module, int *priority)
     if (NULL != getenv("PBS_ENVIRONMENT") &&
         NULL != getenv("PBS_JOBID")) {
 
-        *priority = 1;
+        *priority = 2;
         *module = (mca_base_module_t *) &orte_plm_tmd_module;
         return ORTE_SUCCESS;
     }
@ -85,13 +85,19 @@ static int plm_tmd_signal_job(orte_jobid_t jobid, int32_t signal);
 static int plm_tmd_finalize(void);
 
 static int plm_tmd_connect(void);
-static int plm_tmd_disconnect(void);
 static void failed_start(int fd, short event, void *arg);
+static int obit_submit(int tid);
 
 /*
  * Local "global" variables
  */
 static opal_event_t *ev=NULL;
+static bool connected;
+static tm_event_t *events_spawn = NULL;
+static tm_event_t *events_obit = NULL;
+static tm_task_id *tm_task_ids = NULL;
+static int *evs = NULL;
+static bool time_is_up;
 
 /*
  * Global variable
@ -107,6 +113,20 @@ orte_plm_base_module_t orte_plm_tmd_module = {
     plm_tmd_finalize
 };
 
+/* catch timeout to allow cmds to progress */
+static void timer_cb(int fd, short event, void *cbdata)
+{
+    opal_event_t *ev = (opal_event_t*)cbdata;
+
+    /* free event */
+    if (NULL != ev) {
+        free(ev);
+    }
+    /* declare time is up */
+    time_is_up = true;
+}
+
 
 /**
  * Init the module
  */
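
The added timer_cb(), together with the ORTE_PROGRESSED_WAIT calls later in this file, implements a simple pattern: arm a timer that flips a flag, then keep progressing the runtime until the flag trips. Reduced to plain POSIX signals (a sketch only; the committed code uses opal's libevent-based timers, not SIGALRM), the idea is roughly:

    #include <signal.h>
    #include <unistd.h>

    static volatile sig_atomic_t time_is_up = 0;

    /* timer callback: just declare that time is up */
    static void alarm_cb(int signo)
    {
        (void)signo;
        time_is_up = 1;
    }

    /* spin the progress function until the timer fires */
    static void progressed_wait(unsigned seconds, void (*progress)(void))
    {
        signal(SIGALRM, alarm_cb);
        alarm(seconds);
        while (!time_is_up) {
            progress();    /* let pending commands move through the system */
        }
        alarm(0);          /* cancel any leftover alarm */
    }

Note that the committed callback also frees the event it is handed, since the event object armed by ORTE_DETECT_TIMEOUT is heap-allocated.
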
@ -127,6 +147,7 @@ static int plm_tmd_init(void)
  */
 static int plm_tmd_launch_job(orte_job_t *jdata)
 {
+    orte_job_t *jdatorted;
     orte_job_map_t *map = NULL;
     orte_app_context_t **apps;
     orte_node_t **nodes;
@ -135,22 +156,25 @@ static int plm_tmd_launch_job(orte_job_t *jdata)
     char **env = NULL;
     char *var;
     char **argv = NULL;
-    int argc;
+    int argc = 0;
     int rc;
-    bool connected = false;
     orte_std_cntr_t launched = 0, i;
     char *bin_base = NULL, *lib_base = NULL;
-    tm_event_t *tm_events = NULL;
-    tm_task_id *tm_task_ids = NULL;
     int local_err;
-    tm_event_t event;
     bool failed_launch = true;
     mode_t current_umask;
     orte_jobid_t failed_job;
+    orte_job_state_t job_state = ORTE_JOB_NEVER_LAUNCHED;
+    int offset;
+    tm_event_t eventpolled;
+    orte_std_cntr_t num_daemons;
+    opal_event_t *timerev;
+    int j;
 
     /* default to declaring the daemons as failed */
     failed_job = ORTE_PROC_MY_NAME->jobid;
+    connected = false;
 
     /* create a jobid for this job */
     if (ORTE_SUCCESS != (rc = orte_plm_base_create_jobid(&jdata->jobid))) {
         ORTE_ERROR_LOG(rc);
@ -158,7 +182,7 @@ static int plm_tmd_launch_job(orte_job_t *jdata)
     }
 
     OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
-                         "%s plm:tmd: launching job %s",
+                         "%s plm:tm: launching job %s",
                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                          ORTE_JOBID_PRINT(jdata->jobid)));
 
@ -182,23 +206,109 @@ static int plm_tmd_launch_job(orte_job_t *jdata)
         goto launch_apps;
     }
 
-    /* Allocate a bunch of TM events to use for tm_spawn()ing */
-    tm_events = malloc(sizeof(tm_event_t) * map->num_new_daemons);
-    if (NULL == tm_events) {
-        rc = ORTE_ERR_OUT_OF_RESOURCE;
-        ORTE_ERROR_LOG(rc);
+    /* lookup the daemon job object - must do this -after- the job is
+     * setup so the number of required daemons has been updated
+     */
+    if (NULL == (jdatorted = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
+        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
+        rc = ORTE_ERR_NOT_FOUND;
         goto cleanup;
     }
-    tm_task_ids = malloc(sizeof(tm_task_id) * map->num_new_daemons);
-    if (NULL == tm_task_ids) {
-        rc = ORTE_ERR_OUT_OF_RESOURCE;
-        ORTE_ERROR_LOG(rc);
+    num_daemons = jdatorted->num_procs - 1; /* do not include myself as I am already here! */
+    if (0 >= num_daemons) {
+        /* this won't work */
+        rc = ORTE_ERR_BAD_PARAM;
         goto cleanup;
     }
+
+    /* Allocate a bunch of TM events to use */
+    if (NULL == events_spawn) {
+        /* spawn events for first launch */
+        events_spawn = (tm_event_t*)malloc(num_daemons * sizeof(tm_event_t));
+        if (NULL == events_spawn) {
+            rc = ORTE_ERR_OUT_OF_RESOURCE;
+            ORTE_ERROR_LOG(rc);
+            goto cleanup;
+        }
+    } else {
+        /* comm_spawn launch */
+        events_spawn = (tm_event_t*)realloc(events_spawn, sizeof(tm_event_t) * num_daemons);
+        if (NULL == events_spawn) {
+            rc = ORTE_ERR_OUT_OF_RESOURCE;
+            ORTE_ERROR_LOG(rc);
+            goto cleanup;
+        }
+
+    }
+    if (NULL == events_obit) {
+        /* obit events for first launch */
+        events_obit = (tm_event_t*)malloc(num_daemons * sizeof(tm_event_t));
+        if (NULL == events_obit) {
+            rc = ORTE_ERR_OUT_OF_RESOURCE;
+            ORTE_ERROR_LOG(rc);
+            goto cleanup;
+        }
+    } else {
+        /* comm_spawn launch */
+        events_obit = (tm_event_t*)realloc(events_obit, sizeof(tm_event_t) * num_daemons);
+        if (NULL == events_obit) {
+            rc = ORTE_ERR_OUT_OF_RESOURCE;
+            ORTE_ERROR_LOG(rc);
+            goto cleanup;
+        }
+
+    }
+    if (NULL == evs) {
+        /* evs for first launch */
+        evs = (int*)malloc(num_daemons * sizeof(tm_event_t));
+        if (NULL == evs) {
+            rc = ORTE_ERR_OUT_OF_RESOURCE;
+            ORTE_ERROR_LOG(rc);
+            goto cleanup;
+        }
+    } else {
+        /* comm_spawn launch */
+        evs = (int*)realloc(evs, sizeof(int) * num_daemons);
+        if (NULL == evs) {
+            rc = ORTE_ERR_OUT_OF_RESOURCE;
+            ORTE_ERROR_LOG(rc);
+            goto cleanup;
+        }
+
+    }
+
+    /* allocate task ids for the orteds */
+    if (NULL == tm_task_ids) {
+        /* first launch */
+        tm_task_ids = (tm_task_id*)malloc(num_daemons * sizeof(tm_task_id));
+        if (NULL == tm_task_ids) {
+            rc = ORTE_ERR_OUT_OF_RESOURCE;
+            ORTE_ERROR_LOG(rc);
+            goto cleanup;
+        }
+    } else {
+        /* comm_spawn launch */
+        tm_task_ids = (tm_task_id*)realloc(tm_task_ids, sizeof(tm_task_id) * num_daemons);
+        if (NULL == tm_task_ids) {
+            rc = ORTE_ERR_OUT_OF_RESOURCE;
+            ORTE_ERROR_LOG(rc);
+            goto cleanup;
+        }
+    }
+
+    /* compute the offset into the event/task arrays */
+    offset = num_daemons - map->num_new_daemons;
+
+    /* initialize them */
+    for (i=0; i < map->num_new_daemons; i++) {
+        *(tm_task_ids + offset + i) = TM_NULL_TASK;
+        *(events_spawn + offset + i) = TM_NULL_EVENT;
+        *(events_obit + offset + i) = TM_NULL_EVENT;
+        *(evs + offset + i) = 0;
+    }
 
     /* add the daemon command (as specified by user) */
-    argv = opal_argv_split(mca_plm_tmd_component.orted, ' ');
-    argc = opal_argv_count(argv);
+    orte_plm_base_setup_orted_cmd(&argc, &argv);
 
     /* Add basic orted command line options */
     orte_plm_base_orted_append_basic_args(&argc, &argv, "env",
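
All four arrays above follow the same grow-on-comm_spawn pattern: one slot per daemon for the life of the job, realloc on every later launch, and initialization of only the new tail, which starts at offset = num_daemons - map->num_new_daemons. A condensed sketch of the pattern with an illustrative helper name (unlike the committed branches, it reallocs through a temporary, so the old block survives an allocation failure):

    #include <stdlib.h>
    #include <tm.h>

    static tm_event_t *spawn_evs = NULL;     /* persists across launches */

    static int grow_spawn_events(int num_daemons, int num_new)
    {
        int i, offset = num_daemons - num_new;  /* first slot of this launch */
        tm_event_t *tmp;

        /* realloc(NULL, n) acts like malloc(n), so one call covers both the
         * first launch and later comm_spawn launches */
        tmp = (tm_event_t*)realloc(spawn_evs, num_daemons * sizeof(tm_event_t));
        if (NULL == tmp) {
            return -1;               /* old array still valid for cleanup */
        }
        spawn_evs = tmp;

        /* initialize only the slots belonging to the new daemons */
        for (i = 0; i < num_new; ++i) {
            spawn_evs[offset + i] = TM_NULL_EVENT;
        }
        return 0;
    }

One small wrinkle in the hunk itself: the evs branch sizes its int array with sizeof(tm_event_t) rather than sizeof(int), which is harmless on the usual tm.h where tm_event_t is an int, but worth a second look elsewhere.
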
@ -208,7 +318,7 @@ static int plm_tmd_launch_job(orte_job_t *jdata)
     if (0 < opal_output_get_verbosity(orte_plm_globals.output)) {
         param = opal_argv_join(argv, ' ');
         OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
-                             "%s plm:tmd: final top-level argv:\n\t%s",
+                             "%s plm:tm: final top-level argv:\n\t%s",
                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                              (NULL == param) ? "NULL" : param));
         if (NULL != param) free(param);
@ -251,7 +361,7 @@ static int plm_tmd_launch_job(orte_job_t *jdata)
             asprintf(&newenv, "%s/%s:%s",
                      apps[0]->prefix_dir, bin_base, env[i] + 5);
             OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
-                                 "%s plm:tmd: resetting PATH: %s",
+                                 "%s plm:tm: resetting PATH: %s",
                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                  newenv));
             opal_setenv("PATH", newenv, true, &env);
@ -263,7 +373,7 @@ static int plm_tmd_launch_job(orte_job_t *jdata)
             asprintf(&newenv, "%s/%s:%s",
                      apps[0]->prefix_dir, lib_base, env[i] + 16);
             OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
-                                 "%s plm:tmd: resetting LD_LIBRARY_PATH: %s",
+                                 "%s plm:tm: resetting LD_LIBRARY_PATH: %s",
                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                  newenv));
             opal_setenv("LD_LIBRARY_PATH", newenv, true, &env);
@ -272,13 +382,8 @@ static int plm_tmd_launch_job(orte_job_t *jdata)
         }
     }
 
-    /* For this launch module, we encode all the required launch info
-     * in the daemon's environment. This includes the nidmap for the
-     * daemons, as well as the app_contexts and the map of ranks vs
-     * nodes
-     */
-
-    /* encode the nidmap */
-
+    /* set the job state to indicate we attempted to launch */
+    job_state = ORTE_JOB_STATE_FAILED_TO_START;
+
     /* Iterate through each of the nodes and spin
      * up a daemon.
      */
@ -293,15 +398,15 @@ static int plm_tmd_launch_job(orte_job_t *jdata)
         }
 
         OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
-                             "%s plm:tmd: launching on node %s",
+                             "%s plm:tm: launching on node %s",
                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                              node->name));
 
         /* setup process name */
         rc = orte_util_convert_vpid_to_string(&vpid_string, nodes[i]->daemon->name.vpid);
         if (ORTE_SUCCESS != rc) {
-            opal_output(0, "plm:tmd: unable to get daemon vpid as string");
-            exit(-1);
+            opal_output(0, "plm:tm: unable to get daemon vpid as string");
+            goto cleanup;
         }
         free(argv[proc_vpid_index]);
         argv[proc_vpid_index] = strdup(vpid_string);
@ -311,15 +416,15 @@ static int plm_tmd_launch_job(orte_job_t *jdata)
         if (0 < opal_output_get_verbosity(orte_plm_globals.output)) {
             param = opal_argv_join(argv, ' ');
             OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
-                                 "%s plm:tmd: executing:\n\t%s",
+                                 "%s plm:tm: executing:\n\t%s",
                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                  (NULL == param) ? "NULL" : param));
             if (NULL != param) free(param);
         }
 
-        rc = tm_spawn(argc, argv, env, node->launch_id, tm_task_ids + launched, tm_events + launched);
+        rc = tm_spawn(argc, argv, env, node->launch_id, tm_task_ids + offset + launched, events_spawn + offset + launched);
         if (TM_SUCCESS != rc) {
-            orte_show_help("help-plm-tmd.txt", "tmd-spawn-failed",
+            orte_show_help("help-plm-tm.txt", "tm-spawn-failed",
                            true, argv[0], node->name, node->launch_id);
             rc = ORTE_ERROR;
             goto cleanup;
@ -332,17 +437,57 @@ static int plm_tmd_launch_job(orte_job_t *jdata)
     }
 
     OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
-                         "%s plm:tmd:launch: finished spawning orteds",
+                         "%s plm:tm:launch: finished spawning orteds",
                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
 
+    /* setup a timer to give the cmd a chance to be sent */
+    time_is_up = false;
+    ORTE_DETECT_TIMEOUT(&timerev, launched,
+                        100, -1, timer_cb);
+
+    ORTE_PROGRESSED_WAIT(time_is_up, 0, 1);
+
     /* TM poll for all the spawns */
-    for (i = 0; i < launched; ++i) {
-        rc = tm_poll(TM_NULL_EVENT, &event, 1, &local_err);
+    while (0 < launched) {
+        rc = tm_poll(TM_NULL_EVENT, &eventpolled, (int)false, &local_err);
         if (TM_SUCCESS != rc) {
-            errno = local_err;
-            opal_output(0, "plm:tmd: failed to poll for a spawned daemon, return status = %d", rc);
+            opal_output(0, "plm:tm: event poll for spawned daemon failed, return status = %d", rc);
+            rc = ORTE_ERROR;
             goto cleanup;
         }
+        /* if we get back the NULL event, then just continue */
+        if (eventpolled == TM_NULL_EVENT) {
+            continue;
+        }
+        /* look for the spawned event */
+        for (j=0; j < map->num_new_daemons; j++) {
+            if (eventpolled == *(events_spawn + offset + j)) {
+                /* got the event - check returned code */
+                if (local_err) {
+                    /* this orted failed to launch! */
+                    orte_show_help("help-plm-tm.txt", "tm-spawn-failed",
+                                   true, argv[0], nodes[j]->name, nodes[j]->launch_id);
+                    rc = ORTE_ERROR;
+                    goto cleanup;
+                }
+                /* register the corresponding obit so we can detect when this
+                 * orted terminates
+                 */
+                if (ORTE_SUCCESS != (rc = obit_submit(offset+j))) {
+                    ORTE_ERROR_LOG(rc);
+                    goto cleanup;
+                }
+                /* all done with this event */
+                goto MOVEON;
+            }
+        }
+        /* if we get here, then we failed to find the event */
+        opal_output(0, "TM FAILED TO FIND SPAWN EVENT WHEN LAUNCHING");
+        rc = ORTE_ERROR;
+        goto cleanup;
+
+    MOVEON:
+        launched--;
     }
 
     /* set a timer to tell us if one or more daemon's fails to start - use the
@ -350,7 +495,7 @@ static int plm_tmd_launch_job(orte_job_t *jdata)
      */
     if (0 < orte_startup_timeout) {
         OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
-                             "%s plm:tmd: setting startup timer for %d milliseconds",
+                             "%s plm:tm: setting startup timer for %d milliseconds",
                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                              orte_startup_timeout));
         ORTE_DETECT_TIMEOUT(&ev, map->num_new_daemons,
@ -361,7 +506,7 @@ static int plm_tmd_launch_job(orte_job_t *jdata)
     /* wait for daemons to callback */
     if (ORTE_SUCCESS != (rc = orte_plm_base_daemon_callback(map->num_new_daemons))) {
         OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
-                             "%s plm:tmd: daemon launch failed for job %s on error %s",
+                             "%s plm:tm: daemon launch failed for job %s on error %s",
                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                              ORTE_JOBID_PRINT(jdata->jobid), ORTE_ERROR_NAME(rc)));
         goto cleanup;
@ -379,7 +524,7 @@ launch_apps:
     failed_job = jdata->jobid;
     if (ORTE_SUCCESS != (rc = orte_plm_base_launch_apps(jdata->jobid))) {
         OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
-                             "%s plm:tmd: launch of apps failed for job %s on error %s",
+                             "%s plm:tm: launch of apps failed for job %s on error %s",
                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                              ORTE_JOBID_PRINT(jdata->jobid), ORTE_ERROR_NAME(rc)));
         goto cleanup;
@ -397,16 +542,6 @@ launch_apps:
         opal_argv_free(env);
     }
 
-    if (connected) {
-        plm_tmd_disconnect();
-    }
-    if (NULL != tm_events) {
-        free(tm_events);
-    }
-    if (NULL != tm_task_ids) {
-        free(tm_task_ids);
-    }
-
     if (NULL != lib_base) {
         free(lib_base);
     }
@ -416,7 +551,7 @@ launch_apps:
 
     /* check for failed launch - if so, force terminate */
     if (failed_launch) {
-        orte_plm_base_launch_failed(failed_job, -1, ORTE_ERROR_DEFAULT_EXIT_CODE, ORTE_JOB_STATE_FAILED_TO_START);
+        orte_plm_base_launch_failed(failed_job, -1, ORTE_ERROR_DEFAULT_EXIT_CODE, job_state);
     }
 
     /* setup a "heartbeat" timer to periodically check on
@ -428,7 +563,7 @@ launch_apps:
     }
 
     OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
-                         "%s plm:tmd:launch: finished",
+                         "%s plm:tm:launch: finished",
                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
 
     return rc;
@ -447,6 +582,14 @@ static int plm_tmd_terminate_job(orte_jobid_t jobid)
     return rc;
 }
 
+/* quick timeout loop */
+static bool timer_fired;
+
+static void quicktime_cb(int fd, short event, void *cbdata)
+{
+    /* declare it fired */
+    timer_fired = true;
+}
+
 /**
  * Terminate the orteds for a given job
@ -454,12 +597,143 @@ static int plm_tmd_terminate_job(orte_jobid_t jobid)
 int plm_tmd_terminate_orteds(void)
 {
     int rc;
+    orte_job_t *jdata;
+    orte_proc_t **daemons;
+    tm_event_t eventpolled;
+    orte_vpid_t j, alive;
+    int local_err;
+    opal_event_t *timerev=NULL;
+    opal_event_t *quicktime=NULL;
+    struct timeval quicktimeval;
+    bool aborted;
 
+    OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
+                         "%s plm:tm: terminating orteds",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
+
+    /* lookup the daemon job object */
+    if (NULL == (jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
+        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
+    }
+    alive = jdata->num_procs - 1; /* do not include myself! */
+    daemons = (orte_proc_t**)jdata->procs->addr;
+    aborted = false;
+
-    /* now tell them to die! */
-    if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_WITH_REPLY_CMD))) {
+    /* tell them to die! */
+    if (ORTE_SUCCESS != (rc = orte_plm_base_orted_exit(ORTE_DAEMON_EXIT_NO_REPLY_CMD))) {
         ORTE_ERROR_LOG(rc);
     }
+
+    /* if there are more than just me... */
+    if (0 < alive) {
+        /* setup a max time for the daemons to die */
+        time_is_up = false;
+        ORTE_DETECT_TIMEOUT(&timerev, alive,
+                            1000000, 60000000, timer_cb);
+
+        /* give the cmds a chance to get out */
+        quicktimeval.tv_sec = 0;
+        quicktimeval.tv_usec = 100;
+        timer_fired = false;
+        ORTE_DETECT_TIMEOUT(&quicktime, alive, 1000, 10000, quicktime_cb);
+        ORTE_PROGRESSED_WAIT(timer_fired, 0, 1);
+
+        /* now begin polling to see if daemons have terminated */
+        while (!time_is_up && 0 < alive) {
+            OPAL_OUTPUT_VERBOSE((10, orte_plm_globals.output,
+                                 "%s plm:tm: polling for daemon termination",
+                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
+            rc = tm_poll(TM_NULL_EVENT, &eventpolled, (int)false, &local_err);
+            if (TM_SUCCESS != rc) {
+                errno = local_err;
+                opal_output(0, "plm:tm: event poll for daemon termination failed, return status = %d", rc);
+                continue; /* we will wait for timeout to tell us to quit */
+            }
+            /* if we get back the NULL event, then just continue */
+            if (eventpolled == TM_NULL_EVENT) {
+                OPAL_OUTPUT_VERBOSE((10, orte_plm_globals.output,
+                                     "%s plm:tm: got null event",
+                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
+                /* give system a little time to progress */
+                timer_fired = false;
+                opal_evtimer_add(quicktime, &quicktimeval);
+                ORTE_PROGRESSED_WAIT(timer_fired, 0, 1);
+                continue;
+            }
+            /* look for the obit event */
+            for (j=0; j < jdata->num_procs-1; j++) {
+                if (eventpolled == *(events_obit + j)) {
+                    /* got the event - check returned code */
+                    if (local_err == TM_ESYSTEM) {
+                        OPAL_OUTPUT_VERBOSE((10, orte_plm_globals.output,
+                                             "%s plm:tm: got TM_ESYSTEM on obit - resubmitting",
+                                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
+                        if (ORTE_SUCCESS != (rc = obit_submit(j))) {
+                            ORTE_ERROR_LOG(rc);
+                            goto MOVEON;
+                        }
+                        /* give system a little time to progress */
+                        timer_fired = false;
+                        opal_evtimer_add(quicktime, &quicktimeval);
+                        ORTE_PROGRESSED_WAIT(timer_fired, 0, 1);
+                    }
+                    if (0 != local_err) {
+                        OPAL_OUTPUT_VERBOSE((10, orte_plm_globals.output,
+                                             "%s plm:tm: got error %d on obit for task %d",
+                                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_err, j));
+                        rc = ORTE_ERROR;
+                        goto MOVEON;
+                    }
+                    /* this daemon has terminated */
+                    *(tm_task_ids+j) = TM_NULL_TASK;
+                    *(events_obit+j) = TM_NULL_EVENT;
+                    OPAL_OUTPUT_VERBOSE((10, orte_plm_globals.output,
+                                         "%s plm:tm: task %d exited with status %d",
+                                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), j, *(evs+j)));
+                    /* update the termination status for this daemon */
+                    daemons[j+1]->exit_code = *(evs+j);
+                    if (0 != daemons[j+1]->exit_code) {
+                        daemons[j+1]->state = ORTE_PROC_STATE_ABORTED;
+                        aborted = true;
+                    } else {
+                        daemons[j+1]->state = ORTE_PROC_STATE_TERMINATED;
+                    }
+                    jdata->num_terminated++;
+                    /* all done with this event */
+                    goto MOVEON;
+                }
+            }
+            /* if we get here, then we failed to find the event */
+            opal_output(0, "TM FAILED TO FIND OBIT EVENT");
+
+        MOVEON:
+            alive--;
+        }
+
+        /* release event if not already done */
+        if (NULL != quicktime) {
+            free(quicktime);
+        }
+        if (NULL != timerev) {
+            opal_event_del(timerev);
+            free(timerev);
+        }
+    } else {
+        /* still need to give the cmds a chance to get out so I can process
+         * them myself!
+         */
+        timer_fired = false;
+        ORTE_DETECT_TIMEOUT(&quicktime, 1, 1000, 10000, quicktime_cb);
+        ORTE_PROGRESSED_WAIT(timer_fired, 0, 1);
+    }
+
+    /* declare the daemons done */
+    if (aborted || 0 < alive) {
+        jdata->state = ORTE_JOB_STATE_ABORTED;
+    } else {
+        jdata->state = ORTE_JOB_STATE_TERMINATED;
+    }
+    orte_trigger_event(&orteds_exit);
+
     return rc;
 }
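
The added termination path bounds a non-blocking obit poll with two timers: a hard cap of roughly sixty seconds (timerev) and a short quicktime pause whenever nothing is ready. Stripped of the ORTE event machinery, the control flow is approximately the following sketch (illustrative names, with a wall-clock deadline in place of the event timers):

    #include <time.h>
    #include <tm.h>

    /* Poll for obituary events until every daemon reports or the deadline
     * passes. obit_evs[] holds the events registered through tm_obit(). */
    static int poll_obits_until(time_t deadline, int alive, int num_daemons,
                                tm_event_t *obit_evs)
    {
        tm_event_t ev;
        int j, local_err;
        struct timespec pause = { 0, 1000000 };   /* ~1 ms breather */

        while (0 < alive && time(NULL) < deadline) {
            if (TM_SUCCESS != tm_poll(TM_NULL_EVENT, &ev, 0, &local_err) ||
                TM_NULL_EVENT == ev) {
                nanosleep(&pause, NULL);  /* give the system time to progress */
                continue;
            }
            for (j = 0; j < num_daemons; ++j) {
                if (ev == obit_evs[j]) {
                    obit_evs[j] = TM_NULL_EVENT;  /* consume this event */
                    --alive;
                    break;
                }
            }
        }
        return alive;    /* > 0 means some daemons never reported an obit */
    }

On top of this skeleton the committed code also resubmits the obit when tm_poll() reports TM_ESYSTEM, and records each daemon's exit code and state before declaring the job terminated or aborted.
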
@ -488,6 +762,24 @@ static int plm_tmd_finalize(void)
         ORTE_ERROR_LOG(rc);
     }
 
+    if (connected) {
+        tm_finalize();
+    }
+
+    /* cleanup data arrays */
+    if (NULL != events_spawn) {
+        free(events_spawn);
+    }
+    if (NULL != events_obit) {
+        free(events_obit);
+    }
+    if (NULL != tm_task_ids) {
+        free(tm_task_ids);
+    }
+    if (NULL != evs) {
+        free(evs);
+    }
+
     return ORTE_SUCCESS;
 }
@ -518,26 +810,19 @@ static int plm_tmd_connect(void)
 }
 
 
-static int plm_tmd_disconnect(void)
-{
-    tm_finalize();
-
-    return ORTE_SUCCESS;
-}
-
 /* call this function if the timer fires indicating that one
  * or more daemons failed to start
  */
 static void failed_start(int fd, short dummy, void *arg)
 {
     OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
-                         "%s plm:tmd:failed_start",
+                         "%s plm:tm:failed_start",
                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
 
     /* if we are aborting, ignore this */
     if (orte_abnormal_term_ordered) {
         OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
-                             "%s plm:tmd:failed_start - abnormal term in progress",
+                             "%s plm:tm:failed_start - abnormal term in progress",
                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
         return;
     }
@ -545,3 +830,21 @@ static void failed_start(int fd, short dummy, void *arg)
     orte_plm_base_launch_failed(ORTE_PROC_MY_NAME->jobid, -1,
                                 ORTE_ERROR_DEFAULT_EXIT_CODE, ORTE_JOB_STATE_FAILED_TO_START);
 }
+
+static int obit_submit(int tid)
+{
+    int rc;
+
+    if (TM_SUCCESS != (rc = tm_obit(*(tm_task_ids+tid), evs+tid, events_obit+tid))) {
+        opal_output(0, "failed to register termination notice for task %d", tid);
+        rc = ORTE_ERROR;
+        return rc;
+    }
+    if (*(events_obit+tid) == TM_NULL_EVENT) {
+        opal_output(0, "task %d is already dead", tid);
+    } else if (*(events_obit+tid) == TM_ERROR_EVENT) {
+        opal_output(0, "Error on obit return - got error event for task %d", tid);
+    }
+
+    return ORTE_SUCCESS;
+}