1
1

Add an orted component for staged operations and rename the staged component to "staged_hnp".

This commit was SVN r27305.
Этот коммит содержится в:
Ralph Castain 2012-09-11 20:35:46 +00:00
родитель 387f657fc2
Коммит a0ffeb205a
11 изменённых файлов: 572 добавлений и 71 удалений

Просмотреть файл

@ -38,24 +38,21 @@ static int init(void);
static int finalize(void);
/******************
* ORTED module - just uses base functions after
* initializing the proc state machine. Job state
* machine is unused by ortedlication procs at this
* time.
* ORTED module
******************/
orte_state_base_module_t orte_state_orted_module = {
init,
finalize,
orte_state_base_activate_job_state,
orte_state_base_add_job_state,
orte_state_base_set_job_state_callback,
orte_state_base_set_job_state_priority,
orte_state_base_remove_job_state,
orte_state_base_activate_proc_state,
orte_state_base_add_proc_state,
orte_state_base_set_proc_state_callback,
orte_state_base_set_proc_state_priority,
orte_state_base_remove_proc_state
init,
finalize,
orte_state_base_activate_job_state,
orte_state_base_add_job_state,
orte_state_base_set_job_state_callback,
orte_state_base_set_job_state_priority,
orte_state_base_remove_job_state,
orte_state_base_activate_proc_state,
orte_state_base_add_proc_state,
orte_state_base_set_proc_state_callback,
orte_state_base_set_proc_state_priority,
orte_state_base_remove_proc_state
};
/* Local functions */
@ -73,16 +70,16 @@ static orte_state_cbfunc_t job_callbacks[] = {
};
static orte_proc_state_t proc_states[] = {
ORTE_PROC_STATE_RUNNING,
ORTE_PROC_STATE_REGISTERED,
ORTE_PROC_STATE_IOF_COMPLETE,
ORTE_PROC_STATE_WAITPID_FIRED
ORTE_PROC_STATE_RUNNING,
ORTE_PROC_STATE_REGISTERED,
ORTE_PROC_STATE_IOF_COMPLETE,
ORTE_PROC_STATE_WAITPID_FIRED
};
static orte_state_cbfunc_t proc_callbacks[] = {
track_procs,
track_procs,
track_procs,
track_procs
track_procs,
track_procs,
track_procs,
track_procs
};
/************************

37
orte/mca/state/staged_hnp/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,37 @@
#
# Copyright (c) 2012 Los Alamos National Security, LLC.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Help text shipped with the component (referenced from the component's
# orte_show_help() calls).
dist_pkgdata_DATA = help-state-staged-hnp.txt
# Source files that make up the staged_hnp state component.
sources = \
state_staged_hnp.h \
state_staged_hnp_component.c \
state_staged_hnp.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
# Exactly one of component_install / component_noinst is non-empty,
# depending on whether this component is built as a DSO.
if MCA_BUILD_orte_state_staged_hnp_DSO
component_noinst =
component_install = mca_state_staged_hnp.la
else
component_noinst = libmca_state_staged_hnp.la
component_install =
endif
# DSO build: installed into the package library directory.
mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_state_staged_hnp_la_SOURCES = $(sources)
mca_state_staged_hnp_la_LDFLAGS = -module -avoid-version
# Static build: convenience library that is not installed on its own.
noinst_LTLIBRARIES = $(component_noinst)
libmca_state_staged_hnp_la_SOURCES =$(sources)
libmca_state_staged_hnp_la_LDFLAGS = -module -avoid-version

Просмотреть файл

@ -34,7 +34,7 @@
#include "orte/mca/state/state.h"
#include "orte/mca/state/base/base.h"
#include "orte/mca/state/base/state_private.h"
#include "state_staged.h"
#include "state_staged_hnp.h"
/*
* Module functions: Global
@ -45,7 +45,7 @@ static int finalize(void);
/******************
* STAGED module
******************/
orte_state_base_module_t orte_state_staged_module = {
orte_state_base_module_t orte_state_staged_hnp_module = {
init,
finalize,
orte_state_base_activate_job_state,
@ -107,7 +107,7 @@ static orte_state_cbfunc_t launch_callbacks[] = {
orte_quit
};
/* staged execution requires that we start as many
/* staged_hnp execution requires that we start as many
* procs initially as we have resources - if we have
* adequate resources, then we behave just like the
* default HNP module. If we don't, then we will have
@ -218,8 +218,8 @@ static void setup_job_complete(int fd, short args, void *cbdata)
continue;
}
if (app->num_procs <= 0) {
/* must specify -np for staged execution */
orte_show_help("help-state-staged.txt", "no-np", true);
/* must specify -np for staged_hnp execution */
orte_show_help("help-state-staged-hnp.txt", "no-np", true);
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_SILENT_ABORT);
OBJ_RELEASE(caddy);
return;
@ -248,7 +248,7 @@ static void setup_job_complete(int fd, short args, void *cbdata)
}
}
/* set the job map to use the staged mapper */
/* set the job map to use the staged_hnp mapper */
if (NULL == jdata->map) {
jdata->map = OBJ_NEW(orte_job_map_t);
jdata->map->req_mapper = strdup("staged");
@ -282,7 +282,7 @@ static void cleanup_node(orte_proc_t *proc)
}
}
OPAL_OUTPUT_VERBOSE((5, orte_state_base_output,
"%s state:staged:track_procs node %s has %d slots, %d slots inuse",
"%s state:staged_hnp:track_procs node %s has %d slots, %d slots inuse",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name,
(int)node->slots, (int)node->slots_inuse));
}
@ -296,7 +296,7 @@ static void track_procs(int fd, short args, void *cbdata)
orte_proc_t *pdata;
OPAL_OUTPUT_VERBOSE((5, orte_state_base_output,
"%s state:staged:track_procs called for proc %s state %s",
"%s state:staged_hnp:track_procs called for proc %s state %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc),
orte_proc_state_to_str(state)));
@ -316,7 +316,7 @@ static void track_procs(int fd, short args, void *cbdata)
if (ORTE_PROC_STATE_REGISTERED == state) {
if (pdata->mpi_proc && !jdata->gang_launched) {
/* we can't support this - issue an error and abort */
orte_show_help("help-state-staged.txt", "mpi-procs-not-supported", true);
orte_show_help("help-state-staged-hnp.txt", "mpi-procs-not-supported", true);
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_SILENT_ABORT);
}
/* update the proc state */

Просмотреть файл

@ -14,8 +14,8 @@
*
*/
#ifndef MCA_STATE_STAGED_EXPORT_H
#define MCA_STATE_STAGED_EXPORT_H
#ifndef MCA_STATE_STAGED_HNP_EXPORT_H
#define MCA_STATE_STAGED_HNP_EXPORT_H
#include "orte_config.h"
@ -27,10 +27,10 @@ BEGIN_C_DECLS
* Local Component structures
*/
ORTE_MODULE_DECLSPEC extern orte_state_base_component_t mca_state_staged_component;
ORTE_MODULE_DECLSPEC extern orte_state_base_component_t mca_state_staged_hnp_component;
ORTE_DECLSPEC extern orte_state_base_module_t orte_state_staged_module;
ORTE_DECLSPEC extern orte_state_base_module_t orte_state_staged_hnp_module;
END_C_DECLS
#endif /* MCA_STATE_STAGED_EXPORT_H */
#endif /* MCA_STATE_STAGED_HNP_EXPORT_H */

Просмотреть файл

@ -14,26 +14,26 @@
#include "orte/mca/state/state.h"
#include "orte/mca/state/base/base.h"
#include "state_staged.h"
#include "state_staged_hnp.h"
/*
* Public string for version number
*/
const char *orte_state_staged_component_version_string =
"ORTE STATE staged MCA component version " ORTE_VERSION;
const char *orte_state_staged_hnp_component_version_string =
"ORTE STATE staged_hnp MCA component version " ORTE_VERSION;
/*
* Local functionality
*/
static int state_staged_open(void);
static int state_staged_close(void);
static int state_staged_component_query(mca_base_module_t **module, int *priority);
static int state_staged_hnp_open(void);
static int state_staged_hnp_close(void);
static int state_staged_hnp_component_query(mca_base_module_t **module, int *priority);
/*
* Instantiate the public struct with all of our public information
* and pointer to our public functions in it
*/
orte_state_base_component_t mca_state_staged_component =
orte_state_base_component_t mca_state_staged_hnp_component =
{
/* Handle the general mca_component_t struct containing
* meta information about the component
@ -41,15 +41,15 @@ orte_state_base_component_t mca_state_staged_component =
{
ORTE_STATE_BASE_VERSION_1_0_0,
/* Component name and version */
"staged",
"staged_hnp",
ORTE_MAJOR_VERSION,
ORTE_MINOR_VERSION,
ORTE_RELEASE_VERSION,
/* Component open and close functions */
state_staged_open,
state_staged_close,
state_staged_component_query
state_staged_hnp_open,
state_staged_hnp_close,
state_staged_hnp_component_query
},
{
/* The component is checkpoint ready */
@ -59,29 +59,28 @@ orte_state_base_component_t mca_state_staged_component =
static bool select_me = false;
static int state_staged_open(void)
static int state_staged_hnp_open(void)
{
int tmp;
mca_base_component_t *c=&mca_state_staged_component.base_version;
mca_base_param_reg_int(c, "select",
"Use this component",
false, false, (int)false, &tmp);
mca_base_param_reg_int_name("state", "staged_select",
"Use this component",
false, false, (int)false, &tmp);
select_me = OPAL_INT_TO_BOOL(tmp);
return ORTE_SUCCESS;
}
static int state_staged_close(void)
static int state_staged_hnp_close(void)
{
return ORTE_SUCCESS;
}
static int state_staged_component_query(mca_base_module_t **module, int *priority)
static int state_staged_hnp_component_query(mca_base_module_t **module, int *priority)
{
if (ORTE_PROC_IS_HNP && select_me) {
*priority = 1000;
*module = (mca_base_module_t *)&orte_state_staged_module;
*module = (mca_base_module_t *)&orte_state_staged_hnp_module;
return ORTE_SUCCESS;
}

Просмотреть файл

@ -8,30 +8,28 @@
# $HEADER$
#
dist_pkgdata_DATA = help-state-staged.txt
sources = \
state_staged.h \
state_staged_component.c \
state_staged.c
state_staged_orted.h \
state_staged_orted_component.c \
state_staged_orted.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if MCA_BUILD_orte_state_staged_DSO
if MCA_BUILD_orte_state_staged_orted_DSO
component_noinst =
component_install = mca_state_staged.la
component_install = mca_state_staged_orted.la
else
component_noinst = libmca_state_staged.la
component_noinst = libmca_state_staged_orted.la
component_install =
endif
mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_state_staged_la_SOURCES = $(sources)
mca_state_staged_la_LDFLAGS = -module -avoid-version
mca_state_staged_orted_la_SOURCES = $(sources)
mca_state_staged_orted_la_LDFLAGS = -module -avoid-version
noinst_LTLIBRARIES = $(component_noinst)
libmca_state_staged_la_SOURCES =$(sources)
libmca_state_staged_la_LDFLAGS = -module -avoid-version
libmca_state_staged_orted_la_SOURCES =$(sources)
libmca_state_staged_orted_la_LDFLAGS = -module -avoid-version

Просмотреть файл

@ -0,0 +1,342 @@
/*
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include <sys/types.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#include "opal/util/output.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/iof/iof.h"
#include "orte/mca/rml/rml.h"
#include "orte/util/session_dir.h"
#include "orte/runtime/orte_quit.h"
#include "orte/mca/state/state.h"
#include "orte/mca/state/base/base.h"
#include "orte/mca/state/base/state_private.h"
#include "state_staged_orted.h"
/*
* Module functions: Global
*/
static int init(void);
static int finalize(void);
/******************
* STAGED_ORTED module
*
* Function table exported by this component. Only init and finalize
* are implemented in this file; every other slot is filled with the
* corresponding orte_state_base_* function. The initializer is
* positional, so the order of the entries must not be changed.
******************/
orte_state_base_module_t orte_state_staged_orted_module = {
init,
finalize,
orte_state_base_activate_job_state,
orte_state_base_add_job_state,
orte_state_base_set_job_state_callback,
orte_state_base_set_job_state_priority,
orte_state_base_remove_job_state,
orte_state_base_activate_proc_state,
orte_state_base_add_proc_state,
orte_state_base_set_proc_state_callback,
orte_state_base_set_proc_state_priority,
orte_state_base_remove_proc_state
};
/* Local functions */
static void track_jobs(int fd, short argc, void *cbdata);
static void track_procs(int fd, short argc, void *cbdata);
static int pack_state_update(opal_buffer_t *buf,
orte_job_t *jdata,
orte_proc_t *proc);
/* defined default state machines
 *
 * Each *_states array is paired index-by-index with the *_callbacks
 * array that follows it: init() registers states[i] with callbacks[i].
 * The two arrays of a pair must therefore keep the same length and order.
 */
/* job states handled by this module */
static orte_job_state_t job_states[] = {
ORTE_JOB_STATE_LOCAL_LAUNCH_COMPLETE,
};
/* callbacks for job_states, same order */
static orte_state_cbfunc_t job_callbacks[] = {
track_jobs
};
/* proc states handled by this module - all are routed to track_procs */
static orte_proc_state_t proc_states[] = {
ORTE_PROC_STATE_RUNNING,
ORTE_PROC_STATE_REGISTERED,
ORTE_PROC_STATE_IOF_COMPLETE,
ORTE_PROC_STATE_WAITPID_FIRED
};
/* callbacks for proc_states, same order */
static orte_state_cbfunc_t proc_callbacks[] = {
track_procs,
track_procs,
track_procs,
track_procs
};
/************************
* API Definitions
************************/
static int init(void)
{
int num_states, i, rc;
/* setup the state machine */
OBJ_CONSTRUCT(&orte_job_states, opal_list_t);
OBJ_CONSTRUCT(&orte_proc_states, opal_list_t);
num_states = sizeof(job_states) / sizeof(orte_job_state_t);
for (i=0; i < num_states; i++) {
if (ORTE_SUCCESS != (rc = orte_state.add_job_state(job_states[i],
job_callbacks[i],
ORTE_SYS_PRI))) {
ORTE_ERROR_LOG(rc);
}
}
/* add a default error response */
if (ORTE_SUCCESS != (rc = orte_state.add_job_state(ORTE_JOB_STATE_FORCED_EXIT,
orte_quit, ORTE_ERROR_PRI))) {
ORTE_ERROR_LOG(rc);
}
/* add a state for when we are ordered to terminate */
if (ORTE_SUCCESS != (rc = orte_state.add_job_state(ORTE_JOB_STATE_DAEMONS_TERMINATED,
orte_quit, ORTE_ERROR_PRI))) {
ORTE_ERROR_LOG(rc);
}
if (5 < opal_output_get_verbosity(orte_state_base_output)) {
orte_state_base_print_job_state_machine();
}
/* populate the proc state machine to allow us to
* track proc lifecycle changes
*/
num_states = sizeof(proc_states) / sizeof(orte_proc_state_t);
for (i=0; i < num_states; i++) {
if (ORTE_SUCCESS != (rc = orte_state.add_proc_state(proc_states[i],
proc_callbacks[i],
ORTE_SYS_PRI))) {
ORTE_ERROR_LOG(rc);
}
}
if (5 < opal_output_get_verbosity(orte_state_base_output)) {
orte_state_base_print_proc_state_machine();
}
return ORTE_SUCCESS;
}
/*
 * Module finalize: drain and destruct both state-machine lists.
 * Always returns ORTE_SUCCESS.
 */
static int finalize(void)
{
    opal_list_item_t *entry;

    /* job state machine */
    for (entry = opal_list_remove_first(&orte_job_states);
         NULL != entry;
         entry = opal_list_remove_first(&orte_job_states)) {
        OBJ_RELEASE(entry);
    }
    OBJ_DESTRUCT(&orte_job_states);

    /* proc state machine */
    for (entry = opal_list_remove_first(&orte_proc_states);
         NULL != entry;
         entry = opal_list_remove_first(&orte_proc_states)) {
        OBJ_RELEASE(entry);
    }
    OBJ_DESTRUCT(&orte_proc_states);

    return ORTE_SUCCESS;
}
/*
 * Job-state callback. This module takes no action on job state
 * changes; it only releases the caddy handed to it in cbdata.
 */
static void track_jobs(int fd, short argc, void *cbdata)
{
    orte_state_caddy_t *event_caddy = (orte_state_caddy_t *)cbdata;

    /* nothing to do for job states - just drop our reference */
    OBJ_RELEASE(event_caddy);
}
static void track_procs(int fd, short argc, void *cbdata)
{
orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
orte_process_name_t *proc = &caddy->name;
orte_proc_state_t state = caddy->proc_state;
orte_job_t *jdata;
orte_proc_t *pdata;
opal_buffer_t *alert;
int rc;
orte_plm_cmd_flag_t cmd;
OPAL_OUTPUT_VERBOSE((5, orte_state_base_output,
"%s state:staged_orted:track_procs called for proc %s state %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc),
orte_proc_state_to_str(state)));
/* get the job object for this proc */
if (NULL == (jdata = orte_get_job_data_object(proc->jobid))) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
goto cleanup;
}
pdata = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->vpid);
switch (state) {
case ORTE_PROC_STATE_RUNNING:
/* update the proc state */
pdata->state = state;
jdata->num_launched++;
/* we don't really care - nothing further to do */
break;
case ORTE_PROC_STATE_REGISTERED:
/* update the proc state */
pdata->state = state;
/* if this proc registered as an MPI proc, and
* MPI is not allowed, then that is an error
*/
if (!jdata->gang_launched && pdata->mpi_proc) {
/* abort the proc */
/* notify the HNP of the error */
}
break;
case ORTE_PROC_STATE_IOF_COMPLETE:
/* do NOT update the proc state as this can hit
* while we are still trying to notify the HNP of
* successful launch for short-lived procs
*/
pdata->iof_complete = true;
if (pdata->waitpid_recvd) {
/* the proc has terminated */
pdata->alive = false;
pdata->state = ORTE_PROC_STATE_TERMINATED;
/* Clean up the session directory as if we were the process
* itself. This covers the case where the process died abnormally
* and didn't cleanup its own session directory.
*/
orte_session_dir_finalize(proc);
/* alert the HNP */
cmd = ORTE_PLM_UPDATE_PROC_STATE;
alert = OBJ_NEW(opal_buffer_t);
if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &cmd, 1, ORTE_PLM_CMD))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* pack the info */
if (ORTE_SUCCESS != (rc = pack_state_update(alert, jdata, pdata))) {
ORTE_ERROR_LOG(rc);
}
/* send it */
OPAL_OUTPUT_VERBOSE((5, orte_state_base_output,
"%s SENDING TERMINATION UPDATE FOR PROC %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&pdata->name)));
if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert,
ORTE_RML_TAG_PLM, 0,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(rc);
}
}
/* Release the stdin IOF file descriptor for this child, if one
* was defined. File descriptors for the other IOF channels - stdout,
* stderr, and stddiag - were released when their associated pipes
* were cleared and closed due to termination of the process
* Do this after we handle termination in case the IOF needs
* to check to see if all procs from the job are actually terminated
*/
if (NULL != orte_iof.close) {
orte_iof.close(proc, ORTE_IOF_STDIN);
}
break;
case ORTE_PROC_STATE_WAITPID_FIRED:
/* do NOT update the proc state as this can hit
* while we are still trying to notify the HNP of
* successful launch for short-lived procs
*/
pdata->waitpid_recvd = true;
if (pdata->iof_complete) {
/* the proc has terminated */
pdata->alive = false;
pdata->state = ORTE_PROC_STATE_TERMINATED;
/* Clean up the session directory as if we were the process
* itself. This covers the case where the process died abnormally
* and didn't cleanup its own session directory.
*/
orte_session_dir_finalize(proc);
/* alert the HNP */
cmd = ORTE_PLM_UPDATE_PROC_STATE;
alert = OBJ_NEW(opal_buffer_t);
if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &cmd, 1, ORTE_PLM_CMD))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* pack the info */
if (ORTE_SUCCESS != (rc = pack_state_update(alert, jdata, pdata))) {
ORTE_ERROR_LOG(rc);
}
/* send it */
OPAL_OUTPUT_VERBOSE((5, orte_state_base_output,
"%s SENDING TERMINATION UPDATE FOR PROC %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&pdata->name)));
if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert,
ORTE_RML_TAG_PLM, 0,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(rc);
}
}
break;
default:
/* ignore */
break;
}
cleanup:
OBJ_RELEASE(caddy);
}
/*
 * Append one proc's status record to a buffer.
 *
 * Packs, in this order: the job's jobid, then the child's vpid, pid,
 * state and exit code, followed by an ORTE_VPID_INVALID marker (per the
 * original comment, this lets the receiver know the job's data is
 * complete).
 *
 * Returns ORTE_SUCCESS, or the first pack error after logging it;
 * nothing further is packed after a failure.
 */
static int pack_state_update(opal_buffer_t *alert,
                             orte_job_t *jdata,
                             orte_proc_t *child)
{
    orte_vpid_t end_marker = ORTE_VPID_INVALID;
    int status;

    /* jobid */
    status = opal_dss.pack(alert, &jdata->jobid, 1, ORTE_JOBID);
    if (ORTE_SUCCESS != status) {
        goto failed;
    }
    /* the child's vpid */
    status = opal_dss.pack(alert, &(child->name.vpid), 1, ORTE_VPID);
    if (ORTE_SUCCESS != status) {
        goto failed;
    }
    /* pid */
    status = opal_dss.pack(alert, &child->pid, 1, OPAL_PID);
    if (ORTE_SUCCESS != status) {
        goto failed;
    }
    /* state */
    status = opal_dss.pack(alert, &child->state, 1, ORTE_PROC_STATE);
    if (ORTE_SUCCESS != status) {
        goto failed;
    }
    /* exit code */
    status = opal_dss.pack(alert, &child->exit_code, 1, ORTE_EXIT_CODE);
    if (ORTE_SUCCESS != status) {
        goto failed;
    }
    /* trailing marker so the receiver knows this job's data is done */
    status = opal_dss.pack(alert, &end_marker, 1, ORTE_VPID);
    if (ORTE_SUCCESS != status) {
        goto failed;
    }

    return ORTE_SUCCESS;

 failed:
    ORTE_ERROR_LOG(status);
    return status;
}

Просмотреть файл

@ -0,0 +1,36 @@
/*
* Copyright (c) 2012 Los Alamos National Security, LLC.
* All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* Public declarations for the "staged_orted" state component: the
* component descriptor and the module function table that are defined
* in the component's .c files.
*/
#ifndef MCA_STATE_STAGED_ORTED_EXPORT_H
#define MCA_STATE_STAGED_ORTED_EXPORT_H
#include "orte_config.h"
#include "orte/mca/state/state.h"
BEGIN_C_DECLS
/*
* Local Component structures
*/
/* component descriptor (name "staged_orted") */
ORTE_MODULE_DECLSPEC extern orte_state_base_component_t mca_state_staged_orted_component;
/* module function table handed back by the component's query function */
ORTE_DECLSPEC extern orte_state_base_module_t orte_state_staged_orted_module;
END_C_DECLS
#endif /* MCA_STATE_STAGED_ORTED_EXPORT_H */

Просмотреть файл

@ -0,0 +1,91 @@
/*
* Copyright (c) 2012 Los Alamos National Security, LLC.
* All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "opal/util/output.h"
#include "orte/mca/state/state.h"
#include "orte/mca/state/base/base.h"
#include "state_staged_orted.h"
/*
* Public string for version number
* (component name followed by the ORTE_VERSION string literal)
*/
const char *orte_state_staged_orted_component_version_string =
"ORTE STATE staged_orted MCA component version " ORTE_VERSION;
/*
* Local functionality
*
* open/close/query entry points; they are referenced from the
* component structure below and defined later in this file.
*/
static int state_staged_orted_open(void);
static int state_staged_orted_close(void);
static int state_staged_orted_component_query(mca_base_module_t **module, int *priority);
/*
* Instantiate the public struct with all of our public information
* and pointer to our public functions in it
*
* The initializer is positional: the order of the entries below must
* not be changed.
*/
orte_state_base_component_t mca_state_staged_orted_component =
{
/* Handle the general mca_component_t struct containing
* meta information about the component
*/
{
ORTE_STATE_BASE_VERSION_1_0_0,
/* Component name and version */
"staged_orted",
ORTE_MAJOR_VERSION,
ORTE_MINOR_VERSION,
ORTE_RELEASE_VERSION,
/* Component open and close functions, then the query function */
state_staged_orted_open,
state_staged_orted_close,
state_staged_orted_component_query
},
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
};
/* Set by the open function from the "state"/"staged_select" parameter;
 * read by the query function to decide whether to offer this module.
 */
static bool select_me = false;

/*
 * Component open: register the integer parameter "staged_select" under
 * the "state" type (default false) and cache its value in select_me.
 * Always returns ORTE_SUCCESS.
 */
static int state_staged_orted_open(void)
{
    int requested;

    mca_base_param_reg_int_name("state", "staged_select",
                                "Use this component",
                                false, false, (int)false, &requested);
    select_me = OPAL_INT_TO_BOOL(requested);

    return ORTE_SUCCESS;
}
/*
 * Component close: the open function acquires nothing that needs to be
 * released, so this only reports success.
 */
static int state_staged_orted_close(void)
{
    /* nothing to tear down */
    return ORTE_SUCCESS;
}
/*
 * Component query: offer the staged_orted module only when this process
 * is a daemon and the component was explicitly selected.
 *
 * On acceptance *priority is 1000, *module points at the module table
 * and ORTE_SUCCESS is returned. Otherwise *priority is -1, *module is
 * NULL and ORTE_ERROR is returned.
 */
static int state_staged_orted_component_query(mca_base_module_t **module, int *priority)
{
    /* decline unless we are a daemon AND were asked for */
    if (!(ORTE_PROC_IS_DAEMON) || !select_me) {
        *priority = -1;
        *module = NULL;
        return ORTE_ERROR;
    }

    /* set our priority high */
    *priority = 1000;
    *module = (mca_base_module_t *)&orte_state_staged_orted_module;
    return ORTE_SUCCESS;
}

Просмотреть файл

@ -217,12 +217,13 @@ int orte_dt_print_job(char **output, char *prefix, orte_job_t *src, opal_data_ty
asprintf(&pfx2, "%s", prefix);
}
asprintf(&tmp, "\n%sData for job: %s\tRecovery: %s(%s)\n%s\tNum apps: %ld\tControls: %0x\tStdin target: %s\tState: %s\tAbort: %s", pfx2,
asprintf(&tmp, "\n%sData for job: %s\tRecovery: %s(%s)\n%s\tNum apps: %ld\tControls: %0x\tMPI allowed: %s\tStdin target: %s\tState: %s\tAbort: %s", pfx2,
ORTE_JOBID_PRINT(src->jobid),
(src->enable_recovery) ? "ENABLED" : "DISABLED",
(src->recovery_defined) ? "DEFINED" : "DEFAULT",
pfx2,
(long)src->num_apps, src->controls, ORTE_VPID_PRINT(src->stdin_target),
(long)src->num_apps, src->controls,
src->gang_launched ? "YES" : "NO", ORTE_VPID_PRINT(src->stdin_target),
orte_job_state_to_str(src->state), src->abort ? "True" : "False");
asprintf(&pfx, "%s\t", pfx2);
free(pfx2);