e9e4d2a4bc
The Open MPI code base assumed that asprintf always behaved like the FreeBSD variant, where ptr is set to NULL on error. However, the C standard (and Linux) only guarantee that the return code will be -1 on error and leave ptr undefined. Rather than fix all the usage in the code, we use opal_asprintf() wrapper instead, which guarantees the BSD-like behavior of ptr always being set to NULL. In addition to being correct, this will fix many, many warnings in the Open MPI code base. Signed-off-by: Brian Barrett <bbarrett@amazon.com>
712 строки
25 KiB
C
712 строки
25 KiB
C
/*
|
|
* Copyright (c) 2010 The Trustees of Indiana University.
|
|
* All rights reserved.
|
|
* Copyright (c) 2010-2011 Alex Brick <bricka@ccs.neu.edu>.
|
|
* All rights reserved.
|
|
*
|
|
* Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
#include "opal_config.h"

#include <sched.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <fcntl.h>

#include "opal/util/output.h"
#include "opal/util/argv.h"
#include "opal/util/printf.h"
#include "opal/constants.h"

#include "opal/mca/base/mca_base_var.h"

#include "opal/threads/mutex.h"
#include "opal/threads/condition.h"

#include "opal/mca/event/event.h"

#include "opal/mca/crs/crs.h"
#include "opal/mca/crs/base/base.h"

#include "crs_dmtcp.h"
|
|
|
|
#define MTCP_RESTART_COMMAND "mtcp_restart"
|
|
|
|
/*
|
|
* DMTCP module
|
|
*/
|
|
static opal_crs_base_module_t dmtcp_module = {
|
|
/** Initialization Function */
|
|
opal_crs_dmtcp_module_init,
|
|
/** Finalization Function */
|
|
opal_crs_dmtcp_module_finalize,
|
|
|
|
/** Checkpoint interface */
|
|
opal_crs_dmtcp_checkpoint,
|
|
|
|
/** Restart Command Access */
|
|
opal_crs_dmtcp_restart,
|
|
|
|
/** Disable checkpoints */
|
|
opal_crs_dmtcp_disable_checkpoint,
|
|
/** Enable checkpoints */
|
|
opal_crs_dmtcp_enable_checkpoint,
|
|
|
|
/** Prelaunch */
|
|
opal_crs_dmtcp_prelaunch,
|
|
|
|
/** Register Thread */
|
|
opal_crs_dmtcp_reg_thread
|
|
};
|
|
|
|
/***************************
|
|
* Snapshot Class Functions
|
|
***************************/
|
|
OBJ_CLASS_DECLARATION(opal_crs_dmtcp_snapshot_t);
|
|
|
|
struct opal_crs_dmtcp_snapshot_t {
|
|
/** Base CRS snapshot type */
|
|
opal_crs_base_snapshot_t super;
|
|
char * context_filename;
|
|
};
|
|
typedef struct opal_crs_dmtcp_snapshot_t opal_crs_dmtcp_snapshot_t;
|
|
|
|
void opal_crs_dmtcp_construct(opal_crs_dmtcp_snapshot_t *obj);
|
|
void opal_crs_dmtcp_destruct(opal_crs_dmtcp_snapshot_t *obj);
|
|
|
|
OBJ_CLASS_INSTANCE(opal_crs_dmtcp_snapshot_t,
|
|
opal_crs_base_snapshot_t,
|
|
opal_crs_dmtcp_construct,
|
|
opal_crs_dmtcp_destruct);
|
|
|
|
/******************
|
|
* Local Functions
|
|
******************/
|
|
/* Rebuild a snapshot from the metadata file written at checkpoint time. */
static int dmtcp_cold_start(opal_crs_dmtcp_snapshot_t *snapshot);

/* Fill in snapshot->context_filename and the file-scope full_ckpt_path. */
static int dmtcp_generate_full_ckpt_path(opal_crs_dmtcp_snapshot_t *snapshot);

/* Callbacks registered with MTCP through mtcp_set_callbacks(). */
static void dmtcp_sleep_between_ckpt_callback(int interval);
static void dmtcp_pre_ckpt_callback(char **ckpt_filename);
static void dmtcp_post_ckpt_callback(int is_restarting,
                                     char *mtcp_restore_argv_start_addr);
static int dmtcp_should_ckpt_fd_callback(int fd);
|
|
|
|
/*************************
|
|
* Local Global Variables
|
|
*************************/
|
|
/* Full path of the checkpoint file: produced by
 * dmtcp_generate_full_ckpt_path() and handed to MTCP by the pre_ckpt
 * callback. */
static char *full_ckpt_path = NULL;

/* Signalled by opal_crs_dmtcp_checkpoint() to request a checkpoint; the
 * sleep_between_ckpt callback waits on it. */
static pthread_cond_t checkpoint_cond = PTHREAD_COND_INITIALIZER;

/* Signalled by the sleep_between_ckpt callback; opal_crs_dmtcp_checkpoint()
 * waits on it for the checkpoint to finish. */
static pthread_cond_t checkpoint_done_cond = PTHREAD_COND_INITIALIZER;

/* Mutex paired with both condition variables above. */
static pthread_mutex_t checkpoint_mutex = PTHREAD_MUTEX_INITIALIZER;

/* State recorded by the post_ckpt callback (OPAL_CRS_RESTART or
 * OPAL_CRS_CONTINUE) and reported by opal_crs_dmtcp_checkpoint(). */
static int post_ckpt_state;
|
|
|
|
/**
 * Constructor for opal_crs_dmtcp_snapshot_t.
 *
 * Starts with no context filename and stamps the base snapshot with a heap
 * copy of this component's name.
 *
 * @param snapshot object being constructed
 */
void opal_crs_dmtcp_construct(opal_crs_dmtcp_snapshot_t *snapshot)
{
    snapshot->super.component_name =
        strdup(mca_crs_dmtcp_component.super.base_version.mca_component_name);
    snapshot->context_filename = NULL;
}
|
|
|
|
/**
 * Destructor for opal_crs_dmtcp_snapshot_t: releases the context filename.
 *
 * @param snapshot object being destroyed
 */
void opal_crs_dmtcp_destruct(opal_crs_dmtcp_snapshot_t *snapshot)
{
    /* free(NULL) is a no-op, so no guard is required. */
    free(snapshot->context_filename);
    snapshot->context_filename = NULL;
}
|
|
|
|
/*****************
|
|
* MCA Functions
|
|
*****************/
|
|
/**
 * MCA query entry point: report this component's priority and module.
 *
 * @param module   out: set to the address of the static dmtcp_module table
 * @param priority out: set to the priority stored in the component structure
 * @return OPAL_SUCCESS, always
 */
int opal_crs_dmtcp_component_query(mca_base_module_t **module, int *priority)
{
    opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle,
                        "crs:dmtcp: component_query()");

    *module = (mca_base_module_t *) &dmtcp_module;
    *priority = mca_crs_dmtcp_component.super.priority;

    return OPAL_SUCCESS;
}
|
|
|
|
int opal_crs_dmtcp_module_init(void)
|
|
{
|
|
char *temp_checkpoint_name;
|
|
opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle,
|
|
"crs:dmtcp: module_init()");
|
|
|
|
/*
|
|
* JJH NOTE: Call any initialization routines you require
|
|
*/
|
|
mtcp_set_callbacks(dmtcp_sleep_between_ckpt_callback, /* sleep_between_ckpt */
|
|
dmtcp_pre_ckpt_callback, /* pre_ckpt */
|
|
dmtcp_post_ckpt_callback, /* post_ckpt */
|
|
dmtcp_should_ckpt_fd_callback, /* ckpt_fd */
|
|
NULL); /* write_ckpt_header */
|
|
|
|
/* This serves to simply initialize MTCP. The checkpoint file will
|
|
* actually be set by our pre_ckpt callback (which takes it from the
|
|
* snapshot given to the CRS checkpoint function), and the interval will be
|
|
* ignored, substituted for a synchronization signal that is handled by our
|
|
* sleep_between_ckpt callback.
|
|
*/
|
|
|
|
opal_asprintf(&temp_checkpoint_name, "checkpoint.dmtcp.%ld", syscall(SYS_getpid));
|
|
mtcp_init(temp_checkpoint_name, 0, 1);
|
|
mtcp_ok();
|
|
|
|
opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle,
|
|
"crs:dmtcp: leaving module_init()");
|
|
|
|
free(temp_checkpoint_name);
|
|
|
|
return OPAL_SUCCESS;
|
|
}
|
|
|
|
int opal_crs_dmtcp_module_finalize(void)
|
|
{
|
|
opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle,
|
|
"crs:dmtcp: module_finalize()");
|
|
|
|
/*
|
|
* JJH NOTE: Call any finalization routines you require
|
|
*/
|
|
|
|
return OPAL_SUCCESS;
|
|
}
|
|
|
|
int opal_crs_dmtcp_prelaunch(int32_t rank,
|
|
char *base_snapshot_dir,
|
|
char **app,
|
|
char **cwd,
|
|
char ***argv,
|
|
char ***env)
|
|
{
|
|
char * tmp_env_var = NULL;
|
|
|
|
/*
|
|
* The below should be left untouched for now
|
|
*/
|
|
(void) mca_base_var_env_name("opal_cr_is_tool", &tmp_env_var);
|
|
opal_setenv(tmp_env_var,
|
|
"0", true, env);
|
|
free(tmp_env_var);
|
|
tmp_env_var = NULL;
|
|
|
|
opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle,
|
|
"crs:dmtcp: leaving module_prelaunch()");
|
|
|
|
return OPAL_SUCCESS;
|
|
}
|
|
|
|
int opal_crs_dmtcp_reg_thread(void)
|
|
{
|
|
/*
|
|
* JJH NOTE: If you require that all threads that may call into MTCP
|
|
* explicitly register with MTCP, then place the necessary
|
|
* initialization here.
|
|
*/
|
|
|
|
opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle,
|
|
"crs:dmtcp: leaving module_reg_thread()");
|
|
|
|
return OPAL_SUCCESS;
|
|
}
|
|
|
|
int opal_crs_dmtcp_checkpoint(pid_t pid,
|
|
opal_crs_base_snapshot_t *base_snapshot,
|
|
opal_crs_base_ckpt_options_t *options,
|
|
opal_crs_state_type_t *state)
|
|
{
|
|
int unlock_retval, exit_status = OPAL_SUCCESS;
|
|
char buf[BUFSIZ];
|
|
opal_crs_dmtcp_snapshot_t *snapshot;
|
|
|
|
opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle,
|
|
"crs:dmtcp: about to lock mutex for checkpoint()");
|
|
|
|
pthread_mutex_lock(&checkpoint_mutex);
|
|
snapshot = (opal_crs_dmtcp_snapshot_t *) base_snapshot;
|
|
|
|
opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle,
|
|
"crs:dmtcp: checkpoint(%d, ---)", pid);
|
|
|
|
/* Are we checkpointing ourselves or a peer.
|
|
* JJH NOTE: This will only ever be called when pid == getpid()
|
|
* This is an old interface argument, that is no longer used.
|
|
*/
|
|
|
|
/* bricka (2010-05-14): According to crs.h, 0 also indicates checkpointing
|
|
* self.
|
|
*/
|
|
if((pid != 0) && (pid != syscall(SYS_getpid)) ) {
|
|
/* MTCP can only checkpoint a single process: we can only checkpoint
|
|
* ourself. */
|
|
*state = OPAL_CRS_ERROR;
|
|
exit_status = OPAL_ERROR;
|
|
goto cleanup;
|
|
}
|
|
|
|
/* the metadata file should always be NULL at this point */
|
|
if ( NULL != snapshot->super.metadata) {
|
|
opal_output(mca_crs_dmtcp_component.super.output_handle,
|
|
"crs:dmtcp: checkpoint(): Error: Metadata file already open");
|
|
exit_status = OPAL_ERROR;
|
|
goto cleanup;
|
|
}
|
|
|
|
/*
|
|
* Update the snapshot metadata with the component name so opal-restart can
|
|
* pick the correct CRS to restart with.
|
|
*/
|
|
snapshot->super.component_name = strdup(mca_crs_dmtcp_component.super.base_version.mca_component_name);
|
|
|
|
if( NULL == snapshot->super.metadata ) {
|
|
if (NULL == (snapshot->super.metadata = fopen(snapshot->super.metadata_filename, "a")) ) {
|
|
opal_output(mca_crs_dmtcp_component.super.output_handle,
|
|
"crs:dmtcp: checkpoint(): Error: Unable to open the file (%s)",
|
|
snapshot->super.metadata_filename);
|
|
exit_status = OPAL_ERROR;
|
|
goto cleanup;
|
|
}
|
|
}
|
|
|
|
/* The filename of the checkpoint will be changed by our pre_ckpt hook
|
|
* based on the options given to this function. */
|
|
if(dmtcp_generate_full_ckpt_path(snapshot) == -1) {
|
|
opal_output(mca_crs_dmtcp_component.super.output_handle,
|
|
"crs:dmtcp: dmtcp_checkpoint: unable to generate context filename.");
|
|
|
|
exit_status = OPAL_ERROR;
|
|
goto cleanup;
|
|
}
|
|
|
|
/*
|
|
* JJH NOTE: You can write however much or little data you want to the
|
|
* metadata file. The metadata file is stored with the local
|
|
* checkpoint, and provided at restart time to help the
|
|
* CRS component deteremine how to restart from any files
|
|
* that is left in this directory during checkpoint.
|
|
* Use the command below to write key/value strings to the
|
|
* metadata file.
|
|
* (Just as we did above with the component name).
|
|
*/
|
|
if ( 0 > fprintf(snapshot->super.metadata, "%s%s\n", CRS_METADATA_COMP, snapshot->super.component_name)) {
|
|
opal_output(mca_crs_dmtcp_component.super.output_handle,
|
|
"crs:dmtcp: dmtcp_checkpoint: unable to print component name to metadata");
|
|
}
|
|
|
|
if ( 0 > fprintf(snapshot->super.metadata, "%s%s\n", CRS_METADATA_CONTEXT, snapshot->context_filename)) {
|
|
opal_output(mca_crs_dmtcp_component.super.output_handle,
|
|
"crs:dmtcp: dmtcp_checkpoint: unable to print context name to metadata");
|
|
}
|
|
|
|
fclose(snapshot->super.metadata );
|
|
snapshot->super.metadata = NULL;
|
|
|
|
/*
|
|
* JJH NOTE: Setup and request a checkpoint of this process.
|
|
*/
|
|
|
|
opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle,
|
|
"crs:dmtcp: dmtcp_checkpoint: will checkpoint to file: %s",
|
|
full_ckpt_path);
|
|
|
|
opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle,
|
|
"crs:dmtcp: dmtcp_checkpoint: about to signal checkpoint");
|
|
|
|
/* Now that we have set the requested filename, we simply need to start
|
|
* the checkpoint. */
|
|
pthread_cond_signal(&checkpoint_cond);
|
|
|
|
opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle,
|
|
"crs:dmtcp: dmtcp_checkpoint: signalled checkpoint");
|
|
|
|
/* We want to wait for the checkpoint to finish before we continue (in
|
|
* particular, we need the post_ckpt hook to happen so that we know the
|
|
* status of the checkpoint)
|
|
*/
|
|
pthread_cond_wait(&checkpoint_done_cond, &checkpoint_mutex);
|
|
|
|
opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle,
|
|
"crs:dmtcp: dmtcp_checkpoint: received checkpoint_done signal");
|
|
|
|
/* We have now been checkpointed. Note that the state of the checkpoint
|
|
* (OPAL_CRS_CONTINUE, etc.) has been recorded by the post_ckpt hook.
|
|
*/
|
|
*state = post_ckpt_state;
|
|
exit_status = OPAL_SUCCESS;
|
|
|
|
free(full_ckpt_path);
|
|
|
|
cleanup:
|
|
unlock_retval = pthread_mutex_unlock(&checkpoint_mutex);
|
|
|
|
if( 0 != unlock_retval ) {
|
|
opal_output(mca_crs_dmtcp_component.super.output_handle,
|
|
"crs:dmtcp: dmtcp_checkpoint: unable to unlock mutex at end of checkpoint: %s",
|
|
strerror_r(unlock_retval, buf, BUFSIZ));
|
|
|
|
exit_status = OPAL_ERROR;
|
|
}
|
|
|
|
if( NULL != snapshot->super.metadata ) {
|
|
fclose(snapshot->super.metadata );
|
|
snapshot->super.metadata = NULL;
|
|
}
|
|
|
|
return exit_status;
|
|
}
|
|
|
|
/**
 * Restart a process from a snapshot by exec'ing mtcp_restart on the context
 * file.
 *
 * @param base_snapshot snapshot to restart from
 * @param spawn_child   if true, fork first and exec in the child; otherwise
 *                      this process itself is replaced
 * @param child_pid     out (may be NULL): pid of the forked child when
 *                      @p spawn_child is true and the fork succeeded
 * @return OPAL_SUCCESS in the parent after a successful fork; OPAL_ERROR on
 *         any failure.  Does not return at all when exec succeeds.
 */
int opal_crs_dmtcp_restart(opal_crs_base_snapshot_t *base_snapshot, bool spawn_child, pid_t *child_pid)
{
    int ret, exit_status = OPAL_SUCCESS;
    opal_crs_dmtcp_snapshot_t *snapshot = OBJ_NEW(opal_crs_dmtcp_snapshot_t);

    if (NULL == snapshot) {
        opal_output(mca_crs_dmtcp_component.super.output_handle,
                    "crs:dmtcp: dmtcp_restart: Unable to allocate the snapshot.");
        return OPAL_ERROR;
    }

    /* The constructor stored a strdup'd component name; release it before the
     * struct copy below overwrites the pointer. */
    free(snapshot->super.component_name);
    snapshot->super = *base_snapshot;

    /*
     * NOTE(review): the struct copy above makes `snapshot` share every
     * pointer (and the object header) with `base_snapshot`, so it is
     * deliberately not released here; doing so could free memory the caller
     * still owns.  Confirm the intended ownership.
     */

    opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle,
                        "crs:dmtcp: restart(--, %d)", spawn_child);

    /*
     * JJH NOTE: 'cold_start' indicates that this process is being restarted
     *           from opal-restart instead of from within an already running
     *           process.  In the current code base, this is always set to
     *           true since it does not allow a process to request a restart
     *           of itself.
     */
    if (snapshot->super.cold_start) {
        /* Read the metadata left by the checkpoint() of this process */
        if (OPAL_SUCCESS != (ret = dmtcp_cold_start(snapshot))) {
            opal_output(mca_crs_dmtcp_component.super.output_handle,
                        "crs:dmtcp: dmtcp_restart: Unable to reconstruct the snapshot.");
            exit_status = OPAL_ERROR;
            goto cleanup;
        }
    }

    /* Without a context file there is nothing to hand to mtcp_restart. */
    if (NULL == snapshot->context_filename) {
        opal_output(mca_crs_dmtcp_component.super.output_handle,
                    "crs:dmtcp: dmtcp_restart: Error: no context filename available.");
        exit_status = OPAL_ERROR;
        goto cleanup;
    }

    /* JJH NOTE: Nearly all of the time the 'spawn_child' argument is set to
     *           'false' indicating that the restart function is expected to
     *           call exec() directly.  It is only set to 'true' if the user
     *           explicitly tells opal-restart to spawn off the child, which
     *           rarely/never happens.
     */
    if (spawn_child) {
        /* Use a distinct local name: the original declared a local
         * `child_pid` here, which shadowed the out-parameter so the caller
         * never learned the child's pid. */
        pid_t forked_pid = fork();

        if (forked_pid < 0) {
            opal_output(mca_crs_dmtcp_component.super.output_handle,
                        "crs:dmtcp: dmtcp_restart: Unable to spawn child.");
            exit_status = OPAL_ERROR;
            goto cleanup;
        }

        if (forked_pid > 0) {
            /* Parent: report the child and return. */
            if (NULL != child_pid) {
                *child_pid = forked_pid;
            }
            goto cleanup;
        }
        /* Child: fall through and exec.
         * NOTE(review): if the exec below fails, the child returns to the
         * caller as a duplicate of the parent -- consider whether it should
         * _exit() instead. */
    }

    /*
     * JJH NOTE: Restart the process by replacing this process
     */
    opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle,
                        "crs:dmtcp: dmtcp_restart: About to invoke command: %s with argv: %s %s",
                        MTCP_RESTART_COMMAND,
                        MTCP_RESTART_COMMAND,
                        snapshot->context_filename);

    /* The variadic argument list must end with a null *pointer*; a bare NULL
     * may be a plain integer 0. */
    (void) execlp(MTCP_RESTART_COMMAND, MTCP_RESTART_COMMAND,
                  snapshot->context_filename, (char *) NULL);

    /* execlp() only returns on failure. */
    opal_output(mca_crs_dmtcp_component.super.output_handle,
                "crs:dmtcp: dmtcp_restart: error in replacing process: %s",
                strerror(errno));
    exit_status = OPAL_ERROR;

cleanup:
    return exit_status;
}
|
|
|
|
int opal_crs_dmtcp_disable_checkpoint(void)
|
|
{
|
|
opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle,
|
|
"crs:dmtcp: disable_checkpoint()");
|
|
|
|
/*
|
|
* JJH NOTE: Enter a critical section. This is not really used in the code
|
|
* at the moment.
|
|
*/
|
|
mtcp_no();
|
|
|
|
return OPAL_SUCCESS;
|
|
}
|
|
|
|
int opal_crs_dmtcp_enable_checkpoint(void)
|
|
{
|
|
opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle,
|
|
"crs:dmtcp: enable_checkpoint()");
|
|
/*
|
|
* JJH NOTE: Leave a critical section. This is not really used in the code
|
|
* at the moment.
|
|
*/
|
|
mtcp_ok();
|
|
|
|
return OPAL_SUCCESS;
|
|
}
|
|
|
|
/*****************************
|
|
* Local Function Definitions
|
|
*****************************/
|
|
/**
 * Reconstruct a snapshot from the metadata file written at checkpoint time.
 *
 * Opens the metadata file if needed, verifies that the snapshot was produced
 * by this component, reads the context token and builds
 * snapshot->context_filename as "<snapshot_directory>/<context>".  On success
 * the cold_start flag is cleared.  The metadata file is closed on every path.
 *
 * @param snapshot snapshot to fill in
 * @return OPAL_SUCCESS, OPAL_ERROR, or the error returned by
 *         opal_crs_base_extract_expected_component()
 */
static int dmtcp_cold_start(opal_crs_dmtcp_snapshot_t *snapshot)
{
    int ret, exit_status = OPAL_SUCCESS;
    char **tmp_argv = NULL;
    char *component_name = NULL;
    int prev_pid;

    /*
     * Find the snapshot directory, read the metadata file for
     * component name and previous pid
     */
    if (NULL == snapshot->super.metadata) {
        if (NULL == (snapshot->super.metadata = fopen(snapshot->super.metadata_filename, "r"))) {
            opal_output(mca_crs_dmtcp_component.super.output_handle,
                        "crs:dmtcp: dmtcp_cold_start(): Error: Unable to open the file (%s)",
                        snapshot->super.metadata_filename);
            exit_status = OPAL_ERROR;
            goto cleanup;
        }
    }

    if (OPAL_SUCCESS != (ret = opal_crs_base_extract_expected_component(snapshot->super.metadata,
                                                                        &component_name, &prev_pid))) {
        opal_output(mca_crs_dmtcp_component.super.output_handle,
                    "crs:dmtcp: dmtcp_cold_start: Error: Failed to extract the metadata from the local snapshot (%s). Returned %d.",
                    snapshot->super.metadata_filename, ret);
        exit_status = ret;
        goto cleanup;
    }

    /* Guard against a "successful" extraction that produced no name; the
     * strdup()/strcmp() calls below must not see NULL. */
    if (NULL == component_name) {
        opal_output(mca_crs_dmtcp_component.super.output_handle,
                    "crs:dmtcp: dmtcp_cold_start: Error: No component name found in the local snapshot (%s).",
                    snapshot->super.metadata_filename);
        exit_status = OPAL_ERROR;
        goto cleanup;
    }

    /* NOTE(review): component_name is presumably heap-allocated by
     * opal_crs_base_extract_expected_component() and would then leak here;
     * confirm its ownership before adding a free(). */
    snapshot->super.component_name = strdup(component_name);

    /*
     * Compare the component strings to make sure this is our snapshot before
     * going further.  A full strcmp() is required: bounding the comparison
     * by strlen(component_name) would accept any prefix of our name,
     * including the empty string.
     * JJH NOTE: This will nearly always be true since opal-restart also
     *           checks this metadata.
     */
    if (0 != strcmp(mca_crs_dmtcp_component.super.base_version.mca_component_name,
                    component_name)) {
        opal_output(mca_crs_dmtcp_component.super.output_handle,
                    "crs:dmtcp: dmtcp_cold_start: Error: This snapshot (%s) is not intended for us (%s)\n",
                    component_name, mca_crs_dmtcp_component.super.base_version.mca_component_name);
        exit_status = OPAL_ERROR;
        goto cleanup;
    }

    /*
     * Read context information from the metadata file
     */
    opal_crs_base_metadata_read_token(snapshot->super.metadata, CRS_METADATA_CONTEXT, &tmp_argv);
    if (NULL == tmp_argv || NULL == tmp_argv[0]) {
        opal_output(mca_crs_dmtcp_component.super.output_handle,
                    "crs:dmtcp: dmtcp_cold_start: Error: Failed to read the %s token from the local checkpoint in %s",
                    CRS_METADATA_CONTEXT, snapshot->super.snapshot_directory);
        exit_status = OPAL_ERROR;
        goto cleanup;
    }

    /* opal_asprintf() leaves the pointer NULL on failure. */
    opal_asprintf(&(snapshot->context_filename), "%s/%s",
                  snapshot->super.snapshot_directory, tmp_argv[0]);
    if (NULL == snapshot->context_filename) {
        opal_output(mca_crs_dmtcp_component.super.output_handle,
                    "crs:dmtcp: dmtcp_cold_start: Error: Unable to allocate the context filename");
        exit_status = OPAL_ERROR;
        goto cleanup;
    }

    opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle,
                        "crs:dmtcp: cold_start(%s)", snapshot->context_filename);

    /*
     * Reset the cold_start flag
     */
    snapshot->super.cold_start = false;

cleanup:
    if (NULL != tmp_argv) {
        opal_argv_free(tmp_argv);
        tmp_argv = NULL;
    }

    if (NULL != snapshot->super.metadata) {
        fclose(snapshot->super.metadata);
        snapshot->super.metadata = NULL;
    }

    return exit_status;
}
|
|
|
|
/**
|
|
* Given a snapshot, generate the context filename and its full path.
|
|
*
|
|
* @param snapshot the snapshot with request information
|
|
*/
|
|
static int dmtcp_generate_full_ckpt_path(opal_crs_dmtcp_snapshot_t *snapshot)
|
|
{
|
|
int retval;
|
|
retval = opal_asprintf(&(snapshot->context_filename), "ompi_dmtcp_context.%ld", syscall(SYS_getpid));
|
|
if(retval == -1)
|
|
return -1;
|
|
|
|
return opal_asprintf(&full_ckpt_path, "%s/%s", snapshot->super.snapshot_directory, snapshot->context_filename);
|
|
}
|
|
|
|
/**
|
|
* This is a callback function to call the actual checkpointing routine.
|
|
* Instead of waiting for a specific interval as MTCP does, we will wait on a
|
|
* synchronization signal that will allow us to checkpoint on demand. The
|
|
* argument to this function will be ignored.
|
|
*/
|
|
static void dmtcp_sleep_between_ckpt_callback(int interval)
|
|
{
|
|
int signal_retval;
|
|
char buf[BUFSIZ];
|
|
|
|
opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle,
|
|
"crs:dmtcp: called sleep_between_ckpt callback");
|
|
|
|
pthread_mutex_lock(&checkpoint_mutex);
|
|
|
|
/* If the MPI checkpoint thread is waiting on the checkpoint_done_cond and
|
|
* this thread is here, it means that a checkpoint has just completed.
|
|
* Let's signal the MPI checkpoint thread to resume. */
|
|
signal_retval = pthread_cond_signal(&checkpoint_done_cond);
|
|
|
|
if( 0 != signal_retval) {
|
|
opal_output(mca_crs_dmtcp_component.super.output_handle,
|
|
"crs:dmtcp: post_ckpt_callback(): Unable to signal checkpoint done: %s",
|
|
strerror_r(signal_retval, buf, BUFSIZ));
|
|
}
|
|
|
|
/* now we simply wait for the signal to checkpoint */
|
|
pthread_cond_wait(&checkpoint_cond, &checkpoint_mutex);
|
|
|
|
opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle,
|
|
"crs:dmtcp: received sync signal to checkpoint.");
|
|
|
|
/* We have now been instructed to checkpoint, so we return. Note that the
|
|
* mutex is still locked: the post_ckpt callback will unlock it. */
|
|
}
|
|
|
|
/**
|
|
* This is a callback function that is invoked before the checkpoint actually
|
|
* occurs. It enables us to do any logging that is necessary, as well as change
|
|
* the filename that the checkpoint will be written to. We expect that this
|
|
* filename will be pulled from the checkpoint options.
|
|
*
|
|
* @param ckpt_filename a pointer in which to store the desired checkpoint
|
|
* filename
|
|
*/
|
|
static void dmtcp_pre_ckpt_callback(char **ckpt_filename)
|
|
{
|
|
*ckpt_filename = full_ckpt_path;
|
|
}
|
|
|
|
/**
|
|
* This is a callback function that is invoked after the checkpoint has
|
|
* finished. It enables us to do any logging that is necessary, as well as
|
|
* report whether this is called from a restart or a checkpoint. We will report
|
|
* this status, signal the CRS code to continue running, and then release the
|
|
* mutex that we are holding.
|
|
*
|
|
* @param is_restarting whether or not this is being called as part of a restart
|
|
* @param mtcp_restore_argv_start_addr unused
|
|
*/
|
|
static void dmtcp_post_ckpt_callback(int is_restarting, char *mtcp_restore_argv_start_addr)
|
|
{
|
|
int unlock_retval;
|
|
char buf[BUFSIZ];
|
|
|
|
opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle,
|
|
"crs:dmtcp: in post_ckpt_callback, restarting: %d", is_restarting);
|
|
if(is_restarting)
|
|
post_ckpt_state = OPAL_CRS_RESTART;
|
|
else
|
|
post_ckpt_state = OPAL_CRS_CONTINUE;
|
|
|
|
opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle,
|
|
"crs:dmtcp: unlocking at end of post_ckpt_callback");
|
|
|
|
unlock_retval = pthread_mutex_unlock(&checkpoint_mutex);
|
|
|
|
if( 0 != unlock_retval) {
|
|
opal_output(mca_crs_dmtcp_component.super.output_handle,
|
|
"crs:dmtcp: post_ckpt_callback(): Unable to unlock mutex: %s",
|
|
strerror_r(unlock_retval, buf, BUFSIZ));
|
|
}
|
|
}
|
|
|
|
/**
|
|
* This is a callback function that is invoked by DMTCP to see if it should
|
|
* checkpoint the given file descriptor.
|
|
*
|
|
* If the file descriptor is a socket, named-pipe or pseudo-terminal, DMTCP
|
|
* should skip checkpointing them.
|
|
*
|
|
* If we can't determine the type of fd (stat and/or readlink failed), we ask
|
|
* DMTCP to try to checkpoint them anyways with the assumption that DMTCP would
|
|
* warn users of any such case.
|
|
*
|
|
* @param fd file descriptor to checkpoint
|
|
* @return: 1 if DMTCP should ckpt the file descriptor, 0 otherwise.
|
|
*/
|
|
static int dmtcp_should_ckpt_fd_callback(int fd)
|
|
{
|
|
struct stat stat_buf;
|
|
char device_name[PATH_MAX];
|
|
char proc_filename[64];
|
|
char buf[BUFSIZ];
|
|
|
|
if (fstat(fd, &stat_buf) != 0) {
|
|
opal_output(mca_crs_dmtcp_component.super.output_handle,
|
|
"crs:dmtcp: should_ckpt_fd_callback(): error stat()'ing %d: %s",
|
|
fd, strerror_r(errno, buf, BUFSIZ));
|
|
return 1;
|
|
/* Don't checkpoint sockets and FIFOs */
|
|
} else if (S_ISSOCK(stat_buf.st_mode) || S_ISFIFO(stat_buf.st_mode)) {
|
|
opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle,
|
|
"crs:dmtcp: skipping checkpointing socket/fifo: %d",
|
|
fd);
|
|
return 0;
|
|
}
|
|
|
|
memset(device_name, 0, sizeof device_name);
|
|
sprintf(proc_filename, "/proc/self/fd/%d", fd);
|
|
if (readlink(proc_filename, device_name, sizeof(device_name) - 1) <= 0) {
|
|
opal_output(mca_crs_dmtcp_component.super.output_handle,
|
|
"crs:dmtcp: should_ckpt_fd_callback(): readlink(%d) failed: %s",
|
|
fd, strerror_r(errno, buf, BUFSIZ));
|
|
return 1;
|
|
}
|
|
|
|
/* Don't checkpoint ptys */
|
|
if (strstr(device_name, "/dev/pts/") == 0 ||
|
|
strstr(device_name, "/dev/pty") == 0 ||
|
|
strstr(device_name, "/dev/tty") == 0) {
|
|
opal_output_verbose(10, mca_crs_dmtcp_component.super.output_handle,
|
|
"crs:dmtcp: skipping checkpointing %s",
|
|
device_name);
|
|
return 0;
|
|
}
|
|
|
|
/* Checkpoint fd by default */
|
|
return 1;
|
|
}
|