/*
 * Copyright (c) 2004-2010 The Trustees of Indiana University.
 *                         All rights reserved.
 * Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
 *                         All rights reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2007      Los Alamos National Security, LLC.  All rights
 *                         reserved.
 * Copyright (c) 2007      Evergrid, Inc. All rights reserved.
 *
 * Copyright (c) 2018      Amazon.com, Inc. or its affiliates.  All Rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

#include "opal_config.h"

#include <sys/types.h>
#include <stdio.h>
#include <stdlib.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif  /* HAVE_UNISTD_H */
#include <string.h>
#ifdef HAVE_DLFCN_H
#include <dlfcn.h>
#endif

#include "opal/util/opal_environ.h"
#include "opal/util/output.h"
#include "opal/util/show_help.h"
#include "opal/util/argv.h"
#include "opal/util/printf.h"

#include "opal/constants.h"
#include "opal/mca/base/mca_base_var.h"

#include "opal/mca/crs/crs.h"
#include "opal/mca/crs/base/base.h"
#include "opal/runtime/opal_cr.h"

#include "crs_self.h"

/*
 * Self module
 */
static opal_crs_base_module_t loc_module = {
    /** Initialization Function */
    opal_crs_self_module_init,
    /** Finalization Function */
    opal_crs_self_module_finalize,

    /** Checkpoint interface */
    opal_crs_self_checkpoint,

    /** Restart Command Access */
    opal_crs_self_restart,

    /** Disable checkpoints */
    opal_crs_self_disable_checkpoint,
    /** Enable checkpoints */
    opal_crs_self_enable_checkpoint,

    /** Prelaunch */
    opal_crs_self_prelaunch,

    /** Register Thread */
    opal_crs_self_reg_thread
};

/*
 * Snapshot structure
 */
OBJ_CLASS_DECLARATION(opal_crs_self_snapshot_t);

struct opal_crs_self_snapshot_t {
    /** Base CRS snapshot type */
    opal_crs_base_snapshot_t super;
    /** Command Line used to restart the app */
    char *cmd_line;
};
typedef struct opal_crs_self_snapshot_t opal_crs_self_snapshot_t;

static void opal_crs_self_construct(opal_crs_self_snapshot_t *obj);
static void opal_crs_self_destruct(opal_crs_self_snapshot_t *obj);

OBJ_CLASS_INSTANCE(opal_crs_self_snapshot_t,
                   opal_crs_base_snapshot_t,
                   opal_crs_self_construct,
                   opal_crs_self_destruct);

/* Generic function-pointer type used to carry the result of a symbol lookup
 * before it is cast to the specific user-callback type. */
typedef void (*opal_crs_self_dlsym_dummy_fn_t)(void);

/************************************
 * Locally Global vars & functions :)
 ************************************/
static int crs_self_find_function(char *prefix, char *suffix,
                                  opal_crs_self_dlsym_dummy_fn_t *fn_ptr);

static int self_update_snapshot_metadata(opal_crs_self_snapshot_t *snapshot);

static int opal_crs_self_restart_cmd(opal_crs_self_snapshot_t *snapshot, char **cmd);
static int self_cold_start(opal_crs_self_snapshot_t *snapshot);

/* Object constructor: a new snapshot starts without a restart command. */
static void opal_crs_self_construct(opal_crs_self_snapshot_t *snapshot)
{
    snapshot->cmd_line = NULL;
}

/* Object destructor: the snapshot owns cmd_line and frees it here. */
static void opal_crs_self_destruct(opal_crs_self_snapshot_t *snapshot)
{
    free(snapshot->cmd_line);
    snapshot->cmd_line = NULL;
}

static int opal_crs_self_extract_callbacks(void);

/*
 * MCA Functions
 */

/**
 * Component query: decide whether this component can be selected.
 *
 * @param module   (OUT) Set to the self module, or NULL when not selectable.
 * @param priority (OUT) Selection priority; -1 when not selectable.
 *
 * @return OPAL_SUCCESS when the module is offered, OPAL_ERROR when the
 *         user checkpoint callback could not be found.
 */
int opal_crs_self_component_query(mca_base_module_t **module, int *priority)
{
    int ret;

    opal_output_verbose(10, mca_crs_self_component.super.output_handle,
                        "crs:self: component_query()");

    /*
     * If this is a tool, then return a module with the lowest priority.
     * This allows 'mpirun' to select the 'none' component since it has
     * a priority higher than 0.
     * But also allows 'opal-restart' to select this component if needed
     * since it only ever requests that a specific component be opened
     * that is defined in the snapshot metadata file.
     */
    if (opal_cr_is_tool) {
        *priority = 0;
        *module = (mca_base_module_t *) &loc_module;
        return OPAL_SUCCESS;
    }

    /*
     * Extract the user level callbacks if they exist
     */
    ret = opal_crs_self_extract_callbacks();

    if (OPAL_SUCCESS != ret || !mca_crs_self_component.can_checkpoint) {
        *priority = -1;
        *module = NULL;
        return OPAL_ERROR;
    }

    *priority = mca_crs_self_component.super.priority;
    *module = (mca_base_module_t *) &loc_module;
    return OPAL_SUCCESS;
}

/**
 * Look up the three user callbacks (checkpoint, continue, restart) by name
 * and record them in the component structure.
 *
 * A callback that cannot be found is stored as NULL.  Checkpointing is
 * enabled only when the checkpoint callback was found; the continue and
 * restart callbacks are optional at this point.
 *
 * @return Always OPAL_SUCCESS; callers inspect can_checkpoint.
 */
static int opal_crs_self_extract_callbacks(void)
{
    opal_crs_self_dlsym_dummy_fn_t loc_fn = NULL;

    /*
     * Find the function names.  The lookup status is intentionally not
     * treated as fatal: a failed lookup yields a NULL callback.
     */
    (void) crs_self_find_function(mca_crs_self_component.prefix,
                                  SUFFIX_CHECKPOINT,
                                  &loc_fn);
    mca_crs_self_component.ucb_checkpoint_fn =
        (opal_crs_self_checkpoint_callback_fn_t) loc_fn;

    (void) crs_self_find_function(mca_crs_self_component.prefix,
                                  SUFFIX_CONTINUE,
                                  &loc_fn);
    mca_crs_self_component.ucb_continue_fn =
        (opal_crs_self_continue_callback_fn_t) loc_fn;

    (void) crs_self_find_function(mca_crs_self_component.prefix,
                                  SUFFIX_RESTART,
                                  &loc_fn);
    mca_crs_self_component.ucb_restart_fn =
        (opal_crs_self_restart_callback_fn_t) loc_fn;

    /*
     * Sanity check: only the checkpoint callback is mandatory.
     */
    mca_crs_self_component.can_checkpoint =
        (NULL != mca_crs_self_component.ucb_checkpoint_fn);

    return OPAL_SUCCESS;
}

/**
 * Module initialization.
 *
 * Reports (when verbose) which user callbacks are missing and, if the
 * do_restart flag is set on the component, invokes the user's restart
 * callback.
 *
 * @return Always OPAL_SUCCESS.
 */
int opal_crs_self_module_init(void)
{
    bool callback_matched = true;

    opal_output_verbose(10, mca_crs_self_component.super.output_handle,
                        "crs:self: module_init()");

    if (opal_cr_is_tool) {
        return OPAL_SUCCESS;
    }

    /*
     * Sanity check
     */
    if (NULL == mca_crs_self_component.ucb_checkpoint_fn) {
        callback_matched = false;
        mca_crs_self_component.can_checkpoint = false;
    }
    if (NULL == mca_crs_self_component.ucb_continue_fn) {
        callback_matched = false;
    }
    if (NULL == mca_crs_self_component.ucb_restart_fn) {
        callback_matched = false;
    }

    if (!callback_matched && 1 <= mca_crs_self_component.super.verbose) {
        opal_show_help("help-opal-crs-self.txt", "self:no_callback", false,
                       "checkpoint", mca_crs_self_component.prefix, SUFFIX_CHECKPOINT,
                       "continue ", mca_crs_self_component.prefix, SUFFIX_CONTINUE,
                       "restart ", mca_crs_self_component.prefix, SUFFIX_RESTART,
                       PREFIX_DEFAULT);
    }

    /*
     * If the user requested that we do_restart, then call their callback
     */
    if (mca_crs_self_component.do_restart) {
        opal_output_verbose(10, mca_crs_self_component.super.output_handle,
                            "crs:self: module_init: Call their restart function");
        if (NULL != mca_crs_self_component.ucb_restart_fn) {
            mca_crs_self_component.ucb_restart_fn();
        }
    }

    return OPAL_SUCCESS;
}

/**
 * Module finalization; nothing to tear down.
 *
 * @return Always OPAL_SUCCESS.
 */
int opal_crs_self_module_finalize(void)
{
    opal_output_verbose(10, mca_crs_self_component.super.output_handle,
                        "crs:self: module_finalize()");

    return OPAL_SUCCESS;
}

/**
 * Take a checkpoint by calling the user's checkpoint callback and recording
 * the restart command it returns in the snapshot metadata file.
 *
 * @param pid            Process id; used only for logging here.
 * @param base_snapshot  Snapshot description supplied by the caller.
 * @param options        Checkpoint options; only the 'stop' flag is read.
 * @param state          (OUT) OPAL_CRS_CONTINUE on success, OPAL_CRS_ERROR
 *                       on failure.
 *
 * @return OPAL_SUCCESS, OPAL_ERR_NOT_SUPPORTED when called from a tool, or
 *         an error code.
 */
int opal_crs_self_checkpoint(pid_t pid,
                             opal_crs_base_snapshot_t *base_snapshot,
                             opal_crs_base_ckpt_options_t *options,
                             opal_crs_state_type_t *state)
{
    opal_crs_self_snapshot_t *snapshot = NULL;
    int ret, exit_status = OPAL_SUCCESS;
    char *restart_cmd = NULL;

    /*
     * This function should never be called by a tool.
     * Checked before anything is allocated so the early return cannot leak.
     */
    if (opal_cr_is_tool) {
        return OPAL_ERR_NOT_SUPPORTED;
    }

    snapshot = OBJ_NEW(opal_crs_self_snapshot_t);
    if (NULL == snapshot) {
        *state = OPAL_CRS_ERROR;
        return OPAL_ERROR;
    }

    if (options->stop) {
        opal_output(0,
                    "crs:self: checkpoint(): Error: SIGSTOP Not currently supported!");
    }

    /*
     * Setup for snapshot directory creation.
     * This is a shallow copy: the pointers inside 'super' still belong to
     * the caller's base_snapshot.
     * NOTE(review): 'snapshot' is never released.  Releasing it looks unsafe
     * while it aliases the caller's pointers; confirm the intended ownership
     * before adding a release.
     */
    snapshot->super = *base_snapshot;

    opal_output_verbose(10, mca_crs_self_component.super.output_handle,
                        "crs:self: checkpoint(%d, ---)", (int) pid);

    if (!mca_crs_self_component.can_checkpoint) {
        opal_show_help("help-opal-crs-self.txt", "self:ckpt_disabled", false);
        exit_status = OPAL_ERROR;
        goto cleanup;
    }

    /*
     * Update the snapshot metadata
     */
    snapshot->super.component_name =
        strdup(mca_crs_self_component.super.base_version.mca_component_name);
    if (NULL == snapshot->super.component_name) {
        exit_status = OPAL_ERROR;
        goto cleanup;
    }

    if (NULL == snapshot->super.metadata) {
        snapshot->super.metadata = fopen(snapshot->super.metadata_filename, "a");
        if (NULL == snapshot->super.metadata) {
            opal_output(mca_crs_self_component.super.output_handle,
                        "crs:self: checkpoint(): Error: Unable to open the file (%s)",
                        snapshot->super.metadata_filename);
            exit_status = OPAL_ERROR;
            goto cleanup;
        }
    }
    fprintf(snapshot->super.metadata, "%s%s\n",
            CRS_METADATA_COMP, snapshot->super.component_name);

    /*
     * Call the user callback function.  The callback hands back the restart
     * command; this function frees it during cleanup.
     */
    if (NULL != mca_crs_self_component.ucb_checkpoint_fn) {
        mca_crs_self_component.ucb_checkpoint_fn(&restart_cmd);
    }

    /*
     * Save the restart command
     */
    if (NULL == restart_cmd) {
        opal_show_help("help-opal-crs-self.txt", "self:no-restart-cmd", true);
        exit_status = OPAL_ERROR;
        goto cleanup;
    }

    snapshot->cmd_line = strdup(restart_cmd);
    if (NULL == snapshot->cmd_line) {
        exit_status = OPAL_ERROR;
        goto cleanup;
    }
    opal_output_verbose(10, mca_crs_self_component.super.output_handle,
                        "crs:self: checkpoint: Restart Command (%s)",
                        snapshot->cmd_line);

    /*
     * The best we can do is update the metadata file with the
     * application argv and argc we started with.
     */
    if (OPAL_SUCCESS != (ret = self_update_snapshot_metadata(snapshot))) {
        opal_output(mca_crs_self_component.super.output_handle,
                    "crs:self: checkpoint(): Error: Unable to update metadata for snapshot (%s).",
                    snapshot->super.metadata_filename);
        exit_status = ret;
        goto cleanup;
    }

    *state = OPAL_CRS_CONTINUE;

    /*
     * Call their continue routine for completeness
     */
    if (NULL != mca_crs_self_component.ucb_continue_fn) {
        mca_crs_self_component.ucb_continue_fn();
    }

    /*
     * NOTE(review): this function used to end with an assignment to the
     * by-value 'base_snapshot' parameter, which had no effect for the caller
     * and has been dropped.  If the caller is meant to see the updated
     * snapshot fields, they need to be copied back explicitly.
     */

 cleanup:
    /* Every failure path reports the error state to the caller. */
    if (OPAL_SUCCESS != exit_status) {
        *state = OPAL_CRS_ERROR;
    }

    free(restart_cmd);
    restart_cmd = NULL;

    return exit_status;
}

/*
 * Notice that the user restart callback is not called here, but always from
 * opal_init for the self module.
*/ int opal_crs_self_restart(opal_crs_base_snapshot_t *base_snapshot, bool spawn_child, pid_t *child_pid) { opal_crs_self_snapshot_t *snapshot = OBJ_NEW(opal_crs_self_snapshot_t); char **cr_argv = NULL; char * cr_cmd = NULL; int ret; int exit_status = OPAL_SUCCESS; int status; snapshot->super = *base_snapshot; opal_output_verbose(10, mca_crs_self_component.super.output_handle, "crs:self: restart(%d)", spawn_child); /* * If we need to reconstruct the snapshot */ if(snapshot->super.cold_start) { if( OPAL_SUCCESS != (ret = self_cold_start(snapshot)) ){ exit_status = ret; opal_output(mca_crs_self_component.super.output_handle, "crs:blcr: blcr_restart: Unable to reconstruct the snapshot."); goto cleanup; } } /* * JJH: Check to make sure the application exists? */ /* * Get the restart command */ if ( OPAL_SUCCESS != (ret = opal_crs_self_restart_cmd(snapshot, &cr_cmd)) ) { exit_status = ret; goto cleanup; } if ( NULL == (cr_argv = opal_argv_split(cr_cmd, ' ')) ) { exit_status = OPAL_ERROR; goto cleanup; } if (!spawn_child) { opal_output_verbose(10, mca_crs_self_component.super.output_handle, "crs:self: self_restart: SELF: exec :(%s, %s):", strdup(cr_argv[0]), opal_argv_join(cr_argv, ' ')); status = execvp(strdup(cr_argv[0]), cr_argv); if(status < 0) { opal_output(mca_crs_self_component.super.output_handle, "crs:self: self_restart: SELF: Child failed to execute :(%d):", status); } opal_output(mca_crs_self_component.super.output_handle, "crs:self: self_restart: SELF: execvp returned %d", status); exit_status = status; goto cleanup; } else { *child_pid = fork(); if( *child_pid == 0) { /* Child Process */ opal_output_verbose(10, mca_crs_self_component.super.output_handle, "crs:self: self_restart: CHILD: exec :(%s, %s):", strdup(cr_argv[0]), opal_argv_join(cr_argv, ' ')); status = execvp(strdup(cr_argv[0]), cr_argv); if(status < 0) { opal_output(mca_crs_self_component.super.output_handle, "crs:self: self_restart: CHILD: Child failed to execute :(%d):", status); } 
opal_output(mca_crs_self_component.super.output_handle, "crs:self: self_restart: CHILD: execvp returned %d", status); exit_status = status; goto cleanup; } else if(*child_pid > 0) { /* Parent is done once it is started. */ ; } else { opal_output(mca_crs_self_component.super.output_handle, "crs:self: self_restart: CHILD: fork failed :(%d):", *child_pid); } } cleanup: if( NULL != cr_cmd) free(cr_cmd); if( NULL != cr_argv) opal_argv_free(cr_argv); return exit_status; } int opal_crs_self_disable_checkpoint(void) { /* * This function should never be called by a tool */ if( opal_cr_is_tool ) { return OPAL_ERR_NOT_SUPPORTED; } opal_output_verbose(10, mca_crs_self_component.super.output_handle, "crs:self: disable_checkpoint()"); mca_crs_self_component.can_checkpoint = false; return OPAL_SUCCESS; } int opal_crs_self_enable_checkpoint(void) { /* * This function should never be called by a tool */ if( opal_cr_is_tool ) { return OPAL_ERR_NOT_SUPPORTED; } opal_output_verbose(10, mca_crs_self_component.super.output_handle, "crs:self: enable_checkpoint()"); mca_crs_self_component.can_checkpoint = true; return OPAL_SUCCESS; } int opal_crs_self_prelaunch(int32_t rank, char *base_snapshot_dir, char **app, char **cwd, char ***argv, char ***env) { char * tmp_env_var = NULL; /* * This function should never be called by a tool */ if( opal_cr_is_tool ) { return OPAL_ERR_NOT_SUPPORTED; } (void) mca_base_var_env_name("opal_cr_is_tool", &tmp_env_var); opal_setenv(tmp_env_var, "0", true, env); free(tmp_env_var); tmp_env_var = NULL; return OPAL_SUCCESS; } int opal_crs_self_reg_thread(void) { /* * This function should never be called by a tool */ if( opal_cr_is_tool ) { return OPAL_ERR_NOT_SUPPORTED; } return OPAL_SUCCESS; } /****************** * Local functions ******************/ static int crs_self_find_function(char *prefix, char *suffix, opal_crs_self_dlsym_dummy_fn_t *fn_ptr) { char *func_to_find = NULL; if( NULL == prefix || 0 >= strlen(prefix) ) { 
opal_output(mca_crs_self_component.super.output_handle, "crs:self: crs_self_find_function: Error: prefix is NULL or empty string!"); *fn_ptr = NULL; return OPAL_ERROR; } if( NULL == suffix || 0 >= strlen(suffix) ) { opal_output(mca_crs_self_component.super.output_handle, "crs:self: crs_self_find_function: Error: suffix is NULL or empty string!"); *fn_ptr = NULL; return OPAL_ERROR; } opal_output_verbose(10, mca_crs_self_component.super.output_handle, "crs:self: crs_self_find_function(--, %s, %s)", prefix, suffix); opal_asprintf(&func_to_find, "%s_%s", prefix, suffix); /* The RTLD_DEFAULT is a special handle that searches the default libraries * including the current application for the indicated symbol. This allows * us to not have to dlopen/dlclose the executable. A bit of short hand * really. */ *((void**) fn_ptr) = dlsym(RTLD_DEFAULT, func_to_find); if( NULL == fn_ptr) { opal_output_verbose(12, mca_crs_self_component.super.output_handle, "crs:self: crs_self_find_function: WARNING: Function \"%s\" not found", func_to_find); } else { opal_output_verbose(10, mca_crs_self_component.super.output_handle, "crs:self: crs_self_find_function: Found function \"%s\"", func_to_find); } if( NULL == func_to_find) { free(func_to_find); } return OPAL_SUCCESS; } /* * Self is a special case. The 'fname' here is the command line that the user * wishes to execute. This function takes this command line and adds * -mca crs_self_do_restart 1 * Which will trigger the restart callback once the program has been run. * * For example, The user starts their program with: * $ my_prog arg1 arg2 * * They checkpoint it: * $ opal_checkpoint -mca crs self 1234 * * They restart it: * $ opal_restart -mca crs self my_prog arg1 arg2 * * fname is then: * fname = "my_prog arg1 arg2" * * This funciton translates that to the command: * cmd = "my_prog arg1 arg2 -mca crs self -mca crs_self_do_restart 1" * * Which will cause the program "my_prog" to call their restart function * upon opal_init time. 
* * Note: The user could bypass the opal_restart routine safely by simply calling * $ my_prog arg1 arg2 -mca crs self -mca crs_self_do_restart 1 * However, for consistency sake, we should not encourage this as it won't work for * all of the other checkpointers. */ static int opal_crs_self_restart_cmd(opal_crs_self_snapshot_t *snapshot, char **cmd) { char * tmp_env_var = NULL; opal_output_verbose(10, mca_crs_self_component.super.output_handle, "crs:self: restart_cmd(%s, ---)", snapshot->cmd_line); (void) mca_base_var_env_name("crs", &tmp_env_var); opal_setenv(tmp_env_var, "self", true, &environ); free(tmp_env_var); tmp_env_var = NULL; (void) mca_base_var_env_name("crs_self_do_restart", &tmp_env_var); opal_setenv(tmp_env_var, "1", true, &environ); free(tmp_env_var); tmp_env_var = NULL; (void) mca_base_var_env_name("crs_self_prefix", &tmp_env_var); opal_setenv(tmp_env_var, mca_crs_self_component.prefix, true, &environ); free(tmp_env_var); tmp_env_var = NULL; /* Instead of adding it to the command line, we should use the environment * to pass the values. 
This allow sthe OPAL application to be braindead * WRT MCA parameters * add_args = strdup("-mca crs self -mca crs_self_do_restart 1"); */ opal_asprintf(cmd, "%s", snapshot->cmd_line); return OPAL_SUCCESS; } static int self_cold_start(opal_crs_self_snapshot_t *snapshot) { int ret, exit_status = OPAL_SUCCESS; char **tmp_argv = NULL; char * component_name = NULL; int prev_pid; opal_output_verbose(10, mca_crs_self_component.super.output_handle, "crs:self: cold_start()"); /* * Find the snapshot directory, read the metadata file */ if( NULL == snapshot->super.metadata ) { if (NULL == (snapshot->super.metadata = fopen(snapshot->super.metadata_filename, "a")) ) { opal_output(mca_crs_self_component.super.output_handle, "crs:self: checkpoint(): Error: Unable to open the file (%s)", snapshot->super.metadata_filename); exit_status = OPAL_ERROR; goto cleanup; } } if( OPAL_SUCCESS != (ret = opal_crs_base_extract_expected_component(snapshot->super.metadata, &component_name, &prev_pid) ) ) { opal_output(mca_crs_self_component.super.output_handle, "crs:self: self_cold_start: Error: Failed to extract the metadata from the local snapshot (%s). Returned %d.", snapshot->super.metadata_filename, ret); exit_status = ret; goto cleanup; } snapshot->super.component_name = strdup(component_name); /* Compare the strings to make sure this is our snapshot before going further */ if ( 0 != strncmp(mca_crs_self_component.super.base_version.mca_component_name, component_name, strlen(component_name)) ) { exit_status = OPAL_ERROR; opal_output(mca_crs_self_component.super.output_handle, "crs:self: self_cold_start: Error: This snapshot (%s) is not intended for us (%s)\n", component_name, mca_crs_self_component.super.base_version.mca_component_name); goto cleanup; } /* * Restart command * JJH: Command lines limited to 256 chars. 
*/ opal_crs_base_metadata_read_token(snapshot->super.metadata, CRS_METADATA_CONTEXT, &tmp_argv); if( NULL == tmp_argv ) { opal_output(mca_crs_self_component.super.output_handle, "crs:self: self_cold_start: Error: Failed to read the %s token from the local checkpoint in %s", CRS_METADATA_CONTEXT, snapshot->super.snapshot_directory); exit_status = OPAL_ERROR; goto cleanup; } opal_asprintf(&snapshot->cmd_line, "%s", tmp_argv[0]); /* * Reset the cold_start flag */ snapshot->super.cold_start = false; cleanup: if(NULL != tmp_argv) { opal_argv_free(tmp_argv); tmp_argv = NULL; } return exit_status; } static int self_update_snapshot_metadata(opal_crs_self_snapshot_t *snapshot) { int exit_status = OPAL_SUCCESS; if(NULL == snapshot->cmd_line) { opal_show_help("help-opal-crs-self.txt", "self:no-restart-cmd", true); exit_status = OPAL_ERROR; goto cleanup; } opal_output_verbose(10, mca_crs_self_component.super.output_handle, "crs:self: update_snapshot_metadata(%s)", snapshot->super.metadata_filename); /* * Append to the metadata file the command line to restart with * - How user wants us to restart */ fprintf(snapshot->super.metadata, "%s%s\n", CRS_METADATA_CONTEXT, snapshot->cmd_line); cleanup: return exit_status; }