From da2f1c58e2acf256ba9c3b1c6d43be7fd72267a1 Mon Sep 17 00:00:00 2001 From: Josh Hursey Date: Thu, 8 May 2008 18:47:47 +0000 Subject: [PATCH] Some checkpoint/restart cleanup. * Remove the opal_only option. This was suffering from bit rot, and no one uses it. It can be added back fairly easily if wanted. * Cleanup metadata interactions at the local level. * Touch up some of the INC functionality (fix typos and a minor ordering issue) This commit was SVN r18416. --- ompi/mca/bml/r2/bml_r2_ft.c | 41 ++- ompi/mca/pml/ob1/pml_ob1.c | 4 +- ompi/proc/proc.c | 3 + opal/mca/crs/base/base.h | 38 +- opal/mca/crs/base/crs_base_fns.c | 425 +++++++++++++++++----- opal/mca/crs/blcr/crs_blcr_module.c | 83 ++--- opal/mca/crs/self/crs_self_module.c | 78 ++-- opal/runtime/opal_cr.c | 482 +------------------------ opal/runtime/opal_cr.h | 31 +- opal/tools/opal-restart/opal-restart.c | 91 ++++- orte/mca/snapc/base/snapc_base_fns.c | 5 +- orte/mca/snapc/full/snapc_full_app.c | 11 +- 12 files changed, 577 insertions(+), 715 deletions(-) diff --git a/ompi/mca/bml/r2/bml_r2_ft.c b/ompi/mca/bml/r2/bml_r2_ft.c index 18a54c3709..4c9eb8306a 100644 --- a/ompi/mca/bml/r2/bml_r2_ft.c +++ b/ompi/mca/bml/r2/bml_r2_ft.c @@ -35,6 +35,7 @@ #include "ompi/mca/pml/pml.h" #include "ompi/mca/pml/base/base.h" #include "orte/mca/rml/rml.h" +#include "orte/mca/grpcomm/grpcomm.h" #include "ompi/proc/proc.h" #include "bml_r2.h" @@ -93,18 +94,6 @@ int mca_bml_r2_ft_event(int state) * preparation for being shut down. 
*/ for(btl_idx = 0; btl_idx < mca_bml_r2.num_btl_modules; btl_idx++) { - /* - * Notify BTL - */ - if( NULL != (mca_bml_r2.btl_modules[btl_idx])->btl_ft_event) { - opal_output_verbose(10, ompi_cr_output, - "bml:r2: ft_event: Notify the %s BTL.\n", - (mca_bml_r2.btl_modules[btl_idx])->btl_component->btl_version.mca_component_name); - if(OMPI_SUCCESS != (ret = (mca_bml_r2.btl_modules[btl_idx])->btl_ft_event(loc_state) ) ) { - continue; - } - } - /* * Notify Mpool */ @@ -117,6 +106,18 @@ int mca_bml_r2_ft_event(int state) continue; } } + + /* + * Notify BTL + */ + if( NULL != (mca_bml_r2.btl_modules[btl_idx])->btl_ft_event) { + opal_output_verbose(10, ompi_cr_output, + "bml:r2: ft_event: Notify the %s BTL.\n", + (mca_bml_r2.btl_modules[btl_idx])->btl_component->btl_version.mca_component_name); + if(OMPI_SUCCESS != (ret = (mca_bml_r2.btl_modules[btl_idx])->btl_ft_event(loc_state) ) ) { + continue; + } + } } } @@ -140,7 +141,7 @@ int mca_bml_r2_ft_event(int state) } opal_output_verbose(10, ompi_cr_output, - "bml:r2: ft_event(reboot): Reselect BTLs\n"); + "bml:r2: ft_event(Restart): Reselect BTLs\n"); /* * Close the BTLs @@ -157,6 +158,20 @@ int mca_bml_r2_ft_event(int state) } else if(OPAL_CRS_RESTART == state ) { + + /* + * Barrier to make all processes have been successfully restarted before + * we try to remove some restart only files. + */ + if (OMPI_SUCCESS != (ret = orte_grpcomm.barrier())) { + opal_output(0, "bml:r2: ft_event(Restart): Failed in orte_grpcomm.barrier (%d)", ret); + return ret; + } + + opal_output_verbose(10, ompi_cr_output, + "bml:r2: ft_event(Restart): Cleanup restart files\n"); + opal_crs_base_cleanup_flush(); + /* * Re-open the BTL framework to get the full list of components. 
*/ diff --git a/ompi/mca/pml/ob1/pml_ob1.c b/ompi/mca/pml/ob1/pml_ob1.c index 09829b46ef..37b13d247d 100644 --- a/ompi/mca/pml/ob1/pml_ob1.c +++ b/ompi/mca/pml/ob1/pml_ob1.c @@ -542,13 +542,13 @@ int mca_pml_ob1_ft_event( int state ) * Add the new procs (BTLs redo modex recv's) */ if( OMPI_SUCCESS != (ret = mca_pml_ob1_add_procs(procs, num_procs) ) ) { - opal_output(0, "pml:ob1: fr_event(Restart): Failed in add_procs (%d)", ret); + opal_output(0, "pml:ob1: ft_event(Restart): Failed in add_procs (%d)", ret); return ret; } /* Is this barrier necessary ? JJH */ if (OMPI_SUCCESS != (ret = orte_grpcomm.barrier())) { - opal_output(0, "pml:ob1: fr_event(Restart): Failed in orte_grpcomm.barrier (%d)", ret); + opal_output(0, "pml:ob1: ft_event(Restart): Failed in orte_grpcomm.barrier (%d)", ret); return ret; } diff --git a/ompi/proc/proc.c b/ompi/proc/proc.c index 5fd5356926..b97204bb6f 100644 --- a/ompi/proc/proc.c +++ b/ompi/proc/proc.c @@ -281,6 +281,9 @@ int ompi_proc_refresh(void) { /* Does not change: proc->proc_name.vpid */ proc->proc_name.jobid = ORTE_PROC_MY_NAME->jobid; + /* Make sure to clear the local flag before we set it below */ + proc->proc_flags = 0; + if (i == ORTE_PROC_MY_NAME->vpid) { ompi_proc_local_proc = proc; proc->proc_flags |= OMPI_PROC_FLAG_LOCAL; diff --git a/opal/mca/crs/base/base.h b/opal/mca/crs/base/base.h index 65ef2ed0da..2d96a548f9 100644 --- a/opal/mca/crs/base/base.h +++ b/opal/mca/crs/base/base.h @@ -32,6 +32,14 @@ extern "C" { #endif +/* Some local strings to use genericly with the local metadata file */ +#define CRS_METADATA_BASE ("# ") +#define CRS_METADATA_COMP ("# Component: ") +#define CRS_METADATA_PID ("# PID: ") +#define CRS_METADATA_CONTEXT ("# CONTEXT: ") +#define CRS_METADATA_MKDIR ("# MKDIR: ") +#define CRS_METADATA_TOUCH ("# TOUCH: ") + /** * Initialize the CRS MCA framework * @@ -106,24 +114,32 @@ extern "C" { OPAL_DECLSPEC char * opal_crs_base_state_str(opal_crs_state_type_t state); OPAL_DECLSPEC char * 
opal_crs_base_unique_snapshot_name(pid_t pid); - OPAL_DECLSPEC char * opal_crs_base_extract_expected_component(char *snapshot_loc, int *prev_pid); + OPAL_DECLSPEC int opal_crs_base_extract_expected_component(char *snapshot_loc, char ** component_name, int *prev_pid); OPAL_DECLSPEC int opal_crs_base_init_snapshot_directory(opal_crs_base_snapshot_t *snapshot); OPAL_DECLSPEC char * opal_crs_base_get_snapshot_directory(char *uniq_snapshot_name); - /* Opens the metadata file and places all the base information in the file. - * Options: - * 'w' = Open for writing - * 'a' = Open for writing and appending information + /* + * Read a token from the metadata file + * NULL can be passed for snapshot_loc if opal_crs_base_init_snapshot_directory has been called. */ - OPAL_DECLSPEC FILE *opal_crs_base_open_metadata(opal_crs_base_snapshot_t *snapshot, char mode ); + OPAL_DECLSPEC int opal_crs_base_metadata_read_token(char *snapshot_loc, char * token, char ***value); - /* Open the metadata file, read off the base information and - * return the component and previous pid to the caller. - * Note: component is allocated inside this function, it is the - * callers responsibility to free this memory. + /* + * Write a token to the metadata file + * NULL can be passed for snapshot_loc if opal_crs_base_init_snapshot_directory has been called. */ - OPAL_DECLSPEC FILE * opal_crs_base_open_read_metadata(char *location, char **component, int *prev_pid); + OPAL_DECLSPEC int opal_crs_base_metadata_write_token(char *snapshot_loc, char * token, char *value); + /* + * Register a file for cleanup. + * Useful in C/R when files only need to temporarily exist for restart + */ + OPAL_DECLSPEC int opal_crs_base_cleanup_append(char* filename, bool is_dir); + + /* + * Flush the cleanup of all registered files. 
+ */ + OPAL_DECLSPEC int opal_crs_base_cleanup_flush(void); #if defined(c_plusplus) || defined(__cplusplus) } diff --git a/opal/mca/crs/base/crs_base_fns.c b/opal/mca/crs/base/crs_base_fns.c index b0f67a5061..aca7bd7ce1 100644 --- a/opal/mca/crs/base/crs_base_fns.c +++ b/opal/mca/crs/base/crs_base_fns.c @@ -24,16 +24,36 @@ #if HAVE_UNISTD_H #include #endif +#ifdef HAVE_FCNTL_H +#include +#endif /* HAVE_FCNTL_H */ +#ifdef HAVE_SYS_STAT_H +#include +#endif #include "opal/mca/mca.h" #include "opal/mca/base/base.h" #include "opal/include/opal/constants.h" #include "opal/util/os_dirpath.h" #include "opal/util/output.h" +#include "opal/util/argv.h" #include "opal/mca/crs/crs.h" #include "opal/mca/crs/base/base.h" +/****************** + * Local Functions + ******************/ +static int metadata_extract_next_token(FILE *file, char **token, char **value); +static int opal_crs_base_metadata_open(FILE ** meta_data, char * location, char * mode); + +static char *last_metadata_file = NULL; +static char **cleanup_file_argv = NULL; +static char **cleanup_dir_argv = NULL; + +/****************** + * Object stuff + ******************/ static void opal_crs_base_construct(opal_crs_base_snapshot_t *snapshot) { snapshot->component_name = NULL; @@ -198,78 +218,112 @@ char * opal_crs_base_unique_snapshot_name(pid_t pid) return loc_str; } -FILE * opal_crs_base_open_read_metadata(char * location, char **component, int *prev_pid) -{ - char * dir_name = NULL; - char * content = NULL; - char * tmp_str = NULL; - int len = 0; +int opal_crs_base_metadata_read_token(char *snapshot_loc, char * token, char ***value) { + int ret, exit_status = OPAL_SUCCESS; FILE * meta_data = NULL; + char * loc_token = NULL; + char * loc_value = NULL; + int argc = 0; - *component = NULL; - *prev_pid = -1; + /* Dummy check */ + if( NULL == token ) { + goto cleanup; + } /* - * Find the snapshot directory, read the metadata file + * Open the metadata file */ - asprintf(&dir_name, "%s/%s", location, 
opal_crs_base_metadata_filename); - if (NULL == (meta_data = fopen(dir_name, "r")) ) { + if( OPAL_SUCCESS != (ret = opal_crs_base_metadata_open(&meta_data, snapshot_loc, "r")) ) { + opal_output(opal_crs_base_output, + "opal:crs:base: opal_crs_base_metadata_read_token: Error: Unable to open the metadata file\n"); + exit_status = ret; goto cleanup; } - /* - * Component Name - */ - len = 32; /* Max size for a CRS component name */ - content = (char *) malloc(sizeof(char) * len); - if (NULL == fgets(content, len, meta_data) ) { - free(content); - content = NULL; - goto cleanup; - } - /* Strip of newline */ - len = strlen(content); - content[len - 1] = '\0'; - - *component = strdup(content); - /* - * Get the PID + * Extract each token and make the records */ - len = 128; - tmp_str = (char *) malloc(sizeof(char) * len); - if (NULL == fgets(tmp_str, len, meta_data) ) { - goto cleanup; - } - /* Strip of newline */ - len = strlen(tmp_str); - if(tmp_str[len - 1] == '\n') - tmp_str[len - 1] = '\0'; - *prev_pid = atoi(tmp_str); + do { + /* Get next token */ + if( OPAL_SUCCESS != metadata_extract_next_token(meta_data, &loc_token, &loc_value) ) { + break; + } + /* Check token to see if it matches */ + if(0 == strncmp(token, loc_token, strlen(loc_token)) ) { + opal_argv_append(&argc, value, loc_value); + } + } while(0 == feof(meta_data) ); + cleanup: - return meta_data; + if(NULL != meta_data) { + fclose(meta_data); + meta_data = NULL; + } + + return exit_status; } -char * opal_crs_base_extract_expected_component(char *snapshot_loc, int *prev_pid) -{ +int opal_crs_base_metadata_write_token(char *snapshot_loc, char * token, char *value) { + int ret, exit_status = OPAL_SUCCESS; FILE * meta_data = NULL; - char * component_name = NULL; - - *prev_pid = -1; - if( NULL == (meta_data = opal_crs_base_open_read_metadata(snapshot_loc, &component_name, prev_pid)) ) { - opal_output(opal_crs_base_output, - "opal:crs:base: extract_expected_component: Error: Unable to open the file (%s)\n", - 
snapshot_loc); + /* Dummy check */ + if( NULL == token || NULL == value) { goto cleanup; } - cleanup: - if(NULL != meta_data) - fclose(meta_data); + /* + * Open the metadata file + */ + if( OPAL_SUCCESS != (ret = opal_crs_base_metadata_open(&meta_data, snapshot_loc, "a")) ) { + opal_output(opal_crs_base_output, + "opal:crs:base: opal_crs_base_metadata_write_token: Error: Unable to open the metadata file\n"); + exit_status = ret; + goto cleanup; + } - return component_name; + fprintf(meta_data, "%s%s\n", token, value); + + cleanup: + if(NULL != meta_data) { + fclose(meta_data); + meta_data = NULL; + } + + return exit_status; +} + +int opal_crs_base_extract_expected_component(char *snapshot_loc, char ** component_name, int *prev_pid) +{ + char **pid_argv = NULL; + char **name_argv = NULL; + + opal_crs_base_metadata_read_token(snapshot_loc, CRS_METADATA_PID, &pid_argv); + if( NULL != pid_argv && NULL != pid_argv[0] ) { + *prev_pid = atoi(pid_argv[0]); + } else { + opal_output(0, "Error: expected_component: PID information unavailable!"); + } + + opal_crs_base_metadata_read_token(snapshot_loc, CRS_METADATA_COMP, &name_argv); + if( NULL != name_argv && NULL != name_argv[0] ) { + *component_name = strdup(name_argv[0]); + } else { + opal_output(0, "Error: expected_component: Component Name information unavailable!"); + } + + if( NULL != pid_argv ) { + opal_argv_free(pid_argv); + pid_argv = NULL; + } + + if( NULL != name_argv ) { + opal_argv_free(name_argv); + name_argv = NULL; + } + + return OPAL_SUCCESS; } char * opal_crs_base_get_snapshot_directory(char *uniq_snapshot_name) @@ -283,77 +337,113 @@ char * opal_crs_base_get_snapshot_directory(char *uniq_snapshot_name) int opal_crs_base_init_snapshot_directory(opal_crs_base_snapshot_t *snapshot) { - mode_t my_mode = S_IRWXU; int ret, exit_status = OPAL_SUCCESS; - FILE * meta_data = NULL; + mode_t my_mode = S_IRWXU; + char * pid_str = NULL; /* * Make the snapshot directory from the uniq_snapshot_name */ if(OPAL_SUCCESS != 
(ret = opal_os_dirpath_create(snapshot->local_location, my_mode)) ) { + opal_output(opal_crs_base_output, + "opal:crs:base: init_snapshot_directory: Error: Unable to create directory (%s)\n", + snapshot->local_location); exit_status = ret; goto cleanup; } /* * Initialize the metadata file at the top of that directory. + * Add 'BASE' and 'PID' */ - if (NULL == (meta_data = opal_crs_base_open_metadata(snapshot, 'w') ) ) { + if( NULL != last_metadata_file ) { + free(last_metadata_file); + last_metadata_file = NULL; + } + last_metadata_file = strdup(snapshot->local_location); + + if( OPAL_SUCCESS != (ret = opal_crs_base_metadata_write_token(NULL, CRS_METADATA_BASE, "") ) ) { opal_output(opal_crs_base_output, - "opal:crs:base: init_snapshot_directory: Error: Unable to open the file (%s/%s)\n", + "opal:crs:base: init_snapshot_directory: Error: Unable to write BASE to the file (%s/%s)\n", snapshot->local_location, opal_crs_base_metadata_filename); - exit_status = OPAL_ERROR; + exit_status = ret; goto cleanup; } - + + asprintf(&pid_str, "%d", getpid()); + if( OPAL_SUCCESS != (ret = opal_crs_base_metadata_write_token(NULL, CRS_METADATA_PID, pid_str) ) ) { + opal_output(opal_crs_base_output, + "opal:crs:base: init_snapshot_directory: Error: Unable to write PID (%s) to the file (%s/%s)\n", + pid_str, snapshot->local_location, opal_crs_base_metadata_filename); + exit_status = ret; + goto cleanup; + } + cleanup: - if(NULL != meta_data) - fclose(meta_data); + if( NULL != pid_str) { + free(pid_str); + pid_str = NULL; + } return OPAL_SUCCESS; } -FILE *opal_crs_base_open_metadata(opal_crs_base_snapshot_t *snapshot, char mode ) +int opal_crs_base_cleanup_append(char* filename, bool is_dir) { - char *meta_data_fname = NULL; - FILE * meta_data = NULL; + if( NULL == filename ) { + return OPAL_SUCCESS; + } + + if( is_dir ) { + opal_output_verbose(15, opal_crs_base_output, + "opal:crs: cleanup_append: Append Dir <%s>\n", + filename); + opal_argv_append_nosize(&cleanup_dir_argv, 
filename); + } else { + opal_output_verbose(15, opal_crs_base_output, + "opal:crs: cleanup_append: Append File <%s>\n", + filename); + opal_argv_append_nosize(&cleanup_file_argv, filename); + } + + return OPAL_SUCCESS; +} + +int opal_crs_base_cleanup_flush(void) +{ + int argc, i; /* - * Construct path + * Cleanup files first */ - asprintf(&meta_data_fname, "%s/%s", snapshot->local_location, opal_crs_base_metadata_filename); + if( NULL != cleanup_file_argv ) { + argc = opal_argv_count(cleanup_file_argv); + for( i = 0; i < argc; ++i) { + opal_output_verbose(15, opal_crs_base_output, + "opal:crs: cleanup_flush: Remove File <%s>\n", cleanup_dir_argv[i]); + unlink(cleanup_file_argv[i]); + } + + opal_argv_free(cleanup_file_argv); + cleanup_file_argv = NULL; + } /* - * Open the metadata file + * Try to cleanup directories next */ - if( mode == 'w' ) { - meta_data = fopen(meta_data_fname, "w"); - } - else if( mode == 'a' ) { - meta_data = fopen(meta_data_fname, "a"); + if( NULL != cleanup_dir_argv ) { + argc = opal_argv_count(cleanup_dir_argv); + for( i = 0; i < argc; ++i) { + opal_output_verbose(15, opal_crs_base_output, + "opal:crs: cleanup_flush: Remove Dir <%s>\n", cleanup_dir_argv[i]); + opal_os_dirpath_destroy(cleanup_dir_argv[i], true, NULL); + } + + opal_argv_free(cleanup_dir_argv); + cleanup_dir_argv = NULL; } - if (NULL == meta_data ) { - opal_output(opal_crs_base_output, - "opal:crs:base: open_metadata (%c): Error: Unable to open the file (%s)\n", - mode, meta_data_fname); - goto cleanup; - } - - if( mode == 'w' ) { - /* - * The first line is the component name, - * everything else here is defined by the component - */ - fprintf(meta_data, "%s\n", snapshot->component_name); - fprintf(meta_data, "%d\n", getpid()); - } - - cleanup: - if(NULL != meta_data_fname) - free(meta_data_fname); - - return meta_data; + return OPAL_SUCCESS; } char * opal_crs_base_state_str(opal_crs_state_type_t state) @@ -386,3 +476,154 @@ char * opal_crs_base_state_str(opal_crs_state_type_t 
state) return str; } + +/****************** + * Local Functions + ******************/ +static int opal_crs_base_metadata_open(FILE **meta_data, char * location, char * mode) +{ + int exit_status = OPAL_SUCCESS; + char * dir_name = NULL; + + if( NULL == location ) { + if( NULL == last_metadata_file ) { + opal_output(0, "Error: No metadata filename specified!"); + exit_status = OPAL_ERROR; + goto cleanup; + } else { + location = last_metadata_file; + } + } + + /* + * Find the snapshot directory, read the metadata file + */ + asprintf(&dir_name, "%s/%s", location, opal_crs_base_metadata_filename); + if (NULL == (*meta_data = fopen(dir_name, mode)) ) { + exit_status = OPAL_ERROR; + goto cleanup; + } + + cleanup: + if( NULL != dir_name ) { + free(dir_name); + dir_name = NULL; + } + return exit_status; +} + +static int metadata_extract_next_token(FILE *file, char **token, char **value) +{ + int exit_status = OPAL_SUCCESS; + int max_len = 256; + char * line = NULL; + int line_len = 0; + int c = 0, s = 0, v = 0; + char *local_token = NULL; + char *local_value = NULL; + bool end_of_line = false; + + line = (char *) malloc(sizeof(char) * max_len); + + try_again: + /* + * If we are at the end of the file, then just return + */ + if(0 != feof(file) ) { + exit_status = OPAL_ERROR; + goto cleanup; + } + + /* + * Other wise grab the next token/value pair + */ + if (NULL == fgets(line, max_len, file) ) { + exit_status = OPAL_ERROR; + goto cleanup; + } + line_len = strlen(line); + /* Strip off the new line if it it there */ + if('\n' == line[line_len-1]) { + line[line_len-1] = '\0'; + line_len--; + end_of_line = true; + } + else { + end_of_line = false; + } + + /* Ignore lines with just '#' too */ + if(2 >= line_len) + goto try_again; + + /* + * Extract the token from the set + */ + for(c = 0; + line[c] != ':' && + c < line_len; + ++c) { + ; + } + c += 2; /* For the ' ' and the '\0' */ + local_token = (char *)malloc(sizeof(char) * (c + 1)); + + for(s = 0; s < c; ++s) { + 
local_token[s] = line[s]; + } + + local_token[s] = '\0'; + *token = strdup(local_token); + + if( NULL != local_token) { + free(local_token); + local_token = NULL; + } + + /* + * Extract the value from the set + */ + local_value = (char *)malloc(sizeof(char) * (line_len - c + 1)); + for(v = 0, s = c; + s < line_len; + ++s, ++v) { + local_value[v] = line[s]; + } + + while(!end_of_line) { + if (NULL == fgets(line, max_len, file) ) { + exit_status = OPAL_ERROR; + goto cleanup; + } + line_len = strlen(line); + /* Strip off the new line if it it there */ + if('\n' == line[line_len-1]) { + line[line_len-1] = '\0'; + line_len--; + end_of_line = true; + } + else { + end_of_line = false; + } + + local_value = (char *)realloc(local_value, sizeof(char) * line_len); + for(s = 0; + s < line_len; + ++s, ++v) { + local_value[v] = line[s]; + } + } + + local_value[v] = '\0'; + *value = strdup(local_value); + + cleanup: + if( NULL != local_token) + free(local_token); + if( NULL != local_value) + free(local_value); + if( NULL != line) + free(line); + + return exit_status; +} diff --git a/opal/mca/crs/blcr/crs_blcr_module.c b/opal/mca/crs/blcr/crs_blcr_module.c index f7cebdaaea..13e7c10561 100644 --- a/opal/mca/crs/blcr/crs_blcr_module.c +++ b/opal/mca/crs/blcr/crs_blcr_module.c @@ -252,7 +252,7 @@ int opal_crs_blcr_module_finalize(void) int opal_crs_blcr_checkpoint(pid_t pid, opal_crs_base_snapshot_t *base_snapshot, opal_crs_state_type_t *state) { - int ret; + int ret, exit_status = OPAL_SUCCESS; opal_crs_blcr_snapshot_t *snapshot = OBJ_NEW(opal_crs_blcr_snapshot_t); char * tmp_str = NULL; @@ -272,15 +272,15 @@ int opal_crs_blcr_checkpoint(pid_t pid, opal_crs_base_snapshot_t *base_snapshot, snapshot->super.remote_location = strdup(base_snapshot->remote_location); /* - * Create the snapshot directory + * Update the snapshot metadata */ snapshot->super.component_name = strdup(mca_crs_blcr_component.super.base_version.mca_component_name); - if( OPAL_SUCCESS != (ret = 
opal_crs_base_init_snapshot_directory(&snapshot->super) )) { - *state = OPAL_CRS_ERROR; + if( OPAL_SUCCESS != (ret = opal_crs_base_metadata_write_token(NULL, CRS_METADATA_COMP, snapshot->super.component_name) ) ) { opal_output(mca_crs_blcr_component.super.output_handle, - "crs:blcr: checkpoint(): Error: Unable to initialize the directory for (%s).", + "crs:blcr: checkpoint(): Error: Unable to write component name to the directory for (%s).", snapshot->super.reference_name); - return ret; + exit_status = ret; + goto cleanup; } /* @@ -328,7 +328,8 @@ int opal_crs_blcr_checkpoint(pid_t pid, opal_crs_base_snapshot_t *base_snapshot, opal_output(mca_crs_blcr_component.super.output_handle, "crs:blcr: checkpoint(): Error: Unable to checkpoint pid (%d)", pid); - return ret; + exit_status = ret; + goto cleanup; } *state = blcr_current_state; @@ -347,7 +348,9 @@ int opal_crs_blcr_checkpoint(pid_t pid, opal_crs_base_snapshot_t *base_snapshot, snapshot->context_filename, tmp_str, ret); perror("crs:blcr: checkpoint"); free(tmp_str); - return ret; + + exit_status = ret; + goto cleanup; } /* @@ -358,7 +361,8 @@ int opal_crs_blcr_checkpoint(pid_t pid, opal_crs_base_snapshot_t *base_snapshot, opal_output(mca_crs_blcr_component.super.output_handle, "crs:blcr: checkpoint(): Error: Unable to update metadata for snapshot (%s).", snapshot->super.reference_name); - return ret; + exit_status = ret; + goto cleanup; } } @@ -367,10 +371,13 @@ int opal_crs_blcr_checkpoint(pid_t pid, opal_crs_base_snapshot_t *base_snapshot, */ base_snapshot = &(snapshot->super); - if(NULL != tmp_str) + cleanup: + if(NULL != tmp_str) { free(tmp_str); + tmp_str = NULL; + } - return OPAL_SUCCESS; + return exit_status; } int opal_crs_blcr_restart(opal_crs_base_snapshot_t *base_snapshot, bool spawn_child, pid_t *child_pid) @@ -760,8 +767,6 @@ static int blcr_get_checkpoint_filename(char **fname, pid_t pid) } static int blcr_update_snapshot_metadata(opal_crs_blcr_snapshot_t *snapshot) { - char * dir_name = NULL; - 
FILE * meta_data = NULL; int exit_status = OPAL_SUCCESS; opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, @@ -779,34 +784,19 @@ static int blcr_update_snapshot_metadata(opal_crs_blcr_snapshot_t *snapshot) { } /* - * Append to the metadata file: - * the relative path of the context filename + * Append to the metadata file the context filename */ - if( NULL == (meta_data = opal_crs_base_open_metadata(&snapshot->super, 'w') ) ) { - exit_status = OPAL_ERROR; - goto cleanup; - } - - /* Context Filename -- Relative path */ - fprintf(meta_data, "%s\n", snapshot->context_filename); - + opal_crs_base_metadata_write_token(snapshot->super.local_location, CRS_METADATA_CONTEXT, snapshot->context_filename); cleanup: - if(NULL != meta_data) - fclose(meta_data); - if(NULL != dir_name) - free(dir_name); - return exit_status; } static int blcr_cold_start(opal_crs_blcr_snapshot_t *snapshot) { - char * content = NULL; + int ret, exit_status = OPAL_SUCCESS; + char **tmp_argv = NULL; char * component_name = NULL; int prev_pid; - int len = 0; - FILE * meta_data = NULL; - int exit_status = OPAL_SUCCESS; opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, "crs:blcr: cold_start(%s)", snapshot->super.reference_name); @@ -814,9 +804,9 @@ static int blcr_cold_start(opal_crs_blcr_snapshot_t *snapshot) { /* * Find the snapshot directory, read the metadata file */ - if( NULL == (meta_data = opal_crs_base_open_read_metadata(snapshot->super.local_location, - &component_name, &prev_pid) ) ) { - exit_status = OPAL_ERROR; + if( OPAL_SUCCESS != (ret = opal_crs_base_extract_expected_component(snapshot->super.local_location, + &component_name, &prev_pid) ) ) { + exit_status = ret; goto cleanup; } @@ -835,19 +825,8 @@ static int blcr_cold_start(opal_crs_blcr_snapshot_t *snapshot) { /* * Context Filename */ - len = 256; /* Max size for a BLCR filename */ - content = (char *) malloc(sizeof(char) * len); - if (NULL == fgets(content, len, meta_data) ) { - free(content); - 
content = NULL; - goto cleanup; - } - /* Strip of newline */ - len = strlen(content); - content[len - 1] = '\0'; - - /* save the filename in the structure */ - asprintf(&snapshot->context_filename, "%s/%s", snapshot->super.local_location, content); + opal_crs_base_metadata_read_token(snapshot->super.local_location, CRS_METADATA_CONTEXT, &tmp_argv); + asprintf(&snapshot->context_filename, "%s/%s", snapshot->super.local_location, tmp_argv[0]); /* * Reset the cold_start flag @@ -855,10 +834,10 @@ static int blcr_cold_start(opal_crs_blcr_snapshot_t *snapshot) { snapshot->super.cold_start = false; cleanup: - if(NULL != meta_data) - fclose(meta_data); - if(NULL != content) - free(content); + if(NULL != tmp_argv) { + opal_argv_free(tmp_argv); + tmp_argv = NULL; + } return exit_status; } diff --git a/opal/mca/crs/self/crs_self_module.c b/opal/mca/crs/self/crs_self_module.c index 20d2a94da6..10aa0b09f5 100644 --- a/opal/mca/crs/self/crs_self_module.c +++ b/opal/mca/crs/self/crs_self_module.c @@ -291,13 +291,12 @@ int opal_crs_self_checkpoint(pid_t pid, opal_crs_base_snapshot_t *base_snapshot, } /* - * Create the snapshot directory + * Update the snapshot metadata */ snapshot->super.component_name = strdup(mca_crs_self_component.super.base_version.mca_component_name); - if( OPAL_SUCCESS != (ret = opal_crs_base_init_snapshot_directory(&snapshot->super) )) { - *state = OPAL_CRS_ERROR; + if( OPAL_SUCCESS != (ret = opal_crs_base_metadata_write_token(NULL, CRS_METADATA_COMP, snapshot->super.component_name) ) ) { opal_output(mca_crs_self_component.super.output_handle, - "crs:self: checkpoint(): Error: Unable to initialize the directory for (%s).", + "crs:self: checkpoint(): Error: Unable to write component name to the directory for (%s).", snapshot->super.reference_name); exit_status = ret; goto cleanup; @@ -592,12 +591,10 @@ static int opal_crs_self_restart_cmd(opal_crs_self_snapshot_t *snapshot, char ** } static int self_cold_start(opal_crs_self_snapshot_t *snapshot) { - char * 
content = NULL; + int ret, exit_status = OPAL_SUCCESS; + char **tmp_argv = NULL; char * component_name = NULL; int prev_pid; - int len = 0; - FILE * meta_data = NULL; - int exit_status = OPAL_SUCCESS; opal_output_verbose(10, mca_crs_self_component.super.output_handle, "crs:self: cold_start(%s)", snapshot->super.reference_name); @@ -605,9 +602,9 @@ static int self_cold_start(opal_crs_self_snapshot_t *snapshot) { /* * Find the snapshot directory, read the metadata file */ - if( NULL == (meta_data = opal_crs_base_open_read_metadata(snapshot->super.local_location, - &component_name, &prev_pid) ) ) { - exit_status = OPAL_ERROR; + if( OPAL_SUCCESS != (ret = opal_crs_base_extract_expected_component(snapshot->super.local_location, + &component_name, &prev_pid) ) ) { + exit_status = ret; goto cleanup; } @@ -627,19 +624,8 @@ static int self_cold_start(opal_crs_self_snapshot_t *snapshot) { * Restart command * JJH: Command lines limited to 256 chars. */ - len = 256; /* Max size for a SELF filename */ - content = (char *) malloc(sizeof(char) * len); - if (NULL == fgets(content, len, meta_data) ) { - free(content); - content = NULL; - goto cleanup; - } - /* Strip of newline */ - len = strlen(content); - content[len - 1] = '\0'; - - /* save the command line in the structure */ - asprintf(&snapshot->cmd_line, "%s", content); + opal_crs_base_metadata_read_token(snapshot->super.local_location, CRS_METADATA_CONTEXT, &tmp_argv); + asprintf(&snapshot->cmd_line, "%s", tmp_argv[0]); /* * Reset the cold_start flag @@ -647,51 +633,35 @@ static int self_cold_start(opal_crs_self_snapshot_t *snapshot) { snapshot->super.cold_start = false; cleanup: - if(NULL != meta_data) - fclose(meta_data); - if(NULL != content) - free(content); + if(NULL != tmp_argv) { + opal_argv_free(tmp_argv); + tmp_argv = NULL; + } return exit_status; } static int self_update_snapshot_metadata(opal_crs_self_snapshot_t *snapshot) { - char * dir_name = NULL; - FILE *meta_data = NULL; int exit_status = OPAL_SUCCESS; + 
if(NULL == snapshot->cmd_line) { + opal_show_help("help-opal-crs-self.txt", "self:no-restart-cmd", + true); + exit_status = OPAL_ERROR; + goto cleanup; + } + opal_output_verbose(10, mca_crs_self_component.super.output_handle, "crs:self: update_snapshot_metadata(%s)", snapshot->super.reference_name); /* - * Append to the metadata file: - * the relative path of the context filename + * Append to the metadata file the command line to restart with + * - How user wants us to restart */ - if( NULL == (meta_data = opal_crs_base_open_metadata(&snapshot->super, 'w') ) ) { - exit_status = OPAL_ERROR; - goto cleanup; - } - - /* How user wants us to restart */ - if(NULL != snapshot->cmd_line) { - fprintf(meta_data, "%s\n", snapshot->cmd_line); - } - else { - opal_show_help("help-opal-crs-self.txt", "self:no-restart-cmd", - true); - exit_status = OPAL_ERROR; - } + opal_crs_base_metadata_write_token(snapshot->super.local_location, CRS_METADATA_CONTEXT, snapshot->cmd_line); cleanup: - if(NULL != meta_data) { - fclose(meta_data); - } - if(NULL != dir_name) { - free(dir_name); - dir_name = NULL; - } - return exit_status; } diff --git a/opal/runtime/opal_cr.c b/opal/runtime/opal_cr.c index ae262195bc..2d9372453f 100644 --- a/opal/runtime/opal_cr.c +++ b/opal/runtime/opal_cr.c @@ -73,7 +73,6 @@ /****************** * Global Var Decls ******************/ -bool opal_cr_allow_opal_only = false; bool opal_cr_stall_check = false; bool opal_cr_currently_stalled = false; int opal_cr_output; @@ -81,18 +80,13 @@ int opal_cr_output; /****************** * Local Functions & Var Decls ******************/ -static int cr_notify_response(opal_cr_ckpt_cmd_state_t resp); static int extract_env_vars(int prev_pid); -static int cr_entry_point_notify_reopen_files(int *prog_read_fd, int *prog_write_fd); -static void opal_cr_entry_point_signal_handler (int signo); + static void opal_cr_sigpipe_debug_signal_handler (int signo); static opal_cr_coord_callback_fn_t cur_coord_callback = NULL; static 
opal_cr_notify_callback_fn_t cur_notify_callback = NULL; -static char *prog_named_pipe_r = NULL; -static char *prog_named_pipe_w = NULL; - /****************** * Interface Functions & Vars ******************/ @@ -100,10 +94,12 @@ char * opal_cr_pipe_dir = NULL; int opal_cr_entry_point_signal = 0; bool opal_cr_is_enabled = true; bool opal_cr_is_tool = false; + /* Current checkpoint state */ -int opal_cr_checkpointing = OPAL_CR_STATUS_NONE; +int opal_cr_checkpointing_state = OPAL_CR_STATUS_NONE; + /* Current checkpoint request channel state */ -int opal_cr_checkpoint_request = OPAL_CR_STATUS_NONE; +int opal_cr_checkpoint_request = OPAL_CR_STATUS_NONE; static bool opal_cr_debug_sigpipe = false; @@ -241,22 +237,6 @@ int opal_cr_init(void ) opal_cr_thread_sleep_check, opal_cr_thread_sleep_wait); #endif - /* - * Whether or not to allow OPAL only checkpointing. - * By default we rely on ORTE to provide this functionality for us, but - * if the application is OPAL only then we need to fallback to the signal - * method which is activated by setting this MCA parameter to 'true'. 
- */ - mca_base_param_reg_int_name("opal_cr", "allow_opal_only", - "Enable OPAL Only checkpointing [Default: Disabled]", - true, false, - 0, &val); - opal_cr_allow_opal_only = OPAL_INT_TO_BOOL(val); - - opal_output_verbose(10, opal_cr_output, - "opal_cr: init: OPAL CR Allow OPAL Only: %d", - val); - mca_base_param_reg_int_name("opal_cr", "is_tool", "Is this a tool program, meaning does it require a fully operational OPAL or just enough to exec.", false, false, @@ -315,13 +295,6 @@ int opal_cr_init(void ) opal_cr_stall_check = false; opal_cr_currently_stalled = false; - /* - * Register the entry point - */ - if( OPAL_SUCCESS != (ret = opal_cr_entry_point_init()) ) { - exit_status = OPAL_ERROR; - goto cleanup; - } } /* End opal_cr_is_tool = true */ /* @@ -381,7 +354,7 @@ int opal_cr_init(void ) int opal_cr_finalize(void) { - int ret, exit_status = OPAL_SUCCESS; + int exit_status = OPAL_SUCCESS; if( --opal_cr_initalized != 0 ) { if( opal_cr_initalized < 0 ) { @@ -407,13 +380,9 @@ int opal_cr_finalize(void) } #endif /* OPAL_ENABLE_FT_THREAD == 1 */ - if( OPAL_SUCCESS != (ret = opal_cr_entry_point_finalize()) ) { - exit_status = ret; - } - /* Nothing to do for just process notifications */ - opal_cr_checkpointing = OPAL_CR_STATUS_TERM; - opal_cr_checkpoint_request = OPAL_CR_STATUS_TERM; + opal_cr_checkpointing_state = OPAL_CR_STATUS_TERM; + opal_cr_checkpoint_request = OPAL_CR_STATUS_TERM; } #if OPAL_ENABLE_FT == 1 @@ -452,7 +421,7 @@ void opal_cr_test_if_checkpoint_ready(void) * - If a request is pending then cancel it * - o.w., skip it. */ - if(OPAL_CR_STATUS_RUNNING == opal_cr_checkpointing ) { + if(OPAL_CR_STATUS_RUNNING == opal_cr_checkpointing_state ) { if( OPAL_SUCCESS != (ret = cur_notify_callback(OPAL_CHECKPOINT_CMD_IN_PROGRESS) ) ) { opal_output(opal_cr_output, "Error: opal_cr: test_if_checkpoint_ready: Respond [In Progress] Failed. 
(%d)", @@ -478,8 +447,8 @@ void opal_cr_test_if_checkpoint_ready(void) /* * Start the checkpoint */ - opal_cr_checkpointing = OPAL_CR_STATUS_RUNNING; - opal_cr_checkpoint_request = OPAL_CR_STATUS_NONE; + opal_cr_checkpointing_state = OPAL_CR_STATUS_RUNNING; + opal_cr_checkpoint_request = OPAL_CR_STATUS_NONE; STAGE_1: if( OPAL_SUCCESS != (ret = cur_notify_callback(OPAL_CHECKPOINT_CMD_START) ) ) { @@ -525,8 +494,12 @@ int opal_cr_inc_core(pid_t pid, opal_crs_base_snapshot_t *snapshot, bool term, i } if(*state == OPAL_CRS_CONTINUE) { - if(term) + if(term) { *state = OPAL_CRS_TERM; + opal_cr_checkpointing_state = OPAL_CR_STATUS_TERM; + } else { + opal_cr_checkpointing_state = OPAL_CR_STATUS_CONTINUE; + } } else { term = false; @@ -537,6 +510,7 @@ int opal_cr_inc_core(pid_t pid, opal_crs_base_snapshot_t *snapshot, bool term, i */ if(*state == OPAL_CRS_RESTART) { extract_env_vars(prev_pid); + opal_cr_checkpointing_state = OPAL_CR_STATUS_RESTART_PRE; } /* @@ -594,11 +568,10 @@ int opal_cr_coord(int state) /* * Here we are returning to either: - * - opal_notify() - * If we have an OPAL only opplication. * - [orte | ompi]_notify() - * If we have an ORTE or OPAL application. 
*/ + opal_cr_checkpointing_state = OPAL_CR_STATUS_RESTART_POST; + return OPAL_SUCCESS; } @@ -720,83 +693,6 @@ static int extract_env_vars(int prev_pid) /***************************************** * OPAL CR Entry Point Functionality *****************************************/ -int opal_cr_entry_point_init(void) -{ - int exit_status = OPAL_SUCCESS; - char *tmp_pid = NULL; - opal_cr_notify_callback_fn_t prev_notify_func; - - if( !opal_cr_allow_opal_only ) { - return OPAL_SUCCESS; - } - - opal_cr_reg_notify_callback(cr_notify_response, &prev_notify_func); - - /* String representation of the PID */ - asprintf(&tmp_pid, "%d", getpid()); - - asprintf(&prog_named_pipe_r, "%s/%s.%s", opal_cr_pipe_dir, OPAL_CR_NAMED_PROG_R, tmp_pid); - asprintf(&prog_named_pipe_w, "%s/%s.%s", opal_cr_pipe_dir, OPAL_CR_NAMED_PROG_W, tmp_pid); - - opal_output_verbose(15, opal_cr_output, - "opal_cr: init: Named Pipes (%s) (%s)", - prog_named_pipe_r, prog_named_pipe_w); - - /* - * Setup a signal handler to catch and start the proper thread - * to handle the checkpoint - */ - if( SIG_ERR == signal(opal_cr_entry_point_signal, opal_cr_entry_point_signal_handler) ) { - exit_status = OPAL_ERROR; - goto cleanup; - } - - cleanup: - if( NULL != tmp_pid) { - free(tmp_pid); - tmp_pid = NULL; - } - - return exit_status; -} - -int opal_cr_entry_point_finalize(void) -{ - if( !opal_cr_allow_opal_only ) { - return OPAL_SUCCESS; - } - - if( NULL != prog_named_pipe_r) { - free(prog_named_pipe_r); - prog_named_pipe_r = NULL; - } - - if( NULL != prog_named_pipe_w) { - free(prog_named_pipe_w); - prog_named_pipe_w = NULL; - } - - return OPAL_SUCCESS; -} - -/* - * C/R Signal Handler. - * Once a signal is received then the notification thread is notified - * so it can communicate with the checkpoint command to take the approprate - * action. 
- */ -static void opal_cr_entry_point_signal_handler (int signo) -{ - if( opal_cr_entry_point_signal != signo ) { - /* Not our signal */ - return; - } - /* - * Signal thread to start checkpoint handshake - */ - opal_cr_checkpoint_request = OPAL_CR_STATUS_REQUESTED; -} - /* * Used only for debugging SIGPIPE problems */ @@ -818,346 +714,6 @@ static void opal_cr_sigpipe_debug_signal_handler (int signo) } } -/* - * Respond to an asynchronous checkpoint request - */ -int cr_notify_response(opal_cr_ckpt_cmd_state_t resp) -{ - static int app_term = 0, app_pid = 0; - static opal_crs_base_snapshot_t *snapshot = NULL; - static int prog_named_read_pipe_fd, prog_named_write_pipe_fd; - static int len = 0; - static int cr_state; - int ret, exit_status = OPAL_SUCCESS; - int tmp_resp; - char *tmp_str = NULL; - ssize_t tmp_size = 0; - /* Commands from the command line tool */ - unsigned char app_cmd; - - if( opal_cr_currently_stalled ) { - goto STAGE_1; - } - - /* - * Open a named pipe for our application - */ - if (OPAL_SUCCESS != (ret = cr_entry_point_notify_reopen_files(&prog_named_read_pipe_fd, &prog_named_write_pipe_fd))) { - goto ckpt_cleanup; - } - - /* - * Get the initial handshake command - */ - if( sizeof(int) != (ret = read(prog_named_read_pipe_fd, &len, sizeof(int))) ) { - opal_output(opal_cr_output, - "opal_cr: cr_notify_response: Error: Unable to read the first handshake from named pipe (%s). 
%d\n", - prog_named_pipe_r, ret); - goto ckpt_cleanup; - } - - tmp_resp = (int)resp; - if( sizeof(int) != (ret = write(prog_named_write_pipe_fd, &tmp_resp, sizeof(int)) ) ) { - opal_output(opal_cr_output, - "opal_cr: cr_notify_response: %d: Error: Unable to write to pipe (%s) ret = %d [Line %d]\n", - tmp_resp, prog_named_pipe_w, ret, __LINE__); - goto ckpt_cleanup; - } - - /* - * Respond that the checkpoint is currently in progress - */ - if( OPAL_CHECKPOINT_CMD_IN_PROGRESS == resp ) { - opal_output_verbose(10, opal_cr_output, - "opal_cr: cr_notify_response: Checkpoint in progress, cannot start (%d)", - getpid()); - goto ckpt_cleanup; - } - /* - * Respond that the application is unable to be checkpointed - */ - else if( OPAL_CHECKPOINT_CMD_NULL == resp ) { - opal_output_verbose(10, opal_cr_output, - "opal_cr: cr_notify_response: Non-checkpointable application, cannot start (%d)", - getpid()); - goto ckpt_cleanup; - } - /* - * Respond that some error has occurred such that the application is - * not able to be checkpointed - */ - else if( OPAL_CHECKPOINT_CMD_ERROR == resp ) { - opal_output_verbose(10, opal_cr_output, - "opal_cr: cr_notify_response: Error generated, cannot start (%d)", - getpid()); - goto ckpt_cleanup; - } - - /* - * Respond signalng that we wish to respond to this request - */ - opal_output_verbose(10, opal_cr_output, - "opal_cr: cr_notify_response: Starting checkpoint request (%d)", - getpid()); - - /* - * Wait for a notify command from command line tool - */ - if( sizeof(app_cmd) != (ret = read(prog_named_read_pipe_fd, &app_cmd, sizeof(app_cmd))) ) { - opal_output(opal_cr_output, - "opal_cr: cr_notify_response: Error: Unable to read the requested command from named pipe (%s). 
%d\n", - prog_named_pipe_r, ret); - goto ckpt_cleanup; - } - - /* get PID argument */ - if( sizeof(int) != (ret = read(prog_named_read_pipe_fd, &app_pid, sizeof(int))) ) { - opal_output(opal_cr_output, - "opal_cr: cr_notify_response: Error: Unable to read the pid from named pipe (%s). %d\n", - prog_named_pipe_r, ret); - goto ckpt_cleanup; - } - - /* get term argument */ - if( sizeof(int) != (ret = read(prog_named_read_pipe_fd, &app_term, sizeof(int))) ) { - opal_output(opal_cr_output, - "opal_cr: cr_notify_response: Error: Unable to read the term from named pipe (%s). %d\n", - prog_named_pipe_r, ret); - goto ckpt_cleanup; - } - - /* get Snapshot Handle argument */ - if( sizeof(int) != (ret = read(prog_named_read_pipe_fd, &len, sizeof(int))) ) { - opal_output(opal_cr_output, - "opal_cr: cr_notify_response: Error: Unable to read the snapshot_handle len from named pipe (%s). %d\n", - prog_named_pipe_r, ret); - goto ckpt_cleanup; - } - - tmp_size = sizeof(char) * len; - tmp_str = (char *) malloc(sizeof(char) * len); - if( tmp_size != (ret = read(prog_named_read_pipe_fd, tmp_str, (sizeof(char) * len))) ) { - opal_output(opal_cr_output, - "opal_cr: cr_notify_response: Error: Unable to read the snapshot_handle from named pipe (%s). 
%d\n", - prog_named_pipe_r, ret); - goto ckpt_cleanup; - } - - /* - * If they didn't send anything of meaning then use the defaults - */ - snapshot = OBJ_NEW(opal_crs_base_snapshot_t); - - if( 1 < strlen(tmp_str) ) { - if( NULL != snapshot->reference_name) - free( snapshot->reference_name ); - snapshot->reference_name = strdup(tmp_str); - - if( NULL != snapshot->local_location ) - free( snapshot->local_location ); - snapshot->local_location = opal_crs_base_get_snapshot_directory(snapshot->reference_name); - - if( NULL != snapshot->remote_location ) - free( snapshot->remote_location ); - snapshot->remote_location = strdup(snapshot->local_location); - - free(tmp_str); - tmp_str = NULL; - } - - /* get Snapshot location argument */ - if( sizeof(int) != (ret = read(prog_named_read_pipe_fd, &len, sizeof(int))) ) { - opal_output(opal_cr_output, - "opal_cr: cr_notify_response: Error: Unable to read the snapshot_location len from named pipe (%s). %d\n", - prog_named_pipe_r, ret); - goto ckpt_cleanup; - } - - tmp_str = (char *) malloc(sizeof(char) * len); - tmp_size = sizeof(char) * len; - if( tmp_size != (ret = read(prog_named_read_pipe_fd, tmp_str, (sizeof(char) * len))) ) { - opal_output(opal_cr_output, - "opal_cr: cr_notify_response: Error: Unable to read the snapshot_location from named pipe (%s). %d\n", - prog_named_pipe_r, ret); - goto ckpt_cleanup; - } - - /* - * If they didn't send anything of meaning then use the defaults - */ - if( 1 < strlen(tmp_str) ) { - if( NULL != snapshot->local_location) - free( snapshot->local_location ); - asprintf(&(snapshot->local_location), "%s/%s", tmp_str, snapshot->reference_name); - - if( NULL != snapshot->remote_location) - free( snapshot->remote_location ); - snapshot->remote_location = strdup(snapshot->local_location); - - free(tmp_str); - tmp_str = NULL; - } - - /* - * Raise the notification flag. 
- * This will trigger the coordination, and checkpoint of the - * application if it is possible - */ - STAGE_1: - opal_cr_currently_stalled = false; - - ret = opal_cr_inc_core(app_pid, snapshot, app_term, &cr_state); - if( OPAL_EXISTS == ret ) { - opal_output_verbose(5, opal_cr_output, - "opal_cr: cr_notify_response: Stalling the checkpoint progress until state is stable again (PID = %d)\n", - getpid()); - opal_cr_currently_stalled = true; - return exit_status; - } - else if(OPAL_SUCCESS != ret) { - opal_output(opal_cr_output, - "opal_cr: cr_notify_response: Error: checkpoint notification failed. %d\n", ret); - goto ckpt_cleanup; - } - - /* Don't stall any longer */ - opal_cr_stall_check = false; - - if(OPAL_CRS_RESTART == cr_state) { - opal_output_verbose(10, opal_cr_output, - "opal_cr: cr_notify_response: Restarting...(%d)\n", - getpid()); - - app_term = false; - /* Do not respond to the non-existent command line tool */ - goto ckpt_cleanup; - } - else if(cr_state == OPAL_CRS_CONTINUE) { - ; /* Don't need to do anything here */ - } - else if(cr_state == OPAL_CRS_TERM ) { - ; /* Don't need to do anything here */ - } - else { - opal_output_verbose(5, opal_cr_output, - "opal_cr: cr_notify_response: Unknown cr_state(%d) [%d]", - cr_state, getpid()); - } - - /* - * Return the expected variables to the command line tool - */ - len = strlen(snapshot->reference_name); - len++; /* To account for the Null character */ - if( sizeof(int) != (ret = write(prog_named_write_pipe_fd, &len, sizeof(int))) ) { - opal_output(opal_cr_output, - "opal_cr: cr_notify_response: Error: Unable to write fname length to named pipe (%s). %d.\n", - prog_named_pipe_w, ret); - goto ckpt_cleanup; - } - - if(len > 0) { - if( (ssize_t)(sizeof(char) * len) != - (ret = write(prog_named_write_pipe_fd, snapshot->reference_name, (sizeof(char) * len))) ) { - opal_output(opal_cr_output, - "opal_cr: cr_notify_response: Error: Unable to write snapshot->reference_name to named pipe (%s). 
%d\n", - prog_named_pipe_w, ret); - goto ckpt_cleanup; - } - } - - if( sizeof(int) != (ret = write(prog_named_write_pipe_fd, &cr_state, sizeof(int))) ) { - opal_output(opal_cr_output, - "opal_cr: cr_notify_response: Error: Unable to write cr_state to named pipe (%s). %d\n", - prog_named_pipe_w, ret); - goto ckpt_cleanup; - } - - ckpt_cleanup: - close(prog_named_write_pipe_fd); - close(prog_named_read_pipe_fd); - remove(prog_named_pipe_r); - remove(prog_named_pipe_w); - - if(app_term) { - opal_output_verbose(10, opal_cr_output, - "opal_cr: cr_notify_response: User has asked to terminate the application"); - exit(OPAL_SUCCESS); - } - - /* Prepare to wait for another checkpoint action */ - opal_cr_checkpointing = OPAL_CR_STATUS_NONE; - - opal_cr_currently_stalled = false; - - return exit_status; -} - -static int cr_entry_point_notify_reopen_files(int *prog_read_fd, int *prog_write_fd) -{ - int ret = OPAL_ERR_NOT_IMPLEMENTED; - -#ifndef HAVE_MKFIFO - return ret; -#else -#ifdef __WINDOWS__ - return ret; -#else - /* - * Open up the read pipe - */ - if( (ret = mkfifo(prog_named_pipe_r, 0660)) < 0) { - if(EEXIST == ret || -1 == ret ) { - opal_output_verbose(10, opal_cr_output, - "opal_cr: notify_reopen_files: mkfifo failed because file (%s) already exists, attempting to use this pipe. (%d)", - prog_named_pipe_r, ret); - } - else { - opal_output(opal_cr_output, - "opal_cr: notify_reopen_files: Error: mkfifo failed to make named pipe (%s). (%d)\n", - prog_named_pipe_r, ret); - return OPAL_ERROR; - } - } - - *prog_read_fd = open(prog_named_pipe_r, O_RDWR); - if(*prog_read_fd < 0) { - opal_output(opal_cr_output, - "opal_cr: init: Error: open failed to open the named pipe (%s). 
%d\n", - prog_named_pipe_r, *prog_read_fd); - return OPAL_ERROR; - } - - /* - * Open up the write pipe - */ - if( (ret = mkfifo(prog_named_pipe_w, 0660)) < 0) { - if(EEXIST == ret || -1 == ret ) { - opal_output_verbose(10, opal_cr_output, - "opal_cr: notify_reopen_files: mkfifo failed because file (%s) already exists, attempting to use this pipe. (%d)", - prog_named_pipe_w, ret); - } - else { - opal_output(opal_cr_output, - "opal_cr: notify_reopen_files: Error: mkfifo failed to make named pipe (%s). (%d)\n", - prog_named_pipe_w, ret); - return OPAL_ERROR; - } - } - - *prog_write_fd = open(prog_named_pipe_w, O_WRONLY); - if(*prog_write_fd < 0) { - opal_output(opal_cr_output, - "opal_cr: notify_reopen_files: Error: open failed to open the named pipe (%s). (%d)\n", - prog_named_pipe_w, *prog_write_fd); - return OPAL_ERROR; - } - - return OPAL_SUCCESS; -#endif /* __WINDOWS__ */ -#endif /* HAVE_MKFIFO */ -} - #if OPAL_ENABLE_FT_THREAD == 1 static void* opal_cr_thread_fn(opal_object_t *obj) { diff --git a/opal/runtime/opal_cr.h b/opal/runtime/opal_cr.h index 799d5f21d4..7bc1937ef6 100644 --- a/opal/runtime/opal_cr.h +++ b/opal/runtime/opal_cr.h @@ -61,28 +61,39 @@ enum opal_cr_ckpt_cmd_state_t { OPAL_CR_STATUS_NONE, /* No checkpoint in progress */ OPAL_CR_STATUS_REQUESTED, /* Checkpoint has been requested */ OPAL_CR_STATUS_RUNNING, /* Checkpoint is currently running */ - OPAL_CR_STATUS_TERM /* Checkpoint is running and will terminate process upon completion */ + OPAL_CR_STATUS_TERM, /* Checkpoint is running and will terminate process upon completion */ + /* State of the continue operation */ + OPAL_CR_STATUS_CONTINUE, + /* State of the restart operation */ + OPAL_CR_STATUS_RESTART_PRE, + OPAL_CR_STATUS_RESTART_POST }; typedef enum opal_cr_ckpt_cmd_state_t opal_cr_ckpt_cmd_state_t; + /* An output handle to be used by the cr runtime + * functionality as an argument to opal_output() */ + OPAL_DECLSPEC extern int opal_cr_output; + /* Directory containing the named pipes for 
communication * with the opal-checkpoint tool */ OPAL_DECLSPEC extern char * opal_cr_pipe_dir; + /* Signal that opal-checkpoint uses to contact the * application process */ OPAL_DECLSPEC extern int opal_cr_entry_point_signal; + /* If Checkpointing is enabled in this application */ OPAL_DECLSPEC extern bool opal_cr_is_enabled; + /* If the application running is a tool * (e.g., opal-checkpoint, orted, ...) */ OPAL_DECLSPEC extern bool opal_cr_is_tool; - /* An output handle to be used by the cr runtime - * functionality as an argument to opal_output() */ - OPAL_DECLSPEC extern int opal_cr_output; + /* If a checkpoint has been requested */ OPAL_DECLSPEC extern int opal_cr_checkpoint_request; + /* The current state of a checkpoint operation */ - OPAL_DECLSPEC extern int opal_cr_checkpointing; + OPAL_DECLSPEC extern int opal_cr_checkpointing_state; /* * If this is an application that doesn't want to have @@ -217,16 +228,6 @@ typedef enum opal_cr_ckpt_cmd_state_t opal_cr_ckpt_cmd_state_t; /******************************* * Notification Routines *******************************/ - /* - * Init OPAL entry point functionality - */ - OPAL_DECLSPEC int opal_cr_entry_point_init(void); - - /* - * Finalize OPAL entry point functionality - */ - OPAL_DECLSPEC int opal_cr_entry_point_finalize(void); - /** * A function to respond to the async checkpoint request * this is useful when figuring out who should respond diff --git a/opal/tools/opal-restart/opal-restart.c b/opal/tools/opal-restart/opal-restart.c index 76a88a30c0..b036c9e970 100644 --- a/opal/tools/opal-restart/opal-restart.c +++ b/opal/tools/opal-restart/opal-restart.c @@ -41,6 +41,9 @@ #ifdef HAVE_SYS_STAT_H #include #endif +#ifdef HAVE_FCNTL_H +#include +#endif /* HAVE_FCNTL_H */ #ifdef HAVE_SYS_TYPES_H #include #endif @@ -75,7 +78,7 @@ static int initialize(int argc, char *argv[]); static int finalize(void); static int parse_args(int argc, char *argv[]); static int check_file(char *given_filename); -static int 
post_env_vars(int prev_pid); +static int post_env_vars(int prev_pid, char *location); /***************************************** * Global Vars for Command line Arguments @@ -185,7 +188,7 @@ main(int argc, char *argv[]) char * base = NULL; base = opal_crs_base_get_snapshot_directory(opal_restart_globals.filename); - expected_crs_comp = strdup(opal_crs_base_extract_expected_component(base, &prev_pid)); + opal_crs_base_extract_expected_component(base, &expected_crs_comp, &prev_pid); free(base); } @@ -260,12 +263,18 @@ main(int argc, char *argv[]) /* Since some checkpoint/restart systems don't pass along env vars to the * restarted app, we need to take care of that. + * + * Included here is the creation of any files or directories that need to be + * created before the process is restarted. */ - if(OPAL_SUCCESS != (ret = post_env_vars(prev_pid) ) ) { + if(OPAL_SUCCESS != (ret = post_env_vars(prev_pid, snapshot->local_location) ) ) { exit_status = ret; goto cleanup; } + /* + * Do the actual restart + */ ret = opal_crs.crs_restart(snapshot, opal_restart_globals.forked, &child_pid); @@ -513,12 +522,15 @@ static int check_file(char *given_filename) return exit_status; } -static int post_env_vars(int prev_pid) +static int post_env_vars(int prev_pid, char *location) { int ret, exit_status = OPAL_SUCCESS; char *command = NULL; char *proc_file = NULL; - + char **loc_touch = NULL; + char **loc_mkdir = NULL; + int argc, i; + if( 0 > prev_pid ) { opal_output(opal_restart_globals.output, "Invalid PID (%d)\n", @@ -535,17 +547,82 @@ static int post_env_vars(int prev_pid) asprintf(&proc_file, "/tmp/%s-%d", OPAL_CR_BASE_ENV_NAME, prev_pid); asprintf(&command, "env | grep OMPI_ > %s", proc_file); + opal_output_verbose(5, opal_restart_globals.output, + "post_env_vars: Execute: <%s>", command); + ret = system(command); if( 0 > ret) { exit_status = ret; goto cleanup; } + /* + * Any directories that need to be created + */ + opal_crs_base_metadata_read_token(location, CRS_METADATA_MKDIR, 
&loc_mkdir); + argc = opal_argv_count(loc_mkdir); + for( i = 0; i < argc; ++i ) { + if( NULL != command ) { + free(command); + command = NULL; + } + asprintf(&command, "mkdir -p %s", loc_mkdir[i]); + + opal_output_verbose(5, opal_restart_globals.output, + "post_env_vars: Execute: <%s>", command); + + ret = system(command); + if( 0 > ret) { + exit_status = ret; + goto cleanup; + } + } + if( 0 < argc ) { + system("sync ; sync"); + } + + /* + * Any files that need to exist + */ + opal_crs_base_metadata_read_token(location, CRS_METADATA_TOUCH, &loc_touch); + argc = opal_argv_count(loc_touch); + for( i = 0; i < argc; ++i ) { + if( NULL != command ) { + free(command); + command = NULL; + } + asprintf(&command, "touch %s", loc_touch[i]); + + opal_output_verbose(5, opal_restart_globals.output, + "post_env_vars: Execute: <%s>", command); + + ret = system(command); + if( 0 > ret) { + exit_status = ret; + goto cleanup; + } + } + if( 0 < argc ) { + system("sync ; sync"); + } + cleanup: - if( NULL != command) + if( NULL != command) { free(command); - if( NULL != proc_file) + command = NULL; + } + if( NULL != proc_file) { free(proc_file); + proc_file = NULL; + } + if( NULL != loc_mkdir ) { + opal_argv_free(loc_mkdir); + loc_mkdir = NULL; + } + if( NULL != loc_touch ) { + opal_argv_free(loc_touch); + loc_touch = NULL; + } return exit_status; } diff --git a/orte/mca/snapc/base/snapc_base_fns.c b/orte/mca/snapc/base/snapc_base_fns.c index 1ffb5abae2..b48a44c486 100644 --- a/orte/mca/snapc/base/snapc_base_fns.c +++ b/orte/mca/snapc/base/snapc_base_fns.c @@ -631,7 +631,7 @@ int orte_snapc_base_add_vpid_metadata( orte_process_name_t *proc, char *snapshot_ref, char *snapshot_location) { - int exit_status = ORTE_SUCCESS; + int ret, exit_status = ORTE_SUCCESS; FILE * meta_data = NULL; char * meta_data_fname = NULL; char * crs_comp = NULL; @@ -659,8 +659,7 @@ int orte_snapc_base_add_vpid_metadata( orte_process_name_t *proc, orte_util_convert_process_name_to_string(&proc_name, proc); /* 
Extract the checkpointer */ - crs_comp = opal_crs_base_extract_expected_component(snapshot_location, &prev_pid); - if( NULL == crs_comp ) { + if( OPAL_SUCCESS != (ret = opal_crs_base_extract_expected_component(snapshot_location, &crs_comp, &prev_pid)) ) { exit_status = ORTE_ERROR; goto cleanup; } diff --git a/orte/mca/snapc/full/snapc_full_app.c b/orte/mca/snapc/full/snapc_full_app.c index 98ec46b132..b7399f8f15 100644 --- a/orte/mca/snapc/full/snapc_full_app.c +++ b/orte/mca/snapc/full/snapc_full_app.c @@ -86,7 +86,6 @@ int app_coord_init() { * Register the INC notification callback */ opal_cr_reg_notify_callback(snapc_full_app_notify_response, &prev_notify_func); - opal_cr_entry_point_finalize(); /* String representation of the PID */ asprintf(&tmp_pid, "%d", getpid()); @@ -198,9 +197,15 @@ int snapc_full_app_notify_response(opal_cr_ckpt_cmd_state_t resp) /* * Begin checkpoint + * - Init the checkpoint metadata file */ OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, "App) notify_response: Start checkpoint...")); + if( OPAL_SUCCESS != (ret = opal_crs_base_init_snapshot_directory(local_snapshot) ) ) { + opal_output(0, "App) Error: Unable to initalize the snapshot directory!\n"); + exit_status = ret; + goto ckpt_cleanup; + } STAGE_1: opal_cr_currently_stalled = false; @@ -269,8 +274,8 @@ int snapc_full_app_notify_response(opal_cr_ckpt_cmd_state_t resp) } /* Prepare to wait for another checkpoint action */ - opal_cr_checkpointing = OPAL_CR_STATUS_NONE; - opal_cr_currently_stalled = false; + opal_cr_checkpointing_state = OPAL_CR_STATUS_NONE; + opal_cr_currently_stalled = false; return exit_status; }