From 0deb00922543b123a5ee3483fd8799601ee7e30d Mon Sep 17 00:00:00 2001 From: Josh Hursey Date: Thu, 30 Apr 2009 16:55:39 +0000 Subject: [PATCH] A bunch of improvements focused on Snapshot Coordination (SnapC) and File Management (FileM). * Improved timing in SnapC Full Global Coordinator * Improved scalability of the SnapC Full protocol * Minor improvements to the error reporting mechanisms in SnapC and FileM * Improved the memory usage of the metadata routines - now the owner of the data is more explicit. * Added a FileM hint to indicate when files stored locally can be moved to/from a globally mounted file system using just the 'cp' command instead of the 'rcp/scp' command. Slightly improves performance, but not too drastically. Can be set using the following SnapC MCA parameter: {{{snapc_base_global_shared=1}}} * Implemented the ability to throttle the number of outgoing connections in FileM. At larger scales this type of explicit throttling helps prevent overwhelming the HNP machine. Default: 10, set via MCA parameter: {{{filem_rsh_max_outgoing}}} * Added a few diagnostic/debugging features to SnapC and FileM. This commit was SVN r21131. 
--- orte/mca/filem/base/filem_base_fns.c | 8 +- orte/mca/filem/base/filem_base_open.c | 3 +- orte/mca/filem/filem.h | 15 +- orte/mca/filem/rsh/filem_rsh_component.c | 4 +- orte/mca/filem/rsh/filem_rsh_module.c | 67 +- orte/mca/snapc/base/base.h | 20 +- orte/mca/snapc/base/snapc_base_fns.c | 214 +- orte/mca/snapc/base/snapc_base_open.c | 22 +- orte/mca/snapc/full/snapc_full.h | 77 +- orte/mca/snapc/full/snapc_full_app.c | 9 +- orte/mca/snapc/full/snapc_full_component.c | 20 +- orte/mca/snapc/full/snapc_full_global.c | 2375 ++++++++++-------- orte/mca/snapc/full/snapc_full_local.c | 1404 ++++++----- orte/mca/snapc/full/snapc_full_module.c | 76 +- orte/mca/snapc/snapc.h | 89 +- orte/tools/orte-checkpoint/orte-checkpoint.c | 71 +- orte/tools/orte-ps/orte-ps.c | 21 +- orte/tools/orte-restart/orte-restart.c | 30 +- 18 files changed, 2549 insertions(+), 1976 deletions(-) diff --git a/orte/mca/filem/base/filem_base_fns.c b/orte/mca/filem/base/filem_base_fns.c index 3716268405..12567a4cb7 100644 --- a/orte/mca/filem/base/filem_base_fns.c +++ b/orte/mca/filem/base/filem_base_fns.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2008 The Trustees of Indiana University. + * Copyright (c) 2004-2009 The Trustees of Indiana University. * All rights reserved. * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. * All rights reserved. 
@@ -71,7 +71,11 @@ ORTE_DECLSPEC OBJ_CLASS_INSTANCE(orte_filem_base_file_set_t, ORTE_DECLSPEC void orte_filem_base_file_set_construct(orte_filem_base_file_set_t *req) { req->local_target = NULL; + req->local_hint = ORTE_FILEM_HINT_NONE; + req->remote_target = NULL; + req->remote_hint = ORTE_FILEM_HINT_NONE; + req->target_flag = ORTE_FILEM_TYPE_UNKNOWN; } @@ -81,11 +85,13 @@ ORTE_DECLSPEC void orte_filem_base_file_set_destruct( orte_filem_base_file_set_t free(req->local_target); req->local_target = NULL; } + req->local_hint = ORTE_FILEM_HINT_NONE; if( NULL != req->remote_target ) { free(req->remote_target); req->remote_target = NULL; } + req->remote_hint = ORTE_FILEM_HINT_NONE; req->target_flag = ORTE_FILEM_TYPE_UNKNOWN; } diff --git a/orte/mca/filem/base/filem_base_open.c b/orte/mca/filem/base/filem_base_open.c index 6fd91e0351..75ce06fdfc 100644 --- a/orte/mca/filem/base/filem_base_open.c +++ b/orte/mca/filem/base/filem_base_open.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2007 The Trustees of Indiana University. + * Copyright (c) 2004-2009 The Trustees of Indiana University. * All rights reserved. * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. * All rights reserved. @@ -77,6 +77,7 @@ int orte_filem_base_open(void) NULL, &str_value); if( NULL != str_value ) { free(str_value); + str_value = NULL; } /* Open up all available components */ diff --git a/orte/mca/filem/filem.h b/orte/mca/filem/filem.h index 9c86573b97..a4a0a2f187 100644 --- a/orte/mca/filem/filem.h +++ b/orte/mca/filem/filem.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana + * Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
* Copyright (c) 2004-2005 The University of Tennessee and The University @@ -54,6 +54,13 @@ extern "C" { #define ORTE_FILEM_MOVE_TYPE_RM 2 #define ORTE_FILEM_MOVE_TYPE_UNKNOWN 3 +/** + * Hints that describe the local or remote file target for + * optimization purposes. + */ +#define ORTE_FILEM_HINT_NONE 0 +#define ORTE_FILEM_HINT_SHARED 1 + /** * Define a Process Set * @@ -92,9 +99,15 @@ struct orte_filem_base_file_set_1_0_0_t { /* Local file reference */ char * local_target; + /* Local file reference hints */ + int local_hint; + /* Remove file reference */ char * remote_target; + /* Remote file reference hints */ + int remote_hint; + /* Type of file to move */ int target_flag; }; diff --git a/orte/mca/filem/rsh/filem_rsh_component.c b/orte/mca/filem/rsh/filem_rsh_component.c index 0dbb011269..275dfbb22c 100644 --- a/orte/mca/filem/rsh/filem_rsh_component.c +++ b/orte/mca/filem/rsh/filem_rsh_component.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2008 The Trustees of Indiana University. + * Copyright (c) 2004-2009 The Trustees of Indiana University. * All rights reserved. * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. * All rights reserved. @@ -37,7 +37,7 @@ static int filem_rsh_open(void); static int filem_rsh_close(void); int orte_filem_rsh_max_incomming = 10; -int orte_filem_rsh_max_outgoing = 10; +int orte_filem_rsh_max_outgoing = 10; /* * Instantiate the public struct with all of our public information diff --git a/orte/mca/filem/rsh/filem_rsh_module.c b/orte/mca/filem/rsh/filem_rsh_module.c index 38ea3c121b..e3a6af6b73 100644 --- a/orte/mca/filem/rsh/filem_rsh_module.c +++ b/orte/mca/filem/rsh/filem_rsh_module.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2008 The Trustees of Indiana University. + * Copyright (c) 2004-2009 The Trustees of Indiana University. * All rights reserved. * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. * All rights reserved. 
@@ -42,6 +42,7 @@ #include "opal/util/argv.h" #include "opal/util/output.h" #include "opal/util/opal_environ.h" +#include "opal/util/basename.h" #include "opal/threads/mutex.h" #include "opal/threads/condition.h" @@ -632,41 +633,51 @@ static int orte_filem_rsh_start_copy(orte_filem_base_request_t *request) { } /* Do not check a local get() operation, to help supress the warnings from the HNP */ else if (OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &p_set->source, &p_set->sink) ) { + char *base = NULL; + asprintf(&base, "%s/%s", f_set->local_target, opal_basename(f_set->remote_target)); /* * The file should not exist if we are getting a file with the * same name since we do not want to overwrite the filename * without the users consent. */ - if( 0 == access(f_set->local_target, R_OK) ) { + if( 0 == access(base, R_OK) ) { OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle, - "filem:rsh: copy(): %s -> %s: Error: Cannot move file %s to %s. Already exists at destination\n", + "filem:rsh: copy(): %s -> %s: Error: Cannot move file %s to %s. Already exists at destination (%s)\n", ORTE_NAME_PRINT(&p_set->source), ORTE_NAME_PRINT(&p_set->sink), f_set->remote_target, - f_set->local_target)); + f_set->local_target, base)); orte_show_help("help-orte-filem-rsh.txt", "orte-filem-rsh:get-file-exists", true, f_set->local_target, orte_process_info.nodename); + free(base); + base = NULL; request->is_done[cur_index] = true; request->is_active[cur_index] = true; request->exit_status[cur_index] = -1; goto continue_set; } + free(base); + base = NULL; } if( request->movement_type == ORTE_FILEM_MOVE_TYPE_PUT ) { OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle, - "filem:rsh: copy(): %s -> %s: Moving file %s to %s\n", + "filem:rsh: copy(): %s -> %s: Moving file %s %s to %s %s\n", ORTE_NAME_PRINT(&p_set->source), ORTE_NAME_PRINT(&p_set->sink), + (f_set->local_hint == ORTE_FILEM_HINT_SHARED ? 
"(S)" : ""), f_set->local_target, + (f_set->remote_hint == ORTE_FILEM_HINT_SHARED ? "(S)" : ""), f_set->remote_target)); } else { OPAL_OUTPUT_VERBOSE((10, mca_filem_rsh_component.super.output_handle, - "filem:rsh: copy(): %s -> %s: Moving file %s to %s\n", + "filem:rsh: copy(): %s -> %s: Moving file %s %s to %s %s\n", ORTE_NAME_PRINT(&p_set->source), ORTE_NAME_PRINT(&p_set->sink), + (f_set->remote_hint == ORTE_FILEM_HINT_SHARED ? "(S)" : ""), f_set->remote_target, + (f_set->local_hint == ORTE_FILEM_HINT_SHARED ? "(S)" : ""), f_set->local_target)); } @@ -736,12 +747,20 @@ static int orte_filem_rsh_start_copy(orte_filem_base_request_t *request) { * If this is the put() routine */ if( request->movement_type == ORTE_FILEM_MOVE_TYPE_PUT ) { - asprintf(&command, "%s %s %s %s:%s ", - mca_filem_rsh_component.cp_command, - dir_arg, - f_set->local_target, - remote_machine, - remote_file); + /* Use a local 'cp' when able */ + if(f_set->remote_hint == ORTE_FILEM_HINT_SHARED ) { + asprintf(&command, "cp %s %s %s ", + dir_arg, + f_set->local_target, + remote_file); + } else { + asprintf(&command, "%s %s %s %s:%s ", + mca_filem_rsh_component.cp_command, + dir_arg, + f_set->local_target, + remote_machine, + remote_file); + } OPAL_OUTPUT_VERBOSE((17, mca_filem_rsh_component.super.output_handle, "filem:rsh:put about to execute [%s]", command)); @@ -758,13 +777,23 @@ static int orte_filem_rsh_start_copy(orte_filem_base_request_t *request) { * ow it is the get() routine */ else { - asprintf(&command, "%s %s %s:%s %s ", - mca_filem_rsh_component.cp_command, - dir_arg, - remote_machine, - remote_file, - f_set->local_target); - + /* Use a local 'cp' when able */ + if(f_set->local_hint == ORTE_FILEM_HINT_SHARED ) { + asprintf(&command, "%s %s cp %s %s %s ", + mca_filem_rsh_component.remote_sh_command, + remote_machine, + dir_arg, + remote_file, + f_set->local_target); + } else { + asprintf(&command, "%s %s %s:%s %s ", + mca_filem_rsh_component.cp_command, + dir_arg, + remote_machine, + 
remote_file, + f_set->local_target); + } + OPAL_OUTPUT_VERBOSE((17, mca_filem_rsh_component.super.output_handle, "filem:rsh:get about to execute [%s]", command)); diff --git a/orte/mca/snapc/base/base.h b/orte/mca/snapc/base/base.h index de0e75e234..1a949494fc 100644 --- a/orte/mca/snapc/base/base.h +++ b/orte/mca/snapc/base/base.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana + * Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2005 The University of Tennessee and The University @@ -75,8 +75,8 @@ ORTE_DECLSPEC extern orte_snapc_coord_type_t orte_snapc_coord_type; /** * Global Snapshot Object Maintenance functions */ - void orte_snapc_base_snapshot_construct(orte_snapc_base_snapshot_t *obj); - void orte_snapc_base_snapshot_destruct( orte_snapc_base_snapshot_t *obj); + void orte_snapc_base_local_snapshot_construct(orte_snapc_base_local_snapshot_t *obj); + void orte_snapc_base_local_snapshot_destruct( orte_snapc_base_local_snapshot_t *obj); void orte_snapc_base_global_snapshot_construct(orte_snapc_base_global_snapshot_t *obj); void orte_snapc_base_global_snapshot_destruct( orte_snapc_base_global_snapshot_t *obj); @@ -132,24 +132,26 @@ ORTE_DECLSPEC extern orte_snapc_coord_type_t orte_snapc_coord_type; ORTE_DECLSPEC extern bool orte_snapc_base_store_in_place; ORTE_DECLSPEC extern bool orte_snapc_base_store_only_one_seq; ORTE_DECLSPEC extern bool orte_snapc_base_establish_global_snapshot_dir; + ORTE_DECLSPEC extern bool orte_snapc_base_is_global_dir_shared; ORTE_DECLSPEC extern size_t orte_snapc_base_snapshot_seq_number; /** * Some utility functions */ - ORTE_DECLSPEC char * orte_snapc_ckpt_state_str(size_t state); + ORTE_DECLSPEC int orte_snapc_ckpt_state_str(char ** state_str, int state); - ORTE_DECLSPEC char * orte_snapc_base_unique_global_snapshot_name(pid_t pid); - ORTE_DECLSPEC char * 
orte_snapc_base_get_global_snapshot_metadata_file(char *uniq_snapshot_name); - ORTE_DECLSPEC char * orte_snapc_base_get_global_snapshot_directory(char *uniq_global_snapshot_name); - ORTE_DECLSPEC int orte_snapc_base_init_global_snapshot_directory(char *uniq_global_snapshot_name, + ORTE_DECLSPEC int orte_snapc_base_unique_global_snapshot_name(char **name_str, pid_t pid); + ORTE_DECLSPEC int orte_snapc_base_get_global_snapshot_metadata_file(char **file_name, char *uniq_snapshot_name); + ORTE_DECLSPEC int orte_snapc_base_get_global_snapshot_directory(char **dir_name, char *uniq_global_snapshot_name); + ORTE_DECLSPEC int orte_snapc_base_init_global_snapshot_directory(char *uniq_global_snapshot_name, bool empty_metadata); ORTE_DECLSPEC int orte_snapc_base_add_timestamp(char * global_snapshot_ref); ORTE_DECLSPEC int orte_snapc_base_add_vpid_metadata(orte_process_name_t *proc, char * global_snapshot_ref, char *snapshot_ref, - char *snapshot_location); + char *snapshot_location, + char *crs_agent); ORTE_DECLSPEC int orte_snapc_base_finalize_metadata(char * global_snapshot_ref); ORTE_DECLSPEC int orte_snapc_base_extract_metadata(orte_snapc_base_global_snapshot_t *snapshot); diff --git a/orte/mca/snapc/base/snapc_base_fns.c b/orte/mca/snapc/base/snapc_base_fns.c index 1e91c66c9d..95b689cbed 100644 --- a/orte/mca/snapc/base/snapc_base_fns.c +++ b/orte/mca/snapc/base/snapc_base_fns.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2008 The Trustees of Indiana University. + * Copyright (c) 2004-2009 The Trustees of Indiana University. * All rights reserved. * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. * All rights reserved. 
@@ -41,6 +41,7 @@ #include "orte/mca/rml/rml.h" #include "orte/mca/rml/rml_types.h" +#include "orte/mca/errmgr/errmgr.h" #include "orte/runtime/orte_globals.h" #include "orte/util/name_fns.h" @@ -68,27 +69,51 @@ size_t orte_snapc_base_snapshot_seq_number = 0; /****************** * Object stuff ******************/ -OBJ_CLASS_INSTANCE(orte_snapc_base_snapshot_t, - opal_crs_base_snapshot_t, - orte_snapc_base_snapshot_construct, - orte_snapc_base_snapshot_destruct); +OBJ_CLASS_INSTANCE(orte_snapc_base_local_snapshot_t, + opal_list_item_t, + orte_snapc_base_local_snapshot_construct, + orte_snapc_base_local_snapshot_destruct); -void orte_snapc_base_snapshot_construct(orte_snapc_base_snapshot_t *snapshot) +void orte_snapc_base_local_snapshot_construct(orte_snapc_base_local_snapshot_t *snapshot) { snapshot->process_name.jobid = 0; snapshot->process_name.vpid = 0; - snapshot->process_pid = 0; + snapshot->state = ORTE_SNAPC_CKPT_STATE_NONE; - snapshot->term = false; + + snapshot->reference_name = NULL; + snapshot->local_location = NULL; + snapshot->remote_location = NULL; + + snapshot->opal_crs = NULL; } -void orte_snapc_base_snapshot_destruct( orte_snapc_base_snapshot_t *snapshot) +void orte_snapc_base_local_snapshot_destruct( orte_snapc_base_local_snapshot_t *snapshot) { snapshot->process_name.jobid = 0; snapshot->process_name.vpid = 0; - snapshot->process_pid = 0; + snapshot->state = ORTE_SNAPC_CKPT_STATE_NONE; - snapshot->term = false; + + if( NULL != snapshot->reference_name ) { + free(snapshot->reference_name); + snapshot->reference_name = NULL; + } + + if( NULL != snapshot->local_location ) { + free(snapshot->local_location); + snapshot->local_location = NULL; + } + + if( NULL != snapshot->remote_location ) { + free(snapshot->remote_location); + snapshot->remote_location = NULL; + } + + if( NULL != snapshot->opal_crs ) { + free(snapshot->opal_crs); + snapshot->opal_crs = NULL; + } } /****/ @@ -99,51 +124,38 @@ OBJ_CLASS_INSTANCE(orte_snapc_base_global_snapshot_t, void 
orte_snapc_base_global_snapshot_construct(orte_snapc_base_global_snapshot_t *snapshot) { - OBJ_CONSTRUCT(&(snapshot->snapshots), opal_list_t); + char *tmp_dir = NULL; - snapshot->component_name = NULL; - snapshot->reference_name = orte_snapc_base_unique_global_snapshot_name(getpid()); - snapshot->local_location = opal_dirname(orte_snapc_base_get_global_snapshot_directory(snapshot->reference_name)); + OBJ_CONSTRUCT(&(snapshot->local_snapshots), opal_list_t); + + orte_snapc_base_unique_global_snapshot_name(&(snapshot->reference_name), getpid()); + + orte_snapc_base_get_global_snapshot_directory(&tmp_dir, snapshot->reference_name); + snapshot->local_location = opal_dirname(tmp_dir); + free(tmp_dir); snapshot->seq_num = 0; - snapshot->start_time = NULL; - snapshot->end_time = NULL; } void orte_snapc_base_global_snapshot_destruct( orte_snapc_base_global_snapshot_t *snapshot) { opal_list_item_t* item = NULL; - while (NULL != (item = opal_list_remove_first(&snapshot->snapshots))) { + while (NULL != (item = opal_list_remove_first(&snapshot->local_snapshots))) { OBJ_RELEASE(item); } - OBJ_DESTRUCT(&(snapshot->snapshots)); + OBJ_DESTRUCT(&(snapshot->local_snapshots)); if(NULL != snapshot->reference_name) { free(snapshot->reference_name); snapshot->reference_name = NULL; } - if(NULL != snapshot->component_name) { - free(snapshot->component_name); - snapshot->component_name = NULL; - } - if(NULL != snapshot->local_location) { free(snapshot->local_location); snapshot->local_location = NULL; } - if(NULL != snapshot->start_time) { - free(snapshot->start_time); - snapshot->start_time = NULL; - } - - if(NULL != snapshot->end_time) { - free(snapshot->end_time); - snapshot->end_time = NULL; - } - snapshot->seq_num = 0; } @@ -198,6 +210,7 @@ int orte_snapc_base_none_setup_job(orte_jobid_t jobid) ORTE_RML_PERSISTENT, snapc_none_global_cmdline_request, NULL))) { + ORTE_ERROR_LOG(rc); exit_status = rc; goto cleanup; } @@ -238,6 +251,7 @@ static void snapc_none_global_cmdline_request(int 
status, n = 1; if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &command, &n, ORTE_SNAPC_CMD))) { + ORTE_ERROR_LOG(ret); exit_status = ret; goto cleanup; } @@ -251,6 +265,7 @@ static void snapc_none_global_cmdline_request(int status, * Do the basic handshake with the orte_checkpoint command */ if( ORTE_SUCCESS != (ret = orte_snapc_base_global_coord_ckpt_init_cmd(sender, buffer, &term, &jobid)) ) { + ORTE_ERROR_LOG(ret); exit_status = ret; goto cleanup; } @@ -259,6 +274,7 @@ static void snapc_none_global_cmdline_request(int status, * Respond with an invalid response */ if( ORTE_SUCCESS != (ret = orte_snapc_base_global_coord_ckpt_update_cmd(sender, NULL, -1, ORTE_SNAPC_CKPT_STATE_NO_CKPT)) ) { + ORTE_ERROR_LOG(ret); exit_status = ret; goto cleanup; } @@ -312,6 +328,7 @@ int orte_snapc_base_global_coord_ckpt_init_cmd(orte_process_name_t* peer, "%s) base:ckpt_init_cmd: Error: DSS Unpack (term) Failure (ret = %d) (LINE = %d)\n", ORTE_SNAPC_COORD_NAME_PRINT(orte_snapc_coord_type), ret, __LINE__); + ORTE_ERROR_LOG(ret); exit_status = ret; goto cleanup; } @@ -322,6 +339,7 @@ int orte_snapc_base_global_coord_ckpt_init_cmd(orte_process_name_t* peer, "%s) base:ckpt_init_cmd: Error: DSS Unpack (jobid) Failure (ret = %d) (LINE = %d)\n", ORTE_SNAPC_COORD_NAME_PRINT(orte_snapc_coord_type), ret, __LINE__); + ORTE_ERROR_LOG(ret); exit_status = ret; goto cleanup; } @@ -373,6 +391,7 @@ int orte_snapc_base_global_coord_ckpt_update_cmd(orte_process_name_t* peer, } if (ORTE_SUCCESS != (ret = opal_dss.pack(loc_buffer, &command, 1, ORTE_SNAPC_CMD)) ) { + ORTE_ERROR_LOG(ret); exit_status = ret; goto cleanup; } @@ -382,6 +401,7 @@ int orte_snapc_base_global_coord_ckpt_update_cmd(orte_process_name_t* peer, "%s) base:ckpt_update_cmd: Error: DSS Pack (ckpt_status) Failure (ret = %d) (LINE = %d)\n", ORTE_SNAPC_COORD_NAME_PRINT(orte_snapc_coord_type), ret, __LINE__); + ORTE_ERROR_LOG(ret); exit_status = ret; goto cleanup; } @@ -393,6 +413,7 @@ int 
orte_snapc_base_global_coord_ckpt_update_cmd(orte_process_name_t* peer, "%s) base:ckpt_update_cmd: Error: DSS Pack (snapshot handle) Failure (ret = %d) (LINE = %d)\n", ORTE_SNAPC_COORD_NAME_PRINT(orte_snapc_coord_type), ret, __LINE__); + ORTE_ERROR_LOG(ret); exit_status = ret; goto cleanup; } @@ -401,6 +422,7 @@ int orte_snapc_base_global_coord_ckpt_update_cmd(orte_process_name_t* peer, "%s) base:ckpt_update_cmd: Error: DSS Pack (seq number) Failure (ret = %d) (LINE = %d)\n", ORTE_SNAPC_COORD_NAME_PRINT(orte_snapc_coord_type), ret, __LINE__); + ORTE_ERROR_LOG(ret); exit_status = ret; goto cleanup; } @@ -411,6 +433,7 @@ int orte_snapc_base_global_coord_ckpt_update_cmd(orte_process_name_t* peer, "%s) base:ckpt_update_cmd: Error: Send (ckpt_status) Failure (ret = %d) (LINE = %d)\n", ORTE_SNAPC_COORD_NAME_PRINT(orte_snapc_coord_type), ret, __LINE__); + ORTE_ERROR_LOG(ret); exit_status = ret; goto cleanup; } @@ -433,42 +456,36 @@ int orte_snapc_base_global_coord_ckpt_update_cmd(orte_process_name_t* peer, /***************************** * Snapshot metadata functions *****************************/ -char * orte_snapc_base_unique_global_snapshot_name(pid_t pid) +int orte_snapc_base_unique_global_snapshot_name(char **name_str, pid_t pid) { - char * uniq_name; - if( NULL == orte_snapc_base_global_snapshot_ref ) { - asprintf(&uniq_name, "ompi_global_snapshot_%d.ckpt", pid); + asprintf(name_str, "ompi_global_snapshot_%d.ckpt", pid); } else { - uniq_name = strdup(orte_snapc_base_global_snapshot_ref); + *name_str = strdup(orte_snapc_base_global_snapshot_ref); } - return uniq_name; + return ORTE_SUCCESS; } -char * orte_snapc_base_get_global_snapshot_metadata_file(char *uniq_snapshot_name) +int orte_snapc_base_get_global_snapshot_metadata_file(char **file_name, char *uniq_snapshot_name) { - char * path = NULL; - - asprintf(&path, "%s/%s/%s", + asprintf(file_name, "%s/%s/%s", orte_snapc_base_global_snapshot_dir, uniq_snapshot_name, orte_snapc_base_metadata_filename); - return path; + 
return ORTE_SUCCESS; } -char * orte_snapc_base_get_global_snapshot_directory(char *uniq_snapshot_name) +int orte_snapc_base_get_global_snapshot_directory(char **dir_name, char *uniq_snapshot_name) { - char * dir_name = NULL; - - asprintf(&dir_name, "%s/%s/%d", + asprintf(dir_name, "%s/%s/%d", orte_snapc_base_global_snapshot_dir, uniq_snapshot_name, (int)orte_snapc_base_snapshot_seq_number); - return dir_name; + return ORTE_SUCCESS; } int orte_snapc_base_init_global_snapshot_directory(char *uniq_global_snapshot_name, bool empty_metadata) @@ -482,8 +499,9 @@ int orte_snapc_base_init_global_snapshot_directory(char *uniq_global_snapshot_na /* * Make the snapshot directory from the uniq_global_snapshot_name */ - dir_name = orte_snapc_base_get_global_snapshot_directory(uniq_global_snapshot_name); + orte_snapc_base_get_global_snapshot_directory(&dir_name, uniq_global_snapshot_name); if(OPAL_SUCCESS != (ret = opal_os_dirpath_create(dir_name, my_mode)) ) { + ORTE_ERROR_LOG(ret); exit_status = ret; goto cleanup; } @@ -491,13 +509,14 @@ int orte_snapc_base_init_global_snapshot_directory(char *uniq_global_snapshot_na /* * Initialize the metadata file at the top of that directory. 
*/ - meta_data_fname = orte_snapc_base_get_global_snapshot_metadata_file(uniq_global_snapshot_name); + orte_snapc_base_get_global_snapshot_metadata_file(&meta_data_fname, uniq_global_snapshot_name); if (NULL == (meta_data = fopen(meta_data_fname, "a")) ) { opal_output(orte_snapc_base_output, "%s) base:init_global_snapshot_directory: Error: Unable to open the file (%s)\n", ORTE_SNAPC_COORD_NAME_PRINT(orte_snapc_coord_type), meta_data_fname); + ORTE_ERROR_LOG(ORTE_ERROR); exit_status = ORTE_ERROR; goto cleanup; } @@ -529,7 +548,7 @@ int orte_snapc_base_init_global_snapshot_directory(char *uniq_global_snapshot_na if(NULL != meta_data_fname) free(meta_data_fname); - return OPAL_SUCCESS; + return ORTE_SUCCESS; } /* @@ -575,13 +594,14 @@ int orte_snapc_base_add_timestamp(char * global_snapshot_ref) char * meta_data_fname = NULL; time_t timestamp; - meta_data_fname = orte_snapc_base_get_global_snapshot_metadata_file(global_snapshot_ref); + orte_snapc_base_get_global_snapshot_metadata_file(&meta_data_fname, global_snapshot_ref); if (NULL == (meta_data = fopen(meta_data_fname, "a")) ) { opal_output(orte_snapc_base_output, "%s) base:add_timestamp: Error: Unable to open the file (%s)\n", ORTE_SNAPC_COORD_NAME_PRINT(orte_snapc_coord_type), meta_data_fname); + ORTE_ERROR_LOG(ORTE_ERROR); exit_status = ORTE_ERROR; goto cleanup; } @@ -607,13 +627,14 @@ int orte_snapc_base_finalize_metadata(char * global_snapshot_ref) /* Add the final timestamp */ orte_snapc_base_add_timestamp(global_snapshot_ref); - meta_data_fname = orte_snapc_base_get_global_snapshot_metadata_file(global_snapshot_ref); + orte_snapc_base_get_global_snapshot_metadata_file(&meta_data_fname, global_snapshot_ref); if (NULL == (meta_data = fopen(meta_data_fname, "a")) ) { opal_output(orte_snapc_base_output, "%s) base:add_timestamp: Error: Unable to open the file (%s)\n", ORTE_SNAPC_COORD_NAME_PRINT(orte_snapc_coord_type), meta_data_fname); + ORTE_ERROR_LOG(ORTE_ERROR); exit_status = ORTE_ERROR; goto cleanup; } @@ 
-633,23 +654,28 @@ int orte_snapc_base_finalize_metadata(char * global_snapshot_ref) int orte_snapc_base_add_vpid_metadata( orte_process_name_t *proc, char * global_snapshot_ref, char *snapshot_ref, - char *snapshot_location) + char *snapshot_location, + char *crs_agent) { int ret, exit_status = ORTE_SUCCESS; FILE * meta_data = NULL; char * meta_data_fname = NULL; char * crs_comp = NULL; - char * local_dir = NULL; char * proc_name = NULL; int prev_pid = 0; - meta_data_fname = orte_snapc_base_get_global_snapshot_metadata_file(global_snapshot_ref); + if( NULL == snapshot_location ) { + return ORTE_ERROR; + } + + orte_snapc_base_get_global_snapshot_metadata_file(&meta_data_fname, global_snapshot_ref); if (NULL == (meta_data = fopen(meta_data_fname, "a")) ) { opal_output(orte_snapc_base_output, "%s) base:add_metadata: Error: Unable to open the file (%s)\n", ORTE_SNAPC_COORD_NAME_PRINT(orte_snapc_coord_type), meta_data_fname); + ORTE_ERROR_LOG(ORTE_ERROR); exit_status = ORTE_ERROR; goto cleanup; } @@ -663,20 +689,21 @@ int orte_snapc_base_add_vpid_metadata( orte_process_name_t *proc, orte_util_convert_process_name_to_string(&proc_name, proc); /* Extract the checkpointer */ - if( OPAL_SUCCESS != (ret = opal_crs_base_extract_expected_component(snapshot_location, &crs_comp, &prev_pid)) ) { - exit_status = ORTE_ERROR; - goto cleanup; + if( NULL == crs_agent ) { + if( OPAL_SUCCESS != (ret = opal_crs_base_extract_expected_component(snapshot_location, &crs_comp, &prev_pid)) ) { + exit_status = ret; + ORTE_ERROR_LOG(ret); + goto cleanup; + } + } else { + crs_comp = strdup(crs_agent); } - /* get the base of the location */ - local_dir = strdup(snapshot_location); - local_dir = opal_dirname(local_dir); - /* Write the string */ fprintf(meta_data, "%s%s\n", SNAPC_METADATA_PROCESS, proc_name); fprintf(meta_data, "%s%s\n", SNAPC_METADATA_CRS_COMP, crs_comp); fprintf(meta_data, "%s%s\n", SNAPC_METADATA_SNAP_REF, snapshot_ref); - fprintf(meta_data, "%s%s\n", SNAPC_METADATA_SNAP_LOC, 
local_dir); + fprintf(meta_data, "%s%s\n", SNAPC_METADATA_SNAP_LOC, snapshot_location); cleanup: if( NULL != meta_data ) @@ -684,9 +711,6 @@ int orte_snapc_base_add_vpid_metadata( orte_process_name_t *proc, if( NULL != meta_data_fname) free(meta_data_fname); - if( NULL != local_dir) - free(local_dir); - return exit_status; } @@ -698,13 +722,14 @@ int orte_snapc_base_extract_metadata(orte_snapc_base_global_snapshot_t *global_s int next_seq_int; char * token = NULL; char * value = NULL; - orte_snapc_base_snapshot_t *vpid_snapshot = NULL; + orte_snapc_base_local_snapshot_t *vpid_snapshot = NULL; /* * Open the metadata file */ - meta_data_fname = orte_snapc_base_get_global_snapshot_metadata_file(global_snapshot->reference_name); + orte_snapc_base_get_global_snapshot_metadata_file(&meta_data_fname, global_snapshot->reference_name); if (NULL == (meta_data = fopen(meta_data_fname, "r")) ) { + ORTE_ERROR_LOG(ORTE_ERROR); exit_status = ORTE_ERROR; goto cleanup; } @@ -742,12 +767,7 @@ int orte_snapc_base_extract_metadata(orte_snapc_base_global_snapshot_t *global_s break; } else if(0 == strncmp(SNAPC_METADATA_TIME, token, strlen(SNAPC_METADATA_TIME)) ) { - if( NULL == global_snapshot->start_time) { - global_snapshot->start_time = strdup(value); - } - else { - global_snapshot->end_time = strdup(value); - } + ; } else if(0 == strncmp(SNAPC_METADATA_PROCESS, token, strlen(SNAPC_METADATA_PROCESS)) ) { orte_process_name_t proc; @@ -756,29 +776,29 @@ int orte_snapc_base_extract_metadata(orte_snapc_base_global_snapshot_t *global_s /* Not the first process, so append it to the list */ if( NULL != vpid_snapshot) { - opal_list_append(&global_snapshot->snapshots, &(vpid_snapshot->crs_snapshot_super.super)); + opal_list_append(&global_snapshot->local_snapshots, &(vpid_snapshot->super)); } - vpid_snapshot = OBJ_NEW(orte_snapc_base_snapshot_t); + vpid_snapshot = OBJ_NEW(orte_snapc_base_local_snapshot_t); vpid_snapshot->process_name.jobid = proc.jobid; vpid_snapshot->process_name.vpid = 
proc.vpid; } else if(0 == strncmp(SNAPC_METADATA_CRS_COMP, token, strlen(SNAPC_METADATA_CRS_COMP)) ) { - vpid_snapshot->crs_snapshot_super.component_name = strdup(value); + vpid_snapshot->opal_crs = strdup(value); } else if(0 == strncmp(SNAPC_METADATA_SNAP_REF, token, strlen(SNAPC_METADATA_SNAP_REF)) ) { - vpid_snapshot->crs_snapshot_super.reference_name = strdup(value); + vpid_snapshot->reference_name = strdup(value); } else if(0 == strncmp(SNAPC_METADATA_SNAP_LOC, token, strlen(SNAPC_METADATA_SNAP_LOC)) ) { - vpid_snapshot->crs_snapshot_super.local_location = strdup(value); - vpid_snapshot->crs_snapshot_super.remote_location = strdup(value); + vpid_snapshot->local_location = strdup(value); + vpid_snapshot->remote_location = strdup(value); } } while(0 == feof(meta_data) ); /* Append the last item */ if( NULL != vpid_snapshot) { - opal_list_append(&global_snapshot->snapshots, &(vpid_snapshot->crs_snapshot_super.super)); + opal_list_append(&global_snapshot->local_snapshots, &(vpid_snapshot->super)); } cleanup: @@ -960,34 +980,40 @@ static int metadata_extract_next_token(FILE *file, char **token, char **value) return exit_status; } -char * orte_snapc_ckpt_state_str(size_t state) +int orte_snapc_ckpt_state_str(char ** state_str, int state) { switch(state) { case ORTE_SNAPC_CKPT_STATE_NONE: - return strdup(" -- "); + *state_str = strdup(" -- "); break; case ORTE_SNAPC_CKPT_STATE_REQUEST: - return strdup("Requested"); + *state_str = strdup("Requested"); break; case ORTE_SNAPC_CKPT_STATE_PENDING_TERM: - return strdup("Pending (Termination)"); + *state_str = strdup("Pending (Termination)"); break; case ORTE_SNAPC_CKPT_STATE_PENDING: - return strdup("Pending"); + *state_str = strdup("Pending"); break; case ORTE_SNAPC_CKPT_STATE_RUNNING: - return strdup("Running"); + *state_str = strdup("Running"); break; case ORTE_SNAPC_CKPT_STATE_FILE_XFER: - return strdup("File Transfer"); + *state_str = strdup("File Transfer"); break; case ORTE_SNAPC_CKPT_STATE_FINISHED: - return 
strdup("Finished"); + *state_str = strdup("Finished"); + break; + case ORTE_SNAPC_CKPT_STATE_FINISHED_LOCAL: + *state_str = strdup("Locally Finished"); break; case ORTE_SNAPC_CKPT_STATE_ERROR: - return strdup("Error"); + *state_str = strdup("Error"); break; default: - return strdup("Unknown"); + asprintf(state_str, "Unknown %d", state); + break; } + + return ORTE_SUCCESS; } diff --git a/orte/mca/snapc/base/snapc_base_open.c b/orte/mca/snapc/base/snapc_base_open.c index 67ce6ac94d..f9cb3dcaf1 100644 --- a/orte/mca/snapc/base/snapc_base_open.c +++ b/orte/mca/snapc/base/snapc_base_open.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2007 The Trustees of Indiana University. + * Copyright (c) 2004-2009 The Trustees of Indiana University. * All rights reserved. * Copyright (c) 2004-2008 The Trustees of the University of Tennessee. * All rights reserved. @@ -74,6 +74,7 @@ char * orte_snapc_base_global_snapshot_ref = NULL; bool orte_snapc_base_store_in_place = true; bool orte_snapc_base_store_only_one_seq = false; bool orte_snapc_base_establish_global_snapshot_dir = false; +bool orte_snapc_base_is_global_dir_shared = false; /** * Function for finding and opening either all MCA components, @@ -97,9 +98,20 @@ int orte_snapc_base_open(void) opal_home_directory(), &orte_snapc_base_global_snapshot_dir); + mca_base_param_reg_int_name("snapc", + "base_global_shared", + "If the global_snapshot_dir is on a shared file system all nodes can access, " + "then the checkpoint files can be copied more efficiently when FileM is used." + " [Default = disabled]", + false, false, + 0, + &value); + orte_snapc_base_is_global_dir_shared = OPAL_INT_TO_BOOL(value); + OPAL_OUTPUT_VERBOSE((20, orte_snapc_base_output, - "snapc:base: open: base_global_snapshot_dir = %s", - orte_snapc_base_global_snapshot_dir)); + "snapc:base: open: base_global_snapshot_dir = %s (%s)", + orte_snapc_base_global_snapshot_dir, + (orte_snapc_base_is_global_dir_shared ? 
"Shared" : "Local") )); /* * Store the checkpoint files in their final location. @@ -173,8 +185,8 @@ int orte_snapc_base_open(void) if( NULL == orte_snapc_base_global_snapshot_loc ) { char *t1 = NULL; char *t2 = NULL; - t1 = strdup( orte_snapc_base_unique_global_snapshot_name( getpid() ) ); - t2 = orte_snapc_base_get_global_snapshot_directory( t1 ); + orte_snapc_base_unique_global_snapshot_name(&t1, getpid() ); + orte_snapc_base_get_global_snapshot_directory(&t2, t1 ); orte_snapc_base_global_snapshot_loc = strdup(t2); free(t1); free(t2); diff --git a/orte/mca/snapc/full/snapc_full.h b/orte/mca/snapc/full/snapc_full.h index 40186bf301..e4c3b5ff59 100644 --- a/orte/mca/snapc/full/snapc_full.h +++ b/orte/mca/snapc/full/snapc_full.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2008 The Trustees of Indiana University. + * Copyright (c) 2004-2009 The Trustees of Indiana University. * All rights reserved. * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. * All rights reserved. 
@@ -30,6 +30,7 @@ #include "opal/mca/mca.h" #include "opal/event/event.h" +#include "orte/mca/filem/filem.h" #include "orte/mca/snapc/snapc.h" BEGIN_C_DECLS @@ -39,35 +40,54 @@ BEGIN_C_DECLS */ typedef uint8_t orte_snapc_full_cmd_flag_t; #define ORTE_SNAPC_FULL_CMD OPAL_UINT8 -#define ORTE_SNAPC_FULL_UPDATE_JOB_STATE_CMD 1 -#define ORTE_SNAPC_FULL_UPDATE_PROC_STATE_CMD 2 -#define ORTE_SNAPC_FULL_VPID_ASSOC_CMD 3 -#define ORTE_SNAPC_FULL_ESTABLISH_DIR_CMD 4 +#define ORTE_SNAPC_FULL_UPDATE_JOB_STATE_CMD 1 +#define ORTE_SNAPC_FULL_UPDATE_JOB_STATE_QUICK_CMD 2 +#define ORTE_SNAPC_FULL_UPDATE_ORTED_STATE_CMD 3 +#define ORTE_SNAPC_FULL_UPDATE_ORTED_STATE_QUICK_CMD 4 +#define ORTE_SNAPC_FULL_VPID_ASSOC_CMD 5 +#define ORTE_SNAPC_FULL_ESTABLISH_DIR_CMD 6 +#define ORTE_SNAPC_FULL_MAX 7 /* * Local Component structures */ struct orte_snapc_full_component_t { orte_snapc_base_component_t super; /** Base SNAPC component */ - }; typedef struct orte_snapc_full_component_t orte_snapc_full_component_t; OPAL_MODULE_DECLSPEC extern orte_snapc_full_component_t mca_snapc_full_component; - struct orte_snapc_full_global_snapshot_t { + /* + * Global Coordinator per orted metadata + */ + struct orte_snapc_full_orted_snapshot_t { /** Base SNAPC Global snapshot type */ - orte_snapc_base_snapshot_t super; + orte_snapc_base_global_snapshot_t super; - /** Local coordinator associated with this vpid */ - orte_process_name_t local_coord; + /** ORTE Process name */ + orte_process_name_t process_name; + + /** State of the checkpoint */ + int state; + + /** OPAL CRS Component */ + char * opal_crs; + + /** Term flag */ + bool term; + + /** FileM request */ + orte_filem_base_request_t *filem_request; }; - typedef struct orte_snapc_full_global_snapshot_t orte_snapc_full_global_snapshot_t; + typedef struct orte_snapc_full_orted_snapshot_t orte_snapc_full_orted_snapshot_t; + OBJ_CLASS_DECLARATION(orte_snapc_full_orted_snapshot_t); - OBJ_CLASS_DECLARATION(orte_snapc_full_global_snapshot_t); - - struct 
orte_snapc_full_local_snapshot_t { + /* + * Local Coordinator per app metadata + */ + struct orte_snapc_full_app_snapshot_t { /** Base SNAPC Global snapshot type */ - orte_snapc_base_snapshot_t super; + orte_snapc_base_local_snapshot_t super; /** Named Pipe Read and Write */ char * comm_pipe_r; @@ -79,14 +99,18 @@ typedef uint8_t orte_snapc_full_cmd_flag_t; struct opal_event comm_pipe_r_eh; bool is_eh_active; - /** State of the process wrt checkpointing */ - int ckpt_state; - }; - typedef struct orte_snapc_full_local_snapshot_t orte_snapc_full_local_snapshot_t; + /** Process pid */ + pid_t process_pid; - OBJ_CLASS_DECLARATION(orte_snapc_full_local_snapshot_t); + /** Term */ + bool term; + }; + typedef struct orte_snapc_full_app_snapshot_t orte_snapc_full_app_snapshot_t; + OBJ_CLASS_DECLARATION(orte_snapc_full_app_snapshot_t); extern bool orte_snapc_full_skip_filem; + extern bool orte_snapc_full_skip_app; + extern bool orte_snapc_full_timing_enabled; int orte_snapc_full_component_query(mca_base_module_t **module, int *priority); @@ -108,12 +132,11 @@ typedef uint8_t orte_snapc_full_cmd_flag_t; int global_coord_finalize(void); int global_coord_setup_job(orte_jobid_t jobid); int global_coord_release_job(orte_jobid_t jobid); - int global_coord_vpid_assoc_update(orte_process_name_t local_coord, - orte_process_name_t proc_name); - int global_coord_vpid_state_update(orte_process_name_t proc_name, - size_t proc_ckpt_state, - char **proc_ckpt_ref, - char **proc_ckpt_loc); + int global_coord_orted_state_update(orte_process_name_t proc_name, + int proc_ckpt_state, + char **proc_ckpt_ref, + char **proc_ckpt_loc, + char **agent_ckpt); /* * Local Coordinator Functionality */ @@ -122,7 +145,7 @@ typedef uint8_t orte_snapc_full_cmd_flag_t; int local_coord_setup_job(orte_jobid_t jobid); int local_coord_release_job(orte_jobid_t jobid); int local_coord_job_state_update(orte_jobid_t jobid, - size_t job_ckpt_state, + int job_ckpt_state, char **job_ckpt_ref, char **job_ckpt_loc); diff 
--git a/orte/mca/snapc/full/snapc_full_app.c b/orte/mca/snapc/full/snapc_full_app.c index 26fa949766..2741b0a0d7 100644 --- a/orte/mca/snapc/full/snapc_full_app.c +++ b/orte/mca/snapc/full/snapc_full_app.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2007 The Trustees of Indiana University. + * Copyright (c) 2004-2009 The Trustees of Indiana University. * All rights reserved. * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. * All rights reserved. @@ -219,7 +219,12 @@ int snapc_full_app_notify_response(opal_cr_ckpt_cmd_state_t resp) opal_cr_currently_stalled = false; app_pid = getpid(); - ret = opal_cr_inc_core(app_pid, local_snapshot, app_term, &cr_state); + if( orte_snapc_full_skip_app ) { + ret = ORTE_SUCCESS; + cr_state = OPAL_CRS_CONTINUE; + } else { + ret = opal_cr_inc_core(app_pid, local_snapshot, app_term, &cr_state); + } if( OPAL_EXISTS == ret ) { OPAL_OUTPUT_VERBOSE((5, mca_snapc_full_component.super.output_handle, "App) notify_response: Stalling the checkpoint progress until state is stable again (PID = %d)\n", diff --git a/orte/mca/snapc/full/snapc_full_component.c b/orte/mca/snapc/full/snapc_full_component.c index 4366d2e93c..f532117880 100644 --- a/orte/mca/snapc/full/snapc_full_component.c +++ b/orte/mca/snapc/full/snapc_full_component.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2008 The Trustees of Indiana University. + * Copyright (c) 2004-2009 The Trustees of Indiana University. * All rights reserved. * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. * All rights reserved. 
@@ -35,6 +35,8 @@ static int snapc_full_open(void); static int snapc_full_close(void); bool orte_snapc_full_skip_filem = false; +bool orte_snapc_full_skip_app = false; +bool orte_snapc_full_timing_enabled = false; /* * Instantiate the public struct with all of our public information @@ -113,6 +115,22 @@ static int snapc_full_open(void) &value); orte_snapc_full_skip_filem = OPAL_INT_TO_BOOL(value); + mca_base_param_reg_int(&mca_snapc_full_component.super.base_version, + "skip_app", + "Not for general use! For debugging only! Shortcut app level coord. [Default = disabled]", + false, false, + 0, + &value); + orte_snapc_full_skip_app = OPAL_INT_TO_BOOL(value); + + mca_base_param_reg_int(&mca_snapc_full_component.super.base_version, + "enable_timing", + "Enable timing information. [Default = disabled]", + false, false, + 0, + &value); + orte_snapc_full_timing_enabled = OPAL_INT_TO_BOOL(value); + /* * Debug Output */ diff --git a/orte/mca/snapc/full/snapc_full_global.c b/orte/mca/snapc/full/snapc_full_global.c index 12d489d153..2236a639d3 100644 --- a/orte/mca/snapc/full/snapc_full_global.c +++ b/orte/mca/snapc/full/snapc_full_global.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2008 The Trustees of Indiana University. + * Copyright (c) 2004-2009 The Trustees of Indiana University. * All rights reserved. * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. * All rights reserved. 
@@ -39,6 +39,7 @@ #include "orte/mca/plm/plm.h" #include "orte/mca/filem/filem.h" #include "orte/mca/grpcomm/grpcomm.h" +#include "orte/runtime/orte_wait.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/snapc/snapc.h" @@ -46,6 +47,8 @@ #include "snapc_full.h" +#include MCA_timer_IMPLEMENTATION_HEADER + /************************************ * Locally Global vars & functions :) ************************************/ @@ -58,74 +61,86 @@ } \ } -static bool snapc_recv_issued = false; -static bool snapc_cmdline_recv_issued = false; +static orte_jobid_t current_global_jobid = 0; +static orte_snapc_base_global_snapshot_t global_snapshot; +static bool updated_job_to_running; +static int current_job_ckpt_state = ORTE_SNAPC_CKPT_STATE_NONE; +static bool global_coord_has_local_children = false; +static bool wait_all_xfer = false; +static double timer_start = 0; +static double timer_local_done = 0; +static double timer_xfer_done = 0; +static double timer_end = 0; +static double get_time(void); +static void print_time(void); + +static int global_init_job_structs(void); + +static bool snapc_orted_recv_issued = false; static int snapc_full_global_start_listener(void); static int snapc_full_global_stop_listener(void); +static void snapc_full_global_orted_recv(int status, + orte_process_name_t* sender, + opal_buffer_t* buffer, + orte_rml_tag_t tag, + void* cbdata); +static void snapc_full_process_orted_request_cmd(int fd, short event, void *cbdata); + +/*** Command Line Interactions */ +static orte_process_name_t orte_checkpoint_sender = {0,0}; +static bool snapc_cmdline_recv_issued = false; static int snapc_full_global_start_cmdline_listener(void); static int snapc_full_global_stop_cmdline_listener(void); +static void snapc_full_global_cmdline_recv(int status, + orte_process_name_t* sender, + opal_buffer_t* buffer, + orte_rml_tag_t tag, + void* cbdata); +static void snapc_full_process_cmdline_request_cmd(int fd, short event, void *cbdata); -static void 
snapc_full_global_cmd_recv(int status, - orte_process_name_t* sender, - opal_buffer_t* buffer, - orte_rml_tag_t tag, - void* cbdata); -static void snapc_full_process_job_update_cmd(orte_process_name_t* sender, - opal_buffer_t* buffer); -static void snapc_full_process_proc_update_cmd(orte_process_name_t* sender, - opal_buffer_t* buffer); -static void snapc_full_process_vpid_assoc_cmd(orte_process_name_t* sender, - opal_buffer_t* buffer); -static void snapc_full_process_establish_dir_cmd(orte_process_name_t* sender, - opal_buffer_t* buffer); -static void snapc_full_process_cmdline_request_cmd(orte_process_name_t* sender, - opal_buffer_t* buffer); +static void snapc_full_process_filem_xfer(void); + +static int snapc_full_establish_snapshot_dir(bool empty_metadata); + +/*** */ +static int snapc_full_global_checkpoint(bool term); +static int snapc_full_global_notify_checkpoint(orte_jobid_t jobid, + bool term); +static int orte_snapc_full_global_set_job_ckpt_info( orte_jobid_t jobid, + int ckpt_state, + char *ckpt_snapshot_ref, + char *ckpt_snapshot_loc, + bool quick); int global_coord_job_state_update(orte_jobid_t jobid, - size_t job_ckpt_state, + int job_ckpt_state, char **job_ckpt_snapshot_ref, char **job_ckpt_snapshot_loc); +static void snapc_full_process_job_update_cmd(orte_process_name_t* sender, + opal_buffer_t* buffer, + bool quick); +static int snapc_full_process_orted_update_cmd(orte_process_name_t* sender, + opal_buffer_t* buffer, + bool quick); +static orte_snapc_full_orted_snapshot_t *find_orted_snapshot(orte_process_name_t *name ); +static orte_snapc_base_local_snapshot_t *find_orted_app_snapshot(orte_snapc_full_orted_snapshot_t *orted_snapshot, + orte_process_name_t *name); -static int orte_snapc_full_global_set_job_ckpt_info( orte_jobid_t jobid, - size_t ckpt_state, - char *ckpt_snapshot_ref, - char *ckpt_snapshot_loc); -static int orte_snapc_full_global_set_vpid_ckpt_info( orte_process_name_t proc, - size_t ckpt_state, - char *ckpt_snapshot_ref, - char 
*ckpt_snapshot_loc); +static int snapc_full_start_filem(orte_snapc_full_orted_snapshot_t *orted_snapshot); +static int snapc_full_wait_filem(void); -static int snapc_full_get_vpid_range( orte_jobid_t jobid, - orte_vpid_t *vpid_start, - orte_vpid_t *vpid_range); - -static int snapc_full_global_checkpoint(orte_jobid_t jobid, - bool term, - char **global_snapshot_handle, - int *ckpt_status); - -static int snapc_full_global_notify_checkpoint( char * global_snapshot_handle, - orte_jobid_t jobid, - bool term); - -static int snapc_full_global_check_for_done(orte_jobid_t jobid); - -static int snapc_full_global_gather_all_files(void); -static bool snapc_full_global_is_done_yet(void); - -static orte_snapc_base_global_snapshot_t global_snapshot; -static orte_process_name_t orte_checkpoint_sender = {0,0}; -static bool updated_job_to_running; - -static size_t cur_job_ckpt_state = ORTE_SNAPC_CKPT_STATE_NONE; -static orte_jobid_t cur_job_id = 0; +static int snapc_full_global_get_min_state(void); +static int write_out_global_metadata(void); /************************ * Function Definitions ************************/ int global_coord_init(void) { + current_global_jobid = 0; + orte_snapc_base_snapshot_seq_number = -1; + return ORTE_SUCCESS; } @@ -136,44 +151,41 @@ int global_coord_finalize(void) { int global_coord_setup_job(orte_jobid_t jobid) { int ret, exit_status = ORTE_SUCCESS; - orte_vpid_t vpid_start = 0, vpid_range = 0; - orte_vpid_t i; /* - * If we have already setup a jobid, warn + * Only allow one job at a time. + * + * It is possible to pass through this function twice since HNP may also be + * a local daemon. So it may be both a global and local coordinator. + * Global: orte_plm_base_setup_job() + * Local : odls_default_module.c */ - /* - * If we pass this way twice the first time will have been from: - * orte_plm_base_setup_job(): As the global coordinator - * The second time will have been from: - * odls_default_module.c: As the local coordinator. 
- * The later case means that we (as the HNP) are acting as both the global and - * local coordinators. - * JJH FIX NOTE: - * This fix imposes the restriction that only one jobid can be checkpointed - * at a time. In the future we will want to lift this restriction. - */ - if( 0 >= cur_job_id ) { - /* Global Coordinator pass */ - cur_job_id = jobid; - } - else if ( jobid == cur_job_id ) { - /* Local Coordinator pass -- Will always happen after Global Coordinator Pass */ + /* Global Coordinator pass */ + if( 0 >= current_global_jobid ) { + current_global_jobid = jobid; OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, - "Global) [%d] Setup job %s again as the local coordinator for %s\n", - getpid(), ORTE_JOBID_PRINT(jobid), ORTE_JOBID_PRINT(cur_job_id))); + "Global) Setup job %s as the Global Coordinator\n", + ORTE_JOBID_PRINT(jobid))); + } + /* Local Coordinator pass - Always happens after global coordinator pass */ + else if ( jobid == current_global_jobid ) { + /* If there are no local children, do not become a local coordinator */ + if( !global_coord_has_local_children ) { + return ORTE_SUCCESS; + } + OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, + "Global) Setup job %s as the Local Coordinator\n", + ORTE_JOBID_PRINT(jobid))); orte_snapc_coord_type |= ORTE_SNAPC_LOCAL_COORD_TYPE; - return local_coord_setup_job(jobid); } + /* Only allow one job at a time */ else { - /* Already setup things for another job, - * We do not currently support the ability to checkpoint more than one - * jobid - */ opal_output(mca_snapc_full_component.super.output_handle, - "global [%d]) Setup job (%d) Failed. Already setup job (%d)\n", getpid(), jobid, cur_job_id); + "Global) Setup of job %s Failed! 
Already setup job %s\n", + ORTE_JOBID_PRINT(jobid), ORTE_JOBID_PRINT(current_global_jobid)); + ORTE_ERROR_LOG(ORTE_ERROR); return ORTE_ERROR; } @@ -184,34 +196,19 @@ int global_coord_setup_job(orte_jobid_t jobid) { orte_snapc_base_snapshot_seq_number = -1; /* - * Get vpid range + * Allocate structure to track node status */ - if( ORTE_SUCCESS != (ret = snapc_full_get_vpid_range(jobid, &vpid_start, &vpid_range) ) ) { + if( ORTE_SUCCESS != (ret = global_init_job_structs()) ) { + ORTE_ERROR_LOG(ret); exit_status = ret; goto cleanup; } - /* - * Allocate the snapshot structures - */ - OBJ_CONSTRUCT(&global_snapshot, orte_snapc_base_global_snapshot_t); - global_snapshot.component_name = strdup(mca_snapc_full_component.super.base_version.mca_component_name); - for(i = vpid_start; i < vpid_start + vpid_range; ++i) { - orte_snapc_full_global_snapshot_t *vpid_snapshot; - - vpid_snapshot = OBJ_NEW(orte_snapc_full_global_snapshot_t); - - vpid_snapshot->super.process_name.jobid = jobid; - vpid_snapshot->super.process_name.vpid = i; - vpid_snapshot->super.term = false; - - opal_list_append(&global_snapshot.snapshots, &(vpid_snapshot->super.crs_snapshot_super.super)); - } - /* * Setup Global Coordinator command processing listener */ if( ORTE_SUCCESS != (ret = snapc_full_global_start_listener()) ) { + ORTE_ERROR_LOG(ret); exit_status = ret; goto cleanup; } @@ -220,6 +217,7 @@ int global_coord_setup_job(orte_jobid_t jobid) { * Setup command line tool checkpoint request listener */ if( ORTE_SUCCESS != (ret = snapc_full_global_start_cmdline_listener()) ) { + ORTE_ERROR_LOG(ret); exit_status = ret; goto cleanup; } @@ -228,39 +226,22 @@ int global_coord_setup_job(orte_jobid_t jobid) { * If requested pre-establish the global snapshot directory */ if(orte_snapc_base_establish_global_snapshot_dir) { - char *global_snapshot_handle = NULL; - char *global_dir = NULL; - - INC_SEQ_NUM(); - global_snapshot_handle = strdup( orte_snapc_base_unique_global_snapshot_name( getpid() ) ); - global_dir 
= orte_snapc_base_get_global_snapshot_directory(global_snapshot_handle); - orte_snapc_base_global_snapshot_loc = strdup(global_dir); - - global_snapshot.seq_num = orte_snapc_base_snapshot_seq_number; - global_snapshot.reference_name = strdup(global_snapshot_handle); - global_snapshot.local_location = opal_dirname(orte_snapc_base_get_global_snapshot_directory(global_snapshot.reference_name)); - + opal_output(0, "Global) Error: Pre-establishment of snapshot directory currently not supported!"); + ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED); +#if 0 OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, "Global) Pre-establish the global snapshot directory\n")); - - /* Creates the directory (with metadata files): - * /tmp/ompi_global_snapshot_PID.ckpt/seq_num - */ - if( ORTE_SUCCESS != (ret = orte_snapc_base_init_global_snapshot_directory(global_snapshot_handle, true))) { + if( ORTE_SUCCESS != (ret = snapc_full_establish_snapshot_dir(true))) { + ORTE_ERROR_LOG(ret); exit_status = ret; goto cleanup; } - - free(global_snapshot_handle); - global_snapshot_handle = NULL; - - free(global_dir); - global_dir = NULL; +#endif } OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, - "Global) [%d] Setup job %s with vpid [%d, %d]\n", - getpid(), ORTE_JOBID_PRINT(jobid), vpid_start, vpid_range)); + "Global) Finished setup of job %s ", + ORTE_JOBID_PRINT(jobid))); cleanup: return exit_status; @@ -277,10 +258,12 @@ int global_coord_release_job(orte_jobid_t jobid) { * Clean up listeners */ if( ORTE_SUCCESS != (ret = snapc_full_global_stop_cmdline_listener()) ) { + ORTE_ERROR_LOG(ret); exit_status = ret; } if( ORTE_SUCCESS != (ret = snapc_full_global_stop_listener()) ) { + ORTE_ERROR_LOG(ret); exit_status = ret; } @@ -292,32 +275,94 @@ int global_coord_release_job(orte_jobid_t jobid) { /****************** * Local functions ******************/ +static int global_init_job_structs(void) +{ + orte_snapc_full_orted_snapshot_t *orted_snapshot = NULL; + 
orte_snapc_base_local_snapshot_t *app_snapshot = NULL; + orte_node_t **nodes = NULL; + orte_job_map_t *map = NULL; + orte_job_t *jdata = NULL; + orte_proc_t **procs = NULL; + orte_std_cntr_t i = 0; + orte_vpid_t p = 0; + + /* look up job data object */ + if (NULL == (jdata = orte_get_job_data_object(current_global_jobid))) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + return ORTE_ERR_NOT_FOUND; + } + + OBJ_CONSTRUCT(&global_snapshot, orte_snapc_base_global_snapshot_t); + /* JJH XXX global_snapshot.component_name = strdup(mca_snapc_full_component.super.base_version.mca_component_name);*/ + + map = jdata->map; + nodes = (orte_node_t**)map->nodes->addr; + + for(i = 0; i < map->num_nodes; i++) { + procs = (orte_proc_t**)nodes[i]->procs->addr; + + OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, + "Global) [%d] Found Daemon %s with %d procs", + i, ORTE_NAME_PRINT(&(nodes[i]->daemon->name)), nodes[i]->num_procs)); + + orted_snapshot = OBJ_NEW(orte_snapc_full_orted_snapshot_t); + + orted_snapshot->process_name.jobid = nodes[i]->daemon->name.jobid; + orted_snapshot->process_name.vpid = nodes[i]->daemon->name.vpid; + + if( orted_snapshot->process_name.jobid == ORTE_PROC_MY_NAME->jobid && + orted_snapshot->process_name.vpid == ORTE_PROC_MY_NAME->vpid ) { + global_coord_has_local_children = true; + } + + for(p = 0; p < nodes[i]->num_procs; ++p) { + OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, + "Global) \t [%d] Found Process %s on Daemon %s", + p, ORTE_NAME_PRINT(&(procs[p]->name)), ORTE_NAME_PRINT(&(nodes[i]->daemon->name)) )); + + app_snapshot = OBJ_NEW(orte_snapc_base_local_snapshot_t); + + app_snapshot->process_name.jobid = procs[p]->name.jobid; + app_snapshot->process_name.vpid = procs[p]->name.vpid; + + opal_list_append(&(orted_snapshot->super.local_snapshots), &(app_snapshot->super)); + } + + + opal_list_append(&global_snapshot.local_snapshots, &(orted_snapshot->super.super)); + } + + return ORTE_SUCCESS; +} + 
+/***************** + * Setup listeners + *****************/ static int snapc_full_global_start_listener(void) { - int exit_status = ORTE_SUCCESS; - int rc; + int ret, exit_status = ORTE_SUCCESS; - if (snapc_recv_issued && orte_process_info.hnp) { + if (snapc_orted_recv_issued && orte_process_info.hnp) { return ORTE_SUCCESS; } OPAL_OUTPUT_VERBOSE((5, mca_snapc_full_component.super.output_handle, - "Global) Receive: Start command recv")); + "Global) Startup Coordinator Channel")); /* * Coordinator command listener */ - if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, - ORTE_RML_TAG_SNAPC_FULL, - ORTE_RML_PERSISTENT, - snapc_full_global_cmd_recv, - NULL))) { - ORTE_ERROR_LOG(rc); - exit_status = rc; + if (ORTE_SUCCESS != (ret = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, + ORTE_RML_TAG_SNAPC_FULL, + ORTE_RML_PERSISTENT, + snapc_full_global_orted_recv, + NULL))) { + ORTE_ERROR_LOG(ret); + exit_status = ret; goto cleanup; } - snapc_recv_issued = true; + snapc_orted_recv_issued = true; cleanup: return exit_status; @@ -325,24 +370,23 @@ static int snapc_full_global_start_listener(void) static int snapc_full_global_stop_listener(void) { - int exit_status = ORTE_SUCCESS; - int rc; + int ret, exit_status = ORTE_SUCCESS; - if (!snapc_recv_issued && orte_process_info.hnp) { + if (!snapc_orted_recv_issued && orte_process_info.hnp) { return ORTE_SUCCESS; } OPAL_OUTPUT_VERBOSE((5, mca_snapc_full_component.super.output_handle, - "Global) Receive stop command recv")); + "Global) Shutdown Coordinator Channel")); - if (ORTE_SUCCESS != (rc = orte_rml.recv_cancel(ORTE_NAME_WILDCARD, - ORTE_RML_TAG_SNAPC_FULL))) { - ORTE_ERROR_LOG(rc); - exit_status = rc; + if (ORTE_SUCCESS != (ret = orte_rml.recv_cancel(ORTE_NAME_WILDCARD, + ORTE_RML_TAG_SNAPC_FULL))) { + ORTE_ERROR_LOG(ret); + exit_status = ret; goto cleanup; } - snapc_recv_issued = false; + snapc_orted_recv_issued = false; cleanup: return exit_status; @@ -350,26 +394,25 @@ static int 
snapc_full_global_stop_listener(void) static int snapc_full_global_start_cmdline_listener(void) { - int exit_status = ORTE_SUCCESS; - int rc; + int ret, exit_status = ORTE_SUCCESS; if (snapc_cmdline_recv_issued && orte_process_info.hnp) { return ORTE_SUCCESS; } OPAL_OUTPUT_VERBOSE((5, mca_snapc_full_component.super.output_handle, - "Global) Receive (Command line): Start command recv")); + "Global) Startup Command Line Channel")); /* * Coordinator command listener */ - if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, - ORTE_RML_TAG_CKPT, - 0, - snapc_full_global_cmd_recv, - NULL))) { - ORTE_ERROR_LOG(rc); - exit_status = rc; + if (ORTE_SUCCESS != (ret = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, + ORTE_RML_TAG_CKPT, + 0, + snapc_full_global_cmdline_recv, + NULL))) { + ORTE_ERROR_LOG(ret); + exit_status = ret; goto cleanup; } @@ -381,20 +424,19 @@ static int snapc_full_global_start_cmdline_listener(void) static int snapc_full_global_stop_cmdline_listener(void) { - int exit_status = ORTE_SUCCESS; - int rc; + int ret, exit_status = ORTE_SUCCESS; if (!snapc_cmdline_recv_issued && orte_process_info.hnp) { return ORTE_SUCCESS; } OPAL_OUTPUT_VERBOSE((5, mca_snapc_full_component.super.output_handle, - "Global) Receive (Command Line) stop command")); + "Global) Shutdown Command Line Channel")); - if (ORTE_SUCCESS != (rc = orte_rml.recv_cancel(ORTE_NAME_WILDCARD, - ORTE_RML_TAG_CKPT))) { - ORTE_ERROR_LOG(rc); - exit_status = rc; + if (ORTE_SUCCESS != (ret = orte_rml.recv_cancel(ORTE_NAME_WILDCARD, + ORTE_RML_TAG_CKPT))) { + ORTE_ERROR_LOG(ret); + exit_status = ret; goto cleanup; } @@ -404,94 +446,525 @@ static int snapc_full_global_stop_cmdline_listener(void) return exit_status; } -void snapc_full_global_cmd_recv(int status, - orte_process_name_t* sender, - opal_buffer_t* buffer, - orte_rml_tag_t tag, - void* cbdata) +/***************** + * Listener Callbacks + *****************/ +static void snapc_full_global_cmdline_recv(int status, + 
orte_process_name_t* sender, + opal_buffer_t* buffer, + orte_rml_tag_t tag, + void* cbdata) { - orte_snapc_full_cmd_flag_t command; - orte_std_cntr_t count; - int rc; + if( ORTE_RML_TAG_CKPT != tag ) { + opal_output(mca_snapc_full_component.super.output_handle, + "Global) Error: Unknown tag: Received a command message from %s (tag = %d).", + ORTE_NAME_PRINT(sender), tag); + ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); + return; + } + OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, + "Global) Command Line: Start a checkpoint operation [Sender = %s]", + ORTE_NAME_PRINT(sender))); + + snapc_cmdline_recv_issued = false; /* Not a persistent RML message */ + + /* + * Do not process this right away - we need to get out of the recv before + * we process the message to avoid performing the rest of the job while + * inside this receive! Instead, setup an event so that the message gets processed + * as soon as we leave the recv. + * + * The macro makes a copy of the buffer, which is released once the event has been processed - the incoming + * buffer, however, is NOT released here, although its payload IS transferred + * to the message buffer for later processing + * + */ + ORTE_MESSAGE_EVENT(sender, buffer, tag, snapc_full_process_cmdline_request_cmd); + + return; +} + +void snapc_full_global_orted_recv(int status, + orte_process_name_t* sender, + opal_buffer_t* buffer, + orte_rml_tag_t tag, + void* cbdata) +{ + if( ORTE_RML_TAG_SNAPC_FULL != tag ) { + opal_output(mca_snapc_full_component.super.output_handle, + "Global) Error: Unknown tag: Received a command message from %s (tag = %d).", + ORTE_NAME_PRINT(sender), tag); + ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); + return; + } + + /* + * This is a message from a Local Coordinator + */ OPAL_OUTPUT_VERBOSE((5, mca_snapc_full_component.super.output_handle, "Global) Receive a command message from %s.", ORTE_NAME_PRINT(sender))); /* - * If this is a command line checkpoint request, handle directly + * Do not process this right away - we need to get
out of the recv before + * we process the message to avoid performing the rest of the job while + * inside this receive! Instead, setup an event so that the message gets processed + * as soon as we leave the recv. + * + * The macro makes a copy of the buffer, which is released once the event has been processed - the incoming + * buffer, however, is NOT released here, although its payload IS transferred + * to the message buffer for later processing + * */ - if( ORTE_RML_TAG_CKPT == tag ) { - OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, - "Global) Command Line: Start a checkpoint operation")); + ORTE_MESSAGE_EVENT(sender, buffer, tag, snapc_full_process_orted_request_cmd); - snapc_cmdline_recv_issued = false; /* Not a persistent RML message */ - snapc_full_process_cmdline_request_cmd(sender, buffer); - return; + return; +} + +/************************************/ +static void snapc_full_process_cmdline_request_cmd(int fd, short event, void *cbdata) +{ + int ret; + orte_message_event_t *mev = (orte_message_event_t*)cbdata; + orte_process_name_t *sender = NULL; + orte_snapc_cmd_flag_t command; + orte_std_cntr_t count = 1; + bool term = false; + orte_jobid_t jobid; + + sender = &(mev->sender); + + count = 1; + if (ORTE_SUCCESS != (ret = opal_dss.unpack(mev->buffer, &command, &count, ORTE_SNAPC_CMD))) { + ORTE_ERROR_LOG(ret); + goto cleanup; } /* - * Otherwise this is an inter-coordinator command (usually updating state info).
+ * orte_checkpoint has requested that a checkpoint be taken */ - count = 1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &command, &count, ORTE_SNAPC_FULL_CMD))) { - ORTE_ERROR_LOG(rc); - return; + if (ORTE_SNAPC_GLOBAL_INIT_CMD == command) { + OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, + "Global) Command line requested a checkpoint [command %d]\n", + command)); + + /* + * Unpack the buffer from the orte_checkpoint command + */ + if( ORTE_SUCCESS != (ret = orte_snapc_base_global_coord_ckpt_init_cmd(sender, mev->buffer, &term, &jobid)) ) { + ORTE_ERROR_LOG(ret); + goto cleanup; + } + + /* + * If the jobid was specified, and does not match the current job, then fail + */ + if( ORTE_JOBID_INVALID != jobid && jobid != current_global_jobid) { + opal_output(mca_snapc_full_component.super.output_handle, + "Global) Error: Jobid %s does not match the current jobid %s", + ORTE_JOBID_PRINT(jobid), ORTE_JOBID_PRINT(current_global_jobid)); + ORTE_ERROR_LOG(ORTE_ERROR); + goto cleanup; + } + + /************************* + * Kick off the checkpoint + *************************/ + orte_checkpoint_sender = *sender; + if(orte_snapc_full_timing_enabled) { + timer_start = get_time(); + } + if( ORTE_SUCCESS != (ret = snapc_full_global_checkpoint(term) ) ) { + ORTE_ERROR_LOG(ret); + goto cleanup; + } + + } + /* + * Terminate the connection (Not currently implemented) + */ + else if (ORTE_SNAPC_GLOBAL_TERM_CMD == command) { + OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, + "Global) Command line requested to terminate connection (command %d)\n", + command)); + ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED); + goto cleanup; + } + /* + * Unknown command + */ + else { + OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, + "Global) Command line sent an unknown command (command %d)\n", + command)); + ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED); + goto cleanup; } + cleanup: + /* release the message event */ + OBJ_RELEASE(mev); + 
return; +} + +static void snapc_full_process_orted_request_cmd(int fd, short event, void *cbdata) +{ + int ret; + orte_message_event_t *mev = (orte_message_event_t*)cbdata; + orte_snapc_full_cmd_flag_t command; + orte_std_cntr_t count; + static int num_inside = 0; + + count = 1; + if (ORTE_SUCCESS != (ret = opal_dss.unpack(mev->buffer, &command, &count, ORTE_SNAPC_FULL_CMD))) { + ORTE_ERROR_LOG(ret); + goto cleanup; + } + + ++num_inside; + switch (command) { + case ORTE_SNAPC_FULL_UPDATE_JOB_STATE_QUICK_CMD: + OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, + "Global) Command: Job State Update (quick)")); + + snapc_full_process_job_update_cmd(&(mev->sender), mev->buffer, true); + break; + case ORTE_SNAPC_FULL_UPDATE_JOB_STATE_CMD: OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, - "Global) Command: Update Job state command")); + "Global) Command: Job State Update")); - snapc_full_process_job_update_cmd(sender, buffer); + snapc_full_process_job_update_cmd(&(mev->sender), mev->buffer, false); break; - case ORTE_SNAPC_FULL_UPDATE_PROC_STATE_CMD: + case ORTE_SNAPC_FULL_UPDATE_ORTED_STATE_QUICK_CMD: OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, - "Global) Command: Update Proc state command")); + "Global) Command: Daemon State Update (quick)")); - snapc_full_process_proc_update_cmd(sender, buffer); + snapc_full_process_orted_update_cmd(&(mev->sender), mev->buffer, true); break; - case ORTE_SNAPC_FULL_VPID_ASSOC_CMD: + case ORTE_SNAPC_FULL_UPDATE_ORTED_STATE_CMD: OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, - "Global) Command: Update process/orted associations")); + "Global) Command: Daemon State Update")); - snapc_full_process_vpid_assoc_cmd(sender, buffer); - break; - - case ORTE_SNAPC_FULL_ESTABLISH_DIR_CMD: - OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, - "Global) Command: Establish checkpoint directory")); - - 
snapc_full_process_establish_dir_cmd(sender, buffer); + snapc_full_process_orted_update_cmd(&(mev->sender), mev->buffer, false); break; default: ORTE_ERROR_LOG(ORTE_ERR_VALUE_OUT_OF_BOUNDS); } + + /* We need to wait for the last notification to start the waiting loop + * if we do not then we could get stuck in a recursive stack. + */ + --num_inside; + if( wait_all_xfer && num_inside <= 0) { + wait_all_xfer = false; + snapc_full_process_filem_xfer(); + } + + cleanup: + /* release the message event */ + OBJ_RELEASE(mev); + return; +} + +static int snapc_full_process_orted_update_cmd(orte_process_name_t* sender, + opal_buffer_t* buffer, + bool quick) +{ + int ret, exit_status = ORTE_SUCCESS; + orte_std_cntr_t count; + orte_process_name_t remote_proc; + size_t num_procs, i; + int remote_ckpt_state; + char *remote_ckpt_ref = NULL, *remote_ckpt_loc = NULL; + char *agent_crs = NULL; + orte_snapc_full_orted_snapshot_t *orted_snapshot = NULL; + orte_snapc_base_local_snapshot_t *app_snapshot = NULL; + int loc_min_state; + char *state_str = NULL; + + orted_snapshot = find_orted_snapshot(sender); + if( NULL == orted_snapshot ) { + exit_status = ORTE_ERROR; + ORTE_ERROR_LOG(ORTE_ERROR); + goto cleanup; + } + + OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, + "Global) Daemon %s: Changed state to:\n", + ORTE_NAME_PRINT(&(orted_snapshot->process_name)) )); + + /* + * Unpack the data (quick) + * - state + * Unpack the data (long) + * - state + * - CRS Component + * - # procs + * - Foreach proc + * - process name + * - ckpt_ref + * - ckpt_loc + */ + count = 1; + if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &remote_ckpt_state, &count, OPAL_INT))) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } + orted_snapshot->state = remote_ckpt_state; + orte_snapc_ckpt_state_str(&state_str, orted_snapshot->state); + OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, + "Global) State: %d (%s)\n", + (int)(orted_snapshot->state), 
state_str)); + free(state_str); + state_str = NULL; + + if( quick ) { + exit_status = ORTE_SUCCESS; + goto post_process; + } + + count = 1; + if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &agent_crs, &count, OPAL_STRING))) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } + if( NULL != orted_snapshot->opal_crs ) { + free( orted_snapshot->opal_crs ); + } + orted_snapshot->opal_crs = strdup(agent_crs); + OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, + "Global) CRS: %s\n", + orted_snapshot->opal_crs)); + + count = 1; + if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &num_procs, &count, OPAL_SIZE))) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } + + for(i = 0; i < num_procs; ++i ) { + count = 1; + if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &remote_proc, &count, ORTE_NAME))) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } + app_snapshot = find_orted_app_snapshot(orted_snapshot, &remote_proc); + OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, + "Global) Process: %s\n", + ORTE_NAME_PRINT(&remote_proc) )); + + count = 1; + if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &remote_ckpt_ref, &count, OPAL_STRING))) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } + if( NULL != app_snapshot->reference_name ) { + free( app_snapshot->reference_name ); + } + app_snapshot->reference_name = strdup(remote_ckpt_ref); + OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, + "Global) Ref: %s\n", + app_snapshot->reference_name )); + + count = 1; + if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &remote_ckpt_loc, &count, OPAL_STRING))) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } + if( NULL != app_snapshot->remote_location ) { + free( app_snapshot->remote_location ); + } + app_snapshot->remote_location = strdup(remote_ckpt_loc); + if( NULL == app_snapshot->local_location ) { + app_snapshot->local_location = 
strdup(orte_snapc_base_global_snapshot_loc); + } + OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, + "Global) R Loc: %s\n", + app_snapshot->remote_location )); + OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, + "Global) L Loc: %s\n", + app_snapshot->local_location )); + + } + + post_process: + loc_min_state = snapc_full_global_get_min_state(); + + /* + * Notify the orte-checkpoint command once we have everyone running. + * No need to broadcast this to everyone since they already know. + */ + if( ORTE_SNAPC_CKPT_STATE_RUNNING == loc_min_state && + ORTE_SNAPC_CKPT_STATE_RUNNING != current_job_ckpt_state) { + current_job_ckpt_state = loc_min_state; + if( ORTE_SUCCESS != (ret = orte_snapc_base_global_coord_ckpt_update_cmd(&orte_checkpoint_sender, + global_snapshot.reference_name, + global_snapshot.seq_num, + current_job_ckpt_state)) ) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } + } + + /* + * if(all_orted == FINISHED_LOCAL) { + * xcast(FIN_LOCAL) + * if( !xfer ) { + * xcast(FIN) -- happens in job_state_update -- + * } + * } + * if(orted == FINISHED_LOCAL && xfer) { + * start_filem_xfer(); + * send(FIN) when finished with xfer + * } + */ + /* + * If all daemons have finished + */ + if( loc_min_state == ORTE_SNAPC_CKPT_STATE_FINISHED_LOCAL ) { + if(orte_snapc_full_timing_enabled) { + timer_local_done = get_time(); + } + + if( ORTE_SNAPC_CKPT_STATE_NONE != current_job_ckpt_state ) { + if( loc_min_state == current_job_ckpt_state) { + opal_output(0, "Global) JJH WARNING!!: (%d) == (%d)", loc_min_state, current_job_ckpt_state); + } + } + + /* + * If we know that there is no file transfer, just fast path the + * finished message, the local coordinator will know how to handle it. 
+ */ + if( orte_snapc_base_store_in_place || orte_snapc_full_skip_filem) { + current_job_ckpt_state = ORTE_SNAPC_CKPT_STATE_FINISHED; + } else { + current_job_ckpt_state = loc_min_state; + } + + if( NULL != state_str ) { + free(state_str); + } + orte_snapc_ckpt_state_str(&state_str, current_job_ckpt_state); + OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, + "Global) Job State Changed: %d (%s)\n", + (int)current_job_ckpt_state, state_str )); + free(state_str); + state_str = NULL; + + if( ORTE_SUCCESS != (ret = orte_snapc_full_global_set_job_ckpt_info(current_global_jobid, + current_job_ckpt_state, + NULL, NULL, true) ) ) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } + } + + /* + * If the process has finished the local checkpoint, start any transfers + * while the other daemons are reporting in. + * + * if(orted == FINISHED_LOCAL && xfer) { + * start_filem_xfer(); + * send(FIN) when finished with xfer + * } + */ + if( ORTE_SNAPC_CKPT_STATE_FINISHED_LOCAL == orted_snapshot->state ) { + if(!orte_snapc_base_store_in_place && !orte_snapc_full_skip_filem) { + /* Start the transfer of files while other daemons are reporting in */ + orted_snapshot->state = ORTE_SNAPC_CKPT_STATE_FILE_XFER; + + OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, + "Global) Starting FileM (%s)", + ORTE_NAME_PRINT(&orted_snapshot->process_name))); + if( ORTE_SUCCESS != (ret = snapc_full_start_filem(orted_snapshot) ) ) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } + } + } + + /* + * If all of the daemons are currently transferring data, + * wait here until done. 
Then xcast(FIN) + */ + loc_min_state = snapc_full_global_get_min_state(); + if( ORTE_SNAPC_CKPT_STATE_FILE_XFER == loc_min_state ) { + wait_all_xfer = true; + } + + cleanup: + if( NULL != state_str ) { + free(state_str); + state_str = NULL; + } + + return exit_status; +} + +static void snapc_full_process_filem_xfer(void) +{ + int ret; + char * state_str = NULL; + + OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, + "Global) Wait for all FileM to complete")); + if( ORTE_SUCCESS != (ret = snapc_full_wait_filem() ) ) { + ORTE_ERROR_LOG(ret); + goto cleanup; + } + + if(orte_snapc_full_timing_enabled) { + timer_xfer_done = get_time(); + } + current_job_ckpt_state = ORTE_SNAPC_CKPT_STATE_FINISHED; + + orte_snapc_ckpt_state_str(&state_str, current_job_ckpt_state); + OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, + "Global) Job State Changed: %d (%s) -- Done with Transfer of files\n", + (int)current_job_ckpt_state, state_str )); + + if( ORTE_SUCCESS != (ret = orte_snapc_full_global_set_job_ckpt_info(current_global_jobid, + current_job_ckpt_state, + NULL, NULL, true) ) ) { + ORTE_ERROR_LOG(ret); + goto cleanup; + } + + cleanup: + if(NULL != state_str ){ + free(state_str); + state_str = NULL; + } + + return; } static void snapc_full_process_job_update_cmd(orte_process_name_t* sender, - opal_buffer_t* buffer) + opal_buffer_t* buffer, + bool quick) { int ret, exit_status = ORTE_SUCCESS; orte_std_cntr_t count; orte_jobid_t jobid; - size_t job_ckpt_state = ORTE_SNAPC_CKPT_STATE_NONE; - char *job_ckpt_snapshot_ref = NULL; - char *job_ckpt_snapshot_loc = NULL; - - /* - * If we sent this message to ourself then we will process it elsewhere - */ - if( sender->jobid == ORTE_PROC_MY_NAME->jobid && - sender->vpid == ORTE_PROC_MY_NAME->vpid ) { - OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, - "Global) Command: Reflect the job update command")); - return; - } + int job_ckpt_state = ORTE_SNAPC_CKPT_STATE_NONE; + char 
*job_ckpt_snapshot_ref = NULL; + char *job_ckpt_snapshot_loc = NULL; /* * Unpack the data @@ -504,23 +977,28 @@ static void snapc_full_process_job_update_cmd(orte_process_name_t* sender, exit_status = ret; goto cleanup; } + count = 1; - if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &job_ckpt_state, &count, OPAL_SIZE))) { + if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &job_ckpt_state, &count, OPAL_INT))) { ORTE_ERROR_LOG(ret); exit_status = ret; goto cleanup; } - count = 1; - if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &job_ckpt_snapshot_ref, &count, OPAL_STRING))) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - count = 1; - if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &job_ckpt_snapshot_loc, &count, OPAL_STRING))) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; + + if( !quick ) { + count = 1; + if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &job_ckpt_snapshot_ref, &count, OPAL_STRING))) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } + + count = 1; + if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &job_ckpt_snapshot_loc, &count, OPAL_STRING))) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } } if( ORTE_SUCCESS != (ret = global_coord_job_state_update(jobid, @@ -536,27 +1014,261 @@ static void snapc_full_process_job_update_cmd(orte_process_name_t* sender, return; } +static int snapc_full_establish_snapshot_dir(bool empty_metadata) +{ + int ret; + char * global_snapshot_handle = NULL; + + /********************* + * Generate the global snapshot directory, and unique global snapshot handle + *********************/ + INC_SEQ_NUM(); + if( NULL == global_snapshot_handle ) { + orte_snapc_base_unique_global_snapshot_name(&global_snapshot_handle, getpid()); + } + + orte_snapc_base_get_global_snapshot_directory(&orte_snapc_base_global_snapshot_loc, global_snapshot_handle); + + global_snapshot.seq_num = orte_snapc_base_snapshot_seq_number; + global_snapshot.reference_name = 
strdup(global_snapshot_handle); + global_snapshot.local_location = opal_dirname(orte_snapc_base_global_snapshot_loc); + + OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, + "Global) Setup Directory (seq = %d) (dir = %s)", + global_snapshot.seq_num, orte_snapc_base_global_snapshot_loc)); + + /* Creates the directory (with metadata files): + * /tmp/ompi_global_snapshot_PID.ckpt/seq_num + */ + if( ORTE_SUCCESS != (ret = orte_snapc_base_init_global_snapshot_directory(global_snapshot.reference_name, empty_metadata))) { + ORTE_ERROR_LOG(ret); + return ret; + } + + if( NULL != global_snapshot_handle ) { + free(global_snapshot_handle); + global_snapshot_handle = NULL; + } + + return ORTE_SUCCESS; +} + +static int snapc_full_global_checkpoint(bool term) +{ + int ret, exit_status = ORTE_SUCCESS; + + OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, + "Global) Checkpoint of job %s has been requested\n", + ORTE_JOBID_PRINT(current_global_jobid))); + + /********************* + * Generate the global snapshot directory, and unique global snapshot handle + *********************/ + if( ORTE_SUCCESS != (ret = snapc_full_establish_snapshot_dir(false))) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } + + /*********************************** + * Do an update handshake with the orte_checkpoint command + ***********************************/ + updated_job_to_running = false; + if( ORTE_SUCCESS != (ret = orte_snapc_base_global_coord_ckpt_update_cmd(&orte_checkpoint_sender, + global_snapshot.reference_name, + global_snapshot.seq_num, + ORTE_SNAPC_CKPT_STATE_REQUEST) ) ) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } + + OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, + "Global) Using the checkpoint directory (%s)\n", + global_snapshot.reference_name)); + + /********************** + * Notify the Local Snapshot Coordinators of the checkpoint request + **********************/ + 
OPAL_OUTPUT_VERBOSE((15, mca_snapc_full_component.super.output_handle, + "Global) Notifying the Local Coordinators\n")); + + if( ORTE_SUCCESS != (ret = snapc_full_global_notify_checkpoint(current_global_jobid, + term))) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } + + cleanup: + return exit_status; +} + +static int snapc_full_global_notify_checkpoint(orte_jobid_t jobid, + bool term) +{ + int ret, exit_status = ORTE_SUCCESS; + orte_snapc_full_orted_snapshot_t *orted_snapshot = NULL; + opal_list_item_t* item = NULL; + char * global_dir = NULL; + int ckpt_state; + + orte_snapc_base_get_global_snapshot_directory(&global_dir, global_snapshot.reference_name); + + if( term ) { + ckpt_state = ORTE_SNAPC_CKPT_STATE_PENDING_TERM; + } else { + ckpt_state = ORTE_SNAPC_CKPT_STATE_PENDING; + } + + /* + * Update the global structure + */ + for(item = opal_list_get_first(&global_snapshot.local_snapshots); + item != opal_list_get_end(&global_snapshot.local_snapshots); + item = opal_list_get_next(item) ) { + orted_snapshot = (orte_snapc_full_orted_snapshot_t*)item; + orted_snapshot->state = ckpt_state; + orted_snapshot->term = term; + } + + /* + * Update the job state, and broadcast to all local daemons + */ + orte_snapc_base_global_snapshot_loc = strdup(global_dir); + if( ORTE_SUCCESS != (ret = orte_snapc_full_global_set_job_ckpt_info(jobid, + ckpt_state, + global_snapshot.reference_name, + global_dir, + false) ) ) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } + + cleanup: + if( NULL != global_dir) + free(global_dir); + + return exit_status; +} + +/********************************** + * Job/Proc State Set/Get Routines + **********************************/ +static int orte_snapc_full_global_set_job_ckpt_info( orte_jobid_t jobid, + int ckpt_state, + char *ckpt_snapshot_ref, + char *ckpt_snapshot_loc, + bool quick) +{ + int ret, exit_status = ORTE_SUCCESS; + orte_snapc_full_cmd_flag_t command; + opal_buffer_t buffer; + char * state_str = NULL; 
+ + /* + * Update all Local Coordinators (broadcast operation) + */ + OBJ_CONSTRUCT(&buffer, opal_buffer_t); + + if( quick ) { + command = ORTE_SNAPC_FULL_UPDATE_JOB_STATE_QUICK_CMD; + } else { + command = ORTE_SNAPC_FULL_UPDATE_JOB_STATE_CMD; + } + + if (ORTE_SUCCESS != (ret = opal_dss.pack(&buffer, &command, 1, ORTE_SNAPC_FULL_CMD))) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } + + if (ORTE_SUCCESS != (ret = opal_dss.pack(&buffer, &jobid, 1, ORTE_JOBID))) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } + + if (ORTE_SUCCESS != (ret = opal_dss.pack(&buffer, &ckpt_state, 1, OPAL_INT))) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } + + if( quick ) { + goto process_msg; + } + + if (ORTE_SUCCESS != (ret = opal_dss.pack(&buffer, &ckpt_snapshot_ref, 1, OPAL_STRING))) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } + + if (ORTE_SUCCESS != (ret = opal_dss.pack(&buffer, &ckpt_snapshot_loc, 1, OPAL_STRING))) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } + + process_msg: + orte_snapc_ckpt_state_str(&state_str, ckpt_state); + OPAL_OUTPUT_VERBOSE((15, mca_snapc_full_component.super.output_handle, + "Global) Notify Local Coordinators of job %s state change to %d (%s)\n", + ORTE_JOBID_PRINT(jobid), (int)ckpt_state, state_str )); + free(state_str); + state_str = NULL; + + if( ORTE_SUCCESS != (ret = orte_grpcomm.xcast(ORTE_PROC_MY_NAME->jobid, &buffer, ORTE_RML_TAG_SNAPC_FULL)) ) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } + + /* + * We will also receive the job update, and process in the RML callback + */ + + cleanup: + if( NULL != state_str ) { + free(state_str); + state_str = NULL; + } + + OBJ_DESTRUCT(&buffer); + return exit_status; +} + int global_coord_job_state_update(orte_jobid_t jobid, - size_t job_ckpt_state, + int job_ckpt_state, char **job_ckpt_snapshot_ref, char **job_ckpt_snapshot_loc) { int ret, exit_status = ORTE_SUCCESS; + 
orte_snapc_full_orted_snapshot_t *orted_snapshot = NULL; + orte_snapc_base_local_snapshot_t *app_snapshot = NULL; opal_list_item_t* item = NULL; + opal_list_item_t* aitem = NULL; bool term_job = false; + char * state_str = NULL; + orte_snapc_ckpt_state_str(&state_str, job_ckpt_state); OPAL_OUTPUT_VERBOSE((15, mca_snapc_full_component.super.output_handle, - "Global) Job update command: jobid %s -> state %d\n", - ORTE_JOBID_PRINT(jobid), (int)job_ckpt_state)); + "Global) Job update command: jobid %s -> state %d (%s)\n", + ORTE_JOBID_PRINT(jobid), (int)job_ckpt_state, state_str )); + free(state_str); + state_str = NULL; /************************ * Update the orte_checkpoint command ************************/ - cur_job_ckpt_state = job_ckpt_state; + current_job_ckpt_state = job_ckpt_state; if( ORTE_SUCCESS != (ret = orte_snapc_base_global_coord_ckpt_update_cmd(&orte_checkpoint_sender, global_snapshot.reference_name, global_snapshot.seq_num, - cur_job_ckpt_state)) ) { + current_job_ckpt_state)) ) { + ORTE_ERROR_LOG(ret); exit_status = ret; goto cleanup; } @@ -572,79 +1284,83 @@ int global_coord_job_state_update(orte_jobid_t jobid, } } - if(ORTE_SNAPC_CKPT_STATE_REQUEST == job_ckpt_state ) { -#if 0 - /* - * Start the checkpoint, now that we have the jobid - */ - if( ORTE_SUCCESS != (ret = snapc_full_global_checkpoint(jobid, term, &global_snapshot_handle, &job_ckpt_state) ) ) { - exit_status = ret; - goto cleanup; - } -#else - opal_output(mca_snapc_full_component.super.output_handle, - "ERROR: Internal Checkpoint request not implemented."); -#endif - } /* - * If we need to transfer files + * If we have completed locally, and not transfering files + * then just finish the checkpoint operation. 
+ * + * Otherwise the FIN is xcast'ed in process_orted_update_cmd() */ - else if( ORTE_SNAPC_CKPT_STATE_FILE_XFER == job_ckpt_state ) { - /********************** - * Gather all of the files locally - * Note: We don't need to worry about the return code in as much since the - * rest of the functions know what to do with an error scenario. - **********************/ - if( ORTE_SUCCESS != (ret = snapc_full_global_gather_all_files()) ) { - exit_status = ret; - cur_job_ckpt_state = ORTE_SNAPC_CKPT_STATE_ERROR; - } - - /********************************** - * Update the job checkpoint state - **********************************/ - if( ORTE_SNAPC_CKPT_STATE_ERROR != cur_job_ckpt_state ) { - cur_job_ckpt_state = ORTE_SNAPC_CKPT_STATE_FINISHED; - } - - if( ORTE_SUCCESS != (ret = orte_snapc_full_global_set_job_ckpt_info(jobid, - cur_job_ckpt_state, - global_snapshot.reference_name, - orte_snapc_base_global_snapshot_loc) ) ) { - exit_status = ret; - goto cleanup; - } - } - else if( ORTE_SNAPC_CKPT_STATE_FINISHED == job_ckpt_state || - ORTE_SNAPC_CKPT_STATE_ERROR == job_ckpt_state ) { - /*********************************** - * Update the vpid checkpoint state - ***********************************/ - for(item = opal_list_get_first(&global_snapshot.snapshots); - item != opal_list_get_end(&global_snapshot.snapshots); - item = opal_list_get_next(item) ) { - orte_snapc_full_global_snapshot_t *vpid_snapshot; - vpid_snapshot = (orte_snapc_full_global_snapshot_t*)item; - - vpid_snapshot->super.state = ORTE_SNAPC_CKPT_STATE_NONE; - - if( vpid_snapshot->super.term ){ - term_job = true; - } - - if (ORTE_SUCCESS != (ret = orte_snapc_full_global_set_vpid_ckpt_info(vpid_snapshot->super.process_name, - vpid_snapshot->super.state, - vpid_snapshot->super.crs_snapshot_super.reference_name, - vpid_snapshot->super.crs_snapshot_super.local_location) ) ) { + if( ORTE_SNAPC_CKPT_STATE_FINISHED_LOCAL == job_ckpt_state ) { + if( orte_snapc_base_store_in_place || orte_snapc_full_skip_filem) { + if( 
ORTE_SUCCESS != (ret = orte_snapc_full_global_set_job_ckpt_info(current_global_jobid, + ORTE_SNAPC_CKPT_STATE_FINISHED, + NULL, NULL, true) ) ) { + ORTE_ERROR_LOG(ret); exit_status = ret; goto cleanup; } } + } + /* + * Once finished, then cleanup and finalize the global snapshot + */ + else if( ORTE_SNAPC_CKPT_STATE_FINISHED == job_ckpt_state || + ORTE_SNAPC_CKPT_STATE_ERROR == job_ckpt_state ) { + /* + * Write out metadata + */ + write_out_global_metadata(); + + /* + * Reset global data structures + */ + for(item = opal_list_get_first(&(global_snapshot.local_snapshots)); + item != opal_list_get_end(&(global_snapshot.local_snapshots)); + item = opal_list_get_next(item) ) { + orted_snapshot = (orte_snapc_full_orted_snapshot_t*)item; + + orted_snapshot->state = ORTE_SNAPC_CKPT_STATE_NONE; + + if( orted_snapshot->term ) { + term_job = true; + } + + for(aitem = opal_list_get_first(&(orted_snapshot->super.local_snapshots)); + aitem != opal_list_get_end(&(orted_snapshot->super.local_snapshots)); + aitem = opal_list_get_next(aitem) ) { + app_snapshot = (orte_snapc_base_local_snapshot_t*)aitem; + + app_snapshot->state = ORTE_SNAPC_CKPT_STATE_NONE; + if( NULL != app_snapshot->reference_name ) { + free(app_snapshot->reference_name); + app_snapshot->reference_name = NULL; + } + if( NULL != app_snapshot->local_location ) { + free(app_snapshot->local_location); + app_snapshot->local_location = NULL; + } + if( NULL != app_snapshot->remote_location ) { + free(app_snapshot->remote_location); + app_snapshot->remote_location = NULL; + } + } + } + + if(orte_snapc_full_timing_enabled) { + timer_end = get_time(); + print_time(); + timer_start = 0; + timer_local_done = 0; + timer_xfer_done = 0; + timer_end = 0; + } + /************************ * Set up the Command Line listener again *************************/ if( ORTE_SUCCESS != (ret = snapc_full_global_start_cmdline_listener() ) ){ + ORTE_ERROR_LOG(ret); exit_status = ret; } @@ -657,651 +1373,130 @@ int 
global_coord_job_state_update(orte_jobid_t jobid, orte_plm.terminate_job(jobid); } } + /* + * This should not happen, since this state is always handled locally + */ + else if( ORTE_SNAPC_CKPT_STATE_FILE_XFER == job_ckpt_state ) { + OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, + "Global) JJH WARNING: job state = %d (FILE_XFER)", + job_ckpt_state)); + } + /* + * This should not happen, since we do not handle this case + */ + else if(ORTE_SNAPC_CKPT_STATE_REQUEST == job_ckpt_state ) { + opal_output(mca_snapc_full_component.super.output_handle, + "ERROR: Internal Checkpoint request not implemented."); + ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED); + } cleanup: + if( NULL != state_str) { + free(state_str); + state_str = NULL; + } + return exit_status; } -static void snapc_full_process_proc_update_cmd(orte_process_name_t* sender, - opal_buffer_t* buffer) -{ - int ret, exit_status = ORTE_SUCCESS; - orte_std_cntr_t count; - orte_process_name_t remote_proc; - char *remote_ckpt_ref = NULL, *remote_ckpt_loc = NULL; - size_t remote_ckpt_state; - - /* - * Unpack the data - * - process name - * - ckpt_state - * - ckpt_ref - * - ckpt_loc - */ - count = 1; - if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &remote_proc, &count, ORTE_NAME))) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - count = 1; - if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &remote_ckpt_state, &count, OPAL_SIZE))) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - count = 1; - if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &remote_ckpt_ref, &count, OPAL_STRING))) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - count = 1; - if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &remote_ckpt_loc, &count, OPAL_STRING))) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - - if( ORTE_SUCCESS != (ret = global_coord_vpid_state_update(remote_proc, remote_ckpt_state, &remote_ckpt_ref, &remote_ckpt_loc)) ) { - 
ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - - cleanup: - return; -} - -int global_coord_vpid_state_update(orte_process_name_t proc_name, - size_t proc_ckpt_state, - char **proc_ckpt_ref, - char **proc_ckpt_loc) -{ - int ret, exit_status = ORTE_SUCCESS; - orte_snapc_full_global_snapshot_t *vpid_snapshot = NULL; - opal_list_item_t* item = NULL; - - OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, - "Global) Process %s: Changed to state to:\n", - ORTE_NAME_PRINT(&proc_name))); - OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, - "Global) State: %d\n", - (int)proc_ckpt_state)); - OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, - "Global) Snapshot Ref: [%s]\n", - *proc_ckpt_ref)); - OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, - "Global) Remote Location: [%s]\n", - *proc_ckpt_loc)); - - /* - * Find this process and update it's information - */ - for(item = opal_list_get_first(&global_snapshot.snapshots); - item != opal_list_get_end(&global_snapshot.snapshots); - item = opal_list_get_next(item) ) { - vpid_snapshot = (orte_snapc_full_global_snapshot_t*)item; - - if(vpid_snapshot->super.process_name.jobid == proc_name.jobid && - vpid_snapshot->super.process_name.vpid == proc_name.vpid) { - - vpid_snapshot->super.state = proc_ckpt_state; - vpid_snapshot->super.crs_snapshot_super.reference_name = strdup(*proc_ckpt_ref); - vpid_snapshot->super.crs_snapshot_super.remote_location = strdup(*proc_ckpt_loc); - - if(proc_ckpt_state == ORTE_SNAPC_CKPT_STATE_FINISHED || - proc_ckpt_state == ORTE_SNAPC_CKPT_STATE_ERROR ) { - snapc_full_global_check_for_done(vpid_snapshot->super.process_name.jobid); - } - break; - } - } - - /* - * Update the global struct - */ - if( ORTE_SUCCESS != (ret = orte_snapc_full_global_set_vpid_ckpt_info(proc_name, - proc_ckpt_state, - *proc_ckpt_ref, - *proc_ckpt_loc))) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - - /* - * 
Update the job to running - */ - if( !updated_job_to_running) { - char * global_dir = NULL; - global_dir = orte_snapc_base_get_global_snapshot_directory(global_snapshot.reference_name); - - orte_snapc_base_global_snapshot_loc = strdup(global_dir); - - if( ORTE_SUCCESS != (ret = orte_snapc_full_global_set_job_ckpt_info(proc_name.jobid, - ORTE_SNAPC_CKPT_STATE_RUNNING, - global_snapshot.reference_name, - global_dir) ) ) { - free(global_dir); - exit_status = ret; - goto cleanup; - } - - free(global_dir); - updated_job_to_running = true; - } - - cleanup: - return exit_status; -} - -static void snapc_full_process_vpid_assoc_cmd(orte_process_name_t* sender, - opal_buffer_t* buffer) +static int write_out_global_metadata(void) { int ret; - orte_std_cntr_t count; - orte_process_name_t tmp_proc_name; - size_t num_vpids = 0, i; + orte_snapc_full_orted_snapshot_t *orted_snapshot = NULL; + orte_snapc_base_local_snapshot_t *app_snapshot = NULL; + opal_list_item_t* orted_item = NULL; + opal_list_item_t* app_item = NULL; - count = 1; - if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &num_vpids, &count, OPAL_SIZE))) { - opal_output(mca_snapc_full_component.super.output_handle, - "Global) vpid_assoc: Failed to unpack num_vpids from peer %s\n", - ORTE_NAME_PRINT(sender)); - goto cleanup; - } + OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, + "Global) Updating Metadata")); - for(i = 0; i < num_vpids; ++i) { - count = 1; - if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &tmp_proc_name, &count, ORTE_NAME))) { - opal_output(mca_snapc_full_component.super.output_handle, - "Global) vpid_assoc: Failed to unpack process name from peer %s\n", - ORTE_NAME_PRINT(sender)); - goto cleanup; + for(orted_item = opal_list_get_first(&(global_snapshot.local_snapshots)); + orted_item != opal_list_get_end(&(global_snapshot.local_snapshots)); + orted_item = opal_list_get_next(orted_item) ) { + orted_snapshot = (orte_snapc_full_orted_snapshot_t*)orted_item; + + if( 
ORTE_SNAPC_CKPT_STATE_ERROR == orted_snapshot->state ) { + return ORTE_ERROR; } - global_coord_vpid_assoc_update(*sender, tmp_proc_name); - } + for(app_item = opal_list_get_first(&(orted_snapshot->super.local_snapshots)); + app_item != opal_list_get_end(&(orted_snapshot->super.local_snapshots)); + app_item = opal_list_get_next(app_item) ) { + app_snapshot = (orte_snapc_base_local_snapshot_t*)app_item; - cleanup: - return; -} + OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, + "Global) Process Name: %s\n", + ORTE_NAME_PRINT(&app_snapshot->process_name) )); + OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, + "Global) Reference : %s\n", + app_snapshot->reference_name)); + OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, + "Global) Location : %s\n", + app_snapshot->local_location)); -int global_coord_vpid_assoc_update(orte_process_name_t local_coord, - orte_process_name_t proc_name) -{ - orte_snapc_full_global_snapshot_t *vpid_snapshot = NULL; - opal_list_item_t* item = NULL; - - for(item = opal_list_get_first(&global_snapshot.snapshots); - item != opal_list_get_end(&global_snapshot.snapshots); - item = opal_list_get_next(item) ) { - vpid_snapshot = (orte_snapc_full_global_snapshot_t*)item; - - if(vpid_snapshot->super.process_name.jobid == proc_name.jobid && - vpid_snapshot->super.process_name.vpid == proc_name.vpid) { - vpid_snapshot->local_coord.vpid = local_coord.vpid; - vpid_snapshot->local_coord.jobid = local_coord.jobid; - break; + if(ORTE_SUCCESS != (ret = orte_snapc_base_add_vpid_metadata(&app_snapshot->process_name, + global_snapshot.reference_name, + app_snapshot->reference_name, + app_snapshot->local_location, + orted_snapshot->opal_crs) ) ){ + ORTE_ERROR_LOG(ret); + return ret; + } } + } + orte_snapc_base_finalize_metadata(global_snapshot.reference_name); + return ORTE_SUCCESS; } -static void snapc_full_process_establish_dir_cmd(orte_process_name_t* sender, - opal_buffer_t* exbuf) +static 
orte_snapc_full_orted_snapshot_t *find_orted_snapshot(orte_process_name_t *name ) { - int ret, exit_status = ORTE_SUCCESS; - orte_snapc_full_cmd_flag_t command = ORTE_SNAPC_FULL_ESTABLISH_DIR_CMD; - opal_buffer_t buffer; - - /* Send back: - * - Reference - * - Local location - */ - OBJ_CONSTRUCT(&buffer, opal_buffer_t); - - if (ORTE_SUCCESS != (ret = opal_dss.pack(&buffer, &command, 1, ORTE_SNAPC_FULL_CMD))) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - if (ORTE_SUCCESS != (ret = opal_dss.pack(&buffer, &(global_snapshot.reference_name), 1, OPAL_STRING))) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - if (ORTE_SUCCESS != (ret = opal_dss.pack(&buffer, &(orte_snapc_base_global_snapshot_loc), 1, OPAL_STRING))) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - - if (0 > (ret = orte_rml.send_buffer(sender, &buffer, ORTE_RML_TAG_SNAPC_FULL, 0))) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - - cleanup: - OBJ_DESTRUCT(&buffer); - return; -} - -static void snapc_full_process_cmdline_request_cmd(orte_process_name_t* sender, - opal_buffer_t* buffer) -{ - int ret, exit_status = ORTE_SUCCESS; - orte_snapc_cmd_flag_t command; - orte_std_cntr_t count = 1; - bool term = false; - int ckpt_status = ORTE_SUCCESS; - char *global_snapshot_handle = NULL; - orte_jobid_t jobid; - - count = 1; - if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &command, &count, ORTE_SNAPC_CMD))) { - exit_status = ret; - goto cleanup; - } - - /* - * orte_checkpoint has requested that a checkpoint be taken - */ - if (ORTE_SNAPC_GLOBAL_INIT_CMD == command) { - OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, - "Global) Command line requested a checkpoint (command %d)\n", - command)); - /******************** - * Do the basic handshake with the orte_checkpoint command - ********************/ - if( ORTE_SUCCESS != (ret = orte_snapc_base_global_coord_ckpt_init_cmd(sender, buffer, &term, &jobid)) ) { - 
exit_status = ret; - goto cleanup; - } - - /* If the command line did not specify a jobid, then use the one we - * are watching by default - */ - if( ORTE_JOBID_INVALID == jobid ) { - jobid = cur_job_id; - } - - /* Save things */ - orte_checkpoint_sender = *sender; - - /************************* - * Kick off the checkpoint - *************************/ - if( ORTE_SUCCESS != (ret = snapc_full_global_checkpoint(jobid, term, &global_snapshot_handle, &ckpt_status) ) ) { - exit_status = ret; - /* We don't want to terminate here, becase orte_checkpoint may be waiting for - * us to come back with something, so just send back the empty values, and - * it will know what to do - */ - } - - } - else if (ORTE_SNAPC_GLOBAL_TERM_CMD == command) { - OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, - "Global) Command line requested to terminate connection (command %d)\n", - command)); - /* Something must have happened so we are forced to terminate */ - goto cleanup; - } - /* - * Unknown command - */ - else { - OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, - "Global) Command line sent an unknown command (command %d)\n", - command)); - goto cleanup; - } - - cleanup: - return; -} - -/********************************** - * Job/Proc State Set/Get Routines - **********************************/ -static int orte_snapc_full_global_set_job_ckpt_info( orte_jobid_t jobid, - size_t ckpt_state, - char *ckpt_snapshot_ref, - char *ckpt_snapshot_loc) -{ - int ret, exit_status = ORTE_SUCCESS; - orte_job_t *jdata = NULL; - opal_buffer_t buffer; - orte_snapc_full_cmd_flag_t command = ORTE_SNAPC_FULL_UPDATE_JOB_STATE_CMD; - - /* - * Update locally - Global Coordinator structures - */ - /* Get the job data object */ - if (NULL == (jdata = orte_get_job_data_object(jobid))) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - exit_status = ORTE_ERR_NOT_FOUND; - goto cleanup; - } - - /* Update Local structures */ - jdata->ckpt_state = ckpt_state; - if( NULL != 
jdata->ckpt_snapshot_ref) { - free(jdata->ckpt_snapshot_ref); - jdata->ckpt_snapshot_ref = NULL; - } - jdata->ckpt_snapshot_ref = strdup(ckpt_snapshot_ref); - if( NULL != jdata->ckpt_snapshot_loc) { - free(jdata->ckpt_snapshot_loc); - jdata->ckpt_snapshot_loc = NULL; - } - jdata->ckpt_snapshot_loc = strdup(ckpt_snapshot_loc); - - /* - * Update all Local Coordinators (broadcast operation) - */ - OBJ_CONSTRUCT(&buffer, opal_buffer_t); - - if (ORTE_SUCCESS != (ret = opal_dss.pack(&buffer, &command, 1, ORTE_SNAPC_FULL_CMD))) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - - if (ORTE_SUCCESS != (ret = opal_dss.pack(&buffer, &jobid, 1, ORTE_JOBID))) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - - if (ORTE_SUCCESS != (ret = opal_dss.pack(&buffer, &ckpt_state, 1, OPAL_SIZE))) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - - if (ORTE_SUCCESS != (ret = opal_dss.pack(&buffer, &ckpt_snapshot_ref, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - - if (ORTE_SUCCESS != (ret = opal_dss.pack(&buffer, &ckpt_snapshot_loc, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - - OPAL_OUTPUT_VERBOSE((15, mca_snapc_full_component.super.output_handle, - "Global) Notifying all Local Coordinators of job %s state change to %d\n", - ORTE_JOBID_PRINT(jobid), (int)ckpt_state)); - - if( ORTE_SUCCESS != (ret = orte_grpcomm.xcast(ORTE_PROC_MY_NAME->jobid, &buffer, ORTE_RML_TAG_SNAPC_FULL)) ) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - - /* - * Process the job update - Global Coordinator - */ - OPAL_OUTPUT_VERBOSE((15, mca_snapc_full_component.super.output_handle, - "Global) Act locally on job %s state change to %d\n", - ORTE_JOBID_PRINT(jobid), (int)ckpt_state)); - if( ORTE_SUCCESS != (ret = global_coord_job_state_update(jobid, ckpt_state, &ckpt_snapshot_ref, &ckpt_snapshot_loc) ) ) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; 
- } - - cleanup: - OBJ_DESTRUCT(&buffer); - return exit_status; -} - -static int orte_snapc_full_global_set_vpid_ckpt_info( orte_process_name_t proc, - size_t ckpt_state, - char *ckpt_snapshot_ref, - char *ckpt_snapshot_loc) -{ - int exit_status = ORTE_SUCCESS; - orte_job_t *jdata = NULL; - orte_proc_t **procs = NULL; - - /* get the job data object for this proc */ - if (NULL == (jdata = orte_get_job_data_object(proc.jobid))) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - exit_status = ORTE_ERR_NOT_FOUND; - goto cleanup; - } - /* Get the proc object for this process */ - procs = (orte_proc_t**)jdata->procs->addr; - if (NULL == procs[proc.vpid] ) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - exit_status = ORTE_ERR_NOT_FOUND; - goto cleanup; - } - - /* Update Local structures */ - procs[proc.vpid]->ckpt_state = ckpt_state; - if( NULL != procs[proc.vpid]->ckpt_snapshot_ref) { - free(procs[proc.vpid]->ckpt_snapshot_ref); - procs[proc.vpid]->ckpt_snapshot_ref = NULL; - } - procs[proc.vpid]->ckpt_snapshot_ref = strdup(ckpt_snapshot_ref); - if( NULL != procs[proc.vpid]->ckpt_snapshot_loc) { - free(procs[proc.vpid]->ckpt_snapshot_loc); - procs[proc.vpid]->ckpt_snapshot_loc = NULL; - } - procs[proc.vpid]->ckpt_snapshot_loc = strdup(ckpt_snapshot_loc); - - cleanup: - return exit_status; -} - -static int snapc_full_global_checkpoint(orte_jobid_t jobid, - bool term, - char **global_snapshot_handle, - int *ckpt_status) -{ - int ret, exit_status = ORTE_SUCCESS; - - OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, - "Global) Checkpoint of job %s has been requested\n", - ORTE_JOBID_PRINT(jobid))); - - /********************* - * Generate the global snapshot directory, and unique global snapshot handle - *********************/ - INC_SEQ_NUM(); - *global_snapshot_handle = strdup( orte_snapc_base_unique_global_snapshot_name( getpid() ) ); - - global_snapshot.seq_num = orte_snapc_base_snapshot_seq_number; - global_snapshot.reference_name = strdup(*global_snapshot_handle); 
- global_snapshot.local_location = opal_dirname(orte_snapc_base_get_global_snapshot_directory(global_snapshot.reference_name)); - - /* Creates the directory (with metadata files): - * /tmp/ompi_global_snapshot_PID.ckpt/seq_num - */ - if( ORTE_SUCCESS != (ret = orte_snapc_base_init_global_snapshot_directory(*global_snapshot_handle, false))) { - exit_status = ret; - goto cleanup; - } - - /*********************************** - * Do an update handshake with the orte_checkpoint command - ***********************************/ - updated_job_to_running = false; - if( ORTE_SUCCESS != (ret = orte_snapc_base_global_coord_ckpt_update_cmd(&orte_checkpoint_sender, - global_snapshot.reference_name, - global_snapshot.seq_num, - ORTE_SNAPC_CKPT_STATE_REQUEST) ) ) { - exit_status = ret; - goto cleanup; - } - - OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, - "Global) Using the checkpoint directory (%s)\n", - *global_snapshot_handle)); - - /********************** - * Notify the Local Snapshot Coordinators of the checkpoint request - **********************/ - OPAL_OUTPUT_VERBOSE((15, mca_snapc_full_component.super.output_handle, - "Global) Notifying the Local Coordinators\n")); - - if( ORTE_SUCCESS != (ret = snapc_full_global_notify_checkpoint(*global_snapshot_handle, - jobid, - term))) { - exit_status = ret; - goto cleanup; - } - - cleanup: - - *ckpt_status = exit_status; - return exit_status; -} - -static int snapc_full_global_notify_checkpoint( char * global_snapshot_handle, - orte_jobid_t jobid, - bool term) -{ - int ret, exit_status = ORTE_SUCCESS; + orte_snapc_full_orted_snapshot_t *orted_snapshot = NULL; opal_list_item_t* item = NULL; - char * global_dir = NULL; - size_t ckpt_state = ORTE_SNAPC_CKPT_STATE_PENDING; - global_dir = orte_snapc_base_get_global_snapshot_directory(global_snapshot_handle); - - if( term ) { - ckpt_state = ORTE_SNAPC_CKPT_STATE_PENDING_TERM; - } - - /* - * By updating the job segment we tell the Local Coordinator to - * checkpoint 
all their apps, so we don't need to do it explicitly here - * Just update the global structure here... - */ - for(item = opal_list_get_first(&global_snapshot.snapshots); - item != opal_list_get_end(&global_snapshot.snapshots); + for(item = opal_list_get_first(&(global_snapshot.local_snapshots)); + item != opal_list_get_end(&(global_snapshot.local_snapshots)); item = opal_list_get_next(item) ) { - orte_snapc_full_global_snapshot_t *vpid_snapshot; + orted_snapshot = (orte_snapc_full_orted_snapshot_t*)item; - vpid_snapshot = (orte_snapc_full_global_snapshot_t*)item; - - vpid_snapshot->super.state = ckpt_state; - vpid_snapshot->super.term = term; - - if( NULL != vpid_snapshot->super.crs_snapshot_super.reference_name) - free(vpid_snapshot->super.crs_snapshot_super.reference_name); - vpid_snapshot->super.crs_snapshot_super.reference_name = opal_crs_base_unique_snapshot_name(vpid_snapshot->super.process_name.vpid); - - if( NULL != vpid_snapshot->super.crs_snapshot_super.local_location) - free(vpid_snapshot->super.crs_snapshot_super.local_location); - asprintf(&(vpid_snapshot->super.crs_snapshot_super.local_location), "%s/%s", global_dir, vpid_snapshot->super.crs_snapshot_super.reference_name); - - if( NULL != vpid_snapshot->super.crs_snapshot_super.remote_location) - free(vpid_snapshot->super.crs_snapshot_super.remote_location); - asprintf(&(vpid_snapshot->super.crs_snapshot_super.remote_location), "%s/%s", global_dir, vpid_snapshot->super.crs_snapshot_super.reference_name); - - /* Update the individual process information */ - if (ORTE_SUCCESS != (ret = orte_snapc_full_global_set_vpid_ckpt_info(vpid_snapshot->super.process_name, - ORTE_SNAPC_CKPT_STATE_REQUEST, - vpid_snapshot->super.crs_snapshot_super.reference_name, - vpid_snapshot->super.crs_snapshot_super.local_location) ) ) { - exit_status = ret; - goto cleanup; + if( name->jobid == orted_snapshot->process_name.jobid && + name->vpid == orted_snapshot->process_name.vpid ) { + return orted_snapshot; } } - /* - * 
Update the job state, and broadcast to all local daemons - */ - orte_snapc_base_global_snapshot_loc = strdup(global_dir); - if( ORTE_SUCCESS != (ret = orte_snapc_full_global_set_job_ckpt_info(jobid, - ckpt_state, - global_snapshot_handle, - global_dir) ) ) { - exit_status = ret; - goto cleanup; - } - - cleanup: - if( NULL != global_dir) - free(global_dir); - - return exit_status; + return NULL; } -static int snapc_full_global_check_for_done(orte_jobid_t jobid) { - int ret, exit_status = ORTE_SUCCESS; - char * global_dir = NULL; - - /* If we are not done, then keep waiting */ - if(!snapc_full_global_is_done_yet()) { - return exit_status; - } - - /********************************** - * Update the job checkpoint state - **********************************/ - global_dir = orte_snapc_base_get_global_snapshot_directory(global_snapshot.reference_name); - orte_snapc_base_global_snapshot_loc = strdup(global_dir); - - if( ORTE_SNAPC_CKPT_STATE_ERROR != cur_job_ckpt_state ) { - cur_job_ckpt_state = ORTE_SNAPC_CKPT_STATE_FILE_XFER; - } - - if( ORTE_SUCCESS != (ret = orte_snapc_full_global_set_job_ckpt_info(jobid, - cur_job_ckpt_state, - global_snapshot.reference_name, - global_dir) ) ) { - exit_status = ret; - goto cleanup; - } - - cleanup: - if( NULL != global_dir) - free(global_dir); - - return exit_status; -} - -static bool snapc_full_global_is_done_yet(void) { +static orte_snapc_base_local_snapshot_t *find_orted_app_snapshot(orte_snapc_full_orted_snapshot_t *orted_snapshot, + orte_process_name_t *name) +{ + orte_snapc_base_local_snapshot_t *app_snapshot = NULL; opal_list_item_t* item = NULL; - /* Be optimistic, we are talking about Fault Tolerance */ - bool done_yet = true; - - for(item = opal_list_get_first(&global_snapshot.snapshots); - item != opal_list_get_end(&global_snapshot.snapshots); + + for(item = opal_list_get_first(&(orted_snapshot->super.local_snapshots)); + item != opal_list_get_end(&(orted_snapshot->super.local_snapshots)); item = opal_list_get_next(item) ) { 
- orte_snapc_full_global_snapshot_t *vpid_snapshot; - vpid_snapshot = (orte_snapc_full_global_snapshot_t*)item; - - /* If they are working, then we are not done yet */ - if(ORTE_SNAPC_CKPT_STATE_FINISHED != vpid_snapshot->super.state && - ORTE_SNAPC_CKPT_STATE_ERROR != vpid_snapshot->super.state ) { - done_yet = false; - return done_yet; + app_snapshot = (orte_snapc_base_local_snapshot_t*)item; + + if( name->jobid == app_snapshot->process_name.jobid && + name->vpid == app_snapshot->process_name.vpid ) { + return app_snapshot; } } - - return done_yet; -} -static int snapc_full_global_gather_all_files(void) { + return NULL; +} +static int snapc_full_start_filem(orte_snapc_full_orted_snapshot_t *orted_snapshot) +{ int ret, exit_status = ORTE_SUCCESS; - opal_list_item_t* item = NULL; - char * local_dir = NULL; - orte_filem_base_request_t *filem_request = NULL; orte_filem_base_process_set_t *p_set = NULL; orte_filem_base_file_set_t * f_set = NULL; opal_list_t all_filem_requests; + orte_snapc_base_local_snapshot_t *app_snapshot = NULL; + opal_list_item_t* item = NULL; OBJ_CONSTRUCT(&all_filem_requests, opal_list_t); @@ -1314,206 +1509,262 @@ static int snapc_full_global_gather_all_files(void) { } /* * If it is stored in place, then we do not need to transfer anything + * -- Should not have gotten here, so return an error -- */ else if( orte_snapc_base_store_in_place ) { - for(item = opal_list_get_first(&global_snapshot.snapshots); - item != opal_list_get_end(&global_snapshot.snapshots); - item = opal_list_get_next(item) ) { - orte_snapc_full_global_snapshot_t *vpid_snapshot; - vpid_snapshot = (orte_snapc_full_global_snapshot_t*)item; - - OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, - "Global) Updating Metadata - Files stored in place, no transfer required:\n")); - OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, - "Global) State: %d\n", - (int)vpid_snapshot->super.state)); - OPAL_OUTPUT_VERBOSE((20, 
mca_snapc_full_component.super.output_handle, - "Global) Remote Location: [%s]\n", - vpid_snapshot->super.crs_snapshot_super.remote_location)); - OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, - "Global) Local Location: [%s]\n", - vpid_snapshot->super.crs_snapshot_super.local_location)); - - if( ORTE_SNAPC_CKPT_STATE_ERROR == vpid_snapshot->super.state ) { - exit_status = ORTE_ERROR; - goto cleanup; - } - - /* - * Update the metadata file - */ - if(ORTE_SUCCESS != (ret = orte_snapc_base_add_vpid_metadata(&vpid_snapshot->super.process_name, - global_snapshot.reference_name, - vpid_snapshot->super.crs_snapshot_super.reference_name, - vpid_snapshot->super.crs_snapshot_super.local_location))) { - exit_status = ret; - goto cleanup; - } - } + exit_status = ORTE_ERROR; + goto cleanup; } + /* - * If *not* stored in place then use FileM to transfer the files and cleanup + * Setup the FileM data structures to transfer the files */ - else { + orted_snapshot->filem_request = OBJ_NEW(orte_filem_base_request_t); + /* + * Construct the process set + */ + p_set = OBJ_NEW(orte_filem_base_process_set_t); - /* - * Construct a request for each file/directory to transfer - * - start the non-blocking transfer - */ - for(item = opal_list_get_first(&global_snapshot.snapshots); - item != opal_list_get_end(&global_snapshot.snapshots); - item = opal_list_get_next(item) ) { - orte_snapc_full_global_snapshot_t *vpid_snapshot; - vpid_snapshot = (orte_snapc_full_global_snapshot_t*)item; + p_set->source.jobid = orted_snapshot->process_name.jobid; + p_set->source.vpid = orted_snapshot->process_name.vpid; + p_set->sink.jobid = ORTE_PROC_MY_NAME->jobid; + p_set->sink.vpid = ORTE_PROC_MY_NAME->vpid; - OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, - "Global) Getting remote directory:\n")); - OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, - "Global) Status: (%d)\n", - (int)vpid_snapshot->super.state)); - OPAL_OUTPUT_VERBOSE((20, 
mca_snapc_full_component.super.output_handle, - "Global) Remote Location: (%s)\n", - vpid_snapshot->super.crs_snapshot_super.remote_location)); - OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, - "Global) Local Location: (%s)\n", - vpid_snapshot->super.crs_snapshot_super.local_location)); + opal_list_append(&(orted_snapshot->filem_request->process_sets), &(p_set->super) ); - if( ORTE_SNAPC_CKPT_STATE_ERROR == vpid_snapshot->super.state ) { - exit_status = ORTE_ERROR; - goto cleanup; - } + for(item = opal_list_get_first(&(orted_snapshot->super.local_snapshots)); + item != opal_list_get_end(&(orted_snapshot->super.local_snapshots)); + item = opal_list_get_next(item) ) { + app_snapshot = (orte_snapc_base_local_snapshot_t*)item; - filem_request = OBJ_NEW(orte_filem_base_request_t); - - /* - * Construct the process set - */ - p_set = OBJ_NEW(orte_filem_base_process_set_t); - - p_set->source.jobid = vpid_snapshot->local_coord.jobid; - p_set->source.vpid = vpid_snapshot->local_coord.vpid; - p_set->sink.jobid = ORTE_PROC_MY_NAME->jobid; - p_set->sink.vpid = ORTE_PROC_MY_NAME->vpid; - - opal_list_append(&(filem_request->process_sets), &(p_set->super) ); - - /* - * Construct the file set - */ - f_set = OBJ_NEW(orte_filem_base_file_set_t); - - local_dir = strdup(vpid_snapshot->super.crs_snapshot_super.local_location); - f_set->local_target = opal_dirname(local_dir); - f_set->remote_target = strdup(vpid_snapshot->super.crs_snapshot_super.remote_location); - f_set->target_flag = ORTE_FILEM_TYPE_DIR; - - opal_list_append(&(filem_request->file_sets), &(f_set->super) ); - - /* - * Start the transfer - */ - opal_list_append(&all_filem_requests, &(filem_request->super)); - if(ORTE_SUCCESS != (ret = orte_filem.get_nb(filem_request) ) ) { - opal_list_remove_item(&all_filem_requests, &(filem_request->super)); - OBJ_RELEASE(filem_request); - filem_request = NULL; - - exit_status = ret; - /* Keep getting all the other files, eventually return an error */ - 
continue; - } - } - - /* - * Wait for all the transfers to complete - */ - OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, - "Global) Getting remote directory: Waiting...\n")); - if(ORTE_SUCCESS != (ret = orte_filem.wait_all(&all_filem_requests) ) ) { - exit_status = ret; + /* If one of the checkpoints failed, we need to return an error */ + if( ORTE_SNAPC_CKPT_STATE_ERROR == app_snapshot->state ) { + exit_status = ORTE_ERROR; + ORTE_ERROR_LOG(ORTE_ERROR); goto cleanup; } /* - * Now that the files have been brought local, remove the remote copy + * Construct the file set */ - for(item = opal_list_get_first( &all_filem_requests); - item != opal_list_get_end( &all_filem_requests); - item = opal_list_get_next( item) ) { - filem_request = (orte_filem_base_request_t *) item; - if(ORTE_SUCCESS != (ret = orte_filem.rm_nb(filem_request)) ) { - exit_status = ret; - /* Keep removing, eventually return an error */ - continue; - } + f_set = OBJ_NEW(orte_filem_base_file_set_t); + + f_set->local_target = strdup(orte_snapc_base_global_snapshot_loc); + if( orte_snapc_base_is_global_dir_shared ) { + f_set->local_hint = ORTE_FILEM_HINT_SHARED; } - /* - * Update all of the metadata - */ - OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, - "Global) Getting remote directory: Updating Metadata...\n")); - for(item = opal_list_get_first(&global_snapshot.snapshots); - item != opal_list_get_end(&global_snapshot.snapshots); - item = opal_list_get_next(item) ) { - orte_snapc_full_global_snapshot_t *vpid_snapshot; - vpid_snapshot = (orte_snapc_full_global_snapshot_t*)item; + asprintf(&(f_set->remote_target), "%s/%s", app_snapshot->remote_location, app_snapshot->reference_name); - if(ORTE_SUCCESS != (ret = orte_snapc_base_add_vpid_metadata(&vpid_snapshot->super.process_name, - global_snapshot.reference_name, - vpid_snapshot->super.crs_snapshot_super.reference_name, - vpid_snapshot->super.crs_snapshot_super.local_location))) { - exit_status = ret; - 
goto cleanup; - } - } + f_set->target_flag = ORTE_FILEM_TYPE_DIR; - /* - * Wait for all the removes to complete - */ - OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, - "Global) Waiting for removes to complete...\n")); - if(ORTE_SUCCESS != (ret = orte_filem.wait_all(&all_filem_requests) ) ) { - exit_status = ret; - goto cleanup; - } + opal_list_append(&(orted_snapshot->filem_request->file_sets), &(f_set->super) ); + OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, + "Global) ... FileM (%s) [%s] --> [%s]", + ORTE_NAME_PRINT(&orted_snapshot->process_name), f_set->remote_target, f_set->local_target)); } /* - * Now that we gathered all the files, finish off the metadata file + * Start the transfer */ - orte_snapc_base_finalize_metadata(global_snapshot.reference_name); - + if(ORTE_SUCCESS != (ret = orte_filem.get_nb(orted_snapshot->filem_request) ) ) { + OBJ_RELEASE(orted_snapshot->filem_request); + orted_snapshot->filem_request = NULL; + exit_status = ret; + ORTE_ERROR_LOG(ret); + goto cleanup; + } + cleanup: - if(NULL != local_dir) - free(local_dir); + return exit_status; +} +static int snapc_full_wait_filem(void) +{ + int ret, exit_status = ORTE_SUCCESS; + opal_list_t all_filem_requests; + orte_snapc_full_orted_snapshot_t *orted_snapshot = NULL; + opal_list_item_t* item = NULL; + + OBJ_CONSTRUCT(&all_filem_requests, opal_list_t); + + /* + * Construct a list for wait_all() + */ + for(item = opal_list_get_first(&(global_snapshot.local_snapshots)); + item != opal_list_get_end(&(global_snapshot.local_snapshots)); + item = opal_list_get_next(item) ) { + orted_snapshot = (orte_snapc_full_orted_snapshot_t*)item; + + if( NULL != orted_snapshot->filem_request ) { + opal_list_append(&all_filem_requests, &(orted_snapshot->filem_request->super)); + } + } + + OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, + "Global) FileM -- Enter wait_all() Get")); + + /* + * Wait for all transfers to complete + */ + 
if(ORTE_SUCCESS != (ret = orte_filem.wait_all(&all_filem_requests) ) ) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } + + OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, + "Global) FileM -- Setup removal()")); + + /* + * Start removal of old data + */ + for(item = opal_list_get_first(&(global_snapshot.local_snapshots)); + item != opal_list_get_end(&(global_snapshot.local_snapshots)); + item = opal_list_get_next(item) ) { + orted_snapshot = (orte_snapc_full_orted_snapshot_t*)item; + + if( NULL != orted_snapshot->filem_request ) { + if(ORTE_SUCCESS != (ret = orte_filem.rm_nb(orted_snapshot->filem_request)) ) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } + } + } + + OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, + "Global) FileM -- Enter wait_all() Remove")); + + /* + * Wait for all removals to complete + */ + if(ORTE_SUCCESS != (ret = orte_filem.wait_all(&all_filem_requests) ) ) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } + + cleanup: + for(item = opal_list_get_first(&(global_snapshot.local_snapshots)); + item != opal_list_get_end(&(global_snapshot.local_snapshots)); + item = opal_list_get_next(item) ) { + orted_snapshot = (orte_snapc_full_orted_snapshot_t*)item; + + if( NULL != orted_snapshot->filem_request ) { + /*OBJ_RELEASE(orted_snapshot->filem_request);*/ + orted_snapshot->filem_request = NULL; + } + } + + /* JJH I don't think this is needed (??) 
*/ while (NULL != (item = opal_list_remove_first(&all_filem_requests) ) ) { OBJ_RELEASE(item); } OBJ_DESTRUCT(&all_filem_requests); - return exit_status; } -static int snapc_full_get_vpid_range( orte_jobid_t jobid, - orte_vpid_t *vpid_start, - orte_vpid_t *vpid_range) +static int snapc_full_global_get_min_state(void) { - int exit_status = ORTE_SUCCESS; - orte_job_t *jdata; - - /* look up job data object */ - if (NULL == (jdata = orte_get_job_data_object(jobid))) { - exit_status = ORTE_ERR_NOT_FOUND; - goto cleanup; + int min_state = ORTE_SNAPC_CKPT_MAX; + orte_snapc_full_orted_snapshot_t *orted_snapshot = NULL; + opal_list_item_t* item = NULL; + char * state_str_a = NULL; + char * state_str_b = NULL; + + for(item = opal_list_get_first(&(global_snapshot.local_snapshots)); + item != opal_list_get_end(&(global_snapshot.local_snapshots)); + item = opal_list_get_next(item) ) { + orted_snapshot = (orte_snapc_full_orted_snapshot_t*)item; + + if( NULL != state_str_a ) { + free(state_str_a); + state_str_a = NULL; + } + if( NULL != state_str_b ) { + free(state_str_b); + state_str_b = NULL; + } + + orte_snapc_ckpt_state_str(&state_str_a, orted_snapshot->state); + orte_snapc_ckpt_state_str(&state_str_b, min_state); + + OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, + "Global) ... Checking [%d %s] vs [%d %s]", + (int)orted_snapshot->state, state_str_a, + min_state, state_str_b )); + + if( (int)min_state > (int)orted_snapshot->state ) { + min_state = orted_snapshot->state; + + OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, + "Global) ... Update --> Min State [%d %s]", + (int)min_state, state_str_a )); + } } - *vpid_start = 0; - *vpid_range = jdata->num_procs; - - cleanup: - return exit_status; + if( NULL != state_str_b ) { + free(state_str_b); + state_str_b = NULL; + } + orte_snapc_ckpt_state_str(&state_str_b, min_state); + OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, + "Global) ... 
Min State [%d %s]", + (int)min_state, state_str_b )); + + if( NULL != state_str_a ) { + free(state_str_a); + state_str_a = NULL; + } + if( NULL != state_str_b ) { + free(state_str_b); + state_str_b = NULL; + } + + return min_state; } +static double get_time(void) { + double wtime; + +#if OPAL_TIMER_USEC_NATIVE + wtime = (double)opal_timer_base_get_usec() / 1000000.0; +#else + struct timeval tv; + gettimeofday(&tv, NULL); + wtime = tv.tv_sec; + wtime += (double)tv.tv_usec / 1000000.0; +#endif + + return wtime; +} + +static void print_time(void) { + double t_local, t_transfer, t_cleanup, t_total; + + if(!orte_snapc_full_timing_enabled) { + return; + } + + t_total = timer_end - timer_start; + + t_local = timer_local_done - timer_start; + + if(orte_snapc_base_store_in_place || orte_snapc_full_skip_filem) { + t_transfer = 0; + t_cleanup = timer_end - timer_local_done; + } else { + t_transfer = timer_xfer_done - timer_local_done; + t_cleanup = timer_end - timer_xfer_done; + } + + opal_output(0, "Checkpoint Time:"); + opal_output(0, "\tLocal : %10.2f s\n", t_local); + opal_output(0, "\tTransfer: %10.2f s\n", t_transfer); + opal_output(0, "\tCleanup : %10.2f s\n", t_cleanup); + opal_output(0, "\tTotal : %10.2f s\n", t_total); + + return; +} diff --git a/orte/mca/snapc/full/snapc_full_local.c b/orte/mca/snapc/full/snapc_full_local.c index 85e5ce4d8c..cf8e9930f5 100644 --- a/orte/mca/snapc/full/snapc_full_local.c +++ b/orte/mca/snapc/full/snapc_full_local.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2007 The Trustees of Indiana University. + * Copyright (c) 2004-2009 The Trustees of Indiana University. * All rights reserved. * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. * All rights reserved. 
@@ -68,63 +68,79 @@ /************************************ * Locally Global vars & functions :) ************************************/ -static bool snapc_local_recv_issued = false; -static bool snapc_local_proc_recv_issued = false; +static orte_jobid_t current_local_jobid = 0; +static opal_list_t snapc_local_vpids; +static int current_job_ckpt_state = ORTE_SNAPC_CKPT_STATE_NONE; -static int snapc_full_local_start_listener(void); -static int snapc_full_local_stop_listener(void); -static int snapc_full_local_start_proc_listener(void); -static int snapc_full_local_stop_proc_listener(void); -static void snapc_full_local_cmd_recv(int status, +static bool snapc_local_hnp_recv_issued = false; +static int snapc_full_local_start_hnp_listener(void); +static int snapc_full_local_stop_hnp_listener(void); +static void snapc_full_local_hnp_cmd_recv(int status, + orte_process_name_t* sender, + opal_buffer_t* buffer, + orte_rml_tag_t tag, + void* cbdata); +static void snapc_full_process_hnp_request_cmd(int fd, short event, void *cbdata); + +static bool snapc_local_app_recv_issued = false; +static int snapc_full_local_start_app_listener(void); +static int snapc_full_local_stop_app_listener(void); +static void snapc_full_local_app_cmd_recv(int status, orte_process_name_t* sender, opal_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata); -static void snapc_full_local_process_job_update_cmd(orte_process_name_t* sender, - opal_buffer_t* buffer); -static void snapc_full_local_process_app_update_cmd(orte_process_name_t* sender, - opal_buffer_t* buffer); -static int snapc_full_local_send_vpid_assoc(void); +static void snapc_full_local_process_app_update_cmd(int fd, short event, void *cbdata); -static int orte_snapc_full_local_set_vpid_ckpt_info( orte_process_name_t proc, - size_t ckpt_state, - char *ckpt_snapshot_ref, - char *ckpt_snapshot_loc); - -static orte_snapc_full_local_snapshot_t *find_vpid_snapshot(orte_process_name_t *name ); +static orte_snapc_full_app_snapshot_t 
*find_vpid_snapshot(orte_process_name_t *name ); static int snapc_full_local_get_vpids(void); -static int snapc_full_local_setup_snapshot_dir(char * snapshot_ref, char * sugg_dir, char **actual_dir); -static int snapc_full_establish_dir(void); +static void snapc_full_local_process_job_update_cmd(orte_process_name_t* sender, + opal_buffer_t* buffer, + bool quick); -static int snapc_full_local_start_checkpoint_all(size_t ckpt_state); -static int snapc_full_local_start_ckpt_open_comm(orte_snapc_full_local_snapshot_t *vpid_snapshot); -static int snapc_full_local_start_ckpt_handshake_term(orte_snapc_full_local_snapshot_t *vpid_snapshot, bool term); -static int snapc_full_local_start_ckpt_handshake(orte_snapc_full_local_snapshot_t *vpid_snapshot); -static int snapc_full_local_end_ckpt_handshake(orte_snapc_full_local_snapshot_t *vpid_snapshot); +int local_coord_job_state_update(orte_jobid_t jobid, + int job_ckpt_state, + char **job_ckpt_ref, + char **job_ckpt_loc); + +static int local_coord_job_state_update_finished_local(void); + +static int snapc_full_local_setup_snapshot_dir(char * snapshot_ref, char * sugg_dir, char **actual_dir); +#if 0 +static int snapc_full_establish_dir(void); +#endif +static int snapc_full_get_min_state(void); + +static int snapc_full_local_update_coord(int state, bool quick); + +static int snapc_full_local_start_checkpoint_all(int ckpt_state); +static int snapc_full_local_start_ckpt_open_comm(orte_snapc_full_app_snapshot_t *vpid_snapshot); +static int snapc_full_local_start_ckpt_handshake_term(orte_snapc_full_app_snapshot_t *vpid_snapshot, bool term); +static int snapc_full_local_start_ckpt_handshake(orte_snapc_full_app_snapshot_t *vpid_snapshot); +static int snapc_full_local_end_ckpt_handshake(orte_snapc_full_app_snapshot_t *vpid_snapshot); static void snapc_full_local_comm_read_event(int fd, short flags, void *arg); -static opal_list_t snapc_local_vpids; -static orte_jobid_t snapc_local_jobid; -static size_t cur_job_ckpt_state = 
ORTE_SNAPC_CKPT_STATE_NONE; /************************ * Function Definitions ************************/ int local_coord_init( void ) { - snapc_local_jobid = -1; + current_local_jobid = -1; + current_job_ckpt_state = ORTE_SNAPC_CKPT_STATE_NONE; return ORTE_SUCCESS; } int local_coord_finalize( void ) { - if( snapc_local_jobid >= 0 ) { - return local_coord_release_job(snapc_local_jobid); + if( current_local_jobid >= 0 ) { + return local_coord_release_job(current_local_jobid); } + current_job_ckpt_state = ORTE_SNAPC_CKPT_STATE_NONE; return ORTE_SUCCESS; } @@ -135,17 +151,17 @@ int local_coord_setup_job(orte_jobid_t jobid) /* * Set the jobid that we are responsible for */ - snapc_local_jobid = jobid; - OBJ_CONSTRUCT(&snapc_local_vpids, opal_list_t); - + current_local_jobid = jobid; OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, - "Local) Monitor local jobid %s\n", - ORTE_JOBID_PRINT(snapc_local_jobid))); + "Local) Setting up jobid %s\n", + ORTE_JOBID_PRINT(current_local_jobid))); /* * Get the list of vpid's that we care about */ + OBJ_CONSTRUCT(&snapc_local_vpids, opal_list_t); if( ORTE_SUCCESS != (ret = snapc_full_local_get_vpids()) ) { + ORTE_ERROR_LOG(ret); exit_status = ret; goto cleanup; } @@ -154,17 +170,21 @@ int local_coord_setup_job(orte_jobid_t jobid) * Wait for the snapshot directory to be established before registering * the callbacks since they use the same tag. 
*/ +#if 0 if(orte_snapc_base_establish_global_snapshot_dir) { if( ORTE_SUCCESS != (ret = snapc_full_establish_dir() ) ) { + ORTE_ERROR_LOG(ret); exit_status = ret; goto cleanup; } } +#endif /* * Setup Global Coordinator listener */ - if( ORTE_SUCCESS != (ret = snapc_full_local_start_listener() ) ) { + if( ORTE_SUCCESS != (ret = snapc_full_local_start_hnp_listener() ) ) { + ORTE_ERROR_LOG(ret); exit_status = ret; goto cleanup; } @@ -172,14 +192,16 @@ int local_coord_setup_job(orte_jobid_t jobid) /* * Setup Global Coordinator listener for Application updates */ - if( ORTE_SUCCESS != (ret = snapc_full_local_start_proc_listener() ) ) { + if( ORTE_SUCCESS != (ret = snapc_full_local_start_app_listener() ) ) { + ORTE_ERROR_LOG(ret); exit_status = ret; goto cleanup; } OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, - "Local) Finished setting up job\n")); + "Local) Finished setup of job %s", + ORTE_JOBID_PRINT(current_local_jobid) )); cleanup: return exit_status; @@ -201,8 +223,8 @@ int local_coord_release_job(orte_jobid_t jobid) for(item = opal_list_get_first(&snapc_local_vpids); item != opal_list_get_end(&snapc_local_vpids); item = opal_list_get_next(item) ) { - orte_snapc_full_local_snapshot_t *vpid_snapshot; - vpid_snapshot = (orte_snapc_full_local_snapshot_t*)item; + orte_snapc_full_app_snapshot_t *vpid_snapshot; + vpid_snapshot = (orte_snapc_full_app_snapshot_t*)item; if(ORTE_SNAPC_CKPT_STATE_NONE != vpid_snapshot->super.state && ORTE_SNAPC_CKPT_STATE_ERROR != vpid_snapshot->super.state && @@ -220,11 +242,13 @@ int local_coord_release_job(orte_jobid_t jobid) /* * Stop Global Coordinator listeners */ - if( ORTE_SUCCESS != (ret = snapc_full_local_stop_proc_listener() ) ) { + if( ORTE_SUCCESS != (ret = snapc_full_local_stop_app_listener() ) ) { + ORTE_ERROR_LOG(ret); exit_status = ret; } - if( ORTE_SUCCESS != (ret = snapc_full_local_stop_listener() ) ) { + if( ORTE_SUCCESS != (ret = snapc_full_local_stop_hnp_listener() ) ) { + ORTE_ERROR_LOG(ret); 
exit_status = ret; } @@ -234,10 +258,13 @@ int local_coord_release_job(orte_jobid_t jobid) /****************** * Local functions ******************/ -static int snapc_full_local_start_listener(void) + +/****************** + * Setup Listeners + ******************/ +static int snapc_full_local_start_hnp_listener(void) { - int exit_status = ORTE_SUCCESS; - int rc; + int ret, exit_status = ORTE_SUCCESS; /* * Global Coordinator: Do not register a Local listener @@ -246,35 +273,35 @@ static int snapc_full_local_start_listener(void) return ORTE_SUCCESS; } - if (snapc_local_recv_issued ) { + if (snapc_local_hnp_recv_issued ) { return ORTE_SUCCESS; } OPAL_OUTPUT_VERBOSE((5, mca_snapc_full_component.super.output_handle, - "Local) Receive: Start command recv")); + "Local) Startup Coordinator Channel")); /* * Coordinator command listener */ - if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, - ORTE_RML_TAG_SNAPC_FULL, - ORTE_RML_PERSISTENT, - snapc_full_local_cmd_recv, - NULL))) { - ORTE_ERROR_LOG(rc); - exit_status = rc; + if (ORTE_SUCCESS != (ret = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, + ORTE_RML_TAG_SNAPC_FULL, + ORTE_RML_PERSISTENT, + snapc_full_local_hnp_cmd_recv, + NULL))) { + ORTE_ERROR_LOG(ret); + exit_status = ret; goto cleanup; } - snapc_local_recv_issued = true; + snapc_local_hnp_recv_issued = true; cleanup: return exit_status; } -static int snapc_full_local_stop_listener(void) +static int snapc_full_local_stop_hnp_listener(void) { - int rc, exit_status = ORTE_SUCCESS; + int ret, exit_status = ORTE_SUCCESS; /* * Global Coordinator: Does not register a Local listener @@ -283,146 +310,255 @@ static int snapc_full_local_stop_listener(void) return ORTE_SUCCESS; } - if (!snapc_local_recv_issued ) { + if (!snapc_local_hnp_recv_issued ) { return ORTE_SUCCESS; } OPAL_OUTPUT_VERBOSE((5, mca_snapc_full_component.super.output_handle, - "Local) Receive stop command recv")); + "Local) Shutdown Coordinator Channel")); - if (ORTE_SUCCESS != (rc = 
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, - ORTE_RML_TAG_SNAPC_FULL))) { - ORTE_ERROR_LOG(rc); - exit_status = rc; + if (ORTE_SUCCESS != (ret = orte_rml.recv_cancel(ORTE_NAME_WILDCARD, + ORTE_RML_TAG_SNAPC_FULL))) { + ORTE_ERROR_LOG(ret); + exit_status = ret; goto cleanup; } - snapc_local_recv_issued = false; + snapc_local_hnp_recv_issued = false; cleanup: return exit_status; } -static int snapc_full_local_start_proc_listener(void) +static int snapc_full_local_start_app_listener(void) { - int exit_status = ORTE_SUCCESS; - int rc; + int ret, exit_status = ORTE_SUCCESS; - if (snapc_local_proc_recv_issued) { + if (snapc_local_app_recv_issued) { return ORTE_SUCCESS; } OPAL_OUTPUT_VERBOSE((5, mca_snapc_full_component.super.output_handle, - "Local) Receive (Command line): Start command recv")); + "Local) Startup Application State Channel")); /* * Coordinator command listener */ - if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, - ORTE_RML_TAG_SNAPC, - ORTE_RML_PERSISTENT, - snapc_full_local_cmd_recv, - NULL))) { - ORTE_ERROR_LOG(rc); - exit_status = rc; + if (ORTE_SUCCESS != (ret = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, + ORTE_RML_TAG_SNAPC, + ORTE_RML_PERSISTENT, + snapc_full_local_app_cmd_recv, + NULL))) { + ORTE_ERROR_LOG(ret); + exit_status = ret; goto cleanup; } - snapc_local_proc_recv_issued = true; + snapc_local_app_recv_issued = true; cleanup: return exit_status; } -static int snapc_full_local_stop_proc_listener(void) +static int snapc_full_local_stop_app_listener(void) { - int exit_status = ORTE_SUCCESS; - int rc; + int ret, exit_status = ORTE_SUCCESS; - if (!snapc_local_proc_recv_issued ) { + if (!snapc_local_app_recv_issued ) { return ORTE_SUCCESS; } OPAL_OUTPUT_VERBOSE((5, mca_snapc_full_component.super.output_handle, - "Local) Receive (Command Line) stop command")); + "Local) Shutdown Application State Channel")); - if (ORTE_SUCCESS != (rc = orte_rml.recv_cancel(ORTE_NAME_WILDCARD, - ORTE_RML_TAG_SNAPC))) { - ORTE_ERROR_LOG(rc); - 
exit_status = rc; + if (ORTE_SUCCESS != (ret = orte_rml.recv_cancel(ORTE_NAME_WILDCARD, + ORTE_RML_TAG_SNAPC))) { + ORTE_ERROR_LOG(ret); + exit_status = ret; goto cleanup; } - snapc_local_proc_recv_issued = false; + snapc_local_app_recv_issued = false; cleanup: return exit_status; } -void snapc_full_local_cmd_recv(int status, - orte_process_name_t* sender, - opal_buffer_t* buffer, - orte_rml_tag_t tag, - void* cbdata) +/****************** + * Listener Callbacks + ******************/ +void snapc_full_local_app_cmd_recv(int status, + orte_process_name_t* sender, + opal_buffer_t* buffer, + orte_rml_tag_t tag, + void* cbdata) { - orte_snapc_full_cmd_flag_t command; - orte_std_cntr_t count; - int rc; - - OPAL_OUTPUT_VERBOSE((5, mca_snapc_full_component.super.output_handle, - "Local) Receive a command message.")); + if( ORTE_RML_TAG_SNAPC != tag ) { + opal_output(mca_snapc_full_component.super.output_handle, + "Local) Error: Unknown tag: Received a command message from %s (tag = %d).", + ORTE_NAME_PRINT(sender), tag); + ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); + return; + } /* * This is the local process contacting us with its updated pid information */ - if( ORTE_RML_TAG_SNAPC == tag ) { - OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, - "Local) Application: Update pid operation")); - snapc_full_local_process_app_update_cmd(sender, buffer); + OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, + "Local) Application: Update pid operation.")); + + /* + * Do not handle here, use the event engine to queue this until we are out + * of the RML + */ + ORTE_MESSAGE_EVENT(sender, buffer, tag, snapc_full_local_process_app_update_cmd); + + return; +} + +void snapc_full_local_hnp_cmd_recv(int status, + orte_process_name_t* sender, + opal_buffer_t* buffer, + orte_rml_tag_t tag, + void* cbdata) +{ + if( ORTE_RML_TAG_SNAPC_FULL != tag ) { + opal_output(mca_snapc_full_component.super.output_handle, + "Local) Error: Unknown tag: Received a command 
message from %s (tag = %d).", + ORTE_NAME_PRINT(sender), tag); + ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); return; } /* - * Otherwise this is an inter-coordinator command (usually updating state info). + * This is a Global Coordinator message. + */ + OPAL_OUTPUT_VERBOSE((5, mca_snapc_full_component.super.output_handle, + "Local) Receive a command message.")); + + /* + * Do not process this right away - we need to get out of the recv before + * we process the message to avoid performing the rest of the job while + * inside this receive! Instead, setup an event so that the message gets processed + * as soon as we leave the recv. + * + * The macro makes a copy of the buffer, which we release above - the incoming + * buffer, however, is NOT released here, although its payload IS transferred + * to the message buffer for later processing + * + */ + ORTE_MESSAGE_EVENT(sender, buffer, tag, snapc_full_process_hnp_request_cmd); + + return; +} + +/****************** + * Listener Handlers + ******************/ +static void snapc_full_local_process_app_update_cmd(int fd, short event, void *cbdata) +{ + int ret; + orte_message_event_t *mev = (orte_message_event_t*)cbdata; + orte_snapc_full_app_snapshot_t *vpid_snapshot = NULL; + orte_snapc_cmd_flag_t command; + orte_process_name_t proc; + pid_t proc_pid = 0; + orte_std_cntr_t count; + + /* + * Verify the command */ count = 1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &command, &count, ORTE_SNAPC_FULL_CMD))) { - ORTE_ERROR_LOG(rc); - return; + if (ORTE_SUCCESS != (ret = opal_dss.unpack(mev->buffer, &command, &count, ORTE_SNAPC_CMD))) { + ORTE_ERROR_LOG(ret); + goto cleanup; + } + + if( ORTE_SNAPC_LOCAL_UPDATE_CMD != command ) { + OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, + "Local) Warning: Expected an application command (%d) but received (%d)\n", + ORTE_SNAPC_LOCAL_UPDATE_CMD, command)); + goto cleanup; + } + + /* + * Unpack the data + * - process name + * - PID + */ + count = 1; + if 
(ORTE_SUCCESS != (ret = opal_dss.unpack(mev->buffer, &proc, &count, ORTE_NAME))) { + ORTE_ERROR_LOG(ret); + goto cleanup; + } + count = 1; + if (ORTE_SUCCESS != (ret = opal_dss.unpack(mev->buffer, &proc_pid, &count, OPAL_PID))) { + ORTE_ERROR_LOG(ret); + goto cleanup; + } + + if( NULL == (vpid_snapshot = find_vpid_snapshot(&proc)) ) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + goto cleanup; + } + + /* JJH: Maybe we should save the old and the newly restarted pid? */ + vpid_snapshot->process_pid = proc_pid; + + cleanup: + /* release the message event */ + OBJ_RELEASE(mev); + return; +} + +static void snapc_full_process_hnp_request_cmd(int fd, short event, void *cbdata) +{ + int ret; + orte_message_event_t *mev = (orte_message_event_t*)cbdata; + orte_process_name_t *sender = NULL; + orte_snapc_full_cmd_flag_t command; + orte_std_cntr_t count; + + sender = &(mev->sender); + + count = 1; + if (ORTE_SUCCESS != (ret = opal_dss.unpack(mev->buffer, &command, &count, ORTE_SNAPC_FULL_CMD))) { + ORTE_ERROR_LOG(ret); + goto cleanup; } switch (command) { + case ORTE_SNAPC_FULL_UPDATE_JOB_STATE_QUICK_CMD: + OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, + "Local) Command: Update Job state command (quick)")); + + snapc_full_local_process_job_update_cmd(sender, mev->buffer, true); + break; + case ORTE_SNAPC_FULL_UPDATE_JOB_STATE_CMD: OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, "Local) Command: Update Job state command")); - snapc_full_local_process_job_update_cmd(sender, buffer); - break; - - case ORTE_SNAPC_FULL_UPDATE_PROC_STATE_CMD: - OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, - "Local) Command: Update Job state command")); - /* Nothing to do */ - break; - - case ORTE_SNAPC_FULL_VPID_ASSOC_CMD: - OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, - "Local) Command: Update process/orted associations")); - - /* Nothing to do */ + snapc_full_local_process_job_update_cmd(sender, 
mev->buffer, false); break; default: ORTE_ERROR_LOG(ORTE_ERR_VALUE_OUT_OF_BOUNDS); } + + cleanup: + /* release the message event */ + OBJ_RELEASE(mev); + return; } static void snapc_full_local_process_job_update_cmd(orte_process_name_t* sender, - opal_buffer_t* buffer) + opal_buffer_t* buffer, + bool quick) { int ret, exit_status = ORTE_SUCCESS; orte_jobid_t jobid; - size_t job_ckpt_state; + int job_ckpt_state; char *job_ckpt_ref = NULL; char *job_ckpt_loc = NULL; orte_std_cntr_t count; @@ -440,23 +576,28 @@ static void snapc_full_local_process_job_update_cmd(orte_process_name_t* sender, exit_status = ret; goto cleanup; } + count = 1; - if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &job_ckpt_state, &count, OPAL_SIZE))) { + if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &job_ckpt_state, &count, OPAL_INT))) { ORTE_ERROR_LOG(ret); exit_status = ret; goto cleanup; } - count = 1; - if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &job_ckpt_ref, &count, OPAL_STRING))) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - count = 1; - if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &job_ckpt_loc, &count, OPAL_STRING))) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; + + if( !quick ) { + count = 1; + if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &job_ckpt_ref, &count, OPAL_STRING))) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } + + count = 1; + if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &job_ckpt_loc, &count, OPAL_STRING))) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } } if( ORTE_SUCCESS != (ret = local_coord_job_state_update(jobid, job_ckpt_state, &job_ckpt_ref, &job_ckpt_loc)) ) { @@ -464,29 +605,42 @@ static void snapc_full_local_process_job_update_cmd(orte_process_name_t* sender, exit_status = ret; goto cleanup; } - + cleanup: return; } + int local_coord_job_state_update(orte_jobid_t jobid, - size_t job_ckpt_state, + int job_ckpt_state, char **job_ckpt_ref, char 
**job_ckpt_loc) { int ret, exit_status = ORTE_SUCCESS; - orte_snapc_full_local_snapshot_t *vpid_snapshot; + orte_snapc_full_app_snapshot_t *vpid_snapshot = NULL; opal_list_item_t* item = NULL; + char * state_str = NULL; OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, - "Local) Job State: %d\n", - (int)job_ckpt_state)); + "Local) Job %s: Changed to state to:\n", + ORTE_JOBID_PRINT(jobid))); + orte_snapc_ckpt_state_str(&state_str, job_ckpt_state); OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, - "Local) Snapshot Ref: (%s)\n", - *job_ckpt_ref)); - OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, - "Local) Remote Location: (%s)\n", - *job_ckpt_loc)); + "Local) Job State: %d (%s)\n", + (int)job_ckpt_state, state_str )); + free(state_str); + state_str = NULL; + + if( NULL != *job_ckpt_ref ) { + OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, + "Local) Snapshot Ref: (%s)\n", + *job_ckpt_ref)); + } + if( NULL != *job_ckpt_loc ) { + OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, + "Local) Remote Location: (%s)\n", + *job_ckpt_loc)); + } /* * Update the vpid structure if we need to. @@ -494,10 +648,13 @@ int local_coord_job_state_update(orte_jobid_t jobid, * for the application. 
*/ if( ORTE_SUCCESS != (ret = snapc_full_local_get_vpids() ) ) { + ORTE_ERROR_LOG(ret); exit_status = ret; goto cleanup; } + current_job_ckpt_state = job_ckpt_state; + /* * If we have been asked to checkpoint do so */ @@ -509,534 +666,165 @@ int local_coord_job_state_update(orte_jobid_t jobid, for(item = opal_list_get_first(&snapc_local_vpids); item != opal_list_get_end(&snapc_local_vpids); item = opal_list_get_next(item) ) { - vpid_snapshot = (orte_snapc_full_local_snapshot_t*)item; + vpid_snapshot = (orte_snapc_full_app_snapshot_t*)item; + + vpid_snapshot->super.state = job_ckpt_state; if( ORTE_SNAPC_CKPT_STATE_PENDING_TERM == job_ckpt_state ) { - vpid_snapshot->super.term = true; + vpid_snapshot->term = true; } else { - vpid_snapshot->super.term = false; + vpid_snapshot->term = false; } /* * Update it's local information */ - if( NULL != vpid_snapshot->super.crs_snapshot_super.reference_name ) { - free(vpid_snapshot->super.crs_snapshot_super.reference_name); - vpid_snapshot->super.crs_snapshot_super.reference_name = NULL; + if( NULL != vpid_snapshot->super.reference_name ) { + free(vpid_snapshot->super.reference_name); + vpid_snapshot->super.reference_name = NULL; } - vpid_snapshot->super.crs_snapshot_super.reference_name = opal_crs_base_unique_snapshot_name(vpid_snapshot->super.process_name.vpid); + vpid_snapshot->super.reference_name = opal_crs_base_unique_snapshot_name(vpid_snapshot->super.process_name.vpid); /* global_directory/local_snapshot_vpid/... 
*/ - if( NULL != vpid_snapshot->super.crs_snapshot_super.local_location ) { - free(vpid_snapshot->super.crs_snapshot_super.local_location); - vpid_snapshot->super.crs_snapshot_super.local_location = NULL; + if( NULL != vpid_snapshot->super.local_location ) { + free(vpid_snapshot->super.local_location); + vpid_snapshot->super.local_location = NULL; } if( orte_snapc_base_store_in_place ) { - asprintf(&(vpid_snapshot->super.crs_snapshot_super.local_location), + asprintf(&(vpid_snapshot->super.local_location), "%s/%s", *job_ckpt_loc, - vpid_snapshot->super.crs_snapshot_super.reference_name); + vpid_snapshot->super.reference_name); } else { /* Use the OPAL CRS base snapshot dir * JJH: Do we want to do something more interesting? */ - asprintf(&(vpid_snapshot->super.crs_snapshot_super.local_location), + asprintf(&(vpid_snapshot->super.local_location), "%s/%s", opal_crs_base_snapshot_dir, - vpid_snapshot->super.crs_snapshot_super.reference_name); + vpid_snapshot->super.reference_name); } - if( NULL != vpid_snapshot->super.crs_snapshot_super.remote_location ) { - free(vpid_snapshot->super.crs_snapshot_super.remote_location); - vpid_snapshot->super.crs_snapshot_super.remote_location = NULL; + if( NULL != vpid_snapshot->super.remote_location ) { + free(vpid_snapshot->super.remote_location); + vpid_snapshot->super.remote_location = NULL; } - asprintf(&(vpid_snapshot->super.crs_snapshot_super.remote_location), + asprintf(&(vpid_snapshot->super.remote_location), "%s/%s", *job_ckpt_loc, - vpid_snapshot->super.crs_snapshot_super.reference_name); + vpid_snapshot->super.reference_name); - /* - * Update the Global Coordinator - */ - if( ORTE_SUCCESS != (ret = orte_snapc_full_local_set_vpid_ckpt_info( vpid_snapshot->super.process_name, - job_ckpt_state, - vpid_snapshot->super.crs_snapshot_super.reference_name, - vpid_snapshot->super.crs_snapshot_super.local_location ) ) ) { - exit_status = ret; - goto cleanup; - } } - cur_job_ckpt_state = job_ckpt_state; - /* * Start checkpointing 
all local processes */ if( ORTE_SUCCESS != (ret = snapc_full_local_start_checkpoint_all(job_ckpt_state) ) ) { - exit_status = ret; - goto cleanup; - } - } - else if( ORTE_SNAPC_CKPT_STATE_FILE_XFER == job_ckpt_state || - ORTE_SNAPC_CKPT_STATE_FINISHED == job_ckpt_state ) { - - if( cur_job_ckpt_state == ORTE_SNAPC_CKPT_STATE_FILE_XFER ) { - OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, - "Local) Already released processes on file transfer. Mark job as finished\n")); - cur_job_ckpt_state = job_ckpt_state; - exit_status = ORTE_SUCCESS; - goto cleanup; - } - - /* - * Release all checkpointed processes now that the checkpoint is complete - * If the request was to checkpoint then terminate this command will tell - * the application to do so upon release. - */ - for(item = opal_list_get_first(&snapc_local_vpids); - item != opal_list_get_end(&snapc_local_vpids); - item = opal_list_get_next(item) ) { - vpid_snapshot = (orte_snapc_full_local_snapshot_t*)item; - - OPAL_OUTPUT_VERBOSE((15, mca_snapc_full_component.super.output_handle, - "Local) Job Ckpt finished tell process %s\n", - ORTE_NAME_PRINT(&vpid_snapshot->super.process_name))); - - if( ORTE_SUCCESS != (ret = snapc_full_local_end_ckpt_handshake(vpid_snapshot) ) ) { - opal_output(mca_snapc_full_component.super.output_handle, - "Local) Error: Unable to finish the handshake with peer %s. 
%d\n", - ORTE_NAME_PRINT(&vpid_snapshot->super.process_name), ret); - exit_status = OPAL_ERROR; - goto cleanup; - } - } - - cur_job_ckpt_state = job_ckpt_state; - } else { - cur_job_ckpt_state = job_ckpt_state; - } - - cleanup: - return exit_status; -} - -static void snapc_full_local_process_app_update_cmd(orte_process_name_t* sender, - opal_buffer_t* buffer) -{ - int ret, exit_status = ORTE_SUCCESS; - orte_snapc_full_local_snapshot_t *vpid_snapshot = NULL; - orte_snapc_cmd_flag_t command; - orte_process_name_t proc; - pid_t proc_pid = 0; - orte_std_cntr_t count; - - /* - * Verify the command - */ - count = 1; - if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &command, &count, ORTE_SNAPC_CMD))) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - - if( ORTE_SNAPC_LOCAL_UPDATE_CMD != command ) { - OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, - "Local) Warning: Expected an application command (%d) but received (%d)\n", - ORTE_SNAPC_LOCAL_UPDATE_CMD, command)); - return; - } - - /* - * Unpack the data - * - process name - * - PID - */ - count = 1; - if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &proc, &count, ORTE_NAME))) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - count = 1; - if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &proc_pid, &count, OPAL_PID))) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - - if( NULL == (vpid_snapshot = find_vpid_snapshot(&proc)) ) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - exit_status = ORTE_ERR_NOT_FOUND; - goto cleanup; - } - - /* JJH: Maybe we should save the old and the newly restarted pid? 
*/ - vpid_snapshot->super.process_pid = proc_pid; - - cleanup: - return; -} - -static int snapc_full_local_send_vpid_assoc(void) -{ - int ret, exit_status = ORTE_SUCCESS; - orte_snapc_full_local_snapshot_t *vpid_snapshot = NULL; - opal_list_item_t* item = NULL; - opal_buffer_t buffer; - orte_snapc_full_cmd_flag_t command = ORTE_SNAPC_FULL_VPID_ASSOC_CMD; - size_t num_vpids = 0; - - /* - * Global Coordinator: Operate locally - */ - if( ORTE_SNAPC_GLOBAL_COORD_TYPE == (orte_snapc_coord_type & ORTE_SNAPC_GLOBAL_COORD_TYPE)) { - for(item = opal_list_get_first(&snapc_local_vpids); - item != opal_list_get_end(&snapc_local_vpids); - item = opal_list_get_next(item) ) { - vpid_snapshot = (orte_snapc_full_local_snapshot_t*)item; - global_coord_vpid_assoc_update(*ORTE_PROC_MY_NAME, vpid_snapshot->super.process_name); - } - return ORTE_SUCCESS; - } - - /* - * Local Coordinator: Send Global Coordinator the information - */ - num_vpids = opal_list_get_size(&snapc_local_vpids); - if( num_vpids <= 0 ) { - return exit_status; - } - - OBJ_CONSTRUCT(&buffer, opal_buffer_t); - - if (ORTE_SUCCESS != (ret = opal_dss.pack(&buffer, &command, 1, ORTE_SNAPC_FULL_CMD))) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - - if (ORTE_SUCCESS != (ret = opal_dss.pack(&buffer, &num_vpids, 1, OPAL_SIZE))) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - - for(item = opal_list_get_first(&snapc_local_vpids); - item != opal_list_get_end(&snapc_local_vpids); - item = opal_list_get_next(item) ) { - vpid_snapshot = (orte_snapc_full_local_snapshot_t*)item; - - if (ORTE_SUCCESS != (ret = opal_dss.pack(&buffer, &(vpid_snapshot->super.process_name), 1, ORTE_NAME))) { ORTE_ERROR_LOG(ret); exit_status = ret; goto cleanup; } } - - if (0 > (ret = orte_rml.send_buffer(ORTE_PROC_MY_HNP, &buffer, ORTE_RML_TAG_SNAPC_FULL, 0))) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - - cleanup: - OBJ_DESTRUCT(&buffer); - - return exit_status; -} - -static int 
snapc_full_establish_dir(void) -{ - int ret, exit_status = ORTE_SUCCESS; - orte_snapc_full_cmd_flag_t command = ORTE_SNAPC_FULL_ESTABLISH_DIR_CMD; - opal_buffer_t buffer; - char * ckpt_snapshot_ref = NULL; - char * ckpt_snapshot_loc = NULL; - orte_std_cntr_t count; - /* - * Global Coordinator: Operate locally + * Release all checkpointed processes now that the checkpoint is complete + * If the request was to checkpoint then terminate this command will tell + * the application to do so upon release. */ - if( ORTE_SNAPC_GLOBAL_COORD_TYPE == (orte_snapc_coord_type & ORTE_SNAPC_GLOBAL_COORD_TYPE)) { - opal_output(0, "Error: Not supported!\n"); - return ORTE_SUCCESS; - } - - OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, - "Local) Contact the HNP for global snapshot directory information to establish\n")); - - /* Notify HNP of request for information */ - OBJ_CONSTRUCT(&buffer, opal_buffer_t); - - if (ORTE_SUCCESS != (ret = opal_dss.pack(&buffer, &command, 1, ORTE_SNAPC_FULL_CMD))) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - - if (0 > (ret = orte_rml.send_buffer(ORTE_PROC_MY_HNP, &buffer, ORTE_RML_TAG_SNAPC_FULL, 0))) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - - OBJ_DESTRUCT(&buffer); - - /* Wait for the HNP to release us */ - OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, - "Local) Wait for response to global snapshot directory information request\n")); - OBJ_CONSTRUCT(&buffer, opal_buffer_t); - - if( ORTE_SUCCESS != (ret = orte_rml.recv_buffer(ORTE_PROC_MY_HNP, - &buffer, - ORTE_RML_TAG_SNAPC_FULL, - ORTE_RML_NON_PERSISTENT) ) ) { - OBJ_DESTRUCT(&buffer); - exit_status = ret; - goto cleanup; - } - - /* - * Unpack the data - * - command - * - ckpt_reference - * - ckpt_location - */ - count = 1; - if (ORTE_SUCCESS != (ret = opal_dss.unpack(&buffer, &command, &count, ORTE_SNAPC_FULL_CMD))) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - count = 1; - if 
(ORTE_SUCCESS != (ret = opal_dss.unpack(&buffer, &ckpt_snapshot_ref, &count, OPAL_STRING))) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - count = 1; - if (ORTE_SUCCESS != (ret = opal_dss.unpack(&buffer, &ckpt_snapshot_loc, &count, OPAL_STRING))) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - - if( NULL != ckpt_snapshot_loc && - (0 != strncmp(ckpt_snapshot_loc, "", strlen(""))) ) { - orte_snapc_base_global_snapshot_loc = strdup(ckpt_snapshot_loc); - } - - OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, - "Local) The global snapshot directory has been established at [%s]\n", - orte_snapc_base_global_snapshot_loc)); - - cleanup: - OBJ_DESTRUCT(&buffer); - if( NULL != ckpt_snapshot_ref ) { - free(ckpt_snapshot_ref); - ckpt_snapshot_ref = NULL; - } - if( NULL != ckpt_snapshot_loc ) { - free(ckpt_snapshot_loc); - ckpt_snapshot_loc = NULL; - } - - return exit_status; -} - -static int snapc_full_local_setup_snapshot_dir(char * snapshot_ref, char * sugg_dir, char **actual_dir) -{ - int ret, exit_status = ORTE_SUCCESS; - mode_t my_mode = S_IRWXU; - - /* See if we can use the suggested directory */ - if(OPAL_SUCCESS != (ret = opal_os_dirpath_create(sugg_dir, my_mode) ) ) { - /* Can't use that directory, try the default directory from OPAL CRS */ - *actual_dir = strdup(opal_crs_base_get_snapshot_directory(snapshot_ref)); - - if(OPAL_SUCCESS != (ret = opal_os_dirpath_create(*actual_dir, my_mode) ) ) { - /* Can't use that either, so let's give up */ - exit_status = ret; + else if( ORTE_SNAPC_CKPT_STATE_FINISHED_LOCAL == job_ckpt_state ) { + if( ORTE_SUCCESS != (ret = local_coord_job_state_update_finished_local() ) ) { + ORTE_ERROR_LOG(ORTE_ERROR); + exit_status = ORTE_ERROR; goto cleanup; } } + /* + * Once we get the FINISHED state then the checkpoint is all done, and we + * reset our state to NONE. 
+ */ + else if( ORTE_SNAPC_CKPT_STATE_FINISHED == job_ckpt_state ) { + OPAL_OUTPUT_VERBOSE((15, mca_snapc_full_component.super.output_handle, + "Local) Job Ckpt finished - Cleanup\n")); + + for(item = opal_list_get_first(&snapc_local_vpids); + item != opal_list_get_end(&snapc_local_vpids); + item = opal_list_get_next(item) ) { + vpid_snapshot = (orte_snapc_full_app_snapshot_t*)item; + + /* Forgot to close the pipes to the application + * This can happen if we never received a FINISHED_LOCAL, but only + * a FINISHED + */ + if( vpid_snapshot->comm_pipe_w_fd > 0 ) { + if( ORTE_SUCCESS != (ret = local_coord_job_state_update_finished_local() ) ) { + ORTE_ERROR_LOG(ORTE_ERROR); + exit_status = ORTE_ERROR; + goto cleanup; + } + } + + vpid_snapshot->super.state = ORTE_SNAPC_CKPT_STATE_NONE; + } + } + /* + * States not handled + */ + else if( ORTE_SNAPC_CKPT_STATE_FILE_XFER == job_ckpt_state ) { + ; + } else { - /* We are able to use that directory */ - *actual_dir = strdup(sugg_dir); + ; + } + + cleanup: + if( NULL != state_str ) { + free(state_str); + state_str = NULL; } - cleanup: return exit_status; } -static int snapc_full_local_get_vpids(void) -{ - int ret, exit_status = ORTE_SUCCESS; - opal_list_item_t *item = NULL; - orte_snapc_full_local_snapshot_t *vpid_snapshot = NULL; - orte_odls_child_t *child = NULL; - size_t list_len = 0; - bool new_child = false; - - /* - * If the list is populated, and has updated pid information then - * there is nothing to update. 
- */ - list_len = opal_list_get_size(&snapc_local_vpids); - if( list_len > 0 ) { - vpid_snapshot = (orte_snapc_full_local_snapshot_t*)opal_list_get_first(&snapc_local_vpids); - if( 0 < vpid_snapshot->super.process_pid ) { - return ORTE_SUCCESS; - } - } - - /* - * Otherwise update or populate the list - */ - for (item = opal_list_get_first(&orte_local_children); - item != opal_list_get_end(&orte_local_children); - item = opal_list_get_next(item)) { - child = (orte_odls_child_t*)item; - - /* if the list is empty or this child is not in the list then add it */ - if( 0 >= list_len || - NULL == (vpid_snapshot = find_vpid_snapshot(child->name)) ) { - vpid_snapshot = OBJ_NEW(orte_snapc_full_local_snapshot_t); - opal_list_append(&snapc_local_vpids, &(vpid_snapshot->super.crs_snapshot_super.super)); - new_child = true; - } - else { - new_child = false; - } - - vpid_snapshot->super.process_pid = child->pid; - vpid_snapshot->super.process_name.jobid = child->name->jobid; - vpid_snapshot->super.process_name.vpid = child->name->vpid; - } - - /* - * Send list to global coordinator - */ - if( new_child ) { - if( ORTE_SUCCESS != (ret = snapc_full_local_send_vpid_assoc() ) ) { - exit_status = ret; - goto cleanup; - } - } - - cleanup: - return exit_status; -} - -static orte_snapc_full_local_snapshot_t *find_vpid_snapshot(orte_process_name_t *name ) +static int local_coord_job_state_update_finished_local(void) { + int ret; + orte_snapc_full_app_snapshot_t *vpid_snapshot = NULL; opal_list_item_t* item = NULL; - orte_snapc_full_local_snapshot_t *vpid_snapshot = NULL; + + OPAL_OUTPUT_VERBOSE((15, mca_snapc_full_component.super.output_handle, + "Local) Job Ckpt finished tell all processes\n")); for(item = opal_list_get_first(&snapc_local_vpids); item != opal_list_get_end(&snapc_local_vpids); item = opal_list_get_next(item) ) { - vpid_snapshot = (orte_snapc_full_local_snapshot_t*)item; + vpid_snapshot = (orte_snapc_full_app_snapshot_t*)item; - if( name->jobid == 
vpid_snapshot->super.process_name.jobid && - name->vpid == vpid_snapshot->super.process_name.vpid ) { - return vpid_snapshot; + OPAL_OUTPUT_VERBOSE((15, mca_snapc_full_component.super.output_handle, + "Local) Tell process %s\n", + ORTE_NAME_PRINT(&vpid_snapshot->super.process_name))); + + if( ORTE_SUCCESS != (ret = snapc_full_local_end_ckpt_handshake(vpid_snapshot) ) ) { + opal_output(mca_snapc_full_component.super.output_handle, + "Local) Error: Unable to finish the handshake with peer %s. %d\n", + ORTE_NAME_PRINT(&vpid_snapshot->super.process_name), ret); + ORTE_ERROR_LOG(ORTE_ERROR); + return ORTE_ERROR; } } - return NULL; -} - -static int orte_snapc_full_local_set_vpid_ckpt_info( orte_process_name_t proc, - size_t ckpt_state, - char *ckpt_snapshot_ref, - char *ckpt_snapshot_loc) -{ - int ret, exit_status = ORTE_SUCCESS; - opal_buffer_t buffer; - orte_snapc_full_cmd_flag_t command = ORTE_SNAPC_FULL_UPDATE_PROC_STATE_CMD; - - /* - * Global Coordinator: Operate locally - */ - if( ORTE_SNAPC_GLOBAL_COORD_TYPE == (orte_snapc_coord_type & ORTE_SNAPC_GLOBAL_COORD_TYPE)) { - if( ORTE_SUCCESS != (ret = global_coord_vpid_state_update(proc, ckpt_state, &ckpt_snapshot_ref, &ckpt_snapshot_loc)) ) { - ORTE_ERROR_LOG(ret); - return ret; - } - return ORTE_SUCCESS; - } - - OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, - "Local) Process %s: Changed state to:\n", - ORTE_NAME_PRINT(&proc))); - OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, - "Local) State: %d\n", - (int)ckpt_state)); - OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, - "Local) Snapshot Ref: [%s]\n", - ckpt_snapshot_ref)); - OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, - "Local) Location: [%s]\n", - ckpt_snapshot_loc)); - - /* - * Local Coordinator: Send Global Coordinator the information - */ - OBJ_CONSTRUCT(&buffer, opal_buffer_t); - - if (ORTE_SUCCESS != (ret = opal_dss.pack(&buffer, &command, 1, ORTE_SNAPC_FULL_CMD 
)) ) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - - if (ORTE_SUCCESS != (ret = opal_dss.pack(&buffer, &proc, 1, ORTE_NAME))) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - - if (ORTE_SUCCESS != (ret = opal_dss.pack(&buffer, &ckpt_state, 1, OPAL_SIZE))) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - if (ORTE_SUCCESS != (ret = opal_dss.pack(&buffer, &ckpt_snapshot_ref, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - if (ORTE_SUCCESS != (ret = opal_dss.pack(&buffer, &ckpt_snapshot_loc, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - - if (0 > (ret = orte_rml.send_buffer(ORTE_PROC_MY_HNP, &buffer, ORTE_RML_TAG_SNAPC_FULL, 0))) { - ORTE_ERROR_LOG(ret); - exit_status = ret; - goto cleanup; - } - - cleanup: - OBJ_DESTRUCT(&buffer); - - return exit_status; + return ORTE_SUCCESS; } /************************ * Start the checkpoint ************************/ -static int snapc_full_local_start_checkpoint_all(size_t ckpt_state) +static int snapc_full_local_start_checkpoint_all(int ckpt_state) { int ret, exit_status = ORTE_SUCCESS; - orte_snapc_full_local_snapshot_t *vpid_snapshot; + orte_snapc_full_app_snapshot_t *vpid_snapshot; opal_list_item_t* item = NULL; char * actual_local_dir = NULL; bool ckpt_n_term = false; @@ -1068,7 +856,7 @@ static int snapc_full_local_start_checkpoint_all(size_t ckpt_state) for(item = opal_list_get_first(&snapc_local_vpids); item != opal_list_get_end(&snapc_local_vpids); item = opal_list_get_next(item) ) { - vpid_snapshot = (orte_snapc_full_local_snapshot_t*)item; + vpid_snapshot = (orte_snapc_full_app_snapshot_t*)item; /* * Set up the snapshot directory per suggestion from @@ -1076,24 +864,26 @@ static int snapc_full_local_start_checkpoint_all(size_t ckpt_state) * If we can't create the suggested local directory, do what we can and update * local directory reference in the GPR */ - if( ORTE_SUCCESS != (ret = 
snapc_full_local_setup_snapshot_dir(vpid_snapshot->super.crs_snapshot_super.reference_name, - vpid_snapshot->super.crs_snapshot_super.local_location, + if( ORTE_SUCCESS != (ret = snapc_full_local_setup_snapshot_dir(vpid_snapshot->super.reference_name, + vpid_snapshot->super.local_location, &actual_local_dir) ) ) { + ORTE_ERROR_LOG(ret); exit_status = ret; goto cleanup; } OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle, "Local) Using directory (%s)\n", - vpid_snapshot->super.crs_snapshot_super.local_location)); + vpid_snapshot->super.local_location)); /* Dummy check */ - if( vpid_snapshot->super.process_pid == 0 ) { + if( vpid_snapshot->process_pid == 0 ) { ret = snapc_full_local_get_vpids(); - if( ORTE_SUCCESS != ret || vpid_snapshot->super.process_pid == 0 ) { + if( ORTE_SUCCESS != ret || vpid_snapshot->process_pid == 0 ) { opal_output( mca_snapc_full_component.super.output_handle, "local) Cannot checkpoint an invalid pid (%d)\n", - vpid_snapshot->super.process_pid); + vpid_snapshot->process_pid); + ORTE_ERROR_LOG(ORTE_ERROR); exit_status = ORTE_ERROR; goto cleanup; } @@ -1106,7 +896,7 @@ static int snapc_full_local_start_checkpoint_all(size_t ckpt_state) for(item = opal_list_get_first(&snapc_local_vpids); item != opal_list_get_end(&snapc_local_vpids); item = opal_list_get_next(item) ) { - vpid_snapshot = (orte_snapc_full_local_snapshot_t*)item; + vpid_snapshot = (orte_snapc_full_app_snapshot_t*)item; /* * Create named pipe references for this process @@ -1117,23 +907,23 @@ static int snapc_full_local_start_checkpoint_all(size_t ckpt_state) free(tmp_pid); tmp_pid = NULL; } - asprintf(&tmp_pid, "%d", vpid_snapshot->super.process_pid); + asprintf(&tmp_pid, "%d", vpid_snapshot->process_pid); asprintf(&(vpid_snapshot->comm_pipe_w), "%s/%s.%s", opal_cr_pipe_dir, OPAL_CR_NAMED_PROG_R, tmp_pid); asprintf(&(vpid_snapshot->comm_pipe_r), "%s/%s.%s", opal_cr_pipe_dir, OPAL_CR_NAMED_PROG_W, tmp_pid); } OPAL_OUTPUT_VERBOSE((20, 
mca_snapc_full_component.super.output_handle, "Local) Signal process (%d) with signal %d\n", - (int) vpid_snapshot->super.process_pid, + (int) vpid_snapshot->process_pid, opal_cr_entry_point_signal)); /* * Signal the application */ - if( 0 != (ret = kill(vpid_snapshot->super.process_pid, opal_cr_entry_point_signal) ) ) { + if( 0 != (ret = kill(vpid_snapshot->process_pid, opal_cr_entry_point_signal) ) ) { opal_output(mca_snapc_full_component.super.output_handle, "local) Error: Failed to signal process %d with signal %d. %d\n", - (int) vpid_snapshot->super.process_pid, + (int) vpid_snapshot->process_pid, opal_cr_entry_point_signal, ret); exit_status = ret; @@ -1147,26 +937,26 @@ static int snapc_full_local_start_checkpoint_all(size_t ckpt_state) for(item = opal_list_get_first(&snapc_local_vpids); item != opal_list_get_end(&snapc_local_vpids); item = opal_list_get_next(item) ) { - vpid_snapshot = (orte_snapc_full_local_snapshot_t*)item; + vpid_snapshot = (orte_snapc_full_app_snapshot_t*)item; if( ORTE_SUCCESS != (ret = snapc_full_local_start_ckpt_open_comm(vpid_snapshot) ) ) { opal_output(mca_snapc_full_component.super.output_handle, "local) Error: Unable to initiate the handshake with peer %s. 
%d\n", ORTE_NAME_PRINT(&vpid_snapshot->super.process_name), ret); - exit_status = OPAL_ERROR; + ORTE_ERROR_LOG(ORTE_ERROR); + exit_status = ORTE_ERROR; goto cleanup; } + vpid_snapshot->super.state = ORTE_SNAPC_CKPT_STATE_RUNNING; + } - /* - * Update so that folks know that we are working on it - */ - if( ORTE_SUCCESS != (ret = orte_snapc_full_local_set_vpid_ckpt_info( vpid_snapshot->super.process_name, - ORTE_SNAPC_CKPT_STATE_RUNNING, - vpid_snapshot->super.crs_snapshot_super.reference_name, - vpid_snapshot->super.crs_snapshot_super.local_location ) ) ) { - exit_status = ret; - goto cleanup; - } + /* + * Progress Update to Global Coordinator + */ + if( ORTE_SUCCESS != (ret = snapc_full_local_update_coord(ORTE_SNAPC_CKPT_STATE_RUNNING, true) ) ) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; } /* @@ -1175,13 +965,14 @@ static int snapc_full_local_start_checkpoint_all(size_t ckpt_state) for(item = opal_list_get_first(&snapc_local_vpids); item != opal_list_get_end(&snapc_local_vpids); item = opal_list_get_next(item) ) { - vpid_snapshot = (orte_snapc_full_local_snapshot_t*)item; + vpid_snapshot = (orte_snapc_full_app_snapshot_t*)item; if( ORTE_SUCCESS != (ret = snapc_full_local_start_ckpt_handshake_term(vpid_snapshot, ckpt_n_term) ) ) { opal_output(mca_snapc_full_component.super.output_handle, "local) Error: Unable to initiate the handshake with peer %s. 
%d\n", ORTE_NAME_PRINT(&vpid_snapshot->super.process_name), ret); - exit_status = OPAL_ERROR; + ORTE_ERROR_LOG(ORTE_ERROR); + exit_status = ORTE_ERROR; goto cleanup; } } @@ -1192,13 +983,14 @@ static int snapc_full_local_start_checkpoint_all(size_t ckpt_state) for(item = opal_list_get_first(&snapc_local_vpids); item != opal_list_get_end(&snapc_local_vpids); item = opal_list_get_next(item) ) { - vpid_snapshot = (orte_snapc_full_local_snapshot_t*)item; + vpid_snapshot = (orte_snapc_full_app_snapshot_t*)item; if( ORTE_SUCCESS != (ret = snapc_full_local_start_ckpt_handshake(vpid_snapshot) ) ) { opal_output(mca_snapc_full_component.super.output_handle, "local) Error: Unable to initiate the handshake with peer %s. %d\n", ORTE_NAME_PRINT(&vpid_snapshot->super.process_name), ret); - exit_status = OPAL_ERROR; + ORTE_ERROR_LOG(ORTE_ERROR); + exit_status = ORTE_ERROR; goto cleanup; } } @@ -1216,7 +1008,107 @@ static int snapc_full_local_start_checkpoint_all(size_t ckpt_state) return exit_status; } -static int snapc_full_local_start_ckpt_open_comm(orte_snapc_full_local_snapshot_t *vpid_snapshot) +static int snapc_full_local_update_coord(int state, bool quick) +{ + int ret, exit_status = ORTE_SUCCESS; + opal_buffer_t buffer; + orte_snapc_full_cmd_flag_t command; + opal_list_item_t* item = NULL; + orte_snapc_full_app_snapshot_t *vpid_snapshot = NULL; + char *crs_agent = NULL; + size_t sz = 0; + char *loc_location = NULL; + + /* + * Local Coordinator: Send Global Coordinator state information + */ + OBJ_CONSTRUCT(&buffer, opal_buffer_t); + + if( quick ) { + command = ORTE_SNAPC_FULL_UPDATE_ORTED_STATE_QUICK_CMD; + } else { + command = ORTE_SNAPC_FULL_UPDATE_ORTED_STATE_CMD; + } + if (ORTE_SUCCESS != (ret = opal_dss.pack(&buffer, &command, 1, ORTE_SNAPC_FULL_CMD )) ) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } + + if (ORTE_SUCCESS != (ret = opal_dss.pack(&buffer, &state, 1, OPAL_INT))) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } + + /* 
Optionally send only an abbreviated message to improve scalability */ + if( quick ) { + goto send_data; + } + + crs_agent = strdup(opal_crs_base_selected_component.base_version.mca_component_name); + if (ORTE_SUCCESS != (ret = opal_dss.pack(&buffer, &(crs_agent), 1, OPAL_STRING))) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } + + sz = opal_list_get_size(&snapc_local_vpids); + if (ORTE_SUCCESS != (ret = opal_dss.pack(&buffer, &sz, 1, OPAL_SIZE))) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } + + for(item = opal_list_get_first(&snapc_local_vpids); + item != opal_list_get_end(&snapc_local_vpids); + item = opal_list_get_next(item) ) { + vpid_snapshot = (orte_snapc_full_app_snapshot_t*)item; + + if (ORTE_SUCCESS != (ret = opal_dss.pack(&buffer, &(vpid_snapshot->super.process_name), 1, ORTE_NAME))) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } + + if (ORTE_SUCCESS != (ret = opal_dss.pack(&buffer, &(vpid_snapshot->super.reference_name), 1, OPAL_STRING))) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } + + loc_location = opal_dirname(vpid_snapshot->super.local_location); + if (ORTE_SUCCESS != (ret = opal_dss.pack(&buffer, &(loc_location), 1, OPAL_STRING))) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } + + } + + send_data: + if (0 > (ret = orte_rml.send_buffer(ORTE_PROC_MY_HNP, &buffer, ORTE_RML_TAG_SNAPC_FULL, 0))) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } + + cleanup: + OBJ_DESTRUCT(&buffer); + + if( NULL != crs_agent ) { + free(crs_agent); + crs_agent = NULL; + } + if( NULL != loc_location ) { + free(loc_location); + loc_location = NULL; + } + + return exit_status; +} + +static int snapc_full_local_start_ckpt_open_comm(orte_snapc_full_app_snapshot_t *vpid_snapshot) { int ret, exit_status = ORTE_SUCCESS; int usleep_time = 1000; @@ -1272,7 +1164,7 @@ static int snapc_full_local_start_ckpt_open_comm(orte_snapc_full_local_snapshot_ * In either case there 
is nothing we can do. */ orte_show_help("help-opal-checkpoint.txt", "pid_does_not_exist", true, - vpid_snapshot->super.process_pid, + vpid_snapshot->process_pid, vpid_snapshot->comm_pipe_r, vpid_snapshot->comm_pipe_w); @@ -1311,7 +1203,7 @@ static int snapc_full_local_start_ckpt_open_comm(orte_snapc_full_local_snapshot_ return exit_status; } -static int snapc_full_local_start_ckpt_handshake_term(orte_snapc_full_local_snapshot_t *vpid_snapshot, bool term) +static int snapc_full_local_start_ckpt_handshake_term(orte_snapc_full_app_snapshot_t *vpid_snapshot, bool term) { int ret, exit_status = ORTE_SUCCESS; int term_rep; @@ -1339,7 +1231,7 @@ static int snapc_full_local_start_ckpt_handshake_term(orte_snapc_full_local_snap return exit_status; } -static int snapc_full_local_start_ckpt_handshake(orte_snapc_full_local_snapshot_t *vpid_snapshot) +static int snapc_full_local_start_ckpt_handshake(orte_snapc_full_app_snapshot_t *vpid_snapshot) { int ret, exit_status = ORTE_SUCCESS; char *local_dir = NULL; @@ -1362,7 +1254,7 @@ static int snapc_full_local_start_ckpt_handshake(orte_snapc_full_local_snapshot_ orte_show_help("help-opal-checkpoint.txt", "ckpt:in_progress", true, - vpid_snapshot->super.process_pid); + vpid_snapshot->process_pid); exit_status = OPAL_ERROR; goto cleanup; } @@ -1370,7 +1262,7 @@ static int snapc_full_local_start_ckpt_handshake(orte_snapc_full_local_snapshot_ orte_show_help("help-opal-checkpoint.txt", "ckpt:req_null", true, - vpid_snapshot->super.process_pid); + vpid_snapshot->process_pid); exit_status = OPAL_ERROR; goto cleanup; } @@ -1378,7 +1270,7 @@ static int snapc_full_local_start_ckpt_handshake(orte_snapc_full_local_snapshot_ orte_show_help("help-opal-checkpoint.txt", "ckpt:req_error", true, - vpid_snapshot->super.process_pid); + vpid_snapshot->process_pid); exit_status = OPAL_ERROR; goto cleanup; } @@ -1394,7 +1286,7 @@ static int snapc_full_local_start_ckpt_handshake(orte_snapc_full_local_snapshot_ /* * Send: Snapshot Name */ - len = 
strlen(vpid_snapshot->super.crs_snapshot_super.reference_name) + 1; + len = strlen(vpid_snapshot->super.reference_name) + 1; if( sizeof(int) != (ret = write(vpid_snapshot->comm_pipe_w_fd, &len, sizeof(int))) ) { opal_output(mca_snapc_full_component.super.output_handle, "local) Error: Unable to write snapshot name len (%d) to named pipe (%s). %d\n", @@ -1404,10 +1296,10 @@ static int snapc_full_local_start_ckpt_handshake(orte_snapc_full_local_snapshot_ } tmp_size = sizeof(char) * len; - if( tmp_size != (ret = write(vpid_snapshot->comm_pipe_w_fd, (vpid_snapshot->super.crs_snapshot_super.reference_name), (sizeof(char) * len))) ) { + if( tmp_size != (ret = write(vpid_snapshot->comm_pipe_w_fd, (vpid_snapshot->super.reference_name), (sizeof(char) * len))) ) { opal_output(mca_snapc_full_component.super.output_handle, "local) Error: Unable to write snapshot name (%s) to named pipe (%s). %d\n", - vpid_snapshot->super.crs_snapshot_super.reference_name, vpid_snapshot->comm_pipe_w, ret); + vpid_snapshot->super.reference_name, vpid_snapshot->comm_pipe_w, ret); exit_status = OPAL_ERROR; goto cleanup; } @@ -1415,7 +1307,7 @@ static int snapc_full_local_start_ckpt_handshake(orte_snapc_full_local_snapshot_ /* * Send: Snapshot Location */ - local_dir = strdup(vpid_snapshot->super.crs_snapshot_super.local_location); + local_dir = strdup(vpid_snapshot->super.local_location); local_dir = opal_dirname(local_dir); len = strlen(local_dir) + 1; if( sizeof(int) != (ret = write(vpid_snapshot->comm_pipe_w_fd, &len, sizeof(int))) ) { @@ -1444,7 +1336,7 @@ static int snapc_full_local_start_ckpt_handshake(orte_snapc_full_local_snapshot_ return exit_status; } -static int snapc_full_local_end_ckpt_handshake(orte_snapc_full_local_snapshot_t *vpid_snapshot) +static int snapc_full_local_end_ckpt_handshake(orte_snapc_full_app_snapshot_t *vpid_snapshot) { int ret, exit_status = ORTE_SUCCESS; int last_cmd = 0; @@ -1456,7 +1348,7 @@ static int 
snapc_full_local_end_ckpt_handshake(orte_snapc_full_local_snapshot_t return exit_status; } - if( vpid_snapshot->super.term ) { + if( vpid_snapshot->term ) { last_cmd = 999; } else { last_cmd = 0; @@ -1488,14 +1380,16 @@ static int snapc_full_local_end_ckpt_handshake(orte_snapc_full_local_snapshot_t static void snapc_full_local_comm_read_event(int fd, short flags, void *arg) { int ret, exit_status = ORTE_SUCCESS; - orte_snapc_full_local_snapshot_t *vpid_snapshot = NULL; - size_t loc_state = ORTE_SNAPC_CKPT_STATE_FINISHED; + orte_snapc_full_app_snapshot_t *vpid_snapshot = NULL; int ckpt_state; + int loc_min_state; + bool send_quick = true; + char * state_str = NULL; - vpid_snapshot = (orte_snapc_full_local_snapshot_t *)arg; + vpid_snapshot = (orte_snapc_full_app_snapshot_t *)arg; OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, - "Local) Read Event: Process %s done...\n", + "Local) Read Event: Process %s done checkpointing...\n", ORTE_NAME_PRINT(&vpid_snapshot->super.process_name))); /* @@ -1505,27 +1399,70 @@ static void snapc_full_local_comm_read_event(int fd, short flags, void *arg) opal_output(mca_snapc_full_component.super.output_handle, "local) Error: Unable to read state from named pipe (%s). 
%d\n", vpid_snapshot->comm_pipe_r, ret); - exit_status = OPAL_ERROR; + ORTE_ERROR_LOG(ORTE_ERROR); + exit_status = ORTE_ERROR; goto cleanup; } - if( ckpt_state == OPAL_CRS_ERROR ) { - loc_state = ORTE_SNAPC_CKPT_STATE_ERROR; - } - /* * Now that the checkpoint is finished * Update our status information */ - vpid_snapshot->super.state = loc_state; - if( ORTE_SUCCESS != (ret = orte_snapc_full_local_set_vpid_ckpt_info( vpid_snapshot->super.process_name, - loc_state, - vpid_snapshot->super.crs_snapshot_super.reference_name, - vpid_snapshot->super.crs_snapshot_super.local_location ) ) ) { - exit_status = ret; + if( ckpt_state == OPAL_CRS_ERROR ) { + vpid_snapshot->super.state = ORTE_SNAPC_CKPT_STATE_ERROR; + } else { + vpid_snapshot->super.state = ORTE_SNAPC_CKPT_STATE_FINISHED_LOCAL; + } + + /* + * If error, then exit early + */ + if( ORTE_SNAPC_CKPT_STATE_ERROR == vpid_snapshot->super.state ) { + /* JJH - The error path needs some more work */ + if( ORTE_SUCCESS != (ret = snapc_full_local_update_coord(ORTE_SNAPC_CKPT_STATE_ERROR, true) ) ) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } goto cleanup; } + /* + * If all processes have finished locally, notify Global Coordinator + * if(FIN_LOCAL) { + * -- wait for the FIN from Global Coord -- + * } + */ + loc_min_state = snapc_full_get_min_state(); + if( loc_min_state > current_job_ckpt_state && + ORTE_SNAPC_CKPT_STATE_FINISHED_LOCAL == loc_min_state ) { + + orte_snapc_ckpt_state_str(&state_str, loc_min_state); + OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, + "Local) Daemon State Changed: %d (%s)", + (int)loc_min_state, state_str )); + free(state_str); + state_str = NULL; + + send_quick = false; + + current_job_ckpt_state = loc_min_state; + if( ORTE_SUCCESS != (ret = snapc_full_local_update_coord(loc_min_state, send_quick) ) ) { + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } + } + + /* + * If file transfer required just set state + * Global Coordinator does not 
need to be notified again since it can see + * these variables and knows what to do. + */ + if( !orte_snapc_base_store_in_place && !orte_snapc_full_skip_filem ) { + vpid_snapshot->super.state = ORTE_SNAPC_CKPT_STATE_FILE_XFER; + } + cleanup: /* * Disable events @@ -1533,5 +1470,152 @@ static void snapc_full_local_comm_read_event(int fd, short flags, void *arg) opal_event_del(&(vpid_snapshot->comm_pipe_r_eh)); vpid_snapshot->is_eh_active = false; + if( NULL != state_str ) { + free(state_str); + state_str = NULL; + } + return; } + +static int snapc_full_get_min_state(void) +{ + int min_state = ORTE_SNAPC_CKPT_MAX; + orte_snapc_full_app_snapshot_t *vpid_snapshot = NULL; + opal_list_item_t* item = NULL; + char * state_str_a = NULL; + char * state_str_b = NULL; + + for(item = opal_list_get_first(&snapc_local_vpids); + item != opal_list_get_end(&snapc_local_vpids); + item = opal_list_get_next(item) ) { + vpid_snapshot = (orte_snapc_full_app_snapshot_t*)item; + + if( NULL != state_str_a ) { + free(state_str_a); + } + if( NULL != state_str_b ) { + free(state_str_b); + } + + orte_snapc_ckpt_state_str(&state_str_a, vpid_snapshot->super.state); + orte_snapc_ckpt_state_str(&state_str_b, min_state); + + OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, + "Local) ... Checking [%d %s] vs [%d %s]", + (int)vpid_snapshot->super.state, state_str_a, + (int)min_state, state_str_b )); + if( min_state > vpid_snapshot->super.state ) { + min_state = vpid_snapshot->super.state; + } + } + + if( NULL != state_str_b ) { + free(state_str_b); + state_str_b = NULL; + } + orte_snapc_ckpt_state_str(&state_str_b, min_state); + OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle, + "Local) ... 
Min State [%d %s]", + (int)min_state, state_str_b )); + + if( NULL != state_str_a ) { + free(state_str_a); + state_str_a = NULL; + } + if( NULL != state_str_b ) { + free(state_str_b); + state_str_b = NULL; + } + + return min_state; +} + +static int snapc_full_local_setup_snapshot_dir(char * snapshot_ref, char * sugg_dir, char **actual_dir) +{ + int ret, exit_status = ORTE_SUCCESS; + mode_t my_mode = S_IRWXU; + + /* See if we can use the suggested directory */ + if(OPAL_SUCCESS != (ret = opal_os_dirpath_create(sugg_dir, my_mode) ) ) { + /* Can't use that directory, try the default directory from OPAL CRS */ + *actual_dir = strdup(opal_crs_base_get_snapshot_directory(snapshot_ref)); + + if(OPAL_SUCCESS != (ret = opal_os_dirpath_create(*actual_dir, my_mode) ) ) { + /* Can't use that either, so let's give up */ + ORTE_ERROR_LOG(ret); + exit_status = ret; + goto cleanup; + } + } + else { + /* We are able to use that directory */ + *actual_dir = strdup(sugg_dir); + } + + cleanup: + return exit_status; +} + + +static int snapc_full_local_get_vpids(void) +{ + opal_list_item_t *item = NULL; + orte_snapc_full_app_snapshot_t *vpid_snapshot = NULL; + orte_odls_child_t *child = NULL; + size_t list_len = 0; + + /* + * If the list is populated, and has updated pid information then + * there is nothing to update. 
+ */ + list_len = opal_list_get_size(&snapc_local_vpids); + if( list_len > 0 ) { + vpid_snapshot = (orte_snapc_full_app_snapshot_t*)opal_list_get_first(&snapc_local_vpids); + if( 0 < vpid_snapshot->process_pid ) { + return ORTE_SUCCESS; + } + } + + /* + * Otherwise update or populate the list + */ + for (item = opal_list_get_first(&orte_local_children); + item != opal_list_get_end(&orte_local_children); + item = opal_list_get_next(item)) { + child = (orte_odls_child_t*)item; + + /* if the list is empty or this child is not in the list then add it */ + if( 0 >= list_len || + NULL == (vpid_snapshot = find_vpid_snapshot(child->name)) ) { + vpid_snapshot = OBJ_NEW(orte_snapc_full_app_snapshot_t); + opal_list_append(&snapc_local_vpids, &(vpid_snapshot->super.super)); + } + + vpid_snapshot->process_pid = child->pid; + vpid_snapshot->super.process_name.jobid = child->name->jobid; + vpid_snapshot->super.process_name.vpid = child->name->vpid; + } + + return ORTE_SUCCESS; +} + +static orte_snapc_full_app_snapshot_t *find_vpid_snapshot(orte_process_name_t *name ) +{ + opal_list_item_t* item = NULL; + orte_snapc_full_app_snapshot_t *vpid_snapshot = NULL; + + for(item = opal_list_get_first(&snapc_local_vpids); + item != opal_list_get_end(&snapc_local_vpids); + item = opal_list_get_next(item) ) { + vpid_snapshot = (orte_snapc_full_app_snapshot_t*)item; + + if( name->jobid == vpid_snapshot->super.process_name.jobid && + name->vpid == vpid_snapshot->super.process_name.vpid ) { + return vpid_snapshot; + } + } + + return NULL; +} + diff --git a/orte/mca/snapc/full/snapc_full_module.c b/orte/mca/snapc/full/snapc_full_module.c index 4db80dff64..e4c3d0f4d9 100644 --- a/orte/mca/snapc/full/snapc_full_module.c +++ b/orte/mca/snapc/full/snapc_full_module.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2008 The Trustees of Indiana University. + * Copyright (c) 2004-2009 The Trustees of Indiana University. * All rights reserved. 
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee. * All rights reserved. @@ -50,24 +50,24 @@ static orte_snapc_base_module_t loc_module = { /* * Global Snapshot structure */ -void orte_snapc_full_global_construct(orte_snapc_full_global_snapshot_t *obj); -void orte_snapc_full_global_destruct( orte_snapc_full_global_snapshot_t *obj); +void orte_snapc_full_orted_construct(orte_snapc_full_orted_snapshot_t *obj); +void orte_snapc_full_orted_destruct( orte_snapc_full_orted_snapshot_t *obj); -OBJ_CLASS_INSTANCE(orte_snapc_full_global_snapshot_t, - orte_snapc_base_snapshot_t, - orte_snapc_full_global_construct, - orte_snapc_full_global_destruct); +OBJ_CLASS_INSTANCE(orte_snapc_full_orted_snapshot_t, + orte_snapc_base_global_snapshot_t, + orte_snapc_full_orted_construct, + orte_snapc_full_orted_destruct); /* * Local Snapshot structure */ -void orte_snapc_full_local_construct(orte_snapc_full_local_snapshot_t *obj); -void orte_snapc_full_local_destruct( orte_snapc_full_local_snapshot_t *obj); +void orte_snapc_full_app_construct(orte_snapc_full_app_snapshot_t *obj); +void orte_snapc_full_app_destruct( orte_snapc_full_app_snapshot_t *obj); -OBJ_CLASS_INSTANCE(orte_snapc_full_local_snapshot_t, - orte_snapc_base_snapshot_t, - orte_snapc_full_local_construct, - orte_snapc_full_local_destruct); +OBJ_CLASS_INSTANCE(orte_snapc_full_app_snapshot_t, + orte_snapc_base_local_snapshot_t, + orte_snapc_full_app_construct, + orte_snapc_full_app_destruct); /************************************ * Locally Global vars & functions :) @@ -77,29 +77,53 @@ OBJ_CLASS_INSTANCE(orte_snapc_full_local_snapshot_t, /************************ * Function Definitions ************************/ -void orte_snapc_full_global_construct(orte_snapc_full_global_snapshot_t *snapshot) { - snapshot->local_coord.vpid = 0; - snapshot->local_coord.jobid = 0; +void orte_snapc_full_orted_construct(orte_snapc_full_orted_snapshot_t *snapshot) { + snapshot->process_name.jobid = 0; + 
snapshot->process_name.vpid = 0; + + snapshot->state = ORTE_SNAPC_CKPT_STATE_NONE; + + snapshot->opal_crs = NULL; + + snapshot->term = false; + + snapshot->filem_request = NULL; } -void orte_snapc_full_global_destruct( orte_snapc_full_global_snapshot_t *snapshot) { - snapshot->local_coord.vpid = 0; - snapshot->local_coord.jobid = 0; +void orte_snapc_full_orted_destruct( orte_snapc_full_orted_snapshot_t *snapshot) { + snapshot->process_name.jobid = 0; + snapshot->process_name.vpid = 0; + + snapshot->state = ORTE_SNAPC_CKPT_STATE_NONE; + + if( NULL != snapshot->opal_crs ) { + free( snapshot->opal_crs ); + snapshot->opal_crs = NULL; + } + + snapshot->term = false; + + if( NULL != snapshot->filem_request ) { + OBJ_RELEASE(snapshot->filem_request); + snapshot->filem_request = NULL; + } } -void orte_snapc_full_local_construct(orte_snapc_full_local_snapshot_t *obj) { +void orte_snapc_full_app_construct(orte_snapc_full_app_snapshot_t *obj) { obj->comm_pipe_r = NULL; obj->comm_pipe_w = NULL; obj->comm_pipe_r_fd = -1; obj->comm_pipe_w_fd = -1; - obj->ckpt_state = ORTE_SNAPC_CKPT_STATE_NONE; - obj->is_eh_active = false; + + obj->process_pid = 0; + + obj->term = false; } -void orte_snapc_full_local_destruct( orte_snapc_full_local_snapshot_t *obj) { +void orte_snapc_full_app_destruct( orte_snapc_full_app_snapshot_t *obj) { if( NULL != obj->comm_pipe_r ) { free(obj->comm_pipe_r); obj->comm_pipe_r = NULL; @@ -113,9 +137,11 @@ void orte_snapc_full_local_destruct( orte_snapc_full_local_snapshot_t *obj) { obj->comm_pipe_r_fd = -1; obj->comm_pipe_w_fd = -1; - obj->ckpt_state = ORTE_SNAPC_CKPT_STATE_NONE; - obj->is_eh_active = false; + + obj->process_pid = 0; + + obj->term = false; } /* diff --git a/orte/mca/snapc/snapc.h b/orte/mca/snapc/snapc.h index 794b5a641f..1df1ee3ceb 100644 --- a/orte/mca/snapc/snapc.h +++ b/orte/mca/snapc/snapc.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana + * Copyright (c) 2004-2009 The Trustees of Indiana 
University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2005 The University of Tennessee and The University @@ -89,45 +89,61 @@ BEGIN_C_DECLS /** * States that a process can be in while checkpointing */ -/* Doing no checkpoint -- Quiet state */ -#define ORTE_SNAPC_CKPT_STATE_NONE 0 -/* There has been a request for a checkpoint from one of the applications */ -#define ORTE_SNAPC_CKPT_STATE_REQUEST 1 -/* There is a Pending checkpoint for this process */ -#define ORTE_SNAPC_CKPT_STATE_PENDING 2 -/* There is a Pending checkpoint for this process, terminate the process after checkpoint */ -#define ORTE_SNAPC_CKPT_STATE_PENDING_TERM 3 -/* Running the checkpoint */ -#define ORTE_SNAPC_CKPT_STATE_RUNNING 4 -/* Finished the checkpoint */ -#define ORTE_SNAPC_CKPT_STATE_FILE_XFER 5 -/* Finished the checkpoint */ -#define ORTE_SNAPC_CKPT_STATE_FINISHED 6 -/* Unable to checkpoint this job */ -#define ORTE_SNAPC_CKPT_STATE_NO_CKPT 7 /* Reached an error */ -#define ORTE_SNAPC_CKPT_STATE_ERROR 8 +#define ORTE_SNAPC_CKPT_STATE_ERROR 0 + +/* Doing no checkpoint -- Quiet state */ +#define ORTE_SNAPC_CKPT_STATE_NONE 1 +/* There has been a request for a checkpoint from one of the applications */ +#define ORTE_SNAPC_CKPT_STATE_REQUEST 2 +/* There is a Pending checkpoint for this process */ +#define ORTE_SNAPC_CKPT_STATE_PENDING 3 +/* There is a Pending checkpoint for this process, terminate the process after checkpoint */ +#define ORTE_SNAPC_CKPT_STATE_PENDING_TERM 4 +/* Running the checkpoint */ +#define ORTE_SNAPC_CKPT_STATE_RUNNING 5 +/* Finished the checkpoint locally */ +#define ORTE_SNAPC_CKPT_STATE_FINISHED_LOCAL 6 +/* File Transfer in progress */ +#define ORTE_SNAPC_CKPT_STATE_FILE_XFER 8 +/* Finished the checkpoint */ +#define ORTE_SNAPC_CKPT_STATE_FINISHED 9 +/* Unable to checkpoint this job */ +#define ORTE_SNAPC_CKPT_STATE_NO_CKPT 10 +#define ORTE_SNAPC_CKPT_MAX 11 /** * Definition of a orte local snapshot. 
* Similar to the opal_crs_base_snapshot_t except that it * contains process contact information. */ -struct orte_snapc_base_snapshot_1_0_0_t { - opal_crs_base_snapshot_t crs_snapshot_super; +struct orte_snapc_base_local_snapshot_1_0_0_t { + /** List super object */ + opal_list_item_t super; + /** ORTE Process name */ orte_process_name_t process_name; - /** PID of the application process that generated this snapshot */ - pid_t process_pid; - /** State of the checkpoint */ - size_t state; - /** Terminate this process after a checkpoint */ - bool term; -}; -typedef struct orte_snapc_base_snapshot_1_0_0_t orte_snapc_base_snapshot_1_0_0_t; -typedef struct orte_snapc_base_snapshot_1_0_0_t orte_snapc_base_snapshot_t; -ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_snapc_base_snapshot_t); + /** State of the checkpoint */ + int state; + + /** Unique name of the local snapshot */ + char * reference_name; + + /** Local location of the local snapshot Absolute path */ + char * local_location; + + /** Remote location of the local snapshot Absolute path */ + char * remote_location; + + /** CRS agent */ + char * opal_crs; + +}; +typedef struct orte_snapc_base_local_snapshot_1_0_0_t orte_snapc_base_local_snapshot_1_0_0_t; +typedef struct orte_snapc_base_local_snapshot_1_0_0_t orte_snapc_base_local_snapshot_t; + +ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_snapc_base_local_snapshot_t); /** * Definition of the global snapshot. 
@@ -138,12 +154,9 @@ struct orte_snapc_base_global_snapshot_1_0_0_t { /** This is an object, so must have super */ opal_list_item_t super; - /** A list of orte_snapc_base_snapshot_ts */ - opal_list_t snapshots; + /** A list of orte_snapc_base_local_snapshot_t's */ + opal_list_t local_snapshots; - /* ORTE SnapC Component used to generate the global snapshot */ - char * component_name; - /** Unique name of the global snapshot */ char * reference_name; @@ -152,12 +165,6 @@ struct orte_snapc_base_global_snapshot_1_0_0_t { /** Sequence Number */ int seq_num; - - /** Beginning timestamp */ - char * start_time; - - /** Ending timestamp */ - char * end_time; }; typedef struct orte_snapc_base_global_snapshot_1_0_0_t orte_snapc_base_global_snapshot_1_0_0_t; typedef struct orte_snapc_base_global_snapshot_1_0_0_t orte_snapc_base_global_snapshot_t; diff --git a/orte/tools/orte-checkpoint/orte-checkpoint.c b/orte/tools/orte-checkpoint/orte-checkpoint.c index d1213dfd05..156e3bb48b 100644 --- a/orte/tools/orte-checkpoint/orte-checkpoint.c +++ b/orte/tools/orte-checkpoint/orte-checkpoint.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
* Copyright (c) 2004-2005 The University of Tennessee and The University @@ -76,6 +76,8 @@ #include "orte/mca/snapc/snapc.h" #include "orte/mca/snapc/base/base.h" +#include MCA_timer_IMPLEMENTATION_HEADER + /****************** * Local Functions ******************/ @@ -108,11 +110,16 @@ static int global_sequence_num = 0; *****************************************/ static bool listener_started = false; +static double timer_start = 0; +static double timer_last = 0; +static double get_time(void); + typedef struct { bool help; int pid; bool term; bool verbose; + int verbose_level; orte_jobid_t req_hnp; /**< User Requested HNP */ bool nowait; /* Do not wait for checkpoint to complete before returning */ bool status; /* Display status messages while checkpoint is progressing */ @@ -135,6 +142,12 @@ opal_cmd_line_init_t cmd_line_opts[] = { &orte_checkpoint_globals.verbose, OPAL_CMD_LINE_TYPE_BOOL, "Be Verbose" }, + { NULL, NULL, NULL, + 'V', NULL, NULL, + 1, + &orte_checkpoint_globals.verbose_level, OPAL_CMD_LINE_TYPE_INT, + "Set the verbosity level (For additional debugging information)" }, + { NULL, NULL, NULL, '\0', NULL, "term", 0, @@ -279,6 +292,7 @@ static int parse_args(int argc, char *argv[]) { orte_checkpoint_globals.pid = -1; orte_checkpoint_globals.term = false; orte_checkpoint_globals.verbose = false; + orte_checkpoint_globals.verbose_level = 0; orte_checkpoint_globals.req_hnp = ORTE_JOBID_INVALID; orte_checkpoint_globals.nowait = false; orte_checkpoint_globals.status = false; @@ -344,6 +358,14 @@ static int parse_args(int argc, char *argv[]) { goto cleanup; } + if(orte_checkpoint_globals.verbose_level < 0 ) { + orte_checkpoint_globals.verbose_level = 0; + } + + if(orte_checkpoint_globals.verbose_level > 0) { + orte_checkpoint_globals.verbose = true; + } + /* * If the user did not supply an hnp jobid, then they must * supply the PID of MPIRUN @@ -474,7 +496,7 @@ static int ckpt_init(int argc, char *argv[]) { */ if( orte_checkpoint_globals.verbose ) { 
orte_checkpoint_globals.output = opal_output_open(NULL); - opal_output_set_verbosity(orte_checkpoint_globals.output, 10); + opal_output_set_verbosity(orte_checkpoint_globals.output, orte_checkpoint_globals.verbose_level); } else { orte_checkpoint_globals.output = 0; /* Default=STDERR */ } @@ -661,6 +683,8 @@ notify_process_for_checkpoint(int term) "orte_checkpoint: notify_hnp: Contact Head Node Process PID %d\n", orte_checkpoint_globals.pid); + timer_start = get_time(); + /*********************************** * Notify HNP of checkpoint request * Send: @@ -709,18 +733,51 @@ notify_process_for_checkpoint(int term) /*************** * Pretty Print ***************/ +static double get_time(void) { + double wtime; + +#if OPAL_TIMER_USEC_NATIVE + wtime = (double)opal_timer_base_get_usec() / 1000000.0; +#else + struct timeval tv; + gettimeofday(&tv, NULL); + wtime = tv.tv_sec; + wtime += (double)tv.tv_usec / 1000000.0; +#endif + + return wtime; +} + static int pretty_print_status(void) { char * state_str = NULL; + double cur_time; - state_str = orte_snapc_ckpt_state_str(orte_checkpoint_globals.ckpt_status); + cur_time = get_time(); + + if( timer_last == 0 ) { + timer_last = cur_time; + } + + orte_snapc_ckpt_state_str(&state_str, orte_checkpoint_globals.ckpt_status); + + if( NULL != global_snapshot_handle ) { + opal_output(0, + "[%6.2f / %6.2f] %*s - %s\n", + (cur_time - timer_last), (cur_time - timer_start), + 25, state_str, global_snapshot_handle); + } else { + opal_output(0, + "[%6.2f / %6.2f] %*s - ...\n", + (cur_time - timer_last), (cur_time - timer_start), + 25, state_str); + } - opal_output(0, - "%*s - Global Snapshot Reference: %s\n", - 25, state_str, global_snapshot_handle); if( NULL != state_str) { free(state_str); } - + + timer_last = cur_time; + return ORTE_SUCCESS; } diff --git a/orte/tools/orte-ps/orte-ps.c b/orte/tools/orte-ps/orte-ps.c index 2bb7369e8b..3ebb267818 100644 --- a/orte/tools/orte-ps/orte-ps.c +++ b/orte/tools/orte-ps/orte-ps.c @@ -1,5 +1,5 @@ /* - 
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2005 The University of Tennessee and The University @@ -491,6 +491,7 @@ static int pretty_print_jobs(orte_job_t **jobs, orte_std_cntr_t num_jobs) { orte_std_cntr_t i; char *jobstr; orte_jobid_t mask=0x0000ffff; + char * state_str = NULL; for(i=0; i < num_jobs; i++) { job = jobs[i]; @@ -513,9 +514,10 @@ static int pretty_print_jobs(orte_job_t **jobs, orte_std_cntr_t num_jobs) { len_slots = 6; len_vpid_r = (int) strlen("Num Procs"); #if OPAL_ENABLE_FT == 1 - len_ckpt_s = (int) (strlen(orte_snapc_ckpt_state_str(job->ckpt_state)) < strlen("Ckpt State") ? + orte_snapc_ckpt_state_str(&state_str, job->ckpt_state); + len_ckpt_s = (int) (strlen(state_str) < strlen("Ckpt State") ? strlen("Ckpt State") : - strlen(orte_snapc_ckpt_state_str(job->ckpt_state)) ); + strlen(state_str) ); len_ckpt_r = (int) (NULL == job->ckpt_snapshot_ref ? strlen("Ckpt Ref") : (strlen(job->ckpt_snapshot_ref) < strlen("Ckpt Ref") ? strlen("Ckpt Ref") : @@ -525,6 +527,7 @@ static int pretty_print_jobs(orte_job_t **jobs, orte_std_cntr_t num_jobs) { strlen("Ckpt Loc") : strlen(job->ckpt_snapshot_loc) ) ); #else + state_str = NULL; len_ckpt_s = -3; len_ckpt_r = -3; len_ckpt_l = -3; @@ -564,7 +567,7 @@ static int pretty_print_jobs(orte_job_t **jobs, orte_std_cntr_t num_jobs) { printf("%*d | ", len_slots , (uint)job->total_slots_alloc); printf("%*d | ", len_vpid_r, job->num_procs); #if OPAL_ENABLE_FT == 1 - printf("%*s | ", len_ckpt_s, orte_snapc_ckpt_state_str(job->ckpt_state)); + printf("%*s | ", len_ckpt_s, state_str); printf("%*s | ", len_ckpt_r, (NULL == job->ckpt_snapshot_ref ? 
"" : job->ckpt_snapshot_ref) ); @@ -597,6 +600,7 @@ static int pretty_print_vpids(orte_job_t *job) { orte_proc_t *vpid; orte_app_context_t *app; char *o_proc_name; + char *state_str = NULL; /* * Caculate segment lengths @@ -663,8 +667,9 @@ static int pretty_print_vpids(orte_job_t *job) { len_state = strlen(pretty_vpid_state(vpid->state)); #if OPAL_ENABLE_FT == 1 - if( (int)strlen(orte_snapc_ckpt_state_str(vpid->ckpt_state)) > len_ckpt_s) - len_ckpt_s = strlen(orte_snapc_ckpt_state_str(vpid->ckpt_state)); + orte_snapc_ckpt_state_str(&state_str, vpid->ckpt_state); + if( (int)strlen(state_str) > len_ckpt_s) + len_ckpt_s = strlen(state_str); if( NULL != vpid->ckpt_snapshot_ref && (int)strlen(vpid->ckpt_snapshot_ref) > len_ckpt_r) @@ -673,6 +678,8 @@ static int pretty_print_vpids(orte_job_t *job) { if( NULL != vpid->ckpt_snapshot_loc && (int)strlen(vpid->ckpt_snapshot_loc) > len_ckpt_l) len_ckpt_l = strlen(vpid->ckpt_snapshot_loc); +#else + state_str = NULL; #endif } @@ -739,7 +746,7 @@ static int pretty_print_vpids(orte_job_t *job) { printf("%*s | ", len_state , pretty_vpid_state(vpid->state)); #if OPAL_ENABLE_FT == 1 - printf("%*s | ", len_ckpt_s, orte_snapc_ckpt_state_str(vpid->ckpt_state)); + printf("%*s | ", len_ckpt_s, state_str); printf("%*s | ", len_ckpt_r, (NULL == vpid->ckpt_snapshot_ref ? "" : vpid->ckpt_snapshot_ref)); diff --git a/orte/tools/orte-restart/orte-restart.c b/orte/tools/orte-restart/orte-restart.c index b12a542433..1fe118c3d1 100644 --- a/orte/tools/orte-restart/orte-restart.c +++ b/orte/tools/orte-restart/orte-restart.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
* Copyright (c) 2004-2005 The University of Tennessee and The University @@ -151,9 +151,10 @@ int main(int argc, char *argv[]) { int ret, exit_status = ORTE_SUCCESS; - pid_t child_pid; + pid_t child_pid = 0; orte_snapc_base_global_snapshot_t *snapshot = NULL; - + char *tmp_str = NULL; + /*************** * Initialize ***************/ @@ -164,7 +165,10 @@ main(int argc, char *argv[]) snapshot = OBJ_NEW(orte_snapc_base_global_snapshot_t); snapshot->reference_name = strdup(orte_restart_globals.filename); - snapshot->local_location = opal_dirname(orte_snapc_base_get_global_snapshot_directory(snapshot->reference_name)); + orte_snapc_base_get_global_snapshot_directory(&tmp_str, snapshot->reference_name); + snapshot->local_location = opal_dirname(tmp_str); + free(tmp_str); + tmp_str = NULL; /* * Check for existence of the file @@ -453,11 +457,11 @@ static int create_appfile(orte_snapc_base_global_snapshot_t *snapshot) goto cleanup; } - for(item = opal_list_get_first(&snapshot->snapshots); - item != opal_list_get_end(&snapshot->snapshots); + for(item = opal_list_get_first(&snapshot->local_snapshots); + item != opal_list_get_end(&snapshot->local_snapshots); item = opal_list_get_next(item) ) { - orte_snapc_base_snapshot_t *vpid_snapshot; - vpid_snapshot = (orte_snapc_base_snapshot_t*)item; + orte_snapc_base_local_snapshot_t *vpid_snapshot; + vpid_snapshot = (orte_snapc_base_local_snapshot_t*)item; fprintf(appfile, "#\n"); fprintf(appfile, "# Old Process Name: %u.%u\n", @@ -467,13 +471,15 @@ static int create_appfile(orte_snapc_base_global_snapshot_t *snapshot) fprintf(appfile, "-np 1 "); if(orte_restart_globals.preload) { fprintf(appfile, "--preload-files %s/%s ", - vpid_snapshot->crs_snapshot_super.local_location, - vpid_snapshot->crs_snapshot_super.reference_name); + vpid_snapshot->local_location, + vpid_snapshot->reference_name); fprintf(appfile, "--preload-files-dest-dir . 
"); } /* JJH: Make this match what the user originally specified on the command line */ fprintf(appfile, "-am ft-enable-cr "); + fprintf(appfile, " opal-restart "); + /* JJH: Make sure this changes if ever the default location of the local file is changed, * currently it is safe to assume that it is in the current working directory. * @@ -486,9 +492,9 @@ static int create_appfile(orte_snapc_base_global_snapshot_t *snapshot) else { /* If we are *not* preloading the files, the point to the original checkpoint * directory to access the checkpoint files. */ - fprintf(appfile, "-mca crs_base_snapshot_dir %s ", vpid_snapshot->crs_snapshot_super.local_location); + fprintf(appfile, "-mca crs_base_snapshot_dir %s ", vpid_snapshot->local_location); } - fprintf(appfile, "%s\n", vpid_snapshot->crs_snapshot_super.reference_name); + fprintf(appfile, "%s\n", vpid_snapshot->reference_name); } cleanup: