1
1

Add a quick check to make sure the BLCR being used has a working cr_request.

If it doesn (version < 0.6.0) then fallback to fork/exec of cr_checkpoint
command.

This commit was SVN r16400.
Этот коммит содержится в:
Josh Hursey 2007-10-09 13:51:28 +00:00
родитель fd51d9cf28
Коммит 06a30e7f3a

Просмотреть файл

@ -111,6 +111,7 @@ static char *blcr_checkpoint_cmd = NULL;
static opal_condition_t blcr_cond;
static opal_mutex_t blcr_lock;
static bool have_working_cr_request = false;
void opal_crs_blcr_construct(opal_crs_blcr_snapshot_t *snapshot) {
snapshot->context_filename = NULL;
@ -154,6 +155,29 @@ int opal_crs_blcr_module_init(void)
"Error: crs:blcr: module_init: cr_init failed (%d)\n", client_id);
return OPAL_ERROR;
}
/*
* Check for a working cr_request, version 0.6.0 and later
* In earlier version there was a bug with moving the context file from
* one location to another if cr_request was used. This was fixed in the
* 0.6.0 series.
*/
have_working_cr_request = true;
if( CR_RELEASE_MAJOR <= 0 &&
CR_RELEASE_MINOR < 6 ) {
have_working_cr_request = false;
}
if( !have_working_cr_request ) {
opal_output_verbose(1, mca_crs_blcr_component.super.output_handle,
"crs:blcr: WARNING: The BLCR version installed (%s) does not support cr_request(). "
"Using an alternative technique.\n",
CR_RELEASE_VERSION);
}
else {
opal_output_verbose(15, mca_crs_blcr_component.super.output_handle,
"crs:blcr: The BLCR version installed (%s) supports cr_request(). Yay!",
CR_RELEASE_VERSION);
}
}
blcr_restart_cmd = strdup("cr_restart");
@ -262,12 +286,8 @@ int opal_crs_blcr_checkpoint(pid_t pid, opal_crs_base_snapshot_t *base_snapshot,
* If threading based checkpoint is enabled we cannot use the cr_request()
* function to checkpoint ourselves. If we are a thread, then it is likely
* that we have not properly initalized this module.
* Additionally there is a bug with use cr_request and moving the context file from
* the location where it was created (As if v0.4.2). So this funciton cannot be used
* with this version of BLCR.
* JJH RETURN HERE...
*/
if(pid == getpid() ) {
if( have_working_cr_request && pid == getpid() ) {
char *loc_fname = NULL;
blcr_get_checkpoint_filename(&(snapshot->context_filename), pid);
@ -291,11 +311,10 @@ int opal_crs_blcr_checkpoint(pid_t pid, opal_crs_base_snapshot_t *base_snapshot,
*state = blcr_current_state;
free(loc_fname);
}
else
/*
* Checkpointing another process
*/
{
else {
ret = blcr_checkpoint_peer(pid, snapshot->super.local_location, &(snapshot->context_filename));
if(OPAL_SUCCESS != ret) {