Trying to get the C/R (checkpoint/restart) code to compile again.

This patch includes various fixes all over the C/R code which are hard to group like the other patches.

Changes from V1:
* explain why mca_base_component_distill_checkpoint_ready no longer works
* compare the return value of opal functions with OPAL_* values

Changes from V2:
* use orte_rml_oob_ft_event() instead of referencing through the modules
* properly protect variable (thanks to --enable-picky)

This commit was SVN r29922.
Этот коммит содержится в:
родитель
b95a9d865a
Коммит
b42aad44a3
@ -191,7 +191,7 @@ int mca_bml_r2_ft_event(int state)
|
|||||||
|
|
||||||
for(p = 0; p < (int)num_procs; ++p) {
|
for(p = 0; p < (int)num_procs; ++p) {
|
||||||
if( NULL != procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) {
|
if( NULL != procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) {
|
||||||
OBJ_RELEASE((mca_bml_base_endpoint_t*) procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]);
|
OBJ_RELEASE(procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]);
|
||||||
procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = NULL;
|
procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -263,9 +263,9 @@ int mca_bml_r2_ft_event(int state)
|
|||||||
mca_base_var_get_value(param_type, &btl_list, NULL, NULL);
|
mca_base_var_get_value(param_type, &btl_list, NULL, NULL);
|
||||||
opal_output_verbose(11, ompi_cr_output,
|
opal_output_verbose(11, ompi_cr_output,
|
||||||
"Restart (New BTL MCA): <%s>\n", btl_list ? btl_list[0] : "");
|
"Restart (New BTL MCA): <%s>\n", btl_list ? btl_list[0] : "");
|
||||||
if( NULL != param_list ) {
|
if( NULL != btl_list ) {
|
||||||
free(param_list);
|
free(btl_list);
|
||||||
param_list = NULL;
|
btl_list = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -286,7 +286,7 @@ int mca_bml_r2_ft_event(int state)
|
|||||||
|
|
||||||
for(p = 0; p < (int)num_procs; ++p) {
|
for(p = 0; p < (int)num_procs; ++p) {
|
||||||
if( NULL != procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) {
|
if( NULL != procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) {
|
||||||
OBJ_RELEASE((mca_bml_base_endpoint_t*) procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]);
|
OBJ_RELEASE(procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]);
|
||||||
procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = NULL;
|
procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -141,9 +141,18 @@ static int open_components(mca_base_framework_t *framework)
|
|||||||
* NTH: Logic moved to mca_base_components_filter.
|
* NTH: Logic moved to mca_base_components_filter.
|
||||||
*/
|
*/
|
||||||
#if (OPAL_ENABLE_FT == 1) && (OPAL_ENABLE_FT_CR == 1)
|
#if (OPAL_ENABLE_FT == 1) && (OPAL_ENABLE_FT_CR == 1)
|
||||||
|
#ifdef ENABLE_FT_FIXED
|
||||||
|
/* FIXME_FT
|
||||||
|
*
|
||||||
|
* the variable mca_base_component_distill_checkpoint_ready
|
||||||
|
* was removed by commit 8181c8273c486bba59b3dead324939eac1a58b8c (r28237)
|
||||||
|
* "Introduce the MCA framework system. This formalizes the interface frameworks must provide."
|
||||||
|
*
|
||||||
|
* */
|
||||||
if (mca_base_component_distill_checkpoint_ready) {
|
if (mca_base_component_distill_checkpoint_ready) {
|
||||||
open_only_flags |= MCA_BASE_METADATA_PARAM_CHECKPOINT;
|
open_only_flags |= MCA_BASE_METADATA_PARAM_CHECKPOINT;
|
||||||
}
|
}
|
||||||
|
#endif /* ENABLE_FT_FIXED */
|
||||||
#endif /* (OPAL_ENABLE_FT == 1) && (OPAL_ENABLE_FT_CR == 1) */
|
#endif /* (OPAL_ENABLE_FT == 1) && (OPAL_ENABLE_FT_CR == 1) */
|
||||||
|
|
||||||
/* If mca_base_framework_register_components was called with the MCA_BASE_COMPONENTS_ALL flag
|
/* If mca_base_framework_register_components was called with the MCA_BASE_COMPONENTS_ALL flag
|
||||||
|
@ -90,9 +90,9 @@ static int crs_self_register (void)
|
|||||||
mca_crs_self_component.super.priority = 20;
|
mca_crs_self_component.super.priority = 20;
|
||||||
ret = mca_base_component_var_register (&mca_crs_self_component.super.base_version,
|
ret = mca_base_component_var_register (&mca_crs_self_component.super.base_version,
|
||||||
"priority", "Priority of the CRS self component "
|
"priority", "Priority of the CRS self component "
|
||||||
"(default: 20)", MCA_BASE_VAR_TYPE_INT, NULL,
|
"(default: 20)", MCA_BASE_VAR_TYPE_INT, NULL, 0,
|
||||||
MCA_BASE_VAR_FLAG_SETTABLE,
|
MCA_BASE_VAR_FLAG_SETTABLE,
|
||||||
OPAL_INFO_LVL_9, MPI_BASE_VAR_SCOPE_ALL_EQ,
|
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL_EQ,
|
||||||
&mca_crs_self_component.super.priority);
|
&mca_crs_self_component.super.priority);
|
||||||
if (0 > ret) {
|
if (0 > ret) {
|
||||||
return ret;
|
return ret;
|
||||||
@ -102,8 +102,8 @@ static int crs_self_register (void)
|
|||||||
ret = mca_base_component_var_register (&mca_crs_self_component.super.base_version,
|
ret = mca_base_component_var_register (&mca_crs_self_component.super.base_version,
|
||||||
"verbose",
|
"verbose",
|
||||||
"Verbose level for the CRS self component",
|
"Verbose level for the CRS self component",
|
||||||
MCA_BASE_VAR_TYPE_INT, NULL,MCA_BASE_VAR_FLAG_SETTABLE,
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||||||
OPAL_INFO_LVL_9, MPI_BASE_VAR_SCOPE_LOCAL,
|
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
|
||||||
&mca_crs_self_component.super.verbose);
|
&mca_crs_self_component.super.verbose);
|
||||||
if (0 > ret) {
|
if (0 > ret) {
|
||||||
return ret;
|
return ret;
|
||||||
@ -116,8 +116,8 @@ static int crs_self_register (void)
|
|||||||
ret = mca_base_component_var_register (&mca_crs_self_component.super.base_version,
|
ret = mca_base_component_var_register (&mca_crs_self_component.super.base_version,
|
||||||
"prefix",
|
"prefix",
|
||||||
"Prefix for user defined callback functions",
|
"Prefix for user defined callback functions",
|
||||||
MCA_BASE_VAR_TYPE_STRING, NULL, MCA_BASE_VAR_FLAG_SETTABLE,
|
MCA_BASE_VAR_TYPE_STRING, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||||||
OPAL_INFO_LVL_9, MPI_BASE_VAR_SCOPE_LOCAL,
|
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
|
||||||
&mca_crs_self_component.prefix);
|
&mca_crs_self_component.prefix);
|
||||||
if (0 > ret) {
|
if (0 > ret) {
|
||||||
return ret;
|
return ret;
|
||||||
@ -126,8 +126,8 @@ static int crs_self_register (void)
|
|||||||
ret = mca_base_component_var_register (&mca_crs_self_component.super.base_version,
|
ret = mca_base_component_var_register (&mca_crs_self_component.super.base_version,
|
||||||
"do_restart",
|
"do_restart",
|
||||||
"Start execution by calling restart callback",
|
"Start execution by calling restart callback",
|
||||||
MCA_BASE_VAR_TYPE_BOOL, NULL, MCA_BASE_VAR_FLAG_SETTABLE,
|
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||||||
OPAL_INFO_LVL_9, MPI_BASE_VAR_SCOPE_LOCAL,
|
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
|
||||||
&mca_crs_self_component.do_restart);
|
&mca_crs_self_component.do_restart);
|
||||||
return (0 > ret) ? ret : OPAL_SUCCESS;
|
return (0 > ret) ? ret : OPAL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
@ -247,7 +247,7 @@ main(int argc, char *argv[])
|
|||||||
* restart on this node because it doesn't have the proper checkpointer
|
* restart on this node because it doesn't have the proper checkpointer
|
||||||
* available.
|
* available.
|
||||||
*/
|
*/
|
||||||
if( OPAL_SUCCESS != (ret = opal_crs_base_open()) ) {
|
if( OPAL_SUCCESS != (ret = opal_crs_base_open(MCA_BASE_OPEN_DEFAULT)) ) {
|
||||||
opal_show_help("help-opal-restart.txt", "comp_select_failure", true,
|
opal_show_help("help-opal-restart.txt", "comp_select_failure", true,
|
||||||
"crs", ret);
|
"crs", ret);
|
||||||
exit_status = ret;
|
exit_status = ret;
|
||||||
|
@ -366,7 +366,7 @@ void orte_errmgr_base_migrate_state_notify(int state)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void orte_errmgr_base_proc_state_notify(orte_proc_state_t state, orte_process_name_t *proc)
|
int orte_errmgr_base_proc_state_notify(orte_proc_state_t state, orte_process_name_t *proc)
|
||||||
{
|
{
|
||||||
if (NULL != proc) {
|
if (NULL != proc) {
|
||||||
switch(state) {
|
switch(state) {
|
||||||
|
@ -145,7 +145,7 @@ int orte_ess_base_tool_setup(void)
|
|||||||
/*
|
/*
|
||||||
* Setup the SnapC
|
* Setup the SnapC
|
||||||
*/
|
*/
|
||||||
if (ORTE_SUCCESS != (ret = orte_snapc_base_open())) {
|
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_snapc_base_framework, 0))) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
error = "orte_snapc_base_open";
|
error = "orte_snapc_base_open";
|
||||||
goto error;
|
goto error;
|
||||||
@ -175,7 +175,7 @@ int orte_ess_base_tool_finalize(void)
|
|||||||
orte_wait_finalize();
|
orte_wait_finalize();
|
||||||
|
|
||||||
#if OPAL_ENABLE_FT_CR == 1
|
#if OPAL_ENABLE_FT_CR == 1
|
||||||
orte_snapc_base_close();
|
mca_base_framework_close(&orte_snapc_base_framework);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* if I am a tool, then all I will have done is
|
/* if I am a tool, then all I will have done is
|
||||||
|
2
orte/mca/ess/env/ess_env_module.c
поставляемый
2
orte/mca/ess/env/ess_env_module.c
поставляемый
@ -422,7 +422,7 @@ static int rte_ft_event(int state)
|
|||||||
exit_status = ret;
|
exit_status = ret;
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
if (ORTE_SUCCESS != (ret = orte_db.remove(NULL, NULL))) {
|
if (OPAL_SUCCESS != (ret = opal_db.remove(NULL, NULL))) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
exit_status = ret;
|
exit_status = ret;
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
|
@ -400,10 +400,13 @@ void orte_plm_base_complete_setup(int fd, short args, void *cbdata)
|
|||||||
/*
|
/*
|
||||||
* Notify the Global SnapC component regarding new job (even if it was restarted)
|
* Notify the Global SnapC component regarding new job (even if it was restarted)
|
||||||
*/
|
*/
|
||||||
|
{
|
||||||
|
int rc;
|
||||||
if( ORTE_SUCCESS != (rc = orte_snapc.setup_job(jdata->jobid) ) ) {
|
if( ORTE_SUCCESS != (rc = orte_snapc.setup_job(jdata->jobid) ) ) {
|
||||||
/* Silent Failure :/ JJH */
|
/* Silent Failure :/ JJH */
|
||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if OPAL_HAVE_HWLOC
|
#if OPAL_HAVE_HWLOC
|
||||||
|
@ -185,8 +185,7 @@ orte_rml_oob_ft_event(int state) {
|
|||||||
;
|
;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( ORTE_SUCCESS !=
|
if( ORTE_SUCCESS != (ret = orte_rml_oob_ft_event(state)) ) {
|
||||||
(ret = orte_oob.ft_event(state)) ) {
|
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
exit_status = ret;
|
exit_status = ret;
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
@ -208,7 +207,7 @@ orte_rml_oob_ft_event(int state) {
|
|||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( ORTE_SUCCESS != (ret = mca_oob_base_select())) {
|
if( ORTE_SUCCESS != (ret = orte_oob_base_select())) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
exit_status = ret;
|
exit_status = ret;
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
|
@ -99,6 +99,12 @@ static int current_cr_state = OPAL_CRS_NONE;
|
|||||||
static orte_sstore_base_handle_t current_ss_handle = ORTE_SSTORE_HANDLE_INVALID, last_ss_handle = ORTE_SSTORE_HANDLE_INVALID;
|
static orte_sstore_base_handle_t current_ss_handle = ORTE_SSTORE_HANDLE_INVALID, last_ss_handle = ORTE_SSTORE_HANDLE_INVALID;
|
||||||
static opal_crs_base_ckpt_options_t *current_options = NULL;
|
static opal_crs_base_ckpt_options_t *current_options = NULL;
|
||||||
|
|
||||||
|
static void snapc_full_app_callback_recv(int status,
|
||||||
|
orte_process_name_t* sender,
|
||||||
|
opal_buffer_t* buffer,
|
||||||
|
orte_rml_tag_t tag,
|
||||||
|
void* cbdata);
|
||||||
|
|
||||||
/************************
|
/************************
|
||||||
* Function Definitions
|
* Function Definitions
|
||||||
************************/
|
************************/
|
||||||
@ -1673,3 +1679,12 @@ int app_coord_request_op(orte_snapc_base_request_op_t *datum)
|
|||||||
|
|
||||||
return exit_status;
|
return exit_status;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* dummy implementation of a callback function to get it to compile again */
|
||||||
|
static void snapc_full_app_callback_recv(int status,
|
||||||
|
orte_process_name_t* sender,
|
||||||
|
opal_buffer_t* buffer,
|
||||||
|
orte_rml_tag_t tag,
|
||||||
|
void* cbdata)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
@ -32,6 +32,7 @@ const char *orte_snapc_full_component_version_string =
|
|||||||
*/
|
*/
|
||||||
static int snapc_full_open(void);
|
static int snapc_full_open(void);
|
||||||
static int snapc_full_close(void);
|
static int snapc_full_close(void);
|
||||||
|
static int snapc_full_register(void);
|
||||||
|
|
||||||
bool orte_snapc_full_skip_app = false;
|
bool orte_snapc_full_skip_app = false;
|
||||||
bool orte_snapc_full_timing_enabled = false;
|
bool orte_snapc_full_timing_enabled = false;
|
||||||
@ -74,7 +75,7 @@ orte_snapc_full_component_t mca_snapc_full_component = {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
static int snaps_full_register (void)
|
static int snapc_full_register (void)
|
||||||
{
|
{
|
||||||
mca_base_component_t *component = &mca_snapc_full_component.super.base_version;
|
mca_base_component_t *component = &mca_snapc_full_component.super.base_version;
|
||||||
/*
|
/*
|
||||||
@ -129,7 +130,7 @@ static int snaps_full_register (void)
|
|||||||
OPAL_INFO_LVL_9,
|
OPAL_INFO_LVL_9,
|
||||||
MCA_BASE_VAR_SCOPE_LOCAL,
|
MCA_BASE_VAR_SCOPE_LOCAL,
|
||||||
&orte_snapc_full_progress_meter);
|
&orte_snapc_full_progress_meter);
|
||||||
orte_snapc_full_progress_meter = (value % 101);
|
orte_snapc_full_progress_meter %= 101;
|
||||||
|
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
}
|
}
|
||||||
@ -148,7 +149,7 @@ static int snapc_full_open(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* recheck the progress meter (it may have changed between register and open) */
|
/* recheck the progress meter (it may have changed between register and open) */
|
||||||
orte_snapc_full_progress_meter = (value % 101);
|
orte_snapc_full_progress_meter %= 101;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Debug Output
|
* Debug Output
|
||||||
|
@ -513,7 +513,7 @@ int global_coord_end_ckpt(orte_snapc_base_quiesce_t *datum)
|
|||||||
if( currently_migrating ) {
|
if( currently_migrating ) {
|
||||||
OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
|
OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
|
||||||
"Global) End Ckpt: Flush the modex cached data\n"));
|
"Global) End Ckpt: Flush the modex cached data\n"));
|
||||||
if (ORTE_SUCCESS != (ret = orte_db.remove(NULL, NULL))) {
|
if (OPAL_SUCCESS != (ret = opal_db.remove(NULL, NULL))) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
exit_status = ret;
|
exit_status = ret;
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
@ -1138,7 +1138,7 @@ void snapc_full_global_orted_recv(int status,
|
|||||||
OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
|
OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
|
||||||
"Global) Command: Job State Update (quick)"));
|
"Global) Command: Job State Update (quick)"));
|
||||||
|
|
||||||
snapc_full_process_job_update_cmd(&sender, buffer, true);
|
snapc_full_process_job_update_cmd(sender, buffer, true);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case ORTE_SNAPC_FULL_UPDATE_JOB_STATE_CMD:
|
case ORTE_SNAPC_FULL_UPDATE_JOB_STATE_CMD:
|
||||||
@ -1974,7 +1974,7 @@ static void snapc_full_process_job_update_cmd(orte_process_name_t* sender,
|
|||||||
|
|
||||||
static int snapc_full_establish_snapshot_dir(bool empty_metadata)
|
static int snapc_full_establish_snapshot_dir(bool empty_metadata)
|
||||||
{
|
{
|
||||||
const char **value = NULL;
|
char **value = NULL;
|
||||||
int idx = 0;
|
int idx = 0;
|
||||||
|
|
||||||
/*********************
|
/*********************
|
||||||
@ -1998,7 +1998,7 @@ static int snapc_full_establish_snapshot_dir(bool empty_metadata)
|
|||||||
opal_show_help("help-orte-restart.txt", "amca_param_not_found", true);
|
opal_show_help("help-orte-restart.txt", "amca_param_not_found", true);
|
||||||
}
|
}
|
||||||
if( 0 < idx ) {
|
if( 0 < idx ) {
|
||||||
mca_base_var_get_value (idx, &value, sizeof (value), NULL, NULL);
|
mca_base_var_get_value (idx, &value, NULL, NULL);
|
||||||
|
|
||||||
if (*value) {
|
if (*value) {
|
||||||
orte_sstore.set_attr(global_snapshot.ss_handle,
|
orte_sstore.set_attr(global_snapshot.ss_handle,
|
||||||
|
@ -1776,7 +1776,7 @@ static void snapc_full_local_comm_read_event(int fd, short flags, void *arg)
|
|||||||
if( currently_migrating && !flushed_modex ) {
|
if( currently_migrating && !flushed_modex ) {
|
||||||
OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
|
OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
|
||||||
"Local) Read Event: Flush the modex cached data\n"));
|
"Local) Read Event: Flush the modex cached data\n"));
|
||||||
if (ORTE_SUCCESS != (ret = orte_db.remove(NULL, NULL))) {
|
if (OPAL_SUCCESS != (ret = opal_db.remove(NULL, NULL))) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
exit_status = ret;
|
exit_status = ret;
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
|
@ -235,3 +235,8 @@ static int sstore_stage_close(void)
|
|||||||
|
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int sstore_stage_register(void)
|
||||||
|
{
|
||||||
|
return ORTE_SUCCESS;
|
||||||
|
}
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user