1
1

Merge pull request #348 from adrianreber/topic/orte_cr_continue_like_restart

Topic/orte cr continue like restart
Этот коммит содержится в:
adrianreber 2015-03-12 14:54:02 +01:00
родитель b4d6420797 c08e234af7
Коммит 714d9aa67e
18 изменённых файлов: 119 добавлений и 292 удалений

Просмотреть файл

@ -27,6 +27,7 @@
#include "opal/runtime/opal_progress.h"
#include "opal/mca/btl/base/base.h"
#include "opal/mca/pmix/pmix.h"
#include "ompi/runtime/ompi_cr.h"
#include "ompi/mca/bml/base/base.h"
@ -48,7 +49,6 @@ int mca_bml_r2_ft_event(int state)
int loc_state;
int param_type = -1;
const char **btl_list;
ompi_rte_collective_t coll;
if(OPAL_CRS_CHECKPOINT == state) {
/* Do nothing for now */
@ -57,7 +57,7 @@ int mca_bml_r2_ft_event(int state)
first_continue_pass = !first_continue_pass;
/* Since nothing in Checkpoint, we are fine here (unless required by BTL) */
if( orte_cr_continue_like_restart && !first_continue_pass) {
if (opal_cr_continue_like_restart && !first_continue_pass) {
procs = ompi_proc_all(&num_procs);
if(NULL == procs) {
return OMPI_ERR_OUT_OF_RESOURCE;
@ -139,7 +139,7 @@ int mca_bml_r2_ft_event(int state)
}
else if(OPAL_CRS_CONTINUE == state) {
/* Matches OPAL_CRS_RESTART_PRE */
if( orte_cr_continue_like_restart && first_continue_pass) {
if (opal_cr_continue_like_restart && first_continue_pass) {
if( OMPI_SUCCESS != (ret = mca_bml_r2_finalize()) ) {
opal_output(0, "bml:r2: ft_event(Restart): Failed to finalize BML framework\n");
return ret;
@ -150,20 +150,12 @@ int mca_bml_r2_ft_event(int state)
}
}
/* Matches OPAL_CRS_RESTART */
else if( orte_cr_continue_like_restart && !first_continue_pass ) {
else if (opal_cr_continue_like_restart && !first_continue_pass) {
/*
* Barrier to make all processes have been successfully restarted before
* we try to remove some restart only files.
*/
OBJ_CONSTRUCT(&coll, ompi_rte_collective_t);
coll.id = ompi_process_info.peer_init_barrier;
if (OMPI_SUCCESS != (ret = ompi_rte_barrier(&coll))) {
opal_output(0, "bml:r2: ft_event(Restart): Failed in ompi_rte_barrier (%d)", ret);
return ret;
}
while (coll.active) {
opal_progress();
}
opal_pmix.fence(NULL, 0);
/*
* Re-open the BTL framework to get the full list of components.
@ -233,15 +225,7 @@ int mca_bml_r2_ft_event(int state)
* Barrier to make all processes have been successfully restarted before
* we try to remove some restart only files.
*/
OBJ_CONSTRUCT(&coll, ompi_rte_collective_t);
coll.id = ompi_process_info.peer_init_barrier;
if (OMPI_SUCCESS != (ret = ompi_rte_barrier(&coll))) {
opal_output(0, "bml:r2: ft_event(Restart): Failed in ompi_rte_barrier (%d)", ret);
return ret;
}
while (coll.active) {
opal_progress();
}
opal_pmix.fence(NULL, 0);
/*
* Re-open the BTL framework to get the full list of components.

Просмотреть файл

@ -33,6 +33,7 @@
#include "opal/util/opal_environ.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/mca/pmix/pmix.h"
#include "ompi/request/request.h"
#include "ompi/mca/rte/rte.h"
@ -1445,8 +1446,8 @@ ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_add_procs(
for( i = 0; i < nprocs; ++i) {
HOKE_PEER_REF_ALLOC(new_peer_ref);
new_peer_ref->proc_name.jobid = procs[i]->proc_name.jobid;
new_peer_ref->proc_name.vpid = procs[i]->proc_name.vpid;
new_peer_ref->proc_name.jobid = OMPI_CAST_RTE_NAME(&procs[i]->super.proc_name)->jobid;
new_peer_ref->proc_name.vpid = OMPI_CAST_RTE_NAME(&procs[i]->super.proc_name)->vpid;
opal_list_append(&ompi_crcp_bkmrk_pml_peer_refs, &(new_peer_ref->super));
}
@ -1474,11 +1475,11 @@ ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_del_procs(
"crcp:bkmrk: pml_del_procs()"));
for( i = 0; i < nprocs; ++i) {
item = (opal_list_item_t*)find_peer(procs[i]->proc_name);
item = (opal_list_item_t*)find_peer(*(ompi_process_name_t*)&procs[i]->super.proc_name);
if(NULL == item) {
opal_output(mca_crcp_bkmrk_component.super.output_handle,
"crcp:bkmrk: del_procs: Unable to find peer %s\n",
OMPI_NAME_PRINT(&(procs[i]->proc_name)));
OMPI_NAME_PRINT(&procs[i]->super.proc_name));
exit_status = OMPI_ERROR;
goto DONE;
}
@ -3006,12 +3007,9 @@ ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_ft_event(
static bool first_continue_pass = false;
opal_list_item_t* item = NULL;
int exit_status = OMPI_SUCCESS;
ompi_rte_collective_t coll;
int ret;
ft_event_state = state;
OBJ_CONSTRUCT(&coll, ompi_rte_collective_t);
coll.id = ompi_process_info.peer_init_barrier;
if( step_to_return_to == 1 ) {
goto STEP_1;
@ -3030,8 +3028,7 @@ ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_ft_event(
if( opal_cr_timing_barrier_enabled ) {
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCPBR0);
ompi_rte_barrier(&coll);
OMPI_WAIT_FOR_COMPLETION(coll.active);
opal_pmix.fence(NULL, 0);
}
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCP0);
@ -3076,7 +3073,7 @@ ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_ft_event(
first_continue_pass = !first_continue_pass;
/* Only finalize the Protocol after the PML has been rebuilt */
if( orte_cr_continue_like_restart && first_continue_pass ) {
if (opal_cr_continue_like_restart && first_continue_pass) {
goto DONE;
}
@ -3099,8 +3096,7 @@ ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_ft_event(
if( opal_cr_timing_barrier_enabled ) {
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_COREBR1);
ompi_rte_barrier(&coll);
OMPI_WAIT_FOR_COMPLETION(coll.active);
opal_pmix.fence(NULL, 0);
}
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CORE2);
}
@ -3156,7 +3152,6 @@ ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_ft_event(
}
DONE:
OBJ_DESTRUCT(&coll);
step_to_return_to = 0;
ft_event_state = OPAL_CRS_RUNNING;
@ -3919,7 +3914,7 @@ static int drain_message_find_any(size_t count, int tag, int peer,
if( OPAL_EQUAL != ompi_rte_compare_name_fields(OMPI_RTE_CMP_ALL,
&(cur_peer_ref->proc_name),
&(comm->c_local_group->grp_proc_pointers[peer]->proc_name)) ) {
OMPI_CAST_RTE_NAME(&comm->c_local_group->grp_proc_pointers[peer]->super.proc_name))) {
continue;
}
}
@ -4162,7 +4157,7 @@ static ompi_crcp_bkmrk_pml_peer_ref_t * find_peer(ompi_process_name_t proc)
static int find_peer_in_comm(struct ompi_communicator_t* comm, int proc_idx,
ompi_crcp_bkmrk_pml_peer_ref_t **peer_ref)
{
*peer_ref = find_peer(comm->c_remote_group->grp_proc_pointers[proc_idx]->proc_name);
*peer_ref = find_peer(*(ompi_process_name_t *)&comm->c_remote_group->grp_proc_pointers[proc_idx]->super.proc_name);
if( NULL == *peer_ref) {
opal_output(mca_crcp_bkmrk_component.super.output_handle,
@ -6212,19 +6207,15 @@ static void clear_timers(void) {
static void display_all_timers(int state) {
bool report_ready = false;
double barrier_start, barrier_stop;
ompi_rte_collective_t coll;
int i;
OBJ_CONSTRUCT(&coll, ompi_rte_collective_t);
coll.id = ompi_process_info.peer_init_barrier;
if( 0 != OMPI_PROC_MY_NAME->vpid ) {
if( 2 > timing_enabled ) {
goto done;
return;
}
else if( 2 == timing_enabled ) {
ompi_rte_barrier(&coll);
OMPI_WAIT_FOR_COMPLETION(coll.active);
goto done;
opal_pmix.fence(NULL, 0);
return;
}
}
@ -6234,7 +6225,7 @@ static void display_all_timers(int state) {
}
}
if( !report_ready ) {
goto done;
return;
}
opal_output(0, "crcp:bkmrk: timing(%20s): ******************** Begin: [State = %12s]\n", "Summary", opal_crs_base_state_str(state));
@ -6244,8 +6235,7 @@ static void display_all_timers(int state) {
if( timing_enabled >= 2) {
barrier_start = get_time();
ompi_rte_barrier(&coll);
OMPI_WAIT_FOR_COMPLETION(coll.active);
opal_pmix.fence(NULL, 0);
barrier_stop = get_time();
opal_output(0,
"crcp:bkmrk: timing(%20s): %20s = %10.2f s\n",
@ -6256,8 +6246,6 @@ static void display_all_timers(int state) {
opal_output(0, "crcp:bkmrk: timing(%20s): ******************** End: [State = %12s]\n", "Summary", opal_crs_base_state_str(state));
done:
OBJ_DESTRUCT(&coll);
}
static void display_indv_timer(int idx, int proc, int msgs) {

Просмотреть файл

@ -33,6 +33,7 @@
#include "opal/util/show_help.h"
#include "opal/mca/btl/btl.h"
#include "opal/mca/btl/base/base.h"
#include "opal/mca/pmix/pmix.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/pml/base/base.h"
@ -663,15 +664,11 @@ int mca_pml_bfo_ft_event( int state )
ompi_proc_t** procs = NULL;
size_t num_procs;
int ret, p;
ompi_rte_collective_t *coll, *modex;
coll = OBJ_NEW(ompi_rte_collective_t);
coll->id = ompi_process_info.peer_init_barrier;
if(OPAL_CRS_CHECKPOINT == state) {
if( opal_cr_timing_barrier_enabled ) {
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCPBR1);
ompi_rte_barrier(coll);
OMPI_WAIT_FOR_COMPLETION(coll->active);
opal_pmix.fence(NULL, 0);
}
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P0);
@ -682,20 +679,18 @@ int mca_pml_bfo_ft_event( int state )
if( !first_continue_pass ) {
if( opal_cr_timing_barrier_enabled ) {
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_COREBR0);
ompi_rte_barrier(coll);
OMPI_WAIT_FOR_COMPLETION(coll->active);
opal_pmix.fence(NULL, 0);
}
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P2);
}
if( orte_cr_continue_like_restart && !first_continue_pass ) {
if (opal_cr_continue_like_restart && !first_continue_pass) {
/*
* Get a list of processes
*/
procs = ompi_proc_all(&num_procs);
if(NULL == procs) {
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto clean;
return OMPI_ERR_OUT_OF_RESOURCE;
}
/*
@ -713,7 +708,7 @@ int mca_pml_bfo_ft_event( int state )
OBJ_RELEASE(procs[p]);
}
free (procs);
goto clean;
return ret;
}
}
}
@ -726,8 +721,7 @@ int mca_pml_bfo_ft_event( int state )
*/
procs = ompi_proc_all(&num_procs);
if(NULL == procs) {
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto clean;
return OMPI_ERR_OUT_OF_RESOURCE;
}
/*
@ -753,7 +747,7 @@ int mca_pml_bfo_ft_event( int state )
OBJ_RELEASE(procs[p]);
}
free (procs);
goto clean;
return ret;
}
}
else if(OPAL_CRS_TERM == state ) {
@ -785,28 +779,17 @@ int mca_pml_bfo_ft_event( int state )
if( !first_continue_pass ) {
if( opal_cr_timing_barrier_enabled ) {
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR1);
ompi_rte_barrier(coll);
OMPI_WAIT_FOR_COMPLETION(coll->active);
opal_pmix.fence(NULL, 0);
}
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P3);
}
if( orte_cr_continue_like_restart && !first_continue_pass ) {
if (opal_cr_continue_like_restart && !first_continue_pass) {
/*
* Exchange the modex information once again.
* BTLs will have republished their modex information.
*/
modex = OBJ_NEW(ompi_rte_collective_t);
modex->id = ompi_process_info.peer_modex;
if (OMPI_SUCCESS != (ret = orte_grpcomm.modex(modex))) {
opal_output(0,
"pml:bfo: ft_event(Restart): Failed orte_grpcomm.modex() = %d",
ret);
OBJ_RELEASE(modex);
goto clean;
}
OMPI_WAIT_FOR_COMPLETION(modex->active);
OBJ_RELEASE(modex);
opal_pmix.fence(NULL, 0);
/*
* Startup the PML stack now that the modex is running again
@ -814,15 +797,11 @@ int mca_pml_bfo_ft_event( int state )
*/
if( OMPI_SUCCESS != (ret = mca_pml_bfo_add_procs(procs, num_procs) ) ) {
opal_output(0, "pml:bfo: ft_event(Restart): Failed in add_procs (%d)", ret);
goto clean;
return ret;
}
/* Is this barrier necessary ? JJH */
if (OMPI_SUCCESS != (ret = ompi_rte_barrier(coll))) {
opal_output(0, "pml:bfo: ft_event(Restart): Failed in ompi_rte_barrier (%d)", ret);
return ret;
}
OMPI_WAIT_FOR_COMPLETION(coll->active);
opal_pmix.fence(NULL, 0);
if( NULL != procs ) {
for(p = 0; p < (int)num_procs; ++p) {
@ -835,8 +814,7 @@ int mca_pml_bfo_ft_event( int state )
if( !first_continue_pass ) {
if( opal_cr_timing_barrier_enabled ) {
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR2);
ompi_rte_barrier(coll);
OMPI_WAIT_FOR_COMPLETION(coll->active);
opal_pmix.fence(NULL, 0);
}
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCP1);
}
@ -849,17 +827,7 @@ int mca_pml_bfo_ft_event( int state )
* Exchange the modex information once again.
* BTLs will have republished their modex information.
*/
modex = OBJ_NEW(ompi_rte_collective_t);
modex->id = ompi_process_info.peer_modex;
if (OMPI_SUCCESS != (ret = orte_grpcomm.modex(NULL))) {
opal_output(0,
"pml:bfo: ft_event(Restart): Failed orte_grpcomm.modex() = %d",
ret);
OBJ_RELEASE(modex);
goto clean;
}
OMPI_WAIT_FOR_COMPLETION(modex->active);
OBJ_RELEASE(modex);
opal_pmix.fence(NULL, 0);
/*
* Startup the PML stack now that the modex is running again
@ -867,15 +835,11 @@ int mca_pml_bfo_ft_event( int state )
*/
if( OMPI_SUCCESS != (ret = mca_pml_bfo_add_procs(procs, num_procs) ) ) {
opal_output(0, "pml:bfo: ft_event(Restart): Failed in add_procs (%d)", ret);
goto clean;
return ret;
}
/* Is this barrier necessary ? JJH */
if (OMPI_SUCCESS != (ret = ompi_rte_barrier(coll))) {
opal_output(0, "pml:bfo: ft_event(Restart): Failed in ompi_rte_barrier (%d)", ret);
goto clean;
}
OMPI_WAIT_FOR_COMPLETION(coll->active);
opal_pmix.fence(NULL, 0);
if( NULL != procs ) {
for(p = 0; p < (int)num_procs; ++p) {
@ -892,11 +856,7 @@ int mca_pml_bfo_ft_event( int state )
;
}
ret = OMPI_SUCCESS;
clean:
OBJ_RELEASE(coll);
return ret;
return OMPI_SUCCESS;
}
#endif /* OPAL_ENABLE_FT_CR */

Просмотреть файл

@ -40,6 +40,7 @@
#include "ompi/mca/pml/base/base.h"
#include "ompi/mca/pml/base/base.h"
#include "ompi/mca/bml/base/base.h"
#include "opal/mca/pmix/pmix.h"
#include "ompi/runtime/ompi_cr.h"
#include "pml_ob1.h"
@ -792,15 +793,11 @@ int mca_pml_ob1_ft_event( int state )
ompi_proc_t** procs = NULL;
size_t num_procs;
int ret, p;
ompi_rte_collective_t *coll, *modex;
coll = OBJ_NEW(ompi_rte_collective_t);
coll->id = ompi_process_info.peer_init_barrier;
if(OPAL_CRS_CHECKPOINT == state) {
if( opal_cr_timing_barrier_enabled ) {
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCPBR1);
ompi_rte_barrier(coll);
OMPI_WAIT_FOR_COMPLETION(coll->active);
opal_pmix.fence(NULL, 0);
}
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P0);
@ -811,20 +808,18 @@ int mca_pml_ob1_ft_event( int state )
if( !first_continue_pass ) {
if( opal_cr_timing_barrier_enabled ) {
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_COREBR0);
ompi_rte_barrier(coll);
OMPI_WAIT_FOR_COMPLETION(coll->active);
opal_pmix.fence(NULL, 0);
}
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P2);
}
if( orte_cr_continue_like_restart && !first_continue_pass ) {
if (opal_cr_continue_like_restart && !first_continue_pass) {
/*
* Get a list of processes
*/
procs = ompi_proc_all(&num_procs);
if(NULL == procs) {
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto clean;
return OMPI_ERR_OUT_OF_RESOURCE;
}
/*
@ -842,7 +837,7 @@ int mca_pml_ob1_ft_event( int state )
OBJ_RELEASE(procs[p]);
}
free (procs);
goto clean;
return ret;
}
}
}
@ -855,8 +850,7 @@ int mca_pml_ob1_ft_event( int state )
*/
procs = ompi_proc_all(&num_procs);
if(NULL == procs) {
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto clean;
return OMPI_ERR_OUT_OF_RESOURCE;
}
/*
@ -882,7 +876,7 @@ int mca_pml_ob1_ft_event( int state )
OBJ_RELEASE(procs[p]);
}
free (procs);
goto clean;
return ret;
}
}
else if(OPAL_CRS_TERM == state ) {
@ -914,28 +908,13 @@ int mca_pml_ob1_ft_event( int state )
if( !first_continue_pass ) {
if( opal_cr_timing_barrier_enabled ) {
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR1);
ompi_rte_barrier(coll);
OMPI_WAIT_FOR_COMPLETION(coll->active);
opal_pmix.fence(NULL, 0);
}
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P3);
}
if( orte_cr_continue_like_restart && !first_continue_pass ) {
/*
* Exchange the modex information once again.
* BTLs will have republished their modex information.
*/
modex = OBJ_NEW(ompi_rte_collective_t);
modex->id = ompi_process_info.peer_modex;
if (OMPI_SUCCESS != (ret = orte_grpcomm.modex(modex))) {
opal_output(0,
"pml:ob1: ft_event(Restart): Failed orte_grpcomm.modex() = %d",
ret);
OBJ_RELEASE(modex);
goto clean;
}
OMPI_WAIT_FOR_COMPLETION(modex->active);
OBJ_RELEASE(modex);
if (opal_cr_continue_like_restart && !first_continue_pass) {
opal_pmix.fence(NULL, 0);
/*
* Startup the PML stack now that the modex is running again
@ -943,15 +922,11 @@ int mca_pml_ob1_ft_event( int state )
*/
if( OMPI_SUCCESS != (ret = mca_pml_ob1_add_procs(procs, num_procs) ) ) {
opal_output(0, "pml:ob1: ft_event(Restart): Failed in add_procs (%d)", ret);
goto clean;
return ret;
}
/* Is this barrier necessary ? JJH */
if (OMPI_SUCCESS != (ret = ompi_rte_barrier(coll))) {
opal_output(0, "pml:ob1: ft_event(Restart): Failed in ompi_rte_barrier (%d)", ret);
goto clean;
}
OMPI_WAIT_FOR_COMPLETION(coll->active);
opal_pmix.fence(NULL, 0);
if( NULL != procs ) {
for(p = 0; p < (int)num_procs; ++p) {
@ -964,8 +939,7 @@ int mca_pml_ob1_ft_event( int state )
if( !first_continue_pass ) {
if( opal_cr_timing_barrier_enabled ) {
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR2);
ompi_rte_barrier(coll);
OMPI_WAIT_FOR_COMPLETION(coll->active);
opal_pmix.fence(NULL, 0);
}
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCP1);
}
@ -978,17 +952,7 @@ int mca_pml_ob1_ft_event( int state )
* Exchange the modex information once again.
* BTLs will have republished their modex information.
*/
modex = OBJ_NEW(ompi_rte_collective_t);
modex->id = ompi_process_info.peer_modex;
if (OMPI_SUCCESS != (ret = orte_grpcomm.modex(modex))) {
opal_output(0,
"pml:ob1: ft_event(Restart): Failed orte_grpcomm.modex() = %d",
ret);
OBJ_RELEASE(modex);
goto clean;
}
OMPI_WAIT_FOR_COMPLETION(modex->active);
OBJ_RELEASE(modex);
opal_pmix.fence(NULL, 0);
/*
* Startup the PML stack now that the modex is running again
@ -996,15 +960,11 @@ int mca_pml_ob1_ft_event( int state )
*/
if( OMPI_SUCCESS != (ret = mca_pml_ob1_add_procs(procs, num_procs) ) ) {
opal_output(0, "pml:ob1: ft_event(Restart): Failed in add_procs (%d)", ret);
goto clean;
return ret;
}
/* Is this barrier necessary ? JJH */
if (OMPI_SUCCESS != (ret = ompi_rte_barrier(coll))) {
opal_output(0, "pml:ob1: ft_event(Restart): Failed in ompi_rte_barrier (%d)", ret);
goto clean;
}
OMPI_WAIT_FOR_COMPLETION(coll->active);
opal_pmix.fence(NULL, 0);
if( NULL != procs ) {
for(p = 0; p < (int)num_procs; ++p) {
@ -1021,11 +981,7 @@ int mca_pml_ob1_ft_event( int state )
;
}
ret = OMPI_SUCCESS;
clean:
OBJ_RELEASE(coll);
return ret;
return OMPI_SUCCESS;
}
#endif /* OPAL_ENABLE_FT_CR */

Просмотреть файл

@ -410,7 +410,7 @@ static int ompi_cr_coord_pre_continue(void) {
opal_output_verbose(10, ompi_cr_output,
"ompi_cr: coord_pre_continue: ompi_cr_coord_pre_continue()");
if( orte_cr_continue_like_restart ) {
if (opal_cr_continue_like_restart) {
/* Mimic ompi_cr_coord_pre_restart(); */
if( OMPI_SUCCESS != (ret = mca_pml.pml_ft_event(OPAL_CRS_CONTINUE))) {
exit_status = ret;

Просмотреть файл

@ -1743,7 +1743,7 @@ int mca_btl_openib_ft_event(int state) {
if(OPAL_CRS_CHECKPOINT == state) {
/* Continue must reconstruct the routes (including modex), since we
* have to tear down the devices completely. */
orte_cr_continue_like_restart = true;
opal_cr_continue_like_restart = true;
/*
* To keep the node from crashing we need to call ibv_close_device

Просмотреть файл

@ -1281,13 +1281,16 @@ int mca_btl_sm_ft_event(int state) {
* for these old file handles. The restart procedure will make sure
* these files get cleaned up appropriately.
*/
/* Disabled to get FT code compiled again
* TODO: FIXIT soon
orte_sstore.set_attr(orte_sstore_handle_current,
SSTORE_METADATA_LOCAL_TOUCH,
mca_btl_sm_component.sm_seg->shmem_ds.seg_name);
*/
}
}
else if(OPAL_CRS_CONTINUE == state) {
if( orte_cr_continue_like_restart ) {
if (opal_cr_continue_like_restart) {
if( NULL != mca_btl_sm_component.sm_seg ) {
/* Add shared memory file */
opal_crs_base_cleanup_append(mca_btl_sm_component.sm_seg->shmem_ds.seg_name, false);

Просмотреть файл

@ -1251,13 +1251,16 @@ int mca_btl_smcuda_ft_event(int state) {
* for these old file handles. The restart procedure will make sure
* these files get cleaned up appropriately.
*/
/* Disabled to get FT code compiled again
* TODO: FIXIT soon
orte_sstore.set_attr(orte_sstore_handle_current,
SSTORE_METADATA_LOCAL_TOUCH,
mca_btl_smcuda_component.sm_seg->shmem_ds.seg_name);
*/
}
}
else if(OPAL_CRS_CONTINUE == state) {
if( orte_cr_continue_like_restart ) {
if (opal_cr_continue_like_restart) {
if( NULL != mca_btl_smcuda_component.sm_seg ) {
/* Add shared memory file */
opal_crs_base_cleanup_append(mca_btl_smcuda_component.sm_seg->shmem_ds.seg_name, false);

Просмотреть файл

@ -173,12 +173,15 @@ int mca_mpool_sm_ft_event(int state) {
asprintf( &file_name, "%s"OPAL_PATH_SEP"shared_mem_pool.%s",
opal_process_info.job_session_dir,
opal_proc_local_get()->proc_hostname );
/* Disabled to get FT code compiled again
* TODO: FIXIT soon
orte_sstore.set_attr(orte_sstore_handle_current, SSTORE_METADATA_LOCAL_TOUCH, file_name);
*/
free(file_name);
file_name = NULL;
}
else if(OPAL_CRS_CONTINUE == state) {
if(orte_cr_continue_like_restart) {
if (opal_cr_continue_like_restart) {
/* Find the sm module */
self_module = mca_mpool_base_module_lookup("sm");
self_sm_module = (mca_mpool_sm_module_t*) self_module;

Просмотреть файл

@ -131,6 +131,8 @@ int opal_cr_checkpoint_request = OPAL_CR_STATUS_NONE;
static bool opal_cr_debug_sigpipe = false;
bool opal_cr_continue_like_restart = false;
#if OPAL_ENABLE_FT_THREAD == 1
/*****************
* Threading Functions and Variables

Просмотреть файл

@ -91,6 +91,12 @@ typedef enum opal_cr_ckpt_cmd_state_t opal_cr_ckpt_cmd_state_t;
/* The current state of a checkpoint operation */
OPAL_DECLSPEC extern int opal_cr_checkpointing_state;
/*
* If one of the BTLs that shutdown require a full, clean rebuild of the
* point-to-point stack on 'continue' as well as 'restart'.
*/
OPAL_DECLSPEC extern bool opal_cr_continue_like_restart;
#if OPAL_ENABLE_CRDEBUG == 1
/* Whether or not C/R Debugging is enabled for this process */
OPAL_DECLSPEC extern int MPIR_debug_with_checkpoint;

82
orte/mca/ess/env/ess_env_module.c поставляемый
Просмотреть файл

@ -191,10 +191,6 @@ static int rte_ft_event(int state)
{
int ret, exit_status = ORTE_SUCCESS;
orte_proc_type_t svtype;
orte_grpcomm_collective_t coll;
OBJ_CONSTRUCT(&coll, orte_grpcomm_collective_t);
coll.id = orte_process_info.snapc_init_barrier;
/******** Checkpoint Prep ********/
if(OPAL_CRS_CHECKPOINT == state) {
@ -203,8 +199,7 @@ static int rte_ft_event(int state)
*/
if( ORTE_SUCCESS != (ret = orte_snapc.ft_event(OPAL_CRS_CHECKPOINT))) {
ORTE_ERROR_LOG(ret);
exit_status = ret;
goto cleanup;
return ret;
}
/*
@ -212,8 +207,7 @@ static int rte_ft_event(int state)
*/
if( ORTE_SUCCESS != (ret = orte_routed.ft_event(OPAL_CRS_CHECKPOINT))) {
ORTE_ERROR_LOG(ret);
exit_status = ret;
goto cleanup;
return ret;
}
/*
@ -221,8 +215,7 @@ static int rte_ft_event(int state)
*/
if( ORTE_SUCCESS != (ret = orte_rml.ft_event(OPAL_CRS_CHECKPOINT))) {
ORTE_ERROR_LOG(ret);
exit_status = ret;
goto cleanup;
return ret;
}
}
/******** Continue Recovery ********/
@ -236,8 +229,7 @@ static int rte_ft_event(int state)
*/
if( ORTE_SUCCESS != (ret = orte_rml.ft_event(OPAL_CRS_CONTINUE))) {
ORTE_ERROR_LOG(ret);
exit_status = ret;
goto cleanup;
return ret;
}
/*
@ -245,8 +237,7 @@ static int rte_ft_event(int state)
*/
if( ORTE_SUCCESS != (ret = orte_routed.ft_event(OPAL_CRS_CONTINUE))) {
ORTE_ERROR_LOG(ret);
exit_status = ret;
goto cleanup;
return ret;
}
/*
@ -254,23 +245,15 @@ static int rte_ft_event(int state)
*/
if( ORTE_SUCCESS != (ret = orte_snapc.ft_event(OPAL_CRS_CONTINUE))) {
ORTE_ERROR_LOG(ret);
exit_status = ret;
goto cleanup;
return ret;
}
if( orte_cr_continue_like_restart ) {
if (opal_cr_continue_like_restart) {
/*
* Barrier to make all processes have been successfully restarted before
* we try to remove some restart only files.
*/
if (ORTE_SUCCESS != (ret = orte_grpcomm.barrier(&coll))) {
opal_output(0, "ess:env: ft_event(%2d): Failed in orte_grpcomm.barrier (%d)",
state, ret);
exit_status = ret;
goto cleanup;
}
coll.active = true;
ORTE_WAIT_FOR_COMPLETION(coll.active);
opal_pmix.fence(NULL, 0);
if( orte_cr_flush_restart_files ) {
OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output,
@ -291,11 +274,6 @@ static int rte_ft_event(int state)
* This should follow the ess init() function
*/
/*
* Clear nidmap and jmap
*/
orte_util_nidmap_finalize();
/*
* - Reset Contact information
*/
@ -308,8 +286,7 @@ static int rte_ft_event(int state)
*/
if( ORTE_SUCCESS != (ret = orte_rml.ft_event(OPAL_CRS_RESTART))) {
ORTE_ERROR_LOG(ret);
exit_status = ret;
goto cleanup;
return ret;
}
/*
@ -320,25 +297,14 @@ static int rte_ft_event(int state)
orte_process_info.proc_type = ORTE_PROC_TOOL;
if (ORTE_SUCCESS != (ret = orte_routed.finalize()) ) {
ORTE_ERROR_LOG(ret);
exit_status = ret;
goto cleanup;
return ret;
}
orte_process_info.proc_type = svtype;
if (ORTE_SUCCESS != (ret = orte_routed.initialize()) ) {
ORTE_ERROR_LOG(ret);
exit_status = ret;
goto cleanup;
return ret;
}
/*
* Group Comm - Clean out stale data
*/
orte_grpcomm.finalize();
if (ORTE_SUCCESS != (ret = orte_grpcomm.init())) {
ORTE_ERROR_LOG(ret);
exit_status = ret;
goto cleanup;
}
/* RHC: you can't pass NULL as the identifier - what you'll need to do is
* close all open dstore handles, and then open the ones you need
*/
@ -355,14 +321,12 @@ static int rte_ft_event(int state)
*/
if (ORTE_SUCCESS != (ret = orte_plm.finalize())) {
ORTE_ERROR_LOG(ret);
exit_status = ret;
goto cleanup;
return ret;
}
if (ORTE_SUCCESS != (ret = orte_plm.init())) {
ORTE_ERROR_LOG(ret);
exit_status = ret;
goto cleanup;
return ret;
}
/*
@ -370,8 +334,7 @@ static int rte_ft_event(int state)
*/
if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) {
ORTE_ERROR_LOG(ret);
exit_status = ret;
goto cleanup;
return ret;
}
/*
@ -379,22 +342,14 @@ static int rte_ft_event(int state)
*/
if( ORTE_SUCCESS != (ret = orte_routed.ft_event(OPAL_CRS_RESTART))) {
ORTE_ERROR_LOG(ret);
exit_status = ret;
goto cleanup;
return ret;
}
/*
* Barrier to make all processes have been successfully restarted before
* we try to remove some restart only files.
*/
if (ORTE_SUCCESS != (ret = orte_grpcomm.barrier(&coll))) {
opal_output(0, "ess:env ft_event(%2d): Failed in orte_grpcomm.barrier (%d)",
state, ret);
exit_status = ret;
goto cleanup;
}
coll.active = true;
ORTE_WAIT_FOR_COMPLETION(coll.active);
opal_pmix.fence(NULL, 0);
if( orte_cr_flush_restart_files ) {
OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output,
@ -426,8 +381,7 @@ static int rte_ft_event(int state)
*/
if( ORTE_SUCCESS != (ret = orte_snapc.ft_event(OPAL_CRS_RESTART))) {
ORTE_ERROR_LOG(ret);
exit_status = ret;
goto cleanup;
return ret;
}
}
else if (OPAL_CRS_TERM == state ) {
@ -437,8 +391,6 @@ static int rte_ft_event(int state)
/* Error state = Nothing */
}
cleanup:
OBJ_DESTRUCT(&coll);
return exit_status;
}
#endif

Просмотреть файл

@ -50,6 +50,7 @@
#include "opal/mca/crs/base/base.h"
#include "orte/util/name_fns.h"
#include "opal/mca/pmix/pmix.h"
#include "orte/mca/snapc/snapc.h"
#include "orte/mca/snapc/base/base.h"
#include "orte/mca/errmgr/errmgr.h"
@ -110,7 +111,6 @@ int app_coord_init()
orte_snapc_full_cmd_flag_t command = ORTE_SNAPC_FULL_REQUEST_OP_CMD;
orte_snapc_base_request_op_event_t op_event = ORTE_SNAPC_OP_INIT;
opal_buffer_t *buffer = NULL;
orte_grpcomm_collective_t *coll;
OPAL_OUTPUT_VERBOSE((20, mca_snapc_full_component.super.output_handle,
"App) Initalized for Application %s\n",
@ -154,16 +154,7 @@ int app_coord_init()
"app) Startup Barrier..."));
}
coll = OBJ_NEW(orte_grpcomm_collective_t);
coll->id = orte_process_info.snapc_init_barrier;
if( ORTE_SUCCESS != (ret = orte_grpcomm.barrier(coll)) ) {
ORTE_ERROR_LOG(ret);
exit_status = ret;
goto cleanup;
}
coll->active = true;
ORTE_WAIT_FOR_COMPLETION(coll->active);
OBJ_RELEASE(coll);
opal_pmix.fence(NULL, 0);
if( 0 == ORTE_PROC_MY_NAME->vpid ) {
OPAL_OUTPUT_VERBOSE((3, mca_snapc_full_component.super.output_handle,
@ -217,7 +208,6 @@ int app_coord_finalize()
orte_snapc_base_request_op_event_t op_event = ORTE_SNAPC_OP_FIN;
opal_buffer_t *buffer = NULL;
orte_std_cntr_t count;
orte_grpcomm_collective_t *coll;
orte_rml_recv_cb_t *rb = NULL;
/*
@ -230,15 +220,7 @@ int app_coord_finalize()
"app) Shutdown Barrier..."));
}
coll = OBJ_NEW(orte_grpcomm_collective_t);
coll->id = orte_process_info.snapc_init_barrier;
if( ORTE_SUCCESS != (ret = orte_grpcomm.barrier(coll)) ) {
ORTE_ERROR_LOG(ret);
exit_status = ret;
goto cleanup;
}
coll->active = true;
ORTE_WAIT_FOR_COMPLETION(coll->active);
opal_pmix.fence(NULL, 0);
if( 0 == ORTE_PROC_MY_NAME->vpid ) {
OPAL_OUTPUT_VERBOSE((3, mca_snapc_full_component.super.output_handle,
@ -309,13 +291,6 @@ int app_coord_finalize()
"app) Shutdown Barrier: Waiting on barrier...!"));
}
coll->id = orte_process_info.snapc_fini_barrier;
if( ORTE_SUCCESS != (ret = orte_grpcomm.barrier(coll)) ) {
ORTE_ERROR_LOG(ret);
exit_status = ret;
goto cleanup;
}
if( 0 == ORTE_PROC_MY_NAME->vpid ) {
OPAL_OUTPUT_VERBOSE((3, mca_snapc_full_component.super.output_handle,
"app) Shutdown Barrier, Done!"));
@ -332,8 +307,6 @@ int app_coord_finalize()
rb = NULL;
}
OBJ_RELEASE(coll);
/*
* Cleanup named pipes
*/
@ -398,7 +371,7 @@ int snapc_full_app_notify_response(opal_cr_ckpt_cmd_state_t resp)
}
/* Default: use the fast way */
orte_cr_continue_like_restart = false;
opal_cr_continue_like_restart = false;
orte_cr_flush_restart_files = true;
OPAL_OUTPUT_VERBOSE((10, mca_snapc_full_component.super.output_handle,
@ -480,7 +453,7 @@ int snapc_full_app_notify_response(opal_cr_ckpt_cmd_state_t resp)
* otherwise just continue.
*/
if( currently_all_migrating ) {
orte_cr_continue_like_restart = true;
opal_cr_continue_like_restart = true;
orte_cr_flush_restart_files = false;
}
if( !currently_migrating && currently_all_migrating ) {

Просмотреть файл

@ -524,13 +524,6 @@ int global_coord_end_ckpt(orte_snapc_base_quiesce_t *datum)
}
#endif
orte_grpcomm.finalize();
if (ORTE_SUCCESS != (ret = orte_grpcomm.init())) {
ORTE_ERROR_LOG(ret);
exit_status = ret;
goto cleanup;
}
SNAPC_FULL_SET_TIMER(SNAPC_FULL_TIMER_ESTABLISH);
if( ORTE_SUCCESS != (ret = orte_snapc_full_global_set_job_ckpt_info(current_global_jobid,
ORTE_SNAPC_CKPT_STATE_FINISHED_LOCAL,
@ -2106,6 +2099,7 @@ static int orte_snapc_full_global_set_job_ckpt_info( orte_jobid_t jobid,
orte_proc_t *proc = NULL;
opal_list_item_t *item = NULL;
size_t num_procs;
orte_grpcomm_signature_t *sig;
/*
* Update all Local Coordinators (broadcast operation)
@ -2187,7 +2181,12 @@ static int orte_snapc_full_global_set_job_ckpt_info( orte_jobid_t jobid,
free(state_str);
state_str = NULL;
if( ORTE_SUCCESS != (ret = orte_grpcomm.xcast(ORTE_PROC_MY_NAME->jobid, buffer, ORTE_RML_TAG_SNAPC_FULL))) {
/* goes to all daemons */
sig = OBJ_NEW(orte_grpcomm_signature_t);
sig->signature = (orte_process_name_t*)malloc(sizeof(orte_process_name_t));
sig->signature[0].jobid = ORTE_PROC_MY_NAME->jobid;
sig->signature[0].vpid = ORTE_VPID_WILDCARD;
if (ORTE_SUCCESS != (ret = orte_grpcomm.xcast(sig, ORTE_RML_TAG_SNAPC_FULL, buffer))) {
ORTE_ERROR_LOG(ret);
exit_status = ret;
goto cleanup;
@ -2204,6 +2203,7 @@ static int orte_snapc_full_global_set_job_ckpt_info( orte_jobid_t jobid,
}
OBJ_RELEASE(buffer);
OBJ_RELEASE(sig);
return exit_status;
}

Просмотреть файл

@ -1763,11 +1763,6 @@ static void snapc_full_local_comm_read_event(int fd, short flags, void *arg)
}
#endif
orte_grpcomm.finalize();
if (ORTE_SUCCESS != (ret = orte_grpcomm.init())) {
ORTE_ERROR_LOG(ret);
goto cleanup;
}
flushed_modex = true;
}

Просмотреть файл

@ -1521,6 +1521,7 @@ static int xcast_remove_all(orte_sstore_stage_global_snapshot_info_t *handle_inf
int ret, exit_status = ORTE_SUCCESS;
opal_buffer_t *loc_buffer = NULL;
orte_sstore_stage_cmd_flag_t command;
orte_grpcomm_signature_t *sig;
handle_info->num_procs_done = 0;
@ -1539,7 +1540,12 @@ static int xcast_remove_all(orte_sstore_stage_global_snapshot_info_t *handle_inf
goto cleanup;
}
if( ORTE_SUCCESS != (ret = orte_grpcomm.xcast(ORTE_PROC_MY_NAME->jobid, loc_buffer, ORTE_RML_TAG_SSTORE_INTERNAL))) {
/* goes to all daemons */
sig = OBJ_NEW(orte_grpcomm_signature_t);
sig->signature = (orte_process_name_t*)malloc(sizeof(orte_process_name_t));
sig->signature[0].jobid = ORTE_PROC_MY_NAME->jobid;
sig->signature[0].vpid = ORTE_VPID_WILDCARD;
if (ORTE_SUCCESS != (ret = orte_grpcomm.xcast(sig, ORTE_RML_TAG_SSTORE_INTERNAL, loc_buffer))) {
ORTE_ERROR_LOG(ret);
exit_status = ret;
goto cleanup;
@ -1554,6 +1560,8 @@ static int xcast_remove_all(orte_sstore_stage_global_snapshot_info_t *handle_inf
loc_buffer = NULL;
}
OBJ_RELEASE(sig);
return exit_status;
}

Просмотреть файл

@ -75,7 +75,6 @@ static int orte_cr_coord_post_ckpt(void);
static int orte_cr_coord_post_restart(void);
static int orte_cr_coord_post_continue(void);
bool orte_cr_continue_like_restart = false;
bool orte_cr_flush_restart_files = true;
/*************
@ -137,7 +136,7 @@ int orte_cr_init(void)
opal_cr_reg_coord_callback(orte_cr_coord, &prev_coord_callback);
/* Typically this is not needed. Individual BTLs will set this as needed */
orte_cr_continue_like_restart = false;
opal_cr_continue_like_restart = false;
orte_cr_flush_restart_files = true;
cleanup:

Просмотреть файл

@ -50,11 +50,6 @@ BEGIN_C_DECLS
ORTE_DECLSPEC int orte_cr_entry_point_init(void);
ORTE_DECLSPEC int orte_cr_entry_point_finalize(void);
/*
* If one of the BTLs that shutdown require a full, clean rebuild of the
* point-to-point stack on 'continue' as well as 'restart'.
*/
ORTE_DECLSPEC extern bool orte_cr_continue_like_restart;
ORTE_DECLSPEC extern bool orte_cr_flush_restart_files;
END_C_DECLS