Make sure to check for the NO_CKPT state while waiting. This state means that the target was not able to checkpoint (either ever, or at this time), so `ompi-checkpoint` should exit after printing the error message instead of hanging and waiting.
This change will need to be moved to the v1.5 and v1.4 branches. It should apply cleanly to v1.5, but v1.4 will require a custom patch. CMRs to follow. This commit was SVN r22289.
Этот коммит содержится в:
родитель
e8de64d5a0
Коммит
4357159ac9
@ -287,11 +287,17 @@ main(int argc, char *argv[])
|
||||
if(!orte_checkpoint_globals.nowait) {
|
||||
while( ORTE_SNAPC_CKPT_STATE_FINISHED != orte_checkpoint_globals.ckpt_status &&
|
||||
ORTE_SNAPC_CKPT_STATE_STOPPED != orte_checkpoint_globals.ckpt_status &&
|
||||
ORTE_SNAPC_CKPT_STATE_NO_CKPT != orte_checkpoint_globals.ckpt_status &&
|
||||
ORTE_SNAPC_CKPT_STATE_ERROR != orte_checkpoint_globals.ckpt_status ) {
|
||||
opal_progress();
|
||||
}
|
||||
}
|
||||
|
||||
if( ORTE_SNAPC_CKPT_STATE_NO_CKPT == orte_checkpoint_globals.ckpt_status ) {
|
||||
exit_status = ORTE_ERROR;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if( ORTE_SNAPC_CKPT_STATE_ERROR == orte_checkpoint_globals.ckpt_status ) {
|
||||
opal_show_help("help-orte-checkpoint.txt", "ckpt_failure", true,
|
||||
orte_checkpoint_globals.pid, ORTE_ERROR);
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user