1
1

Add an API to the errmgr so that apps can register for a callback that warns them of an impending migration. This gives apps a chance to terminate cleanly before being migrated for external reasons (e.g., impending failures). The timeout provided tells the daemon how long it should wait before proceeding to kill/migrate the process: if the process fails to exit before that time, the daemon will kill it.

This commit was SVN r24412.
Этот коммит содержится в:
Ralph Castain 2011-02-18 02:48:12 +00:00
родитель a0f6e153c7
Коммит b98a2917ff
7 изменённых файлов: 42 добавлений и 12 удалений

Просмотреть файл

@ -56,7 +56,8 @@ orte_errmgr_base_module_t orte_errmgr_app_module = {
update_state,
NULL,
NULL,
NULL
NULL,
orte_errmgr_base_register_migration_warning
};
/************************

Просмотреть файл

@ -250,6 +250,11 @@ int orte_errmgr_base_update_state(orte_jobid_t job,
*/
return ORTE_SUCCESS;
}
/*
 * Base (default) implementation of the migration-warning registration
 * entry point. It is a deliberate no-op: the timeout is accepted and
 * ignored, and nothing is registered.
 *
 * tv - requested delay; never read or modified here
 */
void orte_errmgr_base_register_migration_warning(struct timeval *tv)
{
    (void)tv; /* stub - argument intentionally unused */
}
/********************
* Utility functions

Просмотреть файл

@ -63,7 +63,8 @@ orte_errmgr_base_module_t orte_errmgr = {
orte_errmgr_base_update_state,
NULL, /* predicted_fault */
NULL, /* suggest_map_targets */
NULL /* ft_event */
NULL, /* ft_event */
orte_errmgr_base_register_migration_warning
};
/**

Просмотреть файл

@ -28,6 +28,10 @@
#include "orte/constants.h"
#include "orte/types.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#include "opal/dss/dss_types.h"
#include "orte/mca/plm/plm_types.h"
#include "orte/runtime/orte_globals.h"
@ -71,5 +75,7 @@ ORTE_DECLSPEC int orte_errmgr_base_update_state(orte_jobid_t job,
pid_t pid,
orte_exit_code_t exit_code);
ORTE_DECLSPEC void orte_errmgr_base_register_migration_warning(struct timeval *tv);
END_C_DECLS
#endif

Просмотреть файл

@ -231,27 +231,42 @@ typedef int (*orte_errmgr_base_module_suggest_map_targets_fn_t)(orte_proc_t *pro
*/
typedef int (*orte_errmgr_base_module_ft_event_fn_t)(int state);
/**
* Register a callback to alert caller when ORTE is preparing to
* migrate the process to another location. This provides an
* opportunity for the process to checkpoint any required state,
* and to shut down cleanly.
*
* NOTE(review): the signature carries only the timeout; how the
* callback itself is supplied is not visible here - confirm.
*
* @param[in] tv Time to delay before assuming process is stuck
* and cannot exit on its own - and thus, go
* ahead and migrate it
*/
typedef void (*orte_errmgr_base_module_register_migration_warning_fn_t)(struct timeval *tv);
/*
* Module Structure
*/
struct orte_errmgr_base_module_2_3_0_t {
/** Initialization Function */
orte_errmgr_base_module_init_fn_t init;
orte_errmgr_base_module_init_fn_t init;
/** Finalization Function */
orte_errmgr_base_module_finalize_fn_t finalize;
orte_errmgr_base_module_finalize_fn_t finalize;
orte_errmgr_base_module_log_fn_t log;
orte_errmgr_base_module_abort_fn_t abort;
orte_errmgr_base_module_log_fn_t log;
orte_errmgr_base_module_abort_fn_t abort;
/** Actual process failure notification */
orte_errmgr_base_module_update_state_fn_t update_state;
orte_errmgr_base_module_update_state_fn_t update_state;
/** Predicted process/node failure notification */
orte_errmgr_base_module_predicted_fault_fn_t predicted_fault;
orte_errmgr_base_module_predicted_fault_fn_t predicted_fault;
/** Suggest a node to map a restarting process onto */
orte_errmgr_base_module_suggest_map_targets_fn_t suggest_map_targets;
orte_errmgr_base_module_suggest_map_targets_fn_t suggest_map_targets;
/** Handle any FT Notifications */
orte_errmgr_base_module_ft_event_fn_t ft_event;
orte_errmgr_base_module_ft_event_fn_t ft_event;
/* Register to be warned of impending migration */
orte_errmgr_base_module_register_migration_warning_fn_t register_migration_warning;
};
typedef struct orte_errmgr_base_module_2_3_0_t orte_errmgr_base_module_2_3_0_t;
typedef orte_errmgr_base_module_2_3_0_t orte_errmgr_base_module_t;

Просмотреть файл

@ -72,7 +72,8 @@ static orte_errmgr_base_module_t global_module = {
/* Suggest proc to node mapping */
orte_errmgr_hnp_global_suggest_map_targets,
/* FT Event hook */
orte_errmgr_hnp_global_ft_event
orte_errmgr_hnp_global_ft_event,
orte_errmgr_base_register_migration_warning
};

Просмотреть файл

@ -92,7 +92,8 @@ orte_errmgr_base_module_t orte_errmgr_orted_module = {
update_state,
predicted_fault,
suggest_map_targets,
ft_event
ft_event,
orte_errmgr_base_register_migration_warning
};
/************************