* Make sure all fields are filled in when setting process status
* Add debugging code for the callback. Since gdb doesn't really like doing things like waitpid for processes, spin when we are in the handler in a way that lets gdb easily attach and debug. This commit was SVN r3785.
Этот коммит содержится в:
родитель
fa98bd54c7
Коммит
d6e0552080
@ -75,6 +75,7 @@ extern "C" {
|
||||
char* rsh_agent;
|
||||
int constraints;
|
||||
unsigned int delay_time;
|
||||
bool debug_callback;
|
||||
};
|
||||
typedef struct mca_pcm_rsh_module_t mca_pcm_rsh_module_t;
|
||||
|
||||
|
@ -65,7 +65,7 @@ static int mca_pcm_rsh_param_ignore_stderr;
|
||||
static int mca_pcm_rsh_param_priority;
|
||||
static int mca_pcm_rsh_param_agent;
|
||||
static int mca_pcm_rsh_param_delay_time;
|
||||
|
||||
static int mca_pcm_rsh_param_debug_callback;
|
||||
|
||||
int
|
||||
mca_pcm_rsh_component_open(void)
|
||||
@ -87,6 +87,9 @@ mca_pcm_rsh_component_open(void)
|
||||
mca_pcm_rsh_param_delay_time =
|
||||
mca_base_param_register_int("pcm", "rsh", "delay", NULL, 0);
|
||||
|
||||
mca_pcm_rsh_param_debug_callback =
|
||||
mca_base_param_register_int("pcm", "rsh", "debug_callback", NULL, 0);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -123,6 +126,13 @@ mca_pcm_rsh_init(int *priority,
|
||||
&(me->rsh_agent));
|
||||
mca_base_param_lookup_int(mca_pcm_rsh_param_delay_time,
|
||||
(int*)&(me->delay_time));
|
||||
mca_base_param_lookup_int(mca_pcm_rsh_param_debug_callback,
|
||||
&ret);
|
||||
if (ret != 0) {
|
||||
me->debug_callback = true;
|
||||
} else {
|
||||
me->debug_callback = false;
|
||||
}
|
||||
|
||||
ret = mca_llm_base_select("rsh", &(me->llm), have_threads);
|
||||
|
||||
|
@ -507,11 +507,19 @@ internal_wait_cb(pid_t pid, int status, void *data)
|
||||
int ret;
|
||||
ompi_process_name_t *proc_name;
|
||||
mca_pcm_rsh_module_t *me = (mca_pcm_rsh_module_t*) data;
|
||||
ompi_rte_process_status_t proc_status;
|
||||
ompi_rte_process_status_t *proc_status;
|
||||
volatile int spin = 0;
|
||||
|
||||
ompi_output_verbose(10, mca_pcm_base_output,
|
||||
"process %d exited with status %d", pid, status);
|
||||
|
||||
if (me->debug_callback) {
|
||||
printf("internal_wait_cb in pid %d spinning for attach.\n", getpid());
|
||||
printf("use \"set variable spin = 0\" to stop spinning\n");
|
||||
spin = 1;
|
||||
while (spin != 0) ;
|
||||
}
|
||||
|
||||
ret = mca_pcm_base_job_list_get_job_info(me->jobs, pid, &jobid,
|
||||
&lower, &upper, true);
|
||||
if (ret != OMPI_SUCCESS) {
|
||||
@ -521,11 +529,13 @@ internal_wait_cb(pid_t pid, int status, void *data)
|
||||
}
|
||||
|
||||
/* unregister all the procs */
|
||||
proc_status.status_key = OMPI_PROC_KILLED;
|
||||
proc_status.exit_code = (ompi_exit_code_t)status;
|
||||
for (i = lower ; i <= upper ; ++i) {
|
||||
proc_name = mca_ns_base_create_process_name(0, jobid, i);
|
||||
ompi_rte_set_process_status(&proc_status, proc_name);
|
||||
proc_name = mca_ns_base_create_process_name(0, jobid, i);
|
||||
proc_status = ompi_rte_get_process_status(proc_name);
|
||||
proc_status->status_key = OMPI_PROC_KILLED;
|
||||
proc_status->exit_code = (ompi_exit_code_t)status;
|
||||
printf("setting process status\n");
|
||||
ompi_rte_set_process_status(proc_status, proc_name);
|
||||
free(proc_name);
|
||||
}
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user