* Registry no longer barfs when it gets pounded at startup, so remove
sleep hack. * Fix some places where processes would not get cleaned up if an error occured during startup This commit was SVN r3824.
Этот коммит содержится в:
родитель
adf9c7cb81
Коммит
e0483a5c26
@ -233,19 +233,13 @@ main(int argc, char *argv[])
|
|||||||
/* launch processes, and do the right cleanup things */
|
/* launch processes, and do the right cleanup things */
|
||||||
num_running_procs = 0;
|
num_running_procs = 0;
|
||||||
for (i = 0 ; i < opts.num_fork_procs ; ++i) {
|
for (i = 0 ; i < opts.num_fork_procs ; ++i) {
|
||||||
/* BWB - XXX - fix me. This sleep is here because the
|
|
||||||
registry in mpirun can't keep up if you launch a large number
|
|
||||||
of processes all at once. And with this loop, it really is
|
|
||||||
basically all at once. 15 seems to be a good balance for a
|
|
||||||
hack. I was able to launch 30 procs on a 2.0Ghz G5 this way,
|
|
||||||
so that should cover us for now */
|
|
||||||
if (i % 15 == 0) sleep(1);
|
|
||||||
pid = fork();
|
pid = fork();
|
||||||
if (pid < 0) {
|
if (pid < 0) {
|
||||||
/* error :( */
|
/* error :( */
|
||||||
|
orig_errno = errno;
|
||||||
ompi_show_help("help-bootproxy.txt", "could-not-fork",
|
ompi_show_help("help-bootproxy.txt", "could-not-fork",
|
||||||
true, sched->argv[0], strerror(errno));
|
true, sched->argv[0], strerror(errno));
|
||||||
exit(errno);
|
break;
|
||||||
} else if (pid == 0) {
|
} else if (pid == 0) {
|
||||||
/* child */
|
/* child */
|
||||||
|
|
||||||
@ -281,11 +275,27 @@ main(int argc, char *argv[])
|
|||||||
started_pids[i] = pid;
|
started_pids[i] = pid;
|
||||||
num_running_procs++;
|
num_running_procs++;
|
||||||
}
|
}
|
||||||
|
if (pid == waitpid(pid, &status, WNOHANG)) {
|
||||||
|
/* well, that aborted quickly. we should to */
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
OBJ_RELEASE(sched);
|
OBJ_RELEASE(sched);
|
||||||
|
|
||||||
|
/* if we didn't launch everyone locally, cleanup */
|
||||||
|
if (num_running_procs != opts.num_fork_procs) {
|
||||||
|
for (i = 0 ; i < num_running_procs ; ++i) {
|
||||||
|
kill(started_pids[i], SIGTERM);
|
||||||
|
ret = waitpid(started_pids[i], &status, WNOHANG);
|
||||||
|
if (ret != started_pids[i]) {
|
||||||
|
sleep(1);
|
||||||
|
kill(started_pids[i], SIGKILL);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
status = 0;
|
status = 0;
|
||||||
|
|
||||||
/* if we want qos, hang around until the first process exits. We
|
/* if we want qos, hang around until the first process exits. We
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user