From e0483a5c26354051cab6dd16991063180573e6a5 Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Wed, 15 Dec 2004 19:22:30 +0000 Subject: [PATCH] * Registry no longer barfs when it gets pounded at startup, so remove sleep hack. * Fix some places where processes would not get cleaned up if an error occured during startup This commit was SVN r3824. --- src/tools/bootproxy/bootproxy.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/src/tools/bootproxy/bootproxy.c b/src/tools/bootproxy/bootproxy.c index 008ea39ae7..0dfbe8e32d 100644 --- a/src/tools/bootproxy/bootproxy.c +++ b/src/tools/bootproxy/bootproxy.c @@ -233,19 +233,13 @@ main(int argc, char *argv[]) /* launch processes, and do the right cleanup things */ num_running_procs = 0; for (i = 0 ; i < opts.num_fork_procs ; ++i) { - /* BWB - XXX - fix me. This sleep is here because the - registry in mpirun can't keep up if you launch a large number - of processes all at once. And with this loop, it really is - basically all at once. 15 seems to be a good balance for a - hack. I was able to launch 30 procs on a 2.0Ghz G5 this way, - so that should cover us for now */ - if (i % 15 == 0) sleep(1); pid = fork(); if (pid < 0) { /* error :( */ + orig_errno = errno; ompi_show_help("help-bootproxy.txt", "could-not-fork", true, sched->argv[0], strerror(errno)); - exit(errno); + break; } else if (pid == 0) { /* child */ @@ -281,11 +275,27 @@ main(int argc, char *argv[]) started_pids[i] = pid; num_running_procs++; } + if (pid == waitpid(pid, &status, WNOHANG)) { + /* well, that aborted quickly. we should to */ + break; + } } } OBJ_RELEASE(sched); + /* if we didn't launch everyone locally, cleanup */ + if (num_running_procs != opts.num_fork_procs) { + for (i = 0 ; i < num_running_procs ; ++i) { + kill(started_pids[i], SIGTERM); + ret = waitpid(started_pids[i], &status, WNOHANG); + if (ret != started_pids[i]) { + sleep(1); + kill(started_pids[i], SIGKILL); + } + } + } + status = 0; /* if we want qos, hang around until the first process exits. We