From b7aeaae3a34d299359eb358b4672a72fec56d075 Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Fri, 20 Aug 2004 18:48:57 +0000 Subject: [PATCH] * Make sure that the env pcmclient does something rational even if the vpid_start variable isn't set * "working" mpirun with rsh pcm. Still no job monitoring or cleanup (as those aren't really available for rsh - cleanup will mostly work in the long term) * minor fixes for pcm_base_comm functions to properly deal with empty environments This commit was SVN r2250. --- src/mca/llm/hostfile/etc/llm_hostfile | 2 +- src/mca/pcm/base/pcm_base_comm.c | 14 ++-- src/mca/pcm/rsh/src/pcm_rsh_spawn.c | 73 +++++++++++++--- .../pcmclient/env/pcmclient_env_component.c | 2 +- src/tools/bootproxy/bootproxy.c | 84 +++++++++++++++++-- src/tools/mpirun/mpirun2.cc | 23 +++-- 6 files changed, 165 insertions(+), 33 deletions(-) diff --git a/src/mca/llm/hostfile/etc/llm_hostfile b/src/mca/llm/hostfile/etc/llm_hostfile index 2fbb50c4a8..f9dbf59197 100644 --- a/src/mca/llm/hostfile/etc/llm_hostfile +++ b/src/mca/llm/hostfile/etc/llm_hostfile @@ -1 +1 @@ -localhost +localhost count=200 diff --git a/src/mca/pcm/base/pcm_base_comm.c b/src/mca/pcm/base/pcm_base_comm.c index 63df4351be..49134fd352 100644 --- a/src/mca/pcm/base/pcm_base_comm.c +++ b/src/mca/pcm/base/pcm_base_comm.c @@ -40,11 +40,15 @@ mca_pcm_base_send_schedule(FILE *fp, } /* ENV - since we don't have a envc, must create ourselves...*/ - for (envc = 0 ; (sched->env)[envc] != NULL ; ++envc) ; - fprintf(fp, "%d\n", envc); - for (i = 0 ; i < envc ; ++i) { - fprintf(fp, "%d %s\n", (int) strlen((sched->env)[i]), - (sched->env)[i]); + if (sched->env == NULL) { + fprintf(fp, "%d\n", 0); + } else { + for (envc = 0 ; (sched->env)[envc] != NULL ; ++envc) ; + fprintf(fp, "%d\n", envc); + for (i = 0 ; i < envc ; ++i) { + fprintf(fp, "%d %s\n", (int) strlen((sched->env)[i]), + (sched->env)[i]); + } } /* CWD */ diff --git a/src/mca/pcm/rsh/src/pcm_rsh_spawn.c b/src/mca/pcm/rsh/src/pcm_rsh_spawn.c index 7a22b7d0b4..1aecd3c161 100644 --- a/src/mca/pcm/rsh/src/pcm_rsh_spawn.c +++ b/src/mca/pcm/rsh/src/pcm_rsh_spawn.c @@ -25,13 +25,19 @@ #include "runtime/runtime_types.h" #include "util/output.h" #include "util/argv.h" +#include "util/numtostr.h" - +#if 1 #define BOOTAGENT "mca_pcm_rsh_bootproxy" +#else +#define BOOTAGENT "cat" +#endif #define PRS_BUFSIZE 1024 static int internal_spawn_proc(int jobid, ompi_rte_node_schedule_t *sched, - ompi_list_t *nodelist); + ompi_list_t *nodelist, + int my_start_vpid, int global_start_vpid, + int num_procs); bool @@ -49,14 +55,36 @@ mca_pcm_rsh_spawn_procs(int jobid, ompi_list_t *schedlist) { ompi_list_item_t *sched_item, *node_item; ompi_rte_node_schedule_t *sched; + ompi_rte_node_allocation_t *node; ompi_list_t launch; ompi_list_t done; int ret, i; int width = 1; + int local_start_vpid = 0; + int global_start_vpid = 0; + int num_procs = 0; OBJ_CONSTRUCT(&launch, ompi_list_t); OBJ_CONSTRUCT(&done, ompi_list_t); + + + for (sched_item = ompi_list_get_first(schedlist) ; + sched_item != ompi_list_get_end(schedlist) ; + sched_item = ompi_list_get_next(sched_item)) { + sched = (ompi_rte_node_schedule_t*) sched_item; + + for (node_item = ompi_list_get_first(sched->nodelist) ; + node_item != ompi_list_get_end(sched->nodelist) ; + node_item = ompi_list_get_next(node_item)) { + node = (ompi_rte_node_allocation_t*) node_item; + num_procs += node->count; + } + } + + /* BWB - make sure vpids are reserved */ + local_start_vpid = global_start_vpid; + for (sched_item = ompi_list_get_first(schedlist) ; sched_item != ompi_list_get_end(schedlist) ; sched_item = ompi_list_get_next(sched_item)) { @@ -74,7 +102,7 @@ mca_pcm_rsh_spawn_procs(int jobid, ompi_list_t *schedlist) /* find enough entries for this slice to go */ for (i = 0 ; i < width && node_item != ompi_list_get_end(sched->nodelist) ; - node_item = ompi_list_get_next(node_item)) { } + node_item = ompi_list_get_next(node_item), ++i) { } /* if we don't have anyone, get us out of here.. */ if (i == 0) { continue; @@ -88,7 +116,9 @@ mca_pcm_rsh_spawn_procs(int jobid, ompi_list_t *schedlist) /* do the launch to the first node in the list, passing him the rest of the list */ - ret = internal_spawn_proc(jobid, sched, &launch); + ret = internal_spawn_proc(jobid, sched, &launch, + local_start_vpid, global_start_vpid, + num_procs); if (OMPI_SUCCESS != ret) { /* well, crap! put ourselves back together, I guess. Should call killjob */ @@ -98,6 +128,8 @@ mca_pcm_rsh_spawn_procs(int jobid, ompi_list_t *schedlist) &done); return ret; } + local_start_vpid += + ((ompi_rte_node_allocation_t*) ompi_list_get_first(&launch))->count; /* copy the list over to the done part */ ompi_list_join(&done, ompi_list_get_end(&done), &launch); @@ -219,7 +251,8 @@ cleanup: static int internal_spawn_proc(int jobid, ompi_rte_node_schedule_t *sched, - ompi_list_t *nodelist) + ompi_list_t *nodelist, int my_start_vpid, + int global_start_vpid, int num_procs) { int kidstdin[2]; /* child stdin pipe */ bool needs_profile = false; @@ -235,6 +268,7 @@ internal_spawn_proc(int jobid, ompi_rte_node_schedule_t *sched, FILE *fp; int status; /* exit status */ int i; + char *tmp; start_node = (ompi_rte_node_allocation_t*) ompi_list_get_first(nodelist); @@ -270,8 +304,24 @@ internal_spawn_proc(int jobid, ompi_rte_node_schedule_t *sched, /* build the command to start */ ompi_argv_append(&cmdc, &cmdv, BOOTAGENT); - /* BWB - turn on debugging for now */ - ompi_argv_append(&cmdc, &cmdv, "-v"); + + /* starting vpid for launchee's procs */ + tmp = ltostr(my_start_vpid); + ompi_argv_append(&cmdc, &cmdv, "--local_start_vpid"); + ompi_argv_append(&cmdc, &cmdv, tmp); + free(tmp); + + /* global starting vpid for this pcm spawn */ + tmp = ltostr(global_start_vpid); + ompi_argv_append(&cmdc, &cmdv, "--global_start_vpid"); + ompi_argv_append(&cmdc, &cmdv, tmp); + free(tmp); + + /* number of procs in this pcm spawn */ + tmp = ltostr(num_procs); + ompi_argv_append(&cmdc, &cmdv, "--num_procs"); + ompi_argv_append(&cmdc, &cmdv, tmp); + free(tmp); /* add the end of the .profile thing if required */ if (needs_profile) { @@ -293,7 +343,7 @@ internal_spawn_proc(int jobid, ompi_rte_node_schedule_t *sched, } else if (pid == 0) { /* child */ - if ((dup2(kidstdin[1], 0) < 0)) { + if ((dup2(kidstdin[0], 0) < 0)) { perror(cmdv[0]); exit(errno); } @@ -314,14 +364,17 @@ internal_spawn_proc(int jobid, ompi_rte_node_schedule_t *sched, } else { /* parent */ - if (close(kidstdin[1])) { +#if 0 + if (close(kidstdin[0])) { kill(pid, SIGTERM); ret = OMPI_ERROR; goto proc_cleanup; } +#endif /* send our stuff down the wire */ - fp = fdopen(kidstdin[0], "w"); + fp = fdopen(kidstdin[1], "a"); + if (fp == NULL) { perror("fdopen"); abort(); } ret = mca_pcm_base_send_schedule(fp, jobid, sched, nodelist); fclose(fp); if (OMPI_SUCCESS != ret) { diff --git a/src/mca/pcmclient/env/pcmclient_env_component.c b/src/mca/pcmclient/env/pcmclient_env_component.c index 5b6450667c..86739fe812 100644 --- a/src/mca/pcmclient/env/pcmclient_env_component.c +++ b/src/mca/pcmclient/env/pcmclient_env_component.c @@ -70,7 +70,7 @@ mca_pcmclient_env_open(void) param_procid = mca_base_param_register_int("pcmclient", "env", "procid", NULL, -1); param_vpid_start = mca_base_param_register_int("pcmclient", "env", - "vpid_start", NULL, -1); + "vpid_start", NULL, 0); param_num_procs = mca_base_param_register_int("pcmclient", "env", "num_procs", NULL, -1); diff --git a/src/tools/bootproxy/bootproxy.c b/src/tools/bootproxy/bootproxy.c index defe01cd42..593c487f75 100644 --- a/src/tools/bootproxy/bootproxy.c +++ b/src/tools/bootproxy/bootproxy.c @@ -12,18 +12,62 @@ #include #include +static void +show_usage(char *myname) +{ + printf("usage: %s --local_start_vpid [vpid] --global_start_vpid [vpid]\n" + " --num_procs [num]\n\n", myname); +} + int main(int argc, char *argv[]) { ompi_rte_node_schedule_t *sched; - ompi_rte_node_allocation_t *nodelist; + ompi_rte_node_allocation_t *node; pid_t pid; int i; int ret; int jobid; + ompi_cmd_line_t *cmd_line = NULL; + int local_vpid_start, global_vpid_start; + int cellid = 0; + int num_procs; + char *env_buf; ompi_init(argc, argv); + cmd_line = ompi_cmd_line_create(); + ompi_cmd_line_make_opt(cmd_line, '\0', "local_start_vpid", 1, + "starting vpid to use when launching"); + ompi_cmd_line_make_opt(cmd_line, '\0', "global_start_vpid", 1, + "starting vpid to use when launching"); + ompi_cmd_line_make_opt(cmd_line, '\0', "num_procs", 1, + "number of procs in job"); + + if (OMPI_SUCCESS != ompi_cmd_line_parse(cmd_line, false, argc, argv)) { + show_usage(argv[0]); + exit(1); + } + + if (!ompi_cmd_line_is_taken(cmd_line, "local_start_vpid")) { + show_usage(argv[0]); + exit(1); + } + local_vpid_start = + atoi(ompi_cmd_line_get_param(cmd_line, "local_start_vpid", 0, 0)); + + if (!ompi_cmd_line_is_taken(cmd_line, "global_start_vpid")) { + show_usage(argv[0]); + exit(1); + } + global_vpid_start = + atoi(ompi_cmd_line_get_param(cmd_line, "global_start_vpid", 0, 0)); + + if (!ompi_cmd_line_is_taken(cmd_line, "num_procs")) { + show_usage(argv[0]); + exit(1); + } + num_procs = atoi(ompi_cmd_line_get_param(cmd_line, "num_procs", 0, 0)); sched = OBJ_NEW(ompi_rte_node_schedule_t); @@ -39,11 +83,24 @@ main(int argc, char *argv[]) if (ompi_list_get_size(sched->nodelist) > 1) { fprintf(stderr, "Received more than one node - ignoring extra info\n"); } + if (ompi_list_get_size(sched->nodelist) < 1) { + fprintf(stderr, "Received less than one node\n"); + } /* fill our environment */ for (i = 0 ; sched->env[i] != NULL ; ++i) { putenv(sched->env[i]); } + /* constant pcmclient info */ + asprintf(&env_buf, "OMPI_MCA_pcmclient_env_cellid=%d", cellid); + putenv(env_buf); + asprintf(&env_buf, "OMPI_MCA_pcmclient_env_jobid=%d", jobid); + putenv(env_buf); + asprintf(&env_buf, "OMPI_MCA_pcmclient_env_num_procs=%d", num_procs); + putenv(env_buf); + asprintf(&env_buf, "OMPI_MCA_pcmclient_env_vpid_start=%d", + global_vpid_start); + putenv(env_buf); /* get in the right place */ if (sched->cwd != NULL) { @@ -54,15 +111,24 @@ main(int argc, char *argv[]) } } + node = (ompi_rte_node_allocation_t*) ompi_list_get_first(sched->nodelist); /* let's go! - if we are the parent, don't stick around... */ - pid = fork(); - if (pid < 0) { - /* error :( */ - perror("fork"); - } else if (pid == 0) { - /* child */ - execvp(sched->argv[0], sched->argv); - perror("exec"); + for (i = 0 ; i < node->count ; ++i) { + pid = fork(); + if (pid < 0) { + /* error :( */ + perror("fork"); + } else if (pid == 0) { + /* do the putenv here so that we don't look like we have a + giant memory leak */ + asprintf(&env_buf, "OMPI_MCA_pcmclient_env_procid=%d", + local_vpid_start + i); + putenv(env_buf); + + /* child */ + execvp(sched->argv[0], sched->argv); + perror("exec"); + } } OBJ_RELEASE(sched); diff --git a/src/tools/mpirun/mpirun2.cc b/src/tools/mpirun/mpirun2.cc index 927ee5bffe..6a42d551f3 100644 --- a/src/tools/mpirun/mpirun2.cc +++ b/src/tools/mpirun/mpirun2.cc @@ -6,6 +6,7 @@ #include "ompi_config.h" #include "mca/ns/ns.h" +#include "mca/pcm/base/base.h" #include "runtime/runtime.h" #include "mca/base/base.h" #include "util/cmd_line.h" @@ -15,6 +16,8 @@ #include #include +extern char** environ; + static long num_running_procs; static int @@ -36,9 +39,8 @@ main(int argc, char *argv[]) ompi_list_t *nodelist = NULL; ompi_list_t schedlist; mca_ns_base_jobid_t new_jobid; - int num_procs; + int num_procs = 1; ompi_rte_node_schedule_t *sched; - ompi_list_item_t *nodeitem; char cwd[MAXPATHLEN]; /* @@ -111,8 +113,8 @@ main(int argc, char *argv[]) new_jobid = getpid(); /* BWB - fix jobid, procs, and nodes */ - nodelist = ompi_rte_allocate_resources(0, 0, 2); - if (NULL != nodelist) { + nodelist = ompi_rte_allocate_resources(new_jobid, 0, num_procs); + if (NULL == nodelist) { /* BWB show_help */ printf("show_help: ompi_rte_allocate_resources failed\n"); return -1; @@ -123,11 +125,18 @@ main(int argc, char *argv[]) */ OBJ_CONSTRUCT(&schedlist, ompi_list_t); sched = OBJ_NEW(ompi_rte_node_schedule_t); - OBJ_CONSTRUCT(&(sched->nodelist), ompi_list_t); + ompi_list_append(&schedlist, (ompi_list_item_t*) sched); ompi_cmd_line_get_tail(cmd_line, &(sched->argc), &(sched->argv)); - sched->env = NULL; + mca_pcm_base_build_base_env(environ, &(sched->env)); getcwd(cwd, MAXPATHLEN); sched->cwd = strdup(cwd); + sched->nodelist = nodelist; + + if (sched->argc == 0) { + printf("no app to start\n"); + return 1; + } + /* * register the monitor @@ -157,7 +166,7 @@ main(int argc, char *argv[]) mca_base_close(); ompi_finalize(); - OBJ_DESTRUCT(&sched); + OBJ_DESTRUCT(&schedlist); return 0; }