From 2dc55f12da8653e795b4da7892d7c9bad793af95 Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Mon, 20 Sep 2004 18:25:00 +0000 Subject: [PATCH] * add more selection criteria to the pcm selection code * remove the ns param switch - always use the ns at this point * clean up some of the evil rms code that wasn't multi-pcm safe. still have some work on this front This commit was SVN r2779. --- src/mca/pcm/rms/src/pcm_rms.c | 8 +--- src/mca/pcm/rms/src/pcm_rms.h | 1 - src/mca/pcm/rms/src/pcm_rms_component.c | 56 +++++++++++++++---------- src/mca/pcm/rsh/pcm_rsh.h | 1 - src/mca/pcm/rsh/pcm_rsh_component.c | 11 ++--- src/mca/pcm/rsh/pcm_rsh_spawn.c | 7 +--- src/runtime/runtime.h | 4 ++ 7 files changed, 43 insertions(+), 45 deletions(-) diff --git a/src/mca/pcm/rms/src/pcm_rms.c b/src/mca/pcm/rms/src/pcm_rms.c index 65410b9d76..6a7c6c3a22 100644 --- a/src/mca/pcm/rms/src/pcm_rms.c +++ b/src/mca/pcm/rms/src/pcm_rms.c @@ -49,14 +49,8 @@ mca_pcm_rms_allocate_resources(struct mca_pcm_base_module_1_0_0_t* me, /* For now, just punt on whether we can actually fullfill the request or not */ total_procs = (nodes == 0) ? 
procs : nodes * procs; - if (mca_pcm_rms_use_ns) { - node_alloc->start = + node_alloc->start = (int) ompi_name_server.reserve_range(jobid, total_procs); - } else { - /* BWB - remove the USE_NS code once the failures in PTL / NS - due to unexpected offsets are fixed up */ - node_alloc->start = 0; - } node_alloc->nodes = nodes; node_alloc->count = procs; diff --git a/src/mca/pcm/rms/src/pcm_rms.h b/src/mca/pcm/rms/src/pcm_rms.h index 1aa9e25316..a24453d501 100644 --- a/src/mca/pcm/rms/src/pcm_rms.h +++ b/src/mca/pcm/rms/src/pcm_rms.h @@ -95,6 +95,5 @@ extern "C" { * Module variables */ extern int mca_pcm_rms_output; -extern int mca_pcm_rms_use_ns; #endif /* MCA_PCM_RMS_H_ */ diff --git a/src/mca/pcm/rms/src/pcm_rms_component.c b/src/mca/pcm/rms/src/pcm_rms_component.c index f8ee05c234..8e50f75116 100644 --- a/src/mca/pcm/rms/src/pcm_rms_component.c +++ b/src/mca/pcm/rms/src/pcm_rms_component.c @@ -18,6 +18,7 @@ #include "mca/pcm/base/base.h" #include "mca/llm/base/base.h" #include "util/path.h" +#include "runtime/runtime.h" #include #include @@ -49,17 +50,6 @@ mca_pcm_base_component_1_0_0_t mca_pcm_rms_component = { }; -struct mca_pcm_base_module_1_0_0_t mca_pcm_rms_1_0_0 = { - mca_pcm_rms_allocate_resources, - mca_pcm_rms_can_spawn, - mca_pcm_rms_spawn_procs, - mca_pcm_rms_kill_proc, - mca_pcm_rms_kill_job, - mca_pcm_rms_deallocate_resources, - mca_pcm_rms_finalize -}; - - /* need to create output stream to dump in file */ ompi_output_stream_t mca_pcm_rms_output_stream = { false, /* lds_is_debugging BWB - change me for release */ @@ -81,30 +71,28 @@ ompi_output_stream_t mca_pcm_rms_output_stream = { */ static int mca_pcm_rms_param_priority; static int mca_pcm_rms_param_debug; -static int mca_pcm_rms_param_use_ns; /* * Component variables. All of these are shared among the module * instances, so they don't need to go in a special structure or * anything. 
*/ -int mca_pcm_rms_output = 0; -int mca_pcm_rms_use_ns; +int mca_pcm_rms_output = -1; + int mca_pcm_rms_component_open(void) { - mca_pcm_rms_param_debug = - mca_base_param_register_int("pcm", "rms", "debug", NULL, 100); + mca_pcm_rms_param_debug = + mca_base_param_register_int("pcm", "rms", "debug", NULL, 100); mca_pcm_rms_param_priority = mca_base_param_register_int("pcm", "rms", "priority", NULL, 5); - mca_pcm_rms_param_use_ns = - mca_base_param_register_int("pcm", "rms", "use_ns", NULL, 1); - mca_pcm_rms_job_list_init(); + mca_pcm_rms_output = ompi_output_open(&mca_pcm_rms_output_stream); + return OMPI_SUCCESS; } @@ -127,27 +115,49 @@ mca_pcm_rms_init(int *priority, int debug; char *prun; int num_cpus; + mca_pcm_base_module_t *me; + /* debugging gorp */ mca_base_param_lookup_int(mca_pcm_rms_param_debug, &debug); - mca_pcm_rms_output = ompi_output_open(&mca_pcm_rms_output_stream); ompi_output_set_verbosity(mca_pcm_rms_output, debug); + /* get our priority - if 0, we don't run */ mca_base_param_lookup_int(mca_pcm_rms_param_priority, priority); + if (0 == priority) return NULL; - mca_base_param_lookup_int(mca_pcm_rms_param_use_ns, &mca_pcm_rms_use_ns); + /* fill in params */ *allow_multi_user_threads = true; *have_hidden_threads = false; + /* check constrains */ + /* no daemon */ + if (0 != (constraints & OMPI_RTE_SPAWN_DAEMON)) return NULL; + /* no MPI_COMM_SPAWN* */ + if (0 != (constraints & OMPI_RTE_SPAWN_FROM_MPI)) return NULL; + + /* see if we are an RMS system */ + /* BWB - is there a better way to do this */ num_cpus = rms_numCpus(NULL); if (num_cpus <= 0) return NULL; - /* poke around for prun */ prun = ompi_path_env_findv("prun", X_OK, environ, NULL); if (NULL == prun) return NULL; free(prun); - return &mca_pcm_rms_1_0_0; + /* ok, now let's try to fire up */ + me = malloc(sizeof(mca_pcm_base_module_t)); + if (NULL == me) return NULL; + + me->pcm_allocate_resources = mca_pcm_rms_allocate_resources; + me->pcm_can_spawn = mca_pcm_rms_can_spawn; + 
me->pcm_spawn_procs = mca_pcm_rms_spawn_procs; + me->pcm_kill_proc = mca_pcm_rms_kill_proc; + me->pcm_kill_job = mca_pcm_rms_kill_job; + me->pcm_deallocate_resources = mca_pcm_rms_deallocate_resources; + me->pcm_finalize = mca_pcm_rms_finalize; + + return me; } diff --git a/src/mca/pcm/rsh/pcm_rsh.h b/src/mca/pcm/rsh/pcm_rsh.h index 6813882410..869b1acc37 100644 --- a/src/mca/pcm/rsh/pcm_rsh.h +++ b/src/mca/pcm/rsh/pcm_rsh.h @@ -59,7 +59,6 @@ extern "C" { int fast_boot; int ignore_stderr; char* rsh_agent; - int use_ns; }; typedef struct mca_pcm_rsh_module_t mca_pcm_rsh_module_t; diff --git a/src/mca/pcm/rsh/pcm_rsh_component.c b/src/mca/pcm/rsh/pcm_rsh_component.c index 7dbfb00e66..0f38836fa6 100644 --- a/src/mca/pcm/rsh/pcm_rsh_component.c +++ b/src/mca/pcm/rsh/pcm_rsh_component.c @@ -71,13 +71,12 @@ static int mca_pcm_rsh_param_ignore_stderr; static int mca_pcm_rsh_param_priority; static int mca_pcm_rsh_param_agent; static int mca_pcm_rsh_param_debug; -static int mca_pcm_rsh_param_use_ns; /* * component variables */ /* debugging output stream */ -int mca_pcm_rsh_output = 0; +int mca_pcm_rsh_output = -1; int @@ -96,8 +95,6 @@ mca_pcm_rsh_component_open(void) mca_base_param_register_int("pcm", "rsh", "fast", NULL, 1); mca_pcm_rsh_param_ignore_stderr = mca_base_param_register_int("pcm", "rsh", "ignore_stderr", NULL, 0); - mca_pcm_rsh_param_use_ns = - mca_base_param_register_int("pcm", "rsh", "use_ns", NULL, 1); mca_pcm_rsh_param_priority = mca_base_param_register_int("pcm", "rsh", "priority", NULL, 1); @@ -148,18 +145,16 @@ mca_pcm_rsh_init(int *priority, &(me->ignore_stderr)); mca_base_param_lookup_string(mca_pcm_rsh_param_agent, &(me->rsh_agent)); - mca_base_param_lookup_int(mca_pcm_rsh_param_use_ns, - &(me->use_ns)); *allow_multi_user_threads = true; *have_hidden_threads = false; - ret = mca_llm_base_select("pcm", &(me->llm), + ret = mca_llm_base_select("rsh", &(me->llm), allow_multi_user_threads, have_hidden_threads); if (OMPI_SUCCESS != ret) { /* well, that 
can't be good. guess we can't run */ - ompi_output_verbose(5, mca_pcm_rsh_output, "select: no llm found"); + ompi_output_verbose(5, mca_pcm_rsh_output, "init: no llm found"); return NULL; } diff --git a/src/mca/pcm/rsh/pcm_rsh_spawn.c b/src/mca/pcm/rsh/pcm_rsh_spawn.c index 59f711e847..f756e200f6 100644 --- a/src/mca/pcm/rsh/pcm_rsh_spawn.c +++ b/src/mca/pcm/rsh/pcm_rsh_spawn.c @@ -91,11 +91,8 @@ mca_pcm_rsh_spawn_procs(struct mca_pcm_base_module_1_0_0_t* me_super, /* BWB - make sure vpids are reserved */ local_start_vpid = 0; - if (me->use_ns) { - global_start_vpid = (int) ompi_name_server.reserve_range(jobid, num_procs); - } else { - global_start_vpid = 0; - } + global_start_vpid = (int) ompi_name_server.reserve_range(jobid, num_procs); + for (sched_item = ompi_list_get_first(schedlist) ; sched_item != ompi_list_get_end(schedlist) ; diff --git a/src/runtime/runtime.h b/src/runtime/runtime.h index 2c50307441..45e7fc3ff0 100644 --- a/src/runtime/runtime.h +++ b/src/runtime/runtime.h @@ -21,7 +21,11 @@ mpiruntime/mpiruntime.h directly */ #include "mpi/runtime/mpiruntime.h" +/* constants for spawn constraints */ #define OMPI_RTE_SPAWN_MULTI_CELL 0x0001 +#define OMPI_RTE_SPAWN_DAEMON 0x0002 +#define OMPI_RTE_SPAWN_HIGH_QOS 0x0004 +#define OMPI_RTE_SPAWN_FROM_MPI 0x0008 #ifdef __cplusplus extern "C" {