1
1

* add more selection criteria to the pcm selection code

* remove the ns param switch - always use the ns at this point
* clean up some of the evil rms code that wasn't multi-pcm safe.  still
  have some work to do on this front

This commit was SVN r2779.
Этот коммит содержится в:
Brian Barrett 2004-09-20 18:25:00 +00:00
родитель efc09dfc94
Коммит 2dc55f12da
7 изменённых файлов: 43 добавлений и 45 удалений

Просмотреть файл

@ -49,14 +49,8 @@ mca_pcm_rms_allocate_resources(struct mca_pcm_base_module_1_0_0_t* me,
/* For now, just punt on whether we can actually fullfill the request or not */ /* For now, just punt on whether we can actually fullfill the request or not */
total_procs = (nodes == 0) ? procs : nodes * procs; total_procs = (nodes == 0) ? procs : nodes * procs;
if (mca_pcm_rms_use_ns) { node_alloc->start =
node_alloc->start =
(int) ompi_name_server.reserve_range(jobid, total_procs); (int) ompi_name_server.reserve_range(jobid, total_procs);
} else {
/* BWB - remove the USE_NS code once the failures in PTL / NS
due to unexpected offsets are fixed up */
node_alloc->start = 0;
}
node_alloc->nodes = nodes; node_alloc->nodes = nodes;
node_alloc->count = procs; node_alloc->count = procs;

Просмотреть файл

@ -95,6 +95,5 @@ extern "C" {
* Module variables * Module variables
*/ */
extern int mca_pcm_rms_output; extern int mca_pcm_rms_output;
extern int mca_pcm_rms_use_ns;
#endif /* MCA_PCM_RMS_H_ */ #endif /* MCA_PCM_RMS_H_ */

Просмотреть файл

@ -18,6 +18,7 @@
#include "mca/pcm/base/base.h" #include "mca/pcm/base/base.h"
#include "mca/llm/base/base.h" #include "mca/llm/base/base.h"
#include "util/path.h" #include "util/path.h"
#include "runtime/runtime.h"
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
@ -49,17 +50,6 @@ mca_pcm_base_component_1_0_0_t mca_pcm_rms_component = {
}; };
struct mca_pcm_base_module_1_0_0_t mca_pcm_rms_1_0_0 = {
mca_pcm_rms_allocate_resources,
mca_pcm_rms_can_spawn,
mca_pcm_rms_spawn_procs,
mca_pcm_rms_kill_proc,
mca_pcm_rms_kill_job,
mca_pcm_rms_deallocate_resources,
mca_pcm_rms_finalize
};
/* need to create output stream to dump in file */ /* need to create output stream to dump in file */
ompi_output_stream_t mca_pcm_rms_output_stream = { ompi_output_stream_t mca_pcm_rms_output_stream = {
false, /* lds_is_debugging BWB - change me for release */ false, /* lds_is_debugging BWB - change me for release */
@ -81,30 +71,28 @@ ompi_output_stream_t mca_pcm_rms_output_stream = {
*/ */
static int mca_pcm_rms_param_priority; static int mca_pcm_rms_param_priority;
static int mca_pcm_rms_param_debug; static int mca_pcm_rms_param_debug;
static int mca_pcm_rms_param_use_ns;
/* /*
* Component variables. All of these are shared among the module * Component variables. All of these are shared among the module
* instances, so they don't need to go in a special structure or * instances, so they don't need to go in a special structure or
* anything. * anything.
*/ */
int mca_pcm_rms_output = 0; int mca_pcm_rms_output = -1;
int mca_pcm_rms_use_ns;
int int
mca_pcm_rms_component_open(void) mca_pcm_rms_component_open(void)
{ {
mca_pcm_rms_param_debug = mca_pcm_rms_param_debug =
mca_base_param_register_int("pcm", "rms", "debug", NULL, 100); mca_base_param_register_int("pcm", "rms", "debug", NULL, 100);
mca_pcm_rms_param_priority = mca_pcm_rms_param_priority =
mca_base_param_register_int("pcm", "rms", "priority", NULL, 5); mca_base_param_register_int("pcm", "rms", "priority", NULL, 5);
mca_pcm_rms_param_use_ns =
mca_base_param_register_int("pcm", "rms", "use_ns", NULL, 1);
mca_pcm_rms_job_list_init(); mca_pcm_rms_job_list_init();
mca_pcm_rms_output = ompi_output_open(&mca_pcm_rms_output_stream);
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
@ -127,27 +115,49 @@ mca_pcm_rms_init(int *priority,
int debug; int debug;
char *prun; char *prun;
int num_cpus; int num_cpus;
mca_pcm_base_module_t *me;
/* debugging gorp */
mca_base_param_lookup_int(mca_pcm_rms_param_debug, &debug); mca_base_param_lookup_int(mca_pcm_rms_param_debug, &debug);
mca_pcm_rms_output = ompi_output_open(&mca_pcm_rms_output_stream);
ompi_output_set_verbosity(mca_pcm_rms_output, debug); ompi_output_set_verbosity(mca_pcm_rms_output, debug);
/* get our priority - if 0, we don't run */
mca_base_param_lookup_int(mca_pcm_rms_param_priority, priority); mca_base_param_lookup_int(mca_pcm_rms_param_priority, priority);
if (0 == priority) return NULL;
mca_base_param_lookup_int(mca_pcm_rms_param_use_ns, &mca_pcm_rms_use_ns); /* fill in params */
*allow_multi_user_threads = true; *allow_multi_user_threads = true;
*have_hidden_threads = false; *have_hidden_threads = false;
/* check constrains */
/* no daemon */
if (0 != (constraints & OMPI_RTE_SPAWN_DAEMON)) return NULL;
/* no MPI_COMM_SPAWN* */
if (0 != (constraints & OMPI_RTE_SPAWN_FROM_MPI)) return NULL;
/* see if we are an RMS system */
/* BWB - is there a better way to do this */
num_cpus = rms_numCpus(NULL); num_cpus = rms_numCpus(NULL);
if (num_cpus <= 0) return NULL; if (num_cpus <= 0) return NULL;
/* poke around for prun */
prun = ompi_path_env_findv("prun", X_OK, environ, NULL); prun = ompi_path_env_findv("prun", X_OK, environ, NULL);
if (NULL == prun) return NULL; if (NULL == prun) return NULL;
free(prun); free(prun);
return &mca_pcm_rms_1_0_0; /* ok, now let's try to fire up */
me = malloc(sizeof(mca_pcm_base_module_t));
if (NULL == me) return NULL;
me->pcm_allocate_resources = mca_pcm_rms_allocate_resources;
me->pcm_can_spawn = mca_pcm_rms_can_spawn;
me->pcm_spawn_procs = mca_pcm_rms_spawn_procs;
me->pcm_kill_proc = mca_pcm_rms_kill_proc;
me->pcm_kill_job = mca_pcm_rms_kill_job;
me->pcm_deallocate_resources = mca_pcm_rms_deallocate_resources;
me->pcm_finalize = mca_pcm_rms_finalize;
return me;
} }

Просмотреть файл

@ -59,7 +59,6 @@ extern "C" {
int fast_boot; int fast_boot;
int ignore_stderr; int ignore_stderr;
char* rsh_agent; char* rsh_agent;
int use_ns;
}; };
typedef struct mca_pcm_rsh_module_t mca_pcm_rsh_module_t; typedef struct mca_pcm_rsh_module_t mca_pcm_rsh_module_t;

Просмотреть файл

@ -71,13 +71,12 @@ static int mca_pcm_rsh_param_ignore_stderr;
static int mca_pcm_rsh_param_priority; static int mca_pcm_rsh_param_priority;
static int mca_pcm_rsh_param_agent; static int mca_pcm_rsh_param_agent;
static int mca_pcm_rsh_param_debug; static int mca_pcm_rsh_param_debug;
static int mca_pcm_rsh_param_use_ns;
/* /*
* component variables * component variables
*/ */
/* debugging output stream */ /* debugging output stream */
int mca_pcm_rsh_output = 0; int mca_pcm_rsh_output = -1;
int int
@ -96,8 +95,6 @@ mca_pcm_rsh_component_open(void)
mca_base_param_register_int("pcm", "rsh", "fast", NULL, 1); mca_base_param_register_int("pcm", "rsh", "fast", NULL, 1);
mca_pcm_rsh_param_ignore_stderr = mca_pcm_rsh_param_ignore_stderr =
mca_base_param_register_int("pcm", "rsh", "ignore_stderr", NULL, 0); mca_base_param_register_int("pcm", "rsh", "ignore_stderr", NULL, 0);
mca_pcm_rsh_param_use_ns =
mca_base_param_register_int("pcm", "rsh", "use_ns", NULL, 1);
mca_pcm_rsh_param_priority = mca_pcm_rsh_param_priority =
mca_base_param_register_int("pcm", "rsh", "priority", NULL, 1); mca_base_param_register_int("pcm", "rsh", "priority", NULL, 1);
@ -148,18 +145,16 @@ mca_pcm_rsh_init(int *priority,
&(me->ignore_stderr)); &(me->ignore_stderr));
mca_base_param_lookup_string(mca_pcm_rsh_param_agent, mca_base_param_lookup_string(mca_pcm_rsh_param_agent,
&(me->rsh_agent)); &(me->rsh_agent));
mca_base_param_lookup_int(mca_pcm_rsh_param_use_ns,
&(me->use_ns));
*allow_multi_user_threads = true; *allow_multi_user_threads = true;
*have_hidden_threads = false; *have_hidden_threads = false;
ret = mca_llm_base_select("pcm", &(me->llm), ret = mca_llm_base_select("rsh", &(me->llm),
allow_multi_user_threads, allow_multi_user_threads,
have_hidden_threads); have_hidden_threads);
if (OMPI_SUCCESS != ret) { if (OMPI_SUCCESS != ret) {
/* well, that can't be good. guess we can't run */ /* well, that can't be good. guess we can't run */
ompi_output_verbose(5, mca_pcm_rsh_output, "select: no llm found"); ompi_output_verbose(5, mca_pcm_rsh_output, "init: no llm found");
return NULL; return NULL;
} }

Просмотреть файл

@ -91,11 +91,8 @@ mca_pcm_rsh_spawn_procs(struct mca_pcm_base_module_1_0_0_t* me_super,
/* BWB - make sure vpids are reserved */ /* BWB - make sure vpids are reserved */
local_start_vpid = 0; local_start_vpid = 0;
if (me->use_ns) { global_start_vpid = (int) ompi_name_server.reserve_range(jobid, num_procs);
global_start_vpid = (int) ompi_name_server.reserve_range(jobid, num_procs);
} else {
global_start_vpid = 0;
}
for (sched_item = ompi_list_get_first(schedlist) ; for (sched_item = ompi_list_get_first(schedlist) ;
sched_item != ompi_list_get_end(schedlist) ; sched_item != ompi_list_get_end(schedlist) ;

Просмотреть файл

@ -21,7 +21,11 @@
mpiruntime/mpiruntime.h directly */ mpiruntime/mpiruntime.h directly */
#include "mpi/runtime/mpiruntime.h" #include "mpi/runtime/mpiruntime.h"
/* constants for spawn constraints */
#define OMPI_RTE_SPAWN_MULTI_CELL 0x0001 #define OMPI_RTE_SPAWN_MULTI_CELL 0x0001
#define OMPI_RTE_SPAWN_DAEMON 0x0002
#define OMPI_RTE_SPAWN_HIGH_QOS 0x0004
#define OMPI_RTE_SPAWN_FROM_MPI 0x0008
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {