
* add more selection criteria for the pcm selection code

* remove the ns param switch - always use the ns at this point
* clean up some of the evil rms code that wasn't multi-pcm safe.  still
  have some work on this front

This commit was SVN r2779.
This commit is contained in:
Brian Barrett 2004-09-20 18:25:00 +00:00
parent efc09dfc94
commit 2dc55f12da
7 changed files with 43 additions and 45 deletions

View file

@@ -49,14 +49,8 @@ mca_pcm_rms_allocate_resources(struct mca_pcm_base_module_1_0_0_t* me,
     /* For now, just punt on whether we can actually fullfill the request or not */
     total_procs = (nodes == 0) ? procs : nodes * procs;
-    if (mca_pcm_rms_use_ns) {
-        node_alloc->start =
+    node_alloc->start =
         (int) ompi_name_server.reserve_range(jobid, total_procs);
-    } else {
-        /* BWB - remove the USE_NS code once the failures in PTL / NS
-           due to unexpected offsets are fixed up */
-        node_alloc->start = 0;
-    }
     node_alloc->nodes = nodes;
     node_alloc->count = procs;
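
The effect of the hunk above is that every allocation now reserves its vpid
block through the name server instead of falling back to a hardcoded 0. A
standalone sketch of the reservation idea, with a simple counter standing in
for the real name server (none of these names are OMPI API):

    #include <stdio.h>

    static int next_vpid = 0;   /* stand-in for the name server's counter */

    static int reserve_range(int total_procs)
    {
        int start = next_vpid;
        next_vpid += total_procs;   /* the real service does this centrally */
        return start;
    }

    int main(void)
    {
        int job_a = reserve_range(4);   /* vpids 0..3 */
        int job_b = reserve_range(8);   /* vpids 4..11, no overlap */
        printf("job_a starts at %d, job_b starts at %d\n", job_a, job_b);
        return 0;
    }

Because every caller goes through the same central counter, two concurrent
jobs can never be handed overlapping process names, which is what made the
old "start at 0" fallback unsafe.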

View file

@@ -95,6 +95,5 @@ extern "C" {
  * Module variables
  */
 extern int mca_pcm_rms_output;
-extern int mca_pcm_rms_use_ns;
 
 #endif /* MCA_PCM_RMS_H_ */

View file

@@ -18,6 +18,7 @@
 #include "mca/pcm/base/base.h"
 #include "mca/llm/base/base.h"
 #include "util/path.h"
+#include "runtime/runtime.h"
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -49,17 +50,6 @@ mca_pcm_base_component_1_0_0_t mca_pcm_rms_component = {
 };
 
-struct mca_pcm_base_module_1_0_0_t mca_pcm_rms_1_0_0 = {
-    mca_pcm_rms_allocate_resources,
-    mca_pcm_rms_can_spawn,
-    mca_pcm_rms_spawn_procs,
-    mca_pcm_rms_kill_proc,
-    mca_pcm_rms_kill_job,
-    mca_pcm_rms_deallocate_resources,
-    mca_pcm_rms_finalize
-};
-
 /* need to create output stream to dump in file */
 ompi_output_stream_t mca_pcm_rms_output_stream = {
     false, /* lds_is_debugging  BWB - change me for release */
@@ -81,30 +71,28 @@ ompi_output_stream_t mca_pcm_rms_output_stream = {
  */
 static int mca_pcm_rms_param_priority;
 static int mca_pcm_rms_param_debug;
-static int mca_pcm_rms_param_use_ns;
 
 /*
  * Component variables.  All of these are shared among the module
  * instances, so they don't need to go in a special structure or
  * anything.
  */
-int mca_pcm_rms_output = 0;
-int mca_pcm_rms_use_ns;
+int mca_pcm_rms_output = -1;
 
 int
 mca_pcm_rms_component_open(void)
 {
     mca_pcm_rms_param_debug =
         mca_base_param_register_int("pcm", "rms", "debug", NULL, 100);
     mca_pcm_rms_param_priority =
         mca_base_param_register_int("pcm", "rms", "priority", NULL, 5);
-    mca_pcm_rms_param_use_ns =
-        mca_base_param_register_int("pcm", "rms", "use_ns", NULL, 1);
 
     mca_pcm_rms_job_list_init();
-    mca_pcm_rms_output = ompi_output_open(&mca_pcm_rms_output_stream);
 
     return OMPI_SUCCESS;
 }
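
Two details here: the debug output handle changes from 0 to -1, and the
ompi_output_open() call moves out of component_open() into init() (next
hunk). A minimal standalone sketch of that sentinel pattern, assuming -1 is
meant as a "not yet opened" marker; the output_open()/debug_log() helpers
below are mocks, not OMPI functions:

    #include <stdio.h>

    static int debug_handle = -1;               /* -1: stream not opened yet */

    static int output_open(void) { return 1; }  /* mock: returns a valid handle */

    static void debug_log(const char *msg)
    {
        if (debug_handle < 0) return;           /* never opened: stay quiet */
        printf("[%d] %s\n", debug_handle, msg);
    }

    int main(void)
    {
        debug_log("dropped: handle is still -1");
        debug_handle = output_open();           /* what init() now does */
        debug_log("visible after open");
        return 0;
    }

With the sentinel, a component that is never selected never opens a stream,
and stray logging before init() is harmless.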
@@ -127,27 +115,49 @@ mca_pcm_rms_init(int *priority,
     int debug;
     char *prun;
     int num_cpus;
+    mca_pcm_base_module_t *me;
 
     /* debugging gorp */
     mca_base_param_lookup_int(mca_pcm_rms_param_debug, &debug);
+    mca_pcm_rms_output = ompi_output_open(&mca_pcm_rms_output_stream);
     ompi_output_set_verbosity(mca_pcm_rms_output, debug);
 
     /* get our priority - if 0, we don't run */
     mca_base_param_lookup_int(mca_pcm_rms_param_priority, priority);
     if (0 == priority) return NULL;
 
-    mca_base_param_lookup_int(mca_pcm_rms_param_use_ns, &mca_pcm_rms_use_ns);
-
     /* fill in params */
     *allow_multi_user_threads = true;
     *have_hidden_threads = false;
 
     /* check constrains */
     /* no daemon */
     if (0 != (constraints & OMPI_RTE_SPAWN_DAEMON)) return NULL;
     /* no MPI_COMM_SPAWN* */
     if (0 != (constraints & OMPI_RTE_SPAWN_FROM_MPI)) return NULL;
 
     /* see if we are an RMS system */
     /* BWB - is there a better way to do this */
     num_cpus = rms_numCpus(NULL);
     if (num_cpus <= 0) return NULL;
 
     /* poke around for prun */
     prun = ompi_path_env_findv("prun", X_OK, environ, NULL);
     if (NULL == prun) return NULL;
     free(prun);
 
-    return &mca_pcm_rms_1_0_0;
+    /* ok, now let's try to fire up */
+    me = malloc(sizeof(mca_pcm_base_module_t));
+    if (NULL == me) return NULL;
+
+    me->pcm_allocate_resources = mca_pcm_rms_allocate_resources;
+    me->pcm_can_spawn = mca_pcm_rms_can_spawn;
+    me->pcm_spawn_procs = mca_pcm_rms_spawn_procs;
+    me->pcm_kill_proc = mca_pcm_rms_kill_proc;
+    me->pcm_kill_job = mca_pcm_rms_kill_job;
+    me->pcm_deallocate_resources = mca_pcm_rms_deallocate_resources;
+    me->pcm_finalize = mca_pcm_rms_finalize;
+
+    return me;
 }
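
This hunk is the heart of the multi-pcm-safe cleanup: init() now returns a
freshly malloc'd module instead of a pointer to the shared static struct
deleted earlier in this file, so two active PCM modules no longer stomp on
each other's state. A self-contained sketch of that per-instance pattern,
using generic placeholder names rather than the OMPI types:

    #include <stdio.h>
    #include <stdlib.h>

    struct module {
        int (*do_work)(struct module *self);
        int instance_id;                 /* per-instance state */
    };

    static int do_work_impl(struct module *self)
    {
        printf("module instance %d working\n", self->instance_id);
        return 0;
    }

    static struct module *module_init(int id)
    {
        struct module *me = malloc(sizeof(*me));
        if (NULL == me) return NULL;
        me->do_work = do_work_impl;
        me->instance_id = id;            /* no longer shared across instances */
        return me;
    }

    int main(void)
    {
        struct module *a = module_init(1);
        struct module *b = module_init(2);   /* two instances coexist safely */
        if (NULL != a) { a->do_work(a); free(a); }
        if (NULL != b) { b->do_work(b); free(b); }
        return 0;
    }

The corresponding cost, hinted at by "still have some work on this front" in
the commit message, is that finalize() now has to free the instance it is
handed instead of treating the module as eternal.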

View file

@@ -59,7 +59,6 @@ extern "C" {
         int fast_boot;
         int ignore_stderr;
         char* rsh_agent;
-        int use_ns;
     };
     typedef struct mca_pcm_rsh_module_t mca_pcm_rsh_module_t;

View file

@@ -71,13 +71,12 @@ static int mca_pcm_rsh_param_ignore_stderr;
 static int mca_pcm_rsh_param_priority;
 static int mca_pcm_rsh_param_agent;
 static int mca_pcm_rsh_param_debug;
-static int mca_pcm_rsh_param_use_ns;
 
 /*
  * component variables
  */
 /* debugging output stream */
-int mca_pcm_rsh_output = 0;
+int mca_pcm_rsh_output = -1;
int
@@ -96,8 +95,6 @@ mca_pcm_rsh_component_open(void)
         mca_base_param_register_int("pcm", "rsh", "fast", NULL, 1);
     mca_pcm_rsh_param_ignore_stderr =
         mca_base_param_register_int("pcm", "rsh", "ignore_stderr", NULL, 0);
-    mca_pcm_rsh_param_use_ns =
-        mca_base_param_register_int("pcm", "rsh", "use_ns", NULL, 1);
     mca_pcm_rsh_param_priority =
         mca_base_param_register_int("pcm", "rsh", "priority", NULL, 1);
@@ -148,18 +145,16 @@ mca_pcm_rsh_init(int *priority,
                                  &(me->ignore_stderr));
     mca_base_param_lookup_string(mca_pcm_rsh_param_agent,
                                  &(me->rsh_agent));
-    mca_base_param_lookup_int(mca_pcm_rsh_param_use_ns,
-                              &(me->use_ns));
 
     *allow_multi_user_threads = true;
     *have_hidden_threads = false;
 
-    ret = mca_llm_base_select("pcm", &(me->llm),
+    ret = mca_llm_base_select("rsh", &(me->llm),
                               allow_multi_user_threads,
                               have_hidden_threads);
     if (OMPI_SUCCESS != ret) {
         /* well, that can't be good.  guess we can't run */
-        ompi_output_verbose(5, mca_pcm_rsh_output, "select: no llm found");
+        ompi_output_verbose(5, mca_pcm_rsh_output, "init: no llm found");
         return NULL;
     }

View file

@@ -91,11 +91,8 @@ mca_pcm_rsh_spawn_procs(struct mca_pcm_base_module_1_0_0_t* me_super,
 
     /* BWB - make sure vpids are reserved */
     local_start_vpid = 0;
-    if (me->use_ns) {
-        global_start_vpid = (int) ompi_name_server.reserve_range(jobid, num_procs);
-    } else {
-        global_start_vpid = 0;
-    }
+    global_start_vpid = (int) ompi_name_server.reserve_range(jobid, num_procs);
 
     for (sched_item = ompi_list_get_first(schedlist) ;
          sched_item != ompi_list_get_end(schedlist) ;

View file

@@ -21,7 +21,11 @@
    mpiruntime/mpiruntime.h directly */
 #include "mpi/runtime/mpiruntime.h"
 
+/* constants for spawn constraints */
+#define OMPI_RTE_SPAWN_MULTI_CELL 0x0001
+#define OMPI_RTE_SPAWN_DAEMON     0x0002
+#define OMPI_RTE_SPAWN_HIGH_QOS   0x0004
+#define OMPI_RTE_SPAWN_FROM_MPI   0x0008
 
 #ifdef __cplusplus
 extern "C" {