* add more selection criteria to the pcm selection code
* remove the ns param switch - always use the ns at this point * clean up some of the evil rms code that wasn't multi-pcm safe. Still have some work to do on this front. This commit was SVN r2779.
Этот коммит содержится в:
родитель
efc09dfc94
Коммит
2dc55f12da
@ -49,14 +49,8 @@ mca_pcm_rms_allocate_resources(struct mca_pcm_base_module_1_0_0_t* me,
|
||||
|
||||
/* For now, just punt on whether we can actually fulfill the request or not */
|
||||
total_procs = (nodes == 0) ? procs : nodes * procs;
|
||||
if (mca_pcm_rms_use_ns) {
|
||||
node_alloc->start =
|
||||
node_alloc->start =
|
||||
(int) ompi_name_server.reserve_range(jobid, total_procs);
|
||||
} else {
|
||||
/* BWB - remove the USE_NS code once the failures in PTL / NS
|
||||
due to unexpected offsets are fixed up */
|
||||
node_alloc->start = 0;
|
||||
}
|
||||
node_alloc->nodes = nodes;
|
||||
node_alloc->count = procs;
|
||||
|
||||
|
@ -95,6 +95,5 @@ extern "C" {
|
||||
* Module variables
|
||||
*/
|
||||
extern int mca_pcm_rms_output;
|
||||
extern int mca_pcm_rms_use_ns;
|
||||
|
||||
#endif /* MCA_PCM_RMS_H_ */
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include "mca/pcm/base/base.h"
|
||||
#include "mca/llm/base/base.h"
|
||||
#include "util/path.h"
|
||||
#include "runtime/runtime.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
@ -49,17 +50,6 @@ mca_pcm_base_component_1_0_0_t mca_pcm_rms_component = {
|
||||
};
|
||||
|
||||
|
||||
struct mca_pcm_base_module_1_0_0_t mca_pcm_rms_1_0_0 = {
|
||||
mca_pcm_rms_allocate_resources,
|
||||
mca_pcm_rms_can_spawn,
|
||||
mca_pcm_rms_spawn_procs,
|
||||
mca_pcm_rms_kill_proc,
|
||||
mca_pcm_rms_kill_job,
|
||||
mca_pcm_rms_deallocate_resources,
|
||||
mca_pcm_rms_finalize
|
||||
};
|
||||
|
||||
|
||||
/* need to create output stream to dump in file */
|
||||
ompi_output_stream_t mca_pcm_rms_output_stream = {
|
||||
false, /* lds_is_debugging BWB - change me for release */
|
||||
@ -81,30 +71,28 @@ ompi_output_stream_t mca_pcm_rms_output_stream = {
|
||||
*/
|
||||
static int mca_pcm_rms_param_priority;
|
||||
static int mca_pcm_rms_param_debug;
|
||||
static int mca_pcm_rms_param_use_ns;
|
||||
|
||||
/*
|
||||
* Component variables. All of these are shared among the module
|
||||
* instances, so they don't need to go in a special structure or
|
||||
* anything.
|
||||
*/
|
||||
int mca_pcm_rms_output = 0;
|
||||
int mca_pcm_rms_use_ns;
|
||||
int mca_pcm_rms_output = -1;
|
||||
|
||||
|
||||
int
|
||||
mca_pcm_rms_component_open(void)
|
||||
{
|
||||
mca_pcm_rms_param_debug =
|
||||
mca_base_param_register_int("pcm", "rms", "debug", NULL, 100);
|
||||
mca_pcm_rms_param_debug =
|
||||
mca_base_param_register_int("pcm", "rms", "debug", NULL, 100);
|
||||
|
||||
mca_pcm_rms_param_priority =
|
||||
mca_base_param_register_int("pcm", "rms", "priority", NULL, 5);
|
||||
|
||||
mca_pcm_rms_param_use_ns =
|
||||
mca_base_param_register_int("pcm", "rms", "use_ns", NULL, 1);
|
||||
|
||||
mca_pcm_rms_job_list_init();
|
||||
|
||||
mca_pcm_rms_output = ompi_output_open(&mca_pcm_rms_output_stream);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -127,27 +115,49 @@ mca_pcm_rms_init(int *priority,
|
||||
int debug;
|
||||
char *prun;
|
||||
int num_cpus;
|
||||
mca_pcm_base_module_t *me;
|
||||
|
||||
/* debugging gorp */
|
||||
mca_base_param_lookup_int(mca_pcm_rms_param_debug, &debug);
|
||||
mca_pcm_rms_output = ompi_output_open(&mca_pcm_rms_output_stream);
|
||||
ompi_output_set_verbosity(mca_pcm_rms_output, debug);
|
||||
|
||||
/* get our priority - if 0, we don't run */
|
||||
mca_base_param_lookup_int(mca_pcm_rms_param_priority, priority);
|
||||
if (0 == priority) return NULL;
|
||||
|
||||
mca_base_param_lookup_int(mca_pcm_rms_param_use_ns, &mca_pcm_rms_use_ns);
|
||||
/* fill in params */
|
||||
|
||||
*allow_multi_user_threads = true;
|
||||
*have_hidden_threads = false;
|
||||
|
||||
/* check constraints */
|
||||
/* no daemon */
|
||||
if (0 != (constraints & OMPI_RTE_SPAWN_DAEMON)) return NULL;
|
||||
/* no MPI_COMM_SPAWN* */
|
||||
if (0 != (constraints & OMPI_RTE_SPAWN_FROM_MPI)) return NULL;
|
||||
|
||||
/* see if we are an RMS system */
|
||||
/* BWB - is there a better way to do this */
|
||||
num_cpus = rms_numCpus(NULL);
|
||||
if (num_cpus <= 0) return NULL;
|
||||
|
||||
/* poke around for prun */
|
||||
prun = ompi_path_env_findv("prun", X_OK, environ, NULL);
|
||||
if (NULL == prun) return NULL;
|
||||
free(prun);
|
||||
|
||||
return &mca_pcm_rms_1_0_0;
|
||||
/* ok, now let's try to fire up */
|
||||
me = malloc(sizeof(mca_pcm_base_module_t));
|
||||
if (NULL == me) return NULL;
|
||||
|
||||
me->pcm_allocate_resources = mca_pcm_rms_allocate_resources;
|
||||
me->pcm_can_spawn = mca_pcm_rms_can_spawn;
|
||||
me->pcm_spawn_procs = mca_pcm_rms_spawn_procs;
|
||||
me->pcm_kill_proc = mca_pcm_rms_kill_proc;
|
||||
me->pcm_kill_job = mca_pcm_rms_kill_job;
|
||||
me->pcm_deallocate_resources = mca_pcm_rms_deallocate_resources;
|
||||
me->pcm_finalize = mca_pcm_rms_finalize;
|
||||
|
||||
return me;
|
||||
}
|
||||
|
||||
|
||||
|
@ -59,7 +59,6 @@ extern "C" {
|
||||
int fast_boot;
|
||||
int ignore_stderr;
|
||||
char* rsh_agent;
|
||||
int use_ns;
|
||||
};
|
||||
typedef struct mca_pcm_rsh_module_t mca_pcm_rsh_module_t;
|
||||
|
||||
|
@ -71,13 +71,12 @@ static int mca_pcm_rsh_param_ignore_stderr;
|
||||
static int mca_pcm_rsh_param_priority;
|
||||
static int mca_pcm_rsh_param_agent;
|
||||
static int mca_pcm_rsh_param_debug;
|
||||
static int mca_pcm_rsh_param_use_ns;
|
||||
|
||||
/*
|
||||
* component variables
|
||||
*/
|
||||
/* debugging output stream */
|
||||
int mca_pcm_rsh_output = 0;
|
||||
int mca_pcm_rsh_output = -1;
|
||||
|
||||
|
||||
int
|
||||
@ -96,8 +95,6 @@ mca_pcm_rsh_component_open(void)
|
||||
mca_base_param_register_int("pcm", "rsh", "fast", NULL, 1);
|
||||
mca_pcm_rsh_param_ignore_stderr =
|
||||
mca_base_param_register_int("pcm", "rsh", "ignore_stderr", NULL, 0);
|
||||
mca_pcm_rsh_param_use_ns =
|
||||
mca_base_param_register_int("pcm", "rsh", "use_ns", NULL, 1);
|
||||
|
||||
mca_pcm_rsh_param_priority =
|
||||
mca_base_param_register_int("pcm", "rsh", "priority", NULL, 1);
|
||||
@ -148,18 +145,16 @@ mca_pcm_rsh_init(int *priority,
|
||||
&(me->ignore_stderr));
|
||||
mca_base_param_lookup_string(mca_pcm_rsh_param_agent,
|
||||
&(me->rsh_agent));
|
||||
mca_base_param_lookup_int(mca_pcm_rsh_param_use_ns,
|
||||
&(me->use_ns));
|
||||
|
||||
*allow_multi_user_threads = true;
|
||||
*have_hidden_threads = false;
|
||||
|
||||
ret = mca_llm_base_select("pcm", &(me->llm),
|
||||
ret = mca_llm_base_select("rsh", &(me->llm),
|
||||
allow_multi_user_threads,
|
||||
have_hidden_threads);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
/* well, that can't be good. guess we can't run */
|
||||
ompi_output_verbose(5, mca_pcm_rsh_output, "select: no llm found");
|
||||
ompi_output_verbose(5, mca_pcm_rsh_output, "init: no llm found");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -91,11 +91,8 @@ mca_pcm_rsh_spawn_procs(struct mca_pcm_base_module_1_0_0_t* me_super,
|
||||
|
||||
/* BWB - make sure vpids are reserved */
|
||||
local_start_vpid = 0;
|
||||
if (me->use_ns) {
|
||||
global_start_vpid = (int) ompi_name_server.reserve_range(jobid, num_procs);
|
||||
} else {
|
||||
global_start_vpid = 0;
|
||||
}
|
||||
global_start_vpid = (int) ompi_name_server.reserve_range(jobid, num_procs);
|
||||
|
||||
|
||||
for (sched_item = ompi_list_get_first(schedlist) ;
|
||||
sched_item != ompi_list_get_end(schedlist) ;
|
||||
|
@ -21,7 +21,11 @@
|
||||
mpiruntime/mpiruntime.h directly */
|
||||
#include "mpi/runtime/mpiruntime.h"
|
||||
|
||||
/* constants for spawn constraints */
|
||||
#define OMPI_RTE_SPAWN_MULTI_CELL 0x0001
|
||||
#define OMPI_RTE_SPAWN_DAEMON 0x0002
|
||||
#define OMPI_RTE_SPAWN_HIGH_QOS 0x0004
|
||||
#define OMPI_RTE_SPAWN_FROM_MPI 0x0008
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user