* reorg the RSH pcm to not have any static data structure (keeping
  everything in the "this" structure) so that multiple RSH pcm
  modules can be loaded at once

This commit was SVN r2598.
Этот коммит содержится в:
Brian Barrett 2004-09-10 16:41:25 +00:00
родитель 4322c77874
Коммит 75b3d0691a
4 изменённых файлов: 93 добавлений и 85 удалений

Просмотреть файл

@ -51,25 +51,26 @@ extern "C" {
mca_ns_base_jobid_t jobid,
ompi_list_t *nodelist);
struct mca_pcm_rsh_module_t {
mca_pcm_base_module_t super;
mca_llm_base_module_t llm;
int no_profile;
int fast_boot;
int ignore_stderr;
char* rsh_agent;
int use_ns;
};
typedef struct mca_pcm_rsh_module_t mca_pcm_rsh_module_t;
#ifdef __cplusplus
}
#endif
/*
* Module variables
* component variables
*/
/* should we avoid running .profile, even if the shell says we should */
extern int mca_pcm_rsh_no_profile;
/* should we assume same shell on remote as locally? */
extern int mca_pcm_rsh_fast;
/* should we ignore things on stderr? */
extern int mca_pcm_rsh_ignore_stderr;
/* how should we fire procs up on the remote side? */
extern char *mca_pcm_rsh_agent;
/* debugging output stream */
extern int mca_pcm_rsh_output;
extern int mca_pcm_rsh_use_ns;
extern mca_llm_base_module_t mca_pcm_rsh_llm;
#endif /* MCA_PCM_RSH_H_ */

Просмотреть файл

@ -14,18 +14,22 @@
ompi_list_t *
mca_pcm_rsh_allocate_resources(struct mca_pcm_base_module_1_0_0_t* me,
mca_pcm_rsh_allocate_resources(struct mca_pcm_base_module_1_0_0_t* me_super,
mca_ns_base_jobid_t jobid,
int nodes, int procs)
{
return mca_pcm_rsh_llm.llm_allocate_resources(jobid, nodes, procs);
mca_pcm_rsh_module_t *me = (mca_pcm_rsh_module_t*) me_super;
return me->llm.llm_allocate_resources(jobid, nodes, procs);
}
int
mca_pcm_rsh_deallocate_resources(struct mca_pcm_base_module_1_0_0_t* me,
mca_pcm_rsh_deallocate_resources(struct mca_pcm_base_module_1_0_0_t* me_super,
mca_ns_base_jobid_t jobid,
ompi_list_t *nodelist)
{
return mca_pcm_rsh_llm.llm_deallocate_resources(jobid, nodelist);
mca_pcm_rsh_module_t *me = (mca_pcm_rsh_module_t*) me_super;
return me->llm.llm_deallocate_resources(jobid, nodelist);
}

Просмотреть файл

@ -45,18 +45,6 @@ mca_pcm_base_component_1_0_0_t mca_pcm_rsh_component = {
};
struct mca_pcm_base_module_1_0_0_t mca_pcm_rsh_1_0_0 = {
mca_pcm_base_no_unique_name,
mca_pcm_rsh_allocate_resources,
mca_pcm_rsh_can_spawn,
mca_pcm_rsh_spawn_procs,
mca_pcm_rsh_kill_proc,
mca_pcm_rsh_kill_job,
mca_pcm_rsh_deallocate_resources,
mca_pcm_rsh_finalize
};
/* need to create output stream to dump in file */
ompi_output_stream_t mca_pcm_rsh_output_stream = {
false, /* lds_is_debugging BWB - change me for release */
@ -74,7 +62,7 @@ ompi_output_stream_t mca_pcm_rsh_output_stream = {
/*
* Module variables handles
* component variables handles
*/
static int mca_pcm_rsh_param_no_profile;
static int mca_pcm_rsh_param_fast;
@ -85,21 +73,11 @@ static int mca_pcm_rsh_param_debug;
static int mca_pcm_rsh_param_use_ns;
/*
* Module variables
* component variables
*/
/* should we avoid running .profile, even if the shell says we should */
int mca_pcm_rsh_no_profile;
/* should we assume same shell on remote as locally? */
int mca_pcm_rsh_fast;
/* should we ignore things on stderr? */
int mca_pcm_rsh_ignore_stderr;
/* how should we fire procs up on the remote side? */
char *mca_pcm_rsh_agent;
/* debugging output stream */
int mca_pcm_rsh_output = 0;
int mca_pcm_rsh_use_ns;
mca_llm_base_module_t mca_pcm_rsh_llm;
int
mca_pcm_rsh_component_open(void)
@ -118,11 +96,13 @@ mca_pcm_rsh_component_open(void)
mca_pcm_rsh_param_ignore_stderr =
mca_base_param_register_int("pcm", "rsh", "ignore_stderr", NULL, 0);
mca_pcm_rsh_param_use_ns =
mca_base_param_register_int("pcm", "rsh", "use_ns", NULL, 0);
mca_base_param_register_int("pcm", "rsh", "use_ns", NULL, 1);
mca_pcm_rsh_param_priority =
mca_base_param_register_int("pcm", "rsh", "priority", NULL, 1);
mca_pcm_rsh_output = ompi_output_open(&mca_pcm_rsh_output_stream);
return OMPI_SUCCESS;
}
@ -130,7 +110,11 @@ mca_pcm_rsh_component_open(void)
int
mca_pcm_rsh_component_close(void)
{
return OMPI_SUCCESS;
if (mca_pcm_rsh_output > 0) {
ompi_output_close(mca_pcm_rsh_output);
}
return OMPI_SUCCESS;
}
@ -142,30 +126,34 @@ mca_pcm_rsh_init(int *priority,
{
int debug;
int ret;
mca_pcm_rsh_module_t *me;
/* do debugging gorp */
mca_base_param_lookup_int(mca_pcm_rsh_param_debug, &debug);
mca_pcm_rsh_output = ompi_output_open(&mca_pcm_rsh_output_stream);
ompi_output_set_verbosity(mca_pcm_rsh_output, debug);
/* get our priority */
mca_base_param_lookup_int(mca_pcm_rsh_param_priority, priority);
me = malloc(sizeof(mca_pcm_rsh_module_t));
if (NULL == me) return NULL;
/* fill in params */
mca_base_param_lookup_int(mca_pcm_rsh_param_no_profile,
&mca_pcm_rsh_no_profile);
&(me->no_profile));
mca_base_param_lookup_int(mca_pcm_rsh_param_fast,
&mca_pcm_rsh_fast);
&(me->fast_boot));
mca_base_param_lookup_int(mca_pcm_rsh_param_ignore_stderr,
&mca_pcm_rsh_ignore_stderr);
mca_base_param_lookup_int(mca_pcm_rsh_param_ignore_stderr,
&mca_pcm_rsh_ignore_stderr);
&(me->ignore_stderr));
mca_base_param_lookup_string(mca_pcm_rsh_param_agent,
&mca_pcm_rsh_agent);
&(me->rsh_agent));
mca_base_param_lookup_int(mca_pcm_rsh_param_use_ns,
&(me->use_ns));
*allow_multi_user_threads = true;
*have_hidden_threads = false;
mca_base_param_lookup_int(mca_pcm_rsh_param_use_ns,
&mca_pcm_rsh_use_ns);
ret = mca_llm_base_select("pcm", &mca_pcm_rsh_llm,
ret = mca_llm_base_select("pcm", &(me->llm),
allow_multi_user_threads,
have_hidden_threads);
if (OMPI_SUCCESS != ret) {
@ -174,26 +162,41 @@ mca_pcm_rsh_init(int *priority,
return NULL;
}
/*
* fill in the function pointers
*/
me->super.pcm_get_unique_name = mca_pcm_base_no_unique_name;
me->super.pcm_allocate_resources = mca_pcm_rsh_allocate_resources;
me->super.pcm_can_spawn = mca_pcm_rsh_can_spawn;
me->super.pcm_spawn_procs = mca_pcm_rsh_spawn_procs;
me->super.pcm_kill_proc = mca_pcm_rsh_kill_proc;
me->super.pcm_kill_job = mca_pcm_rsh_kill_job;
me->super.pcm_deallocate_resources = mca_pcm_rsh_deallocate_resources;
me->super.pcm_finalize = mca_pcm_rsh_finalize;
/* DO SOME PARAM "FIXING" */
/* BWB - remove param fixing before 1.0 */
if (0 == mca_pcm_rsh_no_profile) {
if (0 == me->no_profile) {
printf("WARNING: reseting mca_pcm_rsh_no_profile to 1\n");
mca_pcm_rsh_no_profile = 1;
me->no_profile = 1;
}
if (0 == mca_pcm_rsh_fast) {
if (0 == me->fast_boot) {
printf("WARNING: reseting mca_pcm_rsh_fast to 1\n");
mca_pcm_rsh_fast = 1;
me->fast_boot = 1;
}
return &mca_pcm_rsh_1_0_0;
return (mca_pcm_base_module_t*) me;
}
int
mca_pcm_rsh_finalize(struct mca_pcm_base_module_1_0_0_t* me)
mca_pcm_rsh_finalize(struct mca_pcm_base_module_1_0_0_t* me_super)
{
if (mca_pcm_rsh_output > 0) {
ompi_output_close(mca_pcm_rsh_output);
mca_pcm_rsh_module_t *me = (mca_pcm_rsh_module_t*) me_super;
if (me != NULL) {
if (NULL != me->rsh_agent) free(me->rsh_agent);
free(me);
}
return OMPI_SUCCESS;

Просмотреть файл

@ -30,21 +30,18 @@
#include "util/numtostr.h"
#include "mca/ns/base/base.h"
#if 1
#define BOOTAGENT "mca_pcm_rsh_bootproxy"
#else
#define BOOTAGENT "cat"
#endif
#define PRS_BUFSIZE 1024
static int internal_spawn_proc(mca_ns_base_jobid_t jobid, ompi_rte_node_schedule_t *sched,
static int internal_spawn_proc(mca_pcm_rsh_module_t *me,
mca_ns_base_jobid_t jobid, ompi_rte_node_schedule_t *sched,
ompi_list_t *hostlist,
int my_start_vpid, int global_start_vpid,
int num_procs);
bool
mca_pcm_rsh_can_spawn(struct mca_pcm_base_module_1_0_0_t* me)
mca_pcm_rsh_can_spawn(struct mca_pcm_base_module_1_0_0_t* me_super)
{
/* we can always try to rsh some more... Might not always work as
* the caller hopes
@ -54,9 +51,10 @@ mca_pcm_rsh_can_spawn(struct mca_pcm_base_module_1_0_0_t* me)
int
mca_pcm_rsh_spawn_procs(struct mca_pcm_base_module_1_0_0_t* me,
mca_pcm_rsh_spawn_procs(struct mca_pcm_base_module_1_0_0_t* me_super,
mca_ns_base_jobid_t jobid, ompi_list_t *schedlist)
{
mca_pcm_rsh_module_t *me = (mca_pcm_rsh_module_t*) me_super;
ompi_list_item_t *sched_item, *node_item, *host_item;
ompi_rte_node_schedule_t *sched;
ompi_rte_node_allocation_t *node;
@ -93,7 +91,7 @@ mca_pcm_rsh_spawn_procs(struct mca_pcm_base_module_1_0_0_t* me,
/* BWB - make sure vpids are reserved */
local_start_vpid = 0;
if (mca_pcm_rsh_use_ns) {
if (me->use_ns) {
global_start_vpid = (int) ompi_name_server.reserve_range(jobid, num_procs);
} else {
global_start_vpid = 0;
@ -141,7 +139,7 @@ mca_pcm_rsh_spawn_procs(struct mca_pcm_base_module_1_0_0_t* me,
/* do the launch to the first node in the list, passing
him the rest of the list */
ret = internal_spawn_proc(jobid, sched, &launch,
ret = internal_spawn_proc(me, jobid, sched, &launch,
local_start_vpid, global_start_vpid,
num_procs);
if (OMPI_SUCCESS != ret) {
@ -173,7 +171,8 @@ mca_pcm_rsh_spawn_procs(struct mca_pcm_base_module_1_0_0_t* me,
static int
internal_need_profile(mca_llm_base_hostfile_node_t *start_node,
internal_need_profile(mca_pcm_rsh_module_t *me,
mca_llm_base_hostfile_node_t *start_node,
int stderr_is_error, bool *needs_profile)
{
struct passwd *p;
@ -190,16 +189,16 @@ internal_need_profile(mca_llm_base_hostfile_node_t *start_node,
*
* The following logic is used:
*
* if mca_pcm_rsh_no_profile is 1, don't do profile
* if mca_pcm_rsh_fast is 1, remote shell is assumed same as local
* if me->no_profile is 1, don't do profile
* if me->fast_boot is 1, remote shell is assumed same as local
* if shell is sh/ksh, run profile, otherwise don't
*/
if (1 == mca_pcm_rsh_no_profile) {
if (1 == me->no_profile) {
*needs_profile = false;
return OMPI_SUCCESS;
}
if (1 == mca_pcm_rsh_fast) {
if (1 == me->fast_boot) {
p = getpwuid(getuid());
if (NULL == p) return OMPI_ERROR;
@ -214,7 +213,7 @@ internal_need_profile(mca_llm_base_hostfile_node_t *start_node,
/* we have to look at the other side and get our shell */
username = mca_pcm_base_get_username(start_node);
cmdv = ompi_argv_split(mca_pcm_rsh_agent, ' ');
cmdv = ompi_argv_split(me->rsh_agent, ' ');
cmdc = ompi_argv_count(cmdv);
ompi_argv_append(&cmdc, &cmdv, start_node->hostname);
@ -279,7 +278,8 @@ cleanup:
static int
internal_spawn_proc(mca_ns_base_jobid_t jobid, ompi_rte_node_schedule_t *sched,
internal_spawn_proc(mca_pcm_rsh_module_t *me,
mca_ns_base_jobid_t jobid, ompi_rte_node_schedule_t *sched,
ompi_list_t *hostlist, int my_start_vpid,
int global_start_vpid, int num_procs)
{
@ -290,7 +290,7 @@ internal_spawn_proc(mca_ns_base_jobid_t jobid, ompi_rte_node_schedule_t *sched,
char *cmd0 = NULL;
int cmdc = 0;
char *printable = NULL;
int stderr_is_error = mca_pcm_rsh_ignore_stderr == 0 ? 1 : 0;
int stderr_is_error = me->ignore_stderr == 0 ? 1 : 0;
char *username = NULL;
int ret;
pid_t pid;
@ -306,7 +306,7 @@ internal_spawn_proc(mca_ns_base_jobid_t jobid, ompi_rte_node_schedule_t *sched,
/*
* Check to see if we need to do the .profile thing
*/
ret = internal_need_profile(start_node, stderr_is_error,
ret = internal_need_profile(me, start_node, stderr_is_error,
&needs_profile);
if (OMPI_SUCCESS != ret) {
goto cleanup;
@ -318,7 +318,7 @@ internal_spawn_proc(mca_ns_base_jobid_t jobid, ompi_rte_node_schedule_t *sched,
*/
/* build up the rsh command part */
cmdv = ompi_argv_split(mca_pcm_rsh_agent, ' ');
cmdv = ompi_argv_split(me->rsh_agent, ' ');
cmdc = ompi_argv_count(cmdv);
ompi_argv_append(&cmdc, &cmdv, start_node->hostname);
@ -335,7 +335,7 @@ internal_spawn_proc(mca_ns_base_jobid_t jobid, ompi_rte_node_schedule_t *sched,
/* build the command to start */
ompi_argv_append(&cmdc, &cmdv, BOOTAGENT);
#if 1
/* starting vpid for launchee's procs */
tmp = ltostr(my_start_vpid);
ompi_argv_append(&cmdc, &cmdv, "--local_start_vpid");
@ -353,7 +353,7 @@ internal_spawn_proc(mca_ns_base_jobid_t jobid, ompi_rte_node_schedule_t *sched,
ompi_argv_append(&cmdc, &cmdv, "--num_procs");
ompi_argv_append(&cmdc, &cmdv, tmp);
free(tmp);
#endif
/* add the end of the .profile thing if required */
if (needs_profile) {
ompi_argv_append(&cmdc, &cmdv, ")");