From 75b3d0691a2d5c17e3b7748739ebe56bb5eadf86 Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Fri, 10 Sep 2004 16:41:25 +0000 Subject: [PATCH] * reorg the RSH pcm to not have any static data structure (keeping everything in the "this" structure) so that multiple RSH pcm modules can be loaded at once This commit was SVN r2598. --- src/mca/pcm/rsh/src/pcm_rsh.h | 27 +++---- src/mca/pcm/rsh/src/pcm_rsh_allocate.c | 12 ++- src/mca/pcm/rsh/src/pcm_rsh_component.c | 97 +++++++++++++------------ src/mca/pcm/rsh/src/pcm_rsh_spawn.c | 42 +++++------ 4 files changed, 93 insertions(+), 85 deletions(-) diff --git a/src/mca/pcm/rsh/src/pcm_rsh.h b/src/mca/pcm/rsh/src/pcm_rsh.h index 333bec815d..6813882410 100644 --- a/src/mca/pcm/rsh/src/pcm_rsh.h +++ b/src/mca/pcm/rsh/src/pcm_rsh.h @@ -51,25 +51,26 @@ extern "C" { mca_ns_base_jobid_t jobid, ompi_list_t *nodelist); + struct mca_pcm_rsh_module_t { + mca_pcm_base_module_t super; + mca_llm_base_module_t llm; + + int no_profile; + int fast_boot; + int ignore_stderr; + char* rsh_agent; + int use_ns; + }; + typedef struct mca_pcm_rsh_module_t mca_pcm_rsh_module_t; + #ifdef __cplusplus } #endif /* - * Module variables + * component variables */ -/* should we avoid running .profile, even if the shell says we should */ -extern int mca_pcm_rsh_no_profile; -/* should we assume same shell on remote as locally? */ -extern int mca_pcm_rsh_fast; -/* should we ignore things on stderr? */ -extern int mca_pcm_rsh_ignore_stderr; -/* how should we fire procs up on the remote side? */ -extern char *mca_pcm_rsh_agent; - +/* debugging output stream */ extern int mca_pcm_rsh_output; -extern int mca_pcm_rsh_use_ns; -extern mca_llm_base_module_t mca_pcm_rsh_llm; - #endif /* MCA_PCM_RSH_H_ */ diff --git a/src/mca/pcm/rsh/src/pcm_rsh_allocate.c b/src/mca/pcm/rsh/src/pcm_rsh_allocate.c index 43ffa3166c..8ecf1ed9e0 100644 --- a/src/mca/pcm/rsh/src/pcm_rsh_allocate.c +++ b/src/mca/pcm/rsh/src/pcm_rsh_allocate.c @@ -14,18 +14,22 @@ ompi_list_t * -mca_pcm_rsh_allocate_resources(struct mca_pcm_base_module_1_0_0_t* me, +mca_pcm_rsh_allocate_resources(struct mca_pcm_base_module_1_0_0_t* me_super, mca_ns_base_jobid_t jobid, int nodes, int procs) { - return mca_pcm_rsh_llm.llm_allocate_resources(jobid, nodes, procs); + mca_pcm_rsh_module_t *me = (mca_pcm_rsh_module_t*) me_super; + + return me->llm.llm_allocate_resources(jobid, nodes, procs); } int -mca_pcm_rsh_deallocate_resources(struct mca_pcm_base_module_1_0_0_t* me, +mca_pcm_rsh_deallocate_resources(struct mca_pcm_base_module_1_0_0_t* me_super, mca_ns_base_jobid_t jobid, ompi_list_t *nodelist) { - return mca_pcm_rsh_llm.llm_deallocate_resources(jobid, nodelist); + mca_pcm_rsh_module_t *me = (mca_pcm_rsh_module_t*) me_super; + + return me->llm.llm_deallocate_resources(jobid, nodelist); } diff --git a/src/mca/pcm/rsh/src/pcm_rsh_component.c b/src/mca/pcm/rsh/src/pcm_rsh_component.c index 21bfa68273..02736c9eb0 100644 --- a/src/mca/pcm/rsh/src/pcm_rsh_component.c +++ b/src/mca/pcm/rsh/src/pcm_rsh_component.c @@ -45,18 +45,6 @@ mca_pcm_base_component_1_0_0_t mca_pcm_rsh_component = { }; -struct mca_pcm_base_module_1_0_0_t mca_pcm_rsh_1_0_0 = { - mca_pcm_base_no_unique_name, - mca_pcm_rsh_allocate_resources, - mca_pcm_rsh_can_spawn, - mca_pcm_rsh_spawn_procs, - mca_pcm_rsh_kill_proc, - mca_pcm_rsh_kill_job, - mca_pcm_rsh_deallocate_resources, - mca_pcm_rsh_finalize -}; - - /* need to create output stream to dump in file */ ompi_output_stream_t mca_pcm_rsh_output_stream = { false, /* lds_is_debugging BWB - change me for release */ @@ -74,7 +62,7 @@ ompi_output_stream_t mca_pcm_rsh_output_stream = { /* - * Module variables handles + * component variables handles */ static int mca_pcm_rsh_param_no_profile; static int mca_pcm_rsh_param_fast; @@ -85,21 +73,11 @@ static int mca_pcm_rsh_param_debug; static int mca_pcm_rsh_param_use_ns; /* - * Module variables + * component variables */ -/* should we avoid running .profile, even if the shell says we should */ -int mca_pcm_rsh_no_profile; -/* should we assume same shell on remote as locally? */ -int mca_pcm_rsh_fast; -/* should we ignore things on stderr? */ -int mca_pcm_rsh_ignore_stderr; -/* how should we fire procs up on the remote side? */ -char *mca_pcm_rsh_agent; - +/* debugging output stream */ int mca_pcm_rsh_output = 0; -int mca_pcm_rsh_use_ns; -mca_llm_base_module_t mca_pcm_rsh_llm; int mca_pcm_rsh_component_open(void) @@ -118,11 +96,13 @@ mca_pcm_rsh_component_open(void) mca_pcm_rsh_param_ignore_stderr = mca_base_param_register_int("pcm", "rsh", "ignore_stderr", NULL, 0); mca_pcm_rsh_param_use_ns = - mca_base_param_register_int("pcm", "rsh", "use_ns", NULL, 0); + mca_base_param_register_int("pcm", "rsh", "use_ns", NULL, 1); mca_pcm_rsh_param_priority = mca_base_param_register_int("pcm", "rsh", "priority", NULL, 1); + mca_pcm_rsh_output = ompi_output_open(&mca_pcm_rsh_output_stream); + return OMPI_SUCCESS; } @@ -130,7 +110,11 @@ mca_pcm_rsh_component_open(void) int mca_pcm_rsh_component_close(void) { - return OMPI_SUCCESS; + if (mca_pcm_rsh_output > 0) { + ompi_output_close(mca_pcm_rsh_output); + } + + return OMPI_SUCCESS; } @@ -142,30 +126,34 @@ mca_pcm_rsh_init(int *priority, { int debug; int ret; + mca_pcm_rsh_module_t *me; + /* do debugging gorp */ mca_base_param_lookup_int(mca_pcm_rsh_param_debug, &debug); - mca_pcm_rsh_output = ompi_output_open(&mca_pcm_rsh_output_stream); ompi_output_set_verbosity(mca_pcm_rsh_output, debug); + /* get our priority */ mca_base_param_lookup_int(mca_pcm_rsh_param_priority, priority); - + + me = malloc(sizeof(mca_pcm_rsh_module_t)); + if (NULL == me) return NULL; + + /* fill in params */ mca_base_param_lookup_int(mca_pcm_rsh_param_no_profile, - &mca_pcm_rsh_no_profile); + &(me->no_profile)); mca_base_param_lookup_int(mca_pcm_rsh_param_fast, - &mca_pcm_rsh_fast); + &(me->fast_boot)); mca_base_param_lookup_int(mca_pcm_rsh_param_ignore_stderr, - &mca_pcm_rsh_ignore_stderr); - mca_base_param_lookup_int(mca_pcm_rsh_param_ignore_stderr, - &mca_pcm_rsh_ignore_stderr); + &(me->ignore_stderr)); mca_base_param_lookup_string(mca_pcm_rsh_param_agent, - &mca_pcm_rsh_agent); + &(me->rsh_agent)); + mca_base_param_lookup_int(mca_pcm_rsh_param_use_ns, + &(me->use_ns)); + *allow_multi_user_threads = true; *have_hidden_threads = false; - mca_base_param_lookup_int(mca_pcm_rsh_param_use_ns, - &mca_pcm_rsh_use_ns); - - ret = mca_llm_base_select("pcm", &mca_pcm_rsh_llm, + ret = mca_llm_base_select("pcm", &(me->llm), allow_multi_user_threads, have_hidden_threads); if (OMPI_SUCCESS != ret) { @@ -174,26 +162,41 @@ mca_pcm_rsh_init(int *priority, return NULL; } + /* + * fill in the function pointers + */ + me->super.pcm_get_unique_name = mca_pcm_base_no_unique_name; + me->super.pcm_allocate_resources = mca_pcm_rsh_allocate_resources; + me->super.pcm_can_spawn = mca_pcm_rsh_can_spawn; + me->super.pcm_spawn_procs = mca_pcm_rsh_spawn_procs; + me->super.pcm_kill_proc = mca_pcm_rsh_kill_proc; + me->super.pcm_kill_job = mca_pcm_rsh_kill_job; + me->super.pcm_deallocate_resources = mca_pcm_rsh_deallocate_resources; + me->super.pcm_finalize = mca_pcm_rsh_finalize; + /* DO SOME PARAM "FIXING" */ /* BWB - remove param fixing before 1.0 */ - if (0 == mca_pcm_rsh_no_profile) { + if (0 == me->no_profile) { printf("WARNING: reseting mca_pcm_rsh_no_profile to 1\n"); - mca_pcm_rsh_no_profile = 1; + me->no_profile = 1; } - if (0 == mca_pcm_rsh_fast) { + if (0 == me->fast_boot) { printf("WARNING: reseting mca_pcm_rsh_fast to 1\n"); - mca_pcm_rsh_fast = 1; + me->fast_boot = 1; } - return &mca_pcm_rsh_1_0_0; + return (mca_pcm_base_module_t*) me; } int -mca_pcm_rsh_finalize(struct mca_pcm_base_module_1_0_0_t* me) +mca_pcm_rsh_finalize(struct mca_pcm_base_module_1_0_0_t* me_super) { - if (mca_pcm_rsh_output > 0) { - ompi_output_close(mca_pcm_rsh_output); + mca_pcm_rsh_module_t *me = (mca_pcm_rsh_module_t*) me_super; + + if (me != NULL) { + if (NULL != me->rsh_agent) free(me->rsh_agent); + free(me); } return OMPI_SUCCESS; diff --git a/src/mca/pcm/rsh/src/pcm_rsh_spawn.c b/src/mca/pcm/rsh/src/pcm_rsh_spawn.c index 3f9967cb81..47546ae25c 100644 --- a/src/mca/pcm/rsh/src/pcm_rsh_spawn.c +++ b/src/mca/pcm/rsh/src/pcm_rsh_spawn.c @@ -30,21 +30,18 @@ #include "util/numtostr.h" #include "mca/ns/base/base.h" -#if 1 #define BOOTAGENT "mca_pcm_rsh_bootproxy" -#else -#define BOOTAGENT "cat" -#endif #define PRS_BUFSIZE 1024 -static int internal_spawn_proc(mca_ns_base_jobid_t jobid, ompi_rte_node_schedule_t *sched, +static int internal_spawn_proc(mca_pcm_rsh_module_t *me, + mca_ns_base_jobid_t jobid, ompi_rte_node_schedule_t *sched, ompi_list_t *hostlist, int my_start_vpid, int global_start_vpid, int num_procs); bool -mca_pcm_rsh_can_spawn(struct mca_pcm_base_module_1_0_0_t* me) +mca_pcm_rsh_can_spawn(struct mca_pcm_base_module_1_0_0_t* me_super) { /* we can always try to rsh some more... Might not always work as * the caller hopes @@ -54,9 +51,10 @@ mca_pcm_rsh_can_spawn(struct mca_pcm_base_module_1_0_0_t* me) int -mca_pcm_rsh_spawn_procs(struct mca_pcm_base_module_1_0_0_t* me, +mca_pcm_rsh_spawn_procs(struct mca_pcm_base_module_1_0_0_t* me_super, mca_ns_base_jobid_t jobid, ompi_list_t *schedlist) { + mca_pcm_rsh_module_t *me = (mca_pcm_rsh_module_t*) me_super; ompi_list_item_t *sched_item, *node_item, *host_item; ompi_rte_node_schedule_t *sched; ompi_rte_node_allocation_t *node; @@ -93,7 +91,7 @@ mca_pcm_rsh_spawn_procs(struct mca_pcm_base_module_1_0_0_t* me, /* BWB - make sure vpids are reserved */ local_start_vpid = 0; - if (mca_pcm_rsh_use_ns) { + if (me->use_ns) { global_start_vpid = (int) ompi_name_server.reserve_range(jobid, num_procs); } else { global_start_vpid = 0; @@ -141,7 +139,7 @@ mca_pcm_rsh_spawn_procs(struct mca_pcm_base_module_1_0_0_t* me, /* do the launch to the first node in the list, passing him the rest of the list */ - ret = internal_spawn_proc(jobid, sched, &launch, + ret = internal_spawn_proc(me, jobid, sched, &launch, local_start_vpid, global_start_vpid, num_procs); if (OMPI_SUCCESS != ret) { @@ -173,7 +171,8 @@ mca_pcm_rsh_spawn_procs(struct mca_pcm_base_module_1_0_0_t* me, static int -internal_need_profile(mca_llm_base_hostfile_node_t *start_node, +internal_need_profile(mca_pcm_rsh_module_t *me, + mca_llm_base_hostfile_node_t *start_node, int stderr_is_error, bool *needs_profile) { struct passwd *p; @@ -190,16 +189,16 @@ internal_need_profile(mca_llm_base_hostfile_node_t *start_node, * * The following logic is used: * - * if mca_pcm_rsh_no_profile is 1, don't do profile - * if mca_pcm_rsh_fast is 1, remote shell is assumed same as local + * if me->no_profile is 1, don't do profile + * if me->fast_boot is 1, remote shell is assumed same as local * if shell is sh/ksh, run profile, otherwise don't */ - if (1 == mca_pcm_rsh_no_profile) { + if (1 == me->no_profile) { *needs_profile = false; return OMPI_SUCCESS; } - if (1 == mca_pcm_rsh_fast) { + if (1 == me->fast_boot) { p = getpwuid(getuid()); if (NULL == p) return OMPI_ERROR; @@ -214,7 +213,7 @@ internal_need_profile(mca_llm_base_hostfile_node_t *start_node, /* we have to look at the other side and get our shell */ username = mca_pcm_base_get_username(start_node); - cmdv = ompi_argv_split(mca_pcm_rsh_agent, ' '); + cmdv = ompi_argv_split(me->rsh_agent, ' '); cmdc = ompi_argv_count(cmdv); ompi_argv_append(&cmdc, &cmdv, start_node->hostname); @@ -279,7 +278,8 @@ cleanup: static int -internal_spawn_proc(mca_ns_base_jobid_t jobid, ompi_rte_node_schedule_t *sched, +internal_spawn_proc(mca_pcm_rsh_module_t *me, + mca_ns_base_jobid_t jobid, ompi_rte_node_schedule_t *sched, ompi_list_t *hostlist, int my_start_vpid, int global_start_vpid, int num_procs) { @@ -290,7 +290,7 @@ internal_spawn_proc(mca_ns_base_jobid_t jobid, ompi_rte_node_schedule_t *sched, char *cmd0 = NULL; int cmdc = 0; char *printable = NULL; - int stderr_is_error = mca_pcm_rsh_ignore_stderr == 0 ? 1 : 0; + int stderr_is_error = me->ignore_stderr == 0 ? 1 : 0; char *username = NULL; int ret; pid_t pid; @@ -306,7 +306,7 @@ internal_spawn_proc(mca_ns_base_jobid_t jobid, ompi_rte_node_schedule_t *sched, /* * Check to see if we need to do the .profile thing */ - ret = internal_need_profile(start_node, stderr_is_error, + ret = internal_need_profile(me, start_node, stderr_is_error, &needs_profile); if (OMPI_SUCCESS != ret) { goto cleanup; @@ -318,7 +318,7 @@ internal_spawn_proc(mca_ns_base_jobid_t jobid, ompi_rte_node_schedule_t *sched, */ /* build up the rsh command part */ - cmdv = ompi_argv_split(mca_pcm_rsh_agent, ' '); + cmdv = ompi_argv_split(me->rsh_agent, ' '); cmdc = ompi_argv_count(cmdv); ompi_argv_append(&cmdc, &cmdv, start_node->hostname); @@ -335,7 +335,7 @@ internal_spawn_proc(mca_ns_base_jobid_t jobid, ompi_rte_node_schedule_t *sched, /* build the command to start */ ompi_argv_append(&cmdc, &cmdv, BOOTAGENT); -#if 1 + /* starting vpid for launchee's procs */ tmp = ltostr(my_start_vpid); ompi_argv_append(&cmdc, &cmdv, "--local_start_vpid"); @@ -353,7 +353,7 @@ internal_spawn_proc(mca_ns_base_jobid_t jobid, ompi_rte_node_schedule_t *sched, ompi_argv_append(&cmdc, &cmdv, "--num_procs"); ompi_argv_append(&cmdc, &cmdv, tmp); free(tmp); -#endif + /* add the end of the .profile thing if required */ if (needs_profile) { ompi_argv_append(&cmdc, &cmdv, ")");