
A little cleanup and progress:

- build a proper srun argv
- launch the srun (see the sketch after this commit message)
- still have several "JMS" comments that need to be addressed

This commit was SVN r7036.
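
For context, a minimal standalone sketch of the pattern this commit adds: append the srun command, its node count, and the daemon's arguments to an argv, then fork and exec it, stashing the child pid. This is not the ORTE code itself -- it deliberately avoids the OPAL helpers (opal_argv_append, opal_path_findv) so it compiles on its own, uses execvp instead of execve, and the "orted"/node-count values are placeholders.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>

/* crude stand-in for opal_argv_append(): grow a NULL-terminated argv */
static void argv_append(int *argc, char ***argv, const char *arg)
{
    *argv = realloc(*argv, (*argc + 2) * sizeof(char *));
    (*argv)[(*argc)++] = strdup(arg);
    (*argv)[*argc] = NULL;
}

int main(void)
{
    char **argv = NULL;
    char tmp[64];
    int argc = 0;
    unsigned long num_nodes = 2;   /* placeholder: the real code gets this from the RAS */
    pid_t srun_pid;

    /* build the srun command line, mirroring the commit's opal_argv_append() calls */
    argv_append(&argc, &argv, "srun");
    argv_append(&argc, &argv, "--mpi=lam");     /* task count == node count */
    snprintf(tmp, sizeof(tmp), "--nodes=%lu", num_nodes);
    argv_append(&argc, &argv, tmp);
    argv_append(&argc, &argv, "orted");         /* placeholder for the component's daemon */
    argv_append(&argc, &argv, "--no-daemonize");

    /* launch it: fork, exec in the child, remember the pid in the parent */
    srun_pid = fork();
    if (-1 == srun_pid) {
        perror("fork");
        return 1;
    } else if (0 == srun_pid) {
        execvp(argv[0], argv);   /* PATH search; the commit uses opal_path_findv() + execve() */
        perror("execvp");        /* only reached if the exec fails */
        _exit(127);
    }
    printf("launched srun as pid %ld\n", (long)srun_pid);
    waitpid(srun_pid, NULL, 0);  /* reap it so no zombie is left behind */
    return 0;
}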
This commit is contained in:
Jeff Squyres 2005-08-25 16:38:42 +00:00
parent 71a28437bf
commit 524ded4896


@@ -22,9 +22,14 @@
#include "ompi_config.h"
#include <sys/types.h>
#include <unistd.h>
#include <signal.h>
#include "opal/util/argv.h"
#include "opal/util/output.h"
#include "opal/util/opal_environ.h"
#include "opal/util/path.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/runtime/runtime.h"
#include "orte/include/orte_constants.h"
@@ -46,9 +51,12 @@ static int pls_slurm_terminate_job(orte_jobid_t jobid);
static int pls_slurm_terminate_proc(const orte_process_name_t *name);
static int pls_slurm_finalize(void);
static int pls_slurm_start_proc(char *nodename, int argc, char **argv,
char **env);
static int pls_slurm_start_proc(int argc, char **argv, char **env);
/*
* Global variable
*/
orte_pls_base_module_1_0_0_t orte_pls_slurm_module = {
pls_slurm_launch,
pls_slurm_terminate_job,
@@ -56,7 +64,15 @@ orte_pls_base_module_1_0_0_t orte_pls_slurm_module = {
pls_slurm_finalize
};
/*
* Local variable
*/
static pid_t srun_pid = 0;
/*
* External
*/
extern char **environ;
@@ -73,7 +89,10 @@ static int pls_slurm_launch(orte_jobid_t jobid)
char **argv;
int argc;
int rc;
char *tmp;
char** env;
char* var;
/* query the list of nodes allocated to the job - don't need the entire
* mapping - as the daemon/proxy is responsible for determining the apps
* to launch on each node.
@@ -106,9 +125,18 @@ static int pls_slurm_launch(orte_jobid_t jobid)
argv = NULL;
argc = 0;
/* add the srun command */
opal_argv_append(&argc, &argv, "srun");
/* "--mpi=lam" implies the task count equals the node count */
opal_argv_append(&argc, &argv, "--mpi=lam");
asprintf(&tmp, "--nodes=%lu", num_nodes);
opal_argv_append(&argc, &argv, tmp);
free(tmp);
/* add the daemon command (as specified by user) */
opal_argv_append(&argc, &argv, mca_pls_slurm_component.orted);
opal_argv_append(&argc, &argv, "--no-daemonize");
/* check for debug flags */
@@ -118,27 +146,32 @@ static int pls_slurm_launch(orte_jobid_t jobid)
opal_argv_append(&argc, &argv, "--bootproxy");
opal_argv_append(&argc, &argv, jobid_string);
opal_argv_append(&argc, &argv, "--name");
/* JMS: what to do here? */
proc_name_index = argc;
opal_argv_append(&argc, &argv, "");
opal_argv_append(&argc, &argv, "BOGUS");
/* tell the daemon how many procs are in the daemon's job */
/* JMS: what does this do -- is it necessary? i.e., doesn't the
daemon pull this info from the gpr? */
opal_argv_append(&argc, &argv, "--num_procs");
asprintf(&param, "%lu", (unsigned long)(vpid + num_nodes));
opal_argv_append(&argc, &argv, param);
free(param);
/* tell the daemon the starting vpid of the daemon's job */
/* JMS: ditto previous comment */
opal_argv_append(&argc, &argv, "--vpid_start");
opal_argv_append(&argc, &argv, "0");
opal_argv_append(&argc, &argv, "--nodename");
node_name_index = argc;
opal_argv_append(&argc, &argv, "");
/* JMS: what to do here? */
opal_argv_append(&argc, &argv, "BOGUS");
/* pass along the universe name and location info */
opal_argv_append(&argc, &argv, "--universe");
asprintf(&param, "%s@%s:%s", orte_universe_info.uid,
orte_universe_info.host, orte_universe_info.name);
orte_universe_info.host, orte_universe_info.name);
opal_argv_append(&argc, &argv, param);
free(param);
@@ -175,21 +208,13 @@ static int pls_slurm_launch(orte_jobid_t jobid)
}
}
/*
* Iterate through each of the nodes and spin
* up a daemon.
*/
for(item = opal_list_get_first(&nodes);
item != opal_list_get_end(&nodes);
item = opal_list_get_next(item)) {
/* Bookkeeping -- save the node names */
for (item = opal_list_get_first(&nodes);
item != opal_list_get_end(&nodes);
item = opal_list_get_next(item)) {
orte_ras_node_t* node = (orte_ras_node_t*)item;
orte_process_name_t* name;
char* name_string;
char** env;
char* var;
/* setup node name */
argv[node_name_index] = node->node_name;
/* initialize daemons process name */
rc = orte_ns.create_process_name(&name, node->node_cellid, 0, vpid);
@@ -198,70 +223,61 @@ static int pls_slurm_launch(orte_jobid_t jobid)
goto cleanup;
}
/* setup per-node options */
if (mca_pls_slurm_component.debug) {
opal_output(0, "pls:slurm: launching on node %s\n",
node->node_name);
}
/* setup process name */
rc = orte_ns.get_proc_name_string(&name_string, name);
if (ORTE_SUCCESS != rc) {
opal_output(0, "pls:slurm: unable to create process name");
exit(-1);
}
argv[proc_name_index] = name_string;
/* setup environment */
env = opal_argv_copy(environ);
var = mca_base_param_environ_variable("seed",NULL,NULL);
opal_setenv(var, "0", true, &env);
/* set the progress engine schedule for this node.
* if node_slots is set to zero, then we default to
* NOT being oversubscribed
*/
if (node->node_slots > 0 &&
node->node_slots_inuse > node->node_slots) {
if (mca_pls_slurm_component.debug) {
opal_output(0, "pls:slurm: oversubscribed -- setting mpi_yield_when_idle to 1");
}
var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
opal_setenv(var, "1", true, &env);
} else {
if (mca_pls_slurm_component.debug) {
opal_output(0, "pls:slurm: not oversubscribed -- setting mpi_yield_when_idle to 0");
}
var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
opal_setenv(var, "0", true, &env);
}
free(var);
/* save the daemons name on the node */
if (ORTE_SUCCESS != (rc = orte_pls_base_proxy_set_node_name(node,jobid,name))) {
if (ORTE_SUCCESS !=
(rc = orte_pls_base_proxy_set_node_name(node, jobid, name))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* exec the daemon */
if (mca_pls_slurm_component.debug) {
param = opal_argv_join(argv, ' ');
if (NULL != param) {
opal_output(0, "pls:slurm: executing: %s", param);
free(param);
}
}
rc = pls_slurm_start_proc(node->node_name, argc, argv, env);
if (ORTE_SUCCESS != rc) {
opal_output(0, "pls:slurm: start_procs returned error %d", rc);
goto cleanup;
}
vpid++;
free(name);
}
/* setup environment */
env = opal_argv_copy(environ);
var = mca_base_param_environ_variable("seed", NULL, NULL);
opal_setenv(var, "0", true, &env);
#if 0
/* JMS What to do for sched_yield? */
/* set the progress engine schedule for this node. if node_slots
is set to zero, then we default to NOT being oversubscribed */
if (node->node_slots > 0 &&
node->node_slots_inuse > node->node_slots) {
if (mca_pls_slurm_component.debug) {
opal_output(0, "pls:slurm: oversubscribed -- setting mpi_yield_when_idle to 1");
}
var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
opal_setenv(var, "1", true, &env);
} else {
if (mca_pls_slurm_component.debug) {
opal_output(0, "pls:slurm: not oversubscribed -- setting mpi_yield_when_idle to 0");
}
var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
opal_setenv(var, "0", true, &env);
}
free(var);
#endif
/* exec the daemon */
rc = pls_slurm_start_proc(argc, argv, env);
if (ORTE_SUCCESS != rc) {
opal_output(0, "pls:slurm: start_procs returned error %d", rc);
goto cleanup;
}
/* JMS: should we stash the srun pid in the gpr somewhere for cleanup? */
/* JMS: how do we catch when srun dies? */
cleanup:
while (NULL != (item = opal_list_remove_first(&nodes))) {
OBJ_RELEASE(item);
@@ -273,6 +289,11 @@ cleanup:
static int pls_slurm_terminate_job(orte_jobid_t jobid)
{
if (0 != srun_pid) {
kill(srun_pid, SIGHUP);
/* JMS need appropriate code here to reap */
srun_pid = 0;
}
return orte_pls_base_proxy_terminate_job(jobid);
}
@@ -298,13 +319,24 @@ static int pls_slurm_finalize(void)
}
static int pls_slurm_start_proc(char *nodename, int argc, char **argv,
char **env)
static int pls_slurm_start_proc(int argc, char **argv, char **env)
{
char *a = opal_argv_join(argv, ' ');
int rc;
char *exec_argv = opal_path_findv(argv[0], 0, env, NULL);
printf("SLURM Starting on node %s: %s\n", nodename, a);
free(a);
if (NULL == exec_argv) {
return ORTE_ERR_NOT_FOUND;
}
srun_pid = fork();
if (-1 == srun_pid) {
printf("Fork failed!\n");
return ORTE_ERR_IN_ERRNO;
} else if (0 == srun_pid) {
rc = execve(exec_argv, argv, env);
printf("execve failed! (%s)\n", argv[0]);
return ORTE_ERR_IN_ERRNO;
}
return ORTE_SUCCESS;
}
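
The two remaining JMS questions in the diff -- how to reap srun after the SIGHUP in pls_slurm_terminate_job(), and how to notice when srun dies on its own -- both come down to standard POSIX child management. The sketch below shows one conventional approach with waitpid() plus a SIGCHLD handler; it is illustrative only and makes no claim about what the component eventually did. The srun_pid global and the terminate_srun()/sigchld_handler() names are stand-ins.

#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>

static pid_t srun_pid = 0;   /* set by the fork in the launch path */

/* terminate path: signal the child, then reap it so no zombie is left */
static void terminate_srun(void)
{
    int status;
    if (0 != srun_pid) {
        kill(srun_pid, SIGHUP);
        waitpid(srun_pid, &status, 0);
        srun_pid = 0;
    }
}

/* "how do we catch when srun dies?" -- a SIGCHLD handler with WNOHANG
 * fires when the child exits unexpectedly */
static void sigchld_handler(int sig)
{
    int status;
    (void)sig;
    if (srun_pid > 0 && waitpid(srun_pid, &status, WNOHANG) == srun_pid) {
        srun_pid = 0;   /* srun is gone; a real component would report an error */
    }
}

int main(void)
{
    signal(SIGCHLD, sigchld_handler);
    /* ... fork/exec srun here, storing the pid in srun_pid ... */
    terminate_srun();
    return 0;
}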