A little cleanup and progress:
- build a proper srun argv
- launch the srun
- still have several "JMS" comments that need to be addressed

This commit was SVN r7036.
Parent: 71a28437bf
Commit: 524ded4896
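For context on the commit message: the launch path added here builds an argv for srun with opal_argv_append() and then forks and execs it from pls_slurm_start_proc(), both visible in the diff below. The following is a minimal, self-contained POSIX sketch of that same pattern, not the module's code: it uses plain execvp() instead of the OPAL/ORTE helpers, launch_srun() is a hypothetical helper name, and the hard-coded "--nodes=2" plus the srun/orted options are copied from the diff for illustration only.

/* Hedged sketch (not the module's code): launch "srun" with a
 * hand-built argv of the same shape the diff builds with
 * opal_argv_append(), using plain POSIX fork()/execvp(). */
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>

static pid_t launch_srun(char *const argv[])
{
    pid_t pid = fork();
    if (-1 == pid) {
        perror("fork");
        return -1;
    }
    if (0 == pid) {            /* child: becomes srun */
        execvp(argv[0], argv);
        perror("execvp");      /* only reached if the exec fails */
        _exit(1);
    }
    return pid;                /* parent: remember the srun pid */
}

int main(void)
{
    /* Mirrors the options appended in the diff: srun starts one
     * orted per allocated node, and orted stays in the foreground. */
    char *argv[] = {
        "srun", "--mpi=lam", "--nodes=2",
        "orted", "--no-daemonize",
        NULL
    };
    pid_t pid = launch_srun(argv);
    if (pid < 0) {
        return EXIT_FAILURE;
    }
    printf("launched srun as pid %ld\n", (long)pid);
    return EXIT_SUCCESS;
}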
@@ -22,9 +22,14 @@

#include "ompi_config.h"

#include <sys/types.h>
#include <unistd.h>
#include <signal.h>

#include "opal/util/argv.h"
#include "opal/util/output.h"
#include "opal/util/opal_environ.h"
#include "opal/util/path.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/runtime/runtime.h"
#include "orte/include/orte_constants.h"

@@ -46,9 +51,12 @@ static int pls_slurm_terminate_job(orte_jobid_t jobid);
static int pls_slurm_terminate_proc(const orte_process_name_t *name);
static int pls_slurm_finalize(void);

static int pls_slurm_start_proc(char *nodename, int argc, char **argv,
                                char **env);
static int pls_slurm_start_proc(int argc, char **argv, char **env);


/*
 * Global variable
 */
orte_pls_base_module_1_0_0_t orte_pls_slurm_module = {
    pls_slurm_launch,
    pls_slurm_terminate_job,
@@ -56,7 +64,15 @@ orte_pls_base_module_1_0_0_t orte_pls_slurm_module = {
    pls_slurm_finalize
};

/*
 * Local variable
 */
static pid_t srun_pid = 0;


/*
 * External
 */
extern char **environ;

@@ -73,7 +89,10 @@ static int pls_slurm_launch(orte_jobid_t jobid)
    char **argv;
    int argc;
    int rc;

    char *tmp;
    char** env;
    char* var;

    /* query the list of nodes allocated to the job - don't need the entire
     * mapping - as the daemon/proxy is responsibe for determining the apps
     * to launch on each node.
@@ -106,9 +125,18 @@ static int pls_slurm_launch(orte_jobid_t jobid)
    argv = NULL;
    argc = 0;

    /* add the srun command */
    opal_argv_append(&argc, &argv, "srun");

    /* "--mpi=lam" implies the task count equals the node count */
    opal_argv_append(&argc, &argv, "--mpi=lam");

    asprintf(&tmp, "--nodes=%lu", num_nodes);
    opal_argv_append(&argc, &argv, tmp);
    free(tmp);

    /* add the daemon command (as specified by user) */
    opal_argv_append(&argc, &argv, mca_pls_slurm_component.orted);

    opal_argv_append(&argc, &argv, "--no-daemonize");

    /* check for debug flags */
@@ -118,27 +146,32 @@ static int pls_slurm_launch(orte_jobid_t jobid)
    opal_argv_append(&argc, &argv, "--bootproxy");
    opal_argv_append(&argc, &argv, jobid_string);
    opal_argv_append(&argc, &argv, "--name");
    /* JMS: what to do here? */
    proc_name_index = argc;
    opal_argv_append(&argc, &argv, "");
    opal_argv_append(&argc, &argv, "BOGUS");

    /* tell the daemon how many procs are in the daemon's job */
    /* JMS: what does this do -- is it necessary? i.e., doesn't the
       daemon pull this info from the gpr? */
    opal_argv_append(&argc, &argv, "--num_procs");
    asprintf(&param, "%lu", (unsigned long)(vpid + num_nodes));
    opal_argv_append(&argc, &argv, param);
    free(param);

    /* tell the daemon the starting vpid of the daemon's job */
    /* JMS: ditto previous comment */
    opal_argv_append(&argc, &argv, "--vpid_start");
    opal_argv_append(&argc, &argv, "0");

    opal_argv_append(&argc, &argv, "--nodename");
    node_name_index = argc;
    opal_argv_append(&argc, &argv, "");
    /* JMS: what to do here? */
    opal_argv_append(&argc, &argv, "BOGUS");

    /* pass along the universe name and location info */
    opal_argv_append(&argc, &argv, "--universe");
    asprintf(&param, "%s@%s:%s", orte_universe_info.uid,
             orte_universe_info.host, orte_universe_info.name);
             orte_universe_info.host, orte_universe_info.name);
    opal_argv_append(&argc, &argv, param);
    free(param);

@@ -175,21 +208,13 @@ static int pls_slurm_launch(orte_jobid_t jobid)
        }
    }

    /*
     * Iterate through each of the nodes and spin
     * up a daemon.
     */
    for(item = opal_list_get_first(&nodes);
        item != opal_list_get_end(&nodes);
        item = opal_list_get_next(item)) {
    /* Bookkeeping -- save the node names */
    for (item = opal_list_get_first(&nodes);
         item != opal_list_get_end(&nodes);
         item = opal_list_get_next(item)) {
        orte_ras_node_t* node = (orte_ras_node_t*)item;
        orte_process_name_t* name;
        char* name_string;
        char** env;
        char* var;

        /* setup node name */
        argv[node_name_index] = node->node_name;

        /* initialize daemons process name */
        rc = orte_ns.create_process_name(&name, node->node_cellid, 0, vpid);
@@ -198,70 +223,61 @@ static int pls_slurm_launch(orte_jobid_t jobid)
            goto cleanup;
        }

        /* setup per-node options */
        if (mca_pls_slurm_component.debug) {
            opal_output(0, "pls:slurm: launching on node %s\n",
                        node->node_name);
        }

        /* setup process name */
        rc = orte_ns.get_proc_name_string(&name_string, name);
        if (ORTE_SUCCESS != rc) {
            opal_output(0, "pls:slurm: unable to create process name");
            exit(-1);
        }
        argv[proc_name_index] = name_string;

        /* setup environment */
        env = opal_argv_copy(environ);
        var = mca_base_param_environ_variable("seed",NULL,NULL);
        opal_setenv(var, "0", true, &env);

        /* set the progress engine schedule for this node.
         * if node_slots is set to zero, then we default to
         * NOT being oversubscribed
         */
        if (node->node_slots > 0 &&
            node->node_slots_inuse > node->node_slots) {
            if (mca_pls_slurm_component.debug) {
                opal_output(0, "pls:slurm: oversubscribed -- setting mpi_yield_when_idle to 1");
            }
            var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
            opal_setenv(var, "1", true, &env);
        } else {
            if (mca_pls_slurm_component.debug) {
                opal_output(0, "pls:slurm: not oversubscribed -- setting mpi_yield_when_idle to 0");
            }
            var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
            opal_setenv(var, "0", true, &env);
        }
        free(var);

        /* save the daemons name on the node */
        if (ORTE_SUCCESS != (rc = orte_pls_base_proxy_set_node_name(node,jobid,name))) {
        if (ORTE_SUCCESS !=
            (rc = orte_pls_base_proxy_set_node_name(node, jobid, name))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }

        /* exec the daemon */
        if (mca_pls_slurm_component.debug) {
            param = opal_argv_join(argv, ' ');
            if (NULL != param) {
                opal_output(0, "pls:slurm: executing: %s", param);
                free(param);
            }
        }

        rc = pls_slurm_start_proc(node->node_name, argc, argv, env);
        if (ORTE_SUCCESS != rc) {
            opal_output(0, "pls:slurm: start_procs returned error %d", rc);
            goto cleanup;
        }

        vpid++;
        free(name);
    }

    /* setup environment */
    env = opal_argv_copy(environ);
    var = mca_base_param_environ_variable("seed", NULL, NULL);
    opal_setenv(var, "0", true, &env);

#if 0
    /* JMS What to do for sched_yield? */

    /* set the progress engine schedule for this node. if node_slots
       is set to zero, then we default to NOT being oversubscribed */
    if (node->node_slots > 0 &&
        node->node_slots_inuse > node->node_slots) {
        if (mca_pls_slurm_component.debug) {
            opal_output(0, "pls:slurm: oversubscribed -- setting mpi_yield_when_idle to 1");
        }
        var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
        opal_setenv(var, "1", true, &env);
    } else {
        if (mca_pls_slurm_component.debug) {
            opal_output(0, "pls:slurm: not oversubscribed -- setting mpi_yield_when_idle to 0");
        }
        var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
        opal_setenv(var, "0", true, &env);
    }
    free(var);
#endif

    /* exec the daemon */
    rc = pls_slurm_start_proc(argc, argv, env);
    if (ORTE_SUCCESS != rc) {
        opal_output(0, "pls:slurm: start_procs returned error %d", rc);
        goto cleanup;
    }

    /* JMS: short we stash the srun pid in the gpr somewhere for cleanup? */
    /* JMS: how do we catch when srun dies? */

cleanup:
    while (NULL != (item = opal_list_remove_first(&nodes))) {
        OBJ_RELEASE(item);
@@ -273,6 +289,11 @@ cleanup:

static int pls_slurm_terminate_job(orte_jobid_t jobid)
{
    if (0 != srun_pid) {
        kill(srun_pid, SIGHUP);
        /* JMS need appropriate code here to reap */
        srun_pid = 0;
    }
    return orte_pls_base_proxy_terminate_job(jobid);
}

@@ -298,13 +319,24 @@ static int pls_slurm_finalize(void)
}


static int pls_slurm_start_proc(char *nodename, int argc, char **argv,
                                char **env)
static int pls_slurm_start_proc(int argc, char **argv, char **env)
{
    char *a = opal_argv_join(argv, ' ');
    int rc;
    char *exec_argv = opal_path_findv(argv[0], 0, env, NULL);

    printf("SLURM Starting on node %s: %s\n", nodename, a);
    free(a);
    if (NULL == exec_argv) {
        return ORTE_ERR_NOT_FOUND;
    }

    srun_pid = fork();
    if (-1 == srun_pid) {
        printf("Fork failed!\n");
        return ORTE_ERR_IN_ERRNO;
    } else if (0 == srun_pid) {
        rc = execve(exec_argv, argv, env);
        printf("execve failed! (%s)\n", argv[0]);
        return ORTE_ERR_IN_ERRNO;
    }

    return ORTE_SUCCESS;
}
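Two of the remaining JMS comments above ask how srun should be reaped and how its death should be detected; this commit leaves both open. As one illustrative possibility only (not what the module does), the parent could poll the child with a non-blocking waitpid(). check_srun() below is a hypothetical helper written against the srun_pid saved by the fork above.

/* Hedged sketch only: one conventional way to notice that the forked
 * srun has exited, via a non-blocking waitpid() poll.  Not part of
 * this commit. */
#include <sys/types.h>
#include <sys/wait.h>

/* Returns 1 if the child has exited (filling *exit_status),
 * 0 if it is still running, -1 on error. */
static int check_srun(pid_t srun_pid, int *exit_status)
{
    int status;
    pid_t ret = waitpid(srun_pid, &status, WNOHANG);

    if (0 == ret) {
        return 0;                      /* still running */
    } else if (ret == srun_pid) {
        if (WIFEXITED(status)) {
            *exit_status = WEXITSTATUS(status);
        } else {
            *exit_status = -1;         /* killed by a signal, etc. */
        }
        return 1;
    }
    return -1;                         /* waitpid() failed */
}

A SIGCHLD handler would be the event-driven alternative; polling is shown here only because it is simpler to sketch.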