1
1

First cut -- not working yet -- checkpointing to move to another machine.

This commit was SVN r7018.
Этот коммит содержится в:
Jeff Squyres 2005-08-24 22:19:48 +00:00
родитель 9770a41283
Коммит 9755a7f7fa
3 изменённых файлов: 315 добавлений и 53 удалений

Просмотреть файл

@ -26,15 +26,22 @@
extern "C" {
#endif
struct orte_pls_slurm_component_t {
    /* Base PLS component; first member, so pls_slurm_open() reaches the
       component version via mca_pls_slurm_component.super.pls_version
       (presumably also allows casting to the base component type --
       standard MCA layout; confirm against the base framework) */
    orte_pls_base_component_t super;
    /* Selection priority, set from the "priority" MCA parameter
       (default 75) in pls_slurm_open() */
    int priority;
    /* Debug flag, set from the "debug" MCA parameter (default 0) */
    int debug;
    /* Command used to start the proxy orted, set from the "orted"
       MCA parameter (default "orted") */
    char *orted;
};
typedef struct orte_pls_slurm_component_t orte_pls_slurm_component_t;
/*
 * Globally exported variables
 */
/* The component structure, registered with the MCA framework */
OMPI_COMP_EXPORT extern orte_pls_slurm_component_t
    mca_pls_slurm_component;
/* The module returned by pls_slurm_init() upon successful selection */
OMPI_COMP_EXPORT extern orte_pls_base_module_1_0_0_t
    orte_pls_slurm_module;
#if defined(c_plusplus) || defined(__cplusplus)
}

Просмотреть файл

@ -22,10 +22,11 @@
#include "ompi_config.h"
#include "include/orte_constants.h"
#include "mca/pls/pls.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/include/orte_constants.h"
#include "orte/mca/pls/pls.h"
#include "orte/mca/pls/base/base.h"
#include "pls_slurm.h"
#include "mca/base/mca_base_param.h"
/*
@ -35,12 +36,6 @@ const char *mca_pls_slurm_component_version_string =
"Open MPI slurm pls MCA component version " ORTE_VERSION;
/*
* Local variable
*/
static int param_priority = -1;
/*
* Local functions
*/
@ -53,48 +48,65 @@ static struct orte_pls_base_module_1_0_0_t *pls_slurm_init(int *priority);
* and pointers to our public functions in it
*/
orte_pls_base_component_1_0_0_t mca_pls_slurm_component = {
/* First, the mca_component_t struct containing meta information
about the component itself */
orte_pls_slurm_component_t mca_pls_slurm_component = {
{
/* Indicate that we are a pls v1.0.0 component (which also
implies a specific MCA version) */
/* First, the mca_component_t struct containing meta
information about the component itself */
ORTE_PLS_BASE_VERSION_1_0_0,
{
/* Indicate that we are a pls v1.0.0 component (which also
implies a specific MCA version) */
/* Component name and version */
ORTE_PLS_BASE_VERSION_1_0_0,
/* Component name and version */
"slurm",
ORTE_MAJOR_VERSION,
ORTE_MINOR_VERSION,
ORTE_RELEASE_VERSION,
/* Component open and close functions */
pls_slurm_open,
NULL
},
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
true
},
/* Initialization / querying functions */
pls_slurm_init
}
"slurm",
ORTE_MAJOR_VERSION,
ORTE_MINOR_VERSION,
ORTE_RELEASE_VERSION,
/* Component open and close functions */
pls_slurm_open,
NULL
},
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
true
},
/* Initialization / querying functions */
pls_slurm_init
/* Other orte_pls_slurm_component_t items -- left uninitialized
here; will be initialized in pls_slurm_open() */
};
/*
 * Component open: register this component's MCA parameters.  The
 * results land directly in mca_pls_slurm_component's debug, priority,
 * and orted fields (see pls_slurm.h).
 *
 * Returns ORTE_SUCCESS unconditionally.
 */
static int pls_slurm_open(void)
{
    mca_base_component_t *comp = &mca_pls_slurm_component.super.pls_version;

    mca_base_param_reg_int(comp, "debug", "Enable debugging of slurm pls",
                           false, false, 0,
                           &mca_pls_slurm_component.debug);
    mca_base_param_reg_int(comp, "priority", "Default selection priority",
                           false, false, 75,
                           &mca_pls_slurm_component.priority);
    mca_base_param_reg_string(comp, "orted",
                              "Command to use to start proxy orted",
                              false, false, "orted",
                              &mca_pls_slurm_component.orted);

    return ORTE_SUCCESS;
}
@ -102,11 +114,12 @@ static int pls_slurm_open(void)
static struct orte_pls_base_module_1_0_0_t *pls_slurm_init(int *priority)
{
/* Are we runing under SLURM? */
/* Are we running under a SLURM job? */
if (NULL != getenv("SLURM_JOBID")) {
mca_base_param_lookup_int(param_priority, priority);
*priority = mca_pls_slurm_component.priority;
opal_output(orte_pls_base.pls_output,
"pls:slurm: available for selection");
return &orte_pls_slurm_module;
}

Просмотреть файл

@ -22,8 +22,19 @@
#include "ompi_config.h"
#include "include/orte_constants.h"
#include "mca/pls/pls.h"
#include "opal/util/argv.h"
#include "opal/util/output.h"
#include "opal/util/opal_environ.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/runtime/runtime.h"
#include "orte/include/orte_constants.h"
#include "orte/include/orte_types.h"
#include "orte/include/orte_constants.h"
#include "orte/mca/pls/pls.h"
#include "orte/mca/pls/base/base.h"
#include "orte/mca/ns/base/base.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/errmgr/errmgr.h"
#include "pls_slurm.h"
@ -35,6 +46,8 @@ static int pls_slurm_terminate_job(orte_jobid_t jobid);
static int pls_slurm_terminate_proc(const orte_process_name_t *name);
static int pls_slurm_finalize(void);
static int pls_slurm_start_proc(char *nodename, int argc, char **argv,
char **env);
orte_pls_base_module_1_0_0_t orte_pls_slurm_module = {
pls_slurm_launch,
@ -44,25 +57,254 @@ orte_pls_base_module_1_0_0_t orte_pls_slurm_module = {
};
extern char **environ;
/*
 * Launch the daemons for a job: query the allocated nodes, reserve a
 * vpid range for the daemons, build a common orted argv, then iterate
 * over the nodes starting one daemon (proxy) per node via
 * pls_slurm_start_proc().
 *
 * @param jobid  the ORTE job to launch daemons for
 * @return ORTE_SUCCESS, or the first error encountered
 */
static int pls_slurm_launch(orte_jobid_t jobid)
{
    opal_list_t nodes;
    opal_list_item_t *item;
    size_t num_nodes;
    orte_vpid_t vpid;
    int node_name_index;
    int proc_name_index;
    char *jobid_string;
    char *uri, *param;
    char **argv;
    int argc;
    int rc;

    /* Query the list of nodes allocated to the job -- don't need the
     * entire mapping, as the daemon/proxy is responsible for
     * determining the apps to launch on each node.
     */
    OBJ_CONSTRUCT(&nodes, opal_list_t);
    rc = orte_ras_base_node_query_alloc(&nodes, jobid);
    if (ORTE_SUCCESS != rc) {
        goto cleanup;
    }

    /* Allocate a range of vpids for the daemons */
    num_nodes = opal_list_get_size(&nodes);
    if (num_nodes == 0) {
        /* Route through cleanup so the constructed list is destructed
           (a bare return here would leak it) */
        rc = ORTE_ERR_BAD_PARAM;
        goto cleanup;
    }
    rc = orte_ns.reserve_range(0, num_nodes, &vpid);
    if (ORTE_SUCCESS != rc) {
        goto cleanup;
    }

    /* Need integer value for command line parameter */
    asprintf(&jobid_string, "%lu", (unsigned long) jobid);

    /* Start building the argv array shared by all daemons */
    argv = NULL;
    argc = 0;

    /* Add the daemon command (as specified by user) */
    opal_argv_append(&argc, &argv, mca_pls_slurm_component.orted);
    opal_argv_append(&argc, &argv, "--no-daemonize");

    /* Check for debug flags */
    orte_pls_base_proxy_mca_argv(&argc, &argv);

    /* Proxy information; the "--name" value is a per-node placeholder
       filled in inside the loop below */
    opal_argv_append(&argc, &argv, "--bootproxy");
    opal_argv_append(&argc, &argv, jobid_string);
    opal_argv_append(&argc, &argv, "--name");
    proc_name_index = argc;
    opal_argv_append(&argc, &argv, "");

    /* Tell the daemon how many procs are in the daemon's job */
    opal_argv_append(&argc, &argv, "--num_procs");
    asprintf(&param, "%lu", (unsigned long)(vpid + num_nodes));
    opal_argv_append(&argc, &argv, param);
    free(param);

    /* Tell the daemon the starting vpid of the daemon's job */
    opal_argv_append(&argc, &argv, "--vpid_start");
    opal_argv_append(&argc, &argv, "0");

    /* Per-node placeholder, filled in inside the loop below */
    opal_argv_append(&argc, &argv, "--nodename");
    node_name_index = argc;
    opal_argv_append(&argc, &argv, "");

    /* Pass along the universe name and location info */
    opal_argv_append(&argc, &argv, "--universe");
    asprintf(&param, "%s@%s:%s", orte_universe_info.uid,
             orte_universe_info.host, orte_universe_info.name);
    opal_argv_append(&argc, &argv, param);
    free(param);

    /* Setup ns contact info */
    opal_argv_append(&argc, &argv, "--nsreplica");
    if (NULL != orte_process_info.ns_replica_uri) {
        uri = strdup(orte_process_info.ns_replica_uri);
    } else {
        uri = orte_rml.get_uri();
    }
    asprintf(&param, "\"%s\"", uri);
    opal_argv_append(&argc, &argv, param);
    free(uri);
    free(param);

    /* Setup gpr contact info */
    opal_argv_append(&argc, &argv, "--gprreplica");
    if (NULL != orte_process_info.gpr_replica_uri) {
        uri = strdup(orte_process_info.gpr_replica_uri);
    } else {
        uri = orte_rml.get_uri();
    }
    asprintf(&param, "\"%s\"", uri);
    opal_argv_append(&argc, &argv, param);
    free(uri);
    free(param);

    if (mca_pls_slurm_component.debug) {
        param = opal_argv_join(argv, ' ');
        if (NULL != param) {
            opal_output(0, "pls:slurm: final top-level argv:");
            opal_output(0, "pls:slurm: %s", param);
            free(param);
        }
    }

    /*
     * Iterate through each of the nodes and spin up a daemon.
     */
    for (item = opal_list_get_first(&nodes);
         item != opal_list_get_end(&nodes);
         item = opal_list_get_next(item)) {
        orte_ras_node_t *node = (orte_ras_node_t*) item;
        orte_process_name_t *name;
        char *name_string;
        char **env;
        char *var;

        /* Setup node name.  NOTE(review): this aliases
           node->node_name rather than copying it; fine as long as
           argv is never freed with opal_argv_free while the slot is
           live */
        argv[node_name_index] = node->node_name;

        /* Initialize the daemon's process name */
        rc = orte_ns.create_process_name(&name, node->node_cellid, 0, vpid);
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }

        /* Setup per-node options */
        if (mca_pls_slurm_component.debug) {
            opal_output(0, "pls:slurm: launching on node %s\n",
                        node->node_name);
        }

        /* Setup process name */
        rc = orte_ns.get_proc_name_string(&name_string, name);
        if (ORTE_SUCCESS != rc) {
            opal_output(0, "pls:slurm: unable to create process name");
            exit(-1);
        }
        argv[proc_name_index] = name_string;

        /* Setup environment: mark the daemon as not being a seed */
        env = opal_argv_copy(environ);
        var = mca_base_param_environ_variable("seed",NULL,NULL);
        opal_setenv(var, "0", true, &env);

        /* Set the progress engine schedule for this node.  If
         * node_slots is set to zero, then we default to NOT being
         * oversubscribed.  NOTE(review): the "seed" var above is
         * overwritten (and leaked) by the reassignment below, as in
         * the original code.
         */
        if (node->node_slots > 0 &&
            node->node_slots_inuse > node->node_slots) {
            if (mca_pls_slurm_component.debug) {
                opal_output(0, "pls:slurm: oversubscribed -- setting mpi_yield_when_idle to 1");
            }
            var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
            opal_setenv(var, "1", true, &env);
        } else {
            if (mca_pls_slurm_component.debug) {
                opal_output(0, "pls:slurm: not oversubscribed -- setting mpi_yield_when_idle to 0");
            }
            var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
            opal_setenv(var, "0", true, &env);
        }
        free(var);

        /* Save the daemon's name on the node */
        if (ORTE_SUCCESS != (rc = orte_pls_base_proxy_set_node_name(node,jobid,name))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }

        /* Exec the daemon */
        if (mca_pls_slurm_component.debug) {
            param = opal_argv_join(argv, ' ');
            if (NULL != param) {
                opal_output(0, "pls:slurm: executing: %s", param);
                free(param);
            }
        }
        rc = pls_slurm_start_proc(node->node_name, argc, argv, env);
        if (ORTE_SUCCESS != rc) {
            opal_output(0, "pls:slurm: start_procs returned error %d", rc);
            goto cleanup;
        }
        vpid++;
        free(name);
    }

cleanup:
    while (NULL != (item = opal_list_remove_first(&nodes))) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&nodes);
    /* NOTE(review): jobid_string, argv, env, and name_string are not
       freed on this path (as in the original checkpoint); freeing
       argv here would double-free the aliased node_name slot, so this
       needs a deliberate ownership pass later */
    return rc;
}
/*
 * Terminate all processes of a job by delegating to the common
 * proxy-termination support in the pls base.
 */
static int pls_slurm_terminate_job(orte_jobid_t jobid)
{
    return orte_pls_base_proxy_terminate_job(jobid);
}
/*
 * The way we've used SLURM, we can't kill individual processes --
 * we'll kill the entire job
 */
static int pls_slurm_terminate_proc(const orte_process_name_t *name)
{
    /* Log the unsupported request and tell the caller explicitly */
    opal_output(orte_pls_base.pls_output,
                "pls:slurm:terminate_proc: not supported");
    return ORTE_ERR_NOT_SUPPORTED;
}
/*
 * Module finalize: cancel any pending RML receives posted on the
 * resource-manager client tag, then report success.
 */
static int pls_slurm_finalize(void)
{
    /* cleanup any pending recvs */
    orte_rml.recv_cancel(ORTE_RML_NAME_ANY, ORTE_RML_TAG_RMGR_CLNT);
    return ORTE_SUCCESS;
}
/*
 * Start one daemon on the given node.  Checkpoint stub: only prints
 * what would be executed; argc and env are accepted but unused so far.
 *
 * @param nodename  node to start the daemon on (printed only)
 * @param argc      length of argv (currently unused)
 * @param argv      NULL-terminated command line for the daemon
 * @param env       environment for the daemon (currently unused)
 * @return ORTE_SUCCESS always
 */
static int pls_slurm_start_proc(char *nodename, int argc, char **argv,
                                char **env)
{
    char *cmdline = opal_argv_join(argv, ' ');

    /* Guard against a failed join: passing NULL to printf "%s" is
       undefined behavior */
    if (NULL != cmdline) {
        printf("SLURM Starting on node %s: %s\n", nodename, cmdline);
        free(cmdline);
    }
    return ORTE_SUCCESS;
}