Enable VM launch.
For some time, ORTE has had the ability to launch daemons on all nodes prior to launching an application. It has largely been used outside of the OMPI community, and so was never explicitly turned "on" inside OMPI releases. Nevertheless, the code has been there. Allowing VM launches does not require ANY changes to existing PLM components. All that was required was to have orterun launch the daemons as a separate call to orte_plm.spawn -prior- to launching the applications. The rest of the VM support code resides in the rmaps framework: (a) a check when asked to map a job to see if it is the daemon job, and (b) a separate "setup_virtual_machine" mapper in the rmaps base that creates the required map so the PLMs will do the right thing. In order to support those users who have no RM allocation but like to give the allocation in the form of a -host or -hostfile argument to their application, there is a little more code in orterun and the setup_virtual_machine mapper to capture information passed in that manner. This has been tested with rsh and slurm environments, and, since there is nothing environment-specific in the implementation, should work in others as well - but that still needs to be proven. This commit was SVN r24524.
Этот коммит содержится в:
родитель
80265b472e
Коммит
dc6f616599
@ -138,6 +138,7 @@ static int rte_init(void)
|
|||||||
orte_job_t *jdata;
|
orte_job_t *jdata;
|
||||||
orte_node_t *node;
|
orte_node_t *node;
|
||||||
orte_proc_t *proc;
|
orte_proc_t *proc;
|
||||||
|
orte_app_context_t *app;
|
||||||
int value;
|
int value;
|
||||||
|
|
||||||
/* run the prolog */
|
/* run the prolog */
|
||||||
@ -474,6 +475,11 @@ static int rte_init(void)
|
|||||||
jdata->jobid = ORTE_PROC_MY_NAME->jobid;
|
jdata->jobid = ORTE_PROC_MY_NAME->jobid;
|
||||||
opal_pointer_array_set_item(orte_job_data, 0, jdata);
|
opal_pointer_array_set_item(orte_job_data, 0, jdata);
|
||||||
|
|
||||||
|
/* every job requires at least one app */
|
||||||
|
app = OBJ_NEW(orte_app_context_t);
|
||||||
|
opal_pointer_array_set_item(jdata->apps, 0, app);
|
||||||
|
jdata->num_apps++;
|
||||||
|
|
||||||
/* create and store a node object where we are */
|
/* create and store a node object where we are */
|
||||||
node = OBJ_NEW(orte_node_t);
|
node = OBJ_NEW(orte_node_t);
|
||||||
node->name = strdup(orte_process_info.nodename);
|
node->name = strdup(orte_process_info.nodename);
|
||||||
|
@ -813,6 +813,7 @@ int orte_rmaps_base_define_daemons(orte_job_t *jdata)
|
|||||||
|
|
||||||
int orte_rmaps_base_setup_virtual_machine(orte_job_t *jdata)
|
int orte_rmaps_base_setup_virtual_machine(orte_job_t *jdata)
|
||||||
{
|
{
|
||||||
|
orte_job_t *jdat;
|
||||||
orte_node_t *node;
|
orte_node_t *node;
|
||||||
orte_proc_t *proc;
|
orte_proc_t *proc;
|
||||||
orte_job_map_t *map;
|
orte_job_map_t *map;
|
||||||
@ -820,8 +821,9 @@ int orte_rmaps_base_setup_virtual_machine(orte_job_t *jdata)
|
|||||||
opal_list_item_t *item;
|
opal_list_item_t *item;
|
||||||
orte_app_context_t *app;
|
orte_app_context_t *app;
|
||||||
orte_std_cntr_t num_slots;
|
orte_std_cntr_t num_slots;
|
||||||
int rc;
|
int rc, i, n;
|
||||||
|
bool ignored;
|
||||||
|
|
||||||
/* get the daemon app if provided - may include -host or hostfile
|
/* get the daemon app if provided - may include -host or hostfile
|
||||||
* info about available nodes
|
* info about available nodes
|
||||||
*/
|
*/
|
||||||
@ -839,9 +841,50 @@ int orte_rmaps_base_setup_virtual_machine(orte_job_t *jdata)
|
|||||||
OBJ_DESTRUCT(&node_list);
|
OBJ_DESTRUCT(&node_list);
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
/* check all other known jobs to see if they have something to
|
||||||
|
* add to the allocation - we won't have seen these and the
|
||||||
|
* daemon job won't have any in its app
|
||||||
|
*/
|
||||||
|
for (i=0; i < orte_job_data->size; i++) {
|
||||||
|
if (NULL == (jdat = (orte_job_t*)opal_pointer_array_get_item(orte_job_data, i))) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
for (n=0; n < jdat->apps->size; n++) {
|
||||||
|
if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdat->apps, n))) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (NULL != app->hostfile) {
|
||||||
|
/* hostfile was specified - parse it and add it to the list. The
|
||||||
|
* function automatically ignores duplicates
|
||||||
|
*/
|
||||||
|
if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&node_list,
|
||||||
|
&ignored,
|
||||||
|
app->hostfile))) {
|
||||||
|
ORTE_ERROR_LOG(rc);
|
||||||
|
OBJ_DESTRUCT(&node_list);
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (NULL != app->dash_host) {
|
||||||
|
/* parse and add to list, ignoring duplicates */
|
||||||
|
if (ORTE_SUCCESS != (rc = orte_util_add_dash_host_nodes(&node_list,
|
||||||
|
&ignored,
|
||||||
|
app->dash_host))) {
|
||||||
|
ORTE_ERROR_LOG(rc);
|
||||||
|
OBJ_DESTRUCT(&node_list);
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* add all these nodes to the map */
|
/* add all these nodes to the map */
|
||||||
while (NULL != (item = opal_list_remove_first(&node_list))) {
|
while (NULL != (item = opal_list_remove_first(&node_list))) {
|
||||||
node = (orte_node_t*)item;
|
node = (orte_node_t*)item;
|
||||||
|
/* if this is my node, ignore it - we are already here */
|
||||||
|
if (0 == strcmp(node->name, orte_process_info.nodename)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
opal_pointer_array_add(map->nodes, (void*)node);
|
opal_pointer_array_add(map->nodes, (void*)node);
|
||||||
++(map->num_nodes);
|
++(map->num_nodes);
|
||||||
/* if this node already has a daemon, release that object
|
/* if this node already has a daemon, release that object
|
||||||
|
@ -80,6 +80,7 @@
|
|||||||
#include "orte/mca/debugger/base/base.h"
|
#include "orte/mca/debugger/base/base.h"
|
||||||
#include "orte/mca/odls/odls.h"
|
#include "orte/mca/odls/odls.h"
|
||||||
#include "orte/mca/plm/plm.h"
|
#include "orte/mca/plm/plm.h"
|
||||||
|
#include "orte/mca/plm/base/plm_private.h"
|
||||||
#include "orte/mca/rml/rml.h"
|
#include "orte/mca/rml/rml.h"
|
||||||
#include "orte/mca/rml/rml_types.h"
|
#include "orte/mca/rml/rml_types.h"
|
||||||
#include "orte/mca/rml/base/rml_contact.h"
|
#include "orte/mca/rml/base/rml_contact.h"
|
||||||
@ -429,6 +430,10 @@ static opal_cmd_line_init_t cmd_line_init[] = {
|
|||||||
NULL, OPAL_CMD_LINE_TYPE_INT,
|
NULL, OPAL_CMD_LINE_TYPE_INT,
|
||||||
"Max number of times to restart a failed process" },
|
"Max number of times to restart a failed process" },
|
||||||
|
|
||||||
|
{ "orte", "vm", "launch", '\0', "vm", "vm", 0,
|
||||||
|
&orterun_globals.launch_vm, OPAL_CMD_LINE_TYPE_BOOL,
|
||||||
|
"Launch daemons on all nodes at start to create a virtual machine [Default = false]" },
|
||||||
|
|
||||||
#if OPAL_ENABLE_CRDEBUG == 1
|
#if OPAL_ENABLE_CRDEBUG == 1
|
||||||
{ "opal", "cr", "enable_crdebug", '\0', "crdebug", "crdebug", 0,
|
{ "opal", "cr", "enable_crdebug", '\0', "crdebug", "crdebug", 0,
|
||||||
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
||||||
@ -462,6 +467,7 @@ int orterun(int argc, char *argv[])
|
|||||||
opal_cmd_line_t cmd_line;
|
opal_cmd_line_t cmd_line;
|
||||||
char * tmp_env_var = NULL;
|
char * tmp_env_var = NULL;
|
||||||
orte_debugger_breakpoint_fn_t foo;
|
orte_debugger_breakpoint_fn_t foo;
|
||||||
|
orte_job_t *daemons;
|
||||||
|
|
||||||
/* find our basename (the name of the executable) so that we can
|
/* find our basename (the name of the executable) so that we can
|
||||||
use it in pretty-print error messages */
|
use it in pretty-print error messages */
|
||||||
@ -472,7 +478,7 @@ int orterun(int argc, char *argv[])
|
|||||||
opal_cmd_line_create(&cmd_line, cmd_line_init);
|
opal_cmd_line_create(&cmd_line, cmd_line_init);
|
||||||
mca_base_cmd_line_setup(&cmd_line);
|
mca_base_cmd_line_setup(&cmd_line);
|
||||||
if (ORTE_SUCCESS != (rc = opal_cmd_line_parse(&cmd_line, true,
|
if (ORTE_SUCCESS != (rc = opal_cmd_line_parse(&cmd_line, true,
|
||||||
argc, argv)) ) {
|
argc, argv)) ) {
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -585,7 +591,7 @@ int orterun(int argc, char *argv[])
|
|||||||
|
|
||||||
if (0 == jdata->num_apps) {
|
if (0 == jdata->num_apps) {
|
||||||
/* This should never happen -- this case should be caught in
|
/* This should never happen -- this case should be caught in
|
||||||
create_app(), but let's just double check... */
|
create_app(), but let's just double check... */
|
||||||
orte_show_help("help-orterun.txt", "orterun:nothing-to-do",
|
orte_show_help("help-orterun.txt", "orterun:nothing-to-do",
|
||||||
true, orte_basename);
|
true, orte_basename);
|
||||||
exit(ORTE_ERROR_DEFAULT_EXIT_CODE);
|
exit(ORTE_ERROR_DEFAULT_EXIT_CODE);
|
||||||
@ -655,23 +661,23 @@ int orterun(int argc, char *argv[])
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Change the default behavior of libevent such that we want to
|
/* Change the default behavior of libevent such that we want to
|
||||||
continually block rather than blocking for the default timeout
|
continually block rather than blocking for the default timeout
|
||||||
and then looping around the progress engine again. There
|
and then looping around the progress engine again. There
|
||||||
should be nothing in the orted that cannot block in libevent
|
should be nothing in the orted that cannot block in libevent
|
||||||
until "something" happens (i.e., there's no need to keep
|
until "something" happens (i.e., there's no need to keep
|
||||||
cycling through progress because the only things that should
|
cycling through progress because the only things that should
|
||||||
happen will happen in libevent). This is a minor optimization,
|
happen will happen in libevent). This is a minor optimization,
|
||||||
but what the heck... :-) */
|
but what the heck... :-) */
|
||||||
opal_progress_set_event_flag(OPAL_EVLOOP_ONCE);
|
opal_progress_set_event_flag(OPAL_EVLOOP_ONCE);
|
||||||
|
|
||||||
/* If we have a prefix, then modify the PATH and
|
/* If we have a prefix, then modify the PATH and
|
||||||
LD_LIBRARY_PATH environment variables in our copy. This
|
LD_LIBRARY_PATH environment variables in our copy. This
|
||||||
will ensure that any locally-spawned children will
|
will ensure that any locally-spawned children will
|
||||||
have our executables and libraries in their path
|
have our executables and libraries in their path
|
||||||
|
|
||||||
For now, default to the prefix_dir provided in the first app_context.
|
For now, default to the prefix_dir provided in the first app_context.
|
||||||
Since there always MUST be at least one app_context, we are safe in
|
Since there always MUST be at least one app_context, we are safe in
|
||||||
doing this.
|
doing this.
|
||||||
*/
|
*/
|
||||||
if (NULL != ((orte_app_context_t*)jdata->apps->addr[0])->prefix_dir) {
|
if (NULL != ((orte_app_context_t*)jdata->apps->addr[0])->prefix_dir) {
|
||||||
char *oldenv, *newenv, *lib_base, *bin_base;
|
char *oldenv, *newenv, *lib_base, *bin_base;
|
||||||
@ -778,6 +784,58 @@ int orterun(int argc, char *argv[])
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* if we are launching the vm, now is the time to do so */
|
||||||
|
if (orterun_globals.launch_vm) {
|
||||||
|
int32_t ljob, i;
|
||||||
|
orte_app_context_t *app;
|
||||||
|
|
||||||
|
/* we may need to look at the apps for the user's job
|
||||||
|
* to get our full list of nodes, so prep the job for
|
||||||
|
* launch. This duplicates some code in orte_plm_base_setup_job
|
||||||
|
* that won't run if we do this here - eventually, we'll want
|
||||||
|
* to refactor the plm_base routine to avoid the duplication
|
||||||
|
*/
|
||||||
|
/* get a jobid for it */
|
||||||
|
if (ORTE_SUCCESS != (rc = orte_plm_base_create_jobid(jdata))) {
|
||||||
|
ORTE_ERROR_LOG(rc);
|
||||||
|
goto DONE;
|
||||||
|
}
|
||||||
|
/* store it on the global job data pool - this is the key
|
||||||
|
* step required before we launch the daemons. It allows
|
||||||
|
* the orte_rmaps_base_setup_virtual_machine routine to
|
||||||
|
* search all apps for any hosts to be used by the vm
|
||||||
|
*/
|
||||||
|
ljob = ORTE_LOCAL_JOBID(jdata->jobid);
|
||||||
|
opal_pointer_array_set_item(orte_job_data, ljob, jdata);
|
||||||
|
|
||||||
|
/* set the job state */
|
||||||
|
jdata->state = ORTE_JOB_STATE_INIT;
|
||||||
|
|
||||||
|
/* if job recovery is not defined, set it to default */
|
||||||
|
if (!jdata->recovery_defined) {
|
||||||
|
/* set to system default */
|
||||||
|
jdata->enable_recovery = orte_enable_recovery;
|
||||||
|
}
|
||||||
|
/* if app recovery is not defined, set apps to defaults */
|
||||||
|
for (i=0; i < jdata->apps->size; i++) {
|
||||||
|
if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (!app->recovery_defined) {
|
||||||
|
app->max_restarts = orte_max_restarts;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* get the daemon job object */
|
||||||
|
daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
|
||||||
|
/* launch the daemons */
|
||||||
|
if (ORTE_SUCCESS != (rc = orte_plm.spawn(daemons))) {
|
||||||
|
fprintf(stderr, "%s: UNABLE TO LAUNCH VIRTUAL MACHINE\n", orte_basename);
|
||||||
|
goto DONE;
|
||||||
|
}
|
||||||
|
/* ensure all future jobs use the VM */
|
||||||
|
orte_default_mapping_policy |= ORTE_MAPPING_USE_VM;
|
||||||
|
}
|
||||||
|
|
||||||
/* setup for debugging */
|
/* setup for debugging */
|
||||||
orte_debugger.init_before_spawn(jdata);
|
orte_debugger.init_before_spawn(jdata);
|
||||||
|
|
||||||
@ -793,7 +851,7 @@ int orterun(int argc, char *argv[])
|
|||||||
/* we only reach this point by jumping there due
|
/* we only reach this point by jumping there due
|
||||||
* to an error - so just cleanup and leave
|
* to an error - so just cleanup and leave
|
||||||
*/
|
*/
|
||||||
DONE:
|
DONE:
|
||||||
ORTE_UPDATE_EXIT_STATUS(orte_exit_status);
|
ORTE_UPDATE_EXIT_STATUS(orte_exit_status);
|
||||||
orte_quit();
|
orte_quit();
|
||||||
|
|
||||||
@ -816,6 +874,7 @@ static int init_globals(void)
|
|||||||
orterun_globals.report_pid = NULL;
|
orterun_globals.report_pid = NULL;
|
||||||
orterun_globals.report_uri = NULL;
|
orterun_globals.report_uri = NULL;
|
||||||
orterun_globals.disable_recovery = false;
|
orterun_globals.disable_recovery = false;
|
||||||
|
orterun_globals.launch_vm = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Reset the other fields every time */
|
/* Reset the other fields every time */
|
||||||
|
@ -67,6 +67,7 @@ struct orterun_globals_t {
|
|||||||
char *sstore_load;
|
char *sstore_load;
|
||||||
#endif
|
#endif
|
||||||
bool disable_recovery;
|
bool disable_recovery;
|
||||||
|
bool launch_vm;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user