Final merge of stuff from the /tmp/tm-stuff tree (merged through /tmp/tm-merge). Validated by RHC.

Summary:
- Add --nolocal (and -nolocal) options to orterun
- Make some scalability improvements to the tm pls

This commit was SVN r10651.
This commit is contained in:
parent d2bf3844e9
commit 538965aeb0
@@ -9,6 +9,7 @@
  * University of Stuttgart.  All rights reserved.
  * Copyright (c) 2004-2005 The Regents of the University of California.
  * All rights reserved.
+ * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -32,6 +33,7 @@ extern "C" {
     orte_pls_base_component_t super;
     int priority;
     int debug;
+    int verbose;
     bool want_path_check;
     char *orted;
     char **checked_paths;
@@ -9,6 +9,7 @@
  * University of Stuttgart.  All rights reserved.
  * Copyright (c) 2004-2005 The Regents of the University of California.
  * All rights reserved.
+ * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -92,8 +93,10 @@ static int pls_tm_open(void)
     int tmp;
     mca_base_component_t *comp = &mca_pls_tm_component.super.pls_version;

-    mca_base_param_reg_int(comp, "debug", "Enable debugging of TM pls",
+    mca_base_param_reg_int(comp, "debug", "Enable debugging of the TM pls",
                            false, false, 0, &mca_pls_tm_component.debug);
+    mca_base_param_reg_int(comp, "verbose", "Enable verbose output of the TM pls",
+                           false, false, 0, &mca_pls_tm_component.verbose);

     mca_base_param_reg_int(comp, "priority", "Default selection priority",
                            false, false, 75, &mca_pls_tm_component.priority);
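For reference, a minimal sketch of how an integer MCA parameter of this era is registered and read back, following the mca_base_param_reg_int() calls shown in the hunk above. The helper and the parameter name "example" are hypothetical, used only for illustration; the headers are an assumption about what a standalone translation unit would need.

#include <stdbool.h>
#include "opal/mca/mca.h"
#include "opal/mca/base/mca_base_param.h"

/* Register a component-level integer parameter (not internal, not
 * read-only, default 0) and return its current value, which may have
 * been overridden on the command line with -mca <component>_example <n>. */
static int register_example_param(mca_base_component_t *comp)
{
    int value;

    mca_base_param_reg_int(comp, "example",
                           "Illustrative integer parameter",
                           false, false, 0, &value);
    return value;
}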
@@ -38,6 +38,7 @@
 #include <tm.h>

 #include "opal/install_dirs.h"
+#include "opal/event/event.h"
 #include "opal/util/argv.h"
 #include "opal/util/output.h"
 #include "opal/util/opal_environ.h"
@@ -64,6 +65,7 @@
 #include "pls_tm.h"


+
 /*
  * Local functions
  */
@@ -98,8 +100,8 @@ extern char **environ;
 static int
 pls_tm_launch(orte_jobid_t jobid)
 {
-    opal_list_t nodes, mapping_list;
-    opal_list_item_t *item, *item2;
+    opal_list_t mapping;
+    opal_list_item_t *m_item, *n_item;
     size_t num_nodes;
     orte_vpid_t vpid;
     int node_name_index;
@@ -110,27 +112,31 @@ pls_tm_launch(orte_jobid_t jobid)
     int argc;
     int rc;
     bool connected = false;
-    opal_list_t map;
-    char *cur_prefix;
+    int launched = 0, i;
     char *bin_base = NULL, *lib_base = NULL;

     /* Query the list of nodes allocated and mapped to this job.
      * We need the entire mapping for a couple of reasons:
      *  - need the prefix to start with.
      *  - need to know if we are launching on a subset of the allocated nodes
-     * All other mapping responsibilities fall to orted in the fork PLS
      */
-    OBJ_CONSTRUCT(&nodes, opal_list_t);
-    OBJ_CONSTRUCT(&mapping_list, opal_list_t);
-    rc = orte_rmaps_base_mapped_node_query(&mapping_list, &nodes, jobid);
+    OBJ_CONSTRUCT(&mapping, opal_list_t);
+    rc = orte_rmaps_base_get_map(jobid, &mapping);
     if (ORTE_SUCCESS != rc) {
         goto cleanup;
     }

+    num_nodes = 0;
+    for(m_item = opal_list_get_first(&mapping);
+        m_item != opal_list_get_end(&mapping);
+        m_item = opal_list_get_next(m_item)) {
+        orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)m_item;
+        num_nodes += opal_list_get_size(&map->nodes);
+    }
+
     /*
      * Allocate a range of vpids for the daemons.
      */
-    num_nodes = opal_list_get_size(&nodes);
     if (num_nodes == 0) {
         return ORTE_ERR_BAD_PARAM;
     }
@@ -226,100 +232,23 @@ pls_tm_launch(orte_jobid_t jobid)
     bin_base = opal_basename(OPAL_BINDIR);

     /*
-     * Iterate through each of the nodes and spin
-     * up a daemon.
+     * iterate through each of the contexts
      */
-    for(item = opal_list_get_first(&nodes);
-        item != opal_list_get_end(&nodes);
-        item = opal_list_get_next(item)) {
-        orte_ras_node_t* node = (orte_ras_node_t*)item;
-        orte_process_name_t* name;
-        char* name_string;
+    for (m_item = opal_list_get_first(&mapping);
+         m_item != opal_list_get_end(&mapping);
+         m_item = opal_list_get_next(m_item)) {
+        orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)m_item;
         char** env;
         char* var;
-        size_t num_processes;

-        OBJ_CONSTRUCT(&map, opal_list_t);
-        /* Get the mapping of this very node */
-        rc = orte_rmaps_base_get_node_map(orte_process_info.my_name->cellid,
-                                          jobid,
-                                          node->node_name,
-                                          &map);
-        if (ORTE_SUCCESS != rc) {
-            ORTE_ERROR_LOG(rc);
-            goto cleanup;
-        }
-
-        /* Copy the prefix-directory specified within the
-           corresponding app_context.  If there are multiple,
-           different prefix's for this node, complain */
-        cur_prefix = NULL;
-        num_processes = 0;
-        for (item2 = opal_list_get_first(&map);
-             item2 != opal_list_get_end(&map);
-             item2 = opal_list_get_next(item2)) {
-            orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*) item2;
-            char * app_prefix_dir = map->app->prefix_dir;
-
-            /* Increment the number of processes allocated to this node
-             * This allows us to accurately test for oversubscription */
-            num_processes += map->num_procs;
-
-            /* Check for already set cur_prefix -- if different,
-               complain */
-            if (NULL != app_prefix_dir) {
-                if (NULL != cur_prefix &&
-                    0 != strcmp (cur_prefix, app_prefix_dir)) {
-                    opal_show_help("help-pls-tm.txt", "multiple-prefixes",
-                                   true, node->node_name,
-                                   cur_prefix, app_prefix_dir);
-                    return ORTE_ERR_FATAL;
-                }
-
-                /* If not yet set, copy it; iff set, then it's the
-                   same anyway */
-                if (NULL == cur_prefix) {
-                    cur_prefix = strdup(map->app->prefix_dir);
-                    if (mca_pls_tm_component.debug) {
-                        opal_output (0, "pls:tm: Set prefix:%s",
-                                     cur_prefix);
-                    }
-                }
-            }
-        }
-
-        /* setup node name */
-        argv[node_name_index] = node->node_name;
-
-        /* initialize daemons process name */
-        rc = orte_ns.create_process_name(&name, node->node_cellid, 0, vpid);
-        if (ORTE_SUCCESS != rc) {
-            ORTE_ERROR_LOG(rc);
-            goto cleanup;
-        }
-
-        /* setup per-node options */
-        if (mca_pls_tm_component.debug) {
-            opal_output(0, "pls:tm: launching on node %s",
-                        node->node_name);
-        }
-
-        /* setup process name */
-        rc = orte_ns.get_proc_name_string(&name_string, name);
-        if (ORTE_SUCCESS != rc) {
-            opal_output(0, "pls:tm: unable to create process name");
-            return rc;
-        }
-        argv[proc_name_index] = name_string;
-
         /* setup environment */
         env = opal_argv_copy(environ);
         var = mca_base_param_environ_variable("seed",NULL,NULL);
         opal_setenv(var, "0", true, &env);

         /* If we have a prefix, then modify the PATH and
            LD_LIBRARY_PATH environment variables. */
-        if (NULL != cur_prefix) {
+        if (NULL != map->app->prefix_dir) {
             int i;
             char *newenv;

@@ -327,7 +256,7 @@ pls_tm_launch(orte_jobid_t jobid)
                 /* Reset PATH */
                 if (0 == strncmp("PATH=", env[i], 5)) {
                     asprintf(&newenv, "%s/%s:%s",
-                             cur_prefix, bin_base, env[i] + 5);
+                             map->app->prefix_dir, bin_base, env[i] + 5);
                     if (mca_pls_tm_component.debug) {
                         opal_output(0, "pls:tm: resetting PATH: %s",
                                     newenv);
@@ -335,11 +264,11 @@ pls_tm_launch(orte_jobid_t jobid)
                     opal_setenv("PATH", newenv, true, &env);
                     free(newenv);
                 }

                 /* Reset LD_LIBRARY_PATH */
                 else if (0 == strncmp("LD_LIBRARY_PATH=", env[i], 16)) {
                     asprintf(&newenv, "%s/%s:%s",
-                             cur_prefix, lib_base, env[i] + 16);
+                             map->app->prefix_dir, lib_base, env[i] + 16);
                     if (mca_pls_tm_component.debug) {
                         opal_output(0, "pls:tm: resetting LD_LIBRARY_PATH: %s",
                                     newenv);
@@ -348,9 +277,8 @@ pls_tm_launch(orte_jobid_t jobid)
                     free(newenv);
                 }
             }
-            free(cur_prefix);
         }

         /* Do a quick sanity check to ensure that we can find the
            orted in the PATH */

@@ -361,68 +289,122 @@ pls_tm_launch(orte_jobid_t jobid)
                            true, argv[0]);
             goto cleanup;
         }

-        /* set the progress engine schedule for this node.
-         * if node_slots is set to zero, then we default to
-         * NOT being oversubscribed
+        /* Iterate through each of the nodes and spin
+         * up a daemon.
          */
-        if (node->node_slots > 0 &&
-            num_processes > node->node_slots) {
+        for (n_item = opal_list_get_first(&map->nodes);
+             n_item != opal_list_get_end(&map->nodes);
+             n_item = opal_list_get_next(n_item)) {
+            orte_rmaps_base_node_t* rmaps_node = (orte_rmaps_base_node_t*)n_item;
+            orte_ras_node_t* node = rmaps_node->node;
+            orte_process_name_t* name;
+            char* name_string;
+            size_t num_processes = 0;
+
+            /* already launched on this node */
+            if (0 != node->node_launched++) {
+                continue;
+            }
+
+            /* setup node name */
+            argv[node_name_index] = node->node_name;
+
+            /* initialize daemons process name */
+            rc = orte_ns.create_process_name(&name, node->node_cellid, 0, vpid);
+            if (ORTE_SUCCESS != rc) {
+                ORTE_ERROR_LOG(rc);
+                goto cleanup;
+            }
+
+            /* setup per-node options */
+            if (mca_pls_tm_component.debug ||
+                mca_pls_tm_component.verbose) {
+                opal_output(0, "pls:tm: launching on node %s",
+                            node->node_name);
+            }
+
+            /* setup process name */
+            rc = orte_ns.get_proc_name_string(&name_string, name);
+            if (ORTE_SUCCESS != rc) {
+                opal_output(0, "pls:tm: unable to create process name");
+                return rc;
+            }
+            argv[proc_name_index] = name_string;
+
+            /* set the progress engine schedule for this node.
+             * if node_slots is set to zero, then we default to
+             * NOT being oversubscribed
+             */
+            if (node->node_slots > 0 &&
+                opal_list_get_size(&rmaps_node->node_procs) > node->node_slots) {
+                if (mca_pls_tm_component.debug) {
+                    opal_output(0, "pls:tm: oversubscribed -- setting mpi_yield_when_idle to 1 (%d %d)",
+                                node->node_slots,
+                                opal_list_get_size(&rmaps_node->node_procs));
+                }
+                var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
+                opal_setenv(var, "1", true, &env);
+            } else {
+                if (mca_pls_tm_component.debug) {
+                    opal_output(0, "pls:tm: not oversubscribed -- setting mpi_yield_when_idle to 0");
+                }
+                var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
+                opal_setenv(var, "0", true, &env);
+            }
+            free(var);
+
+            /* save the daemons name on the node */
+            if (ORTE_SUCCESS != (rc = orte_pls_base_proxy_set_node_name(node,jobid,name))) {
+                ORTE_ERROR_LOG(rc);
+                goto cleanup;
+            }
+
+            /* exec the daemon */
             if (mca_pls_tm_component.debug) {
-                opal_output(0, "pls:tm: oversubscribed -- setting mpi_yield_when_idle to 1 (%d %d)",
-                            node->node_slots, num_processes);
+                param = opal_argv_join(argv, ' ');
+                if (NULL != param) {
+                    opal_output(0, "pls:tm: executing: %s", param);
+                    free(param);
+                }
             }
-            var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
-            opal_setenv(var, "1", true, &env);
-        } else {
-            if (mca_pls_tm_component.debug) {
-                opal_output(0, "pls:tm: not oversubscribed -- setting mpi_yield_when_idle to 0");
+
+            rc = pls_tm_start_proc(node->node_name, argc, argv, env);
+            if (ORTE_SUCCESS != rc) {
+                opal_output(0, "pls:tm: start_procs returned error %d", rc);
+                goto cleanup;
             }
-            var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
-            opal_setenv(var, "0", true, &env);
+            launched++;
+            vpid++;
+            free(name);
+            opal_event_loop(OPAL_EVLOOP_NONBLOCK);
         }
-        free(var);

-        /* save the daemons name on the node */
-        if (ORTE_SUCCESS != (rc = orte_pls_base_proxy_set_node_name(node,jobid,name))) {
-            ORTE_ERROR_LOG(rc);
-            goto cleanup;
-        }
-
-        /* exec the daemon */
-        if (mca_pls_tm_component.debug) {
-            param = opal_argv_join(argv, ' ');
-            if (NULL != param) {
-                opal_output(0, "pls:tm: executing: %s", param);
-                free(param);
-            }
-        }
-
-        rc = pls_tm_start_proc(node->node_name, argc, argv, env);
-        if (ORTE_SUCCESS != rc) {
-            opal_output(0, "pls:tm: start_procs returned error %d", rc);
-            goto cleanup;
-        }
-
-        vpid++;
-        free(name);
     }

- cleanup:
+    /* loop through all those that are launched and poll for
+       completion status */
+
+    for(i = 0; i < launched; i++){
+        int ret, local_err;
+        tm_event_t event;
+        ret = tm_poll(TM_NULL_EVENT, &event, 1, &local_err);
+        if (TM_SUCCESS != ret) {
+            errno = local_err;
+            opal_output(0, "pls:tm: failed to start a proc error %d", ret);
+            goto cleanup;
+        }
+    }
+
+ cleanup:
     if (connected) {
         pls_tm_disconnect();
     }

-    while (NULL != (item = opal_list_remove_first(&nodes))) {
-        OBJ_RELEASE(item);
+    while (NULL != (m_item = opal_list_remove_first(&mapping))) {
+        OBJ_RELEASE(m_item);
     }
-    OBJ_DESTRUCT(&nodes);
+    OBJ_DESTRUCT(&mapping);

-    while (NULL != (item = opal_list_remove_first(&mapping_list))) {
-        OBJ_RELEASE(item);
-    }
-    OBJ_DESTRUCT(&mapping_list);
-
     if (NULL != lib_base) {
         free(lib_base);
     }
@@ -520,6 +502,8 @@ static tm_node_id *tm_node_ids = NULL;
 static int num_tm_hostnames, num_node_ids;


+
+/* we don't call this anymore */
 /*
  * For a given TM node ID, get the string hostname corresponding to
  * it.
@@ -527,10 +511,10 @@ static int num_tm_hostnames, num_node_ids;
 static char*
 get_tm_hostname(tm_node_id node)
 {
-    int ret, local_errno;
     char *hostname;
-    tm_event_t event;
     char buffer[256];
+    int ret, local_errno;
+    tm_event_t event;
     char **argv;

     /* Get the info string corresponding to this TM node ID */
@@ -565,6 +549,7 @@ get_tm_hostname(tm_node_id node)
 }


+/* we don't call this anymore!*/
 static int
 query_tm_hostnames(void)
 {
@@ -598,7 +583,7 @@ query_tm_hostnames(void)
     return ORTE_SUCCESS;
 }

+/* we don't call this anymore! */
 static int
 do_tm_resolve(char *hostname, tm_node_id *tnodeid)
 {
@@ -637,7 +622,7 @@ do_tm_resolve(char *hostname, tm_node_id *tnodeid)
 static int
 pls_tm_start_proc(char *nodename, int argc, char **argv, char **env)
 {
-    int ret, local_err;
+    int ret;
     tm_node_id node_id;
     tm_task_id task_id;
     tm_event_t event;
@@ -649,12 +634,6 @@ pls_tm_start_proc(char *nodename, int argc, char **argv, char **env)
     ret = tm_spawn(argc, argv, env, node_id, &task_id, &event);
     if (TM_SUCCESS != ret) return ORTE_ERROR;

-    ret = tm_poll(TM_NULL_EVENT, &event, 1, &local_err);
-    if (TM_SUCCESS != ret) {
-        errno = local_err;
-        return ORTE_ERR_IN_ERRNO;
-    }
-
     return ORTE_SUCCESS;
 }

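The hunks above are the scalability change mentioned in the commit message: pls_tm_start_proc() no longer blocks in tm_poll() after every tm_spawn(), and pls_tm_launch() instead counts how many daemons were started and collects all of the spawn events in one pass at the end. A minimal sketch of that pattern, using only the tm_spawn()/tm_poll() calls visible in the diff; the helper name, argument handling, and the simplified error path are illustrative assumptions, not the actual function.

#include <tm.h>

/* Illustrative only: issue every tm_spawn() request first, then poll
 * once per successful spawn, rather than waiting for each spawn to
 * complete before starting the next one. */
static int spawn_then_poll(int num_nodes, tm_node_id *node_ids,
                           int argc, char **argv, char **env)
{
    tm_task_id task_id;
    tm_event_t event;
    int i, ret, local_err, launched = 0;

    for (i = 0; i < num_nodes; ++i) {
        ret = tm_spawn(argc, argv, env, node_ids[i], &task_id, &event);
        if (TM_SUCCESS != ret) {
            return -1;              /* caller cleans up */
        }
        ++launched;
    }

    /* one event comes back for each tm_spawn() request issued above */
    for (i = 0; i < launched; ++i) {
        ret = tm_poll(TM_NULL_EVENT, &event, 1, &local_err);
        if (TM_SUCCESS != ret) {
            return -1;
        }
    }
    return 0;
}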
@@ -9,6 +9,7 @@
  * University of Stuttgart.  All rights reserved.
  * Copyright (c) 2004-2005 The Regents of the University of California.
  * All rights reserved.
+ * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -173,7 +174,10 @@ static int discover(opal_list_t* nodelist)
         return ORTE_ERR_OUT_OF_RESOURCE;
     }

-    /* Iterate through all the nodes and make an entry for each */
+    /* Iterate through all the nodes and make an entry for each.  TM
+       node ID's will never be duplicated, but they may end up
+       resolving to the same hostname (i.e., vcpu's on a single
+       host). */

     OBJ_CONSTRUCT(&new_nodes, opal_list_t);
     for (i = 0; i < num_node_ids; ++i) {
@@ -296,6 +300,6 @@ static int get_tm_hostname(tm_node_id node, char **hostname, char **arch)
     /* All done */

     opal_output(orte_ras_base.ras_output,
-                "ras:tm:hostname: got hostname %s", hostname);
+                "ras:tm:hostname: got hostname %s", *hostname);
     return ORTE_SUCCESS;
 }
@@ -9,6 +9,7 @@
  * University of Stuttgart.  All rights reserved.
  * Copyright (c) 2004-2005 The Regents of the University of California.
  * All rights reserved.
+ * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -67,7 +68,7 @@ int orte_rmaps_base_open(void)

     /* Debugging / verbose output */

-    param = mca_base_param_reg_int_name("rmaps_base", "verbose",
+    param = mca_base_param_reg_int_name("rmaps", "base_verbose",
                                         "Verbosity level for the rmaps framework",
                                         false, false, 0, &value);
     if (value != 0) {
@@ -78,13 +79,19 @@ int orte_rmaps_base_open(void)

     /* Are we scheduling by node or by slot? */

-    param = mca_base_param_reg_string_name("rmaps_base", "schedule_policy",
+    param = mca_base_param_reg_string_name("rmaps", "base_schedule_policy",
                                            "Scheduling Policy for RMAPS. [slot | node]",
                                            false, false, "slot", &policy);
     if (0 == strcmp(policy, "node")) {
         mca_base_param_set_string(param, "node");
     }

+    /* Should we schedule on the local node or not? */
+
+    mca_base_param_reg_int_name("rmaps", "base_schedule_local",
+                                "If nonzero, allow scheduling MPI applications on the same node as mpirun (default). If zero, do not schedule any MPI applications on the same node as mpirun",
+                                false, false, 1, &value);
+
     /* Open up all the components that we can find */

     if (ORTE_SUCCESS !=
@@ -9,12 +9,14 @@
  * University of Stuttgart.  All rights reserved.
  * Copyright (c) 2004-2005 The Regents of the University of California.
  * All rights reserved.
+ * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
  *
  * $HEADER$
  */

 #include "orte_config.h"
 #include <errno.h>
 #ifdef HAVE_UNISTD_H
@@ -24,11 +26,14 @@
 #include <string.h>
 #endif  /* HAVE_STRING_H */

-#include "orte/orte_constants.h"
-#include "orte/orte_types.h"
+#include "opal/mca/base/mca_base_param.h"
 #include "opal/util/output.h"
 #include "opal/util/show_help.h"
 #include "opal/util/argv.h"
+#include "opal/util/if.h"
+#include "orte/orte_constants.h"
+#include "orte/orte_types.h"
+#include "orte/util/sys_info.h"
 #include "orte/mca/ns/ns.h"
 #include "orte/mca/gpr/gpr.h"
 #include "orte/mca/rmaps/base/base.h"
@@ -416,7 +421,8 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid)
     int rc = ORTE_SUCCESS;
     bool bynode = true;
     char **mapped_nodes = NULL;
     int num_mapped_nodes = 0;
+    int id, value;

     /* query for the application context and allocated nodes */
     if(ORTE_SUCCESS != (rc = orte_rmgr_base_get_app_context(jobid, &context, &num_context))) {
@@ -437,6 +443,24 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid)
         return rc;
     }

+    /* If the "no local" option was set, then remove the local node
+       from the list */
+
+    id = mca_base_param_find("rmaps", NULL, "base_schedule_local");
+    mca_base_param_lookup_int(id, &value);
+    if (0 == value) {
+        for (item = opal_list_get_first(&nodes);
+             item != opal_list_get_end(&nodes);
+             item = opal_list_get_next(item) ) {
+            if (0 == strcmp(((orte_ras_node_t *) item)->node_name,
+                            orte_system_info.nodename) ||
+                opal_ifislocal(((orte_ras_node_t *) item)->node_name)) {
+                opal_list_remove_item(&nodes, item);
+                break;
+            }
+        }
+    }
+
     /* Sanity check to make sure we have been allocated nodes */
     if (0 == opal_list_get_size(&nodes)) {
         OBJ_DESTRUCT(&nodes);
@@ -489,7 +513,8 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid)
        end, bounce back to the front (as would happen in the loop
        below)

-       But do a bozo check to ensure that we don't have a empty node list.*/
+       But do a bozo check to ensure that we don't have a empty
+       node list.*/
     if (0 == opal_list_get_size(&nodes)) {
         rc = ORTE_ERR_TEMP_OUT_OF_RESOURCE;
         goto cleanup;
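The round-robin mapper hunk above is where --nolocal actually takes effect: when the rmaps base_schedule_local parameter is zero, the node matching this host is removed from the allocation before mapping begins. A small sketch of the locality test used there, factored into a helper for clarity; the helper name is hypothetical and the real code inlines the test against the orte_ras_node_t in the node list.

#include <stdbool.h>
#include <string.h>

#include "opal/util/if.h"
#include "orte/util/sys_info.h"

/* A node counts as "local" if its RAS node name is exactly this host's
 * nodename, or if the name resolves to one of this host's interfaces
 * (e.g., "localhost"). */
static bool node_is_local(char *node_name)
{
    return (0 == strcmp(node_name, orte_system_info.nodename)) ||
           opal_ifislocal(node_name);
}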
@@ -181,6 +181,13 @@ Synonym for \fI-np\fP.
 .
 .
 .TP
+.B -nolocal\fR,\fP --nolocal
+Do not run any copies of the launched application on the same node as
+orterun is running.  This option will override listing the localhost
+with \fB--host\fR or any other host-specifying mechanism.
+.
+.
+.TP
 .B -np \fR<#>\fP
 Run this many copies of the program on the given nodes.  This option
 indicates that the specified file is an executable program and not an
@@ -409,6 +416,30 @@ on hosts b and c.
 .
 .
 .
+.SS No Local Launch
+.
+Using the \fB--nolocal\fR option to orterun tells the system to not
+launch any of the application processes on the same node that orterun
+is running.  While orterun typically blocks and consumes few system
+resources, this option can be helpful for launching very large jobs
+where orterun may actually need to use noticable amounts of memory
+and/or processing time.  \fB--nolocal\fR allows orteun to run without
+sharing the local node with the launched applications, and likewise
+allows the launched applications to run unhindered by orterun's system
+usage.
+.PP
+Note that \fB--nolocal\fR will override any other specification to
+launch the application on the local node.  It will disqualify the
+localhost from being capable of running any processes in the
+application.
+.
+.
+.TP
+shell$ mpirun -np 1 --host localhost --nolocal hostname
+This example will result in an error because orterun will not find
+anywhere to launch the application.
+.
+.
 .SS Application Context or Executable Program?
 .
 To distinguish the two different forms, \fImpirun\fP
@@ -105,6 +105,7 @@ struct globals_t {
     bool by_node;
     bool by_slot;
     bool debugger;
+    bool no_local_schedule;
     size_t num_procs;
     int exit_status;
     char *hostfile;
@@ -209,6 +210,11 @@ opal_cmd_line_init_t cmd_line_init[] = {
       NULL, OPAL_CMD_LINE_TYPE_STRING,
       "List of hosts to invoke processes on" },

+    /* OSC mpiexec-like arguments */
+    { NULL, NULL, NULL, '\0', "nolocal", "nolocal", 0,
+      &orterun_globals.no_local_schedule, OPAL_CMD_LINE_TYPE_BOOL,
+      "Do not run any MPI applications on the local node" },
+
     /* User-level debugger arguments */
     { NULL, NULL, NULL, '\0', "tv", "tv", 0,
       &orterun_globals.debugger, OPAL_CMD_LINE_TYPE_BOOL,
@@ -761,6 +767,7 @@ static int init_globals(void)
         false,
         false,
         false,
+        false,
         0,
         0,
         NULL,
@@ -855,7 +862,7 @@ static int parse_globals(int argc, char* argv[])
      * since it really should be initialized in rmaps_base_open */
     if (orterun_globals.by_node || orterun_globals.by_slot) {
         char *policy = NULL;
-        id = mca_base_param_reg_string_name("rmaps_base", "schedule_policy",
+        id = mca_base_param_reg_string_name("rmaps", "base_schedule_policy",
                                             "Scheduling policy for RMAPS. [slot | node]",
                                             false, false, "slot", &policy);

@@ -873,6 +880,17 @@ static int parse_globals(int argc, char* argv[])
         orterun_globals.by_slot = true;
     }

+    /* Do we want to allow MPI applications on the same node as
+       mpirun? */
+    id = mca_base_param_reg_int_name("rmaps", "base_schedule_local",
+                                     "If nonzero, allow scheduling MPI applications on the same node as mpirun (default). If zero, do not schedule any MPI applications on the same node as mpirun",
+                                     false, false, 1, &ret);
+    if (orterun_globals.no_local_schedule) {
+        mca_base_param_set_int(id, 0);
+    } else {
+        mca_base_param_set_int(id, 1);
+    }
+
     /* If we don't want to wait, we don't want to wait */

     if (orterun_globals.no_wait_for_job_completion) {