Final merge of stuff from /tmp/tm-stuff tree (merged through
/tmp/tm-merge). Validated by RHC.

Summary:
- Add --nolocal (and -nolocal) options to orterun
- Make some scalability improvements to the tm pls

This commit was SVN r10651.
This commit is contained in:
parent d2bf3844e9
commit 538965aeb0
@@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow

@@ -32,6 +33,7 @@ extern "C" {
orte_pls_base_component_t super;
int priority;
int debug;
int verbose;
bool want_path_check;
char *orted;
char **checked_paths;

@@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow

@@ -92,8 +93,10 @@ static int pls_tm_open(void)
int tmp;
mca_base_component_t *comp = &mca_pls_tm_component.super.pls_version;

mca_base_param_reg_int(comp, "debug", "Enable debugging of TM pls",
mca_base_param_reg_int(comp, "debug", "Enable debugging of the TM pls",
false, false, 0, &mca_pls_tm_component.debug);
mca_base_param_reg_int(comp, "verbose", "Enable verbose output of the TM pls",
false, false, 0, &mca_pls_tm_component.verbose);

mca_base_param_reg_int(comp, "priority", "Default selection priority",
false, false, 75, &mca_pls_tm_component.priority);
@@ -38,6 +38,7 @@
#include <tm.h>

#include "opal/install_dirs.h"
#include "opal/event/event.h"
#include "opal/util/argv.h"
#include "opal/util/output.h"
#include "opal/util/opal_environ.h"

@@ -64,6 +65,7 @@
#include "pls_tm.h"

/*
* Local functions
*/

@@ -98,8 +100,8 @@ extern char **environ;
static int
pls_tm_launch(orte_jobid_t jobid)
{
opal_list_t nodes, mapping_list;
opal_list_item_t *item, *item2;
opal_list_t mapping;
opal_list_item_t *m_item, *n_item;
size_t num_nodes;
orte_vpid_t vpid;
int node_name_index;

@@ -110,27 +112,31 @@ pls_tm_launch(orte_jobid_t jobid)
int argc;
int rc;
bool connected = false;
opal_list_t map;
char *cur_prefix;
int launched = 0, i;
char *bin_base = NULL, *lib_base = NULL;

/* Query the list of nodes allocated and mapped to this job.
/* Query the list of nodes allocated and mapped to this job.
* We need the entire mapping for a couple of reasons:
* - need the prefix to start with.
* - need to know if we are launching on a subset of the allocated nodes
* All other mapping responsibilities fall to orted in the fork PLS
*/
OBJ_CONSTRUCT(&nodes, opal_list_t);
OBJ_CONSTRUCT(&mapping_list, opal_list_t);
rc = orte_rmaps_base_mapped_node_query(&mapping_list, &nodes, jobid);
OBJ_CONSTRUCT(&mapping, opal_list_t);
rc = orte_rmaps_base_get_map(jobid, &mapping);
if (ORTE_SUCCESS != rc) {
goto cleanup;
}

num_nodes = 0;
for(m_item = opal_list_get_first(&mapping);
m_item != opal_list_get_end(&mapping);
m_item = opal_list_get_next(m_item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)m_item;
num_nodes += opal_list_get_size(&map->nodes);
}

/*
* Allocate a range of vpids for the daemons.
*/
num_nodes = opal_list_get_size(&nodes);
if (num_nodes == 0) {
return ORTE_ERR_BAD_PARAM;
}
@@ -226,100 +232,23 @@ pls_tm_launch(orte_jobid_t jobid)
bin_base = opal_basename(OPAL_BINDIR);

/*
* Iterate through each of the nodes and spin
* up a daemon.
* iterate through each of the contexts
*/
for(item = opal_list_get_first(&nodes);
item != opal_list_get_end(&nodes);
item = opal_list_get_next(item)) {
orte_ras_node_t* node = (orte_ras_node_t*)item;
orte_process_name_t* name;
char* name_string;
for (m_item = opal_list_get_first(&mapping);
m_item != opal_list_get_end(&mapping);
m_item = opal_list_get_next(m_item)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)m_item;
char** env;
char* var;
size_t num_processes;

OBJ_CONSTRUCT(&map, opal_list_t);
/* Get the mapping of this very node */
rc = orte_rmaps_base_get_node_map(orte_process_info.my_name->cellid,
jobid,
node->node_name,
&map);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}

/* Copy the prefix-directory specified within the
corresponding app_context. If there are multiple,
different prefix's for this node, complain */
cur_prefix = NULL;
num_processes = 0;
for (item2 = opal_list_get_first(&map);
item2 != opal_list_get_end(&map);
item2 = opal_list_get_next(item2)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*) item2;
char * app_prefix_dir = map->app->prefix_dir;

/* Increment the number of processes allocated to this node
* This allows us to accurately test for oversubscription */
num_processes += map->num_procs;

/* Check for already set cur_prefix -- if different,
complain */
if (NULL != app_prefix_dir) {
if (NULL != cur_prefix &&
0 != strcmp (cur_prefix, app_prefix_dir)) {
opal_show_help("help-pls-tm.txt", "multiple-prefixes",
true, node->node_name,
cur_prefix, app_prefix_dir);
return ORTE_ERR_FATAL;
}

/* If not yet set, copy it; iff set, then it's the
same anyway */
if (NULL == cur_prefix) {
cur_prefix = strdup(map->app->prefix_dir);
if (mca_pls_tm_component.debug) {
opal_output (0, "pls:tm: Set prefix:%s",
cur_prefix);
}
}
}
}

/* setup node name */
argv[node_name_index] = node->node_name;

/* initialize daemons process name */
rc = orte_ns.create_process_name(&name, node->node_cellid, 0, vpid);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}

/* setup per-node options */
if (mca_pls_tm_component.debug) {
opal_output(0, "pls:tm: launching on node %s",
node->node_name);
}

/* setup process name */
rc = orte_ns.get_proc_name_string(&name_string, name);
if (ORTE_SUCCESS != rc) {
opal_output(0, "pls:tm: unable to create process name");
return rc;
}
argv[proc_name_index] = name_string;

/* setup environment */
env = opal_argv_copy(environ);
var = mca_base_param_environ_variable("seed",NULL,NULL);
opal_setenv(var, "0", true, &env);

/* If we have a prefix, then modify the PATH and
LD_LIBRARY_PATH environment variables. */
if (NULL != cur_prefix) {
if (NULL != map->app->prefix_dir) {
int i;
char *newenv;

@@ -327,7 +256,7 @@ pls_tm_launch(orte_jobid_t jobid)
/* Reset PATH */
if (0 == strncmp("PATH=", env[i], 5)) {
asprintf(&newenv, "%s/%s:%s",
cur_prefix, bin_base, env[i] + 5);
map->app->prefix_dir, bin_base, env[i] + 5);
if (mca_pls_tm_component.debug) {
opal_output(0, "pls:tm: resetting PATH: %s",
newenv);

@@ -335,11 +264,11 @@ pls_tm_launch(orte_jobid_t jobid)
opal_setenv("PATH", newenv, true, &env);
free(newenv);
}

/* Reset LD_LIBRARY_PATH */
else if (0 == strncmp("LD_LIBRARY_PATH=", env[i], 16)) {
asprintf(&newenv, "%s/%s:%s",
cur_prefix, lib_base, env[i] + 16);
map->app->prefix_dir, lib_base, env[i] + 16);
if (mca_pls_tm_component.debug) {
opal_output(0, "pls:tm: resetting LD_LIBRARY_PATH: %s",
newenv);

@@ -348,9 +277,8 @@ pls_tm_launch(orte_jobid_t jobid)
free(newenv);
}
}
free(cur_prefix);
}

/* Do a quick sanity check to ensure that we can find the
orted in the PATH */

@@ -361,68 +289,122 @@ pls_tm_launch(orte_jobid_t jobid)
true, argv[0]);
goto cleanup;
}

/* set the progress engine schedule for this node.
* if node_slots is set to zero, then we default to
* NOT being oversubscribed

/* Iterate through each of the nodes and spin
* up a daemon.
*/
if (node->node_slots > 0 &&
num_processes > node->node_slots) {
for (n_item = opal_list_get_first(&map->nodes);
n_item != opal_list_get_end(&map->nodes);
n_item = opal_list_get_next(n_item)) {
orte_rmaps_base_node_t* rmaps_node = (orte_rmaps_base_node_t*)n_item;
orte_ras_node_t* node = rmaps_node->node;
orte_process_name_t* name;
char* name_string;
size_t num_processes = 0;

/* already launched on this node */
if (0 != node->node_launched++) {
continue;
}

/* setup node name */
argv[node_name_index] = node->node_name;

/* initialize daemons process name */
rc = orte_ns.create_process_name(&name, node->node_cellid, 0, vpid);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}

/* setup per-node options */
if (mca_pls_tm_component.debug ||
mca_pls_tm_component.verbose) {
opal_output(0, "pls:tm: launching on node %s",
node->node_name);
}

/* setup process name */
rc = orte_ns.get_proc_name_string(&name_string, name);
if (ORTE_SUCCESS != rc) {
opal_output(0, "pls:tm: unable to create process name");
return rc;
}
argv[proc_name_index] = name_string;

/* set the progress engine schedule for this node.
* if node_slots is set to zero, then we default to
* NOT being oversubscribed
*/
if (node->node_slots > 0 &&
opal_list_get_size(&rmaps_node->node_procs) > node->node_slots) {
if (mca_pls_tm_component.debug) {
opal_output(0, "pls:tm: oversubscribed -- setting mpi_yield_when_idle to 1 (%d %d)",
node->node_slots,
opal_list_get_size(&rmaps_node->node_procs));
}
var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
opal_setenv(var, "1", true, &env);
} else {
if (mca_pls_tm_component.debug) {
opal_output(0, "pls:tm: not oversubscribed -- setting mpi_yield_when_idle to 0");
}
var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
opal_setenv(var, "0", true, &env);
}
free(var);

/* save the daemons name on the node */
if (ORTE_SUCCESS != (rc = orte_pls_base_proxy_set_node_name(node,jobid,name))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}

/* exec the daemon */
if (mca_pls_tm_component.debug) {
opal_output(0, "pls:tm: oversubscribed -- setting mpi_yield_when_idle to 1 (%d %d)",
node->node_slots, num_processes);
param = opal_argv_join(argv, ' ');
if (NULL != param) {
opal_output(0, "pls:tm: executing: %s", param);
free(param);
}
}
var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
opal_setenv(var, "1", true, &env);
} else {
if (mca_pls_tm_component.debug) {
opal_output(0, "pls:tm: not oversubscribed -- setting mpi_yield_when_idle to 0");

rc = pls_tm_start_proc(node->node_name, argc, argv, env);
if (ORTE_SUCCESS != rc) {
opal_output(0, "pls:tm: start_procs returned error %d", rc);
goto cleanup;
}
var = mca_base_param_environ_variable("mpi", NULL, "yield_when_idle");
opal_setenv(var, "0", true, &env);
launched++;
vpid++;
free(name);
opal_event_loop(OPAL_EVLOOP_NONBLOCK);
}
free(var);

/* save the daemons name on the node */
if (ORTE_SUCCESS != (rc = orte_pls_base_proxy_set_node_name(node,jobid,name))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}

/* exec the daemon */
if (mca_pls_tm_component.debug) {
param = opal_argv_join(argv, ' ');
if (NULL != param) {
opal_output(0, "pls:tm: executing: %s", param);
free(param);
}
}

rc = pls_tm_start_proc(node->node_name, argc, argv, env);
if (ORTE_SUCCESS != rc) {
opal_output(0, "pls:tm: start_procs returned error %d", rc);
goto cleanup;
}

vpid++;
free(name);

}

cleanup:
/* loop through all those that are launched and poll for
completion status */

for(i = 0; i < launched; i++){
int ret, local_err;
tm_event_t event;
ret = tm_poll(TM_NULL_EVENT, &event, 1, &local_err);
if (TM_SUCCESS != ret) {
errno = local_err;
opal_output(0, "pls:tm: failed to start a proc error %d", ret);
goto cleanup;
}
}

cleanup:
if (connected) {
pls_tm_disconnect();
}

while (NULL != (item = opal_list_remove_first(&nodes))) {
OBJ_RELEASE(item);

while (NULL != (m_item = opal_list_remove_first(&mapping))) {
OBJ_RELEASE(m_item);
}
OBJ_DESTRUCT(&nodes);

while (NULL != (item = opal_list_remove_first(&mapping_list))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&mapping_list);

OBJ_DESTRUCT(&mapping);
if (NULL != lib_base) {
free(lib_base);
}
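For orientation, the oversubscription test in the new per-node loop above reduces to one comparison: a node counts as oversubscribed only when node_slots is nonzero and more processes are mapped to it than it has slots, and mpi_yield_when_idle is exported as 1 or 0 accordingly. A minimal standalone sketch of that decision (illustrative only, not the Open MPI code; names and sample values are made up):

    #include <stdio.h>
    #include <stddef.h>

    /* Mirror of the decision in the hunk above: node_slots == 0 is treated
       as "unknown", so the node is never marked oversubscribed. */
    static int yield_when_idle(size_t node_slots, size_t procs_on_node)
    {
        return (node_slots > 0 && procs_on_node > node_slots) ? 1 : 0;
    }

    int main(void)
    {
        printf("%d\n", yield_when_idle(4, 8));  /* oversubscribed -> 1 */
        printf("%d\n", yield_when_idle(4, 2));  /* fits in slots  -> 0 */
        printf("%d\n", yield_when_idle(0, 64)); /* slots unknown  -> 0 */
        return 0;
    }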
@@ -520,6 +502,8 @@ static tm_node_id *tm_node_ids = NULL;
static int num_tm_hostnames, num_node_ids;

/* we don't call this anymore */
/*
* For a given TM node ID, get the string hostname corresponding to
* it.

@@ -527,10 +511,10 @@ static int num_tm_hostnames, num_node_ids;
static char*
get_tm_hostname(tm_node_id node)
{
int ret, local_errno;
char *hostname;
tm_event_t event;
char buffer[256];
int ret, local_errno;
tm_event_t event;
char **argv;

/* Get the info string corresponding to this TM node ID */

@@ -565,6 +549,7 @@ get_tm_hostname(tm_node_id node)
}

/* we don't call this anymore!*/
static int
query_tm_hostnames(void)
{

@@ -598,7 +583,7 @@ query_tm_hostnames(void)
return ORTE_SUCCESS;
}

/* we don't call this anymore! */
static int
do_tm_resolve(char *hostname, tm_node_id *tnodeid)
{

@@ -637,7 +622,7 @@ do_tm_resolve(char *hostname, tm_node_id *tnodeid)
static int
pls_tm_start_proc(char *nodename, int argc, char **argv, char **env)
{
int ret, local_err;
int ret;
tm_node_id node_id;
tm_task_id task_id;
tm_event_t event;

@@ -649,12 +634,6 @@ pls_tm_start_proc(char *nodename, int argc, char **argv, char **env)
ret = tm_spawn(argc, argv, env, node_id, &task_id, &event);
if (TM_SUCCESS != ret) return ORTE_ERROR;

ret = tm_poll(TM_NULL_EVENT, &event, 1, &local_err);
if (TM_SUCCESS != ret) {
errno = local_err;
return ORTE_ERR_IN_ERRNO;
}

return ORTE_SUCCESS;
}
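The pls_tm_module.c hunks above carry the scalability change named in the commit message: tm_poll() is removed from pls_tm_start_proc(), a launched counter is kept during the spawn loop, and completion events are collected in a single pass afterwards. A sketch of that spawn-all-then-poll pattern, assuming only the TM calls already shown in this diff (tm_spawn(), tm_poll()); error handling and the per-node argv/env setup are simplified, so this is illustrative rather than the Open MPI code:

    #include <errno.h>
    #include <stdio.h>
    #include <tm.h>

    /* Queue a spawn on every node first, then poll once per successful
       spawn, instead of polling after each individual tm_spawn(). */
    static int launch_all(int num_nodes, tm_node_id *ids,
                          int argc, char **argv, char **env)
    {
        int i, rc, local_err, launched = 0;
        tm_task_id task_id;
        tm_event_t event;

        for (i = 0; i < num_nodes; ++i) {
            rc = tm_spawn(argc, argv, env, ids[i], &task_id, &event);
            if (TM_SUCCESS != rc) {
                fprintf(stderr, "tm_spawn failed on node %d: %d\n", i, rc);
                break;
            }
            ++launched;    /* remember how many completions we owe */
        }

        /* Collect one completion event per successful spawn. */
        for (i = 0; i < launched; ++i) {
            rc = tm_poll(TM_NULL_EVENT, &event, 1, &local_err);
            if (TM_SUCCESS != rc) {
                errno = local_err;
                fprintf(stderr, "tm_poll failed: %d\n", rc);
                return -1;
            }
        }
        return 0;
    }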
@@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow

@@ -173,7 +174,10 @@ static int discover(opal_list_t* nodelist)
return ORTE_ERR_OUT_OF_RESOURCE;
}

/* Iterate through all the nodes and make an entry for each */
/* Iterate through all the nodes and make an entry for each. TM
node ID's will never be duplicated, but they may end up
resolving to the same hostname (i.e., vcpu's on a single
host). */

OBJ_CONSTRUCT(&new_nodes, opal_list_t);
for (i = 0; i < num_node_ids; ++i) {

@@ -296,6 +300,6 @@ static int get_tm_hostname(tm_node_id node, char **hostname, char **arch)
/* All done */

opal_output(orte_ras_base.ras_output,
"ras:tm:hostname: got hostname %s", hostname);
"ras:tm:hostname: got hostname %s", *hostname);
return ORTE_SUCCESS;
}

@@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow

@@ -67,7 +68,7 @@ int orte_rmaps_base_open(void)

/* Debugging / verbose output */

param = mca_base_param_reg_int_name("rmaps_base", "verbose",
param = mca_base_param_reg_int_name("rmaps", "base_verbose",
"Verbosity level for the rmaps framework",
false, false, 0, &value);
if (value != 0) {

@@ -78,13 +79,19 @@ int orte_rmaps_base_open(void)

/* Are we scheduling by node or by slot? */

param = mca_base_param_reg_string_name("rmaps_base", "schedule_policy",
param = mca_base_param_reg_string_name("rmaps", "base_schedule_policy",
"Scheduling Policy for RMAPS. [slot | node]",
false, false, "slot", &policy);
if (0 == strcmp(policy, "node")) {
mca_base_param_set_string(param, "node");
}

/* Should we schedule on the local node or not? */

mca_base_param_reg_int_name("rmaps", "base_schedule_local",
"If nonzero, allow scheduling MPI applications on the same node as mpirun (default). If zero, do not schedule any MPI applications on the same node as mpirun",
false, false, 1, &value);

/* Open up all the components that we can find */

if (ORTE_SUCCESS !=

@@ -9,12 +9,14 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/

#include "orte_config.h"
#include <errno.h>
#ifdef HAVE_UNISTD_H

@@ -24,11 +26,14 @@
#include <string.h>
#endif /* HAVE_STRING_H */

#include "orte/orte_constants.h"
#include "orte/orte_types.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/util/output.h"
#include "opal/util/show_help.h"
#include "opal/util/argv.h"
#include "opal/util/if.h"
#include "orte/orte_constants.h"
#include "orte/orte_types.h"
#include "orte/util/sys_info.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/rmaps/base/base.h"

@@ -416,7 +421,8 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid)
int rc = ORTE_SUCCESS;
bool bynode = true;
char **mapped_nodes = NULL;
int num_mapped_nodes = 0;
int num_mapped_nodes = 0;
int id, value;

/* query for the application context and allocated nodes */
if(ORTE_SUCCESS != (rc = orte_rmgr_base_get_app_context(jobid, &context, &num_context))) {

@@ -437,6 +443,24 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid)
return rc;
}

/* If the "no local" option was set, then remove the local node
from the list */

id = mca_base_param_find("rmaps", NULL, "base_schedule_local");
mca_base_param_lookup_int(id, &value);
if (0 == value) {
for (item = opal_list_get_first(&nodes);
item != opal_list_get_end(&nodes);
item = opal_list_get_next(item) ) {
if (0 == strcmp(((orte_ras_node_t *) item)->node_name,
orte_system_info.nodename) ||
opal_ifislocal(((orte_ras_node_t *) item)->node_name)) {
opal_list_remove_item(&nodes, item);
break;
}
}
}

/* Sanity check to make sure we have been allocated nodes */
if (0 == opal_list_get_size(&nodes)) {
OBJ_DESTRUCT(&nodes);

@@ -489,7 +513,8 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid)
end, bounce back to the front (as would happen in the loop
below)

But do a bozo check to ensure that we don't have a empty node list.*/
But do a bozo check to ensure that we don't have a empty
node list.*/
if (0 == opal_list_get_size(&nodes)) {
rc = ORTE_ERR_TEMP_OUT_OF_RESOURCE;
goto cleanup;
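The new block in orte_rmaps_rr_map() is what implements --nolocal: when rmaps_base_schedule_local is 0, the node whose name matches the local host (via orte_system_info.nodename or opal_ifislocal()) is removed from the allocation before mapping. A self-contained sketch of the same host-filtering idea, using gethostname() and strcmp() in place of the ORTE helpers; node names come from the command line purely for illustration:

    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    /* Print every node name passed on the command line except entries
       that match the local host -- the filtering idea behind --nolocal. */
    int main(int argc, char **argv)
    {
        char local[256] = "";
        int i;

        if (0 != gethostname(local, sizeof(local) - 1)) {
            perror("gethostname");
            return 1;
        }

        for (i = 1; i < argc; ++i) {
            if (0 == strcmp(argv[i], local) ||
                0 == strcmp(argv[i], "localhost")) {
                continue;    /* skip the local node */
            }
            printf("%s\n", argv[i]);
        }
        return 0;
    }

Run as, for example, ./filter nodeA $(hostname) nodeB; only nodeA and nodeB are printed.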
@@ -181,6 +181,13 @@ Synonym for \fI-np\fP.
.
.
.TP
.B -nolocal\fR,\fP --nolocal
Do not run any copies of the launched application on the same node as
orterun is running. This option will override listing the localhost
with \fB--host\fR or any other host-specifying mechanism.
.
.
.TP
.B -np \fR<#>\fP
Run this many copies of the program on the given nodes. This option
indicates that the specified file is an executable program and not an

@@ -409,6 +416,30 @@ on hosts b and c.
.
.
.
.SS No Local Launch
.
Using the \fB--nolocal\fR option to orterun tells the system not to
launch any of the application processes on the same node that orterun
is running. While orterun typically blocks and consumes few system
resources, this option can be helpful for launching very large jobs
where orterun may actually need to use noticeable amounts of memory
and/or processing time. \fB--nolocal\fR allows orterun to run without
sharing the local node with the launched applications, and likewise
allows the launched applications to run unhindered by orterun's system
usage.
.PP
Note that \fB--nolocal\fR will override any other specification to
launch the application on the local node. It will disqualify the
localhost from being capable of running any processes in the
application.
.
.
.TP
shell$ mpirun -np 1 --host localhost --nolocal hostname
This example will result in an error because orterun will not find
anywhere to launch the application.
.
.
.SS Application Context or Executable Program?
.
To distinguish the two different forms, \fImpirun\fP
@@ -105,6 +105,7 @@ struct globals_t {
bool by_node;
bool by_slot;
bool debugger;
bool no_local_schedule;
size_t num_procs;
int exit_status;
char *hostfile;

@@ -209,6 +210,11 @@ opal_cmd_line_init_t cmd_line_init[] = {
NULL, OPAL_CMD_LINE_TYPE_STRING,
"List of hosts to invoke processes on" },

/* OSC mpiexec-like arguments */
{ NULL, NULL, NULL, '\0', "nolocal", "nolocal", 0,
&orterun_globals.no_local_schedule, OPAL_CMD_LINE_TYPE_BOOL,
"Do not run any MPI applications on the local node" },

/* User-level debugger arguments */
{ NULL, NULL, NULL, '\0', "tv", "tv", 0,
&orterun_globals.debugger, OPAL_CMD_LINE_TYPE_BOOL,

@@ -761,6 +767,7 @@ static int init_globals(void)
false,
false,
false,
false,
0,
0,
NULL,

@@ -855,7 +862,7 @@ static int parse_globals(int argc, char* argv[])
* since it really should be initialized in rmaps_base_open */
if (orterun_globals.by_node || orterun_globals.by_slot) {
char *policy = NULL;
id = mca_base_param_reg_string_name("rmaps_base", "schedule_policy",
id = mca_base_param_reg_string_name("rmaps", "base_schedule_policy",
"Scheduling policy for RMAPS. [slot | node]",
false, false, "slot", &policy);

@@ -873,6 +880,17 @@ static int parse_globals(int argc, char* argv[])
orterun_globals.by_slot = true;
}

/* Do we want to allow MPI applications on the same node as
mpirun? */
id = mca_base_param_reg_int_name("rmaps", "base_schedule_local",
"If nonzero, allow scheduling MPI applications on the same node as mpirun (default). If zero, do not schedule any MPI applications on the same node as mpirun",
false, false, 1, &ret);
if (orterun_globals.no_local_schedule) {
mca_base_param_set_int(id, 0);
} else {
mca_base_param_set_int(id, 1);
}

/* If we don't want to wait, we don't want to wait */

if (orterun_globals.no_wait_for_job_completion) {