1
1

Fix tree spawn in the rsh/qrsh environment

This commit was SVN r26631.
Этот коммит содержится в:
Ralph Castain 2012-06-21 21:29:28 +00:00
родитель 249066e06d
Коммит e9591f2563
3 изменённых файла: 40 добавлений и 15 удалений

Просмотреть файл

@@ -897,11 +897,18 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv,
free(param);
}
/* if we want to use a common port, tell the daemon to do so */
if (orte_use_common_port) {
/* tell the daemon to use the common port */
opal_argv_append(argc, argv, "--use-common-port");
}
/* warn the daemons if we are using a tree spawn pattern so they
* know they shouldn't do a rollup on their callback
*/
if (NULL != orte_tree_launch_cmd) {
opal_argv_append(argc, argv, "--tree-spawn");
}
/* pass along any cmd line MCA params provided to mpirun,
* being sure to "purge" any that would cause problems
* on backend nodes

Просмотреть файл

@@ -785,6 +785,11 @@ static int remote_spawn(opal_buffer_t *launch)
goto cleanup;
}
/* ensure the system knows we are not using common ports since we are
* doing a tree spawn
*/
orte_use_common_port = false;
/* setup the launch */
if (ORTE_SUCCESS != (rc = setup_launch(&argc, &argv, orte_process_info.nodename, &node_name_index1,
&proc_vpid_index, prefix))) {
@@ -793,6 +798,9 @@ static int remote_spawn(opal_buffer_t *launch)
goto cleanup;
}
/* tell the daemon we are in a tree spawn */
opal_argv_append(&argc, &argv, "--tree-spawn");
/* get the daemon job object */
if (NULL == (daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
@@ -1068,20 +1076,18 @@ static void launch_daemons(int fd, short args, void *cbdata)
}
}
/* setup the launch */
if (ORTE_SUCCESS != (rc = setup_launch(&argc, &argv, node->name, &node_name_index1,
&proc_vpid_index, prefix_dir))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* if we are tree launching, find our children and create the launch cmd */
if (mca_plm_rsh_component.tree_spawn) {
orte_daemon_cmd_flag_t command = ORTE_DAEMON_TREE_SPAWN;
opal_byte_object_t bo, *boptr;
orte_job_t *jdatorted;
/* ensure all systems know we are not using a common port since we
* are doing a tree spawn
*/
orte_use_common_port = false;
/* get the tree spawn buffer */
orte_tree_launch_cmd = OBJ_NEW(opal_buffer_t);
/* insert the tree_spawn cmd */
if (ORTE_SUCCESS != (rc = opal_dss.pack(orte_tree_launch_cmd, &command, 1, ORTE_DAEMON_CMD))) {
@@ -1123,6 +1129,13 @@ static void launch_daemons(int fd, short args, void *cbdata)
orte_routed.get_routing_list(ORTE_GRPCOMM_XCAST, &coll);
}
/* setup the launch */
if (ORTE_SUCCESS != (rc = setup_launch(&argc, &argv, node->name, &node_name_index1,
&proc_vpid_index, prefix_dir))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/*
* Iterate through each of the nodes
*/

Просмотреть файл

@@ -117,6 +117,7 @@ static struct {
int fail_delay;
bool abort;
bool mapreduce;
bool tree_spawn;
} orted_globals;
/*
@@ -172,6 +173,10 @@ opal_cmd_line_init_t orte_cmd_line_opts[] = {
NULL, OPAL_CMD_LINE_TYPE_BOOL,
"Use the same port as the HNP."},
{ NULL, NULL, NULL, '\0', NULL, "tree-spawn", 0,
&orted_globals.tree_spawn, OPAL_CMD_LINE_TYPE_BOOL,
"Tree spawn is underway"},
{ NULL, NULL, NULL, '\0', NULL, "set-sid", 0,
&orted_globals.set_sid, OPAL_CMD_LINE_TYPE_BOOL,
"Direct the orted to separate from the current session"},
@@ -718,7 +723,7 @@ int orte_daemon(int argc, char *argv[])
}
#endif
if (orte_static_ports || orte_use_common_port) {
if ((orte_static_ports || orte_use_common_port) && !orted_globals.tree_spawn) {
/* use the rollup collective to send our data to the HNP
* so we minimize the HNP bottleneck
*/