1
1

Fix the tree-spawn-with-rollup

Somehow, the code for passing a daemon's parent was accidentally removed, thus breaking the tree-spawn callback sequence and causing all daemons to phone directly home. Note that this is noticeably slower than no-tree-spawn for small clusters, where direct ssh launch of the child daemons from the HNP doesn't overload the available file descriptors.

Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
Ralph Castain 2017-12-15 16:03:43 -08:00
родитель ea35820246
Коммит 7a58f91ab9
3 изменённых файлов: 57 добавлений и 37 удалений

Просмотреть файл

@ -1383,14 +1383,6 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender,
if (ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
ORTE_ERROR_LOG(rc);
ORTE_ACTIVATE_JOB_STATE(jdatorted, ORTE_JOB_STATE_FAILED_TO_START);
} else if (NULL != orte_tree_launch_cmd) {
/* if a tree-launch is underway, send the cmd back */
relay = OBJ_NEW(opal_buffer_t);
opal_dss.copy_payload(relay, orte_tree_launch_cmd);
orte_rml.send_buffer_nb(orte_mgmt_conduit,
sender, relay,
ORTE_RML_TAG_DAEMON,
orte_rml_send_callback, NULL);
}
}

Просмотреть файл

@ -88,6 +88,7 @@
#include "orte/mca/ess/base/base.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/grpcomm/base/base.h"
#include "orte/mca/oob/base/base.h"
#include "orte/mca/rmaps/rmaps.h"
#include "orte/mca/routed/routed.h"
#include "orte/mca/rml/base/rml_contact.h"
@ -605,7 +606,6 @@ static int setup_launch(int *argcptr, char ***argvptr,
(mca_plm_rsh_component.using_qrsh && mca_plm_rsh_component.daemonize_qrsh)) &&
((!mca_plm_rsh_component.using_llspawn) ||
(mca_plm_rsh_component.using_llspawn && mca_plm_rsh_component.daemonize_llspawn))) {
opal_argv_append(&argc, &argv, "--daemonize");
}
/*
@ -617,9 +617,20 @@ static int setup_launch(int *argcptr, char ***argvptr,
proc_vpid_index);
/* ensure that only the ssh plm is selected on the remote daemon */
opal_argv_append_nosize(&argv, "-"OPAL_MCA_CMD_LINE_ID);
opal_argv_append_nosize(&argv, "plm");
opal_argv_append_nosize(&argv, "rsh");
opal_argv_append(&argc, &argv, "-"OPAL_MCA_CMD_LINE_ID);
opal_argv_append(&argc, &argv, "plm");
opal_argv_append(&argc, &argv, "rsh");
/* if we are tree-spawning, tell our child daemons the
* uri of their parent (me) */
if (!mca_plm_rsh_component.no_tree_spawn) {
opal_argv_append(&argc, &argv, "--tree-spawn");
orte_oob_base_get_addr(&param);
opal_argv_append(&argc, &argv, "-"OPAL_MCA_CMD_LINE_ID);
opal_argv_append(&argc, &argv, "orte_parent_uri");
opal_argv_append(&argc, &argv, param);
free(param);
}
/* unless told otherwise... */
if (mca_plm_rsh_component.pass_environ_mca_params) {
@ -795,11 +806,22 @@ static int remote_spawn(opal_buffer_t *launch)
/* if we hit any errors, tell the HNP it was us */
target.vpid = ORTE_PROC_MY_NAME->vpid;
/* extract the prefix from the launch buffer */
n = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(launch, &prefix, &n, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
if (NULL != launch) {
/* extract the prefix from the launch buffer */
n = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(launch, &prefix, &n, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
} else {
/* check to see if enable-orterun-prefix-by-default was given - if
* this is being done by a singleton, then orterun will not be there
* to put the prefix in the app. So make sure we check to find it */
if ((bool)ORTE_WANT_ORTERUN_PREFIX_BY_DEFAULT) {
prefix = strdup(opal_install_dirs.prefix);
} else {
prefix = NULL;
}
}
/* get the updated routing list */

Просмотреть файл

@ -120,7 +120,7 @@ static void rollup(int status, orte_process_name_t* sender,
static opal_buffer_t *bucket, *mybucket = NULL;
static int ncollected = 0;
static char *orte_parent_uri;
static char *orte_parent_uri = NULL;
static struct {
bool debug;
@ -187,6 +187,10 @@ opal_cmd_line_init_t orte_cmd_line_opts[] = {
&orted_globals.set_sid, OPAL_CMD_LINE_TYPE_BOOL,
"Direct the orted to separate from the current session"},
{ NULL, '\0', "tree-spawn", "tree-spawn", 0,
&orted_globals.tree_spawn, OPAL_CMD_LINE_TYPE_BOOL,
"Tree-based spawn in progress" },
{ "tmpdir_base", '\0', NULL, "tmpdir", 1,
NULL, OPAL_CMD_LINE_TYPE_STRING,
"Set the root for the session directory tree" },
@ -667,22 +671,19 @@ int orte_daemon(int argc, char *argv[])
MCA_BASE_VAR_SCOPE_CONSTANT,
&orte_parent_uri);
if (NULL != orte_parent_uri) {
orte_process_name_t parent;
opal_value_t val;
/* set the contact info into our local database */
ret = orte_rml_base_parse_uris(orte_parent_uri, &parent, NULL);
ret = orte_rml_base_parse_uris(orte_parent_uri, ORTE_PROC_MY_PARENT, NULL);
if (ORTE_SUCCESS != ret) {
ORTE_ERROR_LOG(ret);
free (orte_parent_uri);
orte_parent_uri = NULL;
goto DONE;
}
OBJ_CONSTRUCT(&val, opal_value_t);
val.key = OPAL_PMIX_PROC_URI;
val.type = OPAL_STRING;
val.data.string = orte_parent_uri;
if (OPAL_SUCCESS != (ret = opal_pmix.store_local(&parent, &val))) {
if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_PARENT, &val))) {
ORTE_ERROR_LOG(ret);
OBJ_DESTRUCT(&val);
goto DONE;
@ -691,21 +692,22 @@ int orte_daemon(int argc, char *argv[])
val.data.string = NULL;
OBJ_DESTRUCT(&val);
/* don't need this value anymore */
free(orte_parent_uri);
orte_parent_uri = NULL;
/* tell the routed module that we have a path
* back to the HNP
*/
if (ORTE_SUCCESS != (ret = orte_routed.update_route(NULL, ORTE_PROC_MY_HNP, &parent))) {
if (ORTE_SUCCESS != (ret = orte_routed.update_route(NULL, ORTE_PROC_MY_HNP, ORTE_PROC_MY_PARENT))) {
ORTE_ERROR_LOG(ret);
goto DONE;
}
/* and a path to our parent */
if (ORTE_SUCCESS != (ret = orte_routed.update_route(NULL, ORTE_PROC_MY_PARENT, ORTE_PROC_MY_PARENT))) {
ORTE_ERROR_LOG(ret);
goto DONE;
}
/* set the lifeline to point to our parent so that we
* can handle the situation if that lifeline goes away
*/
if (ORTE_SUCCESS != (ret = orte_routed.set_lifeline(NULL, &parent))) {
if (ORTE_SUCCESS != (ret = orte_routed.set_lifeline(NULL, ORTE_PROC_MY_PARENT))) {
ORTE_ERROR_LOG(ret);
goto DONE;
}
@ -717,12 +719,15 @@ int orte_daemon(int argc, char *argv[])
*/
if (!ORTE_PROC_IS_HNP) {
orte_process_name_t target;
target.jobid = ORTE_PROC_MY_NAME->jobid;
if (orte_fwd_mpirun_port || orte_static_ports) {
/* setup the rollup callback */
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ORTED_CALLBACK,
ORTE_RML_PERSISTENT, rollup, NULL);
/* setup the rollup callback */
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ORTED_CALLBACK,
ORTE_RML_PERSISTENT, rollup, NULL);
/* define the target jobid */
target.jobid = ORTE_PROC_MY_NAME->jobid;
if (orte_fwd_mpirun_port || orte_static_ports || NULL != orte_parent_uri) {
/* we start by sending to ourselves */
target.vpid = ORTE_PROC_MY_NAME->vpid;
/* since we will be waiting for any children to send us
* their rollup info before sending to our parent, save
@ -789,7 +794,6 @@ int orte_daemon(int argc, char *argv[])
}
OPAL_LIST_RELEASE(modex);
} else {
opal_output(0, "VAL KEY: %s", (NULL == val->key) ? "NULL" : val->key);
/* single value */
flag = 1;
if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &flag, 1, OPAL_INT32))) {
@ -965,6 +969,8 @@ int orte_daemon(int argc, char *argv[])
i += 2;
}
}
/* now launch any child daemons of ours */
orte_plm.remote_spawn(orte_tree_launch_cmd);
}
if (orte_debug_daemons_flag) {
@ -1053,8 +1059,6 @@ static void rollup(int status, orte_process_name_t* sender,
int32_t i, flag, cnt;
opal_value_t *kv;
/* xfer the contents of the rollup to our bucket */
opal_dss.copy_payload(bucket, buffer);
ncollected++;
/* if the sender is ourselves, then we save that buffer
@ -1064,6 +1068,8 @@ static void rollup(int status, orte_process_name_t* sender,
mybucket = OBJ_NEW(opal_buffer_t);
opal_dss.copy_payload(mybucket, buffer);
} else {
/* xfer the contents of the rollup to our bucket */
opal_dss.copy_payload(bucket, buffer);
/* the first entry in the bucket will be from our
* direct child - harvest it for connection info */
cnt = 1;