1
1

Improve launch performance on clusters that use dedicated nodes by instructing the orteds to use the same port as the HNP, thus allowing them to "rollup" their initial callback via the routed network. This substantially reduces the HNP bottleneck and the number of ports opened by the HNP.

Restore the enable-static-ports option to being enabled by default - the Cray will have to disable it to get around their library issues, but that's just a warning problem as opposed to one that blocks the build.

This commit was SVN r26606.
Этот коммит содержится в:
Ralph Castain 2012-06-15 10:15:07 +00:00
родитель defaefd59e
Коммит 96c778656a
16 изменённых файлов: 417 добавлений и 209 удалений

Просмотреть файл

@ -137,13 +137,13 @@ AC_DEFINE_UNQUOTED([ORTE_ENABLE_PROGRESS_THREADS],
AC_MSG_CHECKING([if want orte static ports]) AC_MSG_CHECKING([if want orte static ports])
AC_ARG_ENABLE([orte-static-ports], AC_ARG_ENABLE([orte-static-ports],
[AC_HELP_STRING([--enable-orte-static-ports], [AC_HELP_STRING([--enable-orte-static-ports],
[Enable orte static ports for tcp oob. (default: disabled)])]) [Enable orte static ports for tcp oob. (default: enabled)])])
if test "$enable_orte_static_ports" = "yes"; then if test "$enable_orte_static_ports" = "no"; then
AC_MSG_RESULT([yes])
orte_enable_static_ports=1
else
AC_MSG_RESULT([no]) AC_MSG_RESULT([no])
orte_enable_static_ports=0 orte_enable_static_ports=0
else
AC_MSG_RESULT([yes])
orte_enable_static_ports=1
fi fi
AC_DEFINE_UNQUOTED([ORTE_ENABLE_STATIC_PORTS], AC_DEFINE_UNQUOTED([ORTE_ENABLE_STATIC_PORTS],
[$orte_enable_static_ports], [$orte_enable_static_ports],

Просмотреть файл

@ -304,13 +304,18 @@ int orte_ess_base_orted_setup(char **hosts)
goto error; goto error;
} }
#if ORTE_ENABLE_STATIC_PORTS #if ORTE_ENABLE_STATIC_PORTS
/* if we are using static ports, then we need to setup /* if we are using static ports or a common port, then we need to setup
* the daemon info so the RML can function properly * the daemon info so the RML can function properly
* without requiring a wireup stage. This must be done * without requiring a wireup stage. This must be done
* after we enable_comm as that function determines our * after we enable_comm as that function determines our
* own port, which we need in order to construct the nidmap * own port, which we need in order to construct the nidmap
*/ */
if (orte_static_ports) { if (orte_static_ports || orte_use_common_port) {
/* define the routing tree so we know the pattern
* if we are trying to setup common or static ports
*/
orte_routed.update_routing_plan();
if (ORTE_SUCCESS != (ret = orte_util_setup_local_nidmap_entries())) { if (ORTE_SUCCESS != (ret = orte_util_setup_local_nidmap_entries())) {
ORTE_ERROR_LOG(ret); ORTE_ERROR_LOG(ret);
error = "orte_util_nidmap_init"; error = "orte_util_nidmap_init";

Просмотреть файл

@ -72,6 +72,7 @@ static int rte_init(void)
{ {
int ret; int ret;
char *error = NULL; char *error = NULL;
char **hosts = NULL;
/* run the prolog */ /* run the prolog */
if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) {
@ -86,11 +87,23 @@ static int rte_init(void)
* default procedure * default procedure
*/ */
if (ORTE_PROC_IS_DAEMON) { if (ORTE_PROC_IS_DAEMON) {
if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup(NULL))) { if (NULL != orte_node_regex) {
/* extract the nodes */
if (ORTE_SUCCESS != (ret =
orte_regex_extract_node_names(orte_node_regex, &hosts)) ||
NULL == hosts) {
error = "orte_regex_extract_node_names";
goto error;
}
}
if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup(hosts))) {
ORTE_ERROR_LOG(ret); ORTE_ERROR_LOG(ret);
error = "orte_ess_base_orted_setup"; error = "orte_ess_base_orted_setup";
goto error; goto error;
} }
if (NULL != hosts) {
opal_argv_free(hosts);
}
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }

Просмотреть файл

@ -9,7 +9,7 @@
# University of Stuttgart. All rights reserved. # University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California. # Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved. # All rights reserved.
# Copyright (c) 2011 Los Alamos National Security, LLC. # Copyright (c) 2011-2012 Los Alamos National Security, LLC.
# All rights reserved. # All rights reserved.
# $COPYRIGHT$ # $COPYRIGHT$
# #
@ -31,5 +31,6 @@ if !ORTE_DISABLE_FULL_SUPPORT
libmca_grpcomm_la_SOURCES += \ libmca_grpcomm_la_SOURCES += \
base/grpcomm_base_modex.c \ base/grpcomm_base_modex.c \
base/grpcomm_base_receive.c \ base/grpcomm_base_receive.c \
base/grpcomm_base_xcast.c base/grpcomm_base_xcast.c \
base/grpcomm_base_rollup.c
endif endif

Просмотреть файл

@ -74,6 +74,9 @@ ORTE_DECLSPEC orte_grpcomm_coll_id_t orte_grpcomm_base_get_coll_id(void);
ORTE_DECLSPEC void orte_grpcomm_base_pack_collective(opal_buffer_t *relay, ORTE_DECLSPEC void orte_grpcomm_base_pack_collective(opal_buffer_t *relay,
orte_grpcomm_collective_t *coll, orte_grpcomm_collective_t *coll,
orte_grpcomm_internal_stage_t stg); orte_grpcomm_internal_stage_t stg);
ORTE_DECLSPEC void orte_grpcomm_base_rollup_recv(int status, orte_process_name_t* sender,
opal_buffer_t* buffer, orte_rml_tag_t tag,
void* cbdata);
/* modex support */ /* modex support */
ORTE_DECLSPEC int orte_grpcomm_base_set_proc_attr(const char* project, ORTE_DECLSPEC int orte_grpcomm_base_set_proc_attr(const char* project,

Просмотреть файл

@ -93,6 +93,16 @@ int orte_grpcomm_base_comm_start(void)
recv_issued = false; recv_issued = false;
return rc; return rc;
} }
if (ORTE_PROC_IS_DAEMON) {
if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
ORTE_RML_TAG_ROLLUP,
ORTE_RML_PERSISTENT,
orte_grpcomm_base_rollup_recv, NULL))) {
ORTE_ERROR_LOG(rc);
recv_issued = false;
return rc;
}
}
if (ORTE_PROC_IS_HNP) { if (ORTE_PROC_IS_HNP) {
if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
ORTE_RML_TAG_COLL_ID_REQ, ORTE_RML_TAG_COLL_ID_REQ,

127
orte/mca/grpcomm/base/grpcomm_base_rollup.c Обычный файл
Просмотреть файл

@ -0,0 +1,127 @@
/* -*- C -*-
*
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
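/** @file:
 *
 * Rollup support for grpcomm: receives buffers sent on ORTE_RML_TAG_ROLLUP,
 * accumulates them in the active collective, and relays the combined
 * payload to this process's parent (or to the HNP's orted callback when
 * the parent is the HNP).
 */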
/*
* includes
*/
#include "orte_config.h"
#include "opal/dss/dss.h"
#include "orte/util/proc_info.h"
#include "orte/util/error_strings.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/ess/ess.h"
#include "orte/mca/odls/base/base.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/routed/routed.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/grpcomm/grpcomm_types.h"
#include "orte/mca/grpcomm/grpcomm.h"
#include "orte/mca/grpcomm/base/base.h"
/* function to cleanup collective once completed */
/*
 * Send-completion callback for the rollup relay message.
 *
 * Removes the first collective from orte_grpcomm_base.active_colls and
 * releases it, then releases the buffer that was handed to the send.
 *
 * status, sender, tag and cbdata are not used.
 */
static void rml_send_callback(int status, orte_process_name_t* sender,
                              opal_buffer_t* buffer, orte_rml_tag_t tag,
                              void* cbdata)
{
    orte_grpcomm_collective_t *coll;

    /* remove this collective from our list */
    coll = (orte_grpcomm_collective_t*)opal_list_remove_first(&orte_grpcomm_base.active_colls);

    /* opal_list_remove_first returns NULL when the list is empty, and
     * OBJ_RELEASE must not be handed a NULL object
     */
    if (NULL != coll) {
        OBJ_RELEASE(coll);
    }

    /* release our buffer */
    OBJ_RELEASE(buffer);
}
/*
 * Receive callback for messages arriving on ORTE_RML_TAG_ROLLUP.
 *
 * Appends the payload of the incoming buffer to the first collective on
 * orte_grpcomm_base.active_colls, counting the message as a peer bucket
 * unless it was sent by this process itself. Once the collective is
 * complete, the accumulated payload is copied into a fresh buffer and
 * sent onward: to the HNP on ORTE_RML_TAG_ORTED_CALLBACK when our parent
 * is the HNP, otherwise to our parent on ORTE_RML_TAG_ROLLUP.
 *
 * The collective is considered complete when
 *   - its target list is empty, or
 *   - the number of peer buckets equals the number of targets, or
 *   - the first target is a vpid wildcard and the number of peer buckets
 *     equals orte_process_info.num_procs.
 *
 * @param status  not used
 * @param sender  name of the process that sent this bucket
 * @param buffer  payload to fold into the collective; not released here
 * @param tag     not used
 * @param cbdata  not used
 *
 * Errors are reported through ORTE_ERROR_LOG; nothing is returned.
 */
void orte_grpcomm_base_rollup_recv(int status, orte_process_name_t* sender,
                                   opal_buffer_t* buffer, orte_rml_tag_t tag,
                                   void* cbdata)
{
    int ret;
    orte_grpcomm_collective_t *coll;
    bool done = false;
    opal_buffer_t *relay;
    orte_process_name_t *target;
    orte_rml_tag_t relay_tag;

    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
                         "%s grpcomm:rollup:recv from sender %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(sender)));

    /* opal_list_get_first on an empty list does not yield a usable item,
     * so make sure a collective is actually registered before touching it
     */
    if (opal_list_is_empty(&orte_grpcomm_base.active_colls)) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        return;
    }

    /* lookup the collective - can only be one on the list at this time */
    coll = (orte_grpcomm_collective_t*)opal_list_get_first(&orte_grpcomm_base.active_colls);

    /* flag that we received a bucket */
    if (sender->vpid != ORTE_PROC_MY_NAME->vpid) {
        coll->num_peer_buckets++;
    }

    /* transfer the data */
    if (ORTE_SUCCESS != (ret = opal_dss.copy_payload(&coll->buffer, buffer))) {
        ORTE_ERROR_LOG(ret);
        return;
    }

    /* if list is empty, then we can just send our data along */
    if (opal_list_is_empty(&coll->targets)) {
        done = true;
    } else if (coll->num_peer_buckets == opal_list_get_size(&coll->targets)) {
        done = true;
    } else {
        /* check for a wildcard */
        orte_namelist_t *nm;
        nm = (orte_namelist_t*)opal_list_get_first(&coll->targets);
        if (ORTE_VPID_WILDCARD == nm->name.vpid &&
            coll->num_peer_buckets == orte_process_info.num_procs) {
            done = true;
        }
    }

    if (!done) {
        /* still waiting for more buckets */
        return;
    }

    /* send the message to my parent */
    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
                         "%s grpcomm:rollup: sending rollup msg to %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(ORTE_PROC_MY_PARENT)));

    relay = OBJ_NEW(opal_buffer_t);
    if (ORTE_SUCCESS != (ret = opal_dss.copy_payload(relay, &coll->buffer))) {
        ORTE_ERROR_LOG(ret);
        OBJ_RELEASE(relay);
        return;
    }

    /* if my parent is the HNP, send it to the final destination;
     * otherwise keep rolling up through my parent
     */
    if (ORTE_PROC_MY_PARENT->vpid == ORTE_PROC_MY_HNP->vpid) {
        target = ORTE_PROC_MY_HNP;
        relay_tag = ORTE_RML_TAG_ORTED_CALLBACK;
    } else {
        target = ORTE_PROC_MY_PARENT;
        relay_tag = ORTE_RML_TAG_ROLLUP;
    }

    if (0 > (ret = orte_rml.send_buffer_nb(target, relay, relay_tag, 0,
                                           rml_send_callback, NULL))) {
        ORTE_ERROR_LOG(ret);
        /* the send was not accepted, so its completion callback will not
         * release the relay buffer for us
         */
        OBJ_RELEASE(relay);
    }
}

Просмотреть файл

@ -70,7 +70,6 @@ typedef int (*orte_grpcomm_base_module_allgather_fn_t)(orte_grpcomm_collective_t
/* barrier function */ /* barrier function */
typedef int (*orte_grpcomm_base_module_barrier_fn_t)(orte_grpcomm_collective_t *coll); typedef int (*orte_grpcomm_base_module_barrier_fn_t)(orte_grpcomm_collective_t *coll);
/** DATA EXCHANGE FUNCTIONS - SEE ompi/runtime/ompi_module_exchange.h FOR A DESCRIPTION /** DATA EXCHANGE FUNCTIONS - SEE ompi/runtime/ompi_module_exchange.h FOR A DESCRIPTION
* OF HOW THIS ALL WORKS * OF HOW THIS ALL WORKS
*/ */

Просмотреть файл

@ -651,7 +651,15 @@ mca_oob_tcp_create_listen(int *target_sd, unsigned short *target_port, uint16_t
port in the range. Otherwise, tcp_port_min will be 0, which port in the range. Otherwise, tcp_port_min will be 0, which
means "pick any port" */ means "pick any port" */
if (AF_INET == af_family) { if (AF_INET == af_family) {
if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_DAEMON) { if (ORTE_PROC_IS_DAEMON && orte_use_common_port) {
/* use the same port as the HNP */
char *ptr, *portptr;
portptr = strdup(orte_process_info.my_hnp_uri);
ptr = strrchr(portptr, ':');
ptr++;
opal_argv_append_nosize(&ports, ptr);
free(portptr);
} else if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_DAEMON) {
if (NULL != mca_oob_tcp_component.tcp4_static_ports) { if (NULL != mca_oob_tcp_component.tcp4_static_ports) {
/* if static ports were provided, the daemon takes the /* if static ports were provided, the daemon takes the
* first entry in the list * first entry in the list
@ -1647,10 +1655,8 @@ int mca_oob_tcp_resolve(mca_oob_tcp_peer_t* peer)
* if we are trying to talk to a process on our own node, try * if we are trying to talk to a process on our own node, try
* looking for the loopback interface before giving up * looking for the loopback interface before giving up
*/ */
#if OPAL_WANT_IPV6
goto unlock; goto unlock;
} }
#else
if (0 == strcasecmp(host, orte_process_info.nodename) || if (0 == strcasecmp(host, orte_process_info.nodename) ||
0 == strncasecmp(host, orte_process_info.nodename, strlen(host)) || 0 == strncasecmp(host, orte_process_info.nodename, strlen(host)) ||
opal_ifislocal(host)) { opal_ifislocal(host)) {
@ -1677,7 +1683,6 @@ int mca_oob_tcp_resolve(mca_oob_tcp_peer_t* peer)
haddr = inet_ntoa(*(struct in_addr*)h->h_addr_list[0]); haddr = inet_ntoa(*(struct in_addr*)h->h_addr_list[0]);
} }
proceed: proceed:
#endif
/* we can't know which af_family we are using, so for now, let's /* we can't know which af_family we are using, so for now, let's
* just look to see which static port family was provided * just look to see which static port family was provided
*/ */

Просмотреть файл

@ -518,12 +518,16 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender,
char *nodename; char *nodename;
orte_node_t *node; orte_node_t *node;
orte_job_t *jdata; orte_job_t *jdata;
orte_process_name_t dname;
/* get the daemon job, if necessary */ /* get the daemon job, if necessary */
if (NULL == jdatorted) { if (NULL == jdatorted) {
jdatorted = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid); jdatorted = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
} }
/* multiple daemons could be in this buffer, so unpack until we exhaust the data */
idx = 1;
while (OPAL_SUCCESS == (rc = opal_dss.unpack(buffer, &dname, &idx, ORTE_NAME))) {
/* unpack its contact info */ /* unpack its contact info */
idx = 1; idx = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &rml_uri, &idx, OPAL_STRING))) { if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &rml_uri, &idx, OPAL_STRING))) {
@ -542,10 +546,10 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender,
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output, OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
"%s plm:base:orted_report_launch from daemon %s", "%s plm:base:orted_report_launch from daemon %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(sender))); ORTE_NAME_PRINT(&dname)));
/* update state and record for this daemon contact info */ /* update state and record for this daemon contact info */
if (NULL == (daemon = (orte_proc_t*)opal_pointer_array_get_item(jdatorted->procs, sender->vpid))) { if (NULL == (daemon = (orte_proc_t*)opal_pointer_array_get_item(jdatorted->procs, dname.vpid))) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
orted_failed_launch = true; orted_failed_launch = true;
goto CLEANUP; goto CLEANUP;
@ -564,7 +568,7 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender,
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output, OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
"%s plm:base:orted_report_launch from daemon %s on node %s", "%s plm:base:orted_report_launch from daemon %s on node %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(sender), nodename)); ORTE_NAME_PRINT(&dname), nodename));
/* look this node up, if necessary */ /* look this node up, if necessary */
if (!orte_plm_globals.daemon_nodes_assigned_at_launch) { if (!orte_plm_globals.daemon_nodes_assigned_at_launch) {
@ -595,7 +599,7 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender,
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output, OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
"%s plm:base:orted_report_launch attempting to assign daemon %s to node %s", "%s plm:base:orted_report_launch attempting to assign daemon %s to node %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(sender), nodename)); ORTE_NAME_PRINT(&dname), nodename));
for (idx=0; idx < orte_node_pool->size; idx++) { for (idx=0; idx < orte_node_pool->size; idx++) {
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, idx))) { if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, idx))) {
continue; continue;
@ -636,7 +640,7 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender,
#if OPAL_HAVE_HWLOC #if OPAL_HAVE_HWLOC
/* store the local resources for that node */ /* store the local resources for that node */
{ if (1 == dname.vpid || orte_hetero_nodes) {
hwloc_topology_t topo, t; hwloc_topology_t topo, t;
int i; int i;
bool found; bool found;
@ -652,7 +656,11 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender,
orted_failed_launch = true; orted_failed_launch = true;
goto CLEANUP; goto CLEANUP;
} }
if (OPAL_SUCCESS == opal_dss.unpack(buffer, &topo, &idx, OPAL_HWLOC_TOPO)) { if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &topo, &idx, OPAL_HWLOC_TOPO))) {
ORTE_ERROR_LOG(rc);
orted_failed_launch = true;
goto CLEANUP;
}
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output, OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
"%s RECEIVED TOPOLOGY FROM NODE %s", "%s RECEIVED TOPOLOGY FROM NODE %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), nodename)); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), nodename));
@ -686,7 +694,6 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender,
node->topology = topo; node->topology = topo;
} }
} }
}
#endif #endif
CLEANUP: CLEANUP:
@ -694,11 +701,12 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender,
"%s plm:base:orted_report_launch %s for daemon %s at contact %s", "%s plm:base:orted_report_launch %s for daemon %s at contact %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
orted_failed_launch ? "failed" : "completed", orted_failed_launch ? "failed" : "completed",
ORTE_NAME_PRINT(sender), ORTE_NAME_PRINT(&dname),
(NULL == daemon) ? "UNKNOWN" : daemon->rml_uri)); (NULL == daemon) ? "UNKNOWN" : daemon->rml_uri));
if (orted_failed_launch) { if (orted_failed_launch) {
ORTE_ACTIVATE_JOB_STATE(jdatorted, ORTE_JOB_STATE_FAILED_TO_START); ORTE_ACTIVATE_JOB_STATE(jdatorted, ORTE_JOB_STATE_FAILED_TO_START);
return;
} else { } else {
jdatorted->num_reported++; jdatorted->num_reported++;
if (jdatorted->num_procs == jdatorted->num_reported) { if (jdatorted->num_procs == jdatorted->num_reported) {
@ -716,9 +724,13 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender,
} }
} }
} }
idx = 1;
}
if (ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
ORTE_ERROR_LOG(rc);
ORTE_ACTIVATE_JOB_STATE(jdatorted, ORTE_JOB_STATE_FAILED_TO_START);
} else if (NULL != orte_tree_launch_cmd) {
/* if a tree-launch is underway, send the cmd back */ /* if a tree-launch is underway, send the cmd back */
if (NULL != orte_tree_launch_cmd) {
OBJ_RETAIN(orte_tree_launch_cmd); OBJ_RETAIN(orte_tree_launch_cmd);
orte_rml.send_buffer_nb(sender, orte_tree_launch_cmd, orte_rml.send_buffer_nb(sender, orte_tree_launch_cmd,
ORTE_RML_TAG_DAEMON, 0, ORTE_RML_TAG_DAEMON, 0,
@ -770,24 +782,16 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv,
/* check for debug flags */ /* check for debug flags */
if (orte_debug_flag) { if (orte_debug_flag) {
opal_argv_append(argc, argv, "-mca"); opal_argv_append(argc, argv, "--debug");
opal_argv_append(argc, argv, "orte_debug");
opal_argv_append(argc, argv, "1");
} }
if (orte_debug_daemons_flag) { if (orte_debug_daemons_flag) {
opal_argv_append(argc, argv, "-mca"); opal_argv_append(argc, argv, "--debug-daemons");
opal_argv_append(argc, argv, "orte_debug_daemons");
opal_argv_append(argc, argv, "1");
} }
if (orte_debug_daemons_file_flag) { if (orte_debug_daemons_file_flag) {
opal_argv_append(argc, argv, "-mca"); opal_argv_append(argc, argv, "--debug-daemons-file");
opal_argv_append(argc, argv, "orte_debug_daemons_file");
opal_argv_append(argc, argv, "1");
} }
if (orted_spin_flag) { if (orted_spin_flag) {
opal_argv_append(argc, argv, "-mca"); opal_argv_append(argc, argv, "--spin");
opal_argv_append(argc, argv, "orte_daemon_spin");
opal_argv_append(argc, argv, "1");
} }
#if OPAL_HAVE_HWLOC #if OPAL_HAVE_HWLOC
if (opal_hwloc_report_bindings) { if (opal_hwloc_report_bindings) {
@ -880,8 +884,8 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv,
opal_argv_append(argc, argv, param); opal_argv_append(argc, argv, param);
free(param); free(param);
/* if given and we have static ports, pass the node list */ /* if given and we have static ports or are using a common port, pass the node list */
if (orte_static_ports && NULL != nodes) { if ((orte_static_ports || orte_use_common_port) && NULL != nodes) {
/* convert the nodes to a regex */ /* convert the nodes to a regex */
if (ORTE_SUCCESS != (rc = orte_regex_create(nodes, &param))) { if (ORTE_SUCCESS != (rc = orte_regex_create(nodes, &param))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
@ -893,6 +897,11 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv,
free(param); free(param);
} }
/* if we want to use a common port, tell the daemon to do so */
if (orte_use_common_port) {
opal_argv_append(argc, argv, "--use-common-port");
}
/* pass along any cmd line MCA params provided to mpirun, /* pass along any cmd line MCA params provided to mpirun,
* being sure to "purge" any that would cause problems * being sure to "purge" any that would cause problems
* on backend nodes * on backend nodes

Просмотреть файл

@ -61,6 +61,7 @@ BEGIN_C_DECLS
#define ORTE_RML_TAG_WIREUP 8 #define ORTE_RML_TAG_WIREUP 8
#define ORTE_RML_TAG_RML_INFO_UPDATE 9 #define ORTE_RML_TAG_RML_INFO_UPDATE 9
#define ORTE_RML_TAG_ORTED_CALLBACK 10 #define ORTE_RML_TAG_ORTED_CALLBACK 10
#define ORTE_RML_TAG_ROLLUP 11
#define ORTE_RML_TAG_REPORT_REMOTE_LAUNCH 12 #define ORTE_RML_TAG_REPORT_REMOTE_LAUNCH 12
#define ORTE_RML_TAG_CKPT 13 #define ORTE_RML_TAG_CKPT 13

Просмотреть файл

@ -385,7 +385,7 @@ static orte_process_name_t get_route(orte_process_name_t *target)
/* THIS CAME FROM OUR OWN JOB FAMILY... */ /* THIS CAME FROM OUR OWN JOB FAMILY... */
if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_HNP, target)) { if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_HNP, target)) {
if (!hnp_direct || orte_static_ports) { if (!hnp_direct || orte_static_ports || orte_use_common_port) {
OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output, OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output,
"%s routing to the HNP through my parent %s", "%s routing to the HNP through my parent %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
@ -489,8 +489,8 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
return rc; return rc;
} }
/* if we are using static ports, set my lifeline to point at my parent */ /* if we are using static ports or a common port, set my lifeline to point at my parent */
if (orte_static_ports) { if (orte_static_ports || orte_use_common_port) {
lifeline = ORTE_PROC_MY_PARENT; lifeline = ORTE_PROC_MY_PARENT;
} else { } else {
/* set our lifeline to the HNP - we will abort if that connection is lost */ /* set our lifeline to the HNP - we will abort if that connection is lost */

Просмотреть файл

@ -168,6 +168,10 @@ opal_cmd_line_init_t orte_cmd_line_opts[] = {
NULL, OPAL_CMD_LINE_TYPE_STRING, NULL, OPAL_CMD_LINE_TYPE_STRING,
"URI for the parent if tree launch is enabled."}, "URI for the parent if tree launch is enabled."},
{ "orte", "use", "common_port", '\0', NULL, "use-common-port", 0,
NULL, OPAL_CMD_LINE_TYPE_BOOL,
"Use the same port as the HNP."},
{ NULL, NULL, NULL, '\0', NULL, "set-sid", 0, { NULL, NULL, NULL, '\0', NULL, "set-sid", 0,
&orted_globals.set_sid, OPAL_CMD_LINE_TYPE_BOOL, &orted_globals.set_sid, OPAL_CMD_LINE_TYPE_BOOL,
"Direct the orted to separate from the current session"}, "Direct the orted to separate from the current session"},
@ -685,6 +689,12 @@ int orte_daemon(int argc, char *argv[])
*/ */
buffer = OBJ_NEW(opal_buffer_t); buffer = OBJ_NEW(opal_buffer_t);
/* insert our name for rollup purposes */
if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, ORTE_PROC_MY_NAME, 1, ORTE_NAME))) {
ORTE_ERROR_LOG(ret);
OBJ_RELEASE(buffer);
goto DONE;
}
/* for now, always include our contact info, even if we are using /* for now, always include our contact info, even if we are using
* static ports. Eventually, this will be removed * static ports. Eventually, this will be removed
*/ */
@ -708,9 +718,26 @@ int orte_daemon(int argc, char *argv[])
} }
#endif #endif
/* send to the HNP's callback - this will flow up the routing if (orte_static_ports || orte_use_common_port) {
* tree if static ports are enabled /* use the rollup collective to send our data to the HNP
* so we minimize the HNP bottleneck
*/ */
orte_grpcomm_collective_t *coll;
coll = OBJ_NEW(orte_grpcomm_collective_t);
/* get the list of contributors we need from the routed module */
orte_routed.get_routing_list(ORTE_GRPCOMM_COLL_PEERS, coll);
/* add the collective to our list */
opal_list_append(&orte_grpcomm_base.active_colls, &coll->super);
/* send the buffer to ourselves to start the collective */
if (0 > (ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_NAME, buffer,
ORTE_RML_TAG_ROLLUP, 0,
rml_cbfunc, NULL))) {
ORTE_ERROR_LOG(ret);
OBJ_RELEASE(buffer);
goto DONE;
}
} else {
/* send directly to the HNP's callback */
if (0 > (ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buffer, if (0 > (ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buffer,
ORTE_RML_TAG_ORTED_CALLBACK, 0, ORTE_RML_TAG_ORTED_CALLBACK, 0,
rml_cbfunc, NULL))) { rml_cbfunc, NULL))) {
@ -719,6 +746,7 @@ int orte_daemon(int argc, char *argv[])
goto DONE; goto DONE;
} }
} }
}
if (orte_debug_daemons_flag) { if (orte_debug_daemons_flag) {
opal_output(0, "%s orted: up and running - waiting for commands!", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); opal_output(0, "%s orted: up and running - waiting for commands!", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));

Просмотреть файл

@ -74,6 +74,7 @@ char *orte_basename = NULL;
bool orte_static_ports = false; bool orte_static_ports = false;
char *orte_oob_static_ports = NULL; char *orte_oob_static_ports = NULL;
bool orte_standalone_operation = false; bool orte_standalone_operation = false;
bool orte_use_common_port = false;
bool orte_keep_fqdn_hostnames = false; bool orte_keep_fqdn_hostnames = false;
bool orte_have_fqdn_allocation = false; bool orte_have_fqdn_allocation = false;

Просмотреть файл

@ -610,6 +610,7 @@ ORTE_DECLSPEC extern char *orte_basename;
ORTE_DECLSPEC extern bool orte_static_ports; ORTE_DECLSPEC extern bool orte_static_ports;
ORTE_DECLSPEC extern char *orte_oob_static_ports; ORTE_DECLSPEC extern char *orte_oob_static_ports;
ORTE_DECLSPEC extern bool orte_standalone_operation; ORTE_DECLSPEC extern bool orte_standalone_operation;
ORTE_DECLSPEC extern bool orte_use_common_port;
ORTE_DECLSPEC extern bool orte_keep_fqdn_hostnames; ORTE_DECLSPEC extern bool orte_keep_fqdn_hostnames;
ORTE_DECLSPEC extern bool orte_have_fqdn_allocation; ORTE_DECLSPEC extern bool orte_have_fqdn_allocation;

Просмотреть файл

@ -547,6 +547,11 @@ int orte_register_params(void)
"Maximum size of virtual machine - used to subdivide allocation", "Maximum size of virtual machine - used to subdivide allocation",
false, false, -1, &orte_max_vm_size); false, false, -1, &orte_max_vm_size);
mca_base_param_reg_int_name("orte", "use_common_port",
"Daemons use same port as HNP",
false, false, (int)false, &value);
orte_use_common_port = OPAL_INT_TO_BOOL(value);
#endif /* ORTE_DISABLE_FULL_SUPPORT */ #endif /* ORTE_DISABLE_FULL_SUPPORT */
return ORTE_SUCCESS; return ORTE_SUCCESS;