
Improve launch performance on clusters that use dedicated nodes by instructing the orteds to use the same port as the HNP, thus allowing them to "roll up" their initial callbacks via the routed network. This substantially reduces the HNP bottleneck and the number of ports opened by the HNP.
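For intuition, here is a minimal, self-contained sketch of the rollup effect - illustrative C only, not ORTE code. The binary-tree layout and every name in it are assumptions made for the example; in ORTE the routed framework defines the actual tree shape (binomial by default).

/*
 * Sketch: each daemon waits for the callback buckets of its tree
 * children, appends its own, and sends ONE combined message to its
 * parent - so the HNP only hears from its direct children instead
 * of from every daemon.
 */
#include <stdio.h>

/* number of children of node v in a binary routing tree over n nodes */
static int num_children(int v, int n)
{
    int c = 0;
    if (2 * v + 1 < n) c++;
    if (2 * v + 2 < n) c++;
    return c;
}

int main(void)
{
    int n = 1024;   /* daemons in the job, vpid 0 standing in for the HNP */

    /* without rollup: every daemon calls back to the HNP directly */
    printf("direct model: %d callback messages hit the HNP\n", n - 1);

    /* with rollup: each daemon sends one message up the tree, so the
     * HNP receives only as many messages as it has direct children */
    printf("rollup model: %d callback messages hit the HNP\n",
           num_children(0, n));
    return 0;
}

With n = 1024 this prints 1023 direct callbacks versus 2 rolled-up ones, which is the bottleneck reduction described above.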

Restore the enable-static-ports option to enabled by default - the Cray will have to disable it to get around their library issues, but that is just a warning problem as opposed to blocking the build.
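As a hypothetical illustration of what the configure change below produces, this is how a generated compile-time define of this kind typically gates code; the macro value shown is an assumption standing in for whatever AC_DEFINE_UNQUOTED writes into the build's config header.

/* Illustrative sketch only - the value below is an assumption for the
 * example; in a real build the configure-generated header carries it. */
#include <stdio.h>

#define ORTE_ENABLE_STATIC_PORTS 1  /* stand-in for the generated define */

int main(void)
{
#if ORTE_ENABLE_STATIC_PORTS
    puts("static-port wireup support compiled in (new default)");
#else
    puts("static-port wireup support compiled out");
#endif
    return 0;
}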

This commit was SVN r26606.
Ralph Castain 2012-06-15 10:15:07 +00:00
parent defaefd59e
commit 96c778656a
16 changed files with 417 additions and 209 deletions


@@ -137,13 +137,13 @@ AC_DEFINE_UNQUOTED([ORTE_ENABLE_PROGRESS_THREADS],
AC_MSG_CHECKING([if want orte static ports])
AC_ARG_ENABLE([orte-static-ports],
[AC_HELP_STRING([--enable-orte-static-ports],
[Enable orte static ports for tcp oob. (default: disabled)])])
if test "$enable_orte_static_ports" = "yes"; then
AC_MSG_RESULT([yes])
orte_enable_static_ports=1
else
[Enable orte static ports for tcp oob. (default: enabled)])])
if test "$enable_orte_static_ports" = "no"; then
AC_MSG_RESULT([no])
orte_enable_static_ports=0
else
AC_MSG_RESULT([yes])
orte_enable_static_ports=1
fi
AC_DEFINE_UNQUOTED([ORTE_ENABLE_STATIC_PORTS],
[$orte_enable_static_ports],


@@ -304,13 +304,18 @@ int orte_ess_base_orted_setup(char **hosts)
goto error;
}
#if ORTE_ENABLE_STATIC_PORTS
/* if we are using static ports, then we need to setup
/* if we are using static ports or a common port, then we need to setup
* the daemon info so the RML can function properly
* without requiring a wireup stage. This must be done
* after we enable_comm as that function determines our
* own port, which we need in order to construct the nidmap
*/
if (orte_static_ports) {
if (orte_static_ports || orte_use_common_port) {
/* define the routing tree so we know the pattern
* if we are trying to setup common or static ports
*/
orte_routed.update_routing_plan();
if (ORTE_SUCCESS != (ret = orte_util_setup_local_nidmap_entries())) {
ORTE_ERROR_LOG(ret);
error = "orte_util_nidmap_init";
@@ -364,8 +369,8 @@ int orte_ess_base_orted_setup(char **hosts)
goto error;
}
/* Once the session directory location has been established, set
the opal_output env file location to be in the
proc-specific session directory. */
the opal_output env file location to be in the
proc-specific session directory. */
opal_output_set_output_file_info(orte_process_info.proc_session_dir,
"output-", NULL, NULL);
@@ -411,9 +416,9 @@ int orte_ess_base_orted_setup(char **hosts)
/* setup the global job and node arrays */
orte_job_data = OBJ_NEW(opal_pointer_array_t);
if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_job_data,
1,
ORTE_GLOBAL_ARRAY_MAX_SIZE,
1))) {
1,
ORTE_GLOBAL_ARRAY_MAX_SIZE,
1))) {
ORTE_ERROR_LOG(ret);
error = "setup job array";
goto error;
@@ -421,18 +426,18 @@ int orte_ess_base_orted_setup(char **hosts)
orte_node_pool = OBJ_NEW(opal_pointer_array_t);
if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_node_pool,
ORTE_GLOBAL_ARRAY_BLOCK_SIZE,
ORTE_GLOBAL_ARRAY_MAX_SIZE,
ORTE_GLOBAL_ARRAY_BLOCK_SIZE))) {
ORTE_GLOBAL_ARRAY_BLOCK_SIZE,
ORTE_GLOBAL_ARRAY_MAX_SIZE,
ORTE_GLOBAL_ARRAY_BLOCK_SIZE))) {
ORTE_ERROR_LOG(ret);
error = "setup node array";
goto error;
}
orte_node_topologies = OBJ_NEW(opal_pointer_array_t);
if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_node_topologies,
ORTE_GLOBAL_ARRAY_BLOCK_SIZE,
ORTE_GLOBAL_ARRAY_MAX_SIZE,
ORTE_GLOBAL_ARRAY_BLOCK_SIZE))) {
ORTE_GLOBAL_ARRAY_BLOCK_SIZE,
ORTE_GLOBAL_ARRAY_MAX_SIZE,
ORTE_GLOBAL_ARRAY_BLOCK_SIZE))) {
ORTE_ERROR_LOG(ret);
error = "setup node topologies array";
goto error;
@@ -580,7 +585,7 @@ int orte_ess_base_orted_setup(char **hosts)
return ORTE_SUCCESS;
error:
error:
orte_show_help("help-orte-runtime.txt",
"orte_init:startup:internal-failure",
true, error, ORTE_ERROR_NAME(ret), ret);


@@ -72,6 +72,7 @@ static int rte_init(void)
{
int ret;
char *error = NULL;
char **hosts = NULL;
/* run the prolog */
if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) {
@@ -86,11 +87,23 @@ static int rte_init(void)
* default procedure
*/
if (ORTE_PROC_IS_DAEMON) {
if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup(NULL))) {
if (NULL != orte_node_regex) {
/* extract the nodes */
if (ORTE_SUCCESS != (ret =
orte_regex_extract_node_names(orte_node_regex, &hosts)) ||
NULL == hosts) {
error = "orte_regex_extract_node_names";
goto error;
}
}
if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup(hosts))) {
ORTE_ERROR_LOG(ret);
error = "orte_ess_base_orted_setup";
goto error;
}
if (NULL != hosts) {
opal_argv_free(hosts);
}
return ORTE_SUCCESS;
}


@@ -9,7 +9,7 @@
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2011 Los Alamos National Security, LLC.
# Copyright (c) 2011-2012 Los Alamos National Security, LLC.
# All rights reserved.
# $COPYRIGHT$
#
@@ -31,5 +31,6 @@ if !ORTE_DISABLE_FULL_SUPPORT
libmca_grpcomm_la_SOURCES += \
base/grpcomm_base_modex.c \
base/grpcomm_base_receive.c \
base/grpcomm_base_xcast.c
base/grpcomm_base_xcast.c \
base/grpcomm_base_rollup.c
endif


@@ -74,6 +74,9 @@ ORTE_DECLSPEC orte_grpcomm_coll_id_t orte_grpcomm_base_get_coll_id(void);
ORTE_DECLSPEC void orte_grpcomm_base_pack_collective(opal_buffer_t *relay,
orte_grpcomm_collective_t *coll,
orte_grpcomm_internal_stage_t stg);
ORTE_DECLSPEC void orte_grpcomm_base_rollup_recv(int status, orte_process_name_t* sender,
opal_buffer_t* buffer, orte_rml_tag_t tag,
void* cbdata);
/* modex support */
ORTE_DECLSPEC int orte_grpcomm_base_set_proc_attr(const char* project,


@@ -93,6 +93,16 @@ int orte_grpcomm_base_comm_start(void)
recv_issued = false;
return rc;
}
if (ORTE_PROC_IS_DAEMON) {
if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
ORTE_RML_TAG_ROLLUP,
ORTE_RML_PERSISTENT,
orte_grpcomm_base_rollup_recv, NULL))) {
ORTE_ERROR_LOG(rc);
recv_issued = false;
return rc;
}
}
if (ORTE_PROC_IS_HNP) {
if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
ORTE_RML_TAG_COLL_ID_REQ,

orte/mca/grpcomm/base/grpcomm_base_rollup.c (new file, 127 lines)

@@ -0,0 +1,127 @@
/* -*- C -*-
*
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file:
*
*/
/*
* includes
*/
#include "orte_config.h"
#include "opal/dss/dss.h"
#include "orte/util/proc_info.h"
#include "orte/util/error_strings.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/ess/ess.h"
#include "orte/mca/odls/base/base.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/routed/routed.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/grpcomm/grpcomm_types.h"
#include "orte/mca/grpcomm/grpcomm.h"
#include "orte/mca/grpcomm/base/base.h"
/* function to cleanup collective once completed */
static void rml_send_callback(int status, orte_process_name_t* sender,
opal_buffer_t* buffer, orte_rml_tag_t tag,
void* cbdata)
{
orte_grpcomm_collective_t *coll;
/* remove this collective from our list */
coll = (orte_grpcomm_collective_t*)opal_list_remove_first(&orte_grpcomm_base.active_colls);
/* release it */
OBJ_RELEASE(coll);
/* release our buffer */
OBJ_RELEASE(buffer);
}
void orte_grpcomm_base_rollup_recv(int status, orte_process_name_t* sender,
opal_buffer_t* buffer, orte_rml_tag_t tag,
void* cbdata)
{
int ret;
orte_grpcomm_collective_t *coll;
bool done = false;
opal_buffer_t *relay;
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
"%s grpcomm:rollup:recv from sender %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(sender)));
/* lookup the collective - can only be one on the list at this time */
coll = (orte_grpcomm_collective_t*)opal_list_get_first(&orte_grpcomm_base.active_colls);
/* flag that we received a bucket */
if (sender->vpid != ORTE_PROC_MY_NAME->vpid) {
coll->num_peer_buckets++;
}
/* transfer the data */
opal_dss.copy_payload(&coll->buffer, buffer);
/* if list is empty, then we can just send our data along */
if (opal_list_is_empty(&coll->targets)) {
done = true;
} else if (coll->num_peer_buckets == opal_list_get_size(&coll->targets)) {
done = true;
} else {
/* check for a wildcard */
orte_namelist_t *nm;
nm = (orte_namelist_t*)opal_list_get_first(&coll->targets);
if (ORTE_VPID_WILDCARD == nm->name.vpid &&
coll->num_peer_buckets == orte_process_info.num_procs) {
done = true;
}
}
if (done) {
/* send the message to my parent */
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
"%s grpcomm:rollup: sending rollup msg to %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(ORTE_PROC_MY_PARENT)));
relay = OBJ_NEW(opal_buffer_t);
opal_dss.copy_payload(relay, &coll->buffer);
/* if my parent is the HNP, send it to the final destination */
if (ORTE_PROC_MY_PARENT->vpid == ORTE_PROC_MY_HNP->vpid) {
if (0 > (ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, relay,
ORTE_RML_TAG_ORTED_CALLBACK, 0,
rml_send_callback, NULL))) {
ORTE_ERROR_LOG(ret);
}
} else {
if (0 > (ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_PARENT, relay,
ORTE_RML_TAG_ROLLUP, 0,
rml_send_callback, NULL))) {
ORTE_ERROR_LOG(ret);
}
}
}
}


@@ -70,7 +70,6 @@ typedef int (*orte_grpcomm_base_module_allgather_fn_t)(orte_grpcomm_collective_t
/* barrier function */
typedef int (*orte_grpcomm_base_module_barrier_fn_t)(orte_grpcomm_collective_t *coll);
/** DATA EXCHANGE FUNCTIONS - SEE ompi/runtime/ompi_module_exchange.h FOR A DESCRIPTION
* OF HOW THIS ALL WORKS
*/


@@ -651,7 +651,15 @@ mca_oob_tcp_create_listen(int *target_sd, unsigned short *target_port, uint16_t
port in the range. Otherwise, tcp_port_min will be 0, which
means "pick any port" */
if (AF_INET == af_family) {
if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_DAEMON) {
if (ORTE_PROC_IS_DAEMON && orte_use_common_port) {
/* use the same port as the HNP */
char *ptr, *portptr;
portptr = strdup(orte_process_info.my_hnp_uri);
ptr = strrchr(portptr, ':');
ptr++;
opal_argv_append_nosize(&ports, ptr);
free(portptr);
} else if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_DAEMON) {
if (NULL != mca_oob_tcp_component.tcp4_static_ports) {
/* if static ports were provided, the daemon takes the
* first entry in the list
@@ -1647,10 +1655,8 @@ int mca_oob_tcp_resolve(mca_oob_tcp_peer_t* peer)
* if we are trying to talk to a process on our own node, try
* looking for the loopback interface before giving up
*/
#if OPAL_WANT_IPV6
goto unlock;
}
#else
if (0 == strcasecmp(host, orte_process_info.nodename) ||
0 == strncasecmp(host, orte_process_info.nodename, strlen(host)) ||
opal_ifislocal(host)) {
@@ -1677,7 +1683,6 @@ int mca_oob_tcp_resolve(mca_oob_tcp_peer_t* peer)
haddr = inet_ntoa(*(struct in_addr*)h->h_addr_list[0]);
}
proceed:
#endif
/* we can't know which af_family we are using, so for now, let's
* just look to see which static port family was provided
*/
@@ -1738,7 +1743,7 @@ int mca_oob_tcp_resolve(mca_oob_tcp_peer_t* peer)
}
}
unlock:
unlock:
#endif
return rc;
}


@@ -518,141 +518,149 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender,
char *nodename;
orte_node_t *node;
orte_job_t *jdata;
orte_process_name_t dname;
/* get the daemon job, if necessary */
if (NULL == jdatorted) {
jdatorted = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
}
/* unpack its contact info */
/* multiple daemons could be in this buffer, so unpack until we exhaust the data */
idx = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &rml_uri, &idx, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
orted_failed_launch = true;
goto CLEANUP;
}
/* set the contact info into the hash table */
if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(rml_uri))) {
ORTE_ERROR_LOG(rc);
orted_failed_launch = true;
goto CLEANUP;
}
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
"%s plm:base:orted_report_launch from daemon %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(sender)));
/* update state and record for this daemon contact info */
if (NULL == (daemon = (orte_proc_t*)opal_pointer_array_get_item(jdatorted->procs, sender->vpid))) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
orted_failed_launch = true;
goto CLEANUP;
}
daemon->state = ORTE_PROC_STATE_RUNNING;
daemon->rml_uri = rml_uri;
/* unpack the node name */
idx = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &nodename, &idx, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
orted_failed_launch = true;
goto CLEANUP;
}
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
"%s plm:base:orted_report_launch from daemon %s on node %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(sender), nodename));
/* look this node up, if necessary */
if (!orte_plm_globals.daemon_nodes_assigned_at_launch) {
if (!orte_have_fqdn_allocation) {
/* remove any domain info */
if (NULL != (ptr = strchr(nodename, '.'))) {
*ptr = '\0';
ptr = strdup(nodename);
free(nodename);
nodename = ptr;
}
}
if (orte_plm_globals.strip_prefix_from_node_names) {
/* remove all leading characters and zeroes */
ptr = nodename;
while (idx < (int)strlen(nodename) &&
(isalpha(nodename[idx]) || '0' == nodename[idx])) {
idx++;
}
if (idx == (int)strlen(nodename)) {
ptr = strdup(nodename);
} else {
ptr = strdup(&nodename[idx]);
}
free(nodename);
nodename = ptr;
}
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
"%s plm:base:orted_report_launch attempting to assign daemon %s to node %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(sender), nodename));
for (idx=0; idx < orte_node_pool->size; idx++) {
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, idx))) {
continue;
}
if (node->location_verified) {
/* already assigned */
continue;
}
if (0 == strcmp(nodename, node->name)) {
/* flag that we verified the location */
node->location_verified = true;
if (node == daemon->node) {
/* it wound up right where it should */
break;
}
/* remove the prior association */
if (NULL != daemon->node) {
OBJ_RELEASE(daemon->node);
}
if (NULL != node->daemon) {
OBJ_RELEASE(node->daemon);
}
/* associate this daemon with the node */
node->daemon = daemon;
OBJ_RETAIN(daemon);
/* associate this node with the daemon */
daemon->node = node;
daemon->nodename = node->name;
OBJ_RETAIN(node);
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
"%s plm:base:orted_report_launch assigning daemon %s to node %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&daemon->name), node->name));
break;
}
}
}
#if OPAL_HAVE_HWLOC
/* store the local resources for that node */
{
hwloc_topology_t topo, t;
int i;
bool found;
idx=1;
node = daemon->node;
if (NULL == node) {
/* this shouldn't happen - it indicates an error in the
* prior node matching logic, so report it and error out
*/
orte_show_help("help-plm-base.txt", "daemon-no-assigned-node", true,
ORTE_NAME_PRINT(&daemon->name), nodename);
while (OPAL_SUCCESS == (rc = opal_dss.unpack(buffer, &dname, &idx, ORTE_NAME))) {
/* unpack its contact info */
idx = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &rml_uri, &idx, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
orted_failed_launch = true;
goto CLEANUP;
}
if (OPAL_SUCCESS == opal_dss.unpack(buffer, &topo, &idx, OPAL_HWLOC_TOPO)) {
/* set the contact info into the hash table */
if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(rml_uri))) {
ORTE_ERROR_LOG(rc);
orted_failed_launch = true;
goto CLEANUP;
}
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
"%s plm:base:orted_report_launch from daemon %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&dname)));
/* update state and record for this daemon contact info */
if (NULL == (daemon = (orte_proc_t*)opal_pointer_array_get_item(jdatorted->procs, dname.vpid))) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
orted_failed_launch = true;
goto CLEANUP;
}
daemon->state = ORTE_PROC_STATE_RUNNING;
daemon->rml_uri = rml_uri;
/* unpack the node name */
idx = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &nodename, &idx, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
orted_failed_launch = true;
goto CLEANUP;
}
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
"%s plm:base:orted_report_launch from daemon %s on node %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&dname), nodename));
/* look this node up, if necessary */
if (!orte_plm_globals.daemon_nodes_assigned_at_launch) {
if (!orte_have_fqdn_allocation) {
/* remove any domain info */
if (NULL != (ptr = strchr(nodename, '.'))) {
*ptr = '\0';
ptr = strdup(nodename);
free(nodename);
nodename = ptr;
}
}
if (orte_plm_globals.strip_prefix_from_node_names) {
/* remove all leading characters and zeroes */
ptr = nodename;
while (idx < (int)strlen(nodename) &&
(isalpha(nodename[idx]) || '0' == nodename[idx])) {
idx++;
}
if (idx == (int)strlen(nodename)) {
ptr = strdup(nodename);
} else {
ptr = strdup(&nodename[idx]);
}
free(nodename);
nodename = ptr;
}
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
"%s plm:base:orted_report_launch attempting to assign daemon %s to node %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&dname), nodename));
for (idx=0; idx < orte_node_pool->size; idx++) {
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, idx))) {
continue;
}
if (node->location_verified) {
/* already assigned */
continue;
}
if (0 == strcmp(nodename, node->name)) {
/* flag that we verified the location */
node->location_verified = true;
if (node == daemon->node) {
/* it wound up right where it should */
break;
}
/* remove the prior association */
if (NULL != daemon->node) {
OBJ_RELEASE(daemon->node);
}
if (NULL != node->daemon) {
OBJ_RELEASE(node->daemon);
}
/* associate this daemon with the node */
node->daemon = daemon;
OBJ_RETAIN(daemon);
/* associate this node with the daemon */
daemon->node = node;
daemon->nodename = node->name;
OBJ_RETAIN(node);
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
"%s plm:base:orted_report_launch assigning daemon %s to node %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&daemon->name), node->name));
break;
}
}
}
#if OPAL_HAVE_HWLOC
/* store the local resources for that node */
if (1 == dname.vpid || orte_hetero_nodes) {
hwloc_topology_t topo, t;
int i;
bool found;
idx=1;
node = daemon->node;
if (NULL == node) {
/* this shouldn't happen - it indicates an error in the
* prior node matching logic, so report it and error out
*/
orte_show_help("help-plm-base.txt", "daemon-no-assigned-node", true,
ORTE_NAME_PRINT(&daemon->name), nodename);
orted_failed_launch = true;
goto CLEANUP;
}
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &topo, &idx, OPAL_HWLOC_TOPO))) {
ORTE_ERROR_LOG(rc);
orted_failed_launch = true;
goto CLEANUP;
}
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
"%s RECEIVED TOPOLOGY FROM NODE %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), nodename));
@@ -681,44 +689,48 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender,
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
"%s NEW TOPOLOGY - ADDING",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
opal_pointer_array_add(orte_node_topologies, topo);
node->topology = topo;
}
}
}
#endif
CLEANUP:
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
"%s plm:base:orted_report_launch %s for daemon %s at contact %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
orted_failed_launch ? "failed" : "completed",
ORTE_NAME_PRINT(sender),
(NULL == daemon) ? "UNKNOWN" : daemon->rml_uri));
if (orted_failed_launch) {
ORTE_ACTIVATE_JOB_STATE(jdatorted, ORTE_JOB_STATE_FAILED_TO_START);
} else {
jdatorted->num_reported++;
if (jdatorted->num_procs == jdatorted->num_reported) {
jdatorted->state = ORTE_JOB_STATE_DAEMONS_REPORTED;
/* activate the daemons_reported state for all jobs
* whose daemons were launched
*/
for (idx=1; idx < orte_job_data->size; idx++) {
if (NULL == (jdata = (orte_job_t*)opal_pointer_array_get_item(orte_job_data, idx))) {
continue;
}
if (ORTE_JOB_STATE_DAEMONS_LAUNCHED == jdata->state) {
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DAEMONS_REPORTED);
CLEANUP:
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
"%s plm:base:orted_report_launch %s for daemon %s at contact %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
orted_failed_launch ? "failed" : "completed",
ORTE_NAME_PRINT(&dname),
(NULL == daemon) ? "UNKNOWN" : daemon->rml_uri));
if (orted_failed_launch) {
ORTE_ACTIVATE_JOB_STATE(jdatorted, ORTE_JOB_STATE_FAILED_TO_START);
return;
} else {
jdatorted->num_reported++;
if (jdatorted->num_procs == jdatorted->num_reported) {
jdatorted->state = ORTE_JOB_STATE_DAEMONS_REPORTED;
/* activate the daemons_reported state for all jobs
* whose daemons were launched
*/
for (idx=1; idx < orte_job_data->size; idx++) {
if (NULL == (jdata = (orte_job_t*)opal_pointer_array_get_item(orte_job_data, idx))) {
continue;
}
if (ORTE_JOB_STATE_DAEMONS_LAUNCHED == jdata->state) {
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DAEMONS_REPORTED);
}
}
}
}
idx = 1;
}
/* if a tree-launch is underway, send the cmd back */
if (NULL != orte_tree_launch_cmd) {
if (ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
ORTE_ERROR_LOG(rc);
ORTE_ACTIVATE_JOB_STATE(jdatorted, ORTE_JOB_STATE_FAILED_TO_START);
} else if (NULL != orte_tree_launch_cmd) {
/* if a tree-launch is underway, send the cmd back */
OBJ_RETAIN(orte_tree_launch_cmd);
orte_rml.send_buffer_nb(sender, orte_tree_launch_cmd,
ORTE_RML_TAG_DAEMON, 0,
@@ -770,24 +782,16 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv,
/* check for debug flags */
if (orte_debug_flag) {
opal_argv_append(argc, argv, "-mca");
opal_argv_append(argc, argv, "orte_debug");
opal_argv_append(argc, argv, "1");
opal_argv_append(argc, argv, "--debug");
}
if (orte_debug_daemons_flag) {
opal_argv_append(argc, argv, "-mca");
opal_argv_append(argc, argv, "orte_debug_daemons");
opal_argv_append(argc, argv, "1");
opal_argv_append(argc, argv, "--debug-daemons");
}
if (orte_debug_daemons_file_flag) {
opal_argv_append(argc, argv, "-mca");
opal_argv_append(argc, argv, "orte_debug_daemons_file");
opal_argv_append(argc, argv, "1");
opal_argv_append(argc, argv, "--debug-daemons-file");
}
if (orted_spin_flag) {
opal_argv_append(argc, argv, "-mca");
opal_argv_append(argc, argv, "orte_daemon_spin");
opal_argv_append(argc, argv, "1");
opal_argv_append(argc, argv, "--spin");
}
#if OPAL_HAVE_HWLOC
if (opal_hwloc_report_bindings) {
@@ -880,8 +884,8 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv,
opal_argv_append(argc, argv, param);
free(param);
/* if given and we have static ports, pass the node list */
if (orte_static_ports && NULL != nodes) {
/* if given and we have static ports or are using a common port, pass the node list */
if ((orte_static_ports || orte_use_common_port) && NULL != nodes) {
/* convert the nodes to a regex */
if (ORTE_SUCCESS != (rc = orte_regex_create(nodes, &param))) {
ORTE_ERROR_LOG(rc);
@@ -893,6 +897,11 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv,
free(param);
}
/* if we want to use a common port, tell the daemon to do so */
if (orte_use_common_port) {
opal_argv_append(argc, argv, "--use-common-port");
}
/* pass along any cmd line MCA params provided to mpirun,
* being sure to "purge" any that would cause problems
* on backend nodes


@@ -61,6 +61,7 @@ BEGIN_C_DECLS
#define ORTE_RML_TAG_WIREUP 8
#define ORTE_RML_TAG_RML_INFO_UPDATE 9
#define ORTE_RML_TAG_ORTED_CALLBACK 10
#define ORTE_RML_TAG_ROLLUP 11
#define ORTE_RML_TAG_REPORT_REMOTE_LAUNCH 12
#define ORTE_RML_TAG_CKPT 13


@@ -385,7 +385,7 @@ static orte_process_name_t get_route(orte_process_name_t *target)
/* THIS CAME FROM OUR OWN JOB FAMILY... */
if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_HNP, target)) {
if (!hnp_direct || orte_static_ports) {
if (!hnp_direct || orte_static_ports || orte_use_common_port) {
OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output,
"%s routing to the HNP through my parent %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
@@ -489,8 +489,8 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
return rc;
}
/* if we are using static ports, set my lifeline to point at my parent */
if (orte_static_ports) {
/* if we are using static ports or a common port, set my lifeline to point at my parent */
if (orte_static_ports || orte_use_common_port) {
lifeline = ORTE_PROC_MY_PARENT;
} else {
/* set our lifeline to the HNP - we will abort if that connection is lost */


@@ -168,6 +168,10 @@ opal_cmd_line_init_t orte_cmd_line_opts[] = {
NULL, OPAL_CMD_LINE_TYPE_STRING,
"URI for the parent if tree launch is enabled."},
{ "orte", "use", "common_port", '\0', NULL, "use-common-port", 0,
NULL, OPAL_CMD_LINE_TYPE_BOOL,
"Use the same port as the HNP."},
{ NULL, NULL, NULL, '\0', NULL, "set-sid", 0,
&orted_globals.set_sid, OPAL_CMD_LINE_TYPE_BOOL,
"Direct the orted to separate from the current session"},
@@ -685,6 +689,12 @@ int orte_daemon(int argc, char *argv[])
*/
buffer = OBJ_NEW(opal_buffer_t);
/* insert our name for rollup purposes */
if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, ORTE_PROC_MY_NAME, 1, ORTE_NAME))) {
ORTE_ERROR_LOG(ret);
OBJ_RELEASE(buffer);
goto DONE;
}
/* for now, always include our contact info, even if we are using
* static ports. Eventually, this will be removed
*/
@@ -708,16 +718,34 @@ int orte_daemon(int argc, char *argv[])
}
#endif
/* send to the HNP's callback - this will flow up the routing
* tree if static ports are enabled
*/
if (0 > (ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buffer,
ORTE_RML_TAG_ORTED_CALLBACK, 0,
rml_cbfunc, NULL))) {
ORTE_ERROR_LOG(ret);
OBJ_RELEASE(buffer);
goto DONE;
}
if (orte_static_ports || orte_use_common_port) {
/* use the rollup collective to send our data to the HNP
* so we minimize the HNP bottleneck
*/
orte_grpcomm_collective_t *coll;
coll = OBJ_NEW(orte_grpcomm_collective_t);
/* get the list of contributors we need from the routed module */
orte_routed.get_routing_list(ORTE_GRPCOMM_COLL_PEERS, coll);
/* add the collective to our list */
opal_list_append(&orte_grpcomm_base.active_colls, &coll->super);
/* send the buffer to ourselves to start the collective */
if (0 > (ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_NAME, buffer,
ORTE_RML_TAG_ROLLUP, 0,
rml_cbfunc, NULL))) {
ORTE_ERROR_LOG(ret);
OBJ_RELEASE(buffer);
goto DONE;
}
} else {
/* send directly to the HNP's callback */
if (0 > (ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buffer,
ORTE_RML_TAG_ORTED_CALLBACK, 0,
rml_cbfunc, NULL))) {
ORTE_ERROR_LOG(ret);
OBJ_RELEASE(buffer);
goto DONE;
}
}
}
if (orte_debug_daemons_flag) {


@@ -74,6 +74,7 @@ char *orte_basename = NULL;
bool orte_static_ports = false;
char *orte_oob_static_ports = NULL;
bool orte_standalone_operation = false;
bool orte_use_common_port = false;
bool orte_keep_fqdn_hostnames = false;
bool orte_have_fqdn_allocation = false;


@@ -610,6 +610,7 @@ ORTE_DECLSPEC extern char *orte_basename;
ORTE_DECLSPEC extern bool orte_static_ports;
ORTE_DECLSPEC extern char *orte_oob_static_ports;
ORTE_DECLSPEC extern bool orte_standalone_operation;
ORTE_DECLSPEC extern bool orte_use_common_port;
ORTE_DECLSPEC extern bool orte_keep_fqdn_hostnames;
ORTE_DECLSPEC extern bool orte_have_fqdn_allocation;


@@ -547,6 +547,11 @@ int orte_register_params(void)
"Maximum size of virtual machine - used to subdivide allocation",
false, false, -1, &orte_max_vm_size);
mca_base_param_reg_int_name("orte", "use_common_port",
"Daemons use same port as HNP",
false, false, (int)false, &value);
orte_use_common_port = OPAL_INT_TO_BOOL(value);
#endif /* ORTE_DISABLE_FULL_SUPPORT */
return ORTE_SUCCESS;