1
1

Add the btl_tcp_if_seq MCA parameter. From the help string:

If specified, a comma-delimited list of TCP interfaces.  Interfaces
  will be assigned, one to each MPI process, in a round-robin fashion
  on each server.  For example, if the list is "eth0,eth1" and four
  MPI processes are run on a single server, then local ranks 0 and 2
  will use eth0 and local ranks 1 and 3 will use eth1.

This feature is only useful for environments with virtual ethernet
interfaces on the same network.  For example, if eth0 and eth1 are
virtual interfaces to the same NIC on the same subnet, and if the NIC
provides different hardware resources to eth0 and eth1 (not just
different kernel resources), some HOL blocking and congestion issues
can be eased in a modest fashion.

This commit was SVN r24181.
Этот коммит содержится в:
Jeff Squyres 2010-12-16 00:54:32 +00:00
родитель 741ba6518b
Коммит b113b1a382
3 изменённых файлов: 82 добавлений и 11 удалений

5
NEWS
Просмотреть файл

@ -62,6 +62,11 @@ Trunk (not on release branches yet)
OPAL levels - intended for use when configuring without MPI support
- Modified paffinity system to provide warning when bindings result in
being "bound to all", which is equivalent to "not bound"
- Added btl_tcp_if_seq MCA parameter to select a different ethernet
interface for each MPI process on a node. This parameter is only
useful when used with virtual ethernet interfaces on a single
network card (e.g., when the virtual interfaces give dedicated
hardware resources on the NIC to each process).
1.5.1

Просмотреть файл

@ -1,4 +1,3 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -10,6 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -92,6 +92,10 @@ struct mca_btl_tcp_component_t {
/* Do we want to use TCP_NODELAY? */
int tcp_use_nodelay;
/* If btl_tcp_if_seq was specified, this is the one interface
(name) that we're supposed to use. */
char *tcp_if_seq;
};
typedef struct mca_btl_tcp_component_t mca_btl_tcp_component_t;

Просмотреть файл

@ -9,7 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2009 Oak Ridge National Laboratory
* $COPYRIGHT$
@ -48,19 +48,21 @@
#include <ctype.h>
#include <limits.h>
#include "ompi/constants.h"
#include "opal/mca/event/event.h"
#include "opal/util/if.h"
#include "opal/util/output.h"
#include "opal/util/argv.h"
#include "opal/util/net.h"
#include "opal/util/opal_sos.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/types.h"
#include "orte/util/show_help.h"
#include "orte/mca/ess/ess.h"
#include "ompi/constants.h"
#include "ompi/mca/btl/btl.h"
#include "opal/mca/base/mca_base_param.h"
#include "ompi/mca/btl/base/base.h"
#include "ompi/runtime/ompi_module_exchange.h"
#include "ompi/mca/mpool/base/base.h"
#include "ompi/mca/btl/base/btl_base_error.h"
@ -69,7 +71,6 @@
#include "btl_tcp_proc.h"
#include "btl_tcp_frag.h"
#include "btl_tcp_endpoint.h"
#include "ompi/mca/btl/base/base.h"
mca_btl_tcp_component_t mca_btl_tcp_component = {
@ -281,6 +282,53 @@ int mca_btl_tcp_component_open(void)
mca_btl_tcp_component.tcp_disable_family =
mca_btl_tcp_param_register_int ("disable_family", NULL, 0);
/* Register a list of interfaces to use in sequence */
message = mca_btl_tcp_param_register_string("if_seq",
"If specified, a comma-delimited list of TCP interfaces. Interfaces will be assigned, one to each MPI process, in a round-robin fashion on each server. For example, if the list is \"eth0,eth1\" and four MPI processes are run on a single server, then local ranks 0 and 2 will use eth0 and local ranks 1 and 3 will use eth1.", NULL);
mca_btl_tcp_component.tcp_if_seq = NULL;
if (NULL != message && '\0' != *message) {
char **argv = opal_argv_split(message, ',');
if (NULL != argv && '\0' != *(argv[0])) {
int if_index, rc, count;
orte_node_rank_t node_rank;
char name[256];
node_rank = orte_ess.get_node_rank(ORTE_PROC_MY_NAME);
/* Now that we've got the node rank, take the
corresponding entry from the tcp_if_seq list (wrapping
if necessary) */
count = opal_argv_count(argv);
mca_btl_tcp_component.tcp_if_seq =
strdup(argv[node_rank % count]);
opal_argv_free(argv);
/* Double check that the selected interface actually exists */
for (if_index = opal_ifbegin(); if_index >= 0;
if_index = opal_ifnext(if_index)){
if (OPAL_SUCCESS !=
(rc = opal_ifindextoname(if_index, name, sizeof(name)))) {
return rc;
}
if (0 == strcmp(name, mca_btl_tcp_component.tcp_if_seq)) {
break;
}
}
if (if_index < 0) {
orte_show_help("help-mpi-btl-tcp.txt",
"invalid if_inexclude",
true, "if_seq",
orte_process_info.nodename,
mca_btl_tcp_component.tcp_if_seq,
"Interface does not exist");
return OMPI_ERR_BAD_PARAM;
}
BTL_VERBOSE(("Node rank %d using TCP interface %s",
node_rank, mca_btl_tcp_component.tcp_if_seq));
}
}
return OMPI_SUCCESS;
}
@ -302,6 +350,9 @@ int mca_btl_tcp_component_close(void)
free(mca_btl_tcp_component.tcp_if_exclude);
mca_btl_tcp_component.tcp_if_exclude = NULL;
}
if (NULL != mca_btl_tcp_component.tcp_if_seq) {
free(mca_btl_tcp_component.tcp_if_seq);
}
if (NULL != mca_btl_tcp_component.tcp_btls)
free(mca_btl_tcp_component.tcp_btls);
@ -319,7 +370,6 @@ int mca_btl_tcp_component_close(void)
}
#endif
/* cleanup any pending events */
OPAL_THREAD_LOCK(&mca_btl_tcp_component.tcp_lock);
for(item = opal_list_get_first(&mca_btl_tcp_component.tcp_events);
@ -546,14 +596,26 @@ static int mca_btl_tcp_component_create_instances(void)
for(if_index = opal_ifbegin(); if_index >= 0; if_index = opal_ifnext(if_index)){
int index = opal_ifindextokindex (if_index);
if (index > 0) {
bool already_seen = false;
for (j=0; (false == already_seen) && (j < kif_count); j++) {
bool want_this_if = true;
/* Have we seen this if already? */
for (j = 0; want_this_if && (j < kif_count); j++) {
if (kindexes[j] == index) {
already_seen = true;
want_this_if = false;
}
}
if (false == already_seen) {
/* If we have an if_seq list, see if this is the one
interface that we're supposed to have */
if (NULL != mca_btl_tcp_component.tcp_if_seq) {
char name[256];
opal_ifindextoname(if_index, name, sizeof(name));
if (0 != strcmp(mca_btl_tcp_component.tcp_if_seq, name)) {
want_this_if = false;
}
}
if (want_this_if) {
kindexes[kif_count] = index;
kif_count++;
}