1
1

fix for #1130 - adds support for multi-rail configurations

This commit was SVN r17152.
Этот коммит содержится в:
Donald Kerr 2008-01-17 17:30:50 +00:00
родитель 601fb4389d
Коммит 5f884b1ca4
9 изменённых файлов: 304 добавлений и 50 удалений

Просмотреть файл

@ -12,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2006 Sandia National Laboratories. All rights
* reserved.
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2007-2008 Sun Microsystems, Inc. All rights reserved.
*
* $COPYRIGHT$
*
@ -48,6 +48,7 @@ static int udapl_dereg_mr(void *reg_data, mca_mpool_base_registration_t *reg);
static int mca_btl_udapl_set_peer_parameters(
struct mca_btl_udapl_module_t* udapl_btl,
size_t nprocs);
static int mca_btl_udapl_assign_netmask(mca_btl_udapl_module_t* udapl_btl);
mca_btl_udapl_module_t mca_btl_udapl_module = {
{
@ -189,6 +190,9 @@ mca_btl_udapl_init(DAT_NAME_PTR ia_name, mca_btl_udapl_module_t* btl)
memcpy(&btl->udapl_addr.addr, (btl->udapl_ia_attr).ia_address_ptr,
sizeof(DAT_SOCK_ADDR));
/* determine netmask */
mca_btl_udapl_assign_netmask(btl);
/* check evd qlen against adapter max */
if (btl->udapl_dto_evd_qlen > (btl->udapl_ia_attr).max_evd_qlen) {
BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP, ("help-mpi-btl-udapl.txt",
@ -415,6 +419,14 @@ int mca_btl_udapl_finalize(struct mca_btl_base_module_t* base_btl)
OBJ_DESTRUCT(&udapl_btl->udapl_frag_control);
OBJ_DESTRUCT(&udapl_btl->udapl_eager_rdma_lock);
/* destroy mpool */
if (OMPI_SUCCESS !=
mca_mpool_base_module_destroy(udapl_btl->super.btl_mpool)) {
BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_INFORM,
("WARNING: Failed to release mpool"));
return OMPI_ERROR;
}
free(udapl_btl);
return OMPI_SUCCESS;
}
@ -635,6 +647,97 @@ static int mca_btl_udapl_set_peer_parameters(
return rc;
}
/*
 * Find and assign the system netmask for the address of a uDAPL BTL
 * module.
 *
 * udapl_if_mask is always reset to 0 first; a value of 0 tells later
 * steps to assume all addresses are reachable. The system netmask is
 * only looked up when subnet comparison is enabled through the
 * "compare_subnet" MCA parameter
 * (mca_btl_udapl_component.udapl_compare_subnet). On every error path
 * udapl_if_mask is left at 0.
 *
 * @param udapl_btl (IN/OUT) BTL module; udapl_addr.addr is read and
 *                           udapl_if_mask is written.
 *
 * @return OMPI_SUCCESS if the netmask was found or the comparison is
 *         disabled; OMPI_ERROR if the address is not IPv4 or the
 *         interface name or netmask could not be determined.
 */
static int mca_btl_udapl_assign_netmask(mca_btl_udapl_module_t* udapl_btl)
{
    struct sockaddr *saddr;
    struct sockaddr_in *btl_addr;
    char btl_addr_string[INET_ADDRSTRLEN] = "";
    /* NOTE(review): this buffer holds an interface name but is sized
     * with INET_ADDRSTRLEN; confirm that is large enough for the
     * interface names on all supported platforms.
     */
    char btl_ifname[INET_ADDRSTRLEN];

    /* Setting if_mask to 0 informs future steps to assume all
     * addresses are reachable.
     */
    udapl_btl->udapl_if_mask = 0;

    if (0 == mca_btl_udapl_component.udapl_compare_subnet) {
        /* subnet comparison turned off, nothing to look up */
        return OMPI_SUCCESS;
    }

    /* use generic address to find address family */
    saddr = (struct sockaddr *)&(udapl_btl->udapl_addr.addr);
    if (saddr->sa_family != AF_INET) {
        /* current uDAPL BTL does not support IPv6 */
        BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP,
            ("help-mpi-btl-udapl.txt", "IPv4 only",
            true, orte_system_info.nodename));
        return OMPI_ERROR;
    }
    btl_addr = (struct sockaddr_in *)saddr;

    /*
     * Retrieve the netmask of the udapl btl address. To
     * accomplish this requires 4 steps and the use of an opal
     * utility:
     *   1. Get string value of known udapl btl module address.
     *   2. Use string value to find the interface name of address.
     *   3. Use interface name to find its index.
     *   4. From the index get the netmask.
     */

    /* step 1: retrieve string value of udapl btl address */
    if (NULL == inet_ntop(AF_INET, (void *) &btl_addr->sin_addr,
                          btl_addr_string, INET_ADDRSTRLEN)) {
        /* without a printable address the interface cannot be looked
         * up; keep the string well defined for the help message
         */
        btl_addr_string[0] = '\0';
        BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP,
            ("help-mpi-btl-udapl.txt", "interface not found",
            true, orte_system_info.nodename, btl_addr_string));
        return OMPI_ERROR;
    }

    /* step 2: use address string to retrieve associated interface name */
    if (OPAL_SUCCESS !=
        opal_ifaddrtoname(btl_addr_string,
            btl_ifname, INET_ADDRSTRLEN)) {
        BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP,
            ("help-mpi-btl-udapl.txt", "interface not found",
            true, orte_system_info.nodename, btl_addr_string));
        return OMPI_ERROR;
    }

    /* steps 3 and 4: use interface name to retrieve index; then
     * use index to retrieve udapl btl address netmask
     */
    if (OPAL_SUCCESS !=
        opal_ifindextomask(opal_ifnametoindex(btl_ifname),
            &(udapl_btl->udapl_if_mask), sizeof(udapl_btl->udapl_if_mask))) {
        /* make sure a failed lookup cannot leave a partial value behind */
        udapl_btl->udapl_if_mask = 0;
        BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP,
            ("help-mpi-btl-udapl.txt", "netmask not found",
            true, orte_system_info.nodename, btl_addr_string));
        return OMPI_ERROR;
    }

    /* report if_mask used by address; udapl_if_mask is unsigned */
    BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_INFORM,
        ("uDAPL BTL address %s : if_mask = %u",
        btl_addr_string, (unsigned int) udapl_btl->udapl_if_mask));

    return OMPI_SUCCESS;
}
/*
*
*/
@ -662,12 +765,6 @@ int mca_btl_udapl_add_procs(
continue;
}
/*
* Check to make sure that the peer has at least as many interface
* addresses exported as we are trying to use. If not, then
* don't bind this BTL instance to the proc.
*/
OPAL_THREAD_LOCK(&udapl_proc->proc_lock);
/* The btl_proc datastructure is shared by all uDAPL BTL

Просмотреть файл

@ -10,7 +10,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2007-2008 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
*
* $COPYRIGHT$
@ -88,6 +88,7 @@ struct mca_btl_udapl_component_t {
opal_list_t udapl_procs; /**< list of udapl proc structures */
opal_mutex_t udapl_lock; /**< lock for accessing module state */
char* udapl_mpool_name; /**< name of memory pool */
int32_t udapl_compare_subnet;/**< whether to compare with netmask or not */
char *if_include;
char **if_include_list;
char *if_exclude;
@ -149,6 +150,7 @@ struct mca_btl_udapl_module_t {
int udapl_max_recv_dtos; /**< maximum number of outstanding consumer
submitted recv operations, see section
6.6.6 of uDAPL Spec */
uint32_t udapl_if_mask; /**< netmask value btl module */
};
typedef struct mca_btl_udapl_module_t mca_btl_udapl_module_t;
extern mca_btl_udapl_module_t mca_btl_udapl_module;

Просмотреть файл

@ -261,6 +261,8 @@ mca_btl_udapl_modex_send(void)
return OMPI_ERR_OUT_OF_RESOURCE;
}
memset(addrs, 0, size);
for (i = 0; i < mca_btl_udapl_component.udapl_num_btls; i++) {
mca_btl_udapl_module_t* btl = mca_btl_udapl_component.udapl_btls[i];
addrs[i] = btl->udapl_addr;

Просмотреть файл

@ -254,7 +254,6 @@ int mca_btl_udapl_endpoint_send(mca_btl_base_endpoint_t* endpoint,
} else {
assert(frag->size ==
mca_btl_udapl_component.udapl_max_frag_size);
OPAL_THREAD_ADD32(&endpoint->endpoint_max_sends, -1);
opal_list_append(&endpoint->endpoint_max_frags,
(opal_list_item_t*)frag);
}
@ -540,9 +539,11 @@ void mca_btl_udapl_endpoint_recv(int status, orte_process_name_t* endpoint,
for(i = 0; i < proc->proc_endpoint_count; i++) {
ep = proc->proc_endpoints[i];
/* Does this endpoint match? */
/* Does this endpoint match? Only compare the address
* portion of mca_btl_udapl_addr_t.
*/
if(!memcmp(&addr, &ep->endpoint_addr,
sizeof(mca_btl_udapl_addr_t))) {
(sizeof(DAT_CONN_QUAL) + sizeof(DAT_SOCK_ADDR)))) {
OPAL_THREAD_UNLOCK(&mca_btl_udapl_component.udapl_lock);
mca_btl_udapl_endpoint_connect(ep);
return;
@ -805,11 +806,10 @@ static int mca_btl_udapl_endpoint_finish_max(mca_btl_udapl_endpoint_t* endpoint)
opal_list_remove_first(&endpoint->endpoint_max_frags))) {
cookie.as_ptr = frag;
assert(frag->triplet.virtual_address == (DAT_VADDR)frag->ftr);
assert(frag->triplet.segment_length ==
frag->segment.seg_len + sizeof(mca_btl_udapl_footer_t));
assert(frag->size ==
mca_btl_udapl_component.udapl_eager_frag_size);
mca_btl_udapl_component.udapl_max_frag_size);
rc = dat_ep_post_send(endpoint->endpoint_max, 1,
&frag->triplet, cookie, DAT_COMPLETION_DEFAULT_FLAG);
@ -947,10 +947,14 @@ static void mca_btl_udapl_endpoint_destruct(mca_btl_base_endpoint_t* endpoint)
OBJ_DESTRUCT(&endpoint->endpoint_lock);
/* release eager rdma resources */
udapl_btl->super.btl_mpool->mpool_free(udapl_btl->super.btl_mpool,
NULL,
reg);
free(endpoint->endpoint_eager_rdma_local.base.pval);
if (NULL != reg) {
udapl_btl->super.btl_mpool->mpool_free(udapl_btl->super.btl_mpool,
NULL, reg);
}
if (NULL != endpoint->endpoint_eager_rdma_local.base.pval) {
free(endpoint->endpoint_eager_rdma_local.base.pval);
}
}

Просмотреть файл

@ -11,7 +11,7 @@
* All rights reserved.
* Copyright (c) 2006 Sandia National Laboratories. All rights
* reserved.
* Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2006-2008 Sun Microsystems, Inc. All rights reserved.
*
* $COPYRIGHT$
*
@ -46,6 +46,7 @@ extern "C" {
/*
 * Address information describing one uDAPL BTL module. A copy of each
 * local module's udapl_addr is placed in the buffer built by
 * mca_btl_udapl_modex_send().
 */
struct mca_btl_udapl_addr_t {
/* uDAPL connection qualifier (DAT_CONN_QUAL) for this address */
DAT_CONN_QUAL port;
/* socket address; filled from the interface adapter attribute
 * ia_address_ptr in mca_btl_udapl_init()
 */
DAT_SOCK_ADDR addr;
/* set to true by mca_btl_udapl_proc_address_match() once this peer
 * address has been paired with a BTL module, so that it is not matched
 * again. mca_btl_udapl_endpoint_recv() compares only the first
 * sizeof(DAT_CONN_QUAL) + sizeof(DAT_SOCK_ADDR) bytes of this
 * structure, so this member must stay after port and addr.
 */
bool inuse;
};
typedef struct mca_btl_udapl_addr_t mca_btl_udapl_addr_t;

Просмотреть файл

@ -10,7 +10,7 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2007-2008 Sun Microsystems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -242,6 +242,15 @@ int mca_btl_udapl_register_mca_params(void)
&(mca_btl_udapl_component.udapl_verbosity),
REGINT_NEG_ONE_OK), tmp_rc, rc);
CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("compare_subnet",
"By default uDAPL BTL will compare subnets using netmask to "
"determine if an interface is reachable. Setting this parameter to "
"0 will essentially turn this comparison off and the uDAPL BTL will "
"assume all uDAPL interfaces are reachable (0 or 1, default==1).",
1,
&(mca_btl_udapl_component.udapl_compare_subnet),
REGINT_GE_ZERO), tmp_rc, rc);
/* register uDAPL module parameters */
CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("async_evd_qlen",
"The asynchronous event dispatcher queue length.",

Просмотреть файл

@ -11,7 +11,7 @@
* All rights reserved.
* Copyright (c) 2006 Sandia National Laboratories. All rights
* reserved.
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -22,8 +22,9 @@
#include "ompi_config.h"
#include "opal/class/opal_hash_table.h"
#include "opal/util/show_help.h"
#include "ompi/runtime/ompi_module_exchange.h"
#include "opal/util/net.h"
#include "btl_udapl.h"
#include "btl_udapl_endpoint.h"
#include "btl_udapl_proc.h"
@ -160,6 +161,114 @@ mca_btl_udapl_proc_t* mca_btl_udapl_proc_create(ompi_proc_t* ompi_proc)
}
/*
 * Find an address on the peer process which matches stated criteria
 * to the udapl btl module address information. Return in peer_addr_idx
 * the index to the peer process address that matches the btl module
 * address. Match criteria:
 *  - the address is not already in use
 *  - the btl module address and the peer address are on the same
 *    network according to opal_net_samenetwork(), using the module's
 *    udapl_if_mask value
 *
 * On a match the peer address is marked "inuse".
 *
 * Note: since this is called from mca_btl_udapl_proc_insert() it
 * is assumed that the process lock is locked when entered.
 *
 * @param udapl_btl (IN)         BTL module
 * @param peer_proc (IN/OUT)     BTL peer process; the "inuse" flag of
 *                               the matched address is set
 * @param peer_addr_idx (OUT)    Index of address on peer_proc which
 *                               matches the udapl_btl address data.
 *                               On success it is >= 0; otherwise it is
 *                               MCA_BTL_UDAPL_INVALID_PEER_ADDR_IDX.
 *
 * @return OMPI_SUCCESS on a match, OMPI_ERROR if the module address is
 *         not IPv4, OMPI_ERR_OUT_OF_RESOURCE if no peer address matched
 */
static int mca_btl_udapl_proc_address_match(
    mca_btl_udapl_module_t* udapl_btl,
    mca_btl_udapl_proc_t* peer_proc,
    int* peer_addr_idx)
{
    int i;
    /* The match is tracked locally instead of comparing *peer_addr_idx
     * against MCA_BTL_UDAPL_INVALID_PEER_ADDR_IDX: that comparison is
     * between an int and a constant that may have a wider type, and
     * then never evaluates to true.
     */
    int match_idx = -1;
    struct sockaddr *saddr;
    struct sockaddr_in *btl_addr;
    struct sockaddr_in *peer_addr;
    char btl_addr_string[INET_ADDRSTRLEN] = "";
    char peer_addr_string[INET_ADDRSTRLEN] = "";

    *peer_addr_idx = MCA_BTL_UDAPL_INVALID_PEER_ADDR_IDX;

    /* use generic address to find address family */
    saddr = (struct sockaddr *)&(udapl_btl->udapl_addr.addr);
    if (saddr->sa_family != AF_INET) {
        /* current uDAPL BTL only supports IPv4 */
        BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP,
            ("help-mpi-btl-udapl.txt", "IPv4 only",
            true, orte_system_info.nodename));
        return OMPI_ERROR;
    }
    btl_addr = (struct sockaddr_in *)saddr;

    /* Always render the module address: it is reported by the
     * "no network match" help message below, independent of the
     * verbosity level and of whether the peer has any addresses.
     */
    if (NULL == inet_ntop(AF_INET, (void *) &btl_addr->sin_addr,
                          btl_addr_string, INET_ADDRSTRLEN)) {
        btl_addr_string[0] = '\0';
    }

    /* Loop thru peer process addresses looking for match.
     * Match criteria:
     *   - address should not be "inuse"
     *   - both udapl btl module and peer address should be on
     *     the same subnet (compare with if_mask value)
     */
    for (i = 0; i < (int) peer_proc->proc_addr_count; i++) {
        peer_addr =
            (struct sockaddr_in *)&(peer_proc->proc_addrs[i].addr);

        if (VERBOSE_INFORM <=
            mca_btl_udapl_component.udapl_verbosity) {
            /* retrieve peer address string for reporting */
            if (NULL == inet_ntop(AF_INET, (void *) &peer_addr->sin_addr,
                                  peer_addr_string, INET_ADDRSTRLEN)) {
                peer_addr_string[0] = '\0';
            }
        }

        if ((false == peer_proc->proc_addrs[i].inuse) &&
            (opal_net_samenetwork((struct sockaddr *)btl_addr,
                (struct sockaddr *)peer_addr, udapl_btl->udapl_if_mask))) {

            /* capture index of remote address where match found */
            match_idx = i;
            *peer_addr_idx = i;

            /* mark this address as now being used */
            peer_proc->proc_addrs[i].inuse = true;

            /* report what address was found to match */
            BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_INFORM,
                ("uDAPL BTL module(%s) matched %s",
                btl_addr_string, peer_addr_string));
            break;
        } else {
            /* peer address already used by another udapl btl
             * module or netmask check not successful so skip
             */
            BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_INFORM,
                ("uDAPL BTL module(%s) either skipped because it "
                "is already in use or match criteria not successful "
                "for peer address %s",
                btl_addr_string, peer_addr_string));
        }
    }

    if (match_idx < 0) {
        /* no unused peer address on a matching network */
        BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP,
            ("help-mpi-btl-udapl.txt", "no network match",
            true, btl_addr_string, orte_system_info.nodename,
            peer_proc->proc_ompi->proc_hostname));
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    return OMPI_SUCCESS;
}
/*
* Note that this routine must be called with the lock on the process
* already held. Insert a btl instance into the proc array and assign
@ -169,13 +278,32 @@ int mca_btl_udapl_proc_insert(
mca_btl_udapl_proc_t* udapl_proc,
mca_btl_base_endpoint_t* udapl_endpoint)
{
/* insert into endpoint array */
if(udapl_proc->proc_endpoint_count > udapl_proc->proc_addr_count)
int peer_address_idx;
mca_btl_udapl_module_t* udapl_btl = udapl_endpoint->endpoint_btl;
/* Check so as not to create more endpoints than addresses.
* Example: If one node has 3 btl modules and another only has 2,
* this check prevents the node with 3 btl modules from
* overloading the other, i.e. only 2 possible connections will
* be possible.
*/
if (udapl_proc->proc_endpoint_count > udapl_proc->proc_addr_count)
return OMPI_ERR_OUT_OF_RESOURCE;
/* Find an endpoint on the udapl process of interest that matches
* the endpoint information of the current udapl btl module
*/
if (OMPI_SUCCESS !=
mca_btl_udapl_proc_address_match(udapl_btl, udapl_proc,
&peer_address_idx)) {
/* no address on peer proc met criteria */
return OMPI_ERR_OUT_OF_RESOURCE;
}
/* insert into endpoint array */
udapl_endpoint->endpoint_proc = udapl_proc;
udapl_endpoint->endpoint_addr =
udapl_proc->proc_addrs[udapl_proc->proc_endpoint_count];
udapl_proc->proc_addrs[peer_address_idx];
udapl_proc->proc_endpoints[udapl_proc->proc_endpoint_count] = udapl_endpoint;
udapl_proc->proc_endpoint_count++;

Просмотреть файл

@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -63,6 +64,8 @@ struct mca_btl_udapl_proc_t {
typedef struct mca_btl_udapl_proc_t mca_btl_udapl_proc_t;
OBJ_CLASS_DECLARATION(mca_btl_udapl_proc_t);
/* Sentinel stored in an int peer address index when no peer address
 * matched. It must be representable as an int so that storing it and
 * comparing against it round-trips; valid indexes are >= 0.
 */
#define MCA_BTL_UDAPL_INVALID_PEER_ADDR_IDX (-1)
mca_btl_udapl_proc_t* mca_btl_udapl_proc_create(ompi_proc_t* ompi_proc);
int mca_btl_udapl_proc_insert(mca_btl_udapl_proc_t*, mca_btl_base_endpoint_t*);

Просмотреть файл

@ -10,7 +10,7 @@
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2006 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
# Copyright (c) 2007-2008 Sun Microsystems, Inc. All rights reserved.
#
# $COPYRIGHT$
#
@ -21,81 +21,69 @@
# This is the US/English general help file for Open MPI.
#
[invalid num rdma segments]
WARNING: MCA parameter [btl_udapl_eager_rdma_num = %d] is not valid.
RDMA will not be used for short messages. Try setting to positive
value, e.g. 16.
#
[use default endpoint params]
WARNING: Using default uDAPL endpoint parameters, not those that
would have been modified by MCA parameters.
#
[optimal buffer alignment mismatch]
WARNING: DAT_OPTIMAL_ALIGNMENT = %d : BTL buffer_alignment = %d.
The BTL buffer_alignment value may not be optimal. If all nodes
report the same DAT_OPTIMAL_ALIGNMENT value and this differs from
BTL buffer_alignment then setting "--mca btl_udapl_buffer_alignment
%d" may improve performance.
#
[max_recv_dtos too low]
WARNING: The MCA parameter max_recv_dtos has been modified to a value,
%d, that is insufficient. This value must be greater than or equal to
num_recvs, %d. The uDAPL BTL will adjust to allow the program to
proceed.
#
[max_request_dtos too low]
WARNING: The MCA parameter max_request_dtos has been modified to a
value, %d, which may not be sufficient. Try setting max_request_dtos
to %d if program fails.
#
[max_recv_dtos system max]
WARNING: The MCA parameter max_recv_dtos was set to %d, which is
larger than allowed, so the value will be set to the maximum
allowed, %d.
#
[max_request_dtos system max]
WARNING: The MCA parameter max_request_dtos was set to %d, which is
larger than allowed, so the value will be set to the maximum
allowed, %d.
#
[evd_qlen adapter max]
WARNING: The MCA parameter %s is trying to be set to %d,
which is larger than allowable so the value will be set to maximum
allowed, %d.
#
[evd_qlen too low]
WARNING: The MCA parameter %s has been modified to a value,
%d, which may not be sufficient. Try setting %s to %d if
program fails.
#
[connection timeout low]
WARNING: The MCA parameter %s has been modified to a value,
%d, which may not be sufficient. Try setting %s to %d if
program fails.
#
[dat_lmr_create DAT_INSUFFICIENT_RESOURCES]
WARNING: The uDAPL BTL is not able to register memory. Possibly out of
allowed privileged memory (i.e. memory that can be pinned). Increasing
the allowed privileged memory may alleviate this issue.
#
[dat_ia_open fail]
WARNING: Failed to open "%s" [%s:%s].
This may be a real error or it may be an invalid entry in the uDAPL
Registry which is contained in the dat.conf file. Contact your local
System Administrator to confirm the availability of the interfaces in
the dat.conf file.
#
[specified include and exclude]
ERROR: You have specified both the btl_udapl_if_include and
btl_udapl_if_exclude MCA parameters. These two parameters are
@ -105,7 +93,7 @@ For reference, the values that you specified are:
btl_udapl_if_include: %s
btl_udapl_if_exclude: %s
#
[nonexistent entry]
WARNING: One or more nonexistent interfaces were specified:
@ -114,3 +102,23 @@ WARNING: One or more nonexistent interfaces were specified:
Nonexistent entities: %s
These entities will be ignored.
#
[IPv4 only]
WARNING: uDAPL BTL only supports IPv4 addressing at this time.
Something other than an IPv4 address was detected on %s.
#
[no network match]
WARNING: Interface %s on node %s was not able to find a matching
interface on peer node %s. The interfaces may be on different
subnets, or the peer may have fewer available uDAPL interfaces.
#
[interface not found]
WARNING: Host %s was not able to determine the interface name for
address %s. Will attempt to continue, assuming all peer addresses
are reachable.
#
[netmask not found]
WARNING: Host %s was not able to determine the netmask for address
%s. Will attempt to continue, assuming all peer addresses are
reachable.
#