Add some error checking into the btl_openib_ipaddr_include and
btl_openib_ipaddr_exclude calculations, and fix a few small memory leaks in that same logic. This commit was SVN r21196.
Этот коммит содержится в:
родитель
d920a302f3
Коммит
cd6c6e6206
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2008 Chelsio, Inc. All rights reserved.
|
||||
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2008 Chelsio, Inc. All rights reserved.
|
||||
* Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
@ -21,6 +21,8 @@
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/util/if.h"
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
|
||||
#include "connect/connect.h"
|
||||
#endif
|
||||
/* Always want to include this file */
|
||||
@ -68,17 +70,19 @@ static char *stringify(uint32_t addr)
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Note that each device port can have multiple IP addresses associated with it
|
||||
* (aka IP aliasing). However, the openib module only knows about (device,port)
|
||||
* tuples -- not IP addresses (only the RDMA CM CPC knows which IP addresses are
|
||||
* associated with each (device,port) tuple). Thus, any searching of device
|
||||
* list for the IP Address or subnets may not work as one might expect. The
|
||||
* current behavior is to return the IP address (or subnet) of the *first*
|
||||
* instance of the device on the list. This behavior is uniform for subnet and
|
||||
* IP addresses and thus should not cause any mismatches. If this behavior is
|
||||
* not preferred by the user, the MCA parameters to include/exclude specific IP
|
||||
* addresses can be used to precisely specify which addresses are used (e.g., to
|
||||
* effect specific subnet routing).
|
||||
/* Note that each device port can have multiple IP addresses
|
||||
* associated with it (aka IP aliasing). However, the openib module
|
||||
* only knows about (device,port) tuples -- not IP addresses (only the
|
||||
* RDMA CM CPC knows which IP addresses are associated with each
|
||||
* (device,port) tuple). Thus, any searching of device list for the
|
||||
* IP Address or subnets may not work as one might expect. The
|
||||
* current behavior is to return the IP address (or subnet) of the
|
||||
* *first* instance of the device on the list. This behavior is
|
||||
* uniform for subnet and IP addresses and thus should not cause any
|
||||
* mismatches. If this behavior is not preferred by the user, the MCA
|
||||
* parameters to include/exclude specific IP addresses can be used to
|
||||
* precisely specify which addresses are used (e.g., to effect
|
||||
* specific subnet routing).
|
||||
*/
|
||||
uint64_t mca_btl_openib_get_iwarp_subnet_id(struct ibv_device *ib_dev)
|
||||
{
|
||||
@ -105,12 +109,12 @@ uint64_t mca_btl_openib_get_iwarp_subnet_id(struct ibv_device *ib_dev)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* This function should not be necessary, as rdma_get_local_addr would be more
|
||||
* correct in returning the IP address given the cm_id (and not necessitate
|
||||
* having to do a list look up). Unfortunately, the subnet and IP address look
|
||||
* up needs to match or there could be a mismatch if IP Aliases are being used.
|
||||
* For more information on this, please read comment above
|
||||
* mca_btl_openib_get_iwarp_subnet_id.
|
||||
/* This function should not be necessary, as rdma_get_local_addr would
|
||||
* be more correct in returning the IP address given the cm_id (and
|
||||
* not necessitate having to do a list look up). Unfortunately, the
|
||||
* subnet and IP address look up needs to match or there could be a
|
||||
* mismatch if IP Aliases are being used. For more information on
|
||||
* this, please read comment above mca_btl_openib_get_iwarp_subnet_id.
|
||||
*/
|
||||
uint32_t mca_btl_openib_rdma_get_ipv4addr(struct ibv_context *verbs,
|
||||
uint8_t port)
|
||||
@ -186,9 +190,29 @@ static int ipaddr_specified(struct sockaddr_in *ipaddr, uint32_t netmask)
|
||||
struct in_addr ipae;
|
||||
char **temp = opal_argv_split(list[i], '/');
|
||||
|
||||
inet_pton(ipaddr->sin_family, temp[0], &ipae);
|
||||
if (NULL == temp || NULL == temp[0] || NULL == temp[1] ||
|
||||
NULL != temp[2]) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
"invalid ipaddr_inexclude", true, "include",
|
||||
orte_process_info.nodename, list[i],
|
||||
"Invalid specification (missing \"/\")");
|
||||
if (NULL != temp) {
|
||||
opal_argv_free(temp);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (1 != inet_pton(ipaddr->sin_family, temp[0], &ipae)) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
"invalid ipaddr_inexclude", true, "include",
|
||||
orte_process_info.nodename, list[i],
|
||||
"Invalid specification (inet_pton() failed)");
|
||||
opal_argv_free(temp);
|
||||
continue;
|
||||
}
|
||||
list_subnet = ipae.s_addr & ~(~0 << atoi(temp[1]));
|
||||
subnet = ipaddr->sin_addr.s_addr & ~(~0 << netmask);
|
||||
opal_argv_free(temp);
|
||||
|
||||
if (subnet == list_subnet) {
|
||||
return 0;
|
||||
@ -208,9 +232,29 @@ static int ipaddr_specified(struct sockaddr_in *ipaddr, uint32_t netmask)
|
||||
struct in_addr ipae;
|
||||
char **temp = opal_argv_split(list[i], '/');
|
||||
|
||||
inet_pton(ipaddr->sin_family, temp[0], &ipae);
|
||||
if (NULL == temp || NULL == temp[0] || NULL == temp[1] ||
|
||||
NULL != temp[2]) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
"invalid ipaddr_inexclude", true, "exclude",
|
||||
orte_process_info.nodename, list[i],
|
||||
"Invalid specification (missing \"/\")");
|
||||
if (NULL != temp) {
|
||||
opal_argv_free(temp);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (1 != inet_pton(ipaddr->sin_family, temp[0], &ipae)) {
|
||||
orte_show_help("help-mpi-btl-openib.txt",
|
||||
"invalid ipaddr_inexclude", true, "exclude",
|
||||
orte_process_info.nodename, list[i],
|
||||
"Invalid specification (inet_pton() failed)");
|
||||
opal_argv_free(temp);
|
||||
continue;
|
||||
}
|
||||
list_subnet = ipae.s_addr & ~(~0 << atoi(temp[1]));
|
||||
subnet = ipaddr->sin_addr.s_addr & ~(~0 << netmask);
|
||||
opal_argv_free(temp);
|
||||
|
||||
if (subnet == list_subnet) {
|
||||
return 1;
|
||||
|
@ -10,7 +10,7 @@
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2006 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2007-2008 Mellanox Technologies. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
@ -581,3 +581,11 @@ MPI to support iWARP devices.
|
||||
|
||||
(This message is being displayed because you told Open MPI to use
|
||||
iWARP devices via the btl_openib_device_type MCA parameter)
|
||||
#
|
||||
[invalid ipaddr_inexclude]
|
||||
WARNING: An invalid value was given for btl_openib_ipaddr_%s. This
|
||||
value will be ignored.
|
||||
|
||||
Local host: %s
|
||||
Value: %s
|
||||
Message: %s
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user