From cd6c6e6206cf38838064b5791730c62c62f3d02a Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Sat, 9 May 2009 12:28:09 +0000 Subject: [PATCH] Add some error checking into the btl_openib_ipaddr_in|exclude calculations, and fix a few small memory leaks in that same logic. This commit was SVN r21196. --- ompi/mca/btl/openib/btl_openib_iwarp.c | 86 ++++++++++++++++----- ompi/mca/btl/openib/help-mpi-btl-openib.txt | 10 ++- 2 files changed, 74 insertions(+), 22 deletions(-) diff --git a/ompi/mca/btl/openib/btl_openib_iwarp.c b/ompi/mca/btl/openib/btl_openib_iwarp.c index 45bb58557e..0ce0efa616 100644 --- a/ompi/mca/btl/openib/btl_openib_iwarp.c +++ b/ompi/mca/btl/openib/btl_openib_iwarp.c @@ -1,6 +1,6 @@ /* - * Copyright (c) 2008 Chelsio, Inc. All rights reserved. - * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2008 Chelsio, Inc. All rights reserved. + * Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. * * Additional copyrights may follow * @@ -21,6 +21,8 @@ #include "opal/util/argv.h" #include "opal/util/if.h" +#include "orte/util/show_help.h" + #include "connect/connect.h" #endif /* Always want to include this file */ @@ -68,17 +70,19 @@ static char *stringify(uint32_t addr) } #endif -/* Note that each device port can have multiple IP addresses associated with it - * (aka IP aliasing). However, the openib module only knows about (device,port) - * tuples -- not IP addresses (only the RDMA CM CPC knows which IP addresses are - * associated with each (device,port) tuple). Thus, any searching of device - * list for the IP Address or subnets may not work as one might expect. The - * current behavior is to return the IP address (or subnet) of the *first* - * instance of the device on the list. This behavior is uniform for subnet and - * IP addresses and thus should not cause any mismatches. If this behavior is - * not preferred by the user, the MCA parameters to include/exclude specific IP - * addresses can be used to precisely specify which addresses are used (e.g., to - * effect specific subnet routing). +/* Note that each device port can have multiple IP addresses + * associated with it (aka IP aliasing). However, the openib module + * only knows about (device,port) tuples -- not IP addresses (only the + * RDMA CM CPC knows which IP addresses are associated with each + * (device,port) tuple). Thus, any searching of device list for the + * IP Address or subnets may not work as one might expect. The + * current behavior is to return the IP address (or subnet) of the + * *first* instance of the device on the list. This behavior is + * uniform for subnet and IP addresses and thus should not cause any + * mismatches. If this behavior is not preferred by the user, the MCA + * parameters to include/exclude specific IP addresses can be used to + * precisely specify which addresses are used (e.g., to effect + * specific subnet routing). */ uint64_t mca_btl_openib_get_iwarp_subnet_id(struct ibv_device *ib_dev) { @@ -105,12 +109,12 @@ uint64_t mca_btl_openib_get_iwarp_subnet_id(struct ibv_device *ib_dev) return 0; } -/* This function should not be necessary, as rdma_get_local_addr would be more - * correct in returning the IP address given the cm_id (and not necessitate - * having to do a list look up). Unfortunately, the subnet and IP address look - * up needs to match or there could be a mismatch if IP Aliases are being used. - * For more information on this, please read comment above - * mca_btl_openib_get_iwarp_subnet_id. +/* This function should not be necessary, as rdma_get_local_addr would + * be more correct in returning the IP address given the cm_id (and + * not necessitate having to do a list look up). Unfortunately, the + * subnet and IP address look up needs to match or there could be a + * mismatch if IP Aliases are being used. For more information on + * this, please read comment above mca_btl_openib_get_iwarp_subnet_id. */ uint32_t mca_btl_openib_rdma_get_ipv4addr(struct ibv_context *verbs, uint8_t port) @@ -186,9 +190,29 @@ static int ipaddr_specified(struct sockaddr_in *ipaddr, uint32_t netmask) struct in_addr ipae; char **temp = opal_argv_split(list[i], '/'); - inet_pton(ipaddr->sin_family, temp[0], &ipae); + if (NULL == temp || NULL == temp[0] || NULL == temp[1] || + NULL != temp[2]) { + orte_show_help("help-mpi-btl-openib.txt", + "invalid ipaddr_inexclude", true, "include", + orte_process_info.nodename, list[i], + "Invalid specification (missing \"/\")"); + if (NULL != temp) { + opal_argv_free(temp); + } + continue; + } + + if (1 != inet_pton(ipaddr->sin_family, temp[0], &ipae)) { + orte_show_help("help-mpi-btl-openib.txt", + "invalid ipaddr_inexclude", true, "include", + orte_process_info.nodename, list[i], + "Invalid specification (inet_pton() failed)"); + opal_argv_free(temp); + continue; + } list_subnet = ipae.s_addr & ~(~0 << atoi(temp[1])); subnet = ipaddr->sin_addr.s_addr & ~(~0 << netmask); + opal_argv_free(temp); if (subnet == list_subnet) { return 0; @@ -208,9 +232,29 @@ static int ipaddr_specified(struct sockaddr_in *ipaddr, uint32_t netmask) struct in_addr ipae; char **temp = opal_argv_split(list[i], '/'); - inet_pton(ipaddr->sin_family, temp[0], &ipae); + if (NULL == temp || NULL == temp[0] || NULL == temp[1] || + NULL != temp[2]) { + orte_show_help("help-mpi-btl-openib.txt", + "invalid ipaddr_inexclude", true, "exclude", + orte_process_info.nodename, list[i], + "Invalid specification (missing \"/\")"); + if (NULL != temp) { + opal_argv_free(temp); + } + continue; + } + + if (1 != inet_pton(ipaddr->sin_family, temp[0], &ipae)) { + orte_show_help("help-mpi-btl-openib.txt", + "invalid ipaddr_inexclude", true, "exclude", + orte_process_info.nodename, list[i], + "Invalid specification (inet_pton() failed)"); + opal_argv_free(temp); + continue; + } list_subnet = ipae.s_addr & ~(~0 << atoi(temp[1])); subnet = ipaddr->sin_addr.s_addr & ~(~0 << netmask); + opal_argv_free(temp); if (subnet == list_subnet) { return 1; diff --git a/ompi/mca/btl/openib/help-mpi-btl-openib.txt b/ompi/mca/btl/openib/help-mpi-btl-openib.txt index 19c41f496e..b8ce27334e 100644 --- a/ompi/mca/btl/openib/help-mpi-btl-openib.txt +++ b/ompi/mca/btl/openib/help-mpi-btl-openib.txt @@ -10,7 +10,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2006 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2007-2008 Mellanox Technologies. All rights reserved. # $COPYRIGHT$ # @@ -581,3 +581,11 @@ MPI to support iWARP devices. (This message is being displayed because you told Open MPI to use iWARP devices via the btl_openib_device_type MCA parameter) +# +[invalid ipaddr_inexclude] +WARNING: An invalid value was given for btl_openib_ipaddr_%s. This +value will be ignored. + + Local host: %s + Value: %s + Message: %s