From 81f2f19398062695525ff964c9eb72553e1e0656 Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Mon, 1 Oct 2018 15:30:28 -0700 Subject: [PATCH] btl/tcp: output the IP address correctly Per https://github.com/open-mpi/ompi/issues/3035#issuecomment-426085673, it looks like the IP address for a given interface is being stashed in two places: on the endpoint and on the module. 1. On the endpoint, it is storing the moral equivalent of a (struct sockaddr_in.sin_addr). 2. On the module, it is storing a full (struct sockaddr_storage). The call to opal_net_get_hostname() expects a full (struct sockaddr*) -- not just the stripped-down (struct sockaddr_in.sin_addr). Hence, when the original code was passing in the endpoint's (struct sockaddr_in.sin_addr) and opal_net_get_hostname() was treating it like a (struct sockaddr), hilarity ensued (i.e., we got the wrong output). This commit eliminates the call to opal_net_get_hostname() and just calls inet_ntop() directly to convert the (struct sockaddr_in.sin_addr) to a string. NOTE: Per the github comment cited above, there can be a disparity between the IP address cached on the endpoint vs. the IP address cached on the module. This only happens with interfaces that have more than one IP address. This commit does not fix that issue. Signed-off-by: Jeff Squyres (cherry picked from commit 5dae086f7e4aee28fbb5a7282a2661286a5f68fe) --- opal/mca/btl/tcp/btl_tcp_proc.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/opal/mca/btl/tcp/btl_tcp_proc.c b/opal/mca/btl/tcp/btl_tcp_proc.c index 28cf9a1778..5bc4e92828 100644 --- a/opal/mca/btl/tcp/btl_tcp_proc.c +++ b/opal/mca/btl/tcp/btl_tcp_proc.c @@ -901,17 +901,22 @@ void mca_btl_tcp_proc_accept(mca_btl_tcp_proc_t* btl_proc, struct sockaddr* addr /* No further use of this socket. Close it */ CLOSE_THE_SOCKET(sd); { - char *addr_str = NULL, *tmp, *pnet; + char *addr_str = NULL, *tmp; + char ip[128]; + ip[sizeof(ip) - 1] = '\0'; + for (size_t i = 0; i < btl_proc->proc_endpoint_count; i++) { mca_btl_base_endpoint_t* btl_endpoint = btl_proc->proc_endpoints[i]; if (btl_endpoint->endpoint_addr->addr_family != addr->sa_family) { continue; } - pnet = opal_net_get_hostname((struct sockaddr*)&btl_endpoint->endpoint_addr->addr_inet); + inet_ntop(btl_endpoint->endpoint_addr->addr_family, + (void*) &(btl_endpoint->endpoint_addr->addr_inet), + ip, sizeof(ip) - 1); if (NULL == addr_str) { - (void)asprintf(&tmp, "\n\t%s", pnet); + (void)asprintf(&tmp, "\n\t%s", ip); } else { - (void)asprintf(&tmp, "%s\n\t%s", addr_str, pnet); + (void)asprintf(&tmp, "%s\n\t%s", addr_str, ip); free(addr_str); } addr_str = tmp;