diff --git a/orte/mca/oob/tcp/oob_tcp_component.c b/orte/mca/oob/tcp/oob_tcp_component.c index 89db27ad84..383a078e5d 100644 --- a/orte/mca/oob/tcp/oob_tcp_component.c +++ b/orte/mca/oob/tcp/oob_tcp_component.c @@ -596,7 +596,7 @@ static int component_startup(void) static void component_shutdown(void) { - int i; + int i=0; opal_list_item_t *item; opal_output_verbose(2, orte_oob_base_framework.framework_output, @@ -703,20 +703,26 @@ static int component_set_addr(orte_process_name_t *peer, found = false; for (i=0; NULL != uris[i]; i++) { + tcpuri = strdup(uris[i]); + if (NULL == tcpuri) { + opal_output_verbose(2, orte_oob_base_framework.framework_output, + "%s oob:tcp: out of memory", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + continue; + } if (0 == strncmp(uris[i], "tcp:", 4)) { af_family = AF_INET; - tcpuri = strdup(uris[i]); host = tcpuri + strlen("tcp://"); } else if (0 == strncmp(uris[i], "tcp6:", 5)) { #if OPAL_ENABLE_IPV6 af_family = AF_INET6; - tcpuri = strdup(uris[i]); host = tcpuri + strlen("tcp6://"); #else /* we don't support this connection type */ opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s oob:tcp: address %s not supported", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), uris[i]); + free(tcpuri); continue; #endif } else { @@ -724,6 +730,7 @@ static int component_set_addr(orte_process_name_t *peer, opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s oob:tcp: ignoring address %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), uris[i]); + free(tcpuri); continue; } @@ -739,14 +746,6 @@ static int component_set_addr(orte_process_name_t *peer, ports++; /* split the addrs */ - if (NULL == host || 0 == strlen(host)) { - opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, - "FORMAT ERROR IN ADDR: %s", - (NULL == host) ? "NULL" : "ZERO LENGTH"); - free(tcpuri); - return ORTE_ERR_BAD_PARAM; - } - /* if this is a tcp6 connection, the first one will have a '[' * at the beginning of it, and the last will have a ']' at the * end - we need to remove those extra characters diff --git a/orte/mca/oob/tcp/oob_tcp_connection.c b/orte/mca/oob/tcp/oob_tcp_connection.c index f7598d73cb..c61ce848c9 100644 --- a/orte/mca/oob/tcp/oob_tcp_connection.c +++ b/orte/mca/oob/tcp/oob_tcp_connection.c @@ -14,7 +14,7 @@ * Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science + * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * @@ -98,15 +98,6 @@ static int tcp_peer_create_socket(mca_oob_tcp_peer_t* peer) ORTE_NAME_PRINT(&(peer->name)))); peer->sd = socket(AF_INET, SOCK_STREAM, 0); - /* Set this fd to be close-on-exec so that any subsequent children don't see it */ - if (opal_fd_set_cloexec(peer->sd) != OPAL_SUCCESS) { - opal_output(0, "%s unable to set socket to CLOEXEC", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - close(peer->sd); - peer->sd = -1; - return ORTE_ERROR; - } - if (peer->sd < 0) { opal_output(0, "%s-%s tcp_peer_create_socket: socket() failed: %s (%d)\n", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -116,6 +107,15 @@ static int tcp_peer_create_socket(mca_oob_tcp_peer_t* peer) return ORTE_ERR_UNREACH; } + /* Set this fd to be close-on-exec so that any subsequent children don't see it */ + if (opal_fd_set_cloexec(peer->sd) != OPAL_SUCCESS) { + opal_output(0, "%s unable to set socket to CLOEXEC", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + close(peer->sd); + peer->sd = -1; + return ORTE_ERROR; + } + /* setup socket options */ orte_oob_tcp_set_socket_options(peer->sd); @@ -575,11 +575,10 @@ static bool retry(mca_oob_tcp_peer_t* peer, int sd, bool fatal) opal_event_del(&peer->recv_event); peer->recv_ev_active = false; } - if (0 < peer->sd) { + if (0 <= peer->sd) { CLOSE_THE_SOCKET(peer->sd); peer->sd = -1; } - CLOSE_THE_SOCKET(peer->sd); if (OPAL_VALUE1_GREATER == cmpval) { /* force the other end to retry the connection */ peer->state = MCA_OOB_TCP_UNCONNECTED; @@ -1034,11 +1033,21 @@ void mca_oob_tcp_peer_dump(mca_oob_tcp_peer_t* peer, const char* msg) opal_socklen_t addrlen = sizeof(struct sockaddr_storage); opal_socklen_t optlen; - getsockname(peer->sd, (struct sockaddr*)&inaddr, &addrlen); - snprintf(src, sizeof(src), "%s", opal_net_get_hostname((struct sockaddr*) &inaddr)); - getpeername(peer->sd, (struct sockaddr*)&inaddr, &addrlen); - snprintf(dst, sizeof(dst), "%s", opal_net_get_hostname((struct sockaddr*) &inaddr)); - + if (getsockname(peer->sd, (struct sockaddr*)&inaddr, &addrlen) < 0) { + opal_output(0, "tcp_peer_dump: getsockname: %s (%d)\n", + strerror(opal_socket_errno), + opal_socket_errno); + } else { + snprintf(src, sizeof(src), "%s", opal_net_get_hostname((struct sockaddr*) &inaddr)); + } + if (getpeername(peer->sd, (struct sockaddr*)&inaddr, &addrlen) < 0) { + opal_output(0, "tcp_peer_dump: getpeername: %s (%d)\n", + strerror(opal_socket_errno), + opal_socket_errno); + } else { + snprintf(dst, sizeof(dst), "%s", opal_net_get_hostname((struct sockaddr*) &inaddr)); + } + if ((flags = fcntl(peer->sd, F_GETFL, 0)) < 0) { opal_output(0, "tcp_peer_dump: fcntl(F_GETFL) failed: %s (%d)\n", strerror(opal_socket_errno),