1
1

Print out error string rather than errno for TCP-related errors, making it easier for both the user and us to debug issues with BTL and OOB issues...

This commit was SVN r12852.
Этот коммит содержится в:
Brian Barrett 2006-12-14 18:20:43 +00:00
родитель 0ca8cb35b7
Коммит 38c2e43ac2
7 изменённых файлов: 164 добавлений и 71 удалений

Просмотреть файл

@ -408,7 +408,9 @@ static int mca_btl_tcp_component_create_listen(void)
/* create a listen socket for incoming connections */
mca_btl_tcp_component.tcp_listen_sd = socket(AF_INET, SOCK_STREAM, 0);
if(mca_btl_tcp_component.tcp_listen_sd < 0) {
BTL_ERROR(("socket() failed with errno=%d", opal_socket_errno));
BTL_ERROR(("socket() failed: %s (%d)",
strerror(opal_socket_errno),
opal_socket_errno));
return OMPI_ERROR;
}
mca_btl_tcp_set_socket_options(mca_btl_tcp_component.tcp_listen_sd);
@ -420,32 +422,42 @@ static int mca_btl_tcp_component_create_listen(void)
inaddr.sin_port = 0;
if(bind(mca_btl_tcp_component.tcp_listen_sd, (struct sockaddr*)&inaddr, sizeof(inaddr)) < 0) {
BTL_ERROR(("bind() failed with errno=%d", opal_socket_errno));
BTL_ERROR(("bind() failed: %s (%d)",
strerror(opal_socket_errno),
opal_socket_errno));
return OMPI_ERROR;
}
/* resolve system assignend port */
addrlen = sizeof(struct sockaddr_in);
if(getsockname(mca_btl_tcp_component.tcp_listen_sd, (struct sockaddr*)&inaddr, &addrlen) < 0) {
BTL_ERROR(("getsockname() failed with errno=%d", opal_socket_errno));
BTL_ERROR(("getsockname() failed: %s (%d)",
strerror(opal_socket_errno),
opal_socket_errno));
return OMPI_ERROR;
}
mca_btl_tcp_component.tcp_listen_port = inaddr.sin_port;
/* setup listen backlog to maximum allowed by kernel */
if(listen(mca_btl_tcp_component.tcp_listen_sd, SOMAXCONN) < 0) {
BTL_ERROR(("listen() failed with errno=%d", opal_socket_errno));
BTL_ERROR(("listen() failed: %s (%d)",
strerror(opal_socket_errno),
opal_socket_errno));
return OMPI_ERROR;
}
/* set socket up to be non-blocking, otherwise accept could block */
if((flags = fcntl(mca_btl_tcp_component.tcp_listen_sd, F_GETFL, 0)) < 0) {
BTL_ERROR(("fcntl(F_GETFL) failed with errno=%d", opal_socket_errno));
BTL_ERROR(("fcntl(F_GETFL) failed: %s (%d)",
strerror(opal_socket_errno),
opal_socket_errno));
return OMPI_ERROR;
} else {
flags |= O_NONBLOCK;
if(fcntl(mca_btl_tcp_component.tcp_listen_sd, F_SETFL, flags) < 0) {
BTL_ERROR(("fcntl(F_SETFL) failed with errno=%d", opal_socket_errno));
BTL_ERROR(("fcntl(F_SETFL) failed: %s (%d)",
strerror(opal_socket_errno),
opal_socket_errno));
return OMPI_ERROR;
}
}
@ -576,7 +588,9 @@ static void mca_btl_tcp_component_accept(void)
if(opal_socket_errno == EINTR)
continue;
if(opal_socket_errno != EAGAIN && opal_socket_errno != EWOULDBLOCK)
BTL_ERROR(("accept() failed with errno %d.", opal_socket_errno));
BTL_ERROR(("accept() failed: %s (%d).",
strerror(opal_socket_errno),
opal_socket_errno));
return;
}
mca_btl_tcp_set_socket_options(sd);
@ -620,11 +634,15 @@ static void mca_btl_tcp_component_recv_handler(int sd, short flags, void* user)
/* now set socket up to be non-blocking */
if((flags = fcntl(sd, F_GETFL, 0)) < 0) {
BTL_ERROR(("fcntl(F_GETFL) failed with errno=%d", opal_socket_errno));
BTL_ERROR(("fcntl(F_GETFL) failed: %s (%d)",
strerror(opal_socket_errno),
opal_socket_errno));
} else {
flags |= O_NONBLOCK;
if(fcntl(sd, F_SETFL, flags) < 0) {
BTL_ERROR(("fcntl(F_SETFL) failed with errno=%d", opal_socket_errno));
BTL_ERROR(("fcntl(F_SETFL) failed: %s (%d)",
strerror(opal_socket_errno),
opal_socket_errno));
}
}
@ -638,7 +656,9 @@ static void mca_btl_tcp_component_recv_handler(int sd, short flags, void* user)
/* lookup peer address */
if(getpeername(sd, (struct sockaddr*)&addr, &addr_len) != 0) {
BTL_ERROR(("getpeername() failed with errno=%d", opal_socket_errno));
BTL_ERROR(("getpeername() failed: %s (%d)",
strerror(opal_socket_errno),
opal_socket_errno));
CLOSE_THE_SOCKET(sd);
return;
}

Просмотреть файл

@ -142,13 +142,17 @@ static void mca_btl_tcp_endpoint_dump(mca_btl_base_endpoint_t* btl_endpoint, con
sprintf(dst, "%s", inet_ntoa(inaddr.sin_addr));
if((flags = fcntl(btl_endpoint->endpoint_sd, F_GETFL, 0)) < 0) {
BTL_ERROR(("fcntl(F_GETFL) failed with errno=%d", opal_socket_errno));
BTL_ERROR(("fcntl(F_GETFL) failed: %s (%d)",
strerror(opal_socket_errno),
opal_socket_errno));
}
#if defined(SO_SNDBUF)
obtlen = sizeof(sndbuf);
if(getsockopt(btl_endpoint->endpoint_sd, SOL_SOCKET, SO_SNDBUF, (char *)&sndbuf, &obtlen) < 0) {
BTL_ERROR(("SO_SNDBUF option: errno %d", opal_socket_errno));
BTL_ERROR(("SO_SNDBUF option: %s (%d)",
strerror(opal_socket_errno),
opal_socket_errno));
}
#else
sndbuf = -1;
@ -156,7 +160,9 @@ static void mca_btl_tcp_endpoint_dump(mca_btl_base_endpoint_t* btl_endpoint, con
#if defined(SO_RCVBUF)
obtlen = sizeof(rcvbuf);
if(getsockopt(btl_endpoint->endpoint_sd, SOL_SOCKET, SO_RCVBUF, (char *)&rcvbuf, &obtlen) < 0) {
BTL_ERROR(("SO_RCVBUF option: errno %d", opal_socket_errno));
BTL_ERROR(("SO_RCVBUF option: %s (%d)",
strerror(opal_socket_errno),
opal_socket_errno));
}
#else
rcvbuf = -1;
@ -164,7 +170,9 @@ static void mca_btl_tcp_endpoint_dump(mca_btl_base_endpoint_t* btl_endpoint, con
#if defined(TCP_NODELAY)
obtlen = sizeof(nodelay);
if(getsockopt(btl_endpoint->endpoint_sd, IPPROTO_TCP, TCP_NODELAY, (char *)&nodelay, &obtlen) < 0) {
BTL_ERROR(("TCP_NODELAY option: errno %d", opal_socket_errno));
BTL_ERROR(("TCP_NODELAY option: %s (%d)",
strerror(opal_socket_errno),
opal_socket_errno));
}
#else
nodelay = 0;
@ -255,7 +263,9 @@ static int mca_btl_tcp_endpoint_send_blocking(mca_btl_base_endpoint_t* btl_endpo
int retval = send(btl_endpoint->endpoint_sd, (const char *)ptr+cnt, size-cnt, 0);
if(retval < 0) {
if(opal_socket_errno != EINTR && opal_socket_errno != EAGAIN && opal_socket_errno != EWOULDBLOCK) {
BTL_ERROR(("send() failed with errno=%d",opal_socket_errno));
BTL_ERROR(("send() failed: %s (%d)",
strerror(opal_socket_errno),
opal_socket_errno));
mca_btl_tcp_endpoint_close(btl_endpoint);
return -1;
}
@ -412,7 +422,9 @@ static int mca_btl_tcp_endpoint_recv_blocking(mca_btl_base_endpoint_t* btl_endpo
/* socket is non-blocking so handle errors */
if(retval < 0) {
if(opal_socket_errno != EINTR && opal_socket_errno != EAGAIN && opal_socket_errno != EWOULDBLOCK) {
BTL_ERROR(("recv() failed with errno=%d",opal_socket_errno));
BTL_ERROR(("recv() failed: %s (%d)",
strerror(opal_socket_errno),
opal_socket_errno));
mca_btl_tcp_endpoint_close(btl_endpoint);
return -1;
}
@ -464,19 +476,25 @@ void mca_btl_tcp_set_socket_options(int sd)
#if defined(TCP_NODELAY)
optval = 1;
if(setsockopt(sd, IPPROTO_TCP, TCP_NODELAY, (char *)&optval, sizeof(optval)) < 0) {
BTL_ERROR(("setsockopt(TCP_NODELAY) failed with errno=%d", opal_socket_errno));
BTL_ERROR(("setsockopt(TCP_NODELAY) failed: %s (%d)",
strerror(opal_socket_errno),
opal_socket_errno));
}
#endif
#if defined(SO_SNDBUF)
if(mca_btl_tcp_component.tcp_sndbuf > 0 &&
setsockopt(sd, SOL_SOCKET, SO_SNDBUF, (char *)&mca_btl_tcp_component.tcp_sndbuf, sizeof(int)) < 0) {
BTL_ERROR(("setsockopt(SO_SNDBUF) failed with errno %d", opal_socket_errno));
BTL_ERROR(("setsockopt(SO_SNDBUF) failed: %s (%d)",
strerror(opal_socket_errno),
opal_socket_errno));
}
#endif
#if defined(SO_RCVBUF)
if(mca_btl_tcp_component.tcp_rcvbuf > 0 &&
setsockopt(sd, SOL_SOCKET, SO_RCVBUF, (char *)&mca_btl_tcp_component.tcp_rcvbuf, sizeof(int)) < 0) {
BTL_ERROR(("setsockopt(SO_RCVBUF) failed with errno %d", opal_socket_errno));
BTL_ERROR(("setsockopt(SO_RCVBUF) failed: %s (%d)",
strerror(opal_socket_errno),
opal_socket_errno));
}
#endif
}
@ -510,11 +528,15 @@ static int mca_btl_tcp_endpoint_start_connect(mca_btl_base_endpoint_t* btl_endpo
/* setup the socket as non-blocking */
if((flags = fcntl(btl_endpoint->endpoint_sd, F_GETFL, 0)) < 0) {
BTL_ERROR(("fcntl(F_GETFL) failed with errno=%d", opal_socket_errno));
BTL_ERROR(("fcntl(F_GETFL) failed: %s (%d)",
strerror(opal_socket_errno),
opal_socket_errno));
} else {
flags |= O_NONBLOCK;
if(fcntl(btl_endpoint->endpoint_sd, F_SETFL, flags) < 0)
BTL_ERROR(("fcntl(F_SETFL) failed with errno=%d", opal_socket_errno));
BTL_ERROR(("fcntl(F_SETFL) failed: %s (%d)",
strerror(opal_socket_errno),
opal_socket_errno));
}
/* start the connect - will likely fail with EINPROGRESS */
@ -560,7 +582,9 @@ static void mca_btl_tcp_endpoint_complete_connect(mca_btl_base_endpoint_t* btl_e
/* check connect completion status */
if(getsockopt(btl_endpoint->endpoint_sd, SOL_SOCKET, SO_ERROR, (char *)&so_error, &so_length) < 0) {
BTL_ERROR(("getsockopt() failed with errno=%d", opal_socket_errno));
BTL_ERROR(("getsockopt() failed: %s (%d)",
strerror(opal_socket_errno),
opal_socket_errno));
mca_btl_tcp_endpoint_close(btl_endpoint);
return;
}
@ -569,7 +593,9 @@ static void mca_btl_tcp_endpoint_complete_connect(mca_btl_base_endpoint_t* btl_e
return;
}
if(so_error != 0) {
BTL_ERROR(("connect() failed with errno=%d", so_error));
BTL_ERROR(("connect() failed: %s (%d)",
strerror(so_error),
so_error));
mca_btl_tcp_endpoint_close(btl_endpoint);
return;
}

Просмотреть файл

@ -115,7 +115,9 @@ bool mca_btl_tcp_frag_send(mca_btl_tcp_frag_t* frag, int sd)
mca_btl_tcp_endpoint_close(frag->endpoint);
return false;
default:
BTL_ERROR(("mca_btl_tcp_frag_send: writev failed with errno=%d", opal_socket_errno));
BTL_ERROR(("mca_btl_tcp_frag_send: writev failed: %s (%d)",
strerror(opal_socket_errno),
opal_socket_errno));
mca_btl_tcp_endpoint_close(frag->endpoint);
return false;
}
@ -199,7 +201,9 @@ bool mca_btl_tcp_frag_recv(mca_btl_tcp_frag_t* frag, int sd)
mca_btl_tcp_endpoint_close(btl_endpoint);
return false;
default:
BTL_ERROR(("mca_btl_tcp_frag_recv: readv failed with errno=%d", opal_socket_errno));
BTL_ERROR(("mca_btl_tcp_frag_recv: readv failed: %s (%d)",
strerror(opal_socket_errno),
opal_socket_errno));
mca_btl_tcp_endpoint_close(btl_endpoint);
return false;
}

Просмотреть файл

@ -347,7 +347,8 @@ static void mca_oob_tcp_accept(void)
if(opal_socket_errno == EINTR)
continue;
if(opal_socket_errno != EAGAIN && opal_socket_errno != EWOULDBLOCK)
opal_output(0, "mca_oob_tcp_accept: accept() failed with errno %d.", opal_socket_errno);
opal_output(0, "mca_oob_tcp_accept: accept() failed: %s (%d).",
strerror(opal_socket_errno), opal_socket_errno);
return;
}
@ -382,7 +383,8 @@ static int mca_oob_tcp_create_listen(void)
/* create a listen socket for incoming connections */
mca_oob_tcp_component.tcp_listen_sd = socket(AF_INET, SOCK_STREAM, 0);
if(mca_oob_tcp_component.tcp_listen_sd < 0) {
opal_output(0,"mca_oob_tcp_component_init: socket() failed with errno=%d", opal_socket_errno);
opal_output(0,"mca_oob_tcp_component_init: socket() failed: %s (%d)",
strerror(opal_socket_errno), opal_socket_errno);
return ORTE_ERROR;
}
@ -396,32 +398,37 @@ static int mca_oob_tcp_create_listen(void)
inaddr.sin_port = 0;
if(bind(mca_oob_tcp_component.tcp_listen_sd, (struct sockaddr*)&inaddr, sizeof(inaddr)) < 0) {
opal_output(0,"mca_oob_tcp_create_listen: bind() failed with errno=%d", opal_socket_errno);
opal_output(0,"mca_oob_tcp_create_listen: bind() failed: %s (%d)",
strerror(opal_socket_errno), opal_socket_errno);
return ORTE_ERROR;
}
/* resolve system assigned port */
addrlen = sizeof(struct sockaddr_in);
if(getsockname(mca_oob_tcp_component.tcp_listen_sd, (struct sockaddr*)&inaddr, &addrlen) < 0) {
opal_output(0, "mca_oob_tcp_create_listen: getsockname() failed with errno=%d", opal_socket_errno);
opal_output(0, "mca_oob_tcp_create_listen: getsockname(): %s (%d)",
strerror(opal_socket_errno), opal_socket_errno);
return ORTE_ERROR;
}
mca_oob_tcp_component.tcp_listen_port = inaddr.sin_port;
/* setup listen backlog to maximum allowed by kernel */
if(listen(mca_oob_tcp_component.tcp_listen_sd, SOMAXCONN) < 0) {
opal_output(0, "mca_oob_tcp_component_init: listen() failed with errno=%d", opal_socket_errno);
opal_output(0, "mca_oob_tcp_component_init: listen(): %s (%d)",
strerror(opal_socket_errno), opal_socket_errno);
return ORTE_ERROR;
}
/* set socket up to be non-blocking, otherwise accept could block */
if((flags = fcntl(mca_oob_tcp_component.tcp_listen_sd, F_GETFL, 0)) < 0) {
opal_output(0, "mca_oob_tcp_component_init: fcntl(F_GETFL) failed with errno=%d", opal_socket_errno);
opal_output(0, "mca_oob_tcp_component_init: fcntl(F_GETFL) failed: %s (%d)",
strerror(opal_socket_errno), opal_socket_errno);
return ORTE_ERROR;
} else {
flags |= O_NONBLOCK;
if(fcntl(mca_oob_tcp_component.tcp_listen_sd, F_SETFL, flags) < 0) {
opal_output(0, "mca_oob_tcp_component_init: fcntl(F_SETFL) failed with errno=%d", opal_socket_errno);
opal_output(0, "mca_oob_tcp_component_init: fcntl(F_SETFL) failed: %s (%d)",
strerror(opal_socket_errno), opal_socket_errno);
return ORTE_ERROR;
}
}
@ -479,7 +486,8 @@ static void* mca_oob_tcp_listen_thread(opal_object_t *obj)
if (mca_oob_tcp_component.tcp_shutdown) return NULL;
if(opal_socket_errno != EAGAIN || opal_socket_errno != EWOULDBLOCK) {
opal_output(0, "mca_oob_tcp_accept: accept() failed with errno %d.", opal_socket_errno);
opal_output(0, "mca_oob_tcp_accept: accept() failed: %s (%d).",
strerror(opal_socket_errno), opal_socket_errno);
close(item->fd);
return NULL;
}
@ -586,7 +594,8 @@ static int mca_oob_tcp_create_listen_thread(void)
/* create a listen socket for incoming connections */
mca_oob_tcp_component.tcp_listen_sd = socket(AF_INET, SOCK_STREAM, 0);
if(mca_oob_tcp_component.tcp_listen_sd < 0) {
opal_output(0,"mca_oob_tcp_component_init: socket() failed with errno=%d", opal_socket_errno);
opal_output(0,"mca_oob_tcp_component_init: socket() failed: %s (%d)",
strerror(opal_socket_errno), opal_socket_errno);
return ORTE_ERROR;
}
@ -600,32 +609,37 @@ static int mca_oob_tcp_create_listen_thread(void)
inaddr.sin_port = 0;
if(bind(mca_oob_tcp_component.tcp_listen_sd, (struct sockaddr*)&inaddr, sizeof(inaddr)) < 0) {
opal_output(0,"mca_oob_tcp_create_listen: bind() failed with errno=%d", opal_socket_errno);
opal_output(0,"mca_oob_tcp_create_listen: bind() failed: %s (%d)",
strerror(opal_socket_errno), opal_socket_errno);
return ORTE_ERROR;
}
/* resolve system assigned port */
addrlen = sizeof(struct sockaddr_in);
if(getsockname(mca_oob_tcp_component.tcp_listen_sd, (struct sockaddr*)&inaddr, &addrlen) < 0) {
opal_output(0, "mca_oob_tcp_create_listen: getsockname() failed with errno=%d", opal_socket_errno);
opal_output(0, "mca_oob_tcp_create_listen: getsockname() failed: %s (%d)",
strerror(opal_socket_errno), opal_socket_errno);
return ORTE_ERROR;
}
mca_oob_tcp_component.tcp_listen_port = inaddr.sin_port;
/* setup listen backlog to maximum allowed by kernel */
if(listen(mca_oob_tcp_component.tcp_listen_sd, SOMAXCONN) < 0) {
opal_output(0, "mca_oob_tcp_component_init: listen() failed with errno=%d", opal_socket_errno);
opal_output(0, "mca_oob_tcp_component_init: listen() failed: %s (%d)",
strerror(opal_socket_errno), opal_socket_errno);
return ORTE_ERROR;
}
/* set socket up to be non-blocking, otherwise accept could block */
if((flags = fcntl(mca_oob_tcp_component.tcp_listen_sd, F_GETFL, 0)) < 0) {
opal_output(0, "mca_oob_tcp_component_init: fcntl(F_GETFL) failed with errno=%d", opal_socket_errno);
opal_output(0, "mca_oob_tcp_component_init: fcntl(F_GETFL) failed: %s (%d)",
strerror(opal_socket_errno), opal_socket_errno);
return ORTE_ERROR;
} else {
flags |= O_NONBLOCK;
if(fcntl(mca_oob_tcp_component.tcp_listen_sd, F_SETFL, flags) < 0) {
opal_output(0, "mca_oob_tcp_component_init: fcntl(F_SETFL) failed with errno=%d", opal_socket_errno);
opal_output(0, "mca_oob_tcp_component_init: fcntl(F_SETFL) failed: %s (%d)",
strerror(opal_socket_errno), opal_socket_errno);
return ORTE_ERROR;
}
}
@ -655,9 +669,10 @@ static void mca_oob_tcp_recv_probe(int sd, mca_oob_tcp_hdr_t* hdr)
int retval = send(sd, (char *)ptr+cnt, sizeof(mca_oob_tcp_hdr_t)-cnt, 0);
if(retval < 0) {
if(opal_socket_errno != EINTR && opal_socket_errno != EAGAIN && opal_socket_errno != EWOULDBLOCK) {
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_recv_probe: send() failed with errno=%d\n",
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_recv_probe: send() failed: %s (%d)\n",
ORTE_NAME_ARGS(orte_process_info.my_name),
ORTE_NAME_ARGS(&(hdr->msg_src)),
strerror(opal_socket_errno),
opal_socket_errno);
CLOSE_THE_SOCKET(sd);
return;
@ -680,13 +695,13 @@ static void mca_oob_tcp_recv_connect(int sd, mca_oob_tcp_hdr_t* hdr)
/* now set socket up to be non-blocking */
if((flags = fcntl(sd, F_GETFL, 0)) < 0) {
opal_output(0, "[%lu,%lu,%lu] mca_oob_tcp_recv_handler: fcntl(F_GETFL) failed with errno=%d",
ORTE_NAME_ARGS(orte_process_info.my_name), opal_socket_errno);
opal_output(0, "[%lu,%lu,%lu] mca_oob_tcp_recv_handler: fcntl(F_GETFL) failed: %s (%d)",
ORTE_NAME_ARGS(orte_process_info.my_name), strerror(opal_socket_errno), opal_socket_errno);
} else {
flags |= O_NONBLOCK;
if(fcntl(sd, F_SETFL, flags) < 0) {
opal_output(0, "[%lu,%lu,%lu] mca_oob_tcp_recv_handler: fcntl(F_SETFL) failed with errno=%d",
ORTE_NAME_ARGS(orte_process_info.my_name), opal_socket_errno);
opal_output(0, "[%lu,%lu,%lu] mca_oob_tcp_recv_handler: fcntl(F_SETFL) failed: %s (%d)",
ORTE_NAME_ARGS(orte_process_info.my_name), strerror(opal_socket_errno), opal_socket_errno);
}
}
@ -760,8 +775,8 @@ static void mca_oob_tcp_recv_handler(int sd, short flags, void* user)
return;
}
if(opal_socket_errno != EINTR) {
opal_output(0, "[%lu,%lu,%lu] mca_oob_tcp_recv_handler: recv() failed with errno=%d\n",
ORTE_NAME_ARGS(orte_process_info.my_name), opal_socket_errno);
opal_output(0, "[%lu,%lu,%lu] mca_oob_tcp_recv_handler: recv() failed: %s (%d)\n",
ORTE_NAME_ARGS(orte_process_info.my_name), strerror(opal_socket_errno), opal_socket_errno);
CLOSE_THE_SOCKET(sd);
return;
}

Просмотреть файл

@ -242,9 +242,10 @@ bool mca_oob_tcp_msg_send_handler(mca_oob_tcp_msg_t* msg, struct mca_oob_tcp_pee
else if (opal_socket_errno == EAGAIN || opal_socket_errno == EWOULDBLOCK)
return false;
else {
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_msg_send_handler: writev failed with errno=%d",
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_msg_send_handler: writev failed: %s (%d)",
ORTE_NAME_ARGS(orte_process_info.my_name),
ORTE_NAME_ARGS(&(peer->peer_name)),
strerror(opal_socket_errno),
opal_socket_errno);
mca_oob_tcp_peer_close(peer);
msg->msg_rc = ORTE_ERR_CONNECTION_FAILED;
@ -341,9 +342,10 @@ static bool mca_oob_tcp_msg_recv(mca_oob_tcp_msg_t* msg, mca_oob_tcp_peer_t* pee
else if (opal_socket_errno == EAGAIN || opal_socket_errno == EWOULDBLOCK)
return false;
else {
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_msg_recv: readv failed with errno=%d",
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_msg_recv: readv failed: %s (%d)",
ORTE_NAME_ARGS(orte_process_info.my_name),
ORTE_NAME_ARGS(&(peer->peer_name)),
strerror(opal_socket_errno),
opal_socket_errno);
mca_oob_tcp_peer_close(peer);
mca_oob_call_exception_handlers(&peer->peer_name, MCA_OOB_PEER_DISCONNECTED);
@ -353,8 +355,7 @@ static bool mca_oob_tcp_msg_recv(mca_oob_tcp_msg_t* msg, mca_oob_tcp_peer_t* pee
if(mca_oob_tcp_component.tcp_debug > 3) {
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_msg_recv: peer closed connection",
ORTE_NAME_ARGS(orte_process_info.my_name),
ORTE_NAME_ARGS(&(peer->peer_name)),
opal_socket_errno);
ORTE_NAME_ARGS(&(peer->peer_name)));
}
mca_oob_tcp_peer_close(peer);
mca_oob_call_exception_handlers(&peer->peer_name, MCA_OOB_PEER_DISCONNECTED);

Просмотреть файл

@ -310,11 +310,12 @@ static int mca_oob_tcp_peer_try_connect(mca_oob_tcp_peer_t* peer)
if(mca_oob_tcp_component.tcp_debug > 0) {
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_try_connect: "
"connect to %s:%d failed with errno=%d",
"connect to %s:%d failed: %s (%d)",
ORTE_NAME_ARGS(orte_process_info.my_name),
ORTE_NAME_ARGS(&(peer->peer_name)),
inet_ntoa(inaddr.sin_addr),
ntohs(inaddr.sin_port),
strerror(opal_socket_errno),
opal_socket_errno);
}
@ -329,7 +330,7 @@ static int mca_oob_tcp_peer_try_connect(mca_oob_tcp_peer_t* peer)
} else {
opal_output(0,
"[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_try_connect: "
"mca_oob_tcp_peer_send_connect_ack to %s:%d failed with errno=%d",
"mca_oob_tcp_peer_send_connect_ack to %s:%d failed: %s (%d)",
ORTE_NAME_ARGS(orte_process_info.my_name),
ORTE_NAME_ARGS(&(peer->peer_name)),
inet_ntoa(inaddr.sin_addr),
@ -344,8 +345,7 @@ static int mca_oob_tcp_peer_try_connect(mca_oob_tcp_peer_t* peer)
ORTE_NAME_ARGS(orte_process_info.my_name),
ORTE_NAME_ARGS(&(peer->peer_name)),
inet_ntoa(inaddr.sin_addr),
ntohs(inaddr.sin_port),
opal_socket_errno);
ntohs(inaddr.sin_port));
mca_oob_tcp_peer_close(peer);
return ORTE_ERR_UNREACH;
}
@ -369,9 +369,10 @@ static int mca_oob_tcp_peer_start_connect(mca_oob_tcp_peer_t* peer)
if (peer->peer_sd < 0) {
struct timeval tv = { 1,0 };
opal_output(0,
"[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_start_connect: socket() failed with errno=%d\n",
"[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_start_connect: socket() failed: %s (%d)\n",
ORTE_NAME_ARGS(orte_process_info.my_name),
ORTE_NAME_ARGS(&(peer->peer_name)),
strerror(opal_socket_errno),
opal_socket_errno);
mca_oob_tcp_peer_shutdown(peer);
opal_evtimer_add(&peer->peer_timer_event, &tv);
@ -386,16 +387,18 @@ static int mca_oob_tcp_peer_start_connect(mca_oob_tcp_peer_t* peer)
/* setup the socket as non-blocking */
if((flags = fcntl(peer->peer_sd, F_GETFL, 0)) < 0) {
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_connect: fcntl(F_GETFL) failed with errno=%d\n",
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_connect: fcntl(F_GETFL) failed: %s (%d)\n",
ORTE_NAME_ARGS(orte_process_info.my_name),
ORTE_NAME_ARGS(&(peer->peer_name)),
strerror(opal_socket_errno),
opal_socket_errno);
} else {
flags |= O_NONBLOCK;
if(fcntl(peer->peer_sd, F_SETFL, flags) < 0)
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_connect: fcntl(F_SETFL) failed with errno=%d\n",
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_connect: fcntl(F_SETFL) failed: %s (%d)\n",
ORTE_NAME_ARGS(orte_process_info.my_name),
ORTE_NAME_ARGS(&(peer->peer_name)),
strerror(opal_socket_errno),
opal_socket_errno);
}
@ -429,9 +432,10 @@ static void mca_oob_tcp_peer_complete_connect(mca_oob_tcp_peer_t* peer)
/* check connect completion status */
if(getsockopt(peer->peer_sd, SOL_SOCKET, SO_ERROR, (char *)&so_error, &so_length) < 0) {
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_complete_connect: getsockopt() failed with errno=%d\n",
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_complete_connect: getsockopt() failed: %s (%d)\n",
ORTE_NAME_ARGS(orte_process_info.my_name),
ORTE_NAME_ARGS(&(peer->peer_name)),
strerror(opal_socket_errno),
opal_socket_errno);
mca_oob_tcp_peer_close(peer);
return;
@ -643,9 +647,10 @@ static int mca_oob_tcp_peer_recv_blocking(mca_oob_tcp_peer_t* peer, void* data,
/* socket is non-blocking so handle errors */
if(retval < 0) {
if(opal_socket_errno != EINTR && opal_socket_errno != EAGAIN && opal_socket_errno != EWOULDBLOCK) {
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_recv_blocking: recv() failed with errno=%d\n",
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_recv_blocking: recv() failed: %s (%d)\n",
ORTE_NAME_ARGS(orte_process_info.my_name),
ORTE_NAME_ARGS(&(peer->peer_name)),
strerror(errno),
errno);
mca_oob_tcp_peer_close(peer);
return -1;
@ -669,9 +674,10 @@ static int mca_oob_tcp_peer_send_blocking(mca_oob_tcp_peer_t* peer, void* data,
int retval = send(peer->peer_sd, (char *)ptr+cnt, size-cnt, 0);
if(retval < 0) {
if(opal_socket_errno != EINTR && opal_socket_errno != EAGAIN && opal_socket_errno != EWOULDBLOCK) {
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_send_blocking: send() failed with errno=%d\n",
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_send_blocking: send() failed: %s (%d)\n",
ORTE_NAME_ARGS(orte_process_info.my_name),
ORTE_NAME_ARGS(&(peer->peer_name)),
strerror(opal_socket_errno),
opal_socket_errno);
mca_oob_tcp_peer_close(peer);
return -1;
@ -841,13 +847,17 @@ static void mca_oob_tcp_peer_dump(mca_oob_tcp_peer_t* peer, const char* msg)
sprintf(dst, "%s", inet_ntoa(inaddr.sin_addr));
if((flags = fcntl(peer->peer_sd, F_GETFL, 0)) < 0) {
opal_output(0, "mca_oob_tcp_peer_dump: fcntl(F_GETFL) failed with errno=%d\n", opal_socket_errno);
opal_output(0, "mca_oob_tcp_peer_dump: fcntl(F_GETFL) failed: %s (%d)\n",
strerror(opal_socket_errno),
opal_socket_errno);
}
#if defined(SO_SNDBUF)
optlen = sizeof(sndbuf);
if(getsockopt(peer->peer_sd, SOL_SOCKET, SO_SNDBUF, (char *)&sndbuf, &optlen) < 0) {
opal_output(0, "mca_oob_tcp_peer_dump: SO_SNDBUF option: errno %d\n", opal_socket_errno);
opal_output(0, "mca_oob_tcp_peer_dump: SO_SNDBUF option: %s (%d)\n",
strerror(opal_socket_errno),
opal_socket_errno);
}
#else
sndbuf = -1;
@ -855,7 +865,9 @@ static void mca_oob_tcp_peer_dump(mca_oob_tcp_peer_t* peer, const char* msg)
#if defined(SO_RCVBUF)
optlen = sizeof(rcvbuf);
if(getsockopt(peer->peer_sd, SOL_SOCKET, SO_RCVBUF, (char *)&rcvbuf, &optlen) < 0) {
opal_output(0, "mca_oob_tcp_peer_dump: SO_RCVBUF option: errno %d\n", opal_socket_errno);
opal_output(0, "mca_oob_tcp_peer_dump: SO_RCVBUF option: %s (%d)\n",
strerror(opal_socket_errno),
opal_socket_errno);
}
#else
rcvbuf = -1;
@ -863,7 +875,9 @@ static void mca_oob_tcp_peer_dump(mca_oob_tcp_peer_t* peer, const char* msg)
#if defined(TCP_NODELAY)
optlen = sizeof(nodelay);
if(getsockopt(peer->peer_sd, IPPROTO_TCP, TCP_NODELAY, (char *)&nodelay, &optlen) < 0) {
opal_output(0, "mca_oob_tcp_peer_dump: TCP_NODELAY option: errno %d\n", opal_socket_errno);
opal_output(0, "mca_oob_tcp_peer_dump: TCP_NODELAY option: %s (%d)\n",
strerror(opal_socket_errno),
opal_socket_errno);
}
#else
nodelay = 0;
@ -993,19 +1007,28 @@ void mca_oob_tcp_set_socket_options(int sd)
#if defined(TCP_NODELAY)
optval = 1;
if(setsockopt(sd, IPPROTO_TCP, TCP_NODELAY, (char *)&optval, sizeof(optval)) < 0) {
opal_output(0, "[%s:%d] setsockopt(TCP_NODELAY) failed with errno=%d", __FILE__, __LINE__, opal_socket_errno);
opal_output(0, "[%s:%d] setsockopt(TCP_NODELAY) failed: %s (%d)",
__FILE__, __LINE__,
strerror(opal_socket_errno),
opal_socket_errno);
}
#endif
#if defined(SO_SNDBUF)
if(mca_oob_tcp_component.tcp_sndbuf > 0 &&
setsockopt(sd, SOL_SOCKET, SO_SNDBUF, (char *)&mca_oob_tcp_component.tcp_sndbuf, sizeof(int)) < 0) {
opal_output(0, "[%s:%d] setsockopt(SO_SNDBUF) failed with errno %d", __FILE__, __LINE__, opal_socket_errno);
opal_output(0, "[%s:%d] setsockopt(SO_SNDBUF) failed: %s (%d)",
__FILE__, __LINE__,
strerror(opal_socket_errno),
opal_socket_errno);
}
#endif
#if defined(SO_RCVBUF)
if(mca_oob_tcp_component.tcp_rcvbuf > 0 &&
setsockopt(sd, SOL_SOCKET, SO_RCVBUF, (char *)&mca_oob_tcp_component.tcp_rcvbuf, sizeof(int)) < 0) {
opal_output(0, "[%s:%d] setsockopt(SO_RCVBUF) failed with errno %d", __FILE__, __LINE__, opal_socket_errno);
opal_output(0, "[%s:%d] setsockopt(SO_RCVBUF) failed: %s (%d)",
__FILE__, __LINE__,
strerror(opal_socket_errno),
opal_socket_errno);
}
#endif
}

Просмотреть файл

@ -95,25 +95,28 @@ int mca_oob_tcp_ping(
sd = socket(AF_INET, SOCK_STREAM, 0);
if (sd < 0) {
opal_output(0,
"[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_ping: socket() failed with errno=%d\n",
"[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_ping: socket() failed: %s (%d)\n",
ORTE_NAME_ARGS(orte_process_info.my_name),
ORTE_NAME_ARGS(name),
strerror(opal_socket_errno),
opal_socket_errno);
return ORTE_ERR_UNREACH;
}
/* setup the socket as non-blocking */
if((flags = fcntl(sd, F_GETFL, 0)) < 0) {
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_ping: fcntl(F_GETFL) failed with errno=%d\n",
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_ping: fcntl(F_GETFL) failed: %s (%d)\n",
ORTE_NAME_ARGS(orte_process_info.my_name),
ORTE_NAME_ARGS(name),
strerror(opal_socket_errno),
opal_socket_errno);
} else {
flags |= O_NONBLOCK;
if(fcntl(sd, F_SETFL, flags) < 0) {
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_ping: fcntl(F_SETFL) failed with errno=%d\n",
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_ping: fcntl(F_SETFL) failed: %s (%d)\n",
ORTE_NAME_ARGS(orte_process_info.my_name),
ORTE_NAME_ARGS(name),
strerror(opal_socket_errno),
opal_socket_errno);
}
}
@ -140,9 +143,10 @@ int mca_oob_tcp_ping(
/* set socket back to blocking */
flags &= ~O_NONBLOCK;
if(fcntl(sd, F_SETFL, flags) < 0) {
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_ping: fcntl(F_SETFL) failed with errno=%d\n",
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_ping: fcntl(F_SETFL) failed: %s (%d)\n",
ORTE_NAME_ARGS(orte_process_info.my_name),
ORTE_NAME_ARGS(name),
strerror(opal_socket_errno),
opal_socket_errno);
}