Per discussion with George and Ralph, change this BTL_ERROR message to
an opal_show_help() so that its output is deduplicated. This commit was SVN r31590.
Этот коммит содержится в:
родитель
445b552d3a
Коммит
56ecb92b10
@ -9,7 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2007-2014 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2009 Oak Ridge National Laboratory
|
||||
* Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights
|
||||
@ -1126,9 +1126,14 @@ static void mca_btl_tcp_component_accept_handler( int incoming_sd,
|
||||
if(sd < 0) {
|
||||
if(opal_socket_errno == EINTR)
|
||||
continue;
|
||||
if(opal_socket_errno != EAGAIN && opal_socket_errno != EWOULDBLOCK)
|
||||
BTL_ERROR(("accept() failed: %s (%d).",
|
||||
strerror(opal_socket_errno), opal_socket_errno));
|
||||
if (opal_socket_errno != EAGAIN &&
|
||||
opal_socket_errno != EWOULDBLOCK) {
|
||||
opal_show_help("help-mpi-btl-tcp.txt", "accept failed",
|
||||
true, "v4", ompi_process_info.nodename,
|
||||
getpid(),
|
||||
opal_socket_errno,
|
||||
strerror(opal_socket_errno));
|
||||
}
|
||||
return;
|
||||
}
|
||||
mca_btl_tcp_set_socket_options(sd);
|
||||
|
@ -47,3 +47,20 @@ Your Open MPI job may now fail.
|
||||
PID: %d
|
||||
Message: %s
|
||||
#
|
||||
[accept failed]
|
||||
WARNING: The accept(3) system call failed on a TCP socket. While this
|
||||
should generally never happen on a well-configured HPC system, the
|
||||
most common causes when it does occur are:
|
||||
|
||||
* The process ran out of file descriptors
|
||||
* The operating system ran out of file descriptors
|
||||
* The operating system ran out of memory
|
||||
|
||||
Your Open MPI job will likely hang until the failure reason is fixed
|
||||
(e.g., more file descriptors and/or memory becomes available), and may
|
||||
eventually time out / abort.
|
||||
|
||||
Local host: %s
|
||||
PID: %d
|
||||
Errno: %d (%s)
|
||||
#
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user