1
1

usnic: allow connect(2) to fail temporarily

When the connectivity checker client's connect(2) to its agent fails
with ECONNREFUSED, delay briefly and retry, up to a bounded number of
attempts (connectivity_num_retries), before treating it as fatal.
Этот коммит содержится в:
Jeff Squyres 2016-03-08 15:35:34 -08:00
родитель 85674b9dfc
Коммит 4975fdcd5c

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014-2016 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -105,7 +105,23 @@ int opal_btl_usnic_connectivity_client_init(void)
address.sun_family = AF_UNIX;
strncpy(address.sun_path, ipc_filename, sizeof(address.sun_path) - 1);
if (0 != connect(agent_fd, (struct sockaddr*) &address, sizeof(address))) {
int count = 0;
while (1) {
int ret = connect(agent_fd, (struct sockaddr*) &address,
sizeof(address));
if (0 == ret) {
break;
}
// If we get ECONNREFUSED, delay a little and try again
if (ECONNREFUSED == errno) {
if (count < mca_btl_usnic_component.connectivity_num_retries) {
usleep(100);
++count;
continue;
}
}
OPAL_ERROR_LOG(OPAL_ERR_IN_ERRNO);
ABORT("connect() failed");
/* Will not return */