Derived from a patch provided by Artem: clean up the "abnormal" code path for selecting TCP OOB modules to connect to a remote process. If we can't find a direct interface-to-address match, then assign all the provided addresses to the first available TCP module and let the normal failure process determine whether the remote proc is truly reachable.
cmr=v1.8.2:reviewer=artpol:subject=fix abnormal code connection path in tcp oob
This commit was SVN r31536.
Этот коммит содержится в:
родитель
626b521e9c
Коммит
d642babff6
@ -805,7 +805,7 @@ static int component_set_addr(orte_process_name_t *peer,
|
|||||||
char **addrs, *hptr;
|
char **addrs, *hptr;
|
||||||
char *tcpuri=NULL, *host, *ports;
|
char *tcpuri=NULL, *host, *ports;
|
||||||
int i, j, k, rc;
|
int i, j, k, rc;
|
||||||
mca_oob_tcp_module_t *mod;
|
mca_oob_tcp_module_t *mod, *firstmod;
|
||||||
mca_oob_tcp_component_peer_t *pr;
|
mca_oob_tcp_component_peer_t *pr;
|
||||||
uint16_t af_family = AF_UNSPEC;
|
uint16_t af_family = AF_UNSPEC;
|
||||||
uint64_t ui64;
|
uint64_t ui64;
|
||||||
@ -950,10 +950,32 @@ static int component_set_addr(orte_process_name_t *peer,
|
|||||||
/* if we cycled thru all their addresses without finding a match in our
|
/* if we cycled thru all their addresses without finding a match in our
|
||||||
* interfaces, then it remains possible that they have a routed system
|
* interfaces, then it remains possible that they have a routed system
|
||||||
* that can still route messages to the destination. So give it a chance
|
* that can still route messages to the destination. So give it a chance
|
||||||
* to succeed by assigning our first module to try, and let the normal
|
* to succeed by assigning each provided address to the first module in our list,
|
||||||
* failure progression ultimately determine if we can reach this peer
|
* and let the normal failure progression ultimately determine if we
|
||||||
|
* can reach this peer. A future enhancement could be to do a better
|
||||||
|
* job of matching provided addresses with available interfaces, perhaps
|
||||||
|
* looking at the number of matching octets and assigning the address
|
||||||
|
* to the interface with the most matches - but that's for someone else
|
||||||
|
* to address :-)
|
||||||
*/
|
*/
|
||||||
if (!assigned) {
|
if (!assigned) {
|
||||||
|
firstmod = NULL;
|
||||||
|
for (k=0; k < mca_oob_tcp_component.modules.size; k++) {
|
||||||
|
if (NULL == (mod = (mca_oob_tcp_module_t*)opal_pointer_array_get_item(&mca_oob_tcp_component.modules, k))) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (NULL == firstmod) {
|
||||||
|
firstmod = mod;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (NULL == firstmod) {
|
||||||
|
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||||
|
return ORTE_ERR_NOT_FOUND;
|
||||||
|
}
|
||||||
|
for (j=0; NULL != addrs[j]; j++) {
|
||||||
|
/* if they gave us "localhost", then just take the first conn on our list */
|
||||||
|
if (0 == strcasecmp(addrs[j], "localhost")) {
|
||||||
#if OPAL_ENABLE_IPV6
|
#if OPAL_ENABLE_IPV6
|
||||||
if (AF_INET6 == af_family) {
|
if (AF_INET6 == af_family) {
|
||||||
if (NULL == mca_oob_tcp_component.ipv6conns ||
|
if (NULL == mca_oob_tcp_component.ipv6conns ||
|
||||||
@ -971,19 +993,8 @@ static int component_set_addr(orte_process_name_t *peer,
|
|||||||
#if OPAL_ENABLE_IPV6
|
#if OPAL_ENABLE_IPV6
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
/* lookup the kernel index of this address */
|
} else {
|
||||||
if (0 >= (k = opal_ifaddrtokindex(host))) {
|
host = addrs[j];
|
||||||
opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
|
||||||
"%s UNFOUND KERNEL INDEX %d FOR ADDRESS %s",
|
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), k, host);
|
|
||||||
/* we don't have an interface on this subnet - ignore it */
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (NULL == (mod = (mca_oob_tcp_module_t*)opal_pointer_array_get_item(&mca_oob_tcp_component.modules, k))) {
|
|
||||||
opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
|
||||||
"%s NO MODULE AT KINDEX %d FOR ADDRESS %s",
|
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), k, host);
|
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
/* record that this peer may be reachable via this module, but don't assign
|
/* record that this peer may be reachable via this module, but don't assign
|
||||||
* the peer to this module until later when we actually connect
|
* the peer to this module until later when we actually connect
|
||||||
@ -991,7 +1002,7 @@ static int component_set_addr(orte_process_name_t *peer,
|
|||||||
opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||||
"%s PEER %s MAY BE REACHABLE BY ROUTING - ASSIGNING MODULE AT KINDEX %d INTERFACE %s",
|
"%s PEER %s MAY BE REACHABLE BY ROUTING - ASSIGNING MODULE AT KINDEX %d INTERFACE %s",
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||||
ORTE_NAME_PRINT(peer), k, mod->if_name);
|
ORTE_NAME_PRINT(peer), k, firstmod->if_name);
|
||||||
if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&mca_oob_tcp_component.peers,
|
if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&mca_oob_tcp_component.peers,
|
||||||
ui64, (void**)&pr) || NULL == pr) {
|
ui64, (void**)&pr) || NULL == pr) {
|
||||||
pr = OBJ_NEW(mca_oob_tcp_component_peer_t);
|
pr = OBJ_NEW(mca_oob_tcp_component_peer_t);
|
||||||
@ -1012,11 +1023,12 @@ static int component_set_addr(orte_process_name_t *peer,
|
|||||||
opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||||
"%s PASSING ADDR %s TO INTERFACE %s AT KERNEL INDEX %d",
|
"%s PASSING ADDR %s TO INTERFACE %s AT KERNEL INDEX %d",
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), host,
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), host,
|
||||||
mod->if_name, k);
|
firstmod->if_name, k);
|
||||||
mod->api.set_peer((struct mca_oob_tcp_module_t*)mod,
|
mod->api.set_peer((struct mca_oob_tcp_module_t*)firstmod,
|
||||||
peer, af_family, host, ports);
|
peer, af_family, host, ports);
|
||||||
found = true;
|
found = true;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
if (NULL != addrs) {
|
if (NULL != addrs) {
|
||||||
opal_argv_free(addrs);
|
opal_argv_free(addrs);
|
||||||
}
|
}
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user