1
1

usnic: fix sender hash comparisons for UDP

There was a duplicated subnet check in the sender hash lookup routine.
This caused receivers to always fail the sender hash lookup if the
sender was in a different subnet, so the receiver would discard the
packet as though it were coming from a different job.

cmr=v1.7.5:ticket=trac:4253

This commit was SVN r30841.

The following Trac tickets were found above:
  Ticket 4253 --> https://svn.open-mpi.org/trac/ompi/ticket/4253
Этот коммит содержится в:
Dave Goodell 2014-02-26 07:46:50 +00:00
родитель 90d68730f1
Коммит f6036d11c8
2 изменённых файла: 19 добавлений и 34 удаления

Просмотреть файл

@@ -518,6 +518,8 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
/* initialization */ /* initialization */
mca_btl_usnic_component.my_hashed_rte_name = mca_btl_usnic_component.my_hashed_rte_name =
ompi_rte_hash_name(&(ompi_proc_local()->proc_name)); ompi_rte_hash_name(&(ompi_proc_local()->proc_name));
MSGDEBUG1_OUT("%s: my_hashed_rte_name=0x%" PRIx64,
__func__, mca_btl_usnic_component.my_hashed_rte_name);
opal_srand(&ompi_btl_usnic_rand_buff, ((uint32_t) getpid())); opal_srand(&ompi_btl_usnic_rand_buff, ((uint32_t) getpid()));

Просмотреть файл

@@ -11,7 +11,7 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2006 Sandia National Laboratories. All rights * Copyright (c) 2006 Sandia National Laboratories. All rights
* reserved. * reserved.
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2013 Intel, Inc. All rights reserved * Copyright (c) 2013 Intel, Inc. All rights reserved
* $COPYRIGHT$ * $COPYRIGHT$
* *
@@ -132,43 +132,23 @@ ompi_btl_usnic_endpoint_t *
ompi_btl_usnic_proc_lookup_endpoint(ompi_btl_usnic_module_t *receiver, ompi_btl_usnic_proc_lookup_endpoint(ompi_btl_usnic_module_t *receiver,
uint64_t sender_hashed_rte_name) uint64_t sender_hashed_rte_name)
{ {
size_t i;
uint32_t mynet, peernet;
ompi_btl_usnic_proc_t *proc; ompi_btl_usnic_proc_t *proc;
ompi_btl_usnic_endpoint_t *endpoint; ompi_btl_usnic_endpoint_t *endpoint;
opal_list_item_t *item;
for (proc = (ompi_btl_usnic_proc_t*)
opal_list_get_first(&mca_btl_usnic_component.usnic_procs); MSGDEBUG1_OUT("lookup_endpoint: recvmodule=%p sendhash=0x%" PRIx64,
proc != (ompi_btl_usnic_proc_t*) (void *)receiver, sender_hashed_rte_name);
opal_list_get_end(&mca_btl_usnic_component.usnic_procs);
proc = (ompi_btl_usnic_proc_t*) for (item = opal_list_get_first(&receiver->all_endpoints);
opal_list_get_next(proc)) { item != opal_list_get_end(&receiver->all_endpoints);
item = opal_list_get_next(item)) {
endpoint = container_of(item, ompi_btl_usnic_endpoint_t,
endpoint_endpoint_li);
proc = endpoint->endpoint_proc;
if (ompi_rte_hash_name(&proc->proc_ompi->proc_name) == if (ompi_rte_hash_name(&proc->proc_ompi->proc_name) ==
sender_hashed_rte_name) { sender_hashed_rte_name) {
break; MSGDEBUG1_OUT("lookup_endpoint: matched endpoint=%p",
} (void *)endpoint);
}
/* If we didn't find the sending proc (!), return NULL */
if (opal_list_get_end(&mca_btl_usnic_component.usnic_procs) ==
(opal_list_item_t*) proc) {
return NULL;
}
/* Look through all the endpoints on sender's proc and find one
that we can reach. For the moment, do the same test as in
match_modex: check to see if we have compatible IPv4
networks. */
mynet = ompi_btl_usnic_get_ipv4_subnet(receiver->if_ipv4_addr,
receiver->if_cidrmask);
for (i = 0; i < proc->proc_endpoint_count; ++i) {
endpoint = proc->proc_endpoints[i];
peernet = ompi_btl_usnic_get_ipv4_subnet(endpoint->endpoint_remote_addr.ipv4_addr,
endpoint->endpoint_remote_addr.cidrmask);
/* If we match, we're done */
if (mynet == peernet) {
return endpoint; return endpoint;
} }
} }
@@ -472,6 +452,9 @@ ompi_btl_usnic_create_endpoint(ompi_btl_usnic_module_t *module,
/* Now claim that modex slot */ /* Now claim that modex slot */
proc->proc_modex_claimed[modex_index] = true; proc->proc_modex_claimed[modex_index] = true;
MSGDEBUG1_OUT("create_endpoint: module=%p claimed endpoint=%p on proc=%p (hash=0x%" PRIx64 ")\n",
(void *)module, (void *)endpoint, (void *)proc,
ompi_rte_hash_name(&proc->proc_ompi->proc_name));
/* Save the endpoint on this proc's array of endpoints */ /* Save the endpoint on this proc's array of endpoints */
proc->proc_endpoints[proc->proc_endpoint_count] = endpoint; proc->proc_endpoints[proc->proc_endpoint_count] = endpoint;