From 65b66ab4ae273360921a4c8d5239c21affa7d3f7 Mon Sep 17 00:00:00 2001 From: Dave Goodell Date: Wed, 20 May 2015 17:20:32 -0700 Subject: [PATCH] usnic: use fi_getname in newer libfabric When using an external libfabric (or really any libfabric newer than libfabric commit 607e863), we must use fi_getname to determine the local port of our endpoint. Without this fix, OMPI will hang endlessly while retransmitting packets to port 0 on the remote host. --- opal/mca/btl/usnic/btl_usnic_module.c | 41 +++++++++++++++++++++++++++ opal/mca/btl/usnic/btl_usnic_module.h | 1 + 2 files changed, 42 insertions(+) diff --git a/opal/mca/btl/usnic/btl_usnic_module.c b/opal/mca/btl/usnic/btl_usnic_module.c index 326553368e..0648f37918 100644 --- a/opal/mca/btl/usnic/btl_usnic_module.c +++ b/opal/mca/btl/usnic/btl_usnic_module.c @@ -1393,7 +1393,9 @@ static int create_ep(opal_btl_usnic_module_t* module, struct opal_btl_usnic_channel_t *channel) { int rc; + struct sockaddr *sa; struct sockaddr_in *sin; + size_t addrlen; struct fi_info *hint; hint = fi_dupinfo(module->fabric_info); @@ -1436,6 +1438,21 @@ static int create_ep(opal_btl_usnic_module_t* module, channel->info->caps &= ~(1ULL << 63); } + /* all of the OMPI code assumes IPv4, but some versions of libfabric will + * return FI_SOCKADDR instead of FI_SOCKADDR_IN, so we need to do a little + * bit of sanity checking */ + assert(FI_SOCKADDR_IN == channel->info->addr_format || + FI_SOCKADDR == channel->info->addr_format); + if (FI_SOCKADDR == channel->info->addr_format) { + sa = (struct sockaddr *)channel->info->src_addr; + assert(AF_INET == sa->sa_family); + } + sin = (struct sockaddr_in *)channel->info->src_addr; + assert(sizeof(struct sockaddr_in) == channel->info->src_addrlen); + + /* no matter the version of libfabric, this should hold */ + assert(0 == sin->sin_port); + rc = fi_endpoint(module->domain, channel->info, &channel->ep, NULL); if (0 != rc || NULL == channel->ep) { opal_show_help("help-mpi-btl-usnic.txt", @@ -1496,6 
+1513,30 @@ static int create_ep(opal_btl_usnic_module_t* module, return OPAL_ERR_OUT_OF_RESOURCE; } + /* Immediately after libfabric v1.0 was released, we implemented support + * for fi_getname and changed the behavior of fi_endpoint w.r.t. setting + * the src_addr field of the fi_info struct passed in. Before the change + * fi_endpoint would set the src_addr field, including the sin_port field + * but calling fi_getname would return -FI_ENOSYS. Afterwards the address + * would not be touched relative to whatever was set by fi_getinfo. So we + * must call fi_getname in that case. + */ + if (0 == sin->sin_port) { + addrlen = sizeof(struct sockaddr_in); + rc = fi_getname(&channel->ep->fid, channel->info->src_addr, &addrlen); + if (0 != rc) { + opal_show_help("help-mpi-btl-usnic.txt", + "internal error during init", + true, + opal_process_info.nodename, + module->fabric_info->fabric_attr->name, + "fi_getname() failed", __FILE__, __LINE__, + rc, fi_strerror(-rc)); + return OPAL_ERR_OUT_OF_RESOURCE; + } + assert(0 != sin->sin_port); + } + /* actual sizes */ channel->chan_rd_num = channel->info->rx_attr->size; channel->chan_sd_num = channel->info->tx_attr->size; diff --git a/opal/mca/btl/usnic/btl_usnic_module.h b/opal/mca/btl/usnic/btl_usnic_module.h index 9403775ece..890c3ac115 100644 --- a/opal/mca/btl/usnic/btl_usnic_module.h +++ b/opal/mca/btl/usnic/btl_usnic_module.h @@ -25,6 +25,7 @@ #define OPAL_BTL_USNIC_MODULE_H #include <rdma/fabric.h> +#include <rdma/fi_cm.h> #include <rdma/fi_eq.h> #include <rdma/fi_endpoint.h> #include <rdma/fi_errno.h>