1
1
* s/port/tcp_port/g where relevant to disambiguate TCP port from
   device port
 * Rework ipaddrcheck to make it work in the LMC>0 case

This commit was SVN r18482.

The following Trac tickets were found above:
  Ticket 1281 --> https://svn.open-mpi.org/trac/ompi/ticket/1281
Этот коммит содержится в:
Jeff Squyres 2008-05-22 19:18:15 +00:00
родитель 8c3b31b181
Коммит 1f7f0e1f96

Просмотреть файл

@ -63,12 +63,12 @@ struct rdmacm_contents {
struct ibv_cq *dummy_cq;
struct rdma_cm_id **id;
uint32_t ipaddr;
uint16_t port;
uint16_t tcp_port;
};
struct message {
uint32_t ipaddr;
uint16_t port;
uint16_t tcp_port;
};
struct rdmacm_endpoint_local_cpc_data {
@ -169,8 +169,8 @@ static mca_btl_openib_endpoint_t *rdmacm_find_endpoint(struct rdmacm_contents *l
peeripaddr = ((struct sockaddr_in *)peeraddr)->sin_addr.s_addr;
BTL_VERBOSE(("message ipaddr = %x port %d, rdma_get_peer_addr = %x",
message->ipaddr, message->port, peeripaddr));
if (message->ipaddr == peeripaddr && message->port == rem_port) {
message->ipaddr, message->tcp_port, peeripaddr));
if (message->ipaddr == peeripaddr && message->tcp_port == rem_port) {
ep = ib_endpoint;
break;
}
@ -287,7 +287,7 @@ static int rdma_client_connect_one(struct rdmacm_contents *local,
memset(&din, 0, sizeof(din));
din.sin_family = AF_INET;
din.sin_addr.s_addr = message->ipaddr;
din.sin_port = message->port;
din.sin_port = message->tcp_port;
/* Once the route to the remote system is discovered, a
* RDMA_CM_EVENT_ADDR_RESOLVED event will occur on the local event
@ -351,7 +351,7 @@ static int rdmacm_module_start_connect(ompi_btl_openib_connect_base_module_t *cp
message = (struct message *)endpoint->endpoint_remote_cpc_data->cbm_modex_message;
BTL_VERBOSE(("Connecting to remote ip addr = %x, port = %d ep state = %d",
message->ipaddr, message->port, endpoint->endpoint_state));
message->ipaddr, message->tcp_port, endpoint->endpoint_state));
if (MCA_BTL_IB_CONNECTED == endpoint->endpoint_state ||
MCA_BTL_IB_CONNECTING == endpoint->endpoint_state ||
@ -376,7 +376,7 @@ static int rdmacm_module_start_connect(ompi_btl_openib_connect_base_module_t *cp
* is being connected from, in the isntance where there are
* multiple listeners on the local system.
*/
client->port = ((struct message *)endpoint->endpoint_local_cpc->data.cbm_modex_message)->port;
client->tcp_port = ((struct message *)endpoint->endpoint_local_cpc->data.cbm_modex_message)->tcp_port;
rc = rdma_client_connect(client, message);
if (0 != rc) {
@ -425,7 +425,7 @@ static int handle_connect_request(struct rdmacm_contents *local,
BTL_VERBOSE(("ep state = %d, local ipaddr = %x, remote ipaddr = %x port %d",
endpoint->endpoint_state, local->ipaddr, message->ipaddr, rem_port));
if ((local->ipaddr > message->ipaddr && local->port > rem_port) ||
if ((local->ipaddr > message->ipaddr && local->tcp_port > rem_port) ||
local->ipaddr > message->ipaddr) {
int race = 1;
@ -737,7 +737,7 @@ static int finish_connect(struct rdmacm_contents *local, int num)
localipaddr = ((struct sockaddr_in *)localaddr)->sin_addr.s_addr;
remoteipaddr = ((struct sockaddr_in *)peeraddr)->sin_addr.s_addr;
if ((localipaddr == remoteipaddr && local->port <= remoteport) ||
if ((localipaddr == remoteipaddr && local->tcp_port <= remoteport) ||
localipaddr > remoteipaddr) {
rc = rdmacm_setup_qp(local, local->endpoint, local->id[num], num);
if (0 != rc) {
@ -784,7 +784,7 @@ static int finish_connect(struct rdmacm_contents *local, int num)
msg.qpnum = num;
msg.rem_index = local->endpoint->index;
msg.rem_port = local->port;
msg.rem_port = local->tcp_port;
BTL_VERBOSE(("Connecting from %x, port %d to %x", localipaddr, msg.rem_port, remoteipaddr));
@ -945,51 +945,46 @@ out:
return -1;
}
static int ipaddrcheck(struct rdmacm_contents *server, mca_btl_openib_module_t *openib_btl)
static int ipaddrcheck(struct rdmacm_contents *server,
mca_btl_openib_module_t *openib_btl)
{
int rc, i;
struct ibv_device_attr attr;
rc = ibv_query_device(openib_btl->hca->ib_dev_context, &attr);
if (-1 == rc) {
orte_output_verbose(5, mca_btl_base_output,
"openib BTL: rdmacm CPC system error (verbs failure)");
goto out;
}
for (i = 0; i < attr.phys_port_cnt; i++) {
bool found = false;
uint32_t ipaddr = mca_btl_openib_rdma_get_ipv4addr(openib_btl->hca->ib_dev_context, i+1);
uint32_t ipaddr;
bool already_exists = false;
opal_list_item_t *item;
int server_tcp_port = rdma_get_src_port(server->id[0]);
for (item = opal_list_get_first(&server_list); item != opal_list_get_end(&server_list); item = opal_list_get_next(item)) {
/* Look up the IP address of this device/port */
ipaddr =
mca_btl_openib_rdma_get_ipv4addr(openib_btl->hca->ib_dev_context,
openib_btl->port_num);
if (0 == ipaddr) {
BTL_VERBOSE(("openib BTL: rdmacm CPC unable to find IP address for %s", ibv_get_device_name(openib_btl->hca->ib_dev)));
return OMPI_ERR_NOT_FOUND;
}
/* Ok, we found the IP address of this device/port. Have we
already see this IP address/TCP port before? */
for (item = opal_list_get_first(&server_list);
item != opal_list_get_end(&server_list);
item = opal_list_get_next(item)) {
struct list_item *pitem = (struct list_item *)item;
BTL_VERBOSE(("paddr = %x, ipaddr addr = %x", pitem->item->ipaddr, ipaddr));
if (pitem->item->ipaddr == ipaddr || 0 == ipaddr) {
BTL_VERBOSE(("paddr = %x, ipaddr addr = %x",
pitem->item->ipaddr, ipaddr));
if (pitem->item->ipaddr == ipaddr &&
pitem->item->tcp_port == server_tcp_port) {
BTL_VERBOSE(("addr %x already exists", ipaddr));
found = true;
already_exists = true;
break;
}
}
if (!found) {
/* If we haven't seen it before, save it */
if (!already_exists) {
server->ipaddr = ipaddr;
server->port = rdma_get_src_port(server->id[0]);
break;
}
}
/* It's not an error if these things fail; perhaps RDMA CM is not
supported on this HCA. So just gracefully return "sorry,
Charlie" */
if (0 == server->ipaddr) {
orte_output_verbose(5, mca_btl_base_output, "openib BTL: rdmacm CPC unable to find IP address for %s", ibv_get_device_name(openib_btl->hca->ib_dev));
goto out;
server->tcp_port = server_tcp_port;
}
return OMPI_SUCCESS;
out:
return OMPI_ERROR;
return already_exists ? OMPI_ERROR : OMPI_SUCCESS;
}
static int create_message(struct rdmacm_contents *server, mca_btl_openib_module_t *openib_btl, ompi_btl_openib_connect_base_module_data_t *data)
@ -1003,9 +998,9 @@ static int create_message(struct rdmacm_contents *server, mca_btl_openib_module_
}
message->ipaddr = server->ipaddr;
message->port = server->port;
message->tcp_port = server->tcp_port;
BTL_VERBOSE(("Message IP address is %x, port %d", message->ipaddr, message->port));
BTL_VERBOSE(("Message IP address is %x, port %d", message->ipaddr, message->tcp_port));
data->cbm_modex_message = message;
data->cbm_modex_message_len = sizeof(struct message);
@ -1099,7 +1094,8 @@ static int rdmacm_component_query(mca_btl_openib_module_t *openib_btl, ompi_btl_
goto out5;
}
/* Verify that the HCA has a valid IP address on it, or we cannot use the cpc */
/* Verify that the HCA has a valid IP address on it, or we cannot
use the cpc */
rc = ipaddrcheck(server, openib_btl);
if (0 != rc) {
orte_output_verbose(5, mca_btl_base_output,
@ -1108,8 +1104,10 @@ static int rdmacm_component_query(mca_btl_openib_module_t *openib_btl, ompi_btl_
goto out5;
}
/* Listen on the specified address/port with the rdmacm, limit the amount of incoming connections to 1024 */
/* FIXME - 1024 should be (num of connectors * mca_btl_openib_component.num_qps) */
/* Listen on the specified address/port with the rdmacm, limit the
amount of incoming connections to 1024 */
/* FIXME - 1024 should be (num of connectors *
mca_btl_openib_component.num_qps) */
rc = rdma_listen(server->id[0], 1024);
if (0 != rc) {
orte_output_verbose(5, mca_btl_base_output,