* Fix #1916: endian problems in iwarp wireup on big endian machines
  (now works on both big and little endian machines)
* Be a little more flexible when looking for active devices in btl_openib_component.c
* Add device name and port number to lots of verbose and help messages
* Add a bunch of verbose messages to give insight into what is occurring during all the CPC wireups

This commit was SVN r21418.
Этот коммит содержится в:
родитель
4881cd0df3
Коммит
814a8f5e0f
@ -2380,17 +2380,21 @@ btl_openib_component_init(int *num_btl_modules,
|
||||
|
||||
/* Copy the btl module structs into a contiguous array and fully
|
||||
initialize them */
|
||||
for(i = 0; i < mca_btl_openib_component.ib_num_btls; i++){
|
||||
item = opal_list_remove_first(&btl_list);
|
||||
i = 0;
|
||||
while (NULL != (item = opal_list_remove_first(&btl_list))) {
|
||||
ib_selected = (mca_btl_base_selected_module_t*)item;
|
||||
openib_btl = (mca_btl_openib_module_t*)ib_selected->btl_module;
|
||||
|
||||
/* Do we have at least one CPC that can handle this
|
||||
port? */
|
||||
ret =
|
||||
ompi_btl_openib_connect_base_select_for_local_port(openib_btl);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
/* We already did a show_help in the lower layer */
|
||||
/* Search for a CPC that can handle this port */
|
||||
ret = ompi_btl_openib_connect_base_select_for_local_port(openib_btl);
|
||||
/* If we get NOT_SUPPORTED, then no CPC was found for this
|
||||
port. But that's not a fatal error -- just keep going;
|
||||
let's see if we find any usable openib modules or not. */
|
||||
if (OMPI_ERR_NOT_SUPPORTED == ret) {
|
||||
continue;
|
||||
} else if (OMPI_SUCCESS != ret) {
|
||||
/* All others *are* fatal. Note that we already did a
|
||||
show_help in the lower layer */
|
||||
goto no_btls;
|
||||
}
|
||||
|
||||
@ -2400,7 +2404,15 @@ btl_openib_component_init(int *num_btl_modules,
|
||||
if (finish_btl_init(openib_btl) != OMPI_SUCCESS) {
|
||||
goto no_btls;
|
||||
}
|
||||
}
|
||||
++i;
|
||||
}
|
||||
/* If we got nothing, then error out */
|
||||
if (0 == i) {
|
||||
goto no_btls;
|
||||
}
|
||||
/* Otherwise reset to the number of openib modules that we
|
||||
actually got */
|
||||
mca_btl_openib_component.ib_num_btls = i;
|
||||
|
||||
btl_openib_modex_send();
|
||||
|
||||
|
@ -210,8 +210,8 @@ static int ipaddr_specified(struct sockaddr_in *ipaddr, uint32_t netmask)
|
||||
opal_argv_free(temp);
|
||||
continue;
|
||||
}
|
||||
list_subnet = ipae.s_addr & ~(~0 << atoi(temp[1]));
|
||||
subnet = ipaddr->sin_addr.s_addr & ~(~0 << netmask);
|
||||
list_subnet = ntohl(ipae.s_addr) & ~(~0 >> atoi(temp[1]));
|
||||
subnet = ntohl(ipaddr->sin_addr.s_addr) & ~(~0 >> netmask);
|
||||
opal_argv_free(temp);
|
||||
|
||||
if (subnet == list_subnet) {
|
||||
@ -252,8 +252,8 @@ static int ipaddr_specified(struct sockaddr_in *ipaddr, uint32_t netmask)
|
||||
opal_argv_free(temp);
|
||||
continue;
|
||||
}
|
||||
list_subnet = ipae.s_addr & ~(~0 << atoi(temp[1]));
|
||||
subnet = ipaddr->sin_addr.s_addr & ~(~0 << netmask);
|
||||
list_subnet = ntohl(ipae.s_addr) & ~(~0 >> atoi(temp[1]));
|
||||
subnet = ntohl(ipaddr->sin_addr.s_addr) & ~(~0 >> netmask);
|
||||
opal_argv_free(temp);
|
||||
|
||||
if (subnet == list_subnet) {
|
||||
@ -318,7 +318,7 @@ static int add_rdma_addr(struct sockaddr *ipaddr, uint32_t netmask)
|
||||
|
||||
sinp = (struct sockaddr_in *)ipaddr;
|
||||
myaddr->addr = sinp->sin_addr.s_addr;
|
||||
myaddr->subnet = myaddr->addr & ~(~0 << netmask);
|
||||
myaddr->subnet = ntohl(myaddr->addr) & ~(~0 >> netmask);
|
||||
inet_ntop(sinp->sin_family, &sinp->sin_addr,
|
||||
myaddr->addr_str, sizeof(myaddr->addr_str));
|
||||
memcpy(myaddr->dev_name, cm_id->verbs->device->name, IBV_SYSFS_NAME_MAX);
|
||||
|
@ -294,7 +294,7 @@ int ompi_btl_openib_connect_base_select_for_local_port(mca_btl_openib_module_t *
|
||||
"no cpcs for port", true,
|
||||
orte_process_info.nodename,
|
||||
ibv_get_device_name(btl->device->ib_dev),
|
||||
msg);
|
||||
btl->port_num, msg);
|
||||
free(cpcs);
|
||||
free(msg);
|
||||
return OMPI_ERR_NOT_SUPPORTED;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2007-2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2008 Mellanox Technologies. All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
@ -654,7 +654,9 @@ static int ibcm_component_query(mca_btl_openib_module_t *btl,
|
||||
iWarp), so we can safely assume that we can use this CPC. */
|
||||
#if defined(HAVE_STRUCT_IBV_DEVICE_TRANSPORT_TYPE)
|
||||
if (IBV_TRANSPORT_IB != btl->device->ib_dev->transport_type) {
|
||||
BTL_VERBOSE(("ibcm CPC only supported on InfiniBand"));
|
||||
BTL_VERBOSE(("ibcm CPC only supported on InfiniBand; skipped on %s:%d",
|
||||
ibv_get_device_name(btl->device->ib_dev),
|
||||
openib_btl->port_num));
|
||||
rc = OMPI_ERR_NOT_SUPPORTED;
|
||||
goto error;
|
||||
}
|
||||
@ -662,7 +664,9 @@ static int ibcm_component_query(mca_btl_openib_module_t *btl,
|
||||
|
||||
/* IBCM is not supported if we have any XRC QPs */
|
||||
if (mca_btl_openib_component.num_xrc_qps > 0) {
|
||||
BTL_VERBOSE(("ibcm CPC not supported with XRC receive queues, please try xoob CPC; skipped"));
|
||||
BTL_VERBOSE(("ibcm CPC not supported with XRC receive queues, please try xoob CPC; skipped on %s:%d",
|
||||
ibv_get_device_name(btl->device->ib_dev),
|
||||
openib_btl->port_num));
|
||||
rc = OMPI_ERR_NOT_SUPPORTED;
|
||||
goto error;
|
||||
}
|
||||
@ -754,9 +758,10 @@ static int ibcm_component_query(mca_btl_openib_module_t *btl,
|
||||
rc = OMPI_ERR_NOT_SUPPORTED;
|
||||
goto error;
|
||||
}
|
||||
OPAL_OUTPUT((-1, "opened ibcm device 0x%" PRIx64 " (%s)",
|
||||
OPAL_OUTPUT((-1, "opened ibcm device 0x%" PRIx64 " (%s:%d)",
|
||||
(uint64_t) cmh->cm_device,
|
||||
ibv_get_device_name(cmh->ib_context->device)));
|
||||
ibv_get_device_name(cmh->ib_context->device),
|
||||
openib_btl->port_num));
|
||||
|
||||
if (0 != (rc = ib_cm_create_id(cmh->cm_device,
|
||||
&cmh->listen_cm_id, NULL))) {
|
||||
@ -808,10 +813,11 @@ static int ibcm_component_query(mca_btl_openib_module_t *btl,
|
||||
different formula). Query for the Nth GID (N = MCA param) on
|
||||
the port. */
|
||||
if (ibcm_gid_table_index > btl->ib_port_attr.gid_tbl_len) {
|
||||
BTL_ERROR(("desired GID table index (%d) is larger than the actual table size (%d) on device %s",
|
||||
BTL_ERROR(("desired GID table index (%d) is larger than the actual table size (%d) on %s:%d",
|
||||
ibcm_gid_table_index,
|
||||
btl->ib_port_attr.gid_tbl_len,
|
||||
ibv_get_device_name(btl->device->ib_dev)));
|
||||
ibv_get_device_name(btl->device->ib_dev),
|
||||
btl->port_num));
|
||||
rc = OMPI_ERR_UNREACH;
|
||||
goto error;
|
||||
}
|
||||
@ -842,19 +848,22 @@ static int ibcm_component_query(mca_btl_openib_module_t *btl,
|
||||
|
||||
/* All done */
|
||||
*cpc = (ompi_btl_openib_connect_base_module_t *) m;
|
||||
BTL_VERBOSE(("available for use on %s",
|
||||
ibv_get_device_name(btl->device->ib_dev)));
|
||||
BTL_VERBOSE(("available for use on %s:%d",
|
||||
ibv_get_device_name(btl->device->ib_dev),
|
||||
btl->port_num));
|
||||
TIMER_STOP(QUERY);
|
||||
return OMPI_SUCCESS;
|
||||
|
||||
error:
|
||||
ibcm_module_finalize(btl, (ompi_btl_openib_connect_base_module_t *) m);
|
||||
if (OMPI_ERR_NOT_SUPPORTED == rc) {
|
||||
BTL_VERBOSE(("unavailable for use on %s; skipped",
|
||||
ibv_get_device_name(btl->device->ib_dev)));
|
||||
BTL_VERBOSE(("unavailable for use on %s:%d; skipped",
|
||||
ibv_get_device_name(btl->device->ib_dev),
|
||||
btl->port_num));
|
||||
} else {
|
||||
BTL_VERBOSE(("unavailable for use on %s; fatal error %d (%s)",
|
||||
ibv_get_device_name(btl->device->ib_dev), rc,
|
||||
BTL_VERBOSE(("unavailable for use on %s:%d; fatal error %d (%s)",
|
||||
ibv_get_device_name(btl->device->ib_dev),
|
||||
btl->port_num, rc,
|
||||
opal_strerror(rc)));
|
||||
}
|
||||
return rc;
|
||||
@ -923,6 +932,7 @@ static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp,
|
||||
orte_show_help("help-mpi-btl-openib-cpc-base.txt",
|
||||
"inline truncated", orte_process_info.nodename,
|
||||
ibv_get_device_name(openib_btl->device->ib_dev),
|
||||
openib_btl->port_num,
|
||||
req_inline, init_attr.cap.max_inline_data);
|
||||
} else {
|
||||
endpoint->qps[qp].ib_inline_max = req_inline;
|
||||
|
@ -122,15 +122,18 @@ static int oob_component_query(mca_btl_openib_module_t *btl,
|
||||
#if defined(HAVE_STRUCT_IBV_DEVICE_TRANSPORT_TYPE)
|
||||
if (IBV_TRANSPORT_IB != btl->device->ib_dev->transport_type) {
|
||||
opal_output_verbose(5, mca_btl_base_output,
|
||||
"openib BTL: oob CPC only supported on InfiniBand; skipped on device %s",
|
||||
ibv_get_device_name(btl->device->ib_dev));
|
||||
"openib BTL: oob CPC only supported on InfiniBand; skipped on %s:%d",
|
||||
ibv_get_device_name(btl->device->ib_dev),
|
||||
btl->port_num);
|
||||
return OMPI_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (mca_btl_openib_component.num_xrc_qps > 0) {
|
||||
opal_output_verbose(5, mca_btl_base_output,
|
||||
"openib BTL: oob CPC not supported with XRC receive queues, please try xoob CPC; skipped");
|
||||
"openib BTL: oob CPC not supported with XRC receive queues, please try xoob CPC; skipped on %s:%d",
|
||||
ibv_get_device_name(btl->device->ib_dev),
|
||||
btl->port_num);
|
||||
return OMPI_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
/* If this btl supports OOB, then post the RML message. But
|
||||
@ -171,8 +174,9 @@ static int oob_component_query(mca_btl_openib_module_t *btl,
|
||||
(*cpc)->cbm_uses_cts = false;
|
||||
|
||||
opal_output_verbose(5, mca_btl_base_output,
|
||||
"openib BTL: oob CPC available for use on %s",
|
||||
ibv_get_device_name(btl->device->ib_dev));
|
||||
"openib BTL: oob CPC available for use on %s:%d",
|
||||
ibv_get_device_name(btl->device->ib_dev),
|
||||
btl->port_num);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -468,6 +472,7 @@ static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp,
|
||||
orte_show_help("help-mpi-btl-openib-cpc-base.txt",
|
||||
"inline truncated", true, orte_process_info.nodename,
|
||||
ibv_get_device_name(openib_btl->device->ib_dev),
|
||||
openib_btl->port_num,
|
||||
req_inline, init_attr.cap.max_inline_data);
|
||||
} else {
|
||||
endpoint->qps[qp].ib_inline_max = req_inline;
|
||||
|
@ -427,6 +427,7 @@ static int rdmacm_setup_qp(rdmacm_contents_t *contents,
|
||||
"inline truncated", true,
|
||||
orte_process_info.nodename,
|
||||
ibv_get_device_name(contents->openib_btl->device->ib_dev),
|
||||
contents->openib_btl->port_num,
|
||||
req_inline, attr.cap.max_inline_data);
|
||||
} else {
|
||||
endpoint->qps[qpnum].ib_inline_max = req_inline;
|
||||
@ -1752,7 +1753,9 @@ static int rdmacm_component_query(mca_btl_openib_module_t *openib_btl, ompi_btl_
|
||||
|
||||
/* RDMACM is not supported if we have any XRC QPs */
|
||||
if (mca_btl_openib_component.num_xrc_qps > 0) {
|
||||
BTL_VERBOSE(("rdmacm CPC not supported with XRC receive queues, please try xoob CPC; skipped"));
|
||||
BTL_VERBOSE(("rdmacm CPC not supported with XRC receive queues, please try xoob CPC; skipped on %s:%d",
|
||||
ibv_get_device_name(openib_btl->device->ib_dev),
|
||||
openib_btl->port_num));
|
||||
rc = OMPI_ERR_NOT_SUPPORTED;
|
||||
goto out;
|
||||
}
|
||||
@ -1853,8 +1856,9 @@ static int rdmacm_component_query(mca_btl_openib_module_t *openib_btl, ompi_btl_
|
||||
opal_list_append(&server_listener_list, &(server->super));
|
||||
|
||||
opal_output_verbose(5, mca_btl_base_output,
|
||||
"openib BTL: rdmacm CPC available for use on %s",
|
||||
ibv_get_device_name(openib_btl->device->ib_dev));
|
||||
"openib BTL: rdmacm CPC available for use on %s:%d",
|
||||
ibv_get_device_name(openib_btl->device->ib_dev),
|
||||
openib_btl->port_num);
|
||||
return OMPI_SUCCESS;
|
||||
|
||||
out5:
|
||||
@ -1869,12 +1873,14 @@ out1:
|
||||
out:
|
||||
if (OMPI_ERR_NOT_SUPPORTED == rc) {
|
||||
opal_output_verbose(5, mca_btl_base_output,
|
||||
"openib BTL: rdmacm CPC unavailable for use on %s; skipped",
|
||||
ibv_get_device_name(openib_btl->device->ib_dev));
|
||||
"openib BTL: rdmacm CPC unavailable for use on %s:%d; skipped",
|
||||
ibv_get_device_name(openib_btl->device->ib_dev),
|
||||
openib_btl->port_num);
|
||||
} else {
|
||||
opal_output_verbose(5, mca_btl_base_output,
|
||||
"openib BTL: rmacm CPC unavailable for use on %s; fatal error %d (%s)",
|
||||
ibv_get_device_name(openib_btl->device->ib_dev), rc,
|
||||
"openib BTL: rmacm CPC unavailable for use on %s:%d; fatal error %d (%s)",
|
||||
ibv_get_device_name(openib_btl->device->ib_dev),
|
||||
openib_btl->port_num, rc,
|
||||
opal_strerror(rc));
|
||||
}
|
||||
return rc;
|
||||
|
@ -414,6 +414,7 @@ static int xoob_send_qp_create (mca_btl_base_endpoint_t* endpoint)
|
||||
orte_show_help("help-mpi-btl-openib-cpc-base.txt",
|
||||
"inline truncated", orte_process_info.nodename,
|
||||
ibv_get_device_name(openib_btl->device->ib_dev),
|
||||
openib_btl->port_num,
|
||||
req_inline, qp_init_attr.cap.max_inline_data);
|
||||
} else {
|
||||
endpoint->qps[0].ib_inline_max = req_inline;
|
||||
@ -956,8 +957,9 @@ static int xoob_component_query(mca_btl_openib_module_t *openib_btl,
|
||||
|
||||
if (mca_btl_openib_component.num_xrc_qps <= 0) {
|
||||
opal_output_verbose(5, mca_btl_base_output,
|
||||
"openib BTL: xoob CPC only supported with XRC receive queues; skipped on device %s",
|
||||
ibv_get_device_name(openib_btl->device->ib_dev));
|
||||
"openib BTL: xoob CPC only supported with XRC receive queues; skipped on %s:%d",
|
||||
ibv_get_device_name(openib_btl->device->ib_dev),
|
||||
openib_btl->port_num);
|
||||
return OMPI_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
@ -998,8 +1000,9 @@ static int xoob_component_query(mca_btl_openib_module_t *openib_btl,
|
||||
(*cpc)->cbm_uses_cts = false;
|
||||
|
||||
opal_output_verbose(5, mca_btl_base_output,
|
||||
"openib BTL: xoob CPC available for use on %s",
|
||||
ibv_get_device_name(openib_btl->device->ib_dev));
|
||||
"openib BTL: xoob CPC available for use on %s:%d",
|
||||
ibv_get_device_name(openib_btl->device->ib_dev),
|
||||
openib_btl->port_num);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- text -*-
|
||||
#
|
||||
# Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
@ -17,6 +17,7 @@ support) will be disabled for this port.
|
||||
|
||||
Local host: %s
|
||||
Local device: %s
|
||||
Local port: %d
|
||||
CPCs attempted: %s
|
||||
#
|
||||
[cpc name not found]
|
||||
@ -36,5 +37,6 @@ a smaller inline data value than was requested.
|
||||
|
||||
Local host: %s
|
||||
Local device: %s
|
||||
Local port: %d
|
||||
Requested value: %d
|
||||
Value used by device: %d
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user