The local GID table does not contain what I thought it did. It contains the local HCA's
GIDs (there can be more than one), not the GIDs of the HCAs on the network. Entry zero always has to be initialized, so we use it, and we warn the user if more than one port is active and the default subnet prefix is configured on at least one of them. This commit was SVN r11815.
Этот коммит содержится в:
родитель
8943f583bf
Коммит
7b1b4f95e3
@ -48,6 +48,7 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
#define MCA_BTL_IB_LEAVE_PINNED 1
|
||||
/* Default GID subnet prefix, meant to be compared against a subnet prefix
 * that has already been converted to host byte order.  The value does not
 * fit in a signed 64-bit integer, so it carries an unsigned suffix. */
#define IB_DEFAULT_GID_PREFIX 0xfe80000000000000ull
|
||||
|
||||
/**
|
||||
* Infiniband (IB) BTL component.
|
||||
@ -133,6 +134,9 @@ struct mca_btl_openib_component_t {
|
||||
/** Whether we want a warning if no HCA-specific parameters are
|
||||
found in INI files */
|
||||
bool warn_no_hca_params_found;
|
||||
/** Whether we want a warning if a non-default GID prefix is not configured
|
||||
on a multiport setup */
|
||||
bool warn_default_gid_prefix;
|
||||
}; typedef struct mca_btl_openib_component_t mca_btl_openib_component_t;
|
||||
|
||||
OMPI_MODULE_DECLSPEC extern mca_btl_openib_component_t mca_btl_openib_component;
|
||||
|
@ -241,6 +241,17 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_hca_t *hca,
|
||||
mca_btl_openib_module_t *openib_btl;
|
||||
mca_btl_base_selected_module_t *ib_selected;
|
||||
union ibv_gid gid;
|
||||
uint64_t subnet;
|
||||
|
||||
ibv_query_gid(hca->ib_dev_context, port_num, 0, &gid);
|
||||
subnet = ntoh64(gid.global.subnet_prefix);
|
||||
|
||||
if(mca_btl_openib_component.ib_num_btls > 0 &&
|
||||
IB_DEFAULT_GID_PREFIX == subnet &&
|
||||
mca_btl_openib_component.warn_default_gid_prefix) {
|
||||
opal_show_help("help-mpi-btl-openib.txt", "default subnet prefix",
|
||||
true, orte_system_info.nodename);
|
||||
}
|
||||
|
||||
lmc = (1 << ib_port_attr->lmc);
|
||||
|
||||
@ -268,9 +279,7 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_hca_t *hca,
|
||||
openib_btl->lid = lid;
|
||||
openib_btl->src_path_bits = lid - ib_port_attr->lid;
|
||||
/* store the subnet for multi-nic support */
|
||||
ibv_query_gid(hca->ib_dev_context, port_num, ib_port_attr->sm_lid,
|
||||
&gid);
|
||||
openib_btl->port_info.subnet = gid.global.subnet_prefix;
|
||||
openib_btl->port_info.subnet = subnet;
|
||||
openib_btl->port_info.mtu = hca->mtu;
|
||||
openib_btl->ib_reg[MCA_BTL_TAG_BTL].cbfunc = btl_openib_control;
|
||||
openib_btl->ib_reg[MCA_BTL_TAG_BTL].cbdata = NULL;
|
||||
|
@ -112,7 +112,10 @@ int btl_openib_register_mca_params(void)
|
||||
"Warn when no HCA-specific parameters are found in the INI file specified by the btl_openib_hca_param_files MCA parameter (0 = do not warn; any other value = warn)",
|
||||
1, &ival, 0));
|
||||
mca_btl_openib_component.warn_no_hca_params_found = (0 != ival);
|
||||
|
||||
CHECK(reg_int("warn_default_gid_prefix",
|
||||
"Warn when there is more than one active ports and at least one of them connected to the network with only default GID prefix configured (0 = do not warn; any other value = warn)",
|
||||
1, &ival, 0));
|
||||
mca_btl_openib_component.warn_default_gid_prefix = (0 != ival);
|
||||
asprintf(&str, "%s/mca-btl-openib-hca-params.ini", PKGDATADIR);
|
||||
if (NULL == str) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
|
@ -112,3 +112,10 @@ no active ports detected. This is most certainly not what you wanted.
|
||||
Check your cables and SM configuration.
|
||||
[error in hca init]
|
||||
WARNING: There were errors during IB HCA initialization on host '%s'.
|
||||
[default subnet prefix]
|
||||
WARNING: There is more than one active port on host '%s', but a
|
||||
non-default GID prefix is not configured on one or more subnets. This
|
||||
configuration may fail. Please assign a unique GID prefix to each subnet.
|
||||
|
||||
NOTE: You can turn off this warning by setting the MCA parameter
|
||||
btl_openib_warn_default_gid_prefix to 0.
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user