1
1

The local GID table does not contain what I thought it did. It contains local HCA

GIDs (there can be more than one) and not GIDs of the HCA on the network. Entry
zero always has to be initialized, so we use it and warn the user if there is more
than one port active and the default subnet is configured on at least one of them.

This commit was SVN r11815.
Этот коммит содержится в:
Gleb Natapov 2006-09-26 12:12:33 +00:00
родитель 8943f583bf
Коммит 7b1b4f95e3
4 изменённых файлов: 27 добавлений и 4 удалений

Просмотреть файл

@ -48,6 +48,7 @@ extern "C" {
#endif
#define MCA_BTL_IB_LEAVE_PINNED 1
#define IB_DEFAULT_GID_PREFIX 0xfe80000000000000ll
/**
* Infiniband (IB) BTL component.
@ -133,6 +134,9 @@ struct mca_btl_openib_component_t {
/** Whether we want a warning if no HCA-specific parameters are
found in INI files */
bool warn_no_hca_params_found;
/** Whether we want a warning if non default GID prefix is not configured
on multiport setup */
bool warn_default_gid_prefix;
}; typedef struct mca_btl_openib_component_t mca_btl_openib_component_t;
OMPI_MODULE_DECLSPEC extern mca_btl_openib_component_t mca_btl_openib_component;

Просмотреть файл

@ -241,6 +241,17 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_hca_t *hca,
mca_btl_openib_module_t *openib_btl;
mca_btl_base_selected_module_t *ib_selected;
union ibv_gid gid;
uint64_t subnet;
ibv_query_gid(hca->ib_dev_context, port_num, 0, &gid);
subnet = ntoh64(gid.global.subnet_prefix);
if(mca_btl_openib_component.ib_num_btls > 0 &&
IB_DEFAULT_GID_PREFIX == subnet &&
mca_btl_openib_component.warn_default_gid_prefix) {
opal_show_help("help-mpi-btl-openib.txt", "default subnet prefix",
true, orte_system_info.nodename);
}
lmc = (1 << ib_port_attr->lmc);
@ -268,9 +279,7 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_hca_t *hca,
openib_btl->lid = lid;
openib_btl->src_path_bits = lid - ib_port_attr->lid;
/* store the subnet for multi-nic support */
ibv_query_gid(hca->ib_dev_context, port_num, ib_port_attr->sm_lid,
&gid);
openib_btl->port_info.subnet = gid.global.subnet_prefix;
openib_btl->port_info.subnet = subnet;
openib_btl->port_info.mtu = hca->mtu;
openib_btl->ib_reg[MCA_BTL_TAG_BTL].cbfunc = btl_openib_control;
openib_btl->ib_reg[MCA_BTL_TAG_BTL].cbdata = NULL;

Просмотреть файл

@ -112,7 +112,10 @@ int btl_openib_register_mca_params(void)
"Warn when no HCA-specific parameters are found in the INI file specified by the btl_openib_hca_param_files MCA parameter (0 = do not warn; any other value = warn)",
1, &ival, 0));
mca_btl_openib_component.warn_no_hca_params_found = (0 != ival);
CHECK(reg_int("warn_default_gid_prefix",
"Warn when there is more than one active ports and at least one of them connected to the network with only default GID prefix configured (0 = do not warn; any other value = warn)",
1, &ival, 0));
mca_btl_openib_component.warn_default_gid_prefix = (0 != ival);
asprintf(&str, "%s/mca-btl-openib-hca-params.ini", PKGDATADIR);
if (NULL == str) {
return OMPI_ERR_OUT_OF_RESOURCE;

Просмотреть файл

@ -112,3 +112,10 @@ no active ports detected. This is most certainly not what you wanted.
Check your cables and SM configuration.
[error in hca init]
WARNING: There were errors during IB HCA initialization on host '%s'.
[default subnet prefix]
WARNING: There is more than one active port on host '%s', but a non-default
GID prefix is not configured on one or more subnets. This
configuration may fail. Please assign a unique GID prefix to each subnet.
NOTE: You can turn off this warning by setting the MCA parameter
btl_openib_warn_default_gid_prefix to 0.