diff --git a/ompi/mca/btl/openib/btl_openib_component.c b/ompi/mca/btl/openib/btl_openib_component.c index 04adead708..a8ab1b0890 100644 --- a/ompi/mca/btl/openib/btl_openib_component.c +++ b/ompi/mca/btl/openib/btl_openib_component.c @@ -68,6 +68,8 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev); static mca_btl_base_module_t **btl_openib_component_init( int *num_btl_modules, bool enable_progress_threads, bool enable_mpi_threads); +static void merge_values(ompi_btl_openib_ini_values_t *target, + ompi_btl_openib_ini_values_t *src); static int btl_openib_handle_incoming_hp(mca_btl_openib_module_t *openib_btl, mca_btl_openib_endpoint_t *endpoint, mca_btl_openib_frag_t *frag, @@ -280,7 +282,7 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev) mca_btl_openib_hca_t *hca; uint8_t i; int ret = -1; - ompi_btl_openib_ini_values_t values; + ompi_btl_openib_ini_values_t values, default_values; hca = malloc(sizeof(mca_btl_openib_hca_t)); if(NULL == hca){ @@ -303,10 +305,16 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev) goto close_hca; } - /* Load in vendor/part-specific HCA parameters. */ + /* Load in vendor/part-specific HCA parameters. Note that even if + we don't find values for this vendor/part, "values" will be set + indicating that it does not have good values */ ret = ompi_btl_openib_ini_query(hca->ib_dev_attr.vendor_id, hca->ib_dev_attr.vendor_part_id, &values); + if (OMPI_SUCCESS != ret && OMPI_ERR_NOT_FOUND != ret) { + /* If we get a serious error, propagate it upwards */ + goto close_hca; + } if (OMPI_ERR_NOT_FOUND == ret) { /* If we didn't find a matching HCA in the INI files, output a warning that we're using default values (unless overridden @@ -318,36 +326,42 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev) hca->ib_dev_attr.vendor_id, hca->ib_dev_attr.vendor_part_id); } - hca->mtu = mca_btl_openib_component.ib_mtu; - } else if (OMPI_SUCCESS != ret) { - /* We had some other error that wasn't good -- we should abort - upwards */ + } + /* Note that even if we don't find default values, "values" will + be set indicating that it does not have good values */ + ret = ompi_btl_openib_ini_query(0, 0, &default_values); + if (OMPI_SUCCESS != ret && OMPI_ERR_NOT_FOUND != ret) { + /* If we get a serious error, propagate it upwards */ goto close_hca; - } else { - /* If we did find values for this HCA, handle them */ - if (values.mtu_set) { - switch (values.mtu) { - case 256: - hca->mtu = IBV_MTU_256; - break; - case 512: - hca->mtu = IBV_MTU_512; - break; - case 1024: - hca->mtu = IBV_MTU_1024; - break; - case 2048: - hca->mtu = IBV_MTU_2048; - break; - case 4096: - hca->mtu = IBV_MTU_4096; - break; - default: - BTL_ERROR(("invalid MTU value specified in INI file (%d); ignored\n", values.mtu)); - hca->mtu = mca_btl_openib_component.ib_mtu; - break; - } + } + + /* If we did find values for this HCA (or in the defaults + section), handle them */ + merge_values(&values, &default_values); + if (values.mtu_set) { + switch (values.mtu) { + case 256: + hca->mtu = IBV_MTU_256; + break; + case 512: + hca->mtu = IBV_MTU_512; + break; + case 1024: + hca->mtu = IBV_MTU_1024; + break; + case 2048: + hca->mtu = IBV_MTU_2048; + break; + case 4096: + hca->mtu = IBV_MTU_4096; + break; + default: + BTL_ERROR(("invalid MTU value specified in INI file (%d); ignored\n", values.mtu)); + hca->mtu = mca_btl_openib_component.ib_mtu; + break; } + } else { + hca->mtu = mca_btl_openib_component.ib_mtu; } hca->ib_pd = ibv_alloc_pd(hca->ib_dev_context); @@ -658,6 +672,16 @@ btl_openib_component_init(int *num_btl_modules, } +static void merge_values(ompi_btl_openib_ini_values_t *target, + ompi_btl_openib_ini_values_t *src) +{ + if (!target->mtu_set && src->mtu_set) { + target->mtu = src->mtu; + target->mtu_set = true; + } +} + + static int btl_openib_handle_incoming_hp(mca_btl_openib_module_t *openib_btl, mca_btl_openib_endpoint_t *endpoint, mca_btl_openib_frag_t *frag, diff --git a/ompi/mca/btl/openib/btl_openib_ini.c b/ompi/mca/btl/openib/btl_openib_ini.c index b5024571cb..ff6426f39a 100644 --- a/ompi/mca/btl/openib/btl_openib_ini.c +++ b/ompi/mca/btl/openib/btl_openib_ini.c @@ -21,6 +21,7 @@ #include #include +#include #include "opal/util/output.h" #include "opal/util/show_help.h" @@ -44,9 +45,9 @@ static size_t key_buffer_len = 0; typedef struct parsed_section_values_t { char *name; - uint32_t vendor_id; - bool vendor_id_set; - + uint32_t *vendor_ids; + int vendor_ids_len; + uint32_t *vendor_part_ids; int vendor_part_ids_len; @@ -90,6 +91,7 @@ static void reset_section(bool had_previous_value, parsed_section_values_t *s); static void reset_values(ompi_btl_openib_ini_values_t *v); static int save_section(parsed_section_values_t *s); static int intify(char *string); +static int intify_list(char *str, uint32_t **values, int *len); static inline void show_help(const char *topic); @@ -306,7 +308,7 @@ cleanup: static int parse_line(parsed_section_values_t *sv) { int val, ret = OMPI_SUCCESS; - char *value, *comma; + char *value; bool showed_unknown_field_warning = false; /* Save the name name */ @@ -359,47 +361,16 @@ static int parse_line(parsed_section_values_t *sv) all whitespace at the beginning and ending of the value. */ if (0 == strcasecmp(key_buffer, "vendor_id")) { - /* Single value */ - sv->vendor_id = (uint32_t) intify(value); - sv->vendor_id_set = true; + if (OMPI_SUCCESS != (ret = intify_list(value, &sv->vendor_ids, + &sv->vendor_ids_len))) { + return ret; + } } else if (0 == strcasecmp(key_buffer, "vendor_part_id")) { - char *str = value; - - /* Comma-delimited list of values */ - comma = strchr(str, ','); - if (NULL == comma) { - /* If we only got one value (i.e., no comma found), then - just make an array of one value and save it */ - sv->vendor_part_ids = malloc(sizeof(uint32_t)); - if (NULL == sv->vendor_part_ids) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - sv->vendor_part_ids[0] = (uint32_t) intify(str); - sv->vendor_part_ids_len = 1; - } else { - /* If we found a comma, loop over all the values. Be a - little clever in that we alwasy alloc enough space for - an extra value so that when we exit the loop, we don't - have to realloc again to get space for the last item. */ - do { - *comma = '\0'; - sv->vendor_part_ids = realloc(sv->vendor_part_ids, - sizeof(uint32_t) * - (sv->vendor_part_ids_len + 2)); - sv->vendor_part_ids[sv->vendor_part_ids_len] = - (int32_t) intify(str); - ++sv->vendor_part_ids_len; - str = comma + 1; - comma = strchr(str, ','); - } while (NULL != comma); - /* Get the last value (i.e., the value after the last - comma, because it won't have been snarfed in the - loop) */ - sv->vendor_part_ids[sv->vendor_part_ids_len] = - (uint32_t) intify(str); - ++sv->vendor_part_ids_len; + if (OMPI_SUCCESS != (ret = intify_list(value, &sv->vendor_part_ids, + &sv->vendor_part_ids_len))) { + return ret; } } @@ -457,14 +428,17 @@ static void reset_section(bool had_previous_value, parsed_section_values_t *s) if (NULL != s->name) { free(s->name); } + if (NULL != s->vendor_ids) { + free(s->vendor_ids); + } if (NULL != s->vendor_part_ids) { free(s->vendor_part_ids); } } s->name = NULL; - s->vendor_id = 0; - s->vendor_id_set = false; + s->vendor_ids = NULL; + s->vendor_ids_len = 0; s->vendor_part_ids = NULL; s->vendor_part_ids_len = 0; @@ -490,46 +464,51 @@ static void reset_values(ompi_btl_openib_ini_values_t *v) */ static int save_section(parsed_section_values_t *s) { - int i; + int i, j; opal_list_item_t *item; hca_values_t *h; bool found; /* Is the parsed section valid? */ - if (NULL == s->name || !s->vendor_id_set || 0 == s->vendor_part_ids_len) { + if (NULL == s->name || 0 == s->vendor_ids_len || + 0 == s->vendor_part_ids_len) { return OMPI_ERR_BAD_PARAM; } - /* Iterate over each of the part IDs in the parsed values */ - for (i = 0; i < s->vendor_part_ids_len; ++i) { - found = false; + /* Iterate over each of the vendor/part IDs in the parsed + values */ + for (i = 0; i < s->vendor_ids_len; ++i) { + for (j = 0; j < s->vendor_part_ids_len; ++j) { + found = false; - /* Iterate over all the saved hcas */ - for (item = opal_list_get_first(&hcas); - item != opal_list_get_end(&hcas); - item = opal_list_get_next(item)) { - h = (hca_values_t*) item; - if (s->vendor_id == h->vendor_id && - s->vendor_part_ids[i] == h->vendor_part_id) { - /* Found a match. Update any newly-set values. */ - if (s->values.mtu_set) { - h->values.mtu = s->values.mtu; - h->values.mtu_set = true; - found = true; - break; + /* Iterate over all the saved hcas */ + for (item = opal_list_get_first(&hcas); + item != opal_list_get_end(&hcas); + item = opal_list_get_next(item)) { + h = (hca_values_t*) item; + if (s->vendor_ids[i] == h->vendor_id && + s->vendor_part_ids[j] == h->vendor_part_id) { + /* Found a match. Update any newly-set values. */ + if (s->values.mtu_set) { + h->values.mtu = s->values.mtu; + h->values.mtu_set = true; + found = true; + break; + } } } - } - /* Did we find/update it in the exising list? If not, create - a new one. */ - if (!found) { - h = OBJ_NEW(hca_values_t); - h->section_name = strdup(s->name); - h->vendor_id = s->vendor_id; - h->vendor_part_id = s->vendor_part_ids[i]; - h->values = s->values; - opal_list_append(&hcas, &h->super); + /* Did we find/update it in the exising list? If not, + create a new one. */ + if (!found) { + h = OBJ_NEW(hca_values_t); + h->section_name = strdup(s->name); + h->vendor_id = s->vendor_ids[i]; + h->vendor_part_id = s->vendor_part_ids[j]; + h->values = s->values; + opal_list_append(&hcas, &h->super); + printf("Created combo: %0x/%d\n", h->vendor_id, h->vendor_part_id); + } } } @@ -560,6 +539,50 @@ static int intify(char *str) } +/* + * Take a comma-delimited list and infity them all + */ +static int intify_list(char *value, uint32_t **values, int *len) +{ + char *comma; + char *str = value; + + *len = 0; + + /* Comma-delimited list of values */ + comma = strchr(str, ','); + if (NULL == comma) { + /* If we only got one value (i.e., no comma found), then + just make an array of one value and save it */ + *values = malloc(sizeof(uint32_t)); + if (NULL == *values) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + *values[0] = (uint32_t) intify(str); + *len = 1; + } else { + /* If we found a comma, loop over all the values. Be a + little clever in that we alwasy alloc enough space for + an extra value so that when we exit the loop, we don't + have to realloc again to get space for the last item. */ + do { + *comma = '\0'; + *values = realloc(*values, sizeof(uint32_t) * (*len + 2)); + (*values)[*len] = (int32_t) intify(str); + ++(*len); + str = comma + 1; + comma = strchr(str, ','); + } while (NULL != comma); + /* Get the last value (i.e., the value after the last + comma, because it won't have been snarfed in the + loop) */ + (*values)[*len] = (uint32_t) intify(str); + ++(*len); + } + + return OMPI_SUCCESS; +} + /* * Trival helper function */ diff --git a/ompi/mca/btl/openib/mca-btl-openib-hca-params.ini b/ompi/mca/btl/openib/mca-btl-openib-hca-params.ini index 457b18d9d5..8c963f73a7 100644 --- a/ompi/mca/btl/openib/mca-btl-openib-hca-params.ini +++ b/ompi/mca/btl/openib/mca-btl-openib-hca-params.ini @@ -47,7 +47,8 @@ # These are the default values, identified by the vendor and part ID # numbers of 0 and 0. If queried HCA does not return vendor and part # ID numbers that match any of the sections in this file, the values -# in this section are used. +# in this section are used. Vendor IDs and part IDs can be hex or +# decimal. vendor_id = 0 vendor_part_id = 0 mtu = 1024 @@ -55,21 +56,28 @@ mtu = 1024 ############################################################################ [Mellanox Tavor Infinihost] -# Vendor ID's can be hex or decimal -vendor_id = 0x5ad +# Several vendors resell Mellanox hardware and put their own firmware +# on the cards, therefore overriding the default Mellanox vendor ID. +# +# Mellanox 0x2c9 +# Cisco 0x5ad +# Silverstorm 0x66a +# Voltaire 0x8f1 +# +vendor_id = 0x2c9,0x5ad,0x66a,0x8f1 vendor_part_id = 2310 mtu = 1024 ############################################################################ [Mellanox Arbel InfiniHost III MemFree/Tavor] -vendor_id = 0x5ad +vendor_id = 0x2c9,0x5ad,0x66a,0x8f1 vendor_part_id = 25208,25218 mtu = 1024 ############################################################################ [Mellanox Sinai Infinihost III] -vendor_id = 0x5ad +vendor_id = 0x2c9,0x5ad,0x66a,0x8f1 vendor_part_id = 25204 mtu = 2048