From 91bdbc067344cc39797a2ef53112c124c866a09a Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Wed, 30 Aug 2006 20:21:47 +0000 Subject: [PATCH] This commit fixes a few things. It looks bigger than it is because a bunch of code changed indenting level and some code got moved out of one function and made into its own subroutine. - Gleb pointed out that I wasn't taking into account values from the default section of the INI file (and not finding values in the INI file is not an error). - I incorrectly thought that 0x5ad was Mellanox's vendor ID. Turns out that 0x5ad is Cisco's ID, while 0x2c9 is Mellanox. Specifically, Cisco burns its own firmware into the HCA which replaces the vendor ID, although the part ID stays the same. So it's Mellanox hardware with Cisco firmware. And apparently several of us do that. :-) So I expanded the concept of the vendor_id in the INI file to allow for lists of vendor IDs. - Along with that, I updated the default INI file to list all the IB vendors (that I am aware of -- certainly open to putting more data in there from other vendors) who overwrite Mellanox's vendor_id with their own for the part numbers that we have on file. This commit was SVN r11506. --- ompi/mca/btl/openib/btl_openib_component.c | 84 +++++---- ompi/mca/btl/openib/btl_openib_ini.c | 167 ++++++++++-------- .../btl/openib/mca-btl-openib-hca-params.ini | 18 +- 3 files changed, 162 insertions(+), 107 deletions(-) diff --git a/ompi/mca/btl/openib/btl_openib_component.c b/ompi/mca/btl/openib/btl_openib_component.c index 04adead708..a8ab1b0890 100644 --- a/ompi/mca/btl/openib/btl_openib_component.c +++ b/ompi/mca/btl/openib/btl_openib_component.c @@ -68,6 +68,8 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev); static mca_btl_base_module_t **btl_openib_component_init( int *num_btl_modules, bool enable_progress_threads, bool enable_mpi_threads); +static void merge_values(ompi_btl_openib_ini_values_t *target, + ompi_btl_openib_ini_values_t *src); static int btl_openib_handle_incoming_hp(mca_btl_openib_module_t *openib_btl, mca_btl_openib_endpoint_t *endpoint, mca_btl_openib_frag_t *frag, @@ -280,7 +282,7 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev) mca_btl_openib_hca_t *hca; uint8_t i; int ret = -1; - ompi_btl_openib_ini_values_t values; + ompi_btl_openib_ini_values_t values, default_values; hca = malloc(sizeof(mca_btl_openib_hca_t)); if(NULL == hca){ @@ -303,10 +305,16 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev) goto close_hca; } - /* Load in vendor/part-specific HCA parameters. */ + /* Load in vendor/part-specific HCA parameters. Note that even if + we don't find values for this vendor/part, "values" will be set + indicating that it does not have good values */ ret = ompi_btl_openib_ini_query(hca->ib_dev_attr.vendor_id, hca->ib_dev_attr.vendor_part_id, &values); + if (OMPI_SUCCESS != ret && OMPI_ERR_NOT_FOUND != ret) { + /* If we get a serious error, propagate it upwards */ + goto close_hca; + } if (OMPI_ERR_NOT_FOUND == ret) { /* If we didn't find a matching HCA in the INI files, output a warning that we're using default values (unless overridden @@ -318,36 +326,42 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev) hca->ib_dev_attr.vendor_id, hca->ib_dev_attr.vendor_part_id); } - hca->mtu = mca_btl_openib_component.ib_mtu; - } else if (OMPI_SUCCESS != ret) { - /* We had some other error that wasn't good -- we should abort - upwards */ + } + /* Note that even if we don't find default values, "values" will + be set indicating that it does not have good values */ + ret = ompi_btl_openib_ini_query(0, 0, &default_values); + if (OMPI_SUCCESS != ret && OMPI_ERR_NOT_FOUND != ret) { + /* If we get a serious error, propagate it upwards */ goto close_hca; - } else { - /* If we did find values for this HCA, handle them */ - if (values.mtu_set) { - switch (values.mtu) { - case 256: - hca->mtu = IBV_MTU_256; - break; - case 512: - hca->mtu = IBV_MTU_512; - break; - case 1024: - hca->mtu = IBV_MTU_1024; - break; - case 2048: - hca->mtu = IBV_MTU_2048; - break; - case 4096: - hca->mtu = IBV_MTU_4096; - break; - default: - BTL_ERROR(("invalid MTU value specified in INI file (%d); ignored\n", values.mtu)); - hca->mtu = mca_btl_openib_component.ib_mtu; - break; - } + } + + /* If we did find values for this HCA (or in the defaults + section), handle them */ + merge_values(&values, &default_values); + if (values.mtu_set) { + switch (values.mtu) { + case 256: + hca->mtu = IBV_MTU_256; + break; + case 512: + hca->mtu = IBV_MTU_512; + break; + case 1024: + hca->mtu = IBV_MTU_1024; + break; + case 2048: + hca->mtu = IBV_MTU_2048; + break; + case 4096: + hca->mtu = IBV_MTU_4096; + break; + default: + BTL_ERROR(("invalid MTU value specified in INI file (%d); ignored\n", values.mtu)); + hca->mtu = mca_btl_openib_component.ib_mtu; + break; } + } else { + hca->mtu = mca_btl_openib_component.ib_mtu; } hca->ib_pd = ibv_alloc_pd(hca->ib_dev_context); @@ -658,6 +672,16 @@ btl_openib_component_init(int *num_btl_modules, } +static void merge_values(ompi_btl_openib_ini_values_t *target, + ompi_btl_openib_ini_values_t *src) +{ + if (!target->mtu_set && src->mtu_set) { + target->mtu = src->mtu; + target->mtu_set = true; + } +} + + static int btl_openib_handle_incoming_hp(mca_btl_openib_module_t *openib_btl, mca_btl_openib_endpoint_t *endpoint, mca_btl_openib_frag_t *frag, diff --git a/ompi/mca/btl/openib/btl_openib_ini.c b/ompi/mca/btl/openib/btl_openib_ini.c index b5024571cb..ff6426f39a 100644 --- a/ompi/mca/btl/openib/btl_openib_ini.c +++ b/ompi/mca/btl/openib/btl_openib_ini.c @@ -21,6 +21,7 @@ #include #include +#include #include "opal/util/output.h" #include "opal/util/show_help.h" @@ -44,9 +45,9 @@ static size_t key_buffer_len = 0; typedef struct parsed_section_values_t { char *name; - uint32_t vendor_id; - bool vendor_id_set; - + uint32_t *vendor_ids; + int vendor_ids_len; + uint32_t *vendor_part_ids; int vendor_part_ids_len; @@ -90,6 +91,7 @@ static void reset_section(bool had_previous_value, parsed_section_values_t *s); static void reset_values(ompi_btl_openib_ini_values_t *v); static int save_section(parsed_section_values_t *s); static int intify(char *string); +static int intify_list(char *str, uint32_t **values, int *len); static inline void show_help(const char *topic); @@ -306,7 +308,7 @@ cleanup: static int parse_line(parsed_section_values_t *sv) { int val, ret = OMPI_SUCCESS; - char *value, *comma; + char *value; bool showed_unknown_field_warning = false; /* Save the name name */ @@ -359,47 +361,16 @@ static int parse_line(parsed_section_values_t *sv) all whitespace at the beginning and ending of the value. */ if (0 == strcasecmp(key_buffer, "vendor_id")) { - /* Single value */ - sv->vendor_id = (uint32_t) intify(value); - sv->vendor_id_set = true; + if (OMPI_SUCCESS != (ret = intify_list(value, &sv->vendor_ids, + &sv->vendor_ids_len))) { + return ret; + } } else if (0 == strcasecmp(key_buffer, "vendor_part_id")) { - char *str = value; - - /* Comma-delimited list of values */ - comma = strchr(str, ','); - if (NULL == comma) { - /* If we only got one value (i.e., no comma found), then - just make an array of one value and save it */ - sv->vendor_part_ids = malloc(sizeof(uint32_t)); - if (NULL == sv->vendor_part_ids) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - sv->vendor_part_ids[0] = (uint32_t) intify(str); - sv->vendor_part_ids_len = 1; - } else { - /* If we found a comma, loop over all the values. Be a - little clever in that we alwasy alloc enough space for - an extra value so that when we exit the loop, we don't - have to realloc again to get space for the last item. */ - do { - *comma = '\0'; - sv->vendor_part_ids = realloc(sv->vendor_part_ids, - sizeof(uint32_t) * - (sv->vendor_part_ids_len + 2)); - sv->vendor_part_ids[sv->vendor_part_ids_len] = - (int32_t) intify(str); - ++sv->vendor_part_ids_len; - str = comma + 1; - comma = strchr(str, ','); - } while (NULL != comma); - /* Get the last value (i.e., the value after the last - comma, because it won't have been snarfed in the - loop) */ - sv->vendor_part_ids[sv->vendor_part_ids_len] = - (uint32_t) intify(str); - ++sv->vendor_part_ids_len; + if (OMPI_SUCCESS != (ret = intify_list(value, &sv->vendor_part_ids, + &sv->vendor_part_ids_len))) { + return ret; } } @@ -457,14 +428,17 @@ static void reset_section(bool had_previous_value, parsed_section_values_t *s) if (NULL != s->name) { free(s->name); } + if (NULL != s->vendor_ids) { + free(s->vendor_ids); + } if (NULL != s->vendor_part_ids) { free(s->vendor_part_ids); } } s->name = NULL; - s->vendor_id = 0; - s->vendor_id_set = false; + s->vendor_ids = NULL; + s->vendor_ids_len = 0; s->vendor_part_ids = NULL; s->vendor_part_ids_len = 0; @@ -490,46 +464,51 @@ static void reset_values(ompi_btl_openib_ini_values_t *v) */ static int save_section(parsed_section_values_t *s) { - int i; + int i, j; opal_list_item_t *item; hca_values_t *h; bool found; /* Is the parsed section valid? */ - if (NULL == s->name || !s->vendor_id_set || 0 == s->vendor_part_ids_len) { + if (NULL == s->name || 0 == s->vendor_ids_len || + 0 == s->vendor_part_ids_len) { return OMPI_ERR_BAD_PARAM; } - /* Iterate over each of the part IDs in the parsed values */ - for (i = 0; i < s->vendor_part_ids_len; ++i) { - found = false; + /* Iterate over each of the vendor/part IDs in the parsed + values */ + for (i = 0; i < s->vendor_ids_len; ++i) { + for (j = 0; j < s->vendor_part_ids_len; ++j) { + found = false; - /* Iterate over all the saved hcas */ - for (item = opal_list_get_first(&hcas); - item != opal_list_get_end(&hcas); - item = opal_list_get_next(item)) { - h = (hca_values_t*) item; - if (s->vendor_id == h->vendor_id && - s->vendor_part_ids[i] == h->vendor_part_id) { - /* Found a match. Update any newly-set values. */ - if (s->values.mtu_set) { - h->values.mtu = s->values.mtu; - h->values.mtu_set = true; - found = true; - break; + /* Iterate over all the saved hcas */ + for (item = opal_list_get_first(&hcas); + item != opal_list_get_end(&hcas); + item = opal_list_get_next(item)) { + h = (hca_values_t*) item; + if (s->vendor_ids[i] == h->vendor_id && + s->vendor_part_ids[j] == h->vendor_part_id) { + /* Found a match. Update any newly-set values. */ + if (s->values.mtu_set) { + h->values.mtu = s->values.mtu; + h->values.mtu_set = true; + found = true; + break; + } } } - } - /* Did we find/update it in the exising list? If not, create - a new one. */ - if (!found) { - h = OBJ_NEW(hca_values_t); - h->section_name = strdup(s->name); - h->vendor_id = s->vendor_id; - h->vendor_part_id = s->vendor_part_ids[i]; - h->values = s->values; - opal_list_append(&hcas, &h->super); + /* Did we find/update it in the exising list? If not, + create a new one. */ + if (!found) { + h = OBJ_NEW(hca_values_t); + h->section_name = strdup(s->name); + h->vendor_id = s->vendor_ids[i]; + h->vendor_part_id = s->vendor_part_ids[j]; + h->values = s->values; + opal_list_append(&hcas, &h->super); + printf("Created combo: %0x/%d\n", h->vendor_id, h->vendor_part_id); + } } } @@ -560,6 +539,50 @@ static int intify(char *str) } +/* + * Take a comma-delimited list and infity them all + */ +static int intify_list(char *value, uint32_t **values, int *len) +{ + char *comma; + char *str = value; + + *len = 0; + + /* Comma-delimited list of values */ + comma = strchr(str, ','); + if (NULL == comma) { + /* If we only got one value (i.e., no comma found), then + just make an array of one value and save it */ + *values = malloc(sizeof(uint32_t)); + if (NULL == *values) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + *values[0] = (uint32_t) intify(str); + *len = 1; + } else { + /* If we found a comma, loop over all the values. Be a + little clever in that we alwasy alloc enough space for + an extra value so that when we exit the loop, we don't + have to realloc again to get space for the last item. */ + do { + *comma = '\0'; + *values = realloc(*values, sizeof(uint32_t) * (*len + 2)); + (*values)[*len] = (int32_t) intify(str); + ++(*len); + str = comma + 1; + comma = strchr(str, ','); + } while (NULL != comma); + /* Get the last value (i.e., the value after the last + comma, because it won't have been snarfed in the + loop) */ + (*values)[*len] = (uint32_t) intify(str); + ++(*len); + } + + return OMPI_SUCCESS; +} + /* * Trival helper function */ diff --git a/ompi/mca/btl/openib/mca-btl-openib-hca-params.ini b/ompi/mca/btl/openib/mca-btl-openib-hca-params.ini index 457b18d9d5..8c963f73a7 100644 --- a/ompi/mca/btl/openib/mca-btl-openib-hca-params.ini +++ b/ompi/mca/btl/openib/mca-btl-openib-hca-params.ini @@ -47,7 +47,8 @@ # These are the default values, identified by the vendor and part ID # numbers of 0 and 0. If queried HCA does not return vendor and part # ID numbers that match any of the sections in this file, the values -# in this section are used. +# in this section are used. Vendor IDs and part IDs can be hex or +# decimal. vendor_id = 0 vendor_part_id = 0 mtu = 1024 @@ -55,21 +56,28 @@ mtu = 1024 ############################################################################ [Mellanox Tavor Infinihost] -# Vendor ID's can be hex or decimal -vendor_id = 0x5ad +# Several vendors resell Mellanox hardware and put their own firmware +# on the cards, therefore overriding the default Mellanox vendor ID. +# +# Mellanox 0x2c9 +# Cisco 0x5ad +# Silverstorm 0x66a +# Voltaire 0x8f1 +# +vendor_id = 0x2c9,0x5ad,0x66a,0x8f1 vendor_part_id = 2310 mtu = 1024 ############################################################################ [Mellanox Arbel InfiniHost III MemFree/Tavor] -vendor_id = 0x5ad +vendor_id = 0x2c9,0x5ad,0x66a,0x8f1 vendor_part_id = 25208,25218 mtu = 1024 ############################################################################ [Mellanox Sinai Infinihost III] -vendor_id = 0x5ad +vendor_id = 0x2c9,0x5ad,0x66a,0x8f1 vendor_part_id = 25204 mtu = 2048