#
# Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2006-2008 Mellanox Technologies. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# This is the default NIC/HCA parameters file for Open MPI's OpenIB
# BTL. If NIC/HCA vendors wish to add their respective values into
# this file (that is distributed with Open MPI), please contact the
# Open MPI development team. See http://www.open-mpi.org/ for
# details.

# This file is in the "ini" style, meaning that it has sections
# identified by section names enclosed in square brackets (e.g.,
# "[Section name]") followed by "key = value" pairs indicating values
# for a specific NIC/HCA vendor and model. NICs/HCAs are identified
# by their vendor ID and vendor part ID, which can be obtained by
# running the diagnostic utility command "ibv_devinfo". The fields
# "vendor_id" and "vendor_part_id" are the vendor ID and vendor part
# ID, respectively.

# The sections in this file only accept a few fields:

# vendor_id: a comma-delimited list of integers of NIC/HCA vendor IDs,
#   expressed either in decimal or hexadecimal (e.g., "13" or "0xd").
#   Individual values can be taken directly from the output of
#   "ibv_devinfo". NIC/HCA vendor IDs correspond to IEEE OUIs, for
#   which you can find the canonical list here:
#   http://standards.ieee.org/regauth/oui/. Example:
#
#       vendor_id = 0x05ad
#
#   Note: Several vendors resell Mellanox hardware and put their own firmware
#   on the cards, therefore overriding the default Mellanox vendor ID.
#
#       Mellanox      0x02c9
#       Cisco         0x05ad
#       Silverstorm   0x066a
#       Voltaire      0x08f1
#       HP            0x1708
#       Sun           0x03ba

# vendor_part_id: a comma-delimited list of integers of different
#   NIC/HCA models from a single vendor, expressed in either decimal or
#   hexadecimal (e.g., "13" or "0xd"). Individual values can be
#   obtained from the output of "ibv_devinfo". Example:
#
#       vendor_part_id = 25208,25218

# mtu: an integer indicating the maximum transfer unit (MTU) to be
#   used with this NIC/HCA. The effective MTU will be the minimum of a
#   NIC's/HCA's MTU value and its peer NIC's/HCA's MTU value. Valid
#   values are 256, 512, 1024, 2048, and 4096. Example:
#
#       mtu = 1024

# use_eager_rdma: an integer indicating whether RDMA should be used
#   for eager messages. 0 values indicate "no" (false); non-zero values
#   indicate "yes" (true). This flag should only be enabled for
#   NICs/HCAs that can provide guarantees about ordering of data in
#   memory -- that the last byte of an incoming RDMA write will always
#   be written last. Certain cards cannot provide this guarantee, while
#   others can.

#       use_eager_rdma = 1

# receive_queues: a list of "bucket shared receive queues" (BSRQ) that
#   are opened between MPI process peer pairs for point-to-point
#   communications of messages shorter than the total length required
#   for RDMA transfer. The use of multiple RQs, each with differently
#   sized posted receive buffers, can allow [much] better registered
#   memory utilization -- MPI messages are sent on the QP with the
#   smallest buffer size that will fit the message. Note that flow
#   control messages are always sent across the QP with the smallest
#   buffer size. Also note that the buffers *must* be listed in
#   increasing buffer size. This parameter matches the
#   mca_btl_openib_receive_queues MCA parameter; see the ompi_info help
#   message and FAQ for a description of its values. BSRQ
#   specifications are found in this precedence:

#   highest: specifying the mca_btl_openib_receive_queues MCA param
#   next: finding a value in this file
#   lowest: using the default mca_btl_openib_receive_queues MCA param value

#       receive_queues = P,128,256,192,128:S,65536,256,192,128

# max_inline_data: an integer specifying the maximum inline data (in
#   bytes) supported by the device.
#   -1 means to use a run-time probe to
#   figure out the maximum value supported by the device.

#       max_inline_data = 1024

# rdmacm_reject_causes_connect_error: a boolean indicating whether
#   when an RDMA CM REJECT is issued on the device, instead of getting
#   the expected REJECT event back, you might get a CONNECT_ERROR event.
#   Open MPI uses RDMA CM REJECT messages in its normal wireup
#   procedure; some connections are *expected* to be rejected. However,
#   with some older drivers, if process A issues a REJECT, process B
#   will receive a CONNECT_ERROR event instead of a REJECT event. So if
#   this flag is set to true and we receive a CONNECT_ERROR event on a
#   connection where we are expecting a REJECT, then just treat the
#   CONNECT_ERROR exactly as we would have treated the REJECT. Setting
#   this flag to true allows Open MPI to work around the behavior
#   described above. It is [mostly] safe to set this flag to true even
#   after a driver has been fixed; the scope of where this flag is used
#   is small enough that it *shouldn't* mask real CONNECT_ERROR events.

#       rdmacm_reject_causes_connect_error = 1

############################################################################

[default]

# These are the default values, identified by the vendor and part ID
# numbers of 0 and 0. If a queried NIC/HCA does not return vendor and
# part ID numbers that match any of the sections in this file, the
# values in this section are used. Vendor IDs and part IDs can be hex
# or decimal.
vendor_id = 0
vendor_part_id = 0
use_eager_rdma = 0
mtu = 1024
max_inline_data = 128

############################################################################

[Mellanox Tavor Infinihost]
vendor_id = 0x2c9,0x5ad,0x66a,0x8f1,0x1708,0x03ba
vendor_part_id = 23108
use_eager_rdma = 1
mtu = 1024
max_inline_data = 128

############################################################################

[Mellanox Arbel InfiniHost III MemFree/Tavor]
vendor_id = 0x2c9,0x5ad,0x66a,0x8f1,0x1708,0x03ba
vendor_part_id = 25208,25218
use_eager_rdma = 1
mtu = 1024
max_inline_data = 128

############################################################################

[Mellanox Sinai Infinihost III]
vendor_id = 0x2c9,0x5ad,0x66a,0x8f1,0x1708,0x03ba
vendor_part_id = 25204,24204
use_eager_rdma = 1
mtu = 2048
max_inline_data = 128

############################################################################

# A.k.a. ConnectX
[Mellanox Hermon]
vendor_id = 0x2c9,0x5ad,0x66a,0x8f1,0x1708,0x03ba,0x15b3
vendor_part_id = 25408,25418,25428,26418,26428,25448,26438,26448,26468,26478,26488
use_eager_rdma = 1
mtu = 2048
max_inline_data = 128

############################################################################

[IBM eHCA 4x and 12x]
vendor_id = 0x5076
vendor_part_id = 0
use_eager_rdma = 1
mtu = 2048
receive_queues = P,128,256,192,128:P,65536,256,192,128
max_inline_data = 0

############################################################################

[IBM eHCA-2 4x and 12x]
vendor_id = 0x5076
vendor_part_id = 1
use_eager_rdma = 1
mtu = 4096
receive_queues = P,128,256,192,128:P,65536,256,192,128
max_inline_data = 0

############################################################################

# See http://lists.openfabrics.org/pipermail/general/2008-June/051920.html
# 0x1fc1 and 0x1077 are PCI IDs; at least one of QL's OUIs is 0x1175
[QLogic InfiniPath 1]
vendor_id = 0x1fc1,0x1077,0x1175
vendor_part_id = 13
use_eager_rdma = 1
mtu = 2048
max_inline_data = 0

[QLogic InfiniPath 2]
vendor_id = 0x1fc1,0x1077,0x1175
vendor_part_id = 16,29216
use_eager_rdma = 1
mtu = 4096
max_inline_data = 0

# Part ID 16 is intentionally not repeated here: it is already claimed by
# the [QLogic InfiniPath 2] section for the same vendor IDs, and a given
# vendor ID / part ID pair should map to exactly one section.
[QLogic InfiniPath 3]
vendor_id = 0x1fc1,0x1077,0x1175
vendor_part_id = 29474
use_eager_rdma = 1
mtu = 4096
max_inline_data = 0

############################################################################

# Chelsio's OUI is 0x0743. 0x1425 is the PCI ID.
[Chelsio T3]
vendor_id = 0x1425
vendor_part_id = 0x0020,0x0021,0x0022,0x0023,0x0024,0x0025,0x0026,0x0030,0x0031,0x0032,0x0035,0x0036
use_eager_rdma = 1
mtu = 2048
receive_queues = P,65536,256,192,128
max_inline_data = 64

############################################################################

# I'm *assuming* that 0x4040 is the PCI ID...
[NetXen]
vendor_id = 0x4040
vendor_part_id = 0x0001,0x0002,0x0003,0x0004,0x0005,0x0024,0x0025,0x0100
use_eager_rdma = 1
mtu = 2048
receive_queues = P,65536,248,192,128
max_inline_data = 64

############################################################################

# NetEffect's OUI is 0x1255. 0x1678 is the PCI ID.
# ...but then NetEffect was bought by Intel. Still waiting for
# new OUI/PCI ID and part_id values from Intel.
[NetEffect NE020]
vendor_id = 0x1678,0x1255
vendor_part_id = 0x0100
use_eager_rdma = 1
mtu = 2048
receive_queues = P,128,256,192,128:P,65536,256,192,128
max_inline_data = 64
rdmacm_reject_causes_connect_error = 1