1
1

Bring over all the work from the /tmp/ib-hw-detect branch.  In
addition to my design and testing, it was conceptually approved by
Gil, Gleb, Pasha, Brad, and Galen.  Functionally [probably somewhat
lightly] tested by Galen.  We may still have to shake out some bugs
during the next few months, but it seems to be working for all the
cases that I can throw at it.

Here's a summary of the changes from that branch: 

* Move MCA parameter registration to a new file (btl_openib_mca.c):
   * Properly check the return status of registering MCA params
   * Check for valid values of MCA parameters
   * Make help strings better
   * Otherwise, the only default value of an MCA param that was
     changed was max_btls; it went from 4 to -1 (meaning: use all
     available)
 * Properly prototyped internal functions in _component.c
   * Made a bunch of functions static that didn't need to be public
   * Renamed to remove "mca_" prefix from static functions
   * Call new MCA param registration function
   * Call new INI file read/lookup/finalize functions
   * Updated a bunch of macros to be "BTL_" instead of "ORTE_"
   * Be a little more consistent with return values
   * Handle -1 for the max_btls MCA param
   * Fixed a free() that should have been an OBJ_RELEASE()
   * Some re-indenting
 * Added INI-file parsing
   * New flex file: btl_openib_ini.l
   * New default HCA params .ini file (probably to be expanded over
     time by other HCA vendors)
   * Added more show_help messages for parsing problems
   * Read in INI files and cache the values for later lookup
   * When component opens an HCA, lookup to see if any corresponding
     values were found in the INI files (ID'ed by the HCA vendor_id
     and vendor_part_id)
   * Added btl_openib_verbose MCA param that shows what the INI-file
     stuff does (e.g., shows which MTU your HCA ends up using)
   * Added btl_openib_hca_param_files as a colon-delimited list of INI
     files to check for values during startup (in order,
     left-to-right, just like the MCA base directory param).
   * MTU is currently the only value supported in this framework.
   * It is not a fatal error if we don't find params for the HCA in
     the INI file(s).  Instead, just print a warning.  New MCA param
     btl_openib_warn_no_hca_params_found can be used to disable
     printing the warning.
 * Add MTU to peer negotiation when making a connection
   * Exchange maximum MTU; select the lesser of the two

This commit was SVN r11182.
Этот коммит содержится в:
Jeff Squyres 2006-08-14 19:30:37 +00:00
родитель dd6fa1da2a
Коммит 474564a6b1
13 изменённых файлов: 1674 добавлений и 344 удалений

Просмотреть файл

@ -18,11 +18,13 @@
# Use the top-level Makefile.options
AM_CPPFLAGS = $(btl_openib_CPPFLAGS) -DPKGDATADIR=\"$(pkgdatadir)\"
AM_LFLAGS = -Pbtl_openib_ini_yy
LEX_OUTPUT_ROOT = lex.btl_openib_ini_yy
AM_CPPFLAGS=$(btl_openib_CPPFLAGS)
dist_pkgdata_DATA=help-mpi-btl-openib.txt
dist_pkgdata_DATA = \
help-mpi-btl-openib.txt \
mca-btl-openib-hca-params.ini
sources = \
btl_openib.c \
@ -34,8 +36,14 @@ sources = \
btl_openib_frag.h \
btl_openib_proc.c \
btl_openib_proc.h \
btl_openib_eager_rdma.h
btl_openib_eager_rdma.h \
btl_openib_lex.h \
btl_openib_lex.l \
btl_openib_mca.c \
btl_openib_mca.h \
btl_openib_ini.c \
btl_openib_ini.h
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
@ -61,7 +69,6 @@ mca_btl_openib_la_LIBADD = \
$(top_ompi_builddir)/ompi/libmpi.la \
$(top_ompi_builddir)/orte/liborte.la \
$(top_ompi_builddir)/opal/libopal.la
noinst_LTLIBRARIES = $(lib)
libmca_btl_openib_la_SOURCES = $(lib_sources)

Просмотреть файл

@ -1,4 +1,3 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -10,15 +9,16 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
*
* @file
*/
#ifndef MCA_PTL_IB_H
#define MCA_PTL_IB_H
@ -123,6 +123,15 @@ struct mca_btl_openib_component_t {
uint32_t max_lmc;
uint32_t buffer_alignment;
/** Colon-delimited list of filenames for HCA parameters */
char *hca_params_file_names;
/** Whether we're in verbose mode or not */
bool verbose;
/** Whether we want a warning if no HCA-specific parameters are
found in INI files */
bool warn_no_hca_params_found;
}; typedef struct mca_btl_openib_component_t mca_btl_openib_component_t;
extern mca_btl_openib_component_t mca_btl_openib_component;
@ -136,6 +145,8 @@ struct mca_btl_openib_hca_t {
struct ibv_device_attr ib_dev_attr;
struct ibv_pd *ib_pd;
mca_mpool_base_module_t *mpool;
/* MTU for this HCA */
uint32_t mtu;
uint8_t btls; /** < number of btls using this HCA */
};
typedef struct mca_btl_openib_hca_t mca_btl_openib_hca_t;
@ -201,44 +212,6 @@ struct mca_btl_openib_module_t {
struct mca_btl_openib_frag_t;
extern mca_btl_openib_module_t mca_btl_openib_module;
/**
* Register IB component parameters with the MCA framework
*/
extern int mca_btl_openib_component_open(void);
/**
* Any final cleanup before being unloaded.
*/
extern int mca_btl_openib_component_close(void);
/**
* IB component initialization.
*
* @param num_btl_modules (OUT) Number of BTLs returned in BTL array.
* @param allow_multi_user_threads (OUT) Flag indicating wether BTL supports user threads (TRUE)
* @param have_hidden_threads (OUT) Flag indicating wether BTL uses threads (TRUE)
*
* (1) read interface list from kernel and compare against component parameters
* then create a BTL instance for selected interfaces
* (2) setup IB listen socket for incoming connection attempts
* (3) publish BTL addressing info
*
*/
extern mca_btl_base_module_t** mca_btl_openib_component_init(
int *num_btl_modules,
bool allow_multi_user_threads,
bool have_hidden_threads
);
/**
* IB component progress.
*/
extern int mca_btl_openib_component_progress(
void
);
/**
* Register a callback function that is called on receipt
* of a fragment.

Просмотреть файл

@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -31,14 +32,16 @@
#include "opal/mca/base/mca_base_param.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/util/sys_info.h"
#include "ompi/mca/mpool/base/base.h"
#include "ompi/mca/btl/base/base.h"
#include "btl_openib.h"
#include "btl_openib_frag.h"
#include "btl_openib_endpoint.h"
#include "btl_openib_eager_rdma.h"
#include "btl_openib_proc.h"
#include "ompi/mca/btl/base/base.h"
#include "btl_openib_ini.h"
#include "btl_openib_mca.h"
#include "ompi/datatype/convertor.h"
#include "ompi/mca/mpool/mpool.h"
@ -49,6 +52,30 @@
#include "ompi/mca/pml/base/pml_base_module_exchange.h"
/*
* Local functions
*
* Forward declarations for the file-local (static) functions of this
* component, so that they can be referenced before their definitions
* (e.g., from the mca_btl_openib_component initializer below).
*/
/* Component open/close hooks; installed in mca_btl_openib_component */
static int btl_openib_component_open(void);
static int btl_openib_component_close(void);
static int btl_openib_modex_send(void);
/* Callback registered for MCA_BTL_TAG_BTL (control messages) */
static void btl_openib_control(struct mca_btl_base_module_t* btl,
mca_btl_base_tag_t tag,
mca_btl_base_descriptor_t* descriptor,
void* cbdata);
/* Per-port setup; called by init_one_hca() for each active port */
static int init_one_port(opal_list_t *btl_list, mca_btl_openib_hca_t *hca,
uint8_t port_num, struct ibv_port_attr *ib_port_attr);
/* Per-HCA setup; called by btl_openib_component_init() for each device */
static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev);
/* Component init hook; installed in mca_btl_openib_component */
static mca_btl_base_module_t **btl_openib_component_init(
int *num_btl_modules, bool enable_progress_threads,
bool enable_mpi_threads);
/* Handles a received fragment; called from btl_openib_component_progress() */
static int btl_openib_handle_incoming_hp(mca_btl_openib_module_t *openib_btl,
mca_btl_openib_endpoint_t *endpoint,
mca_btl_openib_frag_t *frag,
size_t byte_len);
/* Maps a work-completion status to a printable string for error messages */
static char* btl_openib_component_status_to_string(enum ibv_wc_status status);
/* Component progress hook; installed in mca_btl_openib_component */
static int btl_openib_component_progress(void);
mca_btl_openib_component_t mca_btl_openib_component = {
{
/* First, the mca_base_component_t struct containing meta information
@ -64,8 +91,8 @@ mca_btl_openib_component_t mca_btl_openib_component = {
OMPI_MAJOR_VERSION, /* MCA component major version */
OMPI_MINOR_VERSION, /* MCA component minor version */
OMPI_RELEASE_VERSION, /* MCA component release version */
mca_btl_openib_component_open, /* component open */
mca_btl_openib_component_close /* component close */
btl_openib_component_open, /* component open */
btl_openib_component_close /* component close */
},
/* Next the MCA v1.0.0 component meta data */
@ -76,182 +103,46 @@ mca_btl_openib_component_t mca_btl_openib_component = {
false
},
mca_btl_openib_component_init,
mca_btl_openib_component_progress,
btl_openib_component_init,
btl_openib_component_progress,
}
};
/*
* utility routines for parameter registration
*/
static inline void mca_btl_openib_param_register_string(
const char* param_name,
const char* param_desc,
const char* default_value,
char** out_value)
{
mca_base_param_reg_string(&mca_btl_openib_component.super.btl_version,
param_name,
param_desc,
false,
false,
default_value,
out_value);
}
static inline void mca_btl_openib_param_register_int(
const char* param_name,
const char* param_desc,
int default_value,
int* out_value)
{
mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version,
param_name,
param_desc,
false,
false,
default_value,
out_value);
}
/*
* Called by MCA framework to open the component, registers
* component parameters.
*/
int mca_btl_openib_component_open(void)
int btl_openib_component_open(void)
{
char *msg;
int val;
int ret;
/* initialize state */
mca_btl_openib_component.ib_num_btls=0;
mca_btl_openib_component.openib_btls=NULL;
mca_btl_openib_component.ib_num_btls = 0;
mca_btl_openib_component.openib_btls = NULL;
/* initialize objects */
OBJ_CONSTRUCT(&mca_btl_openib_component.ib_procs, opal_list_t);
/* register IB component parameters */
mca_btl_openib_param_register_int ("max_btls", "maximum number of HCAs/ports to use",
4, &mca_btl_openib_component.ib_max_btls);
mca_btl_openib_param_register_int ("free_list_num", "intial size of free lists",
8, &mca_btl_openib_component.ib_free_list_num);
mca_btl_openib_param_register_int ("free_list_max", "maximum size of free lists",
-1, &mca_btl_openib_component.ib_free_list_max);
mca_btl_openib_param_register_int ("free_list_inc", "increment size of free lists",
32, &mca_btl_openib_component.ib_free_list_inc);
mca_btl_openib_param_register_string("mpool", "name of the memory pool to be used",
"openib", &mca_btl_openib_component.ib_mpool_name);
mca_btl_openib_param_register_int("reg_mru_len", "length of the registration cache most recently used list",
16, (int*) &mca_btl_openib_component.reg_mru_len);
mca_btl_openib_param_register_int("use_srq", "if 1 use the IB shared receive queue to post receive descriptors",
0, (int*) &mca_btl_openib_component.use_srq);
mca_btl_openib_param_register_int("ib_cq_size", "size of the IB completion "
"queue, an override of this value may occur if set too small, "
"the override is 2*Number of Peers* btl_openib_rd_num",
1000, (int*) &mca_btl_openib_component.ib_cq_size);
mca_btl_openib_param_register_int("ib_sg_list_size", "size of IB segment list",
4, (int*) &mca_btl_openib_component.ib_sg_list_size);
mca_btl_openib_param_register_int("ib_pkey_ix", "IB pkey index",
0, (int*) &mca_btl_openib_component.ib_pkey_ix);
mca_btl_openib_param_register_int("ib_psn", "IB Packet sequence starting number",
0, (int*) &mca_btl_openib_component.ib_psn);
mca_btl_openib_param_register_int("ib_qp_ous_rd_atom", "IB outstanding atomic reads",
4, (int*) &mca_btl_openib_component.ib_qp_ous_rd_atom);
val = IBV_MTU_1024;
asprintf(&msg, "IB MTU, in bytes. Valid values are: %d=256 bytes, %d=512 bytes, %d=1024 bytes, %d=2048 bytes, %d=4096 bytes.",
IBV_MTU_256,
IBV_MTU_512,
IBV_MTU_1024,
IBV_MTU_2048,
IBV_MTU_4096);
mca_btl_openib_param_register_int("ib_mtu", msg,
val, (int*) &mca_btl_openib_component.ib_mtu);
free(msg);
mca_btl_openib_param_register_int("ib_min_rnr_timer", "IB min rnr timer",
5, (int*) &mca_btl_openib_component.ib_min_rnr_timer);
mca_btl_openib_param_register_int("ib_timeout", "IB transmit timeout",
10, (int*) &mca_btl_openib_component.ib_timeout);
mca_btl_openib_param_register_int("ib_retry_count", "IB transmit retry count",
7, (int*) &mca_btl_openib_component.ib_retry_count);
mca_btl_openib_param_register_int("ib_rnr_retry", "IB rnr retry",
7, (int*) &mca_btl_openib_component.ib_rnr_retry);
mca_btl_openib_param_register_int("ib_max_rdma_dst_ops", "IB max rdma destination operations",
4, (int*) &mca_btl_openib_component.ib_max_rdma_dst_ops);
mca_btl_openib_param_register_int("ib_service_level", "IB service level",
0, (int*) &mca_btl_openib_component.ib_service_level);
mca_btl_openib_param_register_int("ib_static_rate", "IB static rate",
0, (int*) &mca_btl_openib_component.ib_static_rate);
mca_btl_openib_param_register_int ("exclusivity", "BTL exclusivity",
MCA_BTL_EXCLUSIVITY_DEFAULT, (int*) &mca_btl_openib_module.super.btl_exclusivity);
mca_btl_openib_param_register_int("rd_num", "number of receive descriptors to post to a QP",
8, (int*) &mca_btl_openib_component.rd_num);
mca_btl_openib_param_register_int("rd_low", "low water mark before reposting occurs",
6, (int*) &mca_btl_openib_component.rd_low);
mca_btl_openib_param_register_int("rd_win", "window size at which generate explicity credit message",
4, (int*) &mca_btl_openib_component.rd_win);
mca_btl_openib_component.rd_rsv = ((mca_btl_openib_component.rd_num<<1)-1) / mca_btl_openib_component.rd_win;
ret = btl_openib_register_mca_params();
mca_btl_openib_param_register_int("srq_rd_max", "Max number of receive descriptors posted per SRQ.",
1000, (int*) &mca_btl_openib_component.srq_rd_max);
mca_btl_openib_param_register_int("srq_rd_per_peer", "Number of receive descriptors posted per peer. (SRQ)",
16, (int*) &mca_btl_openib_component.srq_rd_per_peer);
mca_btl_openib_param_register_int("srq_sd_max", "Maximum number of send descriptors posted. (SRQ)",
8, &mca_btl_openib_component.srq_sd_max);
mca_btl_openib_param_register_int("use_eager_rdma", "user RDMA for eager messages",
1, (int*) &mca_btl_openib_component.use_eager_rdma);
if (mca_btl_openib_component.use_srq)
mca_btl_openib_component.use_eager_rdma = 0;
mca_btl_openib_param_register_int("eager_rdma_threshold", "Open rdma channel for eager messages after this number of messages received from peer",
16, (int*) &mca_btl_openib_component.eager_rdma_threshold);
mca_btl_openib_param_register_int("max_eager_rdma", "Maximum number of eager RDMA connections",
16, (int*)&mca_btl_openib_component.max_eager_rdma);
mca_btl_openib_param_register_int("eager_rdma_num", "Number of RDMA buffers for eager messages",
16, (int*)&mca_btl_openib_component.eager_rdma_num);
mca_btl_openib_component.eager_rdma_num+=1;
mca_btl_openib_param_register_int("btls_per_lid", "Number of BTLs to create for each LID",
1, (int*)&mca_btl_openib_component.btls_per_lid);
mca_btl_openib_param_register_int("max_lmc", "Maximum LIDs to use for each port (0 - all available)",
0, (int*)&mca_btl_openib_component.max_lmc);
mca_btl_openib_param_register_int("buffer_alignment", "Prefered communication buffers alignmet for best performance",
64, (int*)&mca_btl_openib_component.buffer_alignment);
mca_btl_openib_param_register_int ("eager_limit", "eager send limit",
(12*1024), &val);
mca_btl_openib_module.super.btl_eager_limit = val;
mca_btl_openib_param_register_int ("min_send_size", "minimum send size",
(32*1024), &val);
mca_btl_openib_module.super.btl_min_send_size = val;
mca_btl_openib_param_register_int ("max_send_size", "maximum send size",
(64*1024), &val);
mca_btl_openib_module.super.btl_max_send_size = val;
mca_btl_openib_param_register_int("min_rdma_size", "minimum rdma size",
1024*1024, &val);
mca_btl_openib_module.super.btl_min_rdma_size = val;
mca_btl_openib_param_register_int("max_rdma_size", "maximium rdma size",
1024*1024, &val);
mca_btl_openib_module.super.btl_max_rdma_size = val;
mca_btl_openib_param_register_int("flags", "BTL flags, SEND=1, PUT=2, GET=4",
MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_NEED_ACK | MCA_BTL_FLAGS_NEED_CSUM,
(int*) &mca_btl_openib_module.super.btl_flags);
mca_btl_openib_param_register_int("bandwidth", "Approximate maximum bandwidth of interconnect",
800, (int*) &mca_btl_openib_module.super.btl_bandwidth);
mca_btl_openib_component.max_send_size = mca_btl_openib_module.super.btl_max_send_size;
mca_btl_openib_component.eager_limit = mca_btl_openib_module.super.btl_eager_limit;
return OMPI_SUCCESS;
mca_btl_openib_component.max_send_size =
mca_btl_openib_module.super.btl_max_send_size;
mca_btl_openib_component.eager_limit =
mca_btl_openib_module.super.btl_eager_limit;
return ret;
}
/*
* component cleanup - sanity checking of queue lengths
*/
int mca_btl_openib_component_close(void)
static int btl_openib_component_close(void)
{
/* Shut down the INI-file support that btl_openib_component_init()
   starts with ompi_btl_openib_ini_init().  NOTE(review): presumably
   this releases the cached per-HCA values -- confirm in
   btl_openib_ini.c */
ompi_btl_openib_ini_finalize();
/* Unconditionally report success to the caller */
return OMPI_SUCCESS;
}
@ -261,8 +152,7 @@ int mca_btl_openib_component_close(void)
* will make this available to all peers.
*/
static int
mca_btl_openib_modex_send(void)
static int btl_openib_modex_send(void)
{
int rc, i;
size_t size;
@ -272,6 +162,7 @@ mca_btl_openib_modex_send(void)
if (size != 0) {
ports = (mca_btl_openib_port_info_t *)malloc (size);
if (NULL == ports) {
BTL_ERROR(("Failed malloc: %s:%d\n", __FILE__, __LINE__));
return OMPI_ERR_OUT_OF_RESOURCE;
}
@ -291,11 +182,10 @@ mca_btl_openib_modex_send(void)
* Callback function on control message.
*/
static void mca_btl_openib_control(
struct mca_btl_base_module_t* btl,
mca_btl_base_tag_t tag,
mca_btl_base_descriptor_t* descriptor,
void* cbdata)
static void btl_openib_control(struct mca_btl_base_module_t* btl,
mca_btl_base_tag_t tag,
mca_btl_base_descriptor_t* descriptor,
void* cbdata)
{
/* dont return credits used for control messages */
mca_btl_openib_frag_t* frag = (mca_btl_openib_frag_t*)descriptor;
@ -334,7 +224,7 @@ static void mca_btl_openib_control(
}
static int init_one_port(opal_list_t *btl_list, mca_btl_openib_hca_t *hca,
uint8_t port_num, struct ibv_port_attr *ib_port_attr)
uint8_t port_num, struct ibv_port_attr *ib_port_attr)
{
uint16_t lid, i, lmc;
mca_btl_openib_module_t *openib_btl;
@ -342,17 +232,18 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_hca_t *hca,
lmc = (1 << ib_port_attr->lmc);
if(mca_btl_openib_component.max_lmc &&
mca_btl_openib_component.max_lmc < lmc)
if (0 != mca_btl_openib_component.max_lmc &&
mca_btl_openib_component.max_lmc < lmc) {
lmc = mca_btl_openib_component.max_lmc;
}
for(lid = ib_port_attr->lid;
lid < ib_port_attr->lid + lmc; lid++){
for(i = 0; i < mca_btl_openib_component.btls_per_lid; i++){
openib_btl = malloc(sizeof(mca_btl_openib_module_t));
if(NULL == openib_btl) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return -1;
BTL_ERROR(("Failed malloc: %s:%d\n", __FILE__, __LINE__));
return OMPI_ERR_OUT_OF_RESOURCE;
}
memcpy(openib_btl, &mca_btl_openib_module,
sizeof(mca_btl_openib_module));
@ -366,18 +257,21 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_hca_t *hca,
openib_btl->src_path_bits = lid - ib_port_attr->lid;
/* store the sm_lid for multi-nic support */
openib_btl->port_info.subnet = ib_port_attr->sm_lid;
openib_btl->ib_reg[MCA_BTL_TAG_BTL].cbfunc =
mca_btl_openib_control;
openib_btl->port_info.mtu = hca->mtu;
openib_btl->ib_reg[MCA_BTL_TAG_BTL].cbfunc = btl_openib_control;
openib_btl->ib_reg[MCA_BTL_TAG_BTL].cbdata = NULL;
opal_list_append(btl_list, (opal_list_item_t*) ib_selected);
hca->btls++;
if(++mca_btl_openib_component.ib_num_btls >=
mca_btl_openib_component.ib_max_btls)
return 0;
++mca_btl_openib_component.ib_num_btls;
if (-1 != mca_btl_openib_component.ib_max_btls &&
mca_btl_openib_component.ib_num_btls >=
mca_btl_openib_component.ib_max_btls) {
return OMPI_SUCCESS;
}
}
}
return 1;
return OMPI_SUCCESS;
}
static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
@ -386,11 +280,12 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
mca_btl_openib_hca_t *hca;
uint8_t i;
int ret = -1;
ompi_btl_openib_ini_values_t values;
hca = malloc(sizeof(mca_btl_openib_hca_t));
if(NULL == hca){
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return -1;
BTL_ERROR(("Failed malloc: %s:%d\n", __FILE__, __LINE__));
return OMPI_ERR_OUT_OF_RESOURCE;
}
hca->ib_dev = ib_dev;
@ -408,6 +303,53 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
goto close_hca;
}
/* Load in vendor/part-specific HCA parameters. */
ret = ompi_btl_openib_ini_query(hca->ib_dev_attr.vendor_id,
hca->ib_dev_attr.vendor_part_id,
&values);
if (OMPI_ERR_NOT_FOUND == ret) {
/* If we didn't find a matching HCA in the INI files, output a
warning that we're using default values (unless overridden
that we don't want to see these warnings) */
if (mca_btl_openib_component.warn_no_hca_params_found) {
opal_show_help("help-mpi-btl-openib.txt",
"no hca params found", true,
orte_system_info.nodename,
hca->ib_dev_attr.vendor_id,
hca->ib_dev_attr.vendor_part_id);
hca->mtu = mca_btl_openib_component.ib_mtu;
}
} else if (OMPI_SUCCESS != ret) {
/* We had some other error that wasn't good -- we should abort
upwards */
goto close_hca;
} else {
/* If we did find values for this HCA, handle them */
if (values.mtu_set) {
switch (values.mtu) {
case 256:
hca->mtu = IBV_MTU_256;
break;
case 512:
hca->mtu = IBV_MTU_512;
break;
case 1024:
hca->mtu = IBV_MTU_1024;
break;
case 2048:
hca->mtu = IBV_MTU_2048;
break;
case 4096:
hca->mtu = IBV_MTU_4096;
break;
default:
BTL_ERROR(("invalid MTU value specified in INI file (%d); ignored\n", values.mtu));
hca->mtu = mca_btl_openib_component.ib_mtu;
break;
}
}
}
hca->ib_pd = ibv_alloc_pd(hca->ib_dev_context);
if(NULL == hca->ib_pd){
BTL_ERROR(("error allocating pd for %s errno says %s\n",
@ -440,8 +382,9 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
if(IBV_PORT_ACTIVE == ib_port_attr.state){
ret = init_one_port(btl_list, hca, i, &ib_port_attr);
if (ret <= 0)
if (OMPI_SUCCESS != ret) {
break;
}
}
}
@ -465,13 +408,14 @@ free_hca:
* (3) register BTL parameters with the MCA
*/
mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules,
bool enable_progress_threads,
bool enable_mpi_threads)
static mca_btl_base_module_t**
btl_openib_component_init(int *num_btl_modules,
bool enable_progress_threads,
bool enable_mpi_threads)
{
struct ibv_device **ib_devs;
mca_btl_base_module_t** btls;
int i, length, num_devs;
int i, ret, length, num_devs;
opal_list_t btl_list;
mca_btl_openib_module_t * openib_btl;
mca_btl_base_selected_module_t* ib_selected;
@ -492,7 +436,7 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules,
if (enable_progress_threads) {
mca_btl_base_error_no_nics("OpenIB", "HCA");
mca_btl_openib_component.ib_num_btls = 0;
mca_btl_openib_modex_send();
btl_openib_modex_send();
return NULL;
}
@ -501,6 +445,11 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules,
seedv[2] = opal_sys_timer_get_cycles();
seed48(seedv);
/* Read in INI files with HCA-specific parameters */
if (OMPI_SUCCESS != (ret = ompi_btl_openib_ini_init())) {
return NULL;
}
#if OMPI_MCA_BTL_OPENIB_HAVE_DEVICE_LIST
ib_devs = ibv_get_device_list(&num_devs);
#else
@ -509,7 +458,7 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules,
if (NULL == dev_list) {
mca_btl_base_error_no_nics("OpenIB", "HCA");
mca_btl_openib_component.ib_num_btls = 0;
mca_btl_openib_modex_send();
btl_openib_modex_send();
return NULL;
}
dlist_start(dev_list);
@ -520,15 +469,15 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules,
if(0 == num_devs) {
mca_btl_base_error_no_nics("OpenIB", "HCA");
mca_btl_openib_modex_send();
btl_openib_modex_send();
return NULL;
}
#if OMPI_MCA_BTL_OPENIB_HAVE_DEVICE_LIST == 0
/* Allocate space for the ib devices */
ib_devs = (struct ibv_device**) malloc(num_devs * sizeof(struct ibv_dev*));
if(NULL == ib_devs) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
if(NULL == ib_devs) {
BTL_ERROR(("Failed malloc: %s:%d\n", __FILE__, __LINE__));
return NULL;
}
@ -539,19 +488,20 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules,
ib_devs[i++] = ib_dev;
#endif
/** We must loop through all the hca id's, get there handles and
for each hca we query the number of ports on the hca and set up
a distinct btl module for each hca port */
/* We must loop through all the hca id's, get their handles and
for each hca we query the number of ports on the hca and set up
a distinct btl module for each hca port */
OBJ_CONSTRUCT(&btl_list, opal_list_t);
OBJ_CONSTRUCT(&mca_btl_openib_component.ib_lock, opal_mutex_t);
for(i = 0; i < num_devs
&& mca_btl_openib_component.ib_num_btls <
mca_btl_openib_component.ib_max_btls; i++){
if (init_one_hca(&btl_list, ib_devs[i]) <= 0)
break;
for (i = 0; i < num_devs &&
(-1 == mca_btl_openib_component.ib_max_btls ||
mca_btl_openib_component.ib_num_btls <
mca_btl_openib_component.ib_max_btls); i++){
if (OMPI_SUCCESS != (ret = init_one_hca(&btl_list, ib_devs[i]))) {
break;
}
}
/* Allocate space for btl modules */
@ -560,26 +510,26 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules,
mca_btl_openib_component.ib_num_btls);
if(NULL == mca_btl_openib_component.openib_btls) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
BTL_ERROR(("Failed malloc: %s:%d\n", __FILE__, __LINE__));
return NULL;
}
btls = malloc(mca_btl_openib_component.ib_num_btls *
sizeof(struct mca_btl_openib_module_t*));
if(NULL == btls) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
BTL_ERROR(("Failed malloc: %s:%d\n", __FILE__, __LINE__));
return NULL;
}
/* Copy the btl module structs into a contiguous array and fully
initialize them */
for(i = 0; i < mca_btl_openib_component.ib_num_btls; i++){
item = opal_list_remove_first(&btl_list);
ib_selected = (mca_btl_base_selected_module_t*)item;
openib_btl = (mca_btl_openib_module_t*) ib_selected->btl_module;
memcpy(&(mca_btl_openib_component.openib_btls[i]), openib_btl,
sizeof(mca_btl_openib_module_t));
free(ib_selected);
free(openib_btl);
OBJ_RELEASE(ib_selected);
openib_btl = &mca_btl_openib_component.openib_btls[i];
openib_btl->rd_num = mca_btl_openib_component.rd_num +
@ -638,7 +588,6 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules,
sizeof(mca_btl_openib_header_t) +
openib_btl->super.btl_max_send_size;
ompi_free_list_init_ex(&openib_btl->send_free_max,
length,
sizeof(mca_btl_openib_frag_t),
@ -657,7 +606,8 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules,
OBJ_CLASS (mca_btl_openib_recv_frag_max_t),
mca_btl_openib_component.ib_free_list_num,
mca_btl_openib_component.ib_free_list_max,
mca_btl_openib_component.ib_free_list_inc, openib_btl->super.btl_mpool);
mca_btl_openib_component.ib_free_list_inc,
openib_btl->super.btl_mpool);
length = sizeof(mca_btl_openib_frag_t) +
sizeof(mca_btl_openib_header_t) +
@ -690,14 +640,13 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules,
0);
openib_btl->eager_rdma_buffers_count = 0;
OBJ_CONSTRUCT(&openib_btl->eager_rdma_lock, opal_mutex_t);
btls[i] = &openib_btl->super;
}
/* Post OOB receive to support dynamic connection setup */
mca_btl_openib_post_recv();
mca_btl_openib_modex_send();
btl_openib_modex_send();
*num_btl_modules = mca_btl_openib_component.ib_num_btls;
#if OMPI_MCA_BTL_OPENIB_HAVE_DEVICE_LIST
@ -708,15 +657,11 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules,
return btls;
}
static int mca_btl_openib_handle_incoming_hp(mca_btl_openib_module_t *,
mca_btl_openib_endpoint_t *,
mca_btl_openib_frag_t *,
size_t);
int mca_btl_openib_handle_incoming_hp(
mca_btl_openib_module_t *openib_btl,
mca_btl_openib_endpoint_t *endpoint,
mca_btl_openib_frag_t *frag,
size_t byte_len)
static int btl_openib_handle_incoming_hp(mca_btl_openib_module_t *openib_btl,
mca_btl_openib_endpoint_t *endpoint,
mca_btl_openib_frag_t *frag,
size_t byte_len)
{
/* advance the segment address past the header and subtract from the length..*/
frag->segment.seg_len = byte_len -
@ -758,8 +703,9 @@ int mca_btl_openib_handle_incoming_hp(
openib_btl->eager_rdma_buffers_count <
mca_btl_openib_component.max_eager_rdma &&
OPAL_THREAD_ADD32(&endpoint->eager_recv_count, 1) ==
mca_btl_openib_component.eager_rdma_threshold)
mca_btl_openib_endpoint_connect_eager_rdma(endpoint);
mca_btl_openib_component.eager_rdma_threshold) {
mca_btl_openib_endpoint_connect_eager_rdma(endpoint);
}
/* repost receive descriptors */
#ifdef OMPI_MCA_BTL_OPENIB_HAVE_SRQ
@ -810,85 +756,87 @@ int mca_btl_openib_handle_incoming_hp(
return OMPI_SUCCESS;
}
static char* mca_btl_openib_component_status_to_string(enum ibv_wc_status status) {
switch(status) {
case IBV_WC_SUCCESS:
return "SUCCESS";
break;
case IBV_WC_LOC_LEN_ERR:
return "LOCAL LENGTH ERROR";
break;
case IBV_WC_LOC_QP_OP_ERR:
return "LOCAL QP OPERATION ERROR";
break;
case IBV_WC_LOC_EEC_OP_ERR:
return "LOCAL EEC OPERATION ERROR";
break;
case IBV_WC_LOC_PROT_ERR:
return "LOCAL PROTOCOL ERROR";
break;
case IBV_WC_WR_FLUSH_ERR:
return "WORK REQUEST FLUSHED ERROR";
break;
case IBV_WC_MW_BIND_ERR:
return "MEMORY WINDOW BIND ERROR";
break;
case IBV_WC_BAD_RESP_ERR:
return "BAD RESPONSE ERROR";
break;
case IBV_WC_LOC_ACCESS_ERR:
return "LOCAL ACCESS ERROR";
break;
case IBV_WC_REM_INV_REQ_ERR:
return "INVALID REQUEST ERROR";
break;
case IBV_WC_REM_ACCESS_ERR:
return "REMOTE ACCESS ERROR";
break;
case IBV_WC_REM_OP_ERR:
return "REMOTE OPERATION ERROR";
break;
case IBV_WC_RETRY_EXC_ERR:
return "RETRY EXCEEDED ERROR";
break;
case IBV_WC_RNR_RETRY_EXC_ERR:
return "RECEIVER NOT READY RETRY EXCEEEDED ERROR";
break;
case IBV_WC_LOC_RDD_VIOL_ERR:
return "LOCAL RDD VIOLATION ERROR";
break;
case IBV_WC_REM_INV_RD_REQ_ERR:
return "INVALID READ REQUEST ERROR";
break;
case IBV_WC_REM_ABORT_ERR:
return "REMOTE ABORT ERROR";
break;
case IBV_WC_INV_EECN_ERR:
return "INVALID EECN ERROR";
break;
case IBV_WC_INV_EEC_STATE_ERR:
return "INVALID EEC STATE ERROR";
break;
case IBV_WC_FATAL_ERR:
return "FATAL ERROR";
break;
case IBV_WC_RESP_TIMEOUT_ERR:
return "RESPONSE TIMEOUT ERROR";
break;
case IBV_WC_GENERAL_ERR:
return "GENERAL ERROR";
break;
default:
return "STATUS UNDEFINED";
break;
}
/*
 * Translate an ibv_wc_status work-completion code into a printable
 * description for use in error messages.
 *
 * @param status  Completion status taken from a struct ibv_wc.
 * @return        Pointer to a string literal describing the status;
 *                "STATUS UNDEFINED" for any value not listed below.
 *                The caller must neither modify nor free the result.
 */
static char* btl_openib_component_status_to_string(enum ibv_wc_status status)
{
    /* Every case returns directly, so no break statements are needed */
    switch(status) {
    case IBV_WC_SUCCESS:
        return "SUCCESS";
    case IBV_WC_LOC_LEN_ERR:
        return "LOCAL LENGTH ERROR";
    case IBV_WC_LOC_QP_OP_ERR:
        return "LOCAL QP OPERATION ERROR";
    case IBV_WC_LOC_EEC_OP_ERR:
        return "LOCAL EEC OPERATION ERROR";
    case IBV_WC_LOC_PROT_ERR:
        return "LOCAL PROTOCOL ERROR";
    case IBV_WC_WR_FLUSH_ERR:
        return "WORK REQUEST FLUSHED ERROR";
    case IBV_WC_MW_BIND_ERR:
        return "MEMORY WINDOW BIND ERROR";
    case IBV_WC_BAD_RESP_ERR:
        return "BAD RESPONSE ERROR";
    case IBV_WC_LOC_ACCESS_ERR:
        return "LOCAL ACCESS ERROR";
    case IBV_WC_REM_INV_REQ_ERR:
        return "INVALID REQUEST ERROR";
    case IBV_WC_REM_ACCESS_ERR:
        return "REMOTE ACCESS ERROR";
    case IBV_WC_REM_OP_ERR:
        return "REMOTE OPERATION ERROR";
    case IBV_WC_RETRY_EXC_ERR:
        return "RETRY EXCEEDED ERROR";
    case IBV_WC_RNR_RETRY_EXC_ERR:
        return "RECEIVER NOT READY RETRY EXCEEDED ERROR";
    case IBV_WC_LOC_RDD_VIOL_ERR:
        return "LOCAL RDD VIOLATION ERROR";
    case IBV_WC_REM_INV_RD_REQ_ERR:
        return "INVALID READ REQUEST ERROR";
    case IBV_WC_REM_ABORT_ERR:
        return "REMOTE ABORT ERROR";
    case IBV_WC_INV_EECN_ERR:
        return "INVALID EECN ERROR";
    case IBV_WC_INV_EEC_STATE_ERR:
        return "INVALID EEC STATE ERROR";
    case IBV_WC_FATAL_ERR:
        return "FATAL ERROR";
    case IBV_WC_RESP_TIMEOUT_ERR:
        return "RESPONSE TIMEOUT ERROR";
    case IBV_WC_GENERAL_ERR:
        return "GENERAL ERROR";
    default:
        return "STATUS UNDEFINED";
    }
}
/*
* IB component progress.
*/
int mca_btl_openib_component_progress()
static int btl_openib_component_progress(void)
{
int i, j, c;
int count = 0,ne = 0, ret;
@ -932,7 +880,7 @@ int mca_btl_openib_component_progress()
frag->segment.seg_addr.pval = ((unsigned char* )frag->hdr) +
sizeof(mca_btl_openib_header_t);
ret = mca_btl_openib_handle_incoming_hp(openib_btl,
ret = btl_openib_handle_incoming_hp(openib_btl,
frag->endpoint, frag,
size - sizeof(mca_btl_openib_footer_t));
if (ret != MPI_SUCCESS)
@ -973,7 +921,7 @@ int mca_btl_openib_component_progress()
}
if(wc.status != IBV_WC_WR_FLUSH_ERR || !flush_err_printed++)
BTL_PEER_ERROR(remote_proc, ("error polling HP CQ with status %s status number %d for wr_id %llu opcode %d",
mca_btl_openib_component_status_to_string(wc.status),
btl_openib_component_status_to_string(wc.status),
wc.status, wc.wr_id, wc.opcode));
if(wc.status == IBV_WC_RETRY_EXC_ERR) {
opal_show_help("help-mpi-btl-openib.txt", "btl_openib:retry-exceeded", true);
@ -1044,7 +992,7 @@ int mca_btl_openib_component_progress()
case IBV_WC_RECV:
/* Process a RECV */
frag = (mca_btl_openib_frag_t*) (unsigned long) wc.wr_id;
ret = mca_btl_openib_handle_incoming_hp(openib_btl,
ret = btl_openib_handle_incoming_hp(openib_btl,
frag->endpoint, frag, wc.byte_len);
if (ret != OMPI_SUCCESS)
return ret;
@ -1078,7 +1026,7 @@ int mca_btl_openib_component_progress()
}
if(wc.status != IBV_WC_WR_FLUSH_ERR || !flush_err_printed++)
BTL_PEER_ERROR(remote_proc, ("error polling LP CQ with status %s status number %d for wr_id %llu opcode %d",
mca_btl_openib_component_status_to_string(wc.status),
btl_openib_component_status_to_string(wc.status),
wc.status, wc.wr_id, wc.opcode));
return OMPI_ERROR;
}

Просмотреть файл

@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -59,6 +60,7 @@ int mca_btl_openib_endpoint_qp_init_query(
uint32_t rem_qp_num,
uint32_t rem_psn,
uint16_t rem_lid,
uint32_t rem_mtu,
uint32_t port_num
);
@ -271,6 +273,7 @@ static void mca_btl_openib_endpoint_construct(mca_btl_base_endpoint_t* endpoint)
endpoint->rem_info.rem_psn_hp = 0;
endpoint->rem_info.rem_psn_lp = 0;
endpoint->rem_info.rem_subnet = 0;
endpoint->rem_info.rem_mtu = 0;
}
/*
@ -340,7 +343,13 @@ static int mca_btl_openib_endpoint_send_connect_data(mca_btl_base_endpoint_t* en
}
rc = orte_dss.pack(buffer, &((mca_btl_openib_endpoint_t*) endpoint)->subnet, 1, ORTE_UINT16);
rc = orte_dss.pack(buffer, &endpoint->subnet, 1, ORTE_UINT16);
if(rc != ORTE_SUCCESS) {
ORTE_ERROR_LOG(rc);
return rc;
}
rc = orte_dss.pack(buffer, &endpoint->endpoint_btl->hca->mtu, 1, ORTE_UINT32);
if(rc != ORTE_SUCCESS) {
ORTE_ERROR_LOG(rc);
return rc;
@ -607,6 +616,11 @@ static void mca_btl_openib_endpoint_recv(
ORTE_ERROR_LOG(rc);
return;
}
rc = orte_dss.unpack(buffer, &rem_info.rem_mtu, &cnt, ORTE_UINT32);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return;
}
#if 0
rc = orte_dss.unpack(buffer, &ib_endpoint->rdma_buf->r_key, &cnt, ORTE_UINT32);
if(rc != ORTE_SUCCESS) {
@ -860,6 +874,7 @@ int mca_btl_openib_endpoint_connect(
endpoint->rem_info.rem_qp_num_hp,
endpoint->rem_info.rem_psn_hp,
endpoint->rem_info.rem_lid,
endpoint->rem_info.rem_mtu,
openib_btl->port_num
);
@ -876,6 +891,7 @@ int mca_btl_openib_endpoint_connect(
endpoint->rem_info.rem_qp_num_lp,
endpoint->rem_info.rem_psn_lp,
endpoint->rem_info.rem_lid,
endpoint->rem_info.rem_mtu,
openib_btl->port_num
);
@ -980,13 +996,24 @@ int mca_btl_openib_endpoint_qp_init_query(
uint32_t rem_qp_num,
uint32_t rem_psn,
uint16_t rem_lid,
uint32_t rem_mtu,
uint32_t port_num
)
{
attr->qp_state = IBV_QPS_RTR;
attr->path_mtu = mca_btl_openib_component.ib_mtu;
attr->path_mtu = (openib_btl->hca->mtu < rem_mtu) ?
openib_btl->hca->mtu : rem_mtu;
if (mca_btl_openib_component.verbose) {
BTL_OUTPUT(("Set MTU to IBV value %d (%s bytes)", attr->path_mtu,
(attr->path_mtu == IBV_MTU_256) ? "256" :
(attr->path_mtu == IBV_MTU_512) ? "512" :
(attr->path_mtu == IBV_MTU_1024) ? "1024" :
(attr->path_mtu == IBV_MTU_2048) ? "2048" :
(attr->path_mtu == IBV_MTU_4096) ? "4096" :
"unknown (!)"));
}
attr->dest_qp_num = rem_qp_num;
attr->rq_psn = rem_psn;
attr->max_dest_rd_atomic = mca_btl_openib_component.ib_max_rdma_dst_ops;

Просмотреть файл

@ -40,6 +40,7 @@ OBJ_CLASS_DECLARATION(mca_btl_openib_endpoint_t);
struct mca_btl_openib_frag_t;
struct mca_btl_openib_port_info_t {
uint32_t mtu;
uint16_t subnet;
};
typedef struct mca_btl_openib_port_info_t mca_btl_openib_port_info_t;
@ -90,7 +91,8 @@ struct mca_btl_openib_rem_info_t {
uint16_t rem_subnet;
/* subnet of remote process */
/* MTU of remote process */
uint32_t rem_mtu;
};
typedef struct mca_btl_openib_rem_info_t mca_btl_openib_rem_info_t;

571
ompi/mca/btl/openib/btl_openib_ini.c Обычный файл
Просмотреть файл

@ -0,0 +1,571 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <string.h>
#include <ctype.h>
#include "opal/util/output.h"
#include "opal/util/show_help.h"
#include "opal/mca/base/mca_base_param.h"
#include "btl_openib.h"
#include "btl_openib_lex.h"
#include "btl_openib_ini.h"
static const char *ini_filename = NULL;
static bool initialized = false;
static opal_list_t hcas;
static char *key_buffer = NULL;
static size_t key_buffer_len = 0;
/*
 * Struct to hold the section name, vendor ID, and list of vendor part
 * IDs and a corresponding set of values (parsed from an INI file).
 *
 * A single instance is filled in key-by-key by parse_line() while a
 * section is being read, and is handed to save_section() when the
 * next section starts or the file ends.
 */
typedef struct parsed_section_values_t {
/* Section name (strdup'ed from the lexer text); NULL while no
   section has been started */
char *name;
/* Value of the "vendor_id" key; only meaningful if vendor_id_set */
uint32_t vendor_id;
/* true once a "vendor_id" key has been parsed in this section */
bool vendor_id_set;
/* Heap-allocated array of values from the "vendor_part_id" key
   (a single value or a comma-delimited list); NULL if none */
uint32_t *vendor_part_ids;
/* Number of valid entries in vendor_part_ids */
int vendor_part_ids_len;
/* Settable values (currently only the MTU) parsed in this section */
ompi_btl_openib_ini_values_t values;
} parsed_section_values_t;
/*
 * Struct to hold the final values.  Different from above in a few ways:
 *
 * - The vendor and part IDs will always be set properly
 * - There will only be one part ID (i.e., the above struct is
 *   exploded into multiple of these for ease of searching)
 * - There is a super of opal_list_item_t so that we can have a list
 *   of these
 */
typedef struct hca_values_t {
/* List linkage; must be first so the struct can be cast to/from
   opal_list_item_t */
opal_list_item_t super;
/* Name of the INI section these values came from (heap-allocated,
   freed by the destructor) */
char *section_name;
/* Vendor ID that this entry applies to */
uint32_t vendor_id;
/* The single vendor part ID that this entry applies to */
uint32_t vendor_part_id;
/* Values that were specified for this vendor ID / part ID pair */
ompi_btl_openib_ini_values_t values;
} hca_values_t;
static void hca_values_constructor(hca_values_t *s);
static void hca_values_destructor(hca_values_t *s);
OBJ_CLASS_INSTANCE(hca_values_t,
opal_list_item_t,
hca_values_constructor,
hca_values_destructor);
/*
* Local functions
*/
static int parse_file(char *filename);
static int parse_line(parsed_section_values_t *item);
static void reset_section(bool had_previous_value, parsed_section_values_t *s);
static void reset_values(ompi_btl_openib_ini_values_t *v);
static int save_section(parsed_section_values_t *s);
static int intify(char *string);
static inline void show_help(const char *topic);
/*
* Read the INI files for HCA-specific values and save them in
* internal data structures for later lookup.
*/
int ompi_btl_openib_ini_init(void)
{
int ret;
char *colon;
OBJ_CONSTRUCT(&hcas, opal_list_t);
colon = strchr(mca_btl_openib_component.hca_params_file_names, ':');
if (NULL == colon) {
/* If we've only got 1 file (i.e., no colons found), parse it
and be done */
ret = parse_file(mca_btl_openib_component.hca_params_file_names);
} else {
/* Otherwise, loop over all the files and parse them */
char *orig = strdup(mca_btl_openib_component.hca_params_file_names);
char *str = orig;
while (NULL != (colon = strchr(str, ':'))) {
*colon = '\0';
ret = parse_file(str);
/* Note that NOT_FOUND and SUCCESS are not fatal errors
and we keep going. Other errors are treated as
fatal */
if (OMPI_ERR_NOT_FOUND != ret && OMPI_SUCCESS != ret) {
break;
}
str = colon + 1;
}
/* Parse the last file if we didn't have a fatal error above */
if (OMPI_ERR_NOT_FOUND != ret && OMPI_SUCCESS != ret) {
ret = parse_file(str);
}
/* All done */
free(orig);
}
/* Return SUCCESS unless we got a fatal error */
initialized = true;
return (OMPI_SUCCESS == ret || OMPI_ERR_NOT_FOUND == ret) ?
OMPI_SUCCESS : ret;
}
/*
 * Look up the INI-file values for one HCA, identified by its vendor ID
 * and vendor part ID.  The INI files are read on first use if
 * ompi_btl_openib_ini_init() has not been called yet.
 *
 * *values is always reset first; on a match it receives a copy of the
 * saved values and OMPI_SUCCESS is returned.  OMPI_ERR_NOT_FOUND is
 * returned when no saved entry matches, and an initialization failure
 * is passed straight through.
 */
int ompi_btl_openib_ini_query(uint32_t vendor_id, uint32_t vendor_part_id,
                              ompi_btl_openib_ini_values_t *values)
{
    opal_list_item_t *cur;

    /* Lazily read the INI files */
    if (!initialized) {
        int rc = ompi_btl_openib_ini_init();
        if (OMPI_SUCCESS != rc) {
            return rc;
        }
    }

    if (mca_btl_openib_component.verbose) {
        BTL_OUTPUT(("Querying INI files for vendor 0x%04x, part ID %d",
                    vendor_id, vendor_part_id));
    }

    reset_values(values);

    /* Walk the saved entries looking for an exact ID match */
    cur = opal_list_get_first(&hcas);
    while (cur != opal_list_get_end(&hcas)) {
        hca_values_t *entry = (hca_values_t*) cur;

        if (entry->vendor_id == vendor_id &&
            entry->vendor_part_id == vendor_part_id) {
            *values = entry->values;
            if (mca_btl_openib_component.verbose) {
                BTL_OUTPUT(("Found corresponding INI values: %s",
                            entry->section_name));
            }
            return OMPI_SUCCESS;
        }
        cur = opal_list_get_next(cur);
    }

    /* Nothing matched */
    if (mca_btl_openib_component.verbose) {
        BTL_OUTPUT(("Did not find corresponding INI values"));
    }
    return OMPI_ERR_NOT_FOUND;
}
/*
 * The component is shutting down; release all internal state.
 *
 * Every saved hca_values_t is released and the list is destructed.
 * The module is then marked as uninitialized so that a second
 * finalize is a no-op and a later query re-reads the INI files
 * instead of walking a destructed list.
 *
 * Always returns OMPI_SUCCESS.
 */
int ompi_btl_openib_ini_finalize(void)
{
    opal_list_item_t *item;

    if (initialized) {
        for (item = opal_list_remove_first(&hcas);
             NULL != item;
             item = opal_list_remove_first(&hcas)) {
            OBJ_RELEASE(item);
        }
        OBJ_DESTRUCT(&hcas);
        initialized = false;
    }

    return OMPI_SUCCESS;
}
/**************************************************************************/
/*
 * Parse a single INI file.
 *
 * Tokens are pulled from the lexer until it reports the end of the
 * file.  Each section header saves the section parsed so far and
 * starts a new one; each key is handed to parse_line().  Keys that
 * appear before any section header are parsed and then thrown away.
 * The "not in a section" and "unexpected token" warnings are each
 * shown at most once per file.
 *
 * Returns OMPI_ERR_NOT_FOUND if the file cannot be opened and
 * OMPI_SUCCESS otherwise.
 */
static int parse_file(char *filename)
{
    int rc = OMPI_SUCCESS;
    bool warned_no_section = false;
    bool warned_bad_token = false;
    parsed_section_values_t cur;

    reset_section(false, &cur);

    /* Open the file */
    ini_filename = filename;
    btl_openib_ini_yyin = fopen(filename, "r");
    if (NULL != btl_openib_ini_yyin) {
        /* Do the parsing */
        btl_openib_ini_parse_done = false;
        btl_openib_ini_yynewlines = 1;
        btl_openib_ini_init_buffer(btl_openib_ini_yyin);

        while (!btl_openib_ini_parse_done) {
            const int token = btl_openib_ini_yylex();

            if (BTL_OPENIB_INI_PARSE_DONE == token ||
                BTL_OPENIB_INI_PARSE_NEWLINE == token) {
                /* End of input (the loop condition picks that up) or
                   a blank line: nothing to do */
                continue;
            }

            if (BTL_OPENIB_INI_PARSE_SECTION == token) {
                /* A new section begins: keep whatever the previous
                   one collected, then start afresh */
                save_section(&cur);
                reset_section(true, &cur);
                cur.name = strdup(btl_openib_ini_yytext);
            } else if (BTL_OPENIB_INI_PARSE_SINGLE_WORD == token) {
                if (NULL != cur.name) {
                    parse_line(&cur);
                } else {
                    /* No current section: warn (once), consume the
                       line, and discard what it set */
                    if (!warned_no_section) {
                        show_help("ini file:not in a section");
                        warned_no_section = true;
                    }
                    parse_line(&cur);
                    reset_section(true, &cur);
                }
            } else if (!warned_bad_token) {
                /* Anything else is an error */
                show_help("ini file:unexpected token");
                warned_bad_token = true;
            }
        }

        /* The last section has no following header to trigger a save */
        save_section(&cur);
        fclose(btl_openib_ini_yyin);
    } else {
        opal_show_help("help-mpi-btl-openib.txt", "ini file:file not found",
                       true, filename);
        rc = OMPI_ERR_NOT_FOUND;
    }

    /* Common cleanup for both outcomes */
    reset_section(true, &cur);
    if (NULL != key_buffer) {
        free(key_buffer);
        key_buffer = NULL;
        key_buffer_len = 0;
    }

    return rc;
}
/*
 * Parse a single "key = value" line in the INI file.
 *
 * On entry the lexer has just returned the key token.  This function
 * consumes the "=", the optional value, and the end of the line, and
 * then stores the value in *sv according to the key:
 *
 *   vendor_id       single integer
 *   vendor_part_id  single integer, or comma-delimited list of integers
 *   mtu             single integer
 *
 * Any other key produces an "unknown field" warning and is ignored.
 *
 * Returns OMPI_SUCCESS on a good parse, OMPI_ERROR on a syntax error,
 * OMPI_ERR_BAD_PARAM if the key has no value, and an out-of-resource
 * code if memory cannot be allocated.
 */
static int parse_line(parsed_section_values_t *sv)
{
    int val, ret = OMPI_SUCCESS;
    char *value = NULL, *comma;
    size_t key_len;

    /* Save the key name; the lexer text is overwritten by the next
       call to yylex().  The buffer only ever grows. */
    key_len = strlen(btl_openib_ini_yytext) + 1;
    if (key_buffer_len < key_len) {
        char *tmp = realloc(key_buffer, key_len);
        if (NULL == tmp) {
            free(key_buffer);
            key_buffer_len = 0;
            key_buffer = NULL;
            return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
        }
        key_buffer = tmp;
        key_buffer_len = key_len;
    }
    memcpy(key_buffer, btl_openib_ini_yytext, key_len);

    /* The first thing we have to see is an "=" */
    val = btl_openib_ini_yylex();
    if (btl_openib_ini_parse_done || BTL_OPENIB_INI_PARSE_EQUAL != val) {
        show_help("ini file:expected equals");
        return OMPI_ERROR;
    }

    /* Next we get the value */
    val = btl_openib_ini_yylex();
    if (BTL_OPENIB_INI_PARSE_SINGLE_WORD == val ||
        BTL_OPENIB_INI_PARSE_VALUE == val) {
        value = strdup(btl_openib_ini_yytext);

        /* Now we need to see the newline */
        val = btl_openib_ini_yylex();
        if (BTL_OPENIB_INI_PARSE_NEWLINE != val &&
            BTL_OPENIB_INI_PARSE_DONE != val) {
            show_help("ini file:expected newline");
            free(value);
            return OMPI_ERROR;
        }
        if (NULL == value) {
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
    }

    /* If we did not get EOL or EOF, something is wrong */
    else if (BTL_OPENIB_INI_PARSE_DONE != val &&
             BTL_OPENIB_INI_PARSE_NEWLINE != val) {
        show_help("ini file:expected newline");
        return OMPI_ERROR;
    }

    /* "key =" followed directly by the end of the line: there is no
       value to convert, so leave the section untouched */
    if (NULL == value) {
        return OMPI_ERR_BAD_PARAM;
    }

    /* Ok, we got a good parse.  Now figure out what it is and save
       the value.  Note that the flex already took care of trimming
       all whitespace at the beginning and ending of the value. */

    if (0 == strcasecmp(key_buffer, "vendor_id")) {
        /* Single value */
        sv->vendor_id = (uint32_t) intify(value);
        sv->vendor_id_set = true;
    }

    else if (0 == strcasecmp(key_buffer, "vendor_part_id")) {
        char *str = value;

        /* Comma-delimited list of values */
        comma = strchr(str, ',');
        if (NULL == comma) {
            /* Only one value (i.e., no comma found): it replaces
               whatever list the section had so far */
            uint32_t *ids = malloc(sizeof(uint32_t));
            if (NULL == ids) {
                ret = OMPI_ERR_OUT_OF_RESOURCE;
            } else {
                ids[0] = (uint32_t) intify(str);
                free(sv->vendor_part_ids);
                sv->vendor_part_ids = ids;
                sv->vendor_part_ids_len = 1;
            }
        } else {
            /* Several values: count them (commas + 1) so that the
               array is grown exactly once, then append each one to
               the section's existing list */
            int extra = 1;
            char *p;
            uint32_t *ids;

            for (p = comma; NULL != p; p = strchr(p + 1, ',')) {
                ++extra;
            }
            ids = realloc(sv->vendor_part_ids,
                          sizeof(uint32_t) *
                          (sv->vendor_part_ids_len + extra));
            if (NULL == ids) {
                /* The old array is still valid and still owned by sv */
                ret = OMPI_ERR_OUT_OF_RESOURCE;
            } else {
                sv->vendor_part_ids = ids;
                do {
                    *comma = '\0';
                    ids[sv->vendor_part_ids_len] = (uint32_t) intify(str);
                    ++sv->vendor_part_ids_len;
                    str = comma + 1;
                    comma = strchr(str, ',');
                } while (NULL != comma);

                /* Get the last value (i.e., the value after the last
                   comma, because it won't have been snarfed in the
                   loop) */
                ids[sv->vendor_part_ids_len] = (uint32_t) intify(str);
                ++sv->vendor_part_ids_len;
            }
        }
    }

    else if (0 == strcasecmp(key_buffer, "mtu")) {
        /* Single value */
        sv->values.mtu = (uint32_t) intify(value);
        sv->values.mtu_set = true;
    }

    else {
        /* Have no idea what this parameter is.  Not an error -- just
           warn and ignore it */
        show_help("ini file:unknown field");
    }

    /* All done */
    free(value);
    return ret;
}
/*
 * Class constructor for hca_values_t: put every field into a known
 * "empty" state (no section name, zero IDs, no values set).
 */
static void hca_values_constructor(hca_values_t *s)
{
    reset_values(&s->values);
    s->vendor_part_id = 0;
    s->vendor_id = 0;
    s->section_name = NULL;
}
/*
 * Class destructor for hca_values_t: give back the section name, the
 * only heap memory that an instance owns.
 */
static void hca_values_destructor(hca_values_t *s)
{
    /* free() accepts NULL, so an unnamed instance needs no special case */
    free(s->section_name);
}
/*
 * Return a parsed section to its empty state.
 *
 * had_previous_value must be false the first time a (still
 * uninitialized) struct is reset, so that its garbage pointers are not
 * freed, and true on every later call, so that the name and the part
 * ID array are released.
 */
static void reset_section(bool had_previous_value, parsed_section_values_t *s)
{
    if (had_previous_value) {
        /* free(NULL) is a no-op, so unset members need no guard */
        free(s->name);
        free(s->vendor_part_ids);
    }

    reset_values(&s->values);
    s->vendor_part_ids_len = 0;
    s->vendor_part_ids = NULL;
    s->vendor_id_set = false;
    s->vendor_id = 0;
    s->name = NULL;
}
/*
 * Mark every settable value as "not set" and zero its payload.
 */
static void reset_values(ompi_btl_openib_ini_values_t *v)
{
    v->mtu_set = false;
    v->mtu = 0;
}
/*
 * If we have a valid section, see if we have a matching entry
 * somewhere in the saved list (i.e., same vendor ID and vendor part
 * ID).  If we do, update that entry with any newly-set values.  If
 * not, save the values in a new instance and add it to the list.
 * This is done once for every part ID that the section names.
 *
 * Returns OMPI_ERR_BAD_PARAM if the section is incomplete (no name,
 * no vendor ID, or no part IDs), OMPI_ERR_OUT_OF_RESOURCE if a new
 * entry cannot be allocated, and OMPI_SUCCESS otherwise.
 */
static int save_section(parsed_section_values_t *s)
{
    int i;
    opal_list_item_t *item;
    hca_values_t *h;
    bool found;

    /* Is the parsed section valid? */
    if (NULL == s->name || !s->vendor_id_set || 0 == s->vendor_part_ids_len) {
        return OMPI_ERR_BAD_PARAM;
    }

    /* Iterate over each of the part IDs in the parsed values */
    for (i = 0; i < s->vendor_part_ids_len; ++i) {
        found = false;

        /* Iterate over all the saved hcas */
        for (item = opal_list_get_first(&hcas);
             item != opal_list_get_end(&hcas);
             item = opal_list_get_next(item)) {
            h = (hca_values_t*) item;
            if (s->vendor_id == h->vendor_id &&
                s->vendor_part_ids[i] == h->vendor_part_id) {
                /* Found a match.  Update any newly-set values. */
                if (s->values.mtu_set) {
                    h->values.mtu = s->values.mtu;
                    h->values.mtu_set = true;
                }

                /* The entry exists whether or not this section had
                   anything new for it, so never add a duplicate */
                found = true;
                break;
            }
        }

        /* Did we find/update it in the existing list?  If not, create
           a new one. */
        if (!found) {
            h = OBJ_NEW(hca_values_t);
            if (NULL == h) {
                return OMPI_ERR_OUT_OF_RESOURCE;
            }
            h->section_name = strdup(s->name);
            h->vendor_id = s->vendor_id;
            h->vendor_part_id = s->vendor_part_ids[i];
            h->values = s->values;
            opal_list_append(&hcas, &h->super);
        }
    }

    /* All done */
    return OMPI_SUCCESS;
}
/*
 * Do string-to-integer conversion, for both hex and decimal numbers.
 *
 * Leading whitespace is skipped.  A string that starts with "0x" or
 * "0X" and has at least one character after the prefix is read as
 * hexadecimal; everything else is handed to atoi().  A hex string
 * that cannot be converted yields 0.
 */
static int intify(char *str)
{
    /* <ctype.h> functions require a value representable as an
       unsigned char */
    while (isspace((unsigned char) *str)) {
        ++str;
    }

    /* If it's hex, use sscanf().  "0x" plus a single digit (length 3)
       is the shortest valid hex number. */
    if (strlen(str) > 2 && 0 == strncasecmp("0x", str, 2)) {
        unsigned int i = 0;
        if (1 != sscanf(str, "%X", &i)) {
            return 0;
        }
        return (int) i;
    }

    /* Nope -- just decimal, so use atoi() */
    return atoi(str);
}
/*
 * Trivial helper function: show the given help topic from the openib
 * help file, passing along the name of the INI file being parsed, the
 * lexer's current line counter, and the lexer's current token text.
 */
static inline void show_help(const char *topic)
{
    const int line = btl_openib_ini_yynewlines;
    const char *token_text = btl_openib_ini_yytext;

    opal_show_help("help-mpi-btl-openib.txt", topic, true,
                   ini_filename, line, token_text);
}

52
ompi/mca/btl/openib/btl_openib_ini.h Обычный файл
Просмотреть файл

@ -0,0 +1,52 @@
/*
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
* @file
*/
#ifndef MCA_PTL_IB_PARAMS_H
#define MCA_PTL_IB_PARAMS_H
#include "btl_openib.h"
/*
 * Struct to hold the settable values that may be specified in the INI
 * file.  Each value is paired with a flag that records whether the
 * INI file actually specified it.
 */
typedef struct ompi_btl_openib_ini_values_t {
/* Value of the "mtu" key.  NOTE(review): whether this is a byte
   count or an IBV_MTU_* enum value is not visible here -- confirm */
uint32_t mtu;
/* true if an "mtu" key was parsed; mtu is only meaningful then */
bool mtu_set;
} ompi_btl_openib_ini_values_t;
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/**
* Read in the INI files containing HCA params
*/
int ompi_btl_openib_ini_init(void);
/**
* Query the read-in params for a given HCA
*/
int ompi_btl_openib_ini_query(uint32_t vendor_id,
uint32_t vendor_part_id,
ompi_btl_openib_ini_values_t *values);
/**
* Shut down / release all internal state
*/
int ompi_btl_openib_ini_finalize(void);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

68
ompi/mca/btl/openib/btl_openib_lex.h Обычный файл
Просмотреть файл

@ -0,0 +1,68 @@
/* -*- C -*-
*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef BTL_OPENIB_INI_LEX_H_
#define BTL_OPENIB_INI_LEX_H_
#include "opal_config.h"
#ifdef malloc
#undef malloc
#endif
#ifdef realloc
#undef realloc
#endif
#ifdef free
#undef free
#endif
#include <stdio.h>
int btl_openib_ini_yylex(void);
int btl_openib_ini_init_buffer(FILE *file);
extern FILE *btl_openib_ini_yyin;
extern bool btl_openib_ini_parse_done;
extern char *btl_openib_ini_yytext;
extern int btl_openib_ini_yynewlines;
/*
* Make lex-generated files not issue compiler warnings
*/
#define YY_STACK_USED 0
#define YY_ALWAYS_INTERACTIVE 0
#define YY_NEVER_INTERACTIVE 0
#define YY_MAIN 0
#define YY_NO_UNPUT 1
enum {
BTL_OPENIB_INI_PARSE_DONE,
BTL_OPENIB_INI_PARSE_ERROR,
BTL_OPENIB_INI_PARSE_NEWLINE,
BTL_OPENIB_INI_PARSE_SECTION,
BTL_OPENIB_INI_PARSE_EQUAL,
BTL_OPENIB_INI_PARSE_SINGLE_WORD,
BTL_OPENIB_INI_PARSE_VALUE,
BTL_OPENIB_INI_PARSE_MAX
};
#endif

142
ompi/mca/btl/openib/btl_openib_lex.l Обычный файл
Просмотреть файл

@ -0,0 +1,142 @@
%{ /* -*- C -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#include <stdio.h>
#if HAVE_UNISTD_H
#include <unistd.h>
#endif
#include "btl_openib_lex.h"
/*
* local functions
*/
static int finish_parsing(void) ;
static int btl_openib_ini_yywrap(void);
/*
* global variables
*/
int btl_openib_ini_yynewlines = 1;
bool btl_openib_ini_parse_done = false;
char *btl_openib_ini_string = NULL;
#define yyterminate() \
return finish_parsing()
%}
WHITE [\f\t\v ]
CHAR [A-Za-z0-9_\-\.]
NAME_CHAR [A-Za-z0-9_\-\.\\\/]
%x comment
%x section_name
%x section_end
%x value
%%
{WHITE}*\n { ++btl_openib_ini_yynewlines;
return BTL_OPENIB_INI_PARSE_NEWLINE; }
#.*\n { ++btl_openib_ini_yynewlines;
return BTL_OPENIB_INI_PARSE_NEWLINE; }
"//".*\n { ++btl_openib_ini_yynewlines;
return BTL_OPENIB_INI_PARSE_NEWLINE; }
"/*" { BEGIN(comment);
return BTL_OPENIB_INI_PARSE_NEWLINE; }
<comment>[^*\n]* ; /* Eat up non '*'s */
<comment>"*"+[^*/\n]* ; /* Eat '*'s not followed by a '/' */
<comment>\n { ++btl_openib_ini_yynewlines;
return BTL_OPENIB_INI_PARSE_NEWLINE; }
<comment>"*"+"/" { BEGIN(INITIAL); /* Done with block comment */
return BTL_OPENIB_INI_PARSE_NEWLINE; }
{WHITE}*\[{WHITE}* { BEGIN(section_name); }
<section_name>({NAME_CHAR}|{WHITE})*{NAME_CHAR}/{WHITE}*\] {
BEGIN(section_end);
return BTL_OPENIB_INI_PARSE_SECTION; }
<section_name>\n { ++btl_openib_ini_yynewlines;
return BTL_OPENIB_INI_PARSE_ERROR; }
<section_name>. { return BTL_OPENIB_INI_PARSE_ERROR; }
<section_end>{WHITE}*\]{WHITE}*\n { BEGIN(INITIAL);
++btl_openib_ini_yynewlines;
return BTL_OPENIB_INI_PARSE_NEWLINE; }
{WHITE}*"="{WHITE}* { BEGIN(value);
return BTL_OPENIB_INI_PARSE_EQUAL; }
{WHITE}+ ; /* whitespace */
{CHAR}+ { return BTL_OPENIB_INI_PARSE_SINGLE_WORD; }
<value>{WHITE}*\n { BEGIN(INITIAL);
++btl_openib_ini_yynewlines;
return BTL_OPENIB_INI_PARSE_NEWLINE; }
<value>[^\n]*[^\t \n]/[\t ]* {
return BTL_OPENIB_INI_PARSE_VALUE; }
. { return BTL_OPENIB_INI_PARSE_ERROR; }
%%
/*
 * This cleans up at the end of the parse (since, in this case, we
 * always parse the entire file) and prevents a memory leak.
 *
 * It is what the yyterminate() macro defined at the top of this file
 * expands to: the scanner's current buffer is deleted and the
 * current-buffer pointer is cleared so that it is not used again.
 *
 * Returns YY_NULL.
 */
static int finish_parsing(void)
{
if (NULL != YY_CURRENT_BUFFER) {
yy_delete_buffer(YY_CURRENT_BUFFER);
/* Use YY_CURRENT_BUFFER_LVALUE when the scanner defines it;
   otherwise assign to YY_CURRENT_BUFFER directly */
#if defined(YY_CURRENT_BUFFER_LVALUE)
YY_CURRENT_BUFFER_LVALUE = NULL;
#else
YY_CURRENT_BUFFER = NULL;
#endif /* YY_CURRENT_BUFFER_LVALUE */
}
return YY_NULL;
}
/*
 * End-of-input hook for the scanner.  Sets the global
 * btl_openib_ini_parse_done flag, which the parsing loop in the INI
 * reader polls, and returns 1 (presumably telling flex that there is
 * no further input to switch to -- confirm against the flex manual).
 */
static int btl_openib_ini_yywrap(void)
{
btl_openib_ini_parse_done = true;
return 1;
}
/*
 * Give the scanner a fresh input buffer that reads from the given
 * file.
 *
 * This is needed whenever the scanner is run again: finish_parsing()
 * (above) freed the previous buffer at the end of the last parse, but
 * flex still believes that its internal state is initialized and
 * would otherwise try to use the buffer that is gone.
 *
 * Always returns 0.
 */
int btl_openib_ini_init_buffer(FILE *file)
{
    yy_switch_to_buffer(yy_create_buffer(file, YY_BUF_SIZE));

    return 0;
}

378
ompi/mca/btl/openib/btl_openib_mca.c Обычный файл
Просмотреть файл

@ -0,0 +1,378 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <string.h>
#include "opal/util/output.h"
#include "opal/mca/base/mca_base_param.h"
#include "btl_openib.h"
#include "btl_openib_mca.h"
/*
 * Local flags: bit masks for the "flags" argument of reg_int() and
 * reg_string() that select which validity checks are applied to a
 * parameter value.
 */
enum {
/* A value of exactly -1 is accepted without any further checks */
REGINT_NEG_ONE_OK = 0x01,
/* The value must be >= 0 */
REGINT_GE_ZERO = 0x02,
/* The value must be >= 1 */
REGINT_GE_ONE = 0x04,
/* The value must not be 0 */
REGINT_NONZERO = 0x08,
/* NOTE(review): not tested by reg_int(); presumably a sentinel,
   but 0x88 overlaps REGINT_NONZERO -- confirm the intent */
REGINT_MAX = 0x88
};
enum {
/* Concerns whether an empty string value is acceptable */
REGSTR_EMPTY_OK = 0x01,
/* NOTE(review): not tested by reg_string(); presumably a sentinel
   -- confirm */
REGSTR_MAX = 0x88
};
/*
 * Utility routine for string parameter registration.
 *
 * Registers the parameter with the MCA parameter system under this
 * component and looks up its current value.  Unless REGSTR_EMPTY_OK
 * is set in flags, a missing or empty value is rejected.
 *
 * param_name     name of the parameter
 * param_desc     help text for the parameter
 * default_value  value to use if the parameter is not set
 * out_value      receives the value; left untouched on failure
 * flags          0 or REGSTR_EMPTY_OK
 *
 * Returns OMPI_SUCCESS, or OMPI_ERR_BAD_PARAM if the value is rejected.
 */
static inline int reg_string(const char* param_name, const char* param_desc,
                             const char* default_value, char **out_value,
                             int flags)
{
    char *value = NULL;

    mca_base_param_reg_string(&mca_btl_openib_component.super.btl_version,
                              param_name, param_desc, false, false,
                              default_value, &value);

    /* An empty (or absent) string is an error only when the caller
       did NOT say that empty is OK */
    if (0 == (flags & REGSTR_EMPTY_OK) &&
        (NULL == value || 0 == strlen(value))) {
        return OMPI_ERR_BAD_PARAM;
    }

    *out_value = value;
    return OMPI_SUCCESS;
}
/*
 * Utility routine for integer parameter registration.
 *
 * Registers the parameter with the MCA parameter system under this
 * component, looks up its current value, and validates it against the
 * REGINT_* bits in flags.  With REGINT_NEG_ONE_OK, a value of -1
 * bypasses all of the other checks.
 *
 * *out_value is written only when the value is accepted.
 *
 * Returns OMPI_SUCCESS, or OMPI_ERR_BAD_PARAM if the value is rejected.
 */
static inline int reg_int(const char* param_name, const char* param_desc,
                          int default_value, int *out_value, int flags)
{
    int v;
    bool neg_one_allowed;

    mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version,
                           param_name, param_desc, false, false,
                           default_value, &v);

    neg_one_allowed = (-1 == v) && (0 != (flags & REGINT_NEG_ONE_OK));
    if (!neg_one_allowed) {
        /* One guard per range flag */
        if (0 != (flags & REGINT_GE_ZERO) && v < 0) {
            return OMPI_ERR_BAD_PARAM;
        }
        if (0 != (flags & REGINT_GE_ONE) && v < 1) {
            return OMPI_ERR_BAD_PARAM;
        }
        if (0 != (flags & REGINT_NONZERO) && 0 == v) {
            return OMPI_ERR_BAD_PARAM;
        }
    }

    *out_value = v;
    return OMPI_SUCCESS;
}
/*
* Register and check all MCA parameters
*/
int btl_openib_register_mca_params(void)
{
char *msg, *str;
int ival, ret, tmp;
ret = OMPI_SUCCESS;
#define CHECK(expr) \
tmp = (expr); \
if (OMPI_SUCCESS != tmp) ret = tmp;
/* register IB component parameters */
CHECK(reg_int("verbose",
"Output some verbose OpenIB BTL information "
"(0 = no output, nonzero = output)", 0, &ival, 0));
mca_btl_openib_component.verbose = (0 != ival);
CHECK(reg_int("warn_no_hca_params_found",
"Warn when no HCA-specific parameters are found in the INI file specified by the btl_openib_hca_param_files MCA parameter (0 = do not warn; any other value = warn)",
1, &ival, 0));
mca_btl_openib_component.warn_no_hca_params_found = (0 != ival);
asprintf(&str, "%s/mca-btl-openib-hca-params.ini", PKGDATADIR);
if (NULL == str) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
CHECK(reg_string("hca_param_files",
"Colon-delimited list of INI-style files that contain HCA vendor/part-specific parameters",
str, &mca_btl_openib_component.hca_params_file_names, 0));
free(str);
CHECK(reg_int("max_btls",
"Maximum number of HCA ports to use "
"(-1 = use all available, otherwise must be >= 1)",
-1, &mca_btl_openib_component.ib_max_btls,
REGINT_NEG_ONE_OK | REGINT_GE_ONE));
CHECK(reg_int("free_list_num",
"Intial size of free lists (must be >= 1)",
8, &mca_btl_openib_component.ib_free_list_num,
REGINT_GE_ONE));
CHECK(reg_int("free_list_max",
"Maximum size of free lists "
"(-1 = infinite, otherwise must be >= 0)",
-1, &mca_btl_openib_component.ib_free_list_max,
REGINT_NEG_ONE_OK | REGINT_GE_ONE));
CHECK(reg_int("free_list_inc",
"Increment size of free lists (must be >= 1)",
32, &mca_btl_openib_component.ib_free_list_inc,
REGINT_GE_ONE));
CHECK(reg_string("mpool",
"Name of the memory pool to be used (it is unlikely that you will ever want to change this",
"openib", &mca_btl_openib_component.ib_mpool_name,
0));
CHECK(reg_int("reg_mru_len",
"Length of the registration cache most recently used list "
"(must be >= 1)",
16, (int*) &mca_btl_openib_component.reg_mru_len,
REGINT_GE_ONE));
CHECK(reg_int("ib_cq_size", "Size of the IB completion "
"queue (will automatically be set to a minimum of "
"(2 * number_of_peers * btl_openib_rd_num))",
1000, &ival, REGINT_GE_ONE));
mca_btl_openib_component.ib_cq_size = (uint32_t) ival;
CHECK(reg_int("ib_sg_list_size", "Size of IB segment list "
"(must be >= 1)",
4, &ival, REGINT_GE_ONE));
mca_btl_openib_component.ib_sg_list_size = (uint32_t) ival;
CHECK(reg_int("ib_pkey_ix", "InfiniBand pkey index "
"(must be >= 0)",
0, &ival, REGINT_GE_ZERO));
mca_btl_openib_component.ib_pkey_ix = (uint32_t) ival;
CHECK(reg_int("ib_psn", "InfiniBand packet sequence starting number "
"(must be >= 0)",
0, &ival, REGINT_GE_ZERO));
mca_btl_openib_component.ib_psn = (uint32_t) ival;
CHECK(reg_int("ib_qp_ous_rd_atom", "InfiniBand outstanding atomic reads "
"(must be >= 0)",
4, &ival, REGINT_GE_ZERO));
mca_btl_openib_component.ib_qp_ous_rd_atom = (uint32_t) ival;
asprintf(&msg, "IB MTU, in bytes (if not specified in INI files). Valid values are: %d=256 bytes, %d=512 bytes, %d=1024 bytes, %d=2048 bytes, %d=4096 bytes",
IBV_MTU_256,
IBV_MTU_512,
IBV_MTU_1024,
IBV_MTU_2048,
IBV_MTU_4096);
if (NULL == msg) {
/* Don't try to recover from this */
return OMPI_ERR_OUT_OF_RESOURCE;
}
CHECK(reg_int("ib_mtu", msg, IBV_MTU_1024, &ival, 0));
free(msg);
if (ival < IBV_MTU_1024 || ival > IBV_MTU_4096) {
ret = OMPI_ERR_BAD_PARAM;
mca_btl_openib_component.ib_mtu = IBV_MTU_1024;
} else {
mca_btl_openib_component.ib_mtu = (uint32_t) ival;
}
/* JMS Is this really in seconds? Is there a max? */
CHECK(reg_int("ib_min_rnr_timer", "InfiniBand minimum "
"\"receiver not ready\" timer, in seconds "
"(must be >= 1)",
5, &ival, REGINT_GE_ONE));
mca_btl_openib_component.ib_min_rnr_timer = (uint32_t) ival;
/* JMS is there a max? */
CHECK(reg_int("ib_timeout", "InfiniBand transmit timeout, in seconds"
"(must be >= 1)",
10, &ival, REGINT_GE_ONE));
mca_btl_openib_component.ib_timeout = (uint32_t) ival;
/* JMS What is the difference between these two counts? */
/* JMS is there a max? */
CHECK(reg_int("ib_retry_count", "InfiniBand transmit retry count "
"(must be >= 1)",
7, &ival, REGINT_GE_ONE));
mca_btl_openib_component.ib_retry_count = (uint32_t) ival;
/* JMS: is there a max? */
CHECK(reg_int("ib_rnr_retry", "InfiniBand \"receiver not ready\" "
"retry count "
"(must be >= 1)",
7, &ival, REGINT_GE_ONE));
mca_btl_openib_component.ib_rnr_retry = (uint32_t) ival;
CHECK(reg_int("ib_max_rdma_dst_ops", "InfiniBand maximum pending RDMA "
"destination operations "
"(must be >= 1)",
4, &ival, REGINT_GE_ONE));
mca_btl_openib_component.ib_max_rdma_dst_ops = (uint32_t) ival;
/* JMS is there a max? */
CHECK(reg_int("ib_service_level", "InfiniBand service level "
"(must be >= 0)",
0, &ival, REGINT_GE_ZERO));
mca_btl_openib_component.ib_service_level = (uint32_t) ival;
/* JMS what is this? */
CHECK(reg_int("ib_static_rate", "InfiniBand static rate "
"(must be >= 0; defulat: %d)",
0, &ival, REGINT_GE_ZERO));
mca_btl_openib_component.ib_static_rate = (uint32_t) ival;
CHECK(reg_int("exclusivity", "OpenIB BTL exclusivity "
"(must be >= 0)",
MCA_BTL_EXCLUSIVITY_DEFAULT, &ival, REGINT_GE_ZERO));
mca_btl_openib_module.super.btl_exclusivity = (uint32_t) ival;
CHECK(reg_int("rd_num", "Number of receive descriptors to post to a "
"queue pair (must be >= 1)",
8, &ival, REGINT_GE_ONE));
mca_btl_openib_component.rd_num = (uint32_t) ival;
/* JMS low water mark of what? */
CHECK(reg_int("rd_low", "Low water mark before reposting occurs "
"(must be >= 1)",
6, &ival, REGINT_GE_ONE));
mca_btl_openib_component.rd_low = (uint32_t) ival;
/* JMS meaning what? */
CHECK(reg_int("rd_win",
"Window size at which generate explicit credit message "
"(must be >= 1)",
4, &ival, REGINT_GE_ONE));
mca_btl_openib_component.rd_win = (uint32_t) ival;
mca_btl_openib_component.rd_rsv =
((mca_btl_openib_component.rd_num << 1) - 1) /
mca_btl_openib_component.rd_win;
CHECK(reg_int("use_srq",
"If nonzero, use the InfiniBand shared receive "
"queue (\"SRQ\")",
0, &ival, 0));
mca_btl_openib_component.use_srq = (0 != ival);
CHECK(reg_int("srq_rd_max", "Maxium number of receive descriptors "
"posted per SRQ (only relevant if btl_openib_use_srq is "
"true; must be >= 1)",
1000, &ival, REGINT_GE_ONE));
mca_btl_openib_component.srq_rd_max = (uint32_t) ival;
CHECK(reg_int("srq_rd_per_peer",
"Number of receive descriptors posted per peer in the SRQ "
"(only relevant if btl_openib_use_srq is "
"true; must be >= 1)",
16, &ival, REGINT_GE_ONE));
mca_btl_openib_component.srq_rd_per_peer = ival;
CHECK(reg_int("srq_sd_max",
"Maximum number of send descriptors posted "
"(only relevant if btl_openib_use_srq is "
"true; must be >= 1)",
8, &ival, REGINT_GE_ONE));
mca_btl_openib_component.srq_sd_max = (uint32_t) ival;
CHECK(reg_int("use_eager_rdma", "Use RDMA for eager messages "
"(automatically disabled if SRQ is enabled)",
1, &ival, 0));
mca_btl_openib_component.use_eager_rdma = (uint32_t) (ival != 0);
if (mca_btl_openib_component.use_srq) {
mca_btl_openib_component.use_eager_rdma = 0;
}
CHECK(reg_int("eager_rdma_threshold",
"Use RDMA for short messages after this number of "
"messages are received from a given peer "
"(must be >= 1)",
16, &ival, REGINT_GE_ONE));
mca_btl_openib_component.eager_rdma_threshold = (uint32_t) ival;
CHECK(reg_int("max_eager_rdma", "Maximum number of peers allowed to use "
"RDMA for short messages (RDMA is used for all long "
"messages, except if explicitly disabled, such as "
"with the \"dr\" pml) "
"(must be >= 0)",
16, &ival, REGINT_GE_ZERO));
mca_btl_openib_component.max_eager_rdma = (uint32_t) ival;
CHECK(reg_int("eager_rdma_num", "Number of RDMA buffers to allocate "
"for small messages"
"(must be >= 1)",
16, &ival, REGINT_GE_ONE));
mca_btl_openib_component.eager_rdma_num = (uint32_t) (ival + 1);
CHECK(reg_int("btls_per_lid", "Number of BTLs to create for each "
"InfiniBand LID "
"(must be >= 1)",
1, &ival, REGINT_GE_ONE));
mca_btl_openib_component.btls_per_lid = (uint32_t) ival;
CHECK(reg_int("max_lmc", "Maximum number of LIDs to use for each HCA port "
"(must be >= 0, where 0 = use all available)",
0, &ival, REGINT_GE_ZERO));
mca_btl_openib_component.max_lmc = (uint32_t) ival;
CHECK(reg_int("buffer_alignment",
"Prefered communication buffer alignment, in bytes "
"(must be >= 0)",
64, &ival, REGINT_GE_ZERO));
mca_btl_openib_component.buffer_alignment = (uint32_t) ival;
CHECK(reg_int("eager_limit", "Eager send limit, in bytes "
"(must be >= 1)",
(12 * 1024), &ival, REGINT_GE_ONE));
mca_btl_openib_module.super.btl_eager_limit = (uint32_t) ival;
CHECK(reg_int("min_send_size", "Minimum send size, in bytes "
"(must be >= 1)",
(32 * 1024), &ival, REGINT_GE_ONE));
mca_btl_openib_module.super.btl_min_send_size = (uint32_t) ival;
CHECK(reg_int("max_send_size", "Maximum send size, in bytes "
"(must be >= 1)",
(64 * 1024), &ival, REGINT_GE_ONE));
mca_btl_openib_module.super.btl_max_send_size = (uint32_t) ival;
CHECK(reg_int("min_rdma_size", "Minimum RDMA size, in bytes "
"(must be >= 1)",
(1024 * 1024), &ival, REGINT_GE_ONE));
mca_btl_openib_module.super.btl_min_rdma_size = (uint32_t) ival;
CHECK(reg_int("max_rdma_size", "Maximium RDMA size, in bytes "
"(must be >= 1)",
(1024 * 1024), &ival, REGINT_GE_ONE));
mca_btl_openib_module.super.btl_max_rdma_size = (uint32_t) ival;
CHECK(reg_int("flags", "BTL flags, added together: SEND=1, PUT=2, GET=4 "
"(cannot be 0)",
MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_NEED_ACK |
MCA_BTL_FLAGS_NEED_CSUM, &ival, REGINT_GE_ZERO));
mca_btl_openib_module.super.btl_flags = (uint32_t) ival;
CHECK(reg_int("bandwidth", "Approximate maximum bandwidth "
"of network (must be >= 1)",
800, &ival, REGINT_GE_ONE));
mca_btl_openib_module.super.btl_bandwidth = (uint32_t) ival;
return ret;
}

25
ompi/mca/btl/openib/btl_openib_mca.h Обычный файл
Просмотреть файл

@ -0,0 +1,25 @@
/*
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/* Include guard for the openib BTL MCA-parameter registration interface */
#ifndef MCA_BTL_IB_MCA_H
#define MCA_BTL_IB_MCA_H
/* Give the declaration below C linkage when compiled as C++ */
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/**
* Register the MCA parameters and check them for sane values.
*
* Takes no arguments.
*
* @return An integer status code.  NOTE(review): presumably an OMPI
* success/error constant reflecting whether every parameter was
* registered and validated -- confirm against the implementation.
*/
int btl_openib_register_mca_params(void);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif /* MCA_BTL_IB_MCA_H */

Просмотреть файл

@ -18,6 +18,68 @@
#
# This is the US/English general help file for Open MPI.
#
[ini file:file not found]
The Open MPI OpenIB BTL component was unable to find or read an INI
file that was requested via the btl_openib_hca_param_files MCA
parameter. Please check this file and/or modify the
btl_openib_hca_param_files MCA parameter:
%s
[ini file:not in a section]
In parsing Open IB BTL parameter file, values were found that were not
in a valid INI section. These values will be ignored. Please
re-check this file:
%s
At line %d, near the following text:
%s
[ini file:unexpected token]
In parsing Open IB BTL parameter file, unexpected tokens were found
(this may cause significant portions of the INI file to be ignored).
Please re-check this file:
%s
At line %d, near the following text:
%s
[ini file:expected equals]
In parsing Open IB BTL parameter file, unexpected tokens were found
(this may cause significant portions of the INI file to be ignored).
An equals sign ("=") was expected but was not found. Please re-check
this file:
%s
At line %d, near the following text:
%s
[ini file:expected newline]
In parsing Open IB BTL parameter file, unexpected tokens were found
(this may cause significant portions of the INI file to be ignored).
A newline was expected but was not found. Please re-check this file:
%s
At line %d, near the following text:
%s
[no hca params found]
WARNING: No HCA parameters were found for the HCA that Open MPI
detected:
Hostname: %s
HCA vendor ID: 0x%04x
HCA vendor part ID: %d
Default HCA parameters will be used, which may result in lower
performance. You can edit any of the files specified by the
btl_openib_hca_param_files MCA parameter to set values for your HCA.
NOTE: You can turn off this warning by setting the MCA parameter
btl_openib_warn_no_hca_params_found to 0.
[btl_openib:retry-exceeded]
The InfiniBand retry count between two MPI processes has been
exceeded. "Retry count" is defined in the InfiniBand spec 1.2

Просмотреть файл

@ -0,0 +1,75 @@
#
# Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# This is the default HCA parameters file for Open MPI's OpenIB BTL.
# If HCA vendors wish to add their respective values into this file
# (that is distributed with Open MPI), please contact the Open MPI
# development team. See http://www.open-mpi.org/ for details.
# This file is in the "ini" style, meaning that it has sections
# identified by section names enclosed in square brackets (e.g.,
# "[Section name]") followed by "key = value" pairs indicating values
# for a specific HCA vendor and model. HCAs are identified by their
# vendor ID and vendor part ID, which can be obtained by running the
# diagnostic utility command "ibv_devinfo". The fields "vendor_id"
# and "vendor_part_id" are the vendor ID and vendor part ID,
# respectively.
# The sections in this file only accept a few fields:
# vendor_id: an integer, expressed either in decimal or hexadecimal
# (e.g., "13" or "0xd"), and can be taken directly from the output of
# "ibv_devinfo". Example:
#
# vendor_id = 0x05ad
# vendor_part_id: a comma-delimited list of integers of different HCA
# models from a single vendor, expressed in either decimal or
# hexadecimal (e.g., "13" or "0xd"). Individual values can be
# obtained from the output of "ibv_devinfo". Example:
#
# vendor_part_id = 25208,25218
# mtu: an integer indicating the maximum transfer unit (MTU) to be
# used with this HCA. The effective MTU will be the minimum of an
# HCA's MTU value and its peer HCA's MTU value. Valid values are 256,
# 512, 1024, 2048, and 4096. Example:
#
# mtu = 1024
############################################################################
[default]
# These are the default values, identified by the vendor and part ID
# numbers of 0 and 0. If a queried HCA does not return vendor and part
# ID numbers that match any of the sections in this file, the values
# in this section are used.
vendor_id = 0
vendor_part_id = 0
mtu = 1024
############################################################################
[Mellanox Tavor Infinihost]
# Vendor IDs can be hex or decimal
vendor_id = 0x5ad
vendor_part_id = 2310
mtu = 1024
############################################################################
[Mellanox Arbel InfiniHost III MemFree/Tavor]
vendor_id = 0x5ad
vendor_part_id = 25208,25218
mtu = 1024
############################################################################
[Mellanox Sinai Infinihost III]
vendor_id = 0x5ad
vendor_part_id = 25204
mtu = 2048