b611e6d7dc
This commit was SVN r13419.
385 строки
15 KiB
C
385 строки
15 KiB
C
/*
|
|
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
* University Research and Technology
|
|
* Corporation. All rights reserved.
|
|
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
* of Tennessee Research Foundation. All rights
|
|
* reserved.
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
* University of Stuttgart. All rights reserved.
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
* All rights reserved.
|
|
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
#include "ompi_config.h"
|
|
|
|
#include <string.h>
|
|
|
|
#include "opal/util/output.h"
|
|
#include "opal/mca/base/mca_base_param.h"
|
|
#include "btl_openib.h"
|
|
#include "btl_openib_mca.h"
|
|
|
|
/*
 * Validation flags for integer parameters (bit mask, consumed by
 * reg_int() below).  Each constraint occupies its own bit so that
 * several of them can be OR'ed together.
 */
enum {
    REGINT_NEG_ONE_OK = 1 << 0,   /* -1 is accepted as a special value */
    REGINT_GE_ZERO    = 1 << 1,   /* value must be >= 0 */
    REGINT_GE_ONE     = 1 << 2,   /* value must be >= 1 */
    REGINT_NONZERO    = 1 << 3,   /* value must not be 0 */

    /* Upper sentinel; numeric value kept exactly as it was */
    REGINT_MAX        = 0x88
};
|
|
|
|
|
|
/*
 * Validation flags for string parameters (bit mask, consumed by
 * reg_string() below).
 */
enum {
    REGSTR_EMPTY_OK = 1 << 0,   /* an empty string is an acceptable value */

    /* Upper sentinel; numeric value kept exactly as it was */
    REGSTR_MAX      = 0x88
};
|
|
|
|
/*
|
|
* utility routine for string parameter registration
|
|
*/
|
|
static inline int reg_string(const char* param_name, const char* param_desc,
|
|
const char* default_value, char **out_value,
|
|
int flags)
|
|
{
|
|
char *value;
|
|
mca_base_param_reg_string(&mca_btl_openib_component.super.btl_version,
|
|
param_name, param_desc, false, false,
|
|
default_value, &value);
|
|
|
|
if (0 != (flags & REGSTR_EMPTY_OK) && 0 == strlen(value)) {
|
|
return OMPI_ERR_BAD_PARAM;
|
|
}
|
|
*out_value = value;
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
|
|
/*
|
|
* utility routine for integer parameter registration
|
|
*/
|
|
static inline int reg_int(const char* param_name, const char* param_desc,
|
|
int default_value, int *out_value, int flags)
|
|
{
|
|
int value;
|
|
mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version,
|
|
param_name, param_desc, false, false,
|
|
default_value, &value);
|
|
|
|
if (0 != (flags & REGINT_NEG_ONE_OK) && -1 == value) {
|
|
*out_value = value;
|
|
return OMPI_SUCCESS;
|
|
}
|
|
if ((0 != (flags & REGINT_GE_ZERO) && value < 0) ||
|
|
(0 != (flags & REGINT_GE_ONE) && value < 1) ||
|
|
(0 != (flags & REGINT_NONZERO) && 0 == value)) {
|
|
return OMPI_ERR_BAD_PARAM;
|
|
}
|
|
*out_value = value;
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
|
|
/*
|
|
* Register and check all MCA parameters
|
|
*/
|
|
int btl_openib_register_mca_params(void)
|
|
{
|
|
char *msg, *str;
|
|
int ival, ret, tmp;
|
|
|
|
ret = OMPI_SUCCESS;
|
|
#define CHECK(expr) \
|
|
tmp = (expr); \
|
|
if (OMPI_SUCCESS != tmp) ret = tmp;
|
|
|
|
/* register IB component parameters */
|
|
CHECK(reg_int("verbose",
|
|
"Output some verbose OpenIB BTL information "
|
|
"(0 = no output, nonzero = output)", 0, &ival, 0));
|
|
mca_btl_openib_component.verbose = (0 != ival);
|
|
CHECK(reg_int("warn_no_hca_params_found",
|
|
"Warn when no HCA-specific parameters are found in the INI file specified by the btl_openib_hca_param_files MCA parameter (0 = do not warn; any other value = warn)",
|
|
1, &ival, 0));
|
|
mca_btl_openib_component.warn_no_hca_params_found = (0 != ival);
|
|
CHECK(reg_int("warn_default_gid_prefix",
|
|
"Warn when there is more than one active ports and at least one of them connected to the network with only default GID prefix configured (0 = do not warn; any other value = warn)",
|
|
1, &ival, 0));
|
|
mca_btl_openib_component.warn_default_gid_prefix = (0 != ival);
|
|
asprintf(&str, "%s/mca-btl-openib-hca-params.ini", PKGDATADIR);
|
|
if (NULL == str) {
|
|
return OMPI_ERR_OUT_OF_RESOURCE;
|
|
}
|
|
CHECK(reg_string("hca_param_files",
|
|
"Colon-delimited list of INI-style files that contain HCA vendor/part-specific parameters",
|
|
str, &mca_btl_openib_component.hca_params_file_names, 0));
|
|
free(str);
|
|
|
|
CHECK(reg_int("max_btls",
|
|
"Maximum number of HCA ports to use "
|
|
"(-1 = use all available, otherwise must be >= 1)",
|
|
-1, &mca_btl_openib_component.ib_max_btls,
|
|
REGINT_NEG_ONE_OK | REGINT_GE_ONE));
|
|
CHECK(reg_int("free_list_num",
|
|
"Intial size of free lists (must be >= 1)",
|
|
8, &mca_btl_openib_component.ib_free_list_num,
|
|
REGINT_GE_ONE));
|
|
CHECK(reg_int("free_list_max",
|
|
"Maximum size of free lists "
|
|
"(-1 = infinite, otherwise must be >= 0)",
|
|
-1, &mca_btl_openib_component.ib_free_list_max,
|
|
REGINT_NEG_ONE_OK | REGINT_GE_ONE));
|
|
CHECK(reg_int("free_list_inc",
|
|
"Increment size of free lists (must be >= 1)",
|
|
32, &mca_btl_openib_component.ib_free_list_inc,
|
|
REGINT_GE_ONE));
|
|
CHECK(reg_string("mpool",
|
|
"Name of the memory pool to be used (it is unlikely that you will ever want to change this",
|
|
"rdma", &mca_btl_openib_component.ib_mpool_name,
|
|
0));
|
|
CHECK(reg_int("reg_mru_len",
|
|
"Length of the registration cache most recently used list "
|
|
"(must be >= 1)",
|
|
16, (int*) &mca_btl_openib_component.reg_mru_len,
|
|
REGINT_GE_ONE));
|
|
|
|
CHECK(reg_int("ib_cq_size", "Size of the IB completion "
|
|
"queue (will automatically be set to a minimum of "
|
|
"(2 * number_of_peers * btl_openib_rd_num))",
|
|
1000, &ival, REGINT_GE_ONE));
|
|
mca_btl_openib_component.ib_cq_size = (uint32_t) ival;
|
|
|
|
CHECK(reg_int("ib_sg_list_size", "Size of IB segment list "
|
|
"(must be >= 1)",
|
|
4, &ival, REGINT_GE_ONE));
|
|
mca_btl_openib_component.ib_sg_list_size = (uint32_t) ival;
|
|
|
|
CHECK(reg_int("ib_pkey_ix", "InfiniBand pkey index "
|
|
"(must be >= 0)",
|
|
0, &ival, REGINT_GE_ZERO));
|
|
mca_btl_openib_component.ib_pkey_ix = (uint32_t) ival;
|
|
|
|
CHECK(reg_int("ib_psn", "InfiniBand packet sequence starting number "
|
|
"(must be >= 0)",
|
|
0, &ival, REGINT_GE_ZERO));
|
|
mca_btl_openib_component.ib_psn = (uint32_t) ival;
|
|
|
|
CHECK(reg_int("ib_qp_ous_rd_atom", "InfiniBand outstanding atomic reads "
|
|
"(must be >= 0)",
|
|
4, &ival, REGINT_GE_ZERO));
|
|
mca_btl_openib_component.ib_qp_ous_rd_atom = (uint32_t) ival;
|
|
|
|
asprintf(&msg, "IB MTU, in bytes (if not specified in INI files). Valid values are: %d=256 bytes, %d=512 bytes, %d=1024 bytes, %d=2048 bytes, %d=4096 bytes",
|
|
IBV_MTU_256,
|
|
IBV_MTU_512,
|
|
IBV_MTU_1024,
|
|
IBV_MTU_2048,
|
|
IBV_MTU_4096);
|
|
if (NULL == msg) {
|
|
/* Don't try to recover from this */
|
|
return OMPI_ERR_OUT_OF_RESOURCE;
|
|
}
|
|
CHECK(reg_int("ib_mtu", msg, IBV_MTU_1024, &ival, 0));
|
|
free(msg);
|
|
if (ival < IBV_MTU_1024 || ival > IBV_MTU_4096) {
|
|
ret = OMPI_ERR_BAD_PARAM;
|
|
mca_btl_openib_component.ib_mtu = IBV_MTU_1024;
|
|
} else {
|
|
mca_btl_openib_component.ib_mtu = (uint32_t) ival;
|
|
}
|
|
|
|
/* JMS Is this really in seconds? Is there a max? */
|
|
CHECK(reg_int("ib_min_rnr_timer", "InfiniBand minimum "
|
|
"\"receiver not ready\" timer, in seconds "
|
|
"(must be >= 1)",
|
|
5, &ival, REGINT_GE_ONE));
|
|
mca_btl_openib_component.ib_min_rnr_timer = (uint32_t) ival;
|
|
|
|
/* JMS is there a max? */
|
|
CHECK(reg_int("ib_timeout", "InfiniBand transmit timeout, in seconds"
|
|
"(must be >= 1)",
|
|
10, &ival, REGINT_GE_ONE));
|
|
mca_btl_openib_component.ib_timeout = (uint32_t) ival;
|
|
|
|
/* JMS What is the difference between these two counts? */
|
|
/* JMS is there a max? */
|
|
CHECK(reg_int("ib_retry_count", "InfiniBand transmit retry count "
|
|
"(must be >= 1)",
|
|
7, &ival, REGINT_GE_ONE));
|
|
mca_btl_openib_component.ib_retry_count = (uint32_t) ival;
|
|
|
|
/* JMS: is there a max? */
|
|
CHECK(reg_int("ib_rnr_retry", "InfiniBand \"receiver not ready\" "
|
|
"retry count "
|
|
"(must be >= 1)",
|
|
7, &ival, REGINT_GE_ONE));
|
|
mca_btl_openib_component.ib_rnr_retry = (uint32_t) ival;
|
|
|
|
CHECK(reg_int("ib_max_rdma_dst_ops", "InfiniBand maximum pending RDMA "
|
|
"destination operations "
|
|
"(must be >= 1)",
|
|
4, &ival, REGINT_GE_ONE));
|
|
mca_btl_openib_component.ib_max_rdma_dst_ops = (uint32_t) ival;
|
|
|
|
/* JMS is there a max? */
|
|
CHECK(reg_int("ib_service_level", "InfiniBand service level "
|
|
"(must be >= 0)",
|
|
0, &ival, REGINT_GE_ZERO));
|
|
mca_btl_openib_component.ib_service_level = (uint32_t) ival;
|
|
|
|
/* JMS what is this? */
|
|
CHECK(reg_int("ib_static_rate", "InfiniBand static rate "
|
|
"(must be >= 0; defulat: %d)",
|
|
0, &ival, REGINT_GE_ZERO));
|
|
mca_btl_openib_component.ib_static_rate = (uint32_t) ival;
|
|
|
|
CHECK(reg_int("exclusivity", "OpenIB BTL exclusivity "
|
|
"(must be >= 0)",
|
|
MCA_BTL_EXCLUSIVITY_DEFAULT, &ival, REGINT_GE_ZERO));
|
|
mca_btl_openib_module.super.btl_exclusivity = (uint32_t) ival;
|
|
|
|
CHECK(reg_int("rd_num", "Number of receive descriptors to post to a "
|
|
"queue pair (must be >= 1)",
|
|
8, &ival, REGINT_GE_ONE));
|
|
mca_btl_openib_component.rd_num = (uint32_t) ival;
|
|
|
|
/* JMS low water mark of what? */
|
|
CHECK(reg_int("rd_low", "Low water mark before reposting occurs "
|
|
"(must be >= 1)",
|
|
6, &ival, REGINT_GE_ONE));
|
|
mca_btl_openib_component.rd_low = (uint32_t) ival;
|
|
|
|
/* JMS meaning what? */
|
|
CHECK(reg_int("rd_win",
|
|
"Window size at which generate explicit credit message "
|
|
"(must be >= 1)",
|
|
4, &ival, REGINT_GE_ONE));
|
|
mca_btl_openib_component.rd_win = (uint32_t) ival;
|
|
mca_btl_openib_component.rd_rsv =
|
|
((mca_btl_openib_component.rd_num << 1) - 1) /
|
|
mca_btl_openib_component.rd_win;
|
|
|
|
CHECK(reg_int("use_srq",
|
|
"If nonzero, use the InfiniBand shared receive "
|
|
"queue (\"SRQ\")",
|
|
0, &ival, 0));
|
|
mca_btl_openib_component.use_srq = (0 != ival);
|
|
CHECK(reg_int("srq_rd_max", "Maxium number of receive descriptors "
|
|
"posted per SRQ (only relevant if btl_openib_use_srq is "
|
|
"true; must be >= 1)",
|
|
1000, &ival, REGINT_GE_ONE));
|
|
mca_btl_openib_component.srq_rd_max = (uint32_t) ival;
|
|
|
|
CHECK(reg_int("srq_rd_per_peer",
|
|
"Number of receive descriptors posted per peer in the SRQ "
|
|
"(only relevant if btl_openib_use_srq is "
|
|
"true; must be >= 1)",
|
|
16, &ival, REGINT_GE_ONE));
|
|
mca_btl_openib_component.srq_rd_per_peer = ival;
|
|
|
|
CHECK(reg_int("srq_sd_max",
|
|
"Maximum number of send descriptors posted "
|
|
"(only relevant if btl_openib_use_srq is "
|
|
"true; must be >= 1)",
|
|
8, &ival, REGINT_GE_ONE));
|
|
mca_btl_openib_component.srq_sd_max = (uint32_t) ival;
|
|
|
|
CHECK(reg_int("use_eager_rdma", "Use RDMA for eager messages ",
|
|
1, &ival, 0));
|
|
mca_btl_openib_component.use_eager_rdma = (uint32_t) (ival != 0);
|
|
#if OMPI_ENABLE_PROGRESS_THREADS == 1
|
|
/* Fast rdma path isn't supported by PROGRESS_THREAD */
|
|
mca_btl_openib_component.use_eager_rdma = 0;
|
|
#endif
|
|
|
|
CHECK(reg_int("eager_rdma_threshold",
|
|
"Use RDMA for short messages after this number of "
|
|
"messages are received from a given peer "
|
|
"(must be >= 1)",
|
|
16, &ival, REGINT_GE_ONE));
|
|
mca_btl_openib_component.eager_rdma_threshold = (int32_t) ival;
|
|
|
|
CHECK(reg_int("max_eager_rdma", "Maximum number of peers allowed to use "
|
|
"RDMA for short messages (RDMA is used for all long "
|
|
"messages, except if explicitly disabled, such as "
|
|
"with the \"dr\" pml) "
|
|
"(must be >= 0)",
|
|
16, &ival, REGINT_GE_ZERO));
|
|
mca_btl_openib_component.max_eager_rdma = (int32_t) ival;
|
|
|
|
CHECK(reg_int("eager_rdma_num", "Number of RDMA buffers to allocate "
|
|
"for small messages"
|
|
"(must be >= 1)",
|
|
16, &ival, REGINT_GE_ONE));
|
|
mca_btl_openib_component.eager_rdma_num = (uint32_t) (ival + 1);
|
|
|
|
CHECK(reg_int("btls_per_lid", "Number of BTLs to create for each "
|
|
"InfiniBand LID "
|
|
"(must be >= 1)",
|
|
1, &ival, REGINT_GE_ONE));
|
|
mca_btl_openib_component.btls_per_lid = (uint32_t) ival;
|
|
|
|
CHECK(reg_int("max_lmc", "Maximum number of LIDs to use for each HCA port "
|
|
"(must be >= 0, where 0 = use all available)",
|
|
0, &ival, REGINT_GE_ZERO));
|
|
mca_btl_openib_component.max_lmc = (uint32_t) ival;
|
|
|
|
CHECK(reg_int("buffer_alignment",
|
|
"Prefered communication buffer alignment, in bytes "
|
|
"(must be >= 0)",
|
|
64, &ival, REGINT_GE_ZERO));
|
|
mca_btl_openib_component.buffer_alignment = (uint32_t) ival;
|
|
|
|
CHECK(reg_int("eager_limit", "Eager send limit, in bytes "
|
|
"(must be >= 1)",
|
|
(12 * 1024), &ival, REGINT_GE_ONE));
|
|
mca_btl_openib_module.super.btl_eager_limit = (uint32_t) ival;
|
|
|
|
CHECK(reg_int("min_send_size", "Minimum send size, in bytes "
|
|
"(must be >= 1)",
|
|
(32 * 1024), &ival, REGINT_GE_ONE));
|
|
mca_btl_openib_module.super.btl_min_send_size = (uint32_t) ival;
|
|
|
|
CHECK(reg_int("max_send_size", "Maximum send size, in bytes "
|
|
"(must be >= 1)",
|
|
(64 * 1024), &ival, REGINT_GE_ONE));
|
|
mca_btl_openib_module.super.btl_max_send_size = (uint32_t) ival;
|
|
|
|
CHECK(reg_int("min_rdma_size", "Minimum RDMA size, in bytes "
|
|
"(must be >= 1)",
|
|
(1024 * 1024), &ival, REGINT_GE_ONE));
|
|
mca_btl_openib_module.super.btl_min_rdma_size = (uint32_t) ival;
|
|
|
|
CHECK(reg_int("max_rdma_size", "Maximium RDMA size, in bytes "
|
|
"(must be >= 1)",
|
|
(1024 * 1024), &ival, REGINT_GE_ONE));
|
|
mca_btl_openib_module.super.btl_max_rdma_size = (uint32_t) ival;
|
|
|
|
CHECK(reg_int("flags", "BTL flags, added together: SEND=1, PUT=2, GET=4 "
|
|
"(cannot be 0)",
|
|
MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_NEED_ACK |
|
|
MCA_BTL_FLAGS_NEED_CSUM, &ival, REGINT_GE_ZERO));
|
|
mca_btl_openib_module.super.btl_flags = (uint32_t) ival;
|
|
|
|
CHECK(reg_int("bandwidth", "Approximate maximum bandwidth "
|
|
"of network (must be >= 1)",
|
|
800, &ival, REGINT_GE_ONE));
|
|
mca_btl_openib_module.super.btl_bandwidth = (uint32_t) ival;
|
|
CHECK(reg_int("latency", "Approximate latency of the device (must be >= 1)",
|
|
10, &ival, REGINT_GE_ONE));
|
|
mca_btl_openib_module.super.btl_latency = (uint32_t) ival;
|
|
|
|
return ret;
|
|
}
|
|
|