1
1

Fixes trac:1355: allow INI file to set max_inline_data value, and if not

specified, probe for max value supported by device.

This commit was SVN r18720.

The following Trac tickets were found above:
  Ticket 1355 --> https://svn.open-mpi.org/trac/ompi/ticket/1355
Этот коммит содержится в:
Jeff Squyres 2008-06-24 17:18:07 +00:00
родитель 95cd9758e5
Коммит e0545460ff
10 изменённых файлов: 166 добавлений и 53 удалений

Просмотреть файл

@ -10,7 +10,7 @@
* University of Stuttgart. All rights reserved. * University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006-2008 Mellanox Technologies. All rights reserved. * Copyright (c) 2006-2008 Mellanox Technologies. All rights reserved.
* Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights * Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
@ -172,7 +172,7 @@ struct mca_btl_openib_component_t {
uint32_t ib_cq_size[2]; /**< Max outstanding CQE on the CQ */ uint32_t ib_cq_size[2]; /**< Max outstanding CQE on the CQ */
uint32_t ib_max_inline_data; /**< Max size of inline data */ int32_t ib_max_inline_data; /**< Max size of inline data */
uint32_t ib_pkey_ix; /**< InfiniBand pkey index */ uint32_t ib_pkey_ix; /**< InfiniBand pkey index */
uint32_t ib_pkey_val; uint32_t ib_pkey_val;
uint32_t ib_psn; uint32_t ib_psn;
@ -321,7 +321,10 @@ typedef struct mca_btl_openib_hca_t {
struct mca_btl_base_endpoint_t **eager_rdma_buffers; struct mca_btl_base_endpoint_t **eager_rdma_buffers;
/**< frags for control massages */ /**< frags for control massages */
ompi_free_list_t send_free_control; ompi_free_list_t send_free_control;
/* QP types and attributes that will be used on this HCA */
mca_btl_openib_hca_qp_t *qps; mca_btl_openib_hca_qp_t *qps;
/* Maximum value supported by this HCA for max_inline_data */
uint32_t max_inline_data;
} mca_btl_openib_hca_t; } mca_btl_openib_hca_t;
OBJ_CLASS_DECLARATION(mca_btl_openib_hca_t); OBJ_CLASS_DECLARATION(mca_btl_openib_hca_t);

Просмотреть файл

@ -716,6 +716,7 @@ static void hca_construct(mca_btl_openib_hca_t *hca)
#endif #endif
OBJ_CONSTRUCT(&hca->hca_lock, opal_mutex_t); OBJ_CONSTRUCT(&hca->hca_lock, opal_mutex_t);
OBJ_CONSTRUCT(&hca->send_free_control, ompi_free_list_t); OBJ_CONSTRUCT(&hca->send_free_control, ompi_free_list_t);
hca->max_inline_data = 0;
} }
static void hca_destruct(mca_btl_openib_hca_t *hca) static void hca_destruct(mca_btl_openib_hca_t *hca)
@ -1090,6 +1091,11 @@ static void merge_values(ompi_btl_openib_ini_values_t *target,
if (NULL == target->receive_queues && NULL != src->receive_queues) { if (NULL == target->receive_queues && NULL != src->receive_queues) {
target->receive_queues = strdup(src->receive_queues); target->receive_queues = strdup(src->receive_queues);
} }
if (!target->max_inline_data_set && src->max_inline_data_set) {
target->max_inline_data = src->max_inline_data;
target->max_inline_data_set = true;
}
} }
static bool inline is_credit_message(const mca_btl_openib_recv_frag_t *frag) static bool inline is_credit_message(const mca_btl_openib_recv_frag_t *frag)
@ -1333,6 +1339,7 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
int ret = -1, port_cnt; int ret = -1, port_cnt;
ompi_btl_openib_ini_values_t values, default_values; ompi_btl_openib_ini_values_t values, default_values;
int *allowed_ports = NULL; int *allowed_ports = NULL;
bool need_search;
hca = OBJ_NEW(mca_btl_openib_hca_t); hca = OBJ_NEW(mca_btl_openib_hca_t);
if(NULL == hca){ if(NULL == hca){
@ -1428,6 +1435,90 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
hca->mtu = mca_btl_openib_component.ib_mtu; hca->mtu = mca_btl_openib_component.ib_mtu;
} }
/* Allocate the protection domain for the HCA */
hca->ib_pd = ibv_alloc_pd(hca->ib_dev_context);
if(NULL == hca->ib_pd){
BTL_ERROR(("error allocating protection domain for %s errno says %s",
ibv_get_device_name(hca->ib_dev), strerror(errno)));
goto error;
}
/* Figure out what the max_inline_data value should be for all
ports and QPs on this HCA */
need_search = false;
if (0 == mca_btl_openib_component.ib_max_inline_data) {
need_search = true;
} else if (mca_btl_openib_component.ib_max_inline_data > 0) {
hca->max_inline_data = mca_btl_openib_component.ib_max_inline_data;
} else if (values.max_inline_data_set) {
if (0 == values.max_inline_data) {
need_search = true;
} else if (values.max_inline_data > 0) {
hca->max_inline_data = values.max_inline_data;
}
}
/* Horrible. :-( Per the thread starting here:
http://lists.openfabrics.org/pipermail/general/2008-June/051822.html,
we can't rely on the value reported by the device to determine
the maximum max_inline_data value. So we have to search by
looping over max_inline_data values and trying to make dummy
QPs. Yuck! */
if (need_search) {
struct ibv_qp *qp;
struct ibv_cq *cq;
struct ibv_qp_init_attr init_attr;
uint32_t max_inline_data;
/* Make a dummy CQ */
#if OMPI_IBV_CREATE_CQ_ARGS == 3
cq = ibv_create_cq(hca->ib_dev_context, 1, NULL);
#else
cq = ibv_create_cq(hca->ib_dev_context, 1, NULL, NULL, 0);
#endif
if (NULL == cq) {
orte_show_help("help-mpi-btl-openib.txt", "init-fail-create-q",
true, orte_process_info.nodename,
__FILE__, __LINE__, "ibv_create_cq",
strerror(errno), errno,
ibv_get_device_name(hca->ib_dev));
ret = OMPI_ERR_NOT_AVAILABLE;
goto error;
}
/* Setup the QP attributes */
memset(&init_attr, 0, sizeof(init_attr));
init_attr.qp_type = IBV_QPT_RC;
init_attr.send_cq = cq;
init_attr.recv_cq = cq;
init_attr.srq = 0;
init_attr.cap.max_send_sge = 1;
init_attr.cap.max_recv_sge = 1;
init_attr.cap.max_recv_wr = 1;
/* Loop over max_inline_data values; just check powers of 2 --
that's good enough */
init_attr.cap.max_inline_data = max_inline_data = 1 << 20;
while (max_inline_data > 0) {
qp = ibv_create_qp(hca->ib_pd, &init_attr);
if (NULL != qp) {
break;
}
max_inline_data >>= 1;
init_attr.cap.max_inline_data = max_inline_data;
}
/* Did we find it? */
if (NULL != qp) {
hca->max_inline_data = max_inline_data;
ibv_destroy_qp(qp);
} else {
hca->max_inline_data = 0;
}
/* Destroy the temp CQ */
ibv_destroy_cq(cq);
}
/* If the user specified btl_openib_receive_queues MCA param, it /* If the user specified btl_openib_receive_queues MCA param, it
overrides all HCA INI params */ overrides all HCA INI params */
if (BTL_OPENIB_RQ_SOURCE_MCA != if (BTL_OPENIB_RQ_SOURCE_MCA !=
@ -1487,17 +1578,7 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
ret = OMPI_SUCCESS; ret = OMPI_SUCCESS;
goto error; goto error;
} }
#endif
/* Allocate the protection domain for the HCA */
hca->ib_pd = ibv_alloc_pd(hca->ib_dev_context);
if(NULL == hca->ib_pd){
BTL_ERROR(("error allocating protection domain for %s errno says %s",
ibv_get_device_name(hca->ib_dev), strerror(errno)));
goto error;
}
#if HAVE_XRC
if (MCA_BTL_XRC_ENABLED) { if (MCA_BTL_XRC_ENABLED) {
if (OMPI_SUCCESS != mca_btl_openib_open_xrc_domain(hca)) { if (OMPI_SUCCESS != mca_btl_openib_open_xrc_domain(hca)) {
BTL_ERROR(("XRC Internal error. Failed to open xrc domain")); BTL_ERROR(("XRC Internal error. Failed to open xrc domain"));

Просмотреть файл

@ -9,7 +9,7 @@
* University of Stuttgart. All rights reserved. * University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -396,6 +396,12 @@ static int parse_line(parsed_section_values_t *sv)
value = NULL; value = NULL;
} }
else if (0 == strcasecmp(key_buffer, "max_inline_data")) {
/* Single value */
sv->values.max_inline_data = (uint32_t) intify(value);
sv->values.max_inline_data_set = true;
}
else { else {
/* Have no idea what this parameter is. Not an error -- just /* Have no idea what this parameter is. Not an error -- just
ignore it */ ignore it */
@ -482,6 +488,9 @@ static void reset_values(ompi_btl_openib_ini_values_t *v)
v->use_eager_rdma_set = false; v->use_eager_rdma_set = false;
v->receive_queues = NULL; v->receive_queues = NULL;
v->max_inline_data = 0;
v->max_inline_data_set = false;
} }
@ -528,6 +537,16 @@ static int save_section(parsed_section_values_t *s)
h->values.use_eager_rdma_set = true; h->values.use_eager_rdma_set = true;
} }
if (NULL != s->values.receive_queues) {
h->values.receive_queues =
strdup(s->values.receive_queues);
}
if (s->values.max_inline_data_set) {
h->values.max_inline_data = s->values.max_inline_data;
h->values.max_inline_data_set = true;
}
found = true; found = true;
break; break;
} }

Просмотреть файл

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -27,12 +27,13 @@ typedef struct ompi_btl_openib_ini_values_t {
bool use_eager_rdma_set; bool use_eager_rdma_set;
char *receive_queues; char *receive_queues;
uint32_t max_inline_data;
bool max_inline_data_set;
} ompi_btl_openib_ini_values_t; } ompi_btl_openib_ini_values_t;
#if defined(c_plusplus) || defined(__cplusplus) BEGIN_C_DECLS
extern "C" {
#endif
/** /**
* Read in the INI files containing HCA params * Read in the INI files containing HCA params
@ -51,7 +52,6 @@ extern "C" {
*/ */
int ompi_btl_openib_ini_finalize(void); int ompi_btl_openib_ini_finalize(void);
#if defined(c_plusplus) || defined(__cplusplus) END_C_DECLS
}
#endif
#endif #endif

Просмотреть файл

@ -9,7 +9,7 @@
* University of Stuttgart. All rights reserved. * University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006-2008 Mellanox Technologies. All rights reserved. * Copyright (c) 2006-2008 Mellanox Technologies. All rights reserved.
* Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights * Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
@ -222,10 +222,11 @@ int btl_openib_register_mca_params(void)
mca_btl_openib_component.ib_cq_size[BTL_OPENIB_LP_CQ] = mca_btl_openib_component.ib_cq_size[BTL_OPENIB_LP_CQ] =
mca_btl_openib_component.ib_cq_size[BTL_OPENIB_HP_CQ] = (uint32_t) ival; mca_btl_openib_component.ib_cq_size[BTL_OPENIB_HP_CQ] = (uint32_t) ival;
CHECK(reg_int("ib_max_inline_data", "Maximal size of inline data segment " CHECK(reg_int("ib_max_inline_data", "Maximum size of inline data segment "
"(must be >= 1)", "(-1 = use per-device devaults, 0 = run-time probe to discover max value, "
64, &ival, REGINT_GE_ZERO)); "otherwise must be >= 1)",
mca_btl_openib_component.ib_max_inline_data = (uint32_t) ival; -1, &ival, REGINT_NEG_ONE_OK | REGINT_GE_ZERO));
mca_btl_openib_component.ib_max_inline_data = (int32_t) ival;
CHECK(reg_int("ib_pkey_ix", "InfiniBand pkey index " CHECK(reg_int("ib_pkey_ix", "InfiniBand pkey index "
"(must be >= 0)", "(must be >= 0)",

Просмотреть файл

@ -768,19 +768,19 @@ static int ibcm_component_query(mca_btl_openib_module_t *btl,
/******************************************************************* /*******************************************************************
* Module * Module
*******************************************************************/ *******************************************************************/
/* Returns max inlne size for qp #N */ /* Returns max inlne size for qp #N */
static int max_inline_size(int qp) static uint32_t max_inline_size(int qp, mca_btl_openib_hca_t *hca)
{ {
if (mca_btl_openib_component.qp_infos[qp].size <= if (mca_btl_openib_component.qp_infos[qp].size <= hca->max_inline_data) {
mca_btl_openib_component.ib_max_inline_data) { /* If qp message size is smaller than max_inline_data,
/* If qp message size is smaller that max inline -
* we should enable inline messages */ * we should enable inline messages */
return mca_btl_openib_component.qp_infos[qp].size; return mca_btl_openib_component.qp_infos[qp].size;
} else if (mca_btl_openib_component.rdma_qp == qp || 0 == qp) { } else if (mca_btl_openib_component.rdma_qp == qp || 0 == qp) {
/* If qp message size is bigger that max inline - /* If qp message size is bigger that max_inline_data, we
* we should enable inline messages * should enable inline messages only for RDMA QP (for PUT/GET
* only for RDMA QP (for PUT/GET fin messages) and for the first qp */ * fin messages) and for the first qp */
return mca_btl_openib_component.ib_max_inline_data; return hca->max_inline_data;
} }
/* Otherway it is no reason for inline */ /* Otherway it is no reason for inline */
return 0; return 0;
@ -805,7 +805,8 @@ static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp,
init_attr.send_cq = openib_btl->hca->ib_cq[BTL_OPENIB_LP_CQ]; init_attr.send_cq = openib_btl->hca->ib_cq[BTL_OPENIB_LP_CQ];
init_attr.recv_cq = openib_btl->hca->ib_cq[qp_cq_prio(qp)]; init_attr.recv_cq = openib_btl->hca->ib_cq[qp_cq_prio(qp)];
init_attr.srq = srq; init_attr.srq = srq;
init_attr.cap.max_inline_data = req_inline = max_inline_size(qp); init_attr.cap.max_inline_data = req_inline =
max_inline_size(qp, openib_btl->hca);
init_attr.cap.max_send_sge = 1; init_attr.cap.max_send_sge = 1;
init_attr.cap.max_recv_sge = 1; /* we do not use SG list */ init_attr.cap.max_recv_sge = 1; /* we do not use SG list */
if(BTL_OPENIB_QP_TYPE_PP(qp)) { if(BTL_OPENIB_QP_TYPE_PP(qp)) {

Просмотреть файл

@ -404,18 +404,17 @@ static int qp_create_all(mca_btl_base_endpoint_t* endpoint)
/* Returns max inlne size for qp #N */ /* Returns max inlne size for qp #N */
static int max_inline_size(int qp) static uint32_t max_inline_size(int qp, mca_btl_openib_hca_t *hca)
{ {
if (mca_btl_openib_component.qp_infos[qp].size <= if (mca_btl_openib_component.qp_infos[qp].size <= hca->max_inline_data) {
mca_btl_openib_component.ib_max_inline_data) { /* If qp message size is smaller than max_inline_data,
/* If qp message size is smaller that max inline -
* we should enable inline messages */ * we should enable inline messages */
return mca_btl_openib_component.qp_infos[qp].size; return mca_btl_openib_component.qp_infos[qp].size;
} else if (mca_btl_openib_component.rdma_qp == qp || 0 == qp) { } else if (mca_btl_openib_component.rdma_qp == qp || 0 == qp) {
/* If qp message size is bigger that max inline - /* If qp message size is bigger that max_inline_data, we
* we should enable inline messages * should enable inline messages only for RDMA QP (for PUT/GET
* only for RDMA QP (for PUT/GET fin messages) and for the first qp */ * fin messages) and for the first qp */
return mca_btl_openib_component.ib_max_inline_data; return hca->max_inline_data;
} }
/* Otherway it is no reason for inline */ /* Otherway it is no reason for inline */
return 0; return 0;
@ -441,7 +440,8 @@ static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp,
init_attr.send_cq = openib_btl->hca->ib_cq[BTL_OPENIB_LP_CQ]; init_attr.send_cq = openib_btl->hca->ib_cq[BTL_OPENIB_LP_CQ];
init_attr.recv_cq = openib_btl->hca->ib_cq[qp_cq_prio(qp)]; init_attr.recv_cq = openib_btl->hca->ib_cq[qp_cq_prio(qp)];
init_attr.srq = srq; init_attr.srq = srq;
init_attr.cap.max_inline_data = req_inline = max_inline_size(qp); init_attr.cap.max_inline_data = req_inline =
max_inline_size(qp, openib_btl->hca);
init_attr.cap.max_send_sge = 1; init_attr.cap.max_send_sge = 1;
init_attr.cap.max_recv_sge = 1; /* we do not use SG list */ init_attr.cap.max_recv_sge = 1; /* we do not use SG list */
if(BTL_OPENIB_QP_TYPE_PP(qp)) { if(BTL_OPENIB_QP_TYPE_PP(qp)) {

Просмотреть файл

@ -199,23 +199,23 @@ static void rdmacm_cleanup(rdmacm_contents_t *local,
} }
/* Returns max inlne size for qp #N */ /* Returns max inlne size for qp #N */
static int max_inline_size(int qp) static uint32_t max_inline_size(int qp, mca_btl_openib_hca_t *hca)
{ {
if (mca_btl_openib_component.qp_infos[qp].size <= if (mca_btl_openib_component.qp_infos[qp].size <= hca->max_inline_data) {
mca_btl_openib_component.ib_max_inline_data) { /* If qp message size is smaller than max_inline_data,
/* If qp message size is smaller that max inline -
* we should enable inline messages */ * we should enable inline messages */
return mca_btl_openib_component.qp_infos[qp].size; return mca_btl_openib_component.qp_infos[qp].size;
} else if (mca_btl_openib_component.rdma_qp == qp || 0 == qp) { } else if (mca_btl_openib_component.rdma_qp == qp || 0 == qp) {
/* If qp message size is bigger that max inline - /* If qp message size is bigger that max_inline_data, we
* we should enable inline messages * should enable inline messages only for RDMA QP (for PUT/GET
* only for RDMA QP (for PUT/GET fin messages) and for the first qp */ * fin messages) and for the first qp */
return mca_btl_openib_component.ib_max_inline_data; return hca->max_inline_data;
} }
/* Otherway it is no reason for inline */ /* Otherway it is no reason for inline */
return 0; return 0;
} }
static int rdmacm_setup_qp(rdmacm_contents_t *local, static int rdmacm_setup_qp(rdmacm_contents_t *local,
mca_btl_openib_endpoint_t *endpoint, mca_btl_openib_endpoint_t *endpoint,
struct rdma_cm_id *id, struct rdma_cm_id *id,
@ -252,7 +252,8 @@ static int rdmacm_setup_qp(rdmacm_contents_t *local,
attr.srq = srq; attr.srq = srq;
attr.cap.max_recv_wr = max_recv_wr; attr.cap.max_recv_wr = max_recv_wr;
attr.cap.max_send_wr = max_send_wr; attr.cap.max_send_wr = max_send_wr;
attr.cap.max_inline_data = req_inline = max_inline_size(qpnum); attr.cap.max_inline_data = req_inline =
max_inline_size(qpnum, local->openib_btl->hca);
attr.cap.max_send_sge = 1; attr.cap.max_send_sge = 1;
attr.cap.max_recv_sge = 1; /* we do not use SG list */ attr.cap.max_recv_sge = 1; /* we do not use SG list */

Просмотреть файл

@ -393,7 +393,7 @@ static int xoob_send_qp_create (mca_btl_base_endpoint_t* endpoint)
qp_init_attr.cap.max_recv_wr = 0; qp_init_attr.cap.max_recv_wr = 0;
qp_init_attr.cap.max_send_wr = send_wr; qp_init_attr.cap.max_send_wr = send_wr;
qp_init_attr.cap.max_inline_data = req_inline = qp_init_attr.cap.max_inline_data = req_inline =
mca_btl_openib_component.ib_max_inline_data; openib_btl->hca->max_inline_data;
qp_init_attr.cap.max_send_sge = 1; qp_init_attr.cap.max_send_sge = 1;
/* this one is ignored by driver */ /* this one is ignored by driver */
qp_init_attr.cap.max_recv_sge = 1; /* we do not use SG list */ qp_init_attr.cap.max_recv_sge = 1; /* we do not use SG list */

Просмотреть файл

@ -76,6 +76,12 @@
# receive_queues = P,128,256,192,128:S,65536,256,192,128 # receive_queues = P,128,256,192,128:S,65536,256,192,128
# max_inline_data: an integer specifying the maximum inline data (in
# bytes) supported by the device. 0 means to use a run-time probe to
# figure out the maximum value supported by the device.
# max_inline_data = 1024
############################################################################ ############################################################################
[default] [default]
@ -88,6 +94,7 @@ vendor_id = 0
vendor_part_id = 0 vendor_part_id = 0
use_eager_rdma = 0 use_eager_rdma = 0
mtu = 1024 mtu = 1024
max_inline_data = 0
############################################################################ ############################################################################