1
1

btl: export local registration thresholds

Some BTLs do not require local registration for some RDMA
transactions, for example inline put on openib and FMA put on ugni.
This commit adds code to expose the local registration thresholds to
BTL users. Optimized code can take advantage of this information to
improve RDMA performance.
Этот коммит содержится в:
Nathan Hjelm 2015-02-19 16:13:37 -07:00
родитель 426d1ce146
Коммит cc750b00a6
5 изменённых файлов: 32 добавлений и 7 удалений

Просмотреть файл

@ -1119,6 +1119,10 @@ struct mca_btl_base_module_t {
size_t btl_put_limit; /**< maximum size supported by the btl_put function */
size_t btl_put_alignment; /**< minimum alignment/size needed by btl_put (power of 2) */
/* minimum transaction sizes for which registration is required for local memory */
size_t btl_get_local_registration_threshold;
size_t btl_put_local_registration_threshold;
/* BTL function table */
mca_btl_base_module_add_procs_fn_t btl_add_procs;
mca_btl_base_module_del_procs_fn_t btl_del_procs;

Просмотреть файл

@ -816,6 +816,9 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device,
openib_btl->super.btl_put_limit = openib_btl->ib_port_attr.max_msg_sz;
}
openib_btl->super.btl_put_local_registration_threshold = openib_btl->device->max_inline_data;
openib_btl->super.btl_get_local_registration_threshold = 0;
#if HAVE_DECL_IBV_ATOMIC_HCA
if (openib_btl->device->ib_dev_attr.atomic_cap == IBV_ATOMIC_NONE) {
openib_btl->super.btl_flags &= ~MCA_BTL_FLAGS_ATOMIC_FOPS;

Просмотреть файл

@ -45,7 +45,12 @@ int mca_btl_openib_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint
mca_btl_openib_put_frag_t *frag = NULL;
int rc, qp = order;
if (OPAL_UNLIKELY(size > btl->btl_put_limit)) {
if (MCA_BTL_NO_ORDER == qp) {
qp = mca_btl_openib_component.rdma_qp;
}
if (OPAL_UNLIKELY((ep->qps[qp].ib_inline_max < size && !local_handle) || !remote_handle ||
size > btl->btl_put_limit)) {
return OPAL_ERR_BAD_PARAM;
}
@ -54,10 +59,6 @@ int mca_btl_openib_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint
return OPAL_ERR_OUT_OF_RESOURCE;
}
if (MCA_BTL_NO_ORDER == qp) {
qp = mca_btl_openib_component.rdma_qp;
}
/* set base descriptor flags */
to_base_frag(frag)->base.order = qp;
/* free this descriptor when the operation is complete */
@ -65,7 +66,14 @@ int mca_btl_openib_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint
/* set up scatter-gather entry */
to_com_frag(frag)->sg_entry.length = size;
to_com_frag(frag)->sg_entry.lkey = local_handle->lkey;
if (local_handle) {
to_com_frag(frag)->sg_entry.lkey = local_handle->lkey;
} else {
/* lkey is not required for inline RDMA write */
to_com_frag(frag)->sg_entry.lkey = 0;
}
to_com_frag(frag)->sg_entry.addr = (uint64_t)(intptr_t) local_address;
to_com_frag(frag)->endpoint = ep;

Просмотреть файл

@ -251,6 +251,9 @@ btl_ugni_component_register(void)
mca_btl_ugni_module.super.btl_bandwidth = 40000; /* Mbs */
mca_btl_ugni_module.super.btl_latency = 2; /* Microsecs */
mca_btl_ugni_module.super.btl_get_local_registration_threshold = 0;
mca_btl_ugni_module.super.btl_put_local_registration_threshold = mca_btl_ugni_component.ugni_fma_limit;
/* Call the BTL base to register its MCA params */
mca_btl_base_param_register(&mca_btl_ugni_component.super.btl_version,
&mca_btl_ugni_module.super);
@ -321,6 +324,8 @@ mca_btl_ugni_component_init (int *num_btl_modules,
mca_btl_ugni_component.ugni_fma_limit = 65536;
}
mca_btl_ugni_module.super.btl_put_local_registration_threshold = mca_btl_ugni_component.ugni_fma_limit;
if (enable_mpi_threads && mca_btl_ugni_component.progress_thread_requested) {
mca_btl_ugni_component.progress_thread_enabled = 1;
}

Просмотреть файл

@ -53,8 +53,13 @@ static inline int mca_btl_ugni_post_fma (struct mca_btl_base_endpoint_t *endpoin
void *cbcontext, void *cbdata)
{
mca_btl_ugni_post_descriptor_t *post_desc;
gni_mem_handle_t local_gni_handle = {0, 0};
gni_return_t grc;
if (local_handle) {
local_gni_handle = local_handle->gni_handle;
}
mca_btl_ugni_alloc_post_descriptor (endpoint, local_handle, cbfunc, cbcontext, cbdata, &post_desc);
if (OPAL_UNLIKELY(NULL == post_desc)) {
return OPAL_ERR_OUT_OF_RESOURCE;
@ -62,7 +67,7 @@ static inline int mca_btl_ugni_post_fma (struct mca_btl_base_endpoint_t *endpoin
/* Post descriptor (CQ is ignored for FMA transactions) -- The CQ associated with the endpoint
* is used. */
init_gni_post_desc (&post_desc->desc, order, op_type, (intptr_t) local_address, local_handle->gni_handle,
init_gni_post_desc (&post_desc->desc, order, op_type, (intptr_t) local_address, local_gni_handle,
remote_address, remote_handle->gni_handle, size, 0);
OPAL_THREAD_LOCK(&endpoint->btl->device->dev_lock);