Merge pull request #5274 from thananon/ofi_sep
btl/ofi: add scalable endpoint support.
Этот коммит содержится в:
Коммит
13f58f3191
@ -46,9 +46,7 @@
|
||||
#include <rdma/fi_rma.h>
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
#define MCA_BTL_OFI_MAX_MODULES 16
|
||||
#define MCA_BTL_OFI_MAX_WORKERS 1
|
||||
#define MCA_BTL_OFI_MAX_CQ_READ_ENTRIES 128
|
||||
|
||||
#define MCA_BTL_OFI_ABORT(args) mca_btl_ofi_exit(args)
|
||||
@ -62,6 +60,26 @@ enum mca_btl_ofi_type {
|
||||
MCA_BTL_OFI_TYPE_TOTAL
|
||||
};
|
||||
|
||||
struct mca_btl_ofi_context_t {
|
||||
int32_t context_id;
|
||||
|
||||
/* transmit context */
|
||||
struct fid_ep *tx_ctx;
|
||||
struct fid_ep *rx_ctx;
|
||||
|
||||
/* completion queue */
|
||||
struct fid_cq *cq;
|
||||
|
||||
/* completion info freelist */
|
||||
/* We have it per context to reduce the thread contention
|
||||
* on the freelist. Things can get really slow. */
|
||||
opal_free_list_t comp_list;
|
||||
|
||||
/* for thread locking */
|
||||
volatile int32_t lock;
|
||||
};
|
||||
typedef struct mca_btl_ofi_context_t mca_btl_ofi_context_t;
|
||||
|
||||
/**
|
||||
* @brief OFI BTL module
|
||||
*/
|
||||
@ -74,17 +92,17 @@ struct mca_btl_ofi_module_t {
|
||||
struct fid_fabric *fabric;
|
||||
struct fid_domain *domain;
|
||||
struct fid_ep *ofi_endpoint;
|
||||
struct fid_cq *cq;
|
||||
struct fid_av *av;
|
||||
|
||||
int num_contexts;
|
||||
mca_btl_ofi_context_t *contexts;
|
||||
|
||||
char *linux_device_name;
|
||||
|
||||
/** whether the module has been fully initialized or not */
|
||||
bool initialized;
|
||||
bool use_virt_addr;
|
||||
|
||||
/** spin-lock to protect the module */
|
||||
volatile int32_t lock;
|
||||
bool is_scalable_ep;
|
||||
|
||||
int64_t outstanding_rdma;
|
||||
|
||||
@ -92,8 +110,7 @@ struct mca_btl_ofi_module_t {
|
||||
* there is no need for a complicated structure here at this time*/
|
||||
opal_list_t endpoints;
|
||||
|
||||
/* free lists */
|
||||
opal_free_list_t comp_list;
|
||||
opal_mutex_t module_lock;
|
||||
|
||||
/** registration cache */
|
||||
mca_rcache_base_module_t *rcache;
|
||||
@ -110,6 +127,7 @@ struct mca_btl_ofi_component_t {
|
||||
|
||||
/** number of TL modules */
|
||||
int module_count;
|
||||
int num_contexts_per_module;
|
||||
int num_cqe_read;
|
||||
|
||||
size_t namelen;
|
||||
@ -117,10 +135,6 @@ struct mca_btl_ofi_component_t {
|
||||
/** All BTL OFI modules (1 per tl) */
|
||||
mca_btl_ofi_module_t *modules[MCA_BTL_OFI_MAX_MODULES];
|
||||
|
||||
#if OPAL_C_HAVE__THREAD_LOCAL
|
||||
/** bind threads to contexts */
|
||||
bool bind_threads_to_contexts;
|
||||
#endif
|
||||
};
|
||||
typedef struct mca_btl_ofi_component_t mca_btl_ofi_component_t;
|
||||
|
||||
@ -151,6 +165,7 @@ struct mca_btl_ofi_completion_t {
|
||||
|
||||
struct mca_btl_base_module_t *btl;
|
||||
struct mca_btl_base_endpoint_t *endpoint;
|
||||
struct mca_btl_ofi_context_t *my_context;
|
||||
uint32_t type;
|
||||
|
||||
void *local_address;
|
||||
@ -269,7 +284,25 @@ int mca_btl_ofi_reg_mem (void *reg_data, void *base, size_t size,
|
||||
mca_rcache_base_registration_t *reg);
|
||||
int mca_btl_ofi_dereg_mem (void *reg_data, mca_rcache_base_registration_t *reg);
|
||||
|
||||
int mca_btl_ofi_context_progress(mca_btl_ofi_context_t *context);
|
||||
void mca_btl_ofi_exit(void);
|
||||
|
||||
/* thread atomics */
|
||||
static inline bool mca_btl_ofi_context_trylock (mca_btl_ofi_context_t *context)
|
||||
{
|
||||
return (context->lock || OPAL_ATOMIC_SWAP_32(&context->lock, 1));
|
||||
}
|
||||
|
||||
static inline void mca_btl_ofi_context_lock(mca_btl_ofi_context_t *context)
|
||||
{
|
||||
while (mca_btl_ofi_context_trylock(context));
|
||||
}
|
||||
|
||||
static inline void mca_btl_ofi_context_unlock(mca_btl_ofi_context_t *context)
|
||||
{
|
||||
opal_atomic_mb();
|
||||
context->lock = 0;
|
||||
}
|
||||
|
||||
END_C_DECLS
|
||||
#endif
|
||||
|
@ -43,6 +43,9 @@ int mca_btl_ofi_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_end
|
||||
mca_btl_ofi_module_t *ofi_btl = (mca_btl_ofi_module_t *) btl;
|
||||
mca_btl_ofi_endpoint_t *btl_endpoint = (mca_btl_ofi_endpoint_t*) endpoint;
|
||||
mca_btl_ofi_completion_t *comp = NULL;
|
||||
mca_btl_ofi_context_t *ofi_context;
|
||||
|
||||
ofi_context = get_ofi_context(ofi_btl);
|
||||
|
||||
if (flags & MCA_BTL_ATOMIC_FLAG_32BIT) {
|
||||
fi_datatype = FI_UINT32;
|
||||
@ -51,6 +54,7 @@ int mca_btl_ofi_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_end
|
||||
fi_op = to_fi_op(op);
|
||||
|
||||
comp = mca_btl_ofi_completion_alloc(btl, endpoint,
|
||||
ofi_context,
|
||||
local_address,
|
||||
local_handle,
|
||||
cbfunc, cbcontext, cbdata,
|
||||
@ -61,7 +65,7 @@ int mca_btl_ofi_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_end
|
||||
|
||||
remote_address = (remote_address - (uint64_t) remote_handle->base_addr);
|
||||
|
||||
rc = fi_fetch_atomic(ofi_btl->ofi_endpoint,
|
||||
rc = fi_fetch_atomic(ofi_context->tx_ctx,
|
||||
(void*) &comp->operand, 1, NULL, /* operand */
|
||||
local_address, local_handle->desc, /* results */
|
||||
btl_endpoint->peer_addr, /* remote addr */
|
||||
@ -77,6 +81,9 @@ int mca_btl_ofi_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_end
|
||||
|
||||
MCA_BTL_OFI_NUM_RDMA_INC(ofi_btl);
|
||||
|
||||
/* force a bit of progress. */
|
||||
mca_btl_ofi_component.super.btl_progress();
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
@ -92,6 +99,9 @@ int mca_btl_ofi_aop (struct mca_btl_base_module_t *btl, mca_btl_base_endpoint_t
|
||||
mca_btl_ofi_module_t *ofi_btl = (mca_btl_ofi_module_t *) btl;
|
||||
mca_btl_ofi_endpoint_t *btl_endpoint = (mca_btl_ofi_endpoint_t*) endpoint;
|
||||
mca_btl_ofi_completion_t *comp = NULL;
|
||||
mca_btl_ofi_context_t *ofi_context;
|
||||
|
||||
ofi_context = get_ofi_context(ofi_btl);
|
||||
|
||||
if (flags & MCA_BTL_ATOMIC_FLAG_32BIT) {
|
||||
fi_datatype = FI_UINT32;
|
||||
@ -100,6 +110,7 @@ int mca_btl_ofi_aop (struct mca_btl_base_module_t *btl, mca_btl_base_endpoint_t
|
||||
fi_op = to_fi_op(op);
|
||||
|
||||
comp = mca_btl_ofi_completion_alloc(btl, endpoint,
|
||||
ofi_context,
|
||||
NULL,
|
||||
NULL,
|
||||
cbfunc, cbcontext, cbdata,
|
||||
@ -110,7 +121,7 @@ int mca_btl_ofi_aop (struct mca_btl_base_module_t *btl, mca_btl_base_endpoint_t
|
||||
|
||||
remote_address = (remote_address - (uint64_t) remote_handle->base_addr);
|
||||
|
||||
rc = fi_atomic(ofi_btl->ofi_endpoint,
|
||||
rc = fi_atomic(ofi_context->tx_ctx,
|
||||
(void*) &comp->operand, 1, NULL, /* operand */
|
||||
btl_endpoint->peer_addr, /* remote addr */
|
||||
remote_address, remote_handle->rkey, /* remote buffer */
|
||||
@ -124,6 +135,7 @@ int mca_btl_ofi_aop (struct mca_btl_base_module_t *btl, mca_btl_base_endpoint_t
|
||||
}
|
||||
|
||||
MCA_BTL_OFI_NUM_RDMA_INC(ofi_btl);
|
||||
mca_btl_ofi_component.super.btl_progress();
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
@ -139,12 +151,16 @@ int mca_btl_ofi_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_e
|
||||
mca_btl_ofi_module_t *ofi_btl = (mca_btl_ofi_module_t *) btl;
|
||||
mca_btl_ofi_endpoint_t *btl_endpoint = (mca_btl_ofi_endpoint_t*) endpoint;
|
||||
mca_btl_ofi_completion_t *comp = NULL;
|
||||
mca_btl_ofi_context_t *ofi_context;
|
||||
|
||||
ofi_context = get_ofi_context(ofi_btl);
|
||||
|
||||
if (flags & MCA_BTL_ATOMIC_FLAG_32BIT) {
|
||||
fi_datatype = FI_UINT32;
|
||||
}
|
||||
|
||||
comp = mca_btl_ofi_completion_alloc(btl, endpoint,
|
||||
ofi_context,
|
||||
local_address,
|
||||
local_handle,
|
||||
cbfunc, cbcontext, cbdata,
|
||||
@ -157,7 +173,7 @@ int mca_btl_ofi_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_e
|
||||
remote_address = (remote_address - (uint64_t) remote_handle->base_addr);
|
||||
|
||||
/* perform atomic */
|
||||
rc = fi_compare_atomic(ofi_btl->ofi_endpoint,
|
||||
rc = fi_compare_atomic(ofi_context->tx_ctx,
|
||||
(void*) &comp->operand, 1, NULL,
|
||||
(void*) &comp->compare, NULL,
|
||||
local_address, local_handle->desc,
|
||||
@ -176,5 +192,8 @@ int mca_btl_ofi_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_e
|
||||
|
||||
MCA_BTL_OFI_NUM_RDMA_INC(ofi_btl);
|
||||
|
||||
/* force a bit of progress. */
|
||||
mca_btl_ofi_component.super.btl_progress();
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
@ -31,15 +31,16 @@
|
||||
#include <string.h>
|
||||
|
||||
#include "btl_ofi.h"
|
||||
#include "btl_ofi_endpoint.h"
|
||||
#include "btl_ofi_rdma.h"
|
||||
|
||||
|
||||
#define MCA_BTL_OFI_REQUIRED_CAPS (FI_RMA | FI_ATOMIC)
|
||||
#define MCA_BTL_OFI_REQUESTED_MR_MODE (FI_MR_ALLOCATED | FI_MR_PROV_KEY | FI_MR_VIRT_ADDR)
|
||||
|
||||
static char *prov_include;
|
||||
static char *prov_exclude;
|
||||
static char *ofi_progress_mode;
|
||||
static bool disable_sep;
|
||||
static int mca_btl_ofi_init_device(struct fi_info *info);
|
||||
|
||||
/* validate information returned from fi_getinfo().
|
||||
@ -124,16 +125,24 @@ static int mca_btl_ofi_component_register(void)
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&ofi_progress_mode);
|
||||
|
||||
#if OPAL_C_HAVE__THREAD_LOCAL
|
||||
mca_btl_ofi_component.bind_threads_to_contexts = true;
|
||||
mca_btl_ofi_component.num_contexts_per_module = 1;
|
||||
(void) mca_base_component_var_register(&mca_btl_ofi_component.super.btl_version,
|
||||
"bind_threads_to_contexts", "Bind threads to device contexts. "
|
||||
"In general this should improve the multi-threaded performance "
|
||||
"when threads are used. (default: true)", MCA_BASE_VAR_TYPE_BOOL,
|
||||
NULL, 0 ,MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
|
||||
MCA_BASE_VAR_SCOPE_ALL,
|
||||
&mca_btl_ofi_component.bind_threads_to_contexts);
|
||||
#endif
|
||||
"num_contexts_per_module",
|
||||
"number of communication context per module to create. "
|
||||
"This should increase multithreaded performance but it is "
|
||||
"advised that this number should be lower than total cores.",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_btl_ofi_component.num_contexts_per_module);
|
||||
disable_sep = false;
|
||||
(void) mca_base_component_var_register(&mca_btl_ofi_component.super.btl_version,
|
||||
"disable_sep",
|
||||
"force btl/ofi to never use scalable endpoint. ",
|
||||
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&disable_sep);
|
||||
|
||||
/* for now we want this component to lose to btl/ugni and btl/vader */
|
||||
module->super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH - 50;
|
||||
@ -148,7 +157,6 @@ static int mca_btl_ofi_component_open(void)
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* component cleanup - sanity checking of queue lengths
|
||||
*/
|
||||
@ -289,21 +297,34 @@ static int mca_btl_ofi_init_device(struct fi_info *info)
|
||||
int rc;
|
||||
int *module_count = &mca_btl_ofi_component.module_count;
|
||||
size_t namelen;
|
||||
mca_btl_ofi_module_t *module;
|
||||
size_t num_contexts_to_create;
|
||||
|
||||
char *linux_device_name;
|
||||
char ep_name[FI_NAME_MAX];
|
||||
|
||||
struct fi_info *ofi_info;
|
||||
struct fi_cq_attr cq_attr = {0};
|
||||
struct fi_ep_attr *ep_attr;
|
||||
struct fi_domain_attr *domain_attr;
|
||||
struct fi_av_attr av_attr = {0};
|
||||
struct fid_fabric *fabric = NULL;
|
||||
struct fid_domain *domain = NULL;
|
||||
struct fid_ep *endpoint = NULL;
|
||||
struct fid_cq *cq = NULL;
|
||||
struct fid_ep *ep = NULL;
|
||||
struct fid_av *av = NULL;
|
||||
|
||||
mca_btl_ofi_module_t *module;
|
||||
|
||||
/* allocate module */
|
||||
module = (mca_btl_ofi_module_t*) calloc(1, sizeof(mca_btl_ofi_module_t));
|
||||
if (NULL == module) {
|
||||
BTL_ERROR(("failed to allocate memory for OFI module"));
|
||||
goto fail;
|
||||
}
|
||||
*module = mca_btl_ofi_module_template;
|
||||
|
||||
/* make a copy of the given info to store on the module */
|
||||
ofi_info = fi_dupinfo(info);
|
||||
ep_attr = ofi_info->ep_attr;
|
||||
domain_attr = ofi_info->domain_attr;
|
||||
|
||||
linux_device_name = info->domain_attr->name;
|
||||
BTL_VERBOSE(("initializing dev:%s provider:%s",
|
||||
@ -330,28 +351,6 @@ static int mca_btl_ofi_init_device(struct fi_info *info)
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* endpoint */
|
||||
rc = fi_endpoint(domain, ofi_info, &endpoint, NULL);
|
||||
if (0 != rc) {
|
||||
BTL_VERBOSE(("%s failed fi_endpoint with err=%s",
|
||||
linux_device_name,
|
||||
fi_strerror(-rc)
|
||||
));
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* CQ */
|
||||
cq_attr.format = FI_CQ_FORMAT_CONTEXT;
|
||||
cq_attr.wait_obj = FI_WAIT_NONE;
|
||||
rc = fi_cq_open(domain, &cq_attr, &cq, NULL);
|
||||
if (0 != rc) {
|
||||
BTL_VERBOSE(("%s failed fi_cq_open with err=%s",
|
||||
linux_device_name,
|
||||
fi_strerror(-rc)
|
||||
));
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* AV */
|
||||
av_attr.type = FI_AV_MAP;
|
||||
rc = fi_av_open(domain, &av_attr, &av, NULL);
|
||||
@ -363,29 +362,76 @@ static int mca_btl_ofi_init_device(struct fi_info *info)
|
||||
goto fail;
|
||||
}
|
||||
|
||||
num_contexts_to_create = mca_btl_ofi_component.num_contexts_per_module;
|
||||
|
||||
/* bind CQ and AV to endpoint */
|
||||
uint32_t cq_flags = (FI_TRANSMIT);
|
||||
rc = fi_ep_bind(endpoint, (fid_t)cq, cq_flags);
|
||||
/* If the domain support scalable endpoint. */
|
||||
if (domain_attr->max_ep_tx_ctx > 1 && !disable_sep) {
|
||||
|
||||
BTL_VERBOSE(("btl/ofi using scalable endpoint."));
|
||||
|
||||
if (num_contexts_to_create > domain_attr->max_ep_tx_ctx) {
|
||||
BTL_VERBOSE(("cannot create requested %u contexts. (node max=%zu)",
|
||||
module->num_contexts,
|
||||
domain_attr->max_ep_tx_ctx));
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* modify the info to let the provider know we are creating x contexts */
|
||||
ep_attr->tx_ctx_cnt = num_contexts_to_create;
|
||||
ep_attr->rx_ctx_cnt = num_contexts_to_create;
|
||||
|
||||
/* create scalable endpoint */
|
||||
rc = fi_scalable_ep(domain, ofi_info, &ep, NULL);
|
||||
if (0 != rc) {
|
||||
BTL_VERBOSE(("%s failed fi_ep_bind with err=%s",
|
||||
BTL_VERBOSE(("%s failed fi_scalable_ep with err=%s",
|
||||
linux_device_name,
|
||||
fi_strerror(-rc)
|
||||
));
|
||||
goto fail;
|
||||
}
|
||||
|
||||
rc = fi_ep_bind(endpoint, (fid_t)av, 0);
|
||||
module->num_contexts = num_contexts_to_create;
|
||||
module->is_scalable_ep = true;
|
||||
|
||||
/* create contexts */
|
||||
module->contexts = mca_btl_ofi_context_alloc_scalable(ofi_info,
|
||||
domain, ep, av,
|
||||
num_contexts_to_create);
|
||||
|
||||
} else {
|
||||
/* warn the user if they want more than 1 context */
|
||||
if (num_contexts_to_create > 1) {
|
||||
BTL_ERROR(("cannot create %zu contexts as the provider does not support "
|
||||
"scalable endpoint. Falling back to single context endpoint.",
|
||||
num_contexts_to_create));
|
||||
}
|
||||
|
||||
BTL_VERBOSE(("btl/ofi using normal endpoint."));
|
||||
|
||||
rc = fi_endpoint(domain, ofi_info, &ep, NULL);
|
||||
if (0 != rc) {
|
||||
BTL_VERBOSE(("%s failed fi_ep_bind with err=%s",
|
||||
BTL_VERBOSE(("%s failed fi_endpoint with err=%s",
|
||||
linux_device_name,
|
||||
fi_strerror(-rc)
|
||||
));
|
||||
goto fail;
|
||||
}
|
||||
|
||||
module->num_contexts = 1;
|
||||
module->is_scalable_ep = false;
|
||||
|
||||
/* create contexts */
|
||||
module->contexts = mca_btl_ofi_context_alloc_normal(ofi_info,
|
||||
domain, ep, av);
|
||||
}
|
||||
|
||||
if (NULL == module->contexts) {
|
||||
/* error message is already printed */
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* enable the endpoint for using */
|
||||
rc = fi_enable(endpoint);
|
||||
rc = fi_enable(ep);
|
||||
if (0 != rc) {
|
||||
BTL_VERBOSE(("%s failed fi_enable with err=%s",
|
||||
linux_device_name,
|
||||
@ -395,19 +441,12 @@ static int mca_btl_ofi_init_device(struct fi_info *info)
|
||||
}
|
||||
|
||||
/* Everything succeeded, lets create a module for this device. */
|
||||
module = (mca_btl_ofi_module_t*) calloc(1, sizeof(mca_btl_ofi_module_t));
|
||||
if (NULL == module) {
|
||||
goto fail;
|
||||
}
|
||||
*module = mca_btl_ofi_module_template;
|
||||
|
||||
/* store the information. */
|
||||
module->fabric_info = ofi_info;
|
||||
module->fabric = fabric;
|
||||
module->domain = domain;
|
||||
module->cq = cq;
|
||||
module->av = av;
|
||||
module->ofi_endpoint = endpoint;
|
||||
module->ofi_endpoint = ep;
|
||||
module->linux_device_name = linux_device_name;
|
||||
module->outstanding_rdma = 0;
|
||||
module->use_virt_addr = false;
|
||||
@ -420,29 +459,13 @@ static int mca_btl_ofi_init_device(struct fi_info *info)
|
||||
/* initialize the rcache */
|
||||
mca_btl_ofi_rcache_init(module);
|
||||
|
||||
/* create endpoint list */
|
||||
OBJ_CONSTRUCT(&module->endpoints, opal_list_t);
|
||||
|
||||
/* init free lists */
|
||||
OBJ_CONSTRUCT(&module->comp_list, opal_free_list_t);
|
||||
rc = opal_free_list_init(&module->comp_list,
|
||||
sizeof(mca_btl_ofi_completion_t),
|
||||
opal_cache_line_size,
|
||||
OBJ_CLASS(mca_btl_ofi_completion_t),
|
||||
0,
|
||||
0,
|
||||
128,
|
||||
-1,
|
||||
128,
|
||||
NULL,
|
||||
0,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL);
|
||||
assert(OPAL_SUCCESS == rc);
|
||||
OBJ_CONSTRUCT(&module->module_lock, opal_mutex_t);
|
||||
|
||||
/* create and send the modex for this device */
|
||||
namelen = sizeof(ep_name);
|
||||
rc = fi_getname((fid_t)endpoint, &ep_name[0], &namelen);
|
||||
rc = fi_getname((fid_t)ep, &ep_name[0], &namelen);
|
||||
if (0 != rc) {
|
||||
BTL_VERBOSE(("%s failed fi_getname with err=%s",
|
||||
linux_device_name,
|
||||
@ -466,15 +489,20 @@ static int mca_btl_ofi_init_device(struct fi_info *info)
|
||||
|
||||
fail:
|
||||
/* clean up */
|
||||
|
||||
/* if the contexts have not been initiated, num_contexts should
|
||||
* be zero and we skip this. */
|
||||
for (int i=0; i < module->num_contexts; i++) {
|
||||
mca_btl_ofi_context_finalize(&module->contexts[i], module->is_scalable_ep);
|
||||
}
|
||||
free(module->contexts);
|
||||
|
||||
if (NULL != av) {
|
||||
fi_close(&av->fid);
|
||||
}
|
||||
if (NULL != cq) {
|
||||
fi_close(&cq->fid);
|
||||
}
|
||||
|
||||
if (NULL != endpoint) {
|
||||
fi_close(&endpoint->fid);
|
||||
if (NULL != ep) {
|
||||
fi_close(&ep->fid);
|
||||
}
|
||||
|
||||
if (NULL != domain) {
|
||||
@ -484,12 +512,12 @@ fail:
|
||||
if (NULL != fabric) {
|
||||
fi_close(&fabric->fid);
|
||||
}
|
||||
free(module);
|
||||
|
||||
/* not really a failure. just skip this device. */
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @brief OFI BTL progress function
|
||||
*
|
||||
@ -497,6 +525,44 @@ fail:
|
||||
*/
|
||||
static int mca_btl_ofi_component_progress (void)
|
||||
{
|
||||
int events = 0;
|
||||
mca_btl_ofi_context_t *context;
|
||||
|
||||
for (int i = 0 ; i < mca_btl_ofi_component.module_count ; ++i) {
|
||||
mca_btl_ofi_module_t *module = mca_btl_ofi_component.modules[i];
|
||||
|
||||
/* progress context we own first. */
|
||||
context = get_ofi_context(module);
|
||||
|
||||
if (mca_btl_ofi_context_trylock(context)) {
|
||||
events += mca_btl_ofi_context_progress(context);
|
||||
mca_btl_ofi_context_unlock(context);
|
||||
}
|
||||
|
||||
/* if there is nothing to do, try progress other's. */
|
||||
if (events == 0) {
|
||||
for (int j = 0 ; j < module->num_contexts ; j++ ) {
|
||||
|
||||
context = get_ofi_context_rr(module);
|
||||
|
||||
if (mca_btl_ofi_context_trylock(context)) {
|
||||
events += mca_btl_ofi_context_progress(context);
|
||||
mca_btl_ofi_context_unlock(context);
|
||||
}
|
||||
|
||||
/* If we did something, good enough. return now.
|
||||
* This is crucial for performance/latency. */
|
||||
if (events > 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return events;
|
||||
}
|
||||
|
||||
int mca_btl_ofi_context_progress(mca_btl_ofi_context_t *context) {
|
||||
|
||||
int ret = 0;
|
||||
int events_read;
|
||||
@ -506,17 +572,14 @@ static int mca_btl_ofi_component_progress (void)
|
||||
|
||||
mca_btl_ofi_completion_t *comp;
|
||||
|
||||
for (int i = 0 ; i < mca_btl_ofi_component.module_count ; ++i) {
|
||||
mca_btl_ofi_module_t *module = mca_btl_ofi_component.modules[i];
|
||||
|
||||
ret = fi_cq_read(module->cq, &cq_entry, mca_btl_ofi_component.num_cqe_read);
|
||||
ret = fi_cq_read(context->cq, &cq_entry, mca_btl_ofi_component.num_cqe_read);
|
||||
|
||||
if (0 < ret) {
|
||||
events_read = ret;
|
||||
for (int j = 0; j < events_read; j++) {
|
||||
if (NULL != cq_entry[j].op_context) {
|
||||
for (int i = 0; i < events_read; i++) {
|
||||
if (NULL != cq_entry[i].op_context) {
|
||||
++events;
|
||||
comp = (mca_btl_ofi_completion_t*) cq_entry[j].op_context;
|
||||
comp = (mca_btl_ofi_completion_t*) cq_entry[i].op_context;
|
||||
mca_btl_ofi_module_t *ofi_btl = (mca_btl_ofi_module_t*)comp->btl;
|
||||
|
||||
switch (comp->type) {
|
||||
@ -547,7 +610,7 @@ static int mca_btl_ofi_component_progress (void)
|
||||
}
|
||||
}
|
||||
} else if (OPAL_UNLIKELY(ret == -FI_EAVAIL)) {
|
||||
ret = fi_cq_readerr(module->cq, &cqerr, 0);
|
||||
ret = fi_cq_readerr(context->cq, &cqerr, 0);
|
||||
|
||||
/* cq readerr failed!? */
|
||||
if (0 > ret) {
|
||||
@ -557,14 +620,12 @@ static int mca_btl_ofi_component_progress (void)
|
||||
BTL_ERROR(("fi_cq_readerr: (provider err_code = %d)\n",
|
||||
cqerr.prov_errno));
|
||||
}
|
||||
|
||||
MCA_BTL_OFI_ABORT();
|
||||
|
||||
}
|
||||
#ifdef FI_EINTR
|
||||
/* sometimes, sockets provider complain about interupt. */
|
||||
/* sometimes, sockets provider complain about interupt. We do nothing. */
|
||||
else if (OPAL_UNLIKELY(ret == -FI_EINTR)) {
|
||||
continue;
|
||||
|
||||
}
|
||||
#endif
|
||||
/* If the error is not FI_EAGAIN, report the error and abort. */
|
||||
@ -572,7 +633,6 @@ static int mca_btl_ofi_component_progress (void)
|
||||
BTL_ERROR(("fi_cq_read returned error %d:%s", ret, fi_strerror(-ret)));
|
||||
MCA_BTL_OFI_ABORT();
|
||||
}
|
||||
}
|
||||
|
||||
return events;
|
||||
}
|
||||
@ -593,5 +653,5 @@ mca_btl_ofi_component_t mca_btl_ofi_component = {
|
||||
|
||||
.btl_init = mca_btl_ofi_component_init,
|
||||
.btl_progress = mca_btl_ofi_component_progress,
|
||||
}
|
||||
},
|
||||
};
|
||||
|
@ -15,6 +15,10 @@
|
||||
#include "btl_ofi_endpoint.h"
|
||||
#include "opal/util/proc.h"
|
||||
|
||||
#if OPAL_HAVE_THREAD_LOCAL
|
||||
opal_thread_local mca_btl_ofi_context_t *my_context = NULL;
|
||||
#endif /* OPAL_HAVE_THREAD_LOCAL */
|
||||
|
||||
static void mca_btl_ofi_endpoint_construct (mca_btl_ofi_endpoint_t *endpoint)
|
||||
{
|
||||
endpoint->peer_addr = 0;
|
||||
@ -49,3 +53,290 @@ mca_btl_base_endpoint_t *mca_btl_ofi_endpoint_create (opal_proc_t *proc, struct
|
||||
return (mca_btl_base_endpoint_t *) endpoint;
|
||||
}
|
||||
|
||||
int ofi_comp_list_init(opal_free_list_t *comp_list)
|
||||
{
|
||||
int rc;
|
||||
OBJ_CONSTRUCT(comp_list, opal_free_list_t);
|
||||
rc = opal_free_list_init(comp_list,
|
||||
sizeof(mca_btl_ofi_completion_t),
|
||||
opal_cache_line_size,
|
||||
OBJ_CLASS(mca_btl_ofi_completion_t),
|
||||
0,
|
||||
0,
|
||||
128,
|
||||
-1,
|
||||
128,
|
||||
NULL,
|
||||
0,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL);
|
||||
if (rc != OPAL_SUCCESS) {
|
||||
BTL_VERBOSE(("cannot allocate completion freelist"));
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* mca_btl_ofi_context_alloc_normal()
|
||||
*
|
||||
* This function will allocate an ofi_context, map the endpoint to tx/rx context,
|
||||
* bind CQ,AV to the endpoint and initialize all the structure.
|
||||
* USE WITH NORMAL ENDPOINT ONLY */
|
||||
mca_btl_ofi_context_t *mca_btl_ofi_context_alloc_normal(struct fi_info *info,
|
||||
struct fid_domain *domain,
|
||||
struct fid_ep *ep,
|
||||
struct fid_av *av)
|
||||
{
|
||||
int rc;
|
||||
uint32_t cq_flags = FI_TRANSMIT;
|
||||
char *linux_device_name = info->domain_attr->name;
|
||||
|
||||
struct fi_cq_attr cq_attr = {0};
|
||||
|
||||
mca_btl_ofi_context_t *context;
|
||||
|
||||
context = (mca_btl_ofi_context_t*) calloc(1, sizeof(*context));
|
||||
if (NULL == context) {
|
||||
BTL_VERBOSE(("cannot allocate context"));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Don't really need to check, just avoiding compiler warning because
|
||||
* BTL_VERBOSE is a no op in performance build and the compiler will
|
||||
* complain about unused variable. */
|
||||
if (NULL == linux_device_name) {
|
||||
BTL_VERBOSE(("linux device name is NULL. This shouldn't happen."));
|
||||
goto single_fail;
|
||||
}
|
||||
|
||||
cq_attr.format = FI_CQ_FORMAT_CONTEXT;
|
||||
cq_attr.wait_obj = FI_WAIT_NONE;
|
||||
rc = fi_cq_open(domain, &cq_attr, &context->cq, NULL);
|
||||
if (0 != rc) {
|
||||
BTL_VERBOSE(("%s failed fi_cq_open with err=%s",
|
||||
linux_device_name,
|
||||
fi_strerror(-rc)
|
||||
));
|
||||
goto single_fail;
|
||||
}
|
||||
|
||||
rc = fi_ep_bind(ep, (fid_t)av, 0);
|
||||
if (0 != rc) {
|
||||
BTL_VERBOSE(("%s failed fi_ep_bind with err=%s",
|
||||
linux_device_name,
|
||||
fi_strerror(-rc)
|
||||
));
|
||||
goto single_fail;
|
||||
}
|
||||
|
||||
rc = fi_ep_bind(ep, (fid_t)context->cq, cq_flags);
|
||||
if (0 != rc) {
|
||||
BTL_VERBOSE(("%s failed fi_scalable_ep_bind with err=%s",
|
||||
linux_device_name,
|
||||
fi_strerror(-rc)
|
||||
));
|
||||
goto single_fail;
|
||||
}
|
||||
|
||||
rc = ofi_comp_list_init(&context->comp_list);
|
||||
if (rc != OPAL_SUCCESS) {
|
||||
goto single_fail;
|
||||
}
|
||||
|
||||
context->tx_ctx = ep;
|
||||
context->rx_ctx = ep;
|
||||
context->context_id = 0;
|
||||
|
||||
return context;
|
||||
|
||||
single_fail:
|
||||
mca_btl_ofi_context_finalize(context, false);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* mca_btl_ofi_context_alloc_scalable()
|
||||
*
|
||||
* This function allocate communication contexts and return the pointer
|
||||
* to the first btl context. It also take care of all the bindings needed.
|
||||
* USE WITH SCALABLE ENDPOINT ONLY */
|
||||
mca_btl_ofi_context_t *mca_btl_ofi_context_alloc_scalable(struct fi_info *info,
|
||||
struct fid_domain *domain,
|
||||
struct fid_ep *sep,
|
||||
struct fid_av *av,
|
||||
size_t num_contexts)
|
||||
{
|
||||
BTL_VERBOSE(("creating %zu contexts", num_contexts));
|
||||
|
||||
int rc;
|
||||
size_t i;
|
||||
char *linux_device_name = info->domain_attr->name;
|
||||
|
||||
struct fi_cq_attr cq_attr = {0};
|
||||
struct fi_tx_attr tx_attr = {0};
|
||||
struct fi_rx_attr rx_attr = {0};
|
||||
|
||||
mca_btl_ofi_context_t *contexts;
|
||||
|
||||
contexts = (mca_btl_ofi_context_t*) calloc(num_contexts, sizeof(*contexts));
|
||||
if (NULL == contexts) {
|
||||
BTL_VERBOSE(("cannot allocate communication contexts."));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Don't really need to check, just avoiding compiler warning because
|
||||
* BTL_VERBOSE is a no op in performance build and the compiler will
|
||||
* complain about unused variable. */
|
||||
if (NULL == linux_device_name) {
|
||||
BTL_VERBOSE(("linux device name is NULL. This shouldn't happen."));
|
||||
goto scalable_fail;
|
||||
}
|
||||
|
||||
/* bind AV to endpoint */
|
||||
rc = fi_scalable_ep_bind(sep, (fid_t)av, 0);
|
||||
if (0 != rc) {
|
||||
BTL_VERBOSE(("%s failed fi_scalable_ep_bind with err=%s",
|
||||
linux_device_name,
|
||||
fi_strerror(-rc)
|
||||
));
|
||||
goto scalable_fail;
|
||||
}
|
||||
|
||||
for (i=0; i < num_contexts; i++) {
|
||||
rc = fi_tx_context(sep, i, &tx_attr, &contexts[i].tx_ctx, NULL);
|
||||
if (0 != rc) {
|
||||
BTL_VERBOSE(("%s failed fi_tx_context with err=%s",
|
||||
linux_device_name,
|
||||
fi_strerror(-rc)
|
||||
));
|
||||
goto scalable_fail;
|
||||
}
|
||||
|
||||
/* We don't actually need a receiving context as we only do one-sided.
|
||||
* However, sockets provider will hang if we dont have one. It is
|
||||
* also nice to have equal number of tx/rx context. */
|
||||
rc = fi_rx_context(sep, i, &rx_attr, &contexts[i].rx_ctx, NULL);
|
||||
if (0 != rc) {
|
||||
BTL_VERBOSE(("%s failed fi_rx_context with err=%s",
|
||||
linux_device_name,
|
||||
fi_strerror(-rc)
|
||||
));
|
||||
goto scalable_fail;
|
||||
}
|
||||
|
||||
/* create CQ */
|
||||
cq_attr.format = FI_CQ_FORMAT_CONTEXT;
|
||||
cq_attr.wait_obj = FI_WAIT_NONE;
|
||||
rc = fi_cq_open(domain, &cq_attr, &contexts[i].cq, NULL);
|
||||
if (0 != rc) {
|
||||
BTL_VERBOSE(("%s failed fi_cq_open with err=%s",
|
||||
linux_device_name,
|
||||
fi_strerror(-rc)
|
||||
));
|
||||
goto scalable_fail;
|
||||
}
|
||||
|
||||
/* bind cq to transmit context */
|
||||
uint32_t cq_flags = (FI_TRANSMIT);
|
||||
rc = fi_ep_bind(contexts[i].tx_ctx, (fid_t)contexts[i].cq, cq_flags);
|
||||
if (0 != rc) {
|
||||
BTL_VERBOSE(("%s failed fi_ep_bind with err=%s",
|
||||
linux_device_name,
|
||||
fi_strerror(-rc)
|
||||
));
|
||||
goto scalable_fail;
|
||||
}
|
||||
|
||||
/* enable the context. */
|
||||
rc = fi_enable(contexts[i].tx_ctx);
|
||||
if (0 != rc) {
|
||||
BTL_VERBOSE(("%s failed fi_enable with err=%s",
|
||||
linux_device_name,
|
||||
fi_strerror(-rc)
|
||||
));
|
||||
goto scalable_fail;
|
||||
}
|
||||
|
||||
rc = fi_enable(contexts[i].rx_ctx);
|
||||
if (0 != rc) {
|
||||
BTL_VERBOSE(("%s failed fi_enable with err=%s",
|
||||
linux_device_name,
|
||||
fi_strerror(-rc)
|
||||
));
|
||||
goto scalable_fail;
|
||||
}
|
||||
|
||||
/* initialize completion freelist. */
|
||||
rc = ofi_comp_list_init(&contexts[i].comp_list);
|
||||
if (rc != OPAL_SUCCESS) {
|
||||
goto scalable_fail;
|
||||
}
|
||||
|
||||
/* assign the id */
|
||||
contexts[i].context_id = i;
|
||||
}
|
||||
|
||||
return contexts;
|
||||
|
||||
scalable_fail:
|
||||
/* close and free */
|
||||
for(i=0; i < num_contexts; i++) {
|
||||
mca_btl_ofi_context_finalize(&contexts[i], true);
|
||||
}
|
||||
free(contexts);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void mca_btl_ofi_context_finalize(mca_btl_ofi_context_t *context, bool scalable_ep) {
|
||||
|
||||
/* if it is a scalable ep, we have to close all contexts. */
|
||||
if (scalable_ep) {
|
||||
if (NULL != context->tx_ctx) {
|
||||
fi_close(&context->tx_ctx->fid);
|
||||
}
|
||||
|
||||
if (NULL != context->rx_ctx) {
|
||||
fi_close(&context->rx_ctx->fid);
|
||||
}
|
||||
}
|
||||
|
||||
if( NULL != context->cq) {
|
||||
fi_close(&context->cq->fid);
|
||||
}
|
||||
|
||||
/* Can we destruct the object that hasn't been constructed? */
|
||||
OBJ_DESTRUCT(&context->comp_list);
|
||||
}
|
||||
|
||||
/* Get a context to use for communication.
|
||||
* If TLS is supported, it will use the cached endpoint.
|
||||
* If not, it will invoke the normal round-robin assignment. */
|
||||
mca_btl_ofi_context_t *get_ofi_context(mca_btl_ofi_module_t *btl)
|
||||
{
|
||||
#if OPAL_HAVE_THREAD_LOCAL
|
||||
/* With TLS, we cache the context we use. */
|
||||
static volatile int64_t cur_num = 0;
|
||||
|
||||
if (OPAL_UNLIKELY(my_context == NULL)) {
|
||||
OPAL_THREAD_LOCK(&btl->module_lock);
|
||||
|
||||
my_context = &btl->contexts[cur_num];
|
||||
cur_num = (cur_num + 1) %btl->num_contexts;
|
||||
|
||||
OPAL_THREAD_UNLOCK(&btl->module_lock);
|
||||
}
|
||||
|
||||
assert (my_context);
|
||||
return my_context;
|
||||
#else
|
||||
return get_ofi_context_rr(btl);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* return the context in a round-robin. */
|
||||
/* There is no need for atomics here as it might hurt the performance. */
|
||||
mca_btl_ofi_context_t *get_ofi_context_rr(mca_btl_ofi_module_t *btl)
|
||||
{
|
||||
static volatile uint64_t rr_num = 0;
|
||||
return &btl->contexts[rr_num++%btl->num_contexts];
|
||||
}
|
||||
|
@ -30,6 +30,10 @@
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
#if OPAL_HAVE_THREAD_LOCAL
|
||||
extern opal_thread_local mca_btl_ofi_context_t *my_context;
|
||||
#endif /* OPAL_HAVE_THREAD_LOCAL */
|
||||
|
||||
struct mca_btl_base_endpoint_t {
|
||||
opal_list_item_t super;
|
||||
|
||||
@ -47,7 +51,25 @@ typedef struct mca_btl_base_endpoint_t mca_btl_base_endpoint_t;
|
||||
typedef mca_btl_base_endpoint_t mca_btl_ofi_endpoint_t;
|
||||
OBJ_CLASS_DECLARATION(mca_btl_ofi_endpoint_t);
|
||||
|
||||
int ofi_comp_list_init(opal_free_list_t *comp_list);
|
||||
|
||||
mca_btl_base_endpoint_t *mca_btl_ofi_endpoint_create (opal_proc_t *proc, struct fid_ep *ep);
|
||||
|
||||
/* contexts */
|
||||
mca_btl_ofi_context_t *mca_btl_ofi_context_alloc_scalable(struct fi_info *info,
|
||||
struct fid_domain *domain,
|
||||
struct fid_ep *sep,
|
||||
struct fid_av *av,
|
||||
size_t num_contexts);
|
||||
|
||||
mca_btl_ofi_context_t *mca_btl_ofi_context_alloc_normal(struct fi_info *info,
|
||||
struct fid_domain *domain,
|
||||
struct fid_ep *ep,
|
||||
struct fid_av *av);
|
||||
void mca_btl_ofi_context_finalize(mca_btl_ofi_context_t *context, bool scalable_ep);
|
||||
|
||||
mca_btl_ofi_context_t *get_ofi_context(mca_btl_ofi_module_t *btl);
|
||||
mca_btl_ofi_context_t *get_ofi_context_rr(mca_btl_ofi_module_t *btl);
|
||||
|
||||
END_C_DECLS
|
||||
#endif
|
||||
|
@ -242,14 +242,17 @@ int mca_btl_ofi_dereg_mem (void *reg_data, mca_rcache_base_registration_t *reg)
|
||||
|
||||
int mca_btl_ofi_finalize (mca_btl_base_module_t* btl)
|
||||
{
|
||||
int i;
|
||||
mca_btl_ofi_module_t *ofi_btl = (mca_btl_ofi_module_t *) btl;
|
||||
mca_btl_ofi_endpoint_t *endpoint, *next;
|
||||
|
||||
assert(btl);
|
||||
|
||||
if (NULL != ofi_btl->cq) {
|
||||
fi_close(&ofi_btl->cq->fid);
|
||||
/* loop over all the contexts */
|
||||
for (i=0; i < ofi_btl->num_contexts; i++) {
|
||||
mca_btl_ofi_context_finalize(&ofi_btl->contexts[i], ofi_btl->is_scalable_ep);
|
||||
}
|
||||
free(ofi_btl->contexts);
|
||||
|
||||
if (NULL != ofi_btl->av) {
|
||||
fi_close(&ofi_btl->av->fid);
|
||||
@ -278,7 +281,6 @@ int mca_btl_ofi_finalize (mca_btl_base_module_t* btl)
|
||||
}
|
||||
|
||||
OBJ_DESTRUCT(&ofi_btl->endpoints);
|
||||
OBJ_DESTRUCT(&ofi_btl->comp_list);
|
||||
|
||||
if (ofi_btl->rcache) {
|
||||
mca_rcache_base_module_destroy (ofi_btl->rcache);
|
||||
|
@ -21,26 +21,31 @@ OBJ_CLASS_INSTANCE(mca_btl_ofi_completion_t,
|
||||
mca_btl_ofi_completion_t *mca_btl_ofi_completion_alloc (
|
||||
mca_btl_base_module_t *btl,
|
||||
mca_btl_base_endpoint_t *endpoint,
|
||||
mca_btl_ofi_context_t *ofi_context,
|
||||
void *local_address,
|
||||
mca_btl_base_registration_handle_t *local_handle,
|
||||
mca_btl_base_rdma_completion_fn_t cbfunc,
|
||||
void *cbcontext, void *cbdata,
|
||||
int type)
|
||||
{
|
||||
mca_btl_ofi_module_t *ofi_btl = (mca_btl_ofi_module_t*)btl;
|
||||
assert(btl);
|
||||
assert(endpoint);
|
||||
assert(ofi_context);
|
||||
|
||||
mca_btl_ofi_completion_t *comp;
|
||||
|
||||
comp = (mca_btl_ofi_completion_t*) opal_free_list_get(&ofi_btl->comp_list);
|
||||
comp = (mca_btl_ofi_completion_t*) opal_free_list_get(&ofi_context->comp_list);
|
||||
assert(comp);
|
||||
|
||||
comp->btl = btl;
|
||||
comp->endpoint = endpoint;
|
||||
comp->my_context = ofi_context;
|
||||
comp->local_address = local_address;
|
||||
comp->local_handle = local_handle;
|
||||
comp->cbfunc = cbfunc;
|
||||
comp->cbcontext = cbcontext;
|
||||
comp->cbdata = cbdata;
|
||||
comp->my_list = &ofi_btl->comp_list;
|
||||
comp->my_list = &ofi_context->comp_list;
|
||||
comp->type = type;
|
||||
|
||||
return comp;
|
||||
@ -53,12 +58,17 @@ int mca_btl_ofi_get (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoi
|
||||
{
|
||||
|
||||
int rc;
|
||||
|
||||
mca_btl_ofi_module_t *ofi_btl = (mca_btl_ofi_module_t *) btl;
|
||||
mca_btl_ofi_endpoint_t *btl_endpoint = (mca_btl_ofi_endpoint_t*) endpoint;
|
||||
mca_btl_ofi_completion_t *comp;
|
||||
mca_btl_ofi_context_t *ofi_context;
|
||||
|
||||
ofi_context = get_ofi_context(ofi_btl);
|
||||
|
||||
/* create completion context */
|
||||
comp = mca_btl_ofi_completion_alloc(btl, endpoint,
|
||||
ofi_context,
|
||||
local_address,
|
||||
local_handle,
|
||||
cbfunc, cbcontext, cbdata,
|
||||
@ -67,7 +77,7 @@ int mca_btl_ofi_get (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoi
|
||||
remote_address = (remote_address - (uint64_t) remote_handle->base_addr);
|
||||
|
||||
/* Remote write data across the wire */
|
||||
rc = fi_read(ofi_btl->ofi_endpoint,
|
||||
rc = fi_read(ofi_context->tx_ctx,
|
||||
local_address, size, /* payload */
|
||||
local_handle->desc,
|
||||
btl_endpoint->peer_addr,
|
||||
@ -99,10 +109,14 @@ int mca_btl_ofi_put (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoi
|
||||
int rc;
|
||||
mca_btl_ofi_module_t *ofi_btl = (mca_btl_ofi_module_t *) btl;
|
||||
mca_btl_ofi_endpoint_t *btl_endpoint = (mca_btl_ofi_endpoint_t*) endpoint;
|
||||
mca_btl_ofi_context_t *ofi_context;
|
||||
|
||||
ofi_context = get_ofi_context(ofi_btl);
|
||||
|
||||
/* create completion context */
|
||||
mca_btl_ofi_completion_t *comp;
|
||||
comp = mca_btl_ofi_completion_alloc(btl, endpoint,
|
||||
ofi_context,
|
||||
local_address,
|
||||
local_handle,
|
||||
cbfunc, cbcontext, cbdata,
|
||||
@ -111,7 +125,7 @@ int mca_btl_ofi_put (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoi
|
||||
remote_address = (remote_address - (uint64_t) remote_handle->base_addr);
|
||||
|
||||
/* Remote write data across the wire */
|
||||
rc = fi_write(ofi_btl->ofi_endpoint,
|
||||
rc = fi_write(ofi_context->tx_ctx,
|
||||
local_address, size, /* payload */
|
||||
local_handle->desc,
|
||||
btl_endpoint->peer_addr,
|
||||
|
@ -22,6 +22,7 @@
|
||||
mca_btl_ofi_completion_t *mca_btl_ofi_completion_alloc (
|
||||
mca_btl_base_module_t *btl,
|
||||
mca_btl_base_endpoint_t *endpoint,
|
||||
mca_btl_ofi_context_t *ofi_context,
|
||||
void *local_address,
|
||||
mca_btl_base_registration_handle_t *local_handle,
|
||||
mca_btl_base_rdma_completion_fn_t cbfunc,
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user