Merge pull request #5334 from thananon/ofi_progress_fix
btl/ofi: progress now happens after a threshold.
Этот коммит содержится в:
Коммит
304cf97ab5
@ -48,6 +48,8 @@
|
||||
BEGIN_C_DECLS
|
||||
/* Module-count limit for the OFI BTL. Its use is not visible in this excerpt;
 * presumably it bounds mca_btl_ofi_component.module_count -- TODO confirm. */
#define MCA_BTL_OFI_MAX_MODULES 16
|
||||
/* Limit on completion-queue entries per read. Its use is not visible in this
 * excerpt -- NOTE(review): confirm against the progress code. */
#define MCA_BTL_OFI_MAX_CQ_READ_ENTRIES 128
|
||||
/* Value assigned to mca_btl_ofi_component.num_cqe_read at component registration
 * ("Number of completion entries to read from a single cq_read."). */
#define MCA_BTL_OFI_NUM_CQE_READ 64
|
||||
/* Default for mca_btl_ofi_component.progress_threshold: the number of outstanding
 * operations before the BTL progresses automatically. */
#define MCA_BTL_OFI_PROGRESS_THRESHOLD 64
|
||||
|
||||
/* Forwards its argument list unchanged to mca_btl_ofi_exit(). */
#define MCA_BTL_OFI_ABORT(args) mca_btl_ofi_exit(args)
|
||||
|
||||
@ -129,6 +131,7 @@ struct mca_btl_ofi_component_t {
|
||||
int module_count;
|
||||
int num_contexts_per_module;
|
||||
int num_cqe_read;
|
||||
int progress_threshold;
|
||||
|
||||
size_t namelen;
|
||||
|
||||
|
@ -81,9 +81,6 @@ int mca_btl_ofi_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_end
|
||||
|
||||
MCA_BTL_OFI_NUM_RDMA_INC(ofi_btl);
|
||||
|
||||
/* force a bit of progress. */
|
||||
mca_btl_ofi_component.super.btl_progress();
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
@ -135,7 +132,6 @@ int mca_btl_ofi_aop (struct mca_btl_base_module_t *btl, mca_btl_base_endpoint_t
|
||||
}
|
||||
|
||||
MCA_BTL_OFI_NUM_RDMA_INC(ofi_btl);
|
||||
mca_btl_ofi_component.super.btl_progress();
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
@ -192,8 +188,5 @@ int mca_btl_ofi_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_e
|
||||
|
||||
MCA_BTL_OFI_NUM_RDMA_INC(ofi_btl);
|
||||
|
||||
/* force a bit of progress. */
|
||||
mca_btl_ofi_component.super.btl_progress();
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
@ -49,13 +49,17 @@ static int validate_info(struct fi_info *info)
|
||||
{
|
||||
int mr_mode;
|
||||
|
||||
BTL_VERBOSE(("validating device: %s", info->domain_attr->name));
|
||||
|
||||
/* we need exactly all the required bits */
|
||||
if ((info->caps & MCA_BTL_OFI_REQUIRED_CAPS) != MCA_BTL_OFI_REQUIRED_CAPS) {
|
||||
BTL_VERBOSE(("unsupported caps"));
|
||||
return OPAL_ERROR;
|
||||
}
|
||||
|
||||
/* we need FI_EP_RDM */
|
||||
if (info->ep_attr->type != FI_EP_RDM) {
|
||||
BTL_VERBOSE(("unsupported EP type"));
|
||||
return OPAL_ERROR;
|
||||
}
|
||||
|
||||
@ -63,9 +67,16 @@ static int validate_info(struct fi_info *info)
|
||||
|
||||
if (!(mr_mode == FI_MR_BASIC || mr_mode == FI_MR_SCALABLE ||
|
||||
(mr_mode & ~(FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY)) == 0)) {
|
||||
BTL_VERBOSE(("unsupported MR mode"));
|
||||
return OPAL_ERROR;
|
||||
}
|
||||
|
||||
/* Reject the device unless its TX context advertises FI_DELIVERY_COMPLETE.
 * The flag has to be tested with bitwise AND: OR-ing a non-zero flag constant
 * into op_flags always yields a non-zero value, so the previous `|` form of
 * this check could never fail. */
if (!(info->tx_attr->op_flags & FI_DELIVERY_COMPLETE)) {
    BTL_VERBOSE(("the endpoint tx_ctx does not support FI_DELIVERY_COMPLETE"));
    return OPAL_ERROR;
}
|
||||
|
||||
BTL_VERBOSE(("device: %s is good to go.", info->domain_attr->name));
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
@ -102,14 +113,10 @@ static int mca_btl_ofi_component_register(void)
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&prov_exclude);
|
||||
|
||||
/* Note: better leave it at 1 for now. osc rdma module is designed for 1 completion
|
||||
* at a time. Dealing with more than 1 completion in 1 read will confuse the osc rdma.
|
||||
* source: 8 hours of debugging. :(*/
|
||||
mca_btl_ofi_component.num_cqe_read = 1;
|
||||
mca_btl_ofi_component.num_cqe_read = MCA_BTL_OFI_NUM_CQE_READ;
|
||||
(void) mca_base_component_var_register(&mca_btl_ofi_component.super.btl_version,
|
||||
"num_cq_read",
|
||||
"Number of completion entries to read from a single cq_read. "
|
||||
"(default: 1)",
|
||||
"Number of completion entries to read from a single cq_read. ",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
@ -135,6 +142,7 @@ static int mca_btl_ofi_component_register(void)
|
||||
OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_btl_ofi_component.num_contexts_per_module);
|
||||
|
||||
disable_sep = false;
|
||||
(void) mca_base_component_var_register(&mca_btl_ofi_component.super.btl_version,
|
||||
"disable_sep",
|
||||
@ -144,6 +152,17 @@ static int mca_btl_ofi_component_register(void)
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&disable_sep);
|
||||
|
||||
mca_btl_ofi_component.progress_threshold = MCA_BTL_OFI_PROGRESS_THRESHOLD;
|
||||
(void) mca_base_component_var_register(&mca_btl_ofi_component.super.btl_version,
|
||||
"progress_threshold",
|
||||
"number of outstanding operation before btl will progress "
|
||||
"automatically. Tuning this might improve performance on "
|
||||
"certain type of application.",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_btl_ofi_component.progress_threshold);
|
||||
|
||||
/* for now we want this component to lose to btl/ugni and btl/vader */
|
||||
module->super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH - 50;
|
||||
|
||||
@ -241,6 +260,8 @@ static mca_btl_base_module_t **mca_btl_ofi_component_init (int *num_btl_modules,
|
||||
tx_attr.iov_limit = 1;
|
||||
rx_attr.iov_limit = 1;
|
||||
|
||||
tx_attr.op_flags = FI_DELIVERY_COMPLETE;
|
||||
|
||||
mca_btl_ofi_component.module_count = 0;
|
||||
|
||||
/* do the query. */
|
||||
|
@ -176,6 +176,7 @@ mca_btl_ofi_context_t *mca_btl_ofi_context_alloc_scalable(struct fi_info *info,
|
||||
struct fi_rx_attr rx_attr = {0};
|
||||
|
||||
mca_btl_ofi_context_t *contexts;
|
||||
tx_attr.op_flags = FI_DELIVERY_COMPLETE;
|
||||
|
||||
contexts = (mca_btl_ofi_context_t*) calloc(num_contexts, sizeof(*contexts));
|
||||
if (NULL == contexts) {
|
||||
|
@ -95,9 +95,6 @@ int mca_btl_ofi_get (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoi
|
||||
|
||||
MCA_BTL_OFI_NUM_RDMA_INC(ofi_btl);
|
||||
|
||||
/* force a bit of progress */
|
||||
mca_btl_ofi_component.super.btl_progress();
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
@ -143,9 +140,6 @@ int mca_btl_ofi_put (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoi
|
||||
|
||||
MCA_BTL_OFI_NUM_RDMA_INC(ofi_btl);
|
||||
|
||||
/* force a bit of progress */
|
||||
mca_btl_ofi_component.super.btl_progress();
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
|
||||
}
|
||||
|
@ -29,8 +29,11 @@ mca_btl_ofi_completion_t *mca_btl_ofi_completion_alloc (
|
||||
void *cbcontext, void *cbdata,
|
||||
int type);
|
||||
|
||||
/* Single-statement form of MCA_BTL_OFI_NUM_RDMA_INC: adds 1 to
 * (module)->outstanding_rdma through OPAL_THREAD_ADD_FETCH64.
 * The expansion already ends in ';'. The same macro name is defined again
 * below with an additional progress-threshold check. */
#define MCA_BTL_OFI_NUM_RDMA_INC(module) \
|
||||
OPAL_THREAD_ADD_FETCH64(&(module)->outstanding_rdma, 1);
|
||||
/*
 * Record one more outstanding RDMA operation on `module` and, once the count
 * exceeds mca_btl_ofi_component.progress_threshold, invoke the component's
 * btl_progress() callback.
 *
 * module: pointer expression to an object with an `outstanding_rdma` counter.
 *         It is evaluated more than once, so it must be free of side effects.
 *
 * The body is wrapped in do { ... } while (0) so the macro expands to exactly
 * one statement and stays correct inside an unbraced if/else; callers supply
 * the trailing semicolon. `module` is parenthesized at every use so arguments
 * such as `&obj` or a cast expression expand correctly.
 */
#define MCA_BTL_OFI_NUM_RDMA_INC(module)                                                \
    do {                                                                                \
        OPAL_THREAD_ADD_FETCH64(&(module)->outstanding_rdma, 1);                        \
        if ((module)->outstanding_rdma > mca_btl_ofi_component.progress_threshold) {    \
            mca_btl_ofi_component.super.btl_progress();                                 \
        }                                                                               \
    } while (0)
|
||||
|
||||
/* Counterpart of MCA_BTL_OFI_NUM_RDMA_INC: adds -1 to (module)->outstanding_rdma
 * through OPAL_THREAD_ADD_FETCH64. No progress call is made here.
 * NOTE(review): the expansion already ends in ';', so a call site that adds its
 * own ';' yields an empty statement -- harmless in a plain statement list, but
 * it breaks an unbraced if/else. Callers are not visible here; confirm before
 * changing the expansion. */
#define MCA_BTL_OFI_NUM_RDMA_DEC(module) \
|
||||
OPAL_THREAD_ADD_FETCH64(&(module)->outstanding_rdma, -1);
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user