be76896f7c
This commit changes the way btl/ofi calls progress. Previously, we forced progression with every rdma/atomic call, which gave a performance boost in some cases and a slowdown in others. Now we only force progression after a certain number of rdma calls, which results in better overall performance. Also added a new MCA parameter, 'mca_btl_ofi_progress_threshold', to set the threshold; the new default is 64. Also: added FI_DELIVERY_COMPLETE to the tx_rtx flags to ensure that the completion is generated after the message has been received on the remote side. Signed-off-by: Thananon Patinyasakdikul <thananon.patinyasakdikul@intel.com>
43 строки
1.6 KiB
C
43 строки
1.6 KiB
C
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
|
/*
|
|
* Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights
|
|
* reserved.
|
|
* Copyright (c) 2018 Intel, Inc, All rights reserved
|
|
*
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
#ifndef BTL_OFI_RDMA_H
|
|
#define BTL_OFI_RDMA_H
|
|
|
|
#include "opal/threads/thread_usage.h"
|
|
|
|
#include "btl_ofi.h"
|
|
#include "btl_ofi_endpoint.h"
|
|
|
|
mca_btl_ofi_completion_t *mca_btl_ofi_completion_alloc (
|
|
mca_btl_base_module_t *btl,
|
|
mca_btl_base_endpoint_t *endpoint,
|
|
mca_btl_ofi_context_t *ofi_context,
|
|
void *local_address,
|
|
mca_btl_base_registration_handle_t *local_handle,
|
|
mca_btl_base_rdma_completion_fn_t cbfunc,
|
|
void *cbcontext, void *cbdata,
|
|
int type);
|
|
|
|
/**
 * Add one to (module)->outstanding_rdma via OPAL_THREAD_ADD_FETCH64 and, when
 * the updated count is greater than mca_btl_ofi_component.progress_threshold,
 * call mca_btl_ofi_component.super.btl_progress().
 *
 * The expansion is a single statement (do { ... } while (0)), so the macro is
 * safe in un-braced if/else bodies; call sites must end with a semicolon.
 * The macro argument is parenthesized and evaluated exactly once.
 *
 * The threshold test uses the value returned by OPAL_THREAD_ADD_FETCH64 (the
 * count after this increment) rather than re-reading the counter, so the
 * decision is not affected by updates made between the add and the compare.
 *
 * @param module  pointer to an object with an outstanding_rdma counter
 */
#define MCA_BTL_OFI_NUM_RDMA_INC(module)                                        \
    do {                                                                        \
        if (OPAL_THREAD_ADD_FETCH64(&(module)->outstanding_rdma, 1) >           \
            mca_btl_ofi_component.progress_threshold) {                         \
            mca_btl_ofi_component.super.btl_progress();                         \
        }                                                                       \
    } while (0)
|
|
|
|
/**
 * Subtract one from (module)->outstanding_rdma via OPAL_THREAD_ADD_FETCH64.
 *
 * The expansion is a single parenthesized expression with no trailing
 * semicolon, so "MCA_BTL_OFI_NUM_RDMA_DEC(m);" is exactly one statement and
 * can be used in un-braced if/else bodies. Call sites supply the semicolon.
 *
 * @param module  pointer to an object with an outstanding_rdma counter
 */
#define MCA_BTL_OFI_NUM_RDMA_DEC(module)                                        \
    (OPAL_THREAD_ADD_FETCH64(&(module)->outstanding_rdma, -1))
|
|
|
|
#endif /* !defined(BTL_OFI_RDMA_H) */
|
|
|