Merge pull request #6226 from aravindksg/sep_mca
mtl/ofi: Add MCA variables to enable SEP and to request OFI contexts
Commit dc6eb5d1a2
@@ -111,11 +111,22 @@ favours only creating as many contexts as needed.
 }

-2. MCA variable:
+2. MCA variables:

 To utilize the feature, the following MCA variables need to be set:

+mtl_ofi_enable_sep:
+ This MCA variable needs to be set to enable the use of the Scalable
+ Endpoints feature in the OFI MTL. The underlying provider is also checked
+ to ensure the feature is supported. If the chosen provider does not support
+ it, the user needs to either set this variable to 0 or select a different
+ provider which supports the feature.
+
+ Default: 0
+
+ Command-line syntax:
+ "-mca mtl_ofi_enable_sep 1"
+
 mtl_ofi_thread_grouping:
- This MCA variable is at the OFI MTL level and needs to be set to switch
- the feature on.
+ This MCA variable needs to be set to switch the Thread Grouping feature on.

 Default: 0
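Whether a given provider can support Scalable Endpoints can be checked outside of MPI with libfabric's fi_info utility (an illustrative invocation; psm2 stands in for whichever provider is in use, and the grep pattern assumes fi_info's verbose field names). A domain reporting max_ep_tx_ctx or max_ep_rx_ctx greater than 1 supports SEP:

    fi_info -v -p psm2 | grep -E "max_ep_(tx|rx)_ctx"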
@@ -124,21 +135,46 @@ To utilize the feature, the following MCA variables need to be set:
 - Applications that have multiple threads using a single communicator, as
   it may degrade performance.

-Command-line syntax to set the MCA variable:
-"-mca mtl_ofi_thread_grouping 1"
+Command-line syntax:
+"-mca mtl_ofi_thread_grouping 1"
+
+mtl_ofi_num_ctxts:
+ This MCA variable allows the user to set the number of OFI contexts the
+ application expects to use. For multi-threaded applications using the
+ Thread Grouping feature, this number should be set to the number of user
+ threads that will call into MPI. For single-threaded applications, one
+ OFI context is sufficient.
+
+ Default: 1
+
+ Command-line syntax:
+ "-mca mtl_ofi_num_ctxts N" [ N: number of OFI contexts required by
+                              application ]

 3. Notes on performance:
-- OFI MTL will create as many TX/RX contexts as allowed by an underlying
-  provider (each provider may have different thresholds). Once the threshold
+- OFI MTL will create as many TX/RX contexts as set by the MCA variable
+  mtl_ofi_num_ctxts. The number of contexts that can be created is also
+  limited by the underlying provider, as each provider may have different
+  thresholds. Once the threshold
   is exceeded, contexts are used in a round-robin fashion which leads to
   resource sharing among threads. Therefore locks are required to guard
   against race conditions. For performance, it is recommended to have

-  Number of communicators = Number of contexts
+  Number of threads = Number of communicators = Number of contexts

   For example, when using the PSM2 provider, the number of contexts is
   dictated by the Intel Omni-Path HFI1 driver module.

 - OPAL layer allows for multiple threads to enter progress simultaneously.
   To enable this feature, user needs to set the MCA variable
   "max_thread_in_progress". When using the Thread Grouping feature, it is
   recommended to set this MCA parameter to the number of threads expected to
   call into MPI as it provides performance benefits.

   Command-line syntax:
   "-mca opal_max_thread_in_progress N" [ N: number of threads expected to
                                          make MPI calls ]
+  Default: 1

 - For applications using a single thread with multiple communicators and MCA
   variable "mtl_ofi_thread_grouping" set to 1, the MTL will use multiple
   contexts, but the benefits may be negligible as only one thread is driving
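Putting the variables above together: a launch of a hypothetical 8-thread MPI application (./mt_app is a placeholder binary name, not taken from this commit) that enables both features might look like:

    mpirun -np 2 -mca mtl ofi \
           -mca mtl_ofi_enable_sep 1 \
           -mca mtl_ofi_thread_grouping 1 \
           -mca mtl_ofi_num_ctxts 8 \
           -mca opal_max_thread_in_progress 8 \
           ./mt_app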
@@ -26,17 +26,42 @@ fi_info -v -p %s
 Location: %s:%d

 [SEP unavailable]
-Scalable Endpoint feature is required for Thread Grouping feature to work
-but it is not supported by %s provider. Try disabling this feature.
+Scalable Endpoint feature is enabled by the user but it is not supported by
+%s provider. Try disabling this feature or use a different provider that
+supports it using mtl_ofi_provider_include.

 Local host: %s
 Location: %s:%d

-[SEP ctxt limit]
-Reached limit (%d) for number of OFI contexts that can be opened with the
-provider. Creating new communicators beyond this limit is possible but
-they will re-use existing contexts in round-robin fashion.
-Using new communicators beyond the limit will impact performance.
+[SEP required]
+Scalable Endpoint feature is required for Thread Grouping feature to work.
+Please try enabling Scalable Endpoints using mtl_ofi_enable_sep.

 Local host: %s
 Location: %s:%d
+
+[SEP thread grouping ctxt limit]
+Reached limit (%d) for number of OFI contexts set by mtl_ofi_num_ctxts.
+Please set mtl_ofi_num_ctxts to a larger value if you need more contexts.
+If an MPI application creates more communicators than mtl_ofi_num_ctxts,
+OFI MTL will make the new communicators re-use existing contexts in
+round-robin fashion, which will impact performance.
+
+Local host: %s
+Location: %s:%d
+
+[Local ranks exceed ofi contexts]
+The number of local ranks exceeds the number of available OFI contexts in %s
+provider and we cannot provision enough contexts for each rank. Try
+disabling the Scalable Endpoint feature.
+
+Local host: %s
+Location: %s:%d
+
+[Ctxts exceeded available]
+User requested more contexts than the provider can support. Limiting to the
+max allowed (%d). Contexts will be re-used in round-robin fashion if there
+are more threads than the available contexts.
+
+Local host: %s
+Location: %s:%d
@@ -327,16 +327,7 @@ ompi_mtl_ofi_isend_callback(struct fi_cq_tagged_entry *wc,

 #define MTL_OFI_MAP_COMM_TO_CONTEXT(comm_id, ctxt_id) \
     do { \
-        if (ompi_mtl_ofi.thread_grouping && \
-            (!ompi_mtl_ofi.threshold_comm_context_id || \
-            ((uint32_t) ompi_mtl_ofi.threshold_comm_context_id > comm_id))) { \
-            ctxt_id = ompi_mtl_ofi.comm_to_context[comm_id]; \
-        } else if (ompi_mtl_ofi.thread_grouping) { \
-            /* Round-robin assignment of contexts if threshold is reached */ \
-            ctxt_id = comm_id % ompi_mtl_ofi.total_ctxts_used; \
-        } else { \
-            ctxt_id = 0; \
-        } \
+        ctxt_id = ompi_mtl_ofi.comm_to_context[comm_id]; \
     } while (0);

 __opal_attribute_always_inline__ static inline int
@@ -348,40 +339,40 @@ ompi_mtl_ofi_ssend_recv(ompi_mtl_ofi_request_t *ack_req,
                         uint64_t *match_bits,
                         int tag)
 {
     ssize_t ret = OMPI_SUCCESS;
     int ctxt_id = 0;

     MTL_OFI_MAP_COMM_TO_CONTEXT(comm->c_contextid, ctxt_id);
     set_thread_context(ctxt_id);

     ack_req = malloc(sizeof(ompi_mtl_ofi_request_t));
     assert(ack_req);

     ack_req->parent = ofi_req;
     ack_req->event_callback = ompi_mtl_ofi_send_ack_callback;
     ack_req->error_callback = ompi_mtl_ofi_send_ack_error_callback;

     ofi_req->completion_count += 1;

     MTL_OFI_RETRY_UNTIL_DONE(fi_trecv(ompi_mtl_ofi.ofi_ctxt[ctxt_id].rx_ep,
                                       NULL,
                                       0,
                                       NULL,
                                       *src_addr,
                                       *match_bits | ompi_mtl_ofi.sync_send_ack,
                                       0, /* Exact match, no ignore bits */
                                       (void *) &ack_req->ctx), ret);
     if (OPAL_UNLIKELY(0 > ret)) {
         opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                             "%s:%d: fi_trecv failed: %s(%zd)",
                             __FILE__, __LINE__, fi_strerror(-ret), ret);
         free(ack_req);
         return ompi_mtl_ofi_get_error(ret);
     }

     /* The SYNC_SEND tag bit is set for the send operation only. */
     MTL_OFI_SET_SYNC_SEND(*match_bits);
     return OMPI_SUCCESS;
 }

 __opal_attribute_always_inline__ static inline int
@@ -1242,13 +1233,15 @@ static int ompi_mtl_ofi_init_contexts(struct mca_mtl_base_module_t *mtl,
     }

     /*
-     * We only create upto Max number of contexts allowed by provider.
+     * We only create up to the max number of contexts asked for by the user.
      * If user enables thread grouping feature and creates more number of
-     * communicators than we have contexts, then we set the threshold
-     * context_id so we know to use context 0 for operations involving these
-     * "extra" communicators.
+     * communicators than available contexts, then we set the threshold
+     * context_id so that new communicators created beyond the threshold
+     * will be assigned to contexts in a round-robin fashion.
      */
-    if (ompi_mtl_ofi.max_ctx_cnt <= ctxt_id) {
+    if (ompi_mtl_ofi.num_ofi_contexts <= ompi_mtl_ofi.total_ctxts_used) {
+        ompi_mtl_ofi.comm_to_context[comm->c_contextid] = comm->c_contextid %
+                                                          ompi_mtl_ofi.total_ctxts_used;
         if (!ompi_mtl_ofi.threshold_comm_context_id) {
             ompi_mtl_ofi.threshold_comm_context_id = comm->c_contextid;
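The assignment policy implemented above can be illustrated with a small standalone C sketch (not code from this commit; the simplified globals and the hypothetical helper assign_context mirror ompi_mtl_ofi.num_ofi_contexts, ompi_mtl_ofi.total_ctxts_used, and the init_contexts/MAP_COMM_TO_CONTEXT pair):

    #include <stdio.h>

    static int num_ofi_contexts = 4; /* the user's mtl_ofi_num_ctxts */
    static int total_ctxts_used = 0; /* contexts handed out so far */

    /* First num_ofi_contexts communicators get dedicated contexts;
     * later ones are mapped round-robin over the existing contexts. */
    static int assign_context(int comm_id)
    {
        if (total_ctxts_used < num_ofi_contexts) {
            return total_ctxts_used++;
        }
        return comm_id % total_ctxts_used;
    }

    int main(void)
    {
        /* With 4 contexts, communicator IDs 0..5 map to 0 1 2 3 0 1 */
        for (int id = 0; id < 6; id++) {
            printf("comm %d -> ctxt %d\n", id, assign_context(id));
        }
        return 0;
    }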
@@ -235,6 +235,15 @@ ompi_mtl_ofi_component_register(void)
                                          &av_type);
     OBJ_RELEASE(new_enum);

+    ompi_mtl_ofi.enable_sep = 0;
+    mca_base_component_var_register(&mca_mtl_ofi_component.super.mtl_version,
+                                    "enable_sep",
+                                    "Enable SEP feature",
+                                    MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
+                                    OPAL_INFO_LVL_3,
+                                    MCA_BASE_VAR_SCOPE_READONLY,
+                                    &ompi_mtl_ofi.enable_sep);
+
     ompi_mtl_ofi.thread_grouping = 0;
     mca_base_component_var_register(&mca_mtl_ofi_component.super.mtl_version,
                                     "thread_grouping",

@@ -244,6 +253,20 @@ ompi_mtl_ofi_component_register(void)
                                     MCA_BASE_VAR_SCOPE_READONLY,
                                     &ompi_mtl_ofi.thread_grouping);

+    /*
+     * Default Policy: Create 1 context and let user ask for more for
+     * multi-threaded workloads. User needs to ask for as many contexts as
+     * the number of threads that are anticipated to make MPI calls.
+     */
+    ompi_mtl_ofi.num_ofi_contexts = 1;
+    mca_base_component_var_register(&mca_mtl_ofi_component.super.mtl_version,
+                                    "num_ctxts",
+                                    "Specify number of OFI contexts to create",
+                                    MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
+                                    OPAL_INFO_LVL_4,
+                                    MCA_BASE_VAR_SCOPE_READONLY,
+                                    &ompi_mtl_ofi.num_ofi_contexts);
+
     return OMPI_SUCCESS;
 }
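Once registered, the variables are visible through ompi_info; because they sit at info levels 3 and 4, the display level has to be raised to list them (an illustrative invocation):

    ompi_info --param mtl ofi --level 4

This should report mtl_ofi_enable_sep, mtl_ofi_thread_grouping, and mtl_ofi_num_ctxts along with their defaults.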
@@ -445,9 +468,9 @@ ompi_mtl_ofi_define_tag_mode(int ofi_tag_mode, int *bits_for_cid) {
     }
 }

-#define MTL_OFI_ALLOC_COMM_TO_CONTEXT(num_ofi_ctxts) \
+#define MTL_OFI_ALLOC_COMM_TO_CONTEXT(arr_size) \
     do { \
-        ompi_mtl_ofi.comm_to_context = calloc(num_ofi_ctxts, sizeof(int)); \
+        ompi_mtl_ofi.comm_to_context = calloc(arr_size, sizeof(int)); \
         if (OPAL_UNLIKELY(!ompi_mtl_ofi.comm_to_context)) { \
             opal_output_verbose(1, ompi_mtl_base_framework.framework_output, \
                                 "%s:%d: alloc of comm_to_context array failed: %s\n",\

@@ -458,7 +481,7 @@ ompi_mtl_ofi_define_tag_mode(int ofi_tag_mode, int *bits_for_cid) {

 #define MTL_OFI_ALLOC_OFI_CTXTS() \
     do { \
-        ompi_mtl_ofi.ofi_ctxt = (mca_mtl_ofi_context_t *) malloc(ompi_mtl_ofi.max_ctx_cnt * \
+        ompi_mtl_ofi.ofi_ctxt = (mca_mtl_ofi_context_t *) malloc(ompi_mtl_ofi.num_ofi_contexts * \
                                                           sizeof(mca_mtl_ofi_context_t)); \
         if (OPAL_UNLIKELY(!ompi_mtl_ofi.ofi_ctxt)) { \
             opal_output_verbose(1, ompi_mtl_base_framework.framework_output, \
@@ -474,7 +497,7 @@ static int ompi_mtl_ofi_init_sep(struct fi_info *prov)
     struct fi_av_attr av_attr = {0};

     prov->ep_attr->tx_ctx_cnt = prov->ep_attr->rx_ctx_cnt =
-                                ompi_mtl_ofi.max_ctx_cnt;
+                                ompi_mtl_ofi.num_ofi_contexts;

     ret = fi_scalable_ep(ompi_mtl_ofi.domain, prov, &ompi_mtl_ofi.sep, NULL);
     if (0 != ret) {
@@ -486,11 +509,11 @@ static int ompi_mtl_ofi_init_sep(struct fi_info *prov)
     }

     ompi_mtl_ofi.rx_ctx_bits = 0;
-    while (ompi_mtl_ofi.max_ctx_cnt >> ++ompi_mtl_ofi.rx_ctx_bits);
+    while (ompi_mtl_ofi.num_ofi_contexts >> ++ompi_mtl_ofi.rx_ctx_bits);

     av_attr.type = (MTL_OFI_AV_TABLE == av_type) ? FI_AV_TABLE: FI_AV_MAP;
     av_attr.rx_ctx_bits = ompi_mtl_ofi.rx_ctx_bits;
-    av_attr.count = ompi_mtl_ofi.max_ctx_cnt;
+    av_attr.count = ompi_mtl_ofi.num_ofi_contexts;
     ret = fi_av_open(ompi_mtl_ofi.domain, &av_attr, &ompi_mtl_ofi.av, NULL);

     if (0 != ret) {
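The while loop above sets rx_ctx_bits to the binary width of the requested context count (index of the highest set bit, plus one), which fi_av_open then consumes through av_attr.rx_ctx_bits to reserve address bits for receive contexts. A minimal standalone sketch of the same computation (ctx_bits is a hypothetical helper name):

    #include <stdio.h>

    /* Smallest bits such that (n >> bits) == 0: the binary width of n. */
    static int ctx_bits(int n)
    {
        int bits = 0;
        while (n >> ++bits)
            ;
        return bits;
    }

    int main(void)
    {
        /* prints: 1 3 4 (widths of 1, 4, and 8) */
        printf("%d %d %d\n", ctx_bits(1), ctx_bits(4), ctx_bits(8));
        return 0;
    }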
@@ -506,12 +529,12 @@ static int ompi_mtl_ofi_init_sep(struct fi_info *prov)

     /*
      * If SEP supported and Thread Grouping feature enabled, use
-     * max_ctx_cnt + 2. Extra 2 items is to accomodate Open MPI contextid
+     * num_ofi_contexts + 2. Extra 2 items are to accommodate Open MPI contextid
      * numbering - COMM_WORLD is 0, COMM_SELF is 1. Other user created
      * Comm contextid values are assigned sequentially starting with 3.
      */
     num_ofi_ctxts = ompi_mtl_ofi.thread_grouping ?
-                    ompi_mtl_ofi.max_ctx_cnt + 2 : 1;
+                    ompi_mtl_ofi.num_ofi_contexts + 2 : 1;
     MTL_OFI_ALLOC_COMM_TO_CONTEXT(num_ofi_ctxts);

     ompi_mtl_ofi.total_ctxts_used = 0;
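Concretely: with thread grouping enabled and, say, mtl_ofi_num_ctxts set to 8, comm_to_context is allocated with 10 entries so that it can be indexed directly by communicator context IDs (0 for COMM_WORLD, 1 for COMM_SELF, user communicators starting at 3); with thread grouping disabled a single entry suffices.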
@@ -525,13 +548,14 @@ static int ompi_mtl_ofi_init_sep(struct fi_info *prov)

 static int ompi_mtl_ofi_init_regular_ep(struct fi_info * prov)
 {
-    int ret = OMPI_SUCCESS, num_ofi_ctxts;
+    int ret = OMPI_SUCCESS;
     struct fi_av_attr av_attr = {0};
     struct fi_cq_attr cq_attr = {0};
     cq_attr.format = FI_CQ_FORMAT_TAGGED;
     cq_attr.size = ompi_mtl_ofi.ofi_progress_event_count;

-    ompi_mtl_ofi.max_ctx_cnt = 1;
+    /* Override any user defined setting */
+    ompi_mtl_ofi.num_ofi_contexts = 1;
     ret = fi_endpoint(ompi_mtl_ofi.domain,  /* In:  Domain object   */
                       prov,                 /* In:  Provider        */
                       &ompi_mtl_ofi.sep,    /* Out: Endpoint object */
@@ -564,8 +588,7 @@ static int ompi_mtl_ofi_init_regular_ep(struct fi_info * prov)
         return ret;
     }

-    num_ofi_ctxts = 1;
-    MTL_OFI_ALLOC_COMM_TO_CONTEXT(num_ofi_ctxts);
+    MTL_OFI_ALLOC_COMM_TO_CONTEXT(1);

     /* Allocate memory for OFI contexts */
     MTL_OFI_ALLOC_OFI_CTXTS();
@@ -594,7 +617,8 @@ static mca_mtl_base_module_t*
 ompi_mtl_ofi_component_init(bool enable_progress_threads,
                             bool enable_mpi_threads)
 {
-    int ret, fi_version, num_local_ranks;
+    int ret, fi_version;
+    int num_local_ranks, sep_support_in_provider, max_ofi_ctxts;
     int ofi_tag_leading_zeros, ofi_tag_bits_for_cid;
     struct fi_info *hints;
     struct fi_info *providers = NULL;
@@ -791,24 +815,32 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
     ompi_mtl_ofi.num_peers = 0;

     /* Check if Scalable Endpoints can be enabled for the provider */
-    ompi_mtl_ofi.enable_sep = 0;
+    sep_support_in_provider = 0;
     if ((prov->domain_attr->max_ep_tx_ctx > 1) ||
         (prov->domain_attr->max_ep_rx_ctx > 1)) {
-        ompi_mtl_ofi.enable_sep = 1;
-        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
-                            "%s:%d: Scalable EP supported in %s provider. Enabling in MTL.\n",
-                            __FILE__, __LINE__, prov->fabric_attr->prov_name);
+        sep_support_in_provider = 1;
     }

-    /*
-     * Scalable Endpoints is required for Thread Grouping feature
-     */
-    if (!ompi_mtl_ofi.enable_sep && ompi_mtl_ofi.thread_grouping) {
-        opal_show_help("help-mtl-ofi.txt", "SEP unavailable", true,
-                       prov->fabric_attr->prov_name,
-                       ompi_process_info.nodename, __FILE__, __LINE__,
-                       fi_strerror(-ret), -ret);
-        goto error;
+    if (1 == ompi_mtl_ofi.enable_sep) {
+        if (0 == sep_support_in_provider) {
+            opal_show_help("help-mtl-ofi.txt", "SEP unavailable", true,
+                           prov->fabric_attr->prov_name,
+                           ompi_process_info.nodename, __FILE__, __LINE__);
+            goto error;
+        } else if (1 == sep_support_in_provider) {
+            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
+                                "%s:%d: Scalable EP supported in %s provider. Enabling in MTL.\n",
+                                __FILE__, __LINE__, prov->fabric_attr->prov_name);
+        }
+    } else {
+        /*
+         * Scalable Endpoints is required for Thread Grouping feature
+         */
+        if (1 == ompi_mtl_ofi.thread_grouping) {
+            opal_show_help("help-mtl-ofi.txt", "SEP required", true,
+                           ompi_process_info.nodename, __FILE__, __LINE__);
+            goto error;
+        }
     }

     /**
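The resulting policy, summarized: with mtl_ofi_enable_sep set, a provider lacking SEP support aborts with "SEP unavailable" while a capable one proceeds with a verbose note; with it unset, requesting Thread Grouping aborts with "SEP required", and otherwise initialization falls through to the regular-endpoint path.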
@@ -865,19 +897,34 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
      * vectors, completion counters or event queues etc, and enabled.
      * See man fi_endpoint for more details.
      */
-    ompi_mtl_ofi.max_ctx_cnt = (prov->domain_attr->max_ep_tx_ctx <
-                                prov->domain_attr->max_ep_rx_ctx) ?
-                                prov->domain_attr->max_ep_tx_ctx :
-                                prov->domain_attr->max_ep_rx_ctx;
+    max_ofi_ctxts = (prov->domain_attr->max_ep_tx_ctx <
+                     prov->domain_attr->max_ep_rx_ctx) ?
+                     prov->domain_attr->max_ep_tx_ctx :
+                     prov->domain_attr->max_ep_rx_ctx;

     num_local_ranks = 1 + ompi_process_info.num_local_peers;
-    if (ompi_mtl_ofi.max_ctx_cnt <= num_local_ranks) {
-        ompi_mtl_ofi.enable_sep = 0;
+    if ((max_ofi_ctxts <= num_local_ranks) &&
+        (1 == ompi_mtl_ofi.enable_sep)) {
+        opal_show_help("help-mtl-ofi.txt", "Local ranks exceed ofi contexts",
+                       true, prov->fabric_attr->prov_name,
+                       ompi_process_info.nodename, __FILE__, __LINE__);
+        goto error;
     }

     if (1 == ompi_mtl_ofi.enable_sep) {
         /* Provision enough contexts to service all ranks in a node */
-        ompi_mtl_ofi.max_ctx_cnt /= num_local_ranks;
+        max_ofi_ctxts /= num_local_ranks;
+
+        /*
+         * If num ctxts user specified is more than max allowed, limit to max
+         * and start round-robining. Print warning to user.
+         */
+        if (max_ofi_ctxts < ompi_mtl_ofi.num_ofi_contexts) {
+            opal_show_help("help-mtl-ofi.txt", "Ctxts exceeded available",
+                           true, max_ofi_ctxts,
+                           ompi_process_info.nodename, __FILE__, __LINE__);
+            ompi_mtl_ofi.num_ofi_contexts = max_ofi_ctxts;
+        }

         ret = ompi_mtl_ofi_init_sep(prov);
     } else {
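As a worked example with hypothetical numbers: a provider reporting max_ep_tx_ctx = max_ep_rx_ctx = 16 on a node with 4 local ranks leaves 16 / 4 = 4 contexts per rank; a request of "-mca mtl_ofi_num_ctxts 8" is then clamped to 4 with the "Ctxts exceeded available" warning, while 16 or more local ranks would abort with "Local ranks exceed ofi contexts".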
@@ -44,38 +44,19 @@ typedef struct mca_mtl_ofi_module_t {
     /** Address vector handle */
     struct fid_av *av;

-    /* Scalable Endpoint */
-    struct fid_ep *sep;
-
-    /* Multi-threaded Application flag */
-    bool mpi_thread_multiple;
-
-    /* OFI contexts */
-    mca_mtl_ofi_context_t *ofi_ctxt;
-
-    /* Max context count for scalable endpoints */
-    int max_ctx_cnt;
-
-    /* Total number of TX/RX contexts used by MTL */
-    int total_ctxts_used;
-
-    /*
-     * Store context id of communicator if creating more than number of
-     * contexts
-     */
-    int threshold_comm_context_id;
-
-    /* Mapping of communicator ID to OFI context */
-    int *comm_to_context;
-
-    /* MCA parameter for Thread grouping feature */
-    int thread_grouping;
-
-    /* If SEP is used by OFI MTL */
-    int enable_sep;
-
-    /* Numbers of bits used for rx contexts */
-    int rx_ctx_bits;
+    /* Scalable Endpoint attributes */
+    struct fid_ep *sep;              /* Endpoint object */
+    mca_mtl_ofi_context_t *ofi_ctxt; /* OFI contexts */
+    int threshold_comm_context_id;   /* Set threshold communicator ID */
+    int *comm_to_context;            /* Map communicator ID to context */
+    int rx_ctx_bits;                 /* Bits used for RX context */
+    int total_ctxts_used;            /* Total number of contexts used */
+    int enable_sep;                  /* MCA to enable/disable SEP feature */
+    int thread_grouping;             /* MCA for thread grouping feature */
+    int num_ofi_contexts;            /* MCA for number of contexts to use */

     /** Endpoint name length */
     size_t epnamelen;