Merge pull request #5004 from matcabral/mtl_ofi_remote_cq_data
MTL OFI: add support for FI_REMOTE_CQ_DATA.
Этот коммит содержится в:
Коммит
10516c1fb8
69
ompi/mca/mtl/ofi/README
Обычный файл
69
ompi/mca/mtl/ofi/README
Обычный файл
@ -0,0 +1,69 @@
|
||||
OFI MTL
|
||||
|
||||
The OFI MTL supports Libfabric (a.k.a. Open Fabrics Interfaces OFI,
|
||||
https://ofiwg.github.io/libfabric/) tagged APIs (fi_tagged(3)). At
|
||||
initialization time, the MTL queries libfabric for providers supporting tag matching
|
||||
(fi_getinfo(3)). Libfabric will return a list of providers that satisfy the requested
|
||||
capabilities, having the most performant one at the top of the list.
|
||||
The user may modify the OFI provider selection with mca parameters
|
||||
mtl_ofi_provider_include or mtl_ofi_provider_exclude.
|
||||
|
||||
PROGRESS:
|
||||
The MTL registers a progress function to opal_progress. There is currently
|
||||
no support for asynchronous progress. The progress function reads multiple events
|
||||
from the OFI provider Completion Queue (CQ) per iteration (defaults to 100, can be
|
||||
modified with the mca parameter mtl_ofi_progress_event_cnt) and iterates until the
|
||||
completion queue is drained.
|
||||
|
||||
COMPLETIONS:
|
||||
Each operation uses a request type ompi_mtl_ofi_request_t which includes a reference
|
||||
to an operation specific completion callback, an MPI request, and a context. The
|
||||
context (fi_context) is used to map completion events with MPI_requests when reading the
|
||||
CQ.
|
||||
|
||||
OFI TAG:
|
||||
MPI needs to send 96 bits of information per message (32 bits communicator id,
|
||||
32 bits source rank, 32 bits MPI tag) but OFI only offers 64 bits tags. In
|
||||
addition, the OFI MTL uses 4 bits of the OFI tag for the synchronous send protocol.
|
||||
Therefore, there are only 60 bits available in the OFI tag for message usage. The
|
||||
OFI MTL offers the mtl_ofi_tag_mode mca parameter with 4 modes to address this:
|
||||
|
||||
"auto" (Default):
|
||||
After the OFI provider is selected, a runtime check is performed to assess
|
||||
FI_REMOTE_CQ_DATA and FI_DIRECTED_RECV support (see fi_tagged(3), fi_msg(3)
|
||||
and fi_getinfo(3)). If supported, "ofi_tag_full" is used. If not supported,
|
||||
fall back to "ofi_tag_1".
|
||||
|
||||
"ofi_tag_1":
|
||||
For providers that do not support FI_REMOTE_CQ_DATA, the OFI MTL will
|
||||
trim the fields (Communicator ID, Source Rank, MPI tag) to make them fit the 60
|
||||
bits available in the OFI tag. There are two options available with different
|
||||
number of bits for the Communicator ID and MPI tag fields. This tag distribution
|
||||
offers: 12 bits for Communicator ID (max Communicator ID 4,095) subject to
|
||||
provider reserved bits (see mem_tag_format below), 16 bits for Source Rank (max
|
||||
Source Rank 65,535), 32 bits for MPI tag (max MPI tag is INT_MAX).
|
||||
|
||||
"ofi_tag_2":
|
||||
Same as "ofi_tag_1" but offering a different OFI tag distribution for
|
||||
applications that may require a greater number of supported Communicators at the
|
||||
expense of fewer MPI tag bits. This tag distribution offers: 24 bits for
|
||||
Communicator ID (max Communicator ID 16,777,215. See mem_tag_format below), 16
|
||||
bits for Source Rank (max Source Rank 65,535), 20 bits for MPI tag (max MPI tag
|
||||
524,287).
|
||||
|
||||
"ofi_tag_full":
|
||||
For executions that cannot accept trimming source rank or MPI tag, this mode sends
|
||||
source rank for each message in the CQ DATA. The Source Rank is made available at
|
||||
the remote process CQ (FI_CQ_FORMAT_TAGGED is used, see fi_cq(3)) at the completion
|
||||
of the matching receive operation. Since the minimum size for FI_REMOTE_CQ_DATA
|
||||
is 32 bits, the Source Rank fits with no limitations. The OFI tag is used for the
|
||||
Communicator id (28 bits, max Communicator ID 268,435,455. See mem_tag_format below),
|
||||
and the MPI tag (max MPI tag is INT_MAX). If this mode is selected by the user
|
||||
and FI_REMOTE_CQ_DATA or FI_DIRECTED_RECV are not supported, the execution will abort.
|
||||
|
||||
mem_tag_format (fi_endpoint(3))
|
||||
Some providers can reserve the higher order bits from the OFI tag for internal purposes.
|
||||
This is signaled in mem_tag_format (see fi_endpoint(3)) by setting higher order bits
|
||||
to zero. In such cases, the OFI MTL will reduce the number of communicator ids supported
|
||||
by reducing the bits available for the communicator ID field in the OFI tag.
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -14,8 +14,8 @@ OMPI_DECLSPEC extern mca_mtl_ofi_component_t mca_mtl_ofi_component;
|
||||
|
||||
mca_mtl_ofi_module_t ompi_mtl_ofi = {
|
||||
{
|
||||
8191, /* max cid - 2^13 - 1 */
|
||||
(1UL << 30), /* max tag value - must allow negatives */
|
||||
(int)((1ULL << MTL_OFI_CID_BIT_COUNT_1) - 1), /* max cid */
|
||||
(int)((1ULL << (MTL_OFI_TAG_BIT_COUNT_1 - 1)) - 1) ,/* max tag value */
|
||||
0, /* request reserve space */
|
||||
0, /* flags */
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
*
|
||||
@ -244,6 +244,7 @@ ompi_mtl_ofi_send_start(struct mca_mtl_base_module_t *mtl,
|
||||
ompi_proc_t *ompi_proc = NULL;
|
||||
mca_mtl_ofi_endpoint_t *endpoint = NULL;
|
||||
ompi_mtl_ofi_request_t *ack_req = NULL; /* For synchronous send */
|
||||
fi_addr_t src_addr = 0;
|
||||
|
||||
ompi_proc = ompi_comm_peer_lookup(comm, dest);
|
||||
endpoint = ompi_mtl_ofi_get_endpoint(mtl, ompi_proc);
|
||||
@ -255,6 +256,15 @@ ompi_mtl_ofi_send_start(struct mca_mtl_base_module_t *mtl,
|
||||
ofi_req->length = length;
|
||||
ofi_req->status.MPI_ERROR = OMPI_SUCCESS;
|
||||
|
||||
if (ompi_mtl_ofi.fi_cq_data) {
|
||||
match_bits = mtl_ofi_create_send_tag_CQD(comm->c_contextid, tag);
|
||||
src_addr = endpoint->peer_fiaddr;
|
||||
} else {
|
||||
match_bits = mtl_ofi_create_send_tag(comm->c_contextid,
|
||||
comm->c_my_rank, tag);
|
||||
/* src_addr is ignored when FI_DIRECTED_RECV is not supported */
|
||||
}
|
||||
|
||||
if (OPAL_UNLIKELY(MCA_PML_BASE_SEND_SYNCHRONOUS == mode)) {
|
||||
ack_req = malloc(sizeof(ompi_mtl_ofi_request_t));
|
||||
assert(ack_req);
|
||||
@ -263,14 +273,15 @@ ompi_mtl_ofi_send_start(struct mca_mtl_base_module_t *mtl,
|
||||
ack_req->error_callback = ompi_mtl_ofi_send_ack_error_callback;
|
||||
|
||||
ofi_req->completion_count = 2;
|
||||
MTL_OFI_SET_SEND_BITS(match_bits, comm->c_contextid,
|
||||
comm->c_my_rank, tag, MTL_OFI_SYNC_SEND);
|
||||
|
||||
MTL_OFI_SET_SYNC_SEND(match_bits);
|
||||
|
||||
MTL_OFI_RETRY_UNTIL_DONE(fi_trecv(ompi_mtl_ofi.ep,
|
||||
NULL,
|
||||
0,
|
||||
NULL,
|
||||
endpoint->peer_fiaddr,
|
||||
match_bits | MTL_OFI_SYNC_SEND_ACK,
|
||||
src_addr,
|
||||
match_bits | ompi_mtl_ofi.sync_send_ack,
|
||||
0, /* Exact match, no ignore bits */
|
||||
(void *) &ack_req->ctx));
|
||||
if (OPAL_UNLIKELY(0 > ret)) {
|
||||
@ -282,20 +293,30 @@ ompi_mtl_ofi_send_start(struct mca_mtl_base_module_t *mtl,
|
||||
}
|
||||
} else {
|
||||
ofi_req->completion_count = 1;
|
||||
MTL_OFI_SET_SEND_BITS(match_bits, comm->c_contextid,
|
||||
comm->c_my_rank, tag, 0);
|
||||
}
|
||||
|
||||
if (ompi_mtl_ofi.max_inject_size >= length) {
|
||||
MTL_OFI_RETRY_UNTIL_DONE(fi_tinject(ompi_mtl_ofi.ep,
|
||||
if (ompi_mtl_ofi.fi_cq_data) {
|
||||
MTL_OFI_RETRY_UNTIL_DONE(fi_tinjectdata(ompi_mtl_ofi.ep,
|
||||
start,
|
||||
length,
|
||||
comm->c_my_rank,
|
||||
endpoint->peer_fiaddr,
|
||||
match_bits));
|
||||
} else {
|
||||
MTL_OFI_RETRY_UNTIL_DONE(fi_tinject(ompi_mtl_ofi.ep,
|
||||
start,
|
||||
length,
|
||||
endpoint->peer_fiaddr,
|
||||
match_bits));
|
||||
}
|
||||
|
||||
if (OPAL_UNLIKELY(0 > ret)) {
|
||||
char *fi_api = ompi_mtl_ofi.fi_cq_data ? "fi_tinjectddata" : "fi_tinject";
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: fi_tinject failed: %s(%zd)",
|
||||
__FILE__, __LINE__, fi_strerror(-ret), ret);
|
||||
"%s:%d: %s failed: %s(%zd)",
|
||||
__FILE__, __LINE__,fi_api, fi_strerror(-ret), ret);
|
||||
|
||||
if (ack_req) {
|
||||
fi_cancel((fid_t)ompi_mtl_ofi.ep, &ack_req->ctx);
|
||||
free(ack_req);
|
||||
@ -305,17 +326,29 @@ ompi_mtl_ofi_send_start(struct mca_mtl_base_module_t *mtl,
|
||||
|
||||
ofi_req->event_callback(NULL,ofi_req);
|
||||
} else {
|
||||
MTL_OFI_RETRY_UNTIL_DONE(fi_tsend(ompi_mtl_ofi.ep,
|
||||
if (ompi_mtl_ofi.fi_cq_data) {
|
||||
MTL_OFI_RETRY_UNTIL_DONE(fi_tsenddata(ompi_mtl_ofi.ep,
|
||||
start,
|
||||
length,
|
||||
NULL,
|
||||
comm->c_my_rank,
|
||||
endpoint->peer_fiaddr,
|
||||
match_bits,
|
||||
(void *) &ofi_req->ctx));
|
||||
} else {
|
||||
MTL_OFI_RETRY_UNTIL_DONE(fi_tsend(ompi_mtl_ofi.ep,
|
||||
start,
|
||||
length,
|
||||
NULL,
|
||||
endpoint->peer_fiaddr,
|
||||
match_bits,
|
||||
(void *) &ofi_req->ctx));
|
||||
}
|
||||
if (OPAL_UNLIKELY(0 > ret)) {
|
||||
char *fi_api = ompi_mtl_ofi.fi_cq_data ? "fi_tsendddata" : "fi_send";
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: fi_tsend failed: %s(%zd)",
|
||||
__FILE__, __LINE__, fi_strerror(-ret), ret);
|
||||
"%s:%d: %s failed: %s(%zd)",
|
||||
__FILE__, __LINE__,fi_api, fi_strerror(-ret), ret);
|
||||
return ompi_mtl_ofi_get_error(ret);
|
||||
}
|
||||
}
|
||||
@ -415,7 +448,7 @@ ompi_mtl_ofi_recv_callback(struct fi_cq_tagged_entry *wc,
|
||||
ssize_t ret;
|
||||
ompi_proc_t *ompi_proc = NULL;
|
||||
mca_mtl_ofi_endpoint_t *endpoint = NULL;
|
||||
int src;
|
||||
int src = mtl_ofi_get_source(wc);
|
||||
ompi_status_public_t *status = NULL;
|
||||
|
||||
assert(ofi_req->super.ompi_req);
|
||||
@ -427,7 +460,7 @@ ompi_mtl_ofi_recv_callback(struct fi_cq_tagged_entry *wc,
|
||||
*/
|
||||
ofi_req->req_started = true;
|
||||
|
||||
status->MPI_SOURCE = MTL_OFI_GET_SOURCE(wc->tag);
|
||||
status->MPI_SOURCE = src;
|
||||
status->MPI_TAG = MTL_OFI_GET_TAG(wc->tag);
|
||||
status->_ucount = wc->len;
|
||||
|
||||
@ -474,7 +507,6 @@ ompi_mtl_ofi_recv_callback(struct fi_cq_tagged_entry *wc,
|
||||
* we need to extract the source's actual address.
|
||||
*/
|
||||
if (ompi_mtl_ofi.any_addr == ofi_req->remote_addr) {
|
||||
src = MTL_OFI_GET_SOURCE(wc->tag);
|
||||
ompi_proc = ompi_comm_peer_lookup(ofi_req->comm, src);
|
||||
endpoint = ompi_mtl_ofi_get_endpoint(ofi_req->mtl, ompi_proc);
|
||||
ofi_req->remote_addr = endpoint->peer_fiaddr;
|
||||
@ -484,7 +516,7 @@ ompi_mtl_ofi_recv_callback(struct fi_cq_tagged_entry *wc,
|
||||
0,
|
||||
NULL,
|
||||
ofi_req->remote_addr,
|
||||
wc->tag | MTL_OFI_SYNC_SEND_ACK,
|
||||
wc->tag | ompi_mtl_ofi.sync_send_ack,
|
||||
(void *) &ofi_req->ctx));
|
||||
if (OPAL_UNLIKELY(0 > ret)) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
@ -510,7 +542,7 @@ ompi_mtl_ofi_recv_error_callback(struct fi_cq_err_entry *error,
|
||||
assert(ofi_req->super.ompi_req);
|
||||
status = &ofi_req->super.ompi_req->req_status;
|
||||
status->MPI_TAG = MTL_OFI_GET_TAG(ofi_req->match_bits);
|
||||
status->MPI_SOURCE = MTL_OFI_GET_SOURCE(ofi_req->match_bits);
|
||||
status->MPI_SOURCE = mtl_ofi_get_source((struct fi_cq_tagged_entry *) error);
|
||||
|
||||
switch (error->err) {
|
||||
case FI_ETRUNC:
|
||||
@ -538,7 +570,7 @@ ompi_mtl_ofi_irecv(struct mca_mtl_base_module_t *mtl,
|
||||
int ompi_ret = OMPI_SUCCESS;
|
||||
ssize_t ret;
|
||||
uint64_t match_bits, mask_bits;
|
||||
fi_addr_t remote_addr;
|
||||
fi_addr_t remote_addr = ompi_mtl_ofi.any_addr;
|
||||
ompi_proc_t *ompi_proc = NULL;
|
||||
mca_mtl_ofi_endpoint_t *endpoint = NULL;
|
||||
ompi_mtl_ofi_request_t *ofi_req = (ompi_mtl_ofi_request_t*) mtl_request;
|
||||
@ -546,15 +578,21 @@ ompi_mtl_ofi_irecv(struct mca_mtl_base_module_t *mtl,
|
||||
size_t length;
|
||||
bool free_after;
|
||||
|
||||
if (MPI_ANY_SOURCE != src) {
|
||||
ompi_proc = ompi_comm_peer_lookup(comm, src);
|
||||
endpoint = ompi_mtl_ofi_get_endpoint(mtl, ompi_proc);
|
||||
remote_addr = endpoint->peer_fiaddr;
|
||||
} else {
|
||||
remote_addr = ompi_mtl_ofi.any_addr;
|
||||
}
|
||||
|
||||
MTL_OFI_SET_RECV_BITS(match_bits, mask_bits, comm->c_contextid, src, tag);
|
||||
if (ompi_mtl_ofi.fi_cq_data) {
|
||||
if (MPI_ANY_SOURCE != src) {
|
||||
ompi_proc = ompi_comm_peer_lookup(comm, src);
|
||||
endpoint = ompi_mtl_ofi_get_endpoint(mtl, ompi_proc);
|
||||
remote_addr = endpoint->peer_fiaddr;
|
||||
}
|
||||
|
||||
mtl_ofi_create_recv_tag_CQD(&match_bits, &mask_bits, comm->c_contextid,
|
||||
tag);
|
||||
} else {
|
||||
mtl_ofi_create_recv_tag(&match_bits, &mask_bits, comm->c_contextid, src,
|
||||
tag);
|
||||
/* src_addr is ignored when FI_DIRECTED_RECV is not used */
|
||||
}
|
||||
|
||||
ompi_ret = ompi_mtl_datatype_recv_buf(convertor,
|
||||
&start,
|
||||
@ -606,7 +644,7 @@ ompi_mtl_ofi_mrecv_callback(struct fi_cq_tagged_entry *wc,
|
||||
{
|
||||
struct mca_mtl_request_t *mrecv_req = ofi_req->mrecv_req;
|
||||
ompi_status_public_t *status = &mrecv_req->ompi_req->req_status;
|
||||
status->MPI_SOURCE = MTL_OFI_GET_SOURCE(wc->tag);
|
||||
status->MPI_SOURCE = mtl_ofi_get_source(wc);
|
||||
status->MPI_TAG = MTL_OFI_GET_TAG(wc->tag);
|
||||
status->MPI_ERROR = MPI_SUCCESS;
|
||||
status->_ucount = wc->len;
|
||||
@ -628,7 +666,7 @@ ompi_mtl_ofi_mrecv_error_callback(struct fi_cq_err_entry *error,
|
||||
struct mca_mtl_request_t *mrecv_req = ofi_req->mrecv_req;
|
||||
ompi_status_public_t *status = &mrecv_req->ompi_req->req_status;
|
||||
status->MPI_TAG = MTL_OFI_GET_TAG(ofi_req->match_bits);
|
||||
status->MPI_SOURCE = MTL_OFI_GET_SOURCE(ofi_req->match_bits);
|
||||
status->MPI_SOURCE = mtl_ofi_get_source((struct fi_cq_tagged_entry *) error);
|
||||
|
||||
switch (error->err) {
|
||||
case FI_ETRUNC:
|
||||
@ -716,7 +754,7 @@ ompi_mtl_ofi_probe_callback(struct fi_cq_tagged_entry *wc,
|
||||
{
|
||||
ofi_req->match_state = 1;
|
||||
ofi_req->match_bits = wc->tag;
|
||||
ofi_req->status.MPI_SOURCE = MTL_OFI_GET_SOURCE(wc->tag);
|
||||
ofi_req->status.MPI_SOURCE = mtl_ofi_get_source(wc);
|
||||
ofi_req->status.MPI_TAG = MTL_OFI_GET_TAG(wc->tag);
|
||||
ofi_req->status.MPI_ERROR = MPI_SUCCESS;
|
||||
ofi_req->status._ucount = wc->len;
|
||||
@ -749,22 +787,28 @@ ompi_mtl_ofi_iprobe(struct mca_mtl_base_module_t *mtl,
|
||||
struct ompi_mtl_ofi_request_t ofi_req;
|
||||
ompi_proc_t *ompi_proc = NULL;
|
||||
mca_mtl_ofi_endpoint_t *endpoint = NULL;
|
||||
fi_addr_t remote_proc = 0;
|
||||
fi_addr_t remote_proc = ompi_mtl_ofi.any_addr;
|
||||
uint64_t match_bits, mask_bits;
|
||||
ssize_t ret;
|
||||
struct fi_msg_tagged msg;
|
||||
uint64_t msgflags = FI_PEEK;
|
||||
|
||||
/**
|
||||
* If the source is known, use its peer_fiaddr.
|
||||
*/
|
||||
if (MPI_ANY_SOURCE != src) {
|
||||
ompi_proc = ompi_comm_peer_lookup( comm, src );
|
||||
endpoint = ompi_mtl_ofi_get_endpoint(mtl, ompi_proc);
|
||||
remote_proc = endpoint->peer_fiaddr;
|
||||
}
|
||||
if (ompi_mtl_ofi.fi_cq_data) {
|
||||
/* If the source is known, use its peer_fiaddr. */
|
||||
if (MPI_ANY_SOURCE != src) {
|
||||
ompi_proc = ompi_comm_peer_lookup( comm, src );
|
||||
endpoint = ompi_mtl_ofi_get_endpoint(mtl, ompi_proc);
|
||||
remote_proc = endpoint->peer_fiaddr;
|
||||
}
|
||||
|
||||
MTL_OFI_SET_RECV_BITS(match_bits, mask_bits, comm->c_contextid, src, tag);
|
||||
mtl_ofi_create_recv_tag_CQD(&match_bits, &mask_bits, comm->c_contextid,
|
||||
tag);
|
||||
}
|
||||
else {
|
||||
mtl_ofi_create_recv_tag(&match_bits, &mask_bits, comm->c_contextid, src,
|
||||
tag);
|
||||
/* src_addr is ignored when FI_DIRECTED_RECV is not used */
|
||||
}
|
||||
|
||||
/**
|
||||
* fi_trecvmsg with FI_PEEK:
|
||||
@ -829,7 +873,7 @@ ompi_mtl_ofi_improbe(struct mca_mtl_base_module_t *mtl,
|
||||
struct ompi_mtl_ofi_request_t *ofi_req;
|
||||
ompi_proc_t *ompi_proc = NULL;
|
||||
mca_mtl_ofi_endpoint_t *endpoint = NULL;
|
||||
fi_addr_t remote_proc = 0;
|
||||
fi_addr_t remote_proc = ompi_mtl_ofi.any_addr;
|
||||
uint64_t match_bits, mask_bits;
|
||||
ssize_t ret;
|
||||
struct fi_msg_tagged msg;
|
||||
@ -843,13 +887,22 @@ ompi_mtl_ofi_improbe(struct mca_mtl_base_module_t *mtl,
|
||||
/**
|
||||
* If the source is known, use its peer_fiaddr.
|
||||
*/
|
||||
if (MPI_ANY_SOURCE != src) {
|
||||
ompi_proc = ompi_comm_peer_lookup( comm, src );
|
||||
endpoint = ompi_mtl_ofi_get_endpoint(mtl, ompi_proc);
|
||||
remote_proc = endpoint->peer_fiaddr;
|
||||
}
|
||||
|
||||
MTL_OFI_SET_RECV_BITS(match_bits, mask_bits, comm->c_contextid, src, tag);
|
||||
if (ompi_mtl_ofi.fi_cq_data) {
|
||||
if (MPI_ANY_SOURCE != src) {
|
||||
ompi_proc = ompi_comm_peer_lookup( comm, src );
|
||||
endpoint = ompi_mtl_ofi_get_endpoint(mtl, ompi_proc);
|
||||
remote_proc = endpoint->peer_fiaddr;
|
||||
}
|
||||
|
||||
mtl_ofi_create_recv_tag_CQD(&match_bits, &mask_bits, comm->c_contextid,
|
||||
tag);
|
||||
}
|
||||
else {
|
||||
/* src_addr is ignored when FI_DIRECTED_RECV is not used */
|
||||
mtl_ofi_create_recv_tag(&match_bits, &mask_bits, comm->c_contextid, src,
|
||||
tag);
|
||||
}
|
||||
|
||||
/**
|
||||
* fi_trecvmsg with FI_PEEK and FI_CLAIM:
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved
|
||||
*
|
||||
* Copyright (c) 2014-2017 Cisco Systems, Inc. All rights reserved
|
||||
* Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights
|
||||
@ -31,6 +31,7 @@ static char *prov_exclude;
|
||||
static int control_progress;
|
||||
static int data_progress;
|
||||
static int av_type;
|
||||
static int ofi_tag_mode;
|
||||
|
||||
/*
|
||||
* Enumerators
|
||||
@ -68,6 +69,21 @@ mca_base_var_enum_value_t av_table_type[] = {
|
||||
{0, NULL}
|
||||
};
|
||||
|
||||
/* Values accepted by the tag_mode MCA variable (see ofi_tag_mode_type). */
enum {
    MTL_OFI_TAG_AUTO = 1,   /* "auto" */
    MTL_OFI_TAG_1    = 2,   /* "ofi_tag_1" */
    MTL_OFI_TAG_2    = 3,   /* "ofi_tag_2" */
    MTL_OFI_TAG_FULL = 4,   /* "ofi_tag_full" */
};
|
||||
|
||||
mca_base_var_enum_value_t ofi_tag_mode_type[] = {
|
||||
{MTL_OFI_TAG_AUTO, "auto"},
|
||||
{MTL_OFI_TAG_1, "ofi_tag_1"},
|
||||
{MTL_OFI_TAG_2, "ofi_tag_2"},
|
||||
{MTL_OFI_TAG_FULL, "ofi_tag_full"},
|
||||
{0, NULL}
|
||||
};
|
||||
|
||||
mca_mtl_ofi_component_t mca_mtl_ofi_component = {
|
||||
{
|
||||
|
||||
@ -136,7 +152,37 @@ ompi_mtl_ofi_component_register(void)
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&ompi_mtl_ofi.ofi_progress_event_count);
|
||||
|
||||
free(desc);
|
||||
free(desc);
|
||||
|
||||
ret = mca_base_var_enum_create ("ofi_tag_mode_type", ofi_tag_mode_type , &new_enum);
|
||||
if (OPAL_SUCCESS != ret) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
ofi_tag_mode = MTL_OFI_TAG_AUTO;
|
||||
asprintf(&desc, "Mode specifying how many bits to use for various MPI values in OFI/Libfabric"
|
||||
" communications. Some Libfabric provider network types can support most of Open MPI"
|
||||
" needs; others can only supply a limited number of bits, which then must be split"
|
||||
" across the MPI communicator ID, MPI source rank, and MPI tag. Three different"
|
||||
" splitting schemes are available: ofi_tag_full (%d bits for the communicator, %d bits"
|
||||
" for the source rank, and %d bits for the tag), ofi_tag_1 (%d bits for the communicator"
|
||||
", %d bits source rank, %d bits tag), ofi_tag_2 (%d bits for the communicator"
|
||||
", %d bits source rank, %d bits tag). By default, this MCA variable is set to \"auto\","
|
||||
" which will first try to use ofi_tag_full, and if that fails, fall back to ofi_tag_1.",
|
||||
MTL_OFI_CID_BIT_COUNT_DATA, 32, MTL_OFI_TAG_BIT_COUNT_DATA,
|
||||
MTL_OFI_CID_BIT_COUNT_1, MTL_OFI_SOURCE_BIT_COUNT_1, MTL_OFI_TAG_BIT_COUNT_1,
|
||||
MTL_OFI_CID_BIT_COUNT_2, MTL_OFI_SOURCE_BIT_COUNT_2, MTL_OFI_TAG_BIT_COUNT_2);
|
||||
|
||||
mca_base_component_var_register (&mca_mtl_ofi_component.super.mtl_version,
|
||||
"tag_mode",
|
||||
desc,
|
||||
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
|
||||
OPAL_INFO_LVL_6,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&ofi_tag_mode);
|
||||
|
||||
free(desc);
|
||||
OBJ_RELEASE(new_enum);
|
||||
|
||||
ret = mca_base_var_enum_create ("control_prog_type", control_prog_type, &new_enum);
|
||||
if (OPAL_SUCCESS != ret) {
|
||||
@ -304,13 +350,96 @@ select_ofi_provider(struct fi_info *providers)
|
||||
return prov;
|
||||
}
|
||||
|
||||
/* Check if FI_REMOTE_CQ_DATA is supported, if so send the source rank there
|
||||
* FI_DIRECTED_RECV is also needed so receives can discrimate the source
|
||||
*/
|
||||
static int
|
||||
ompi_mtl_ofi_check_fi_remote_cq_data(int fi_version,
|
||||
struct fi_info *hints,
|
||||
struct fi_info *provider,
|
||||
struct fi_info **prov_cq_data)
|
||||
{
|
||||
int ret;
|
||||
char *provider_name;
|
||||
struct fi_info *hints_dup;
|
||||
hints_dup = fi_dupinfo(hints);
|
||||
|
||||
provider_name = strdup(provider->fabric_attr->prov_name);
|
||||
hints_dup->fabric_attr->prov_name = provider_name;
|
||||
hints_dup->caps |= FI_TAGGED | FI_DIRECTED_RECV;
|
||||
/* Ask for the size that OMPI uses for the source rank number */
|
||||
hints_dup->domain_attr->cq_data_size = sizeof(int);
|
||||
ret = fi_getinfo(fi_version, NULL, NULL, 0ULL, hints_dup, prov_cq_data);
|
||||
|
||||
if ((0 != ret) && (-FI_ENODATA != ret)) {
|
||||
opal_show_help("help-mtl-ofi.txt", "OFI call fail", true,
|
||||
"fi_getinfo",
|
||||
ompi_process_info.nodename, __FILE__, __LINE__,
|
||||
fi_strerror(-ret), -ret);
|
||||
return ret;
|
||||
} else if (-FI_ENODATA == ret) {
|
||||
/* The provider does not support FI_REMOTE_CQ_DATA */
|
||||
prov_cq_data = NULL;
|
||||
}
|
||||
|
||||
fi_freeinfo(hints_dup);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static void
|
||||
ompi_mtl_ofi_define_tag_mode(int ofi_tag_mode) {
|
||||
switch (ofi_tag_mode) {
|
||||
case MTL_OFI_TAG_1:
|
||||
ompi_mtl_ofi.base.mtl_max_contextid = (int)((1ULL << MTL_OFI_CID_BIT_COUNT_1 ) - 1);
|
||||
ompi_mtl_ofi.base.mtl_max_tag = (int)((1ULL << (MTL_OFI_TAG_BIT_COUNT_1 - 1)) - 1);
|
||||
|
||||
ompi_mtl_ofi.source_rank_tag_mask = MTL_OFI_SOURCE_TAG_MASK_1;
|
||||
ompi_mtl_ofi.num_bits_source_rank = MTL_OFI_SOURCE_BIT_COUNT_1;
|
||||
ompi_mtl_ofi.source_rank_mask = MTL_OFI_SOURCE_MASK_1;
|
||||
|
||||
ompi_mtl_ofi.mpi_tag_mask = MTL_OFI_TAG_MASK_1;
|
||||
ompi_mtl_ofi.num_bits_mpi_tag = MTL_OFI_TAG_BIT_COUNT_1;
|
||||
|
||||
ompi_mtl_ofi.sync_send = MTL_OFI_SYNC_SEND_1;
|
||||
ompi_mtl_ofi.sync_send_ack = MTL_OFI_SYNC_SEND_ACK_1;
|
||||
ompi_mtl_ofi.sync_proto_mask = MTL_OFI_PROTO_MASK_1;
|
||||
break;
|
||||
case MTL_OFI_TAG_2:
|
||||
ompi_mtl_ofi.base.mtl_max_contextid = (int)((1ULL << MTL_OFI_CID_BIT_COUNT_2 ) - 1);
|
||||
ompi_mtl_ofi.base.mtl_max_tag = (int)((1ULL << (MTL_OFI_TAG_BIT_COUNT_2 - 1)) - 1);
|
||||
|
||||
ompi_mtl_ofi.source_rank_tag_mask = MTL_OFI_SOURCE_TAG_MASK_2;
|
||||
ompi_mtl_ofi.num_bits_source_rank = MTL_OFI_SOURCE_BIT_COUNT_2;
|
||||
ompi_mtl_ofi.source_rank_mask = MTL_OFI_SOURCE_MASK_2;
|
||||
|
||||
ompi_mtl_ofi.mpi_tag_mask = MTL_OFI_TAG_MASK_2;
|
||||
ompi_mtl_ofi.num_bits_mpi_tag = MTL_OFI_TAG_BIT_COUNT_2;
|
||||
|
||||
ompi_mtl_ofi.sync_send = MTL_OFI_SYNC_SEND_2;
|
||||
ompi_mtl_ofi.sync_send_ack = MTL_OFI_SYNC_SEND_ACK_2;
|
||||
ompi_mtl_ofi.sync_proto_mask = MTL_OFI_PROTO_MASK_2;
|
||||
break;
|
||||
default: /* use FI_REMOTE_CQ_DATA */
|
||||
ompi_mtl_ofi.base.mtl_max_contextid = (int)((1ULL << MTL_OFI_CID_BIT_COUNT_DATA ) - 1);
|
||||
ompi_mtl_ofi.base.mtl_max_tag = (int)((1ULL << (MTL_OFI_TAG_BIT_COUNT_DATA - 1)) - 1);
|
||||
|
||||
ompi_mtl_ofi.mpi_tag_mask = MTL_OFI_TAG_MASK_DATA;
|
||||
|
||||
ompi_mtl_ofi.sync_send = MTL_OFI_SYNC_SEND_DATA;
|
||||
ompi_mtl_ofi.sync_send_ack = MTL_OFI_SYNC_SEND_ACK_DATA;
|
||||
ompi_mtl_ofi.sync_proto_mask = MTL_OFI_PROTO_MASK_DATA;
|
||||
}
|
||||
}
|
||||
|
||||
static mca_mtl_base_module_t*
|
||||
ompi_mtl_ofi_component_init(bool enable_progress_threads,
|
||||
bool enable_mpi_threads)
|
||||
{
|
||||
int ret, fi_version;
|
||||
struct fi_info *hints;
|
||||
struct fi_info *providers = NULL, *prov = NULL;
|
||||
struct fi_info *providers = NULL;
|
||||
struct fi_info *prov = NULL;
|
||||
struct fi_info *prov_cq_data = NULL;
|
||||
struct fi_cq_attr cq_attr = {0};
|
||||
struct fi_av_attr av_attr = {0};
|
||||
char ep_name[FI_NAME_MAX] = {0};
|
||||
@ -411,6 +540,39 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
|
||||
goto error;
|
||||
}
|
||||
|
||||
/**
|
||||
* Select the format of the OFI tag
|
||||
*/
|
||||
if ((MTL_OFI_TAG_AUTO == ofi_tag_mode) ||
|
||||
(MTL_OFI_TAG_FULL == ofi_tag_mode)) {
|
||||
ret = ompi_mtl_ofi_check_fi_remote_cq_data(fi_version,
|
||||
hints, prov,
|
||||
&prov_cq_data);
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
goto error;
|
||||
} else if (NULL == prov_cq_data) {
|
||||
/* No support for FI_REMOTE_CQ_DATA */
|
||||
fi_freeinfo(prov_cq_data);
|
||||
ompi_mtl_ofi.fi_cq_data = false;
|
||||
if (MTL_OFI_TAG_AUTO == ofi_tag_mode) {
|
||||
/* Fallback to MTL_OFI_TAG_1 */
|
||||
ompi_mtl_ofi_define_tag_mode(MTL_OFI_TAG_1);
|
||||
} else { /* MTL_OFI_TAG_FULL */
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: OFI provider %s does not support FI_REMOTE_CQ_DATA\n",
|
||||
__FILE__, __LINE__, prov->fabric_attr->prov_name);
|
||||
goto error;
|
||||
}
|
||||
} else {
|
||||
/* Use FI_REMOTE_CQ_DATA */
|
||||
ompi_mtl_ofi.fi_cq_data = true;
|
||||
prov = prov_cq_data;
|
||||
ompi_mtl_ofi_define_tag_mode(MTL_OFI_TAG_FULL);
|
||||
}
|
||||
} else { /* MTL_OFI_TAG_1 or MTL_OFI_TAG_2 */
|
||||
ompi_mtl_ofi.fi_cq_data = false;
|
||||
ompi_mtl_ofi_define_tag_mode(ofi_tag_mode);
|
||||
}
|
||||
|
||||
/**
|
||||
* Open fabric
|
||||
@ -503,7 +665,7 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads,
|
||||
* Allocate memory for storing the CQ events read in OFI progress.
|
||||
*/
|
||||
ompi_mtl_ofi.progress_entries = calloc(ompi_mtl_ofi.ofi_progress_event_count, sizeof(struct fi_cq_tagged_entry));
|
||||
if (OPAL_UNLIKELY(!ompi_mtl_ofi.progress_entries)) {
|
||||
if (NULL == ompi_mtl_ofi.progress_entries) {
|
||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||
"%s:%d: alloc of CQ event storage failed: %s\n",
|
||||
__FILE__, __LINE__, strerror(errno));
|
||||
@ -614,6 +776,9 @@ error:
|
||||
if (providers) {
|
||||
(void) fi_freeinfo(providers);
|
||||
}
|
||||
if (prov_cq_data) {
|
||||
(void) fi_freeinfo(prov_cq_data);
|
||||
}
|
||||
if (hints) {
|
||||
(void) fi_freeinfo(hints);
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved
|
||||
*
|
||||
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -55,6 +55,21 @@ typedef struct mca_mtl_ofi_module_t {
|
||||
/** CQ event storage */
|
||||
struct fi_cq_tagged_entry *progress_entries;
|
||||
|
||||
/** Use FI_REMOTE_CQ_DATA*/
|
||||
bool fi_cq_data;
|
||||
|
||||
/** Info used to create the OFI tag **/
|
||||
unsigned long long source_rank_tag_mask;
|
||||
int num_bits_source_rank;
|
||||
unsigned long long source_rank_mask;
|
||||
unsigned long long mpi_tag_mask;
|
||||
int num_bits_mpi_tag;
|
||||
|
||||
/** Synchronous protocol tag bits */
|
||||
unsigned long long sync_send;
|
||||
unsigned long long sync_send_ack;
|
||||
unsigned long long sync_proto_mask;
|
||||
|
||||
} mca_mtl_ofi_module_t;
|
||||
|
||||
extern mca_mtl_ofi_module_t ompi_mtl_ofi;
|
||||
@ -64,75 +79,165 @@ typedef struct mca_mtl_ofi_component_t {
|
||||
mca_mtl_base_component_2_0_0_t super;
|
||||
} mca_mtl_ofi_component_t;
|
||||
|
||||
/*OFI TAG:
|
||||
* Define 3 different OFI tag distributions:
|
||||
* 1) Support FI_REMOTE_CQ_DATA: No need for source rank in the tag
|
||||
* 2) ofi_tag_1: fallback when no FI_REMOTE_CQ_DATA is supported
|
||||
* 3) ofi_tag_2: Alternative tag when no FI_REMOTE_CQ_DATA is supported
|
||||
* with more bits for the communicator ID.
|
||||
* More details of the tags are in the README file (mtl_ofi_tag_mode).
|
||||
*/
|
||||
|
||||
/* match/ignore bit manipulation
|
||||
*
|
||||
* 0 123 4567 01234567 01234567 01234567 01234567 01234567 01234567 01234567
|
||||
* | | | |
|
||||
* | | context id | source | message tag
|
||||
* ^| ^ | | |
|
||||
* | |
|
||||
* | +- protocol
|
||||
* +---- ACK flag
|
||||
/* Support FI_REMOTE_CQ_DATA, send the source rank in the CQ data (4 Bytes is the minimum)
|
||||
* 01234567 01234567 01234567 0123 4567 01234567 01234567 01234567 01234567
|
||||
* | |
|
||||
* context_id |prot| message tag
|
||||
*/
|
||||
#define MTL_OFI_PROTO_BIT_COUNT (4)
|
||||
|
||||
#define MTL_OFI_PROTOCOL_HEADER_MASK (0xF000000000000000ULL)
|
||||
#define MTL_OFI_PROTOCOL_MASK (0x7000000000000000ULL)
|
||||
#define MTL_OFI_CONTEXT_MASK (0x0FFF000000000000ULL)
|
||||
#define MTL_OFI_SOURCE_MASK (0x0000FFFF00000000ULL)
|
||||
#define MTL_OFI_TAG_MASK (0x00000000FFFFFFFFULL)
|
||||
#define MTL_OFI_CID_BIT_COUNT_DATA (28)
|
||||
#define MTL_OFI_TAG_MASK_DATA (0x00000000FFFFFFFFULL)
|
||||
#define MTL_OFI_TAG_BIT_COUNT_DATA (32)
|
||||
#define MTL_OFI_PROTO_MASK_DATA (0x0000000F00000000ULL)
|
||||
#define MTL_OFI_SYNC_SEND_DATA (0x0000000100000000ULL)
|
||||
#define MTL_OFI_SYNC_SEND_ACK_DATA (0x0000000900000000ULL)
|
||||
|
||||
#define MTL_OFI_SYNC_SEND (0x1000000000000000ULL)
|
||||
#define MTL_OFI_SYNC_SEND_ACK (0x9000000000000000ULL)
|
||||
/* Send tag with CQ_DATA */
|
||||
__opal_attribute_always_inline__ static inline uint64_t
|
||||
mtl_ofi_create_send_tag_CQD(int comm_id, int tag)
|
||||
{
|
||||
uint64_t match_bits = comm_id;
|
||||
match_bits = (match_bits << (MTL_OFI_TAG_BIT_COUNT_DATA
|
||||
+ MTL_OFI_PROTO_BIT_COUNT));
|
||||
match_bits |= (tag & MTL_OFI_TAG_MASK_DATA);
|
||||
return match_bits;
|
||||
}
|
||||
|
||||
/* send posting */
|
||||
#define MTL_OFI_SET_SEND_BITS(match_bits, contextid, source, tag, type) \
|
||||
{ \
|
||||
match_bits = contextid; \
|
||||
match_bits = (match_bits << 16); \
|
||||
match_bits |= (uint64_t)source; \
|
||||
match_bits = (match_bits << 32); \
|
||||
match_bits |= (MTL_OFI_TAG_MASK & tag) | type; \
|
||||
/* Receive tag with CQ_DATA */
|
||||
__opal_attribute_always_inline__ static inline void
|
||||
mtl_ofi_create_recv_tag_CQD(uint64_t *match_bits, uint64_t *mask_bits,
|
||||
int comm_id, int tag)
|
||||
{
|
||||
*mask_bits = ompi_mtl_ofi.sync_proto_mask;
|
||||
*match_bits = (uint64_t) comm_id;
|
||||
*match_bits = (*match_bits << (MTL_OFI_PROTO_BIT_COUNT
|
||||
+ MTL_OFI_TAG_BIT_COUNT_DATA));
|
||||
if (MPI_ANY_TAG == tag) {
|
||||
/* Special negative tags are used for collective operations.
|
||||
* MPI_ANY_TAG should not match these special tags.
|
||||
* See ompi/mca/coll/base/coll_tags.h
|
||||
*/
|
||||
*mask_bits |= (ompi_mtl_ofi.mpi_tag_mask>>1);
|
||||
} else {
|
||||
*match_bits |= (ompi_mtl_ofi.mpi_tag_mask & tag);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* ofi_tag_1: fallback when no FI_REMOTE_CQ_DATA is supported
|
||||
*
|
||||
* 01234567 0123 4567 01234567 0123 4567 01234567 01234567 01234567 01234567
|
||||
* | | |
|
||||
* Comm id | source |prot| message tag
|
||||
*/
|
||||
|
||||
#define MTL_OFI_CID_BIT_COUNT_1 (12)
|
||||
#define MTL_OFI_SOURCE_TAG_MASK_1 (0x000FFFF000000000ULL)
|
||||
#define MTL_OFI_SOURCE_BIT_COUNT_1 (16)
|
||||
#define MTL_OFI_SOURCE_MASK_1 (0x000000000000FFFFULL)
|
||||
#define MTL_OFI_TAG_MASK_1 (0x00000000FFFFFFFFULL)
|
||||
#define MTL_OFI_TAG_BIT_COUNT_1 (32)
|
||||
#define MTL_OFI_PROTO_MASK_1 (0x0000000F00000000ULL)
|
||||
#define MTL_OFI_SYNC_SEND_1 (0x0000000100000000ULL)
|
||||
#define MTL_OFI_SYNC_SEND_ACK_1 (0x0000000900000000ULL)
|
||||
|
||||
/*
|
||||
* ofi_tag_2: Alternative tag when no FI_REMOTE_CQ_DATA is supported
|
||||
*
|
||||
* 01234567 01234567 01234567 01234567 01234567 0123 4567 01234567 01234567
|
||||
* | | |
|
||||
* Comm id | source |prot| message tag
|
||||
*/
|
||||
|
||||
/* Width in bits of the communicator-id field in the ofi_tag_2 layout
 * (24 + 16 + 4 + 20 = 64 bits in total). */
#define MTL_OFI_CID_BIT_COUNT_2 (24)
|
||||
/* Bits 24-39: the 16-bit source field at its position inside the tag. */
#define MTL_OFI_SOURCE_TAG_MASK_2 (0x000000FFFF000000ULL)
|
||||
/* Width in bits of the source field. */
#define MTL_OFI_SOURCE_BIT_COUNT_2 (16)
|
||||
/* Low 16 bits set; presumably applied to a source rank that is not in its
 * in-tag position - verify at the use site. */
#define MTL_OFI_SOURCE_MASK_2 (0x000000000000FFFFULL)
|
||||
/* Bits 0-19: the message-tag field. */
#define MTL_OFI_TAG_MASK_2 (0x00000000000FFFFFULL)
|
||||
/* Width in bits of the message-tag field. */
#define MTL_OFI_TAG_BIT_COUNT_2 (20)
|
||||
/* Bits 20-23: the 4-bit protocol field. */
#define MTL_OFI_PROTO_MASK_2 (0x0000000000F00000ULL)
|
||||
/* Protocol field value 0x1. */
#define MTL_OFI_SYNC_SEND_2 (0x0000000000100000ULL)
|
||||
/* Protocol field value 0x9. */
#define MTL_OFI_SYNC_SEND_ACK_2 (0x0000000000900000ULL)
|
||||
|
||||
/* Send tag */
|
||||
__opal_attribute_always_inline__ static inline uint64_t
|
||||
mtl_ofi_create_send_tag(int comm_id, int source, int tag)
|
||||
{
|
||||
uint64_t match_bits = comm_id;
|
||||
match_bits = (match_bits << ompi_mtl_ofi.num_bits_source_rank);
|
||||
match_bits |= (uint64_t)(source & ompi_mtl_ofi.source_rank_mask);
|
||||
match_bits = (match_bits << (ompi_mtl_ofi.num_bits_mpi_tag
|
||||
+ MTL_OFI_PROTO_BIT_COUNT));
|
||||
match_bits |= (tag & ompi_mtl_ofi.mpi_tag_mask);
|
||||
return match_bits;
|
||||
}
|
||||
|
||||
/* Receive tag*/
|
||||
/*
 * Fill in the match and mask bits for a receive when the source rank is
 * carried inside the tag.
 *
 * match_bits  [out] comm_id, then the masked source rank (omitted for
 *                   MPI_ANY_SOURCE), shifted above the protocol and tag
 *                   fields, with the masked tag OR-ed in unless tag is
 *                   MPI_ANY_TAG.
 * mask_bits   [out] starts as ompi_mtl_ofi.sync_proto_mask; wildcard
 *                   source and wildcard tag each add further bits.
 * comm_id     communicator id.
 * source      sender rank, or MPI_ANY_SOURCE.
 * tag         MPI tag, or MPI_ANY_TAG.
 */
__opal_attribute_always_inline__ static inline void
|
||||
mtl_ofi_create_recv_tag(uint64_t *match_bits, uint64_t *mask_bits,
|
||||
int comm_id, int source, int tag)
|
||||
{
|
||||
/* The sync-protocol bits are always part of the mask. */
*mask_bits = ompi_mtl_ofi.sync_proto_mask;
|
||||
*match_bits = comm_id;
|
||||
/* Make room for the source rank below the communicator id. */
*match_bits = (*match_bits << ompi_mtl_ofi.num_bits_source_rank);
|
||||
|
||||
if (MPI_ANY_SOURCE == source) {
|
||||
/* Wildcard source: leave the source field zero and add it to the mask. */
*match_bits = (*match_bits << (ompi_mtl_ofi.num_bits_mpi_tag
|
||||
+ MTL_OFI_PROTO_BIT_COUNT));
|
||||
*mask_bits |= ompi_mtl_ofi.source_rank_tag_mask;
|
||||
} else {
|
||||
/* Specific source: encode the masked rank before shifting into place. */
*match_bits |= (uint64_t)(source & ompi_mtl_ofi.source_rank_mask);
|
||||
*match_bits = (*match_bits << (ompi_mtl_ofi.num_bits_mpi_tag
|
||||
+ MTL_OFI_PROTO_BIT_COUNT));
|
||||
}
|
||||
|
||||
/* NOTE(review): the text from here through the end of the
 * MTL_OFI_SET_RECV_BITS macro body appears to be an older macro-based
 * implementation interleaved into this function by the diff rendering.
 * It hard-codes 16- and 32-bit shifts, whereas the function uses
 * ompi_mtl_ofi fields. Confirm against the actual header. */
/* receive posting */
|
||||
/* Special tags are used for collective operations.
|
||||
* MPI_ANY_TAG should not match these special tags.
|
||||
* See ompi/mca/coll/base/coll_tags.h
|
||||
*/
|
||||
#define MTL_OFI_SET_RECV_BITS(match_bits, mask_bits, contextid, source, tag) \
|
||||
{ \
|
||||
match_bits = 0; \
|
||||
mask_bits = MTL_OFI_PROTOCOL_MASK; \
|
||||
\
|
||||
match_bits = contextid; \
|
||||
match_bits = (match_bits << 16); \
|
||||
\
|
||||
if (MPI_ANY_SOURCE == source) { \
|
||||
match_bits = (match_bits << 32); \
|
||||
mask_bits |= MTL_OFI_SOURCE_MASK; \
|
||||
} else { \
|
||||
match_bits |= (uint64_t)source; \
|
||||
match_bits = (match_bits << 32); \
|
||||
} \
|
||||
\
|
||||
if (MPI_ANY_TAG == tag) { \
|
||||
mask_bits |= 0x000000007FFFFFFFULL; \
|
||||
} else { \
|
||||
match_bits |= (MTL_OFI_TAG_MASK & tag); \
|
||||
} \
|
||||
/* Tag handling of mtl_ofi_create_recv_tag (pointer-based form). */
if (MPI_ANY_TAG == tag) {
|
||||
/* Special negative tags are used for collective operations.
|
||||
* MPI_ANY_TAG should not match these special tags.
|
||||
* See ompi/mca/coll/base/coll_tags.h
|
||||
*/
|
||||
/* The wildcard covers the tag mask shifted right by one, leaving its top bit out. */
*mask_bits |= (ompi_mtl_ofi.mpi_tag_mask>>1);
|
||||
} else {
|
||||
*match_bits |= (ompi_mtl_ofi.mpi_tag_mask & tag);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Mark match_bits as a synchronous send by OR-ing in
 * ompi_mtl_ofi.sync_send.
 *
 * match_bits must be a modifiable lvalue; it is evaluated exactly once.
 * Both the argument and the whole expansion are parenthesized, so the
 * macro keeps its meaning when used inside a larger expression: without
 * the outer parentheses, MTL_OFI_SET_SYNC_SEND(x) & m would parse as
 * x |= (sync_send & m).
 */
#define MTL_OFI_SET_SYNC_SEND(match_bits) \
    ((match_bits) |= ompi_mtl_ofi.sync_send)
|
||||
|
||||
/*
 * NOTE(review): this span appears to hold two revisions interleaved by the
 * diff rendering. The bodies that use MTL_OFI_SYNC_SEND,
 * MTL_OFI_SYNC_SEND_ACK, MTL_OFI_PROTOCOL_HEADER_MASK, MTL_OFI_TAG_MASK and
 * MTL_OFI_SOURCE_MASK look like the older compile-time versions; the bodies
 * that read ompi_mtl_ofi fields look like their replacements. Confirm
 * against the actual header.
 *
 * MTL_OFI_IS_SYNC_SEND(match_bits): in the ompi_mtl_ofi form, true when
 * match_bits masked with ompi_mtl_ofi.sync_proto_mask equals
 * ompi_mtl_ofi.sync_send.
 */
#define MTL_OFI_IS_SYNC_SEND(match_bits) \
|
||||
(MTL_OFI_SYNC_SEND == (MTL_OFI_PROTOCOL_HEADER_MASK & match_bits))
|
||||
#define MTL_OFI_IS_SYNC_SEND_ACK(match_bits) \
|
||||
(MTL_OFI_SYNC_SEND_ACK == (MTL_OFI_PROTOCOL_HEADER_MASK & match_bits))
|
||||
(ompi_mtl_ofi.sync_send == (ompi_mtl_ofi.sync_proto_mask & match_bits))
|
||||
|
||||
/* True when match_bits masked with ompi_mtl_ofi.sync_proto_mask equals
 * ompi_mtl_ofi.sync_send_ack. */
#define MTL_OFI_IS_SYNC_SEND_ACK(match_bits) \
|
||||
(ompi_mtl_ofi.sync_send_ack == (ompi_mtl_ofi.sync_proto_mask & match_bits))
|
||||
|
||||
/* Extract the MPI tag from match_bits as an int. Two bodies follow the
 * single #define line; see the NOTE(review) at the top of this span. */
#define MTL_OFI_GET_TAG(match_bits) \
|
||||
((int)(match_bits & MTL_OFI_TAG_MASK))
|
||||
#define MTL_OFI_GET_SOURCE(match_bits) \
|
||||
((int)((match_bits & MTL_OFI_SOURCE_MASK) >> 32))
|
||||
((int)(match_bits & ompi_mtl_ofi.mpi_tag_mask))
|
||||
|
||||
__opal_attribute_always_inline__ static inline int
|
||||
mtl_ofi_get_source(struct fi_cq_tagged_entry *wc)
|
||||
{
|
||||
int src;
|
||||
if (ompi_mtl_ofi.fi_cq_data) {
|
||||
src = (int) wc->data;
|
||||
}
|
||||
else {
|
||||
src = (int)((wc->tag >> (MTL_OFI_PROTO_BIT_COUNT +
|
||||
ompi_mtl_ofi.num_bits_mpi_tag)) & ompi_mtl_ofi.source_rank_mask);
|
||||
}
|
||||
|
||||
return src;
|
||||
}
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* MTL_OFI_TYPES_H_HAS_BEEN_INCLUDED */
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user