mtl/ofi: Fix segfault when not using Thread-Grouping feature

For the non-thread-grouping paths, only the first (0th) OFI context
should be used for communication. Otherwise we would access a
nonexistent array item and cause a segfault. While at it, clarify some
content regarding SEPs in the README (credit to Matias Cabral for the
README edits).

Signed-off-by: Aravind Gopalakrishnan <Aravind.Gopalakrishnan@intel.com>
This commit is contained in:
parent 8451cd70ac
commit 6edcc479c4
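To make the failure mode concrete, here is a minimal standalone sketch of the out-of-bounds access this commit fixes. The struct, sizes, and names below are invented for illustration and are not the Open MPI sources; the guard mirrors the logic added in the diff further down.

/* sketch.c -- hypothetical reduction of the bug, not the real MTL code */
#include <stdio.h>
#include <stdlib.h>

struct mtl_state {
    int  thread_grouping;   /* 0 when the Thread-Grouping feature is off */
    int *comm_to_context;   /* sized to the number of OFI contexts */
    int  num_ofi_contexts;
};

static int map_comm_to_ctxt(struct mtl_state *mtl, unsigned comm_id)
{
    /* The fix: without thread grouping only context 0 exists, so every
     * communicator must map to it instead of indexing by comm_id. */
    if (0 == mtl->thread_grouping) {
        comm_id = 0;
    }
    return mtl->comm_to_context[comm_id];
}

int main(void)
{
    struct mtl_state mtl = { .thread_grouping = 0, .num_ofi_contexts = 1 };
    mtl.comm_to_context = calloc(mtl.num_ofi_contexts, sizeof(int));

    /* A duplicated communicator gets a non-zero ID; before the fix this
     * read comm_to_context[5] -- past the 1-element allocation. */
    printf("ctxt = %d\n", map_comm_to_ctxt(&mtl, 5));
    free(mtl.comm_to_context);
    return 0;
}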
@@ -72,7 +72,7 @@ by reducing the bits available for the communicator ID field in the OFI tag.
 
 SCALABLE ENDPOINTS:
 -------------------
-OFI MTL supports OFI Scalable Endpoints feature as a means to improve
+OFI MTL supports OFI Scalable Endpoints (SEP) feature as a means to improve
 multi-threaded application throughput and message rate. Currently the feature
 is designed to utilize multiple TX/RX contexts exposed by the OFI provider in
 conjunction with a multi-communicator MPI application model. Therefore, new OFI
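For readers unfamiliar with the libfabric side of this, the sketch below shows roughly how a scalable endpoint and its per-thread TX/RX contexts are obtained. It is a simplified illustration (error handling and cleanup are abbreviated, and NUM_CTXTS is a placeholder), not the MTL's actual initialization path.

#include <stdio.h>
#include <rdma/fabric.h>
#include <rdma/fi_domain.h>
#include <rdma/fi_endpoint.h>

#define NUM_CTXTS 4   /* placeholder: one TX/RX pair per MPI-calling thread */

int main(void)
{
    struct fi_info *hints, *info;
    struct fid_fabric *fabric;
    struct fid_domain *domain;
    struct fid_ep *sep, *tx[NUM_CTXTS], *rx[NUM_CTXTS];
    int i, ret;

    hints = fi_allocinfo();
    hints->caps = FI_TAGGED;              /* the MTL traffics in tagged messages */
    hints->ep_attr->type = FI_EP_RDM;     /* reliable datagram endpoints */
    hints->ep_attr->tx_ctx_cnt = NUM_CTXTS;
    hints->ep_attr->rx_ctx_cnt = NUM_CTXTS;

    ret = fi_getinfo(FI_VERSION(1, 5), NULL, NULL, 0, hints, &info);
    if (ret) { fprintf(stderr, "fi_getinfo failed: %d\n", ret); return 1; }

    fi_fabric(info->fabric_attr, &fabric, NULL);
    fi_domain(fabric, info, &domain, NULL);

    /* One scalable endpoint; providers lacking SEP support fail here,
     * which is why mtl_ofi_enable_sep also checks the provider. */
    ret = fi_scalable_ep(domain, info, &sep, NULL);
    if (ret) { fprintf(stderr, "no SEP support: %d\n", ret); return 1; }

    /* Carve independent TX/RX contexts out of the scalable endpoint;
     * each can be driven by a different thread without locking. */
    for (i = 0; i < NUM_CTXTS; i++) {
        fi_tx_context(sep, i, NULL, &tx[i], NULL);
        fi_rx_context(sep, i, NULL, &rx[i], NULL);
    }

    /* ... bind CQs/AVs, fi_enable(), then use tx[i]/rx[i] per thread ... */
    fi_freeinfo(info);
    fi_freeinfo(hints);
    return 0;
}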
@@ -81,12 +81,13 @@ instead of creating them all at once during init time and this approach also
 favours only creating as many contexts as needed.
 
 1. Multi-communicator model:
-With this approach, the application first duplicates the communicators it
-wants to use with MPI operations (ideally creating as many communicators as
-the number of threads it wants to use to call into MPI). The duplicated
-communicators are then used by the corresponding threads to perform MPI
-operations. A possible usage scenario could be in an MPI + OMP
-application as follows (example limited to 2 ranks):
+With this approach, the MPI application is required to first duplicate
+the communicators it wants to use with MPI operations (ideally creating
+as many communicators as the number of threads it wants to use to call
+into MPI). The duplicated communicators are then used by the
+corresponding threads to perform MPI operations. A possible usage
+scenario could be in an MPI + OMP application as follows
+(example limited to 2 ranks):
 
 MPI_Comm dup_comm[n];
 MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
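The diff truncates the README's example at the hunk boundary. As a self-contained illustration of the same multi-communicator pattern (the thread count, tag, and buffer here are placeholders, not the README's verbatim listing):

#include <mpi.h>
#include <omp.h>

#define NTHREADS 4   /* hypothetical: one communicator per MPI-calling thread */

int main(int argc, char **argv)
{
    MPI_Comm dup_comm[NTHREADS];
    int provided, rank, i;

    /* A real program should verify provided == MPI_THREAD_MULTIPLE. */
    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* One duplicated communicator per thread that will call into MPI. */
    for (i = 0; i < NTHREADS; i++) {
        MPI_Comm_dup(MPI_COMM_WORLD, &dup_comm[i]);
    }

    /* Each OMP thread drives its own communicator (2-rank ping). */
    #pragma omp parallel num_threads(NTHREADS)
    {
        int t = omp_get_thread_num();
        char buf = 'x';
        if (0 == rank) {
            MPI_Send(&buf, 1, MPI_CHAR, 1, 100, dup_comm[t]);
        } else if (1 == rank) {
            MPI_Recv(&buf, 1, MPI_CHAR, 0, 100, dup_comm[t],
                     MPI_STATUS_IGNORE);
        }
    }

    for (i = 0; i < NTHREADS; i++) {
        MPI_Comm_free(&dup_comm[i]);
    }
    MPI_Finalize();
    return 0;
}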
@@ -112,13 +113,17 @@ favours only creating as many contexts as needed.
 }
 
 2. MCA variables:
-To utilize the feature, the following MCA variable needs to be set:
+To utilize the feature, the following MCA variables need to be set:
 mtl_ofi_enable_sep:
-This MCA variable needs to be set to enable the use of Scalable Endpoints
+This MCA variable needs to be set to enable the use of Scalable Endpoints (SEP)
 feature in the OFI MTL. The underlying provider is also checked to ensure the
 feature is supported. If the provider chosen does not support it, user needs
-to either set this variable to 0 or select different provider which supports
+to either set this variable to 0 or select a different provider which supports
 the feature.
+For single-threaded applications one OFI context is sufficient, so OFI SEPs
+may not add benefit.
+Note that mtl_ofi_thread_grouping (see below) needs to be enabled to use the
+different OFI SEP contexts. Otherwise, only one context (ctxt 0) will be used.
 
 Default: 0
 
@@ -126,7 +131,12 @@ To utilize the feature, the following MCA variable needs to be set:
 "-mca mtl_ofi_enable_sep 1"
 
 mtl_ofi_thread_grouping:
-This MCA variable needs to be set to switch Thread Grouping feature on.
+Turn Thread Grouping feature on. This is needed to use the Multi-communicator
+model explained above. This means that the OFI MTL will use the communicator
+ID to decide the SEP contexts to be used by the thread. In this way, each
+thread will have direct access to different OFI resources. If disabled,
+only context 0 will be used.
+Requires mtl_ofi_enable_sep to be set to 1.
 
 Default: 0
 
@@ -139,11 +149,11 @@ To utilize the feature, the following MCA variable needs to be set:
 "-mca mtl_ofi_thread_grouping 1"
 
 mtl_ofi_num_ctxts:
-MCA variable allows user to set the number of OFI contexts the applications
-expects to use. For multi-threaded applications using Thread Grouping
-feature, this number should be set to the number of user threads that will
-call into MPI. For single-threaded applications one OFI context is
-sufficient.
+This MCA variable allows user to set the number of OFI SEP contexts the
+application expects to use. For multi-threaded applications using Thread
+Grouping feature, this number should be set to the number of user threads
+that will call into MPI. This variable will only have effect if
+mtl_ofi_enable_sep is set to 1.
 
 Default: 1
 
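Taken together, the three variables from the README hunks above could be combined on one Open MPI command line roughly as follows (a hedged example, not taken from the README; the rank count, context count, and application name are placeholders, and the OFI MTL is selected via the cm PML):

"mpirun -np 2 -mca pml cm -mca mtl ofi -mca mtl_ofi_enable_sep 1 -mca mtl_ofi_thread_grouping 1 -mca mtl_ofi_num_ctxts 4 ./app"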
@@ -325,10 +325,18 @@ ompi_mtl_ofi_isend_callback(struct fi_cq_tagged_entry *wc,
     return OMPI_SUCCESS;
 }
 
-#define MTL_OFI_MAP_COMM_TO_CONTEXT(comm_id, ctxt_id) \
-    do { \
-        ctxt_id = ompi_mtl_ofi.comm_to_context[comm_id]; \
-    } while (0);
+/* Return OFI context ID associated with the specific communicator */
+__opal_attribute_always_inline__ static inline int
+ompi_mtl_ofi_map_comm_to_ctxt(uint32_t comm_id)
+{
+    /* For non-thread-grouping use case, only one context is used which is
+     * associated to MPI_COMM_WORLD, so use that. */
+    if (0 == ompi_mtl_ofi.thread_grouping) {
+        comm_id = 0;
+    }
+
+    return ompi_mtl_ofi.comm_to_context[comm_id];
+}
 
 __opal_attribute_always_inline__ static inline int
 ompi_mtl_ofi_ssend_recv(ompi_mtl_ofi_request_t *ack_req,
@@ -342,7 +350,7 @@ ompi_mtl_ofi_ssend_recv(ompi_mtl_ofi_request_t *ack_req,
     ssize_t ret = OMPI_SUCCESS;
     int ctxt_id = 0;
 
-    MTL_OFI_MAP_COMM_TO_CONTEXT(comm->c_contextid, ctxt_id);
+    ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(comm->c_contextid);
     set_thread_context(ctxt_id);
 
     ack_req = malloc(sizeof(ompi_mtl_ofi_request_t));
@@ -397,7 +405,7 @@ ompi_mtl_ofi_send_generic(struct mca_mtl_base_module_t *mtl,
     fi_addr_t src_addr = 0;
     fi_addr_t sep_peer_fiaddr = 0;
 
-    MTL_OFI_MAP_COMM_TO_CONTEXT(comm->c_contextid, ctxt_id);
+    ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(comm->c_contextid);
     set_thread_context(ctxt_id);
 
     /**
@@ -532,7 +540,7 @@ ompi_mtl_ofi_isend_generic(struct mca_mtl_base_module_t *mtl,
     ompi_mtl_ofi_request_t *ack_req = NULL; /* For synchronous send */
     fi_addr_t sep_peer_fiaddr = 0;
 
-    MTL_OFI_MAP_COMM_TO_CONTEXT(comm->c_contextid, ctxt_id);
+    ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(comm->c_contextid);
     set_thread_context(ctxt_id);
 
     ofi_req->event_callback = ompi_mtl_ofi_isend_callback;
@@ -617,7 +625,7 @@ ompi_mtl_ofi_recv_callback(struct fi_cq_tagged_entry *wc,
     ompi_status_public_t *status = NULL;
     struct fi_msg_tagged tagged_msg;
 
-    MTL_OFI_MAP_COMM_TO_CONTEXT(ofi_req->comm->c_contextid, ctxt_id);
+    ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(ofi_req->comm->c_contextid);
 
     assert(ofi_req->super.ompi_req);
     status = &ofi_req->super.ompi_req->req_status;
@@ -758,7 +766,7 @@ ompi_mtl_ofi_irecv_generic(struct mca_mtl_base_module_t *mtl,
     size_t length;
     bool free_after;
 
-    MTL_OFI_MAP_COMM_TO_CONTEXT(comm->c_contextid, ctxt_id);
+    ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(comm->c_contextid);
     set_thread_context(ctxt_id);
 
     if (ofi_cq_data) {
@@ -884,7 +892,7 @@ ompi_mtl_ofi_imrecv(struct mca_mtl_base_module_t *mtl,
     uint64_t msgflags = FI_CLAIM | FI_COMPLETION;
     struct ompi_communicator_t *comm = (*message)->comm;
 
-    MTL_OFI_MAP_COMM_TO_CONTEXT(comm->c_contextid, ctxt_id);
+    ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(comm->c_contextid);
     set_thread_context(ctxt_id);
 
     ompi_ret = ompi_mtl_datatype_recv_buf(convertor,
@@ -977,7 +985,7 @@ ompi_mtl_ofi_iprobe_generic(struct mca_mtl_base_module_t *mtl,
     uint64_t msgflags = FI_PEEK | FI_COMPLETION;
     int ctxt_id = 0;
 
-    MTL_OFI_MAP_COMM_TO_CONTEXT(comm->c_contextid, ctxt_id);
+    ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(comm->c_contextid);
     set_thread_context(ctxt_id);
 
     if (ofi_cq_data) {
@@ -1066,7 +1074,7 @@ ompi_mtl_ofi_improbe_generic(struct mca_mtl_base_module_t *mtl,
     uint64_t msgflags = FI_PEEK | FI_CLAIM | FI_COMPLETION;
     int ctxt_id = 0;
 
-    MTL_OFI_MAP_COMM_TO_CONTEXT(comm->c_contextid, ctxt_id);
+    ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(comm->c_contextid);
     set_thread_context(ctxt_id);
 
     ofi_req = malloc(sizeof *ofi_req);
@@ -1168,7 +1176,7 @@ ompi_mtl_ofi_cancel(struct mca_mtl_base_module_t *mtl,
     int ret, ctxt_id = 0;
    ompi_mtl_ofi_request_t *ofi_req = (ompi_mtl_ofi_request_t*) mtl_request;
 
-    MTL_OFI_MAP_COMM_TO_CONTEXT(ofi_req->comm->c_contextid, ctxt_id);
+    ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(ofi_req->comm->c_contextid);
 
     switch (ofi_req->type) {
     case OMPI_MTL_OFI_SEND: