Merge pull request #6361 from aravindksg/fix_tg_segfault
mtl/ofi: Fix segfault when not using Thread-Grouping feature
Commit 7a593cea4a
@@ -72,7 +72,7 @@ by reducing the bits available for the communicator ID field in the OFI tag.
 
 SCALABLE ENDPOINTS:
 -------------------
-OFI MTL supports OFI Scalable Endpoints feature as a means to improve
+OFI MTL supports OFI Scalable Endpoints (SEP) feature as a means to improve
 multi-threaded application throughput and message rate. Currently the feature
 is designed to utilize multiple TX/RX contexts exposed by the OFI provider in
 conjunction with a multi-communicator MPI application model. Therefore, new OFI
@@ -81,12 +81,13 @@ instead of creating them all at once during init time and this approach also
 favours only creating as many contexts as needed.
 
 1. Multi-communicator model:
-With this approach, the application first duplicates the communicators it
-wants to use with MPI operations (ideally creating as many communicators as
-the number of threads it wants to use to call into MPI). The duplicated
-communicators are then used by the corresponding threads to perform MPI
-operations. A possible usage scenario could be in an MPI + OMP
-application as follows (example limited to 2 ranks):
+With this approach, the MPI application is required to first duplicate
+the communicators it wants to use with MPI operations (ideally creating
+as many communicators as the number of threads it wants to use to call
+into MPI). The duplicated communicators are then used by the
+corresponding threads to perform MPI operations. A possible usage
+scenario could be in an MPI + OMP application as follows
+(example limited to 2 ranks):
 
 MPI_Comm dup_comm[n];
 MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
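
The diff elides the README's full code sample between these two hunks. For reference, a minimal sketch of the duplicated-communicator pattern described above, assuming illustrative names (NUM_THREADS, buf; not the README's verbatim example):

    /* Multi-communicator model sketch: one duplicated communicator
     * per thread, each driving its own MPI traffic. */
    #include <mpi.h>
    #include <omp.h>
    #define NUM_THREADS 4            /* illustrative thread count */

    int main(int argc, char **argv)
    {
        MPI_Comm dup_comm[NUM_THREADS];
        int provided, rank;

        MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        for (int i = 0; i < NUM_THREADS; i++) {
            MPI_Comm_dup(MPI_COMM_WORLD, &dup_comm[i]);
        }

        #pragma omp parallel num_threads(NUM_THREADS)
        {
            int tid = omp_get_thread_num();
            char buf = 'x';
            /* Each thread uses only its own communicator, which Thread
             * Grouping maps to a distinct OFI SEP context. */
            if (0 == rank) {
                MPI_Send(&buf, 1, MPI_CHAR, 1, tid, dup_comm[tid]);
            } else if (1 == rank) {
                MPI_Recv(&buf, 1, MPI_CHAR, 0, tid, dup_comm[tid],
                         MPI_STATUS_IGNORE);
            }
        }

        for (int i = 0; i < NUM_THREADS; i++) {
            MPI_Comm_free(&dup_comm[i]);
        }
        MPI_Finalize();
        return 0;
    }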
@@ -112,13 +113,17 @@ favours only creating as many contexts as needed.
 }
 
 2. MCA variables:
-To utilize the feature, the following MCA variable needs to be set:
+To utilize the feature, the following MCA variables need to be set:
 mtl_ofi_enable_sep:
-This MCA variable needs to be set to enable the use of Scalable Endpoints
+This MCA variable needs to be set to enable the use of Scalable Endpoints (SEP)
 feature in the OFI MTL. The underlying provider is also checked to ensure the
 feature is supported. If the provider chosen does not support it, user needs
-to either set this variable to 0 or select different provider which supports
+to either set this variable to 0 or select a different provider which supports
 the feature.
+For single-threaded applications one OFI context is sufficient, so OFI SEPs
+may not add benefit.
+Note that mtl_ofi_thread_grouping (see below) needs to be enabled to use the
+different OFI SEP contexts. Otherwise, only one context (ctxt 0) will be used.
 
 Default: 0
 
@@ -126,7 +131,12 @@ To utilize the feature, the following MCA variable needs to be set:
 "-mca mtl_ofi_enable_sep 1"
 
 mtl_ofi_thread_grouping:
-This MCA variable needs to be set to switch Thread Grouping feature on.
+Turn Thread Grouping feature on. This is needed to use the Multi-communicator
+model explained above. This means that the OFI MTL will use the communicator
+ID to decide the SEP contexts to be used by the thread. In this way, each
+thread will have direct access to different OFI resources. If disabled,
+only context 0 will be used.
+Requires mtl_ofi_enable_sep to be set to 1.
 
 Default: 0
 
@@ -139,11 +149,11 @@ To utilize the feature, the following MCA variable needs to be set:
 "-mca mtl_ofi_thread_grouping 1"
 
 mtl_ofi_num_ctxts:
-MCA variable allows user to set the number of OFI contexts the applications
-expects to use. For multi-threaded applications using Thread Grouping
-feature, this number should be set to the number of user threads that will
-call into MPI. For single-threaded applications one OFI context is
-sufficient.
+This MCA variable allows user to set the number of OFI SEP contexts the
+application expects to use. For multi-threaded applications using Thread
+Grouping feature, this number should be set to the number of user threads
+that will call into MPI. This variable will only have effect if
+mtl_ofi_enable_sep is set to 1.
 
 Default: 1
 
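
Putting the three variables together, a hypothetical launch line for the 2-rank, 4-threads-per-rank scenario sketched earlier ("./app" is a placeholder; "--mca pml cm" is assumed here to force the MTL path and may not be needed on every setup):

    mpirun -np 2 --mca pml cm --mca mtl ofi \
        --mca mtl_ofi_enable_sep 1 \
        --mca mtl_ofi_thread_grouping 1 \
        --mca mtl_ofi_num_ctxts 4 ./app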
@@ -325,10 +325,18 @@ ompi_mtl_ofi_isend_callback(struct fi_cq_tagged_entry *wc,
     return OMPI_SUCCESS;
 }
 
-#define MTL_OFI_MAP_COMM_TO_CONTEXT(comm_id, ctxt_id) \
-    do { \
-        ctxt_id = ompi_mtl_ofi.comm_to_context[comm_id]; \
-    } while (0);
+/* Return OFI context ID associated with the specific communicator */
+__opal_attribute_always_inline__ static inline int
+ompi_mtl_ofi_map_comm_to_ctxt(uint32_t comm_id)
+{
+    /* For non-thread-grouping use case, only one context is used which is
+     * associated to MPI_COMM_WORLD, so use that. */
+    if (0 == ompi_mtl_ofi.thread_grouping) {
+        comm_id = 0;
+    }
+
+    return ompi_mtl_ofi.comm_to_context[comm_id];
+}
 
 __opal_attribute_always_inline__ static inline int
 ompi_mtl_ofi_ssend_recv(ompi_mtl_ofi_request_t *ack_req,
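
This hunk is the core of the fix: with Thread Grouping disabled, only context 0 is set up, so indexing comm_to_context by a raw communicator ID (as the removed macro did) could read unallocated memory for any communicator other than MPI_COMM_WORLD. A minimal self-contained mock of the same guard (simplified struct and names, not Open MPI source):

    #include <stdint.h>
    #include <stdio.h>

    struct mock_mtl_ofi {
        int thread_grouping;     /* mtl_ofi_thread_grouping MCA value */
        int comm_to_context[1];  /* only entry 0 exists when grouping is off */
    };

    static struct mock_mtl_ofi ompi_mtl_ofi = { 0, { 0 } };

    static inline int map_comm_to_ctxt(uint32_t comm_id)
    {
        /* Same guard as the fix: without Thread Grouping, every
         * communicator must resolve to the single context 0. */
        if (0 == ompi_mtl_ofi.thread_grouping) {
            comm_id = 0;
        }
        return ompi_mtl_ofi.comm_to_context[comm_id];
    }

    int main(void)
    {
        /* A duplicated communicator gets a fresh ID (say, 4).  The old
         * macro would index comm_to_context[4] -- out of bounds here --
         * while the guarded helper safely returns context 0. */
        printf("ctxt for comm 4: %d\n", map_comm_to_ctxt(4));
        return 0;
    }

The remaining hunks below apply the same mechanical change at every call site: the macro invocation becomes a call to the new inline function.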
@@ -342,7 +350,7 @@ ompi_mtl_ofi_ssend_recv(ompi_mtl_ofi_request_t *ack_req,
     ssize_t ret = OMPI_SUCCESS;
     int ctxt_id = 0;
 
-    MTL_OFI_MAP_COMM_TO_CONTEXT(comm->c_contextid, ctxt_id);
+    ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(comm->c_contextid);
     set_thread_context(ctxt_id);
 
     ack_req = malloc(sizeof(ompi_mtl_ofi_request_t));
@@ -397,7 +405,7 @@ ompi_mtl_ofi_send_generic(struct mca_mtl_base_module_t *mtl,
     fi_addr_t src_addr = 0;
     fi_addr_t sep_peer_fiaddr = 0;
 
-    MTL_OFI_MAP_COMM_TO_CONTEXT(comm->c_contextid, ctxt_id);
+    ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(comm->c_contextid);
     set_thread_context(ctxt_id);
 
     /**
@@ -532,7 +540,7 @@ ompi_mtl_ofi_isend_generic(struct mca_mtl_base_module_t *mtl,
     ompi_mtl_ofi_request_t *ack_req = NULL; /* For synchronous send */
     fi_addr_t sep_peer_fiaddr = 0;
 
-    MTL_OFI_MAP_COMM_TO_CONTEXT(comm->c_contextid, ctxt_id);
+    ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(comm->c_contextid);
     set_thread_context(ctxt_id);
 
     ofi_req->event_callback = ompi_mtl_ofi_isend_callback;
@@ -617,7 +625,7 @@ ompi_mtl_ofi_recv_callback(struct fi_cq_tagged_entry *wc,
     ompi_status_public_t *status = NULL;
     struct fi_msg_tagged tagged_msg;
 
-    MTL_OFI_MAP_COMM_TO_CONTEXT(ofi_req->comm->c_contextid, ctxt_id);
+    ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(ofi_req->comm->c_contextid);
 
     assert(ofi_req->super.ompi_req);
     status = &ofi_req->super.ompi_req->req_status;
@@ -758,7 +766,7 @@ ompi_mtl_ofi_irecv_generic(struct mca_mtl_base_module_t *mtl,
     size_t length;
     bool free_after;
 
-    MTL_OFI_MAP_COMM_TO_CONTEXT(comm->c_contextid, ctxt_id);
+    ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(comm->c_contextid);
     set_thread_context(ctxt_id);
 
     if (ofi_cq_data) {
@@ -884,7 +892,7 @@ ompi_mtl_ofi_imrecv(struct mca_mtl_base_module_t *mtl,
     uint64_t msgflags = FI_CLAIM | FI_COMPLETION;
     struct ompi_communicator_t *comm = (*message)->comm;
 
-    MTL_OFI_MAP_COMM_TO_CONTEXT(comm->c_contextid, ctxt_id);
+    ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(comm->c_contextid);
     set_thread_context(ctxt_id);
 
     ompi_ret = ompi_mtl_datatype_recv_buf(convertor,
@@ -977,7 +985,7 @@ ompi_mtl_ofi_iprobe_generic(struct mca_mtl_base_module_t *mtl,
     uint64_t msgflags = FI_PEEK | FI_COMPLETION;
     int ctxt_id = 0;
 
-    MTL_OFI_MAP_COMM_TO_CONTEXT(comm->c_contextid, ctxt_id);
+    ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(comm->c_contextid);
     set_thread_context(ctxt_id);
 
     if (ofi_cq_data) {
@@ -1066,7 +1074,7 @@ ompi_mtl_ofi_improbe_generic(struct mca_mtl_base_module_t *mtl,
     uint64_t msgflags = FI_PEEK | FI_CLAIM | FI_COMPLETION;
     int ctxt_id = 0;
 
-    MTL_OFI_MAP_COMM_TO_CONTEXT(comm->c_contextid, ctxt_id);
+    ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(comm->c_contextid);
     set_thread_context(ctxt_id);
 
     ofi_req = malloc(sizeof *ofi_req);
@@ -1168,7 +1176,7 @@ ompi_mtl_ofi_cancel(struct mca_mtl_base_module_t *mtl,
     int ret, ctxt_id = 0;
     ompi_mtl_ofi_request_t *ofi_req = (ompi_mtl_ofi_request_t*) mtl_request;
 
-    MTL_OFI_MAP_COMM_TO_CONTEXT(ofi_req->comm->c_contextid, ctxt_id);
+    ctxt_id = ompi_mtl_ofi_map_comm_to_ctxt(ofi_req->comm->c_contextid);
 
     switch (ofi_req->type) {
     case OMPI_MTL_OFI_SEND: