Merge pull request #1778 from thananon/usnic_thread_safe
Added MPI_THREAD_MULTIPLE support for btl/usnic.
Этот коммит содержится в:
Коммит
b7e937fea5
@ -335,3 +335,40 @@ libfabric abstractions:
|
||||
fi_fabric: corresponds to a VIC PF
|
||||
fi_domain: corresponds to a VIC VF
|
||||
fi_endpoint: resources inside the VIC VF (basically a QP)
|
||||
|
||||
======================================
|
||||
|
||||
MPI_THREAD_MULTIPLE support
|
||||
|
||||
In order to make usnic btl thread-safe, the mutex locks are issued
|
||||
to protect the critical path, i.e., libfabric routines, bookkeeping, etc.
|
||||
|
||||
The said lock is btl_usnic_lock. It is a RECURSIVE lock, meaning that
|
||||
the same thread can take the lock again even if it already has the lock to
|
||||
allow the callback function to post another segment right away if we know
|
||||
that the current segment is completed inline. (So we can call send in send
|
||||
without deadlocking)
|
||||
|
||||
These two functions take care of hotel check-in/check-out, and we
|
||||
have to protect that part. So we take the mutex lock before we enter the
|
||||
function.
|
||||
|
||||
- opal_btl_usnic_check_rts()
|
||||
- opal_btl_usnic_handle_ack()
|
||||
|
||||
We also have to protect the call to libfabric routines
|
||||
|
||||
- opal_btl_usnic_endpoint_send_segment() (fi_send)
|
||||
- opal_btl_usnic_recv_call() (fi_recvmsg)
|
||||
|
||||
have to be protected as well.
|
||||
|
||||
Also cclient connection checking (opal_btl_usnic_connectivity_ping) has to be
|
||||
protected. This happens only in the beginning, but the cclient communicates with the cagent
|
||||
through opal_fd_read/write() and if two or more clients do opal_fd_write() at the
|
||||
same time, the data might be corrupted.
|
||||
|
||||
With this concept, many functions in btl/usnic that make calls to the
|
||||
listed functions are protected by OPAL_THREAD_LOCK macro which will only
|
||||
be active if the user specifies MPI_Init_thread() with MPI_THREAD_MULTIPLE
|
||||
support.
|
||||
|
@ -228,7 +228,8 @@ int opal_btl_usnic_connectivity_ping(uint32_t src_ipv4_addr, int src_port,
|
||||
uint32_t dest_netmask, int dest_port,
|
||||
char *dest_nodename,
|
||||
size_t max_msg_size)
|
||||
{
|
||||
{
|
||||
OPAL_THREAD_LOCK(&btl_usnic_lock);
|
||||
/* If connectivity checking is not enabled, do nothing */
|
||||
if (!mca_btl_usnic_component.connectivity_enabled) {
|
||||
return OPAL_SUCCESS;
|
||||
@ -259,6 +260,7 @@ int opal_btl_usnic_connectivity_ping(uint32_t src_ipv4_addr, int src_port,
|
||||
ABORT("usnic connectivity client IPC write failed");
|
||||
/* Will not return */
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&btl_usnic_lock);
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
@ -509,6 +509,7 @@ opal_btl_usnic_prepare_src(
|
||||
size_t* size,
|
||||
uint32_t flags)
|
||||
{
|
||||
OPAL_THREAD_LOCK(&btl_usnic_lock);
|
||||
opal_btl_usnic_module_t *module = (opal_btl_usnic_module_t*) base_module;
|
||||
opal_btl_usnic_send_frag_t *frag;
|
||||
uint32_t payload_len;
|
||||
@ -552,6 +553,7 @@ opal_btl_usnic_prepare_src(
|
||||
#endif
|
||||
#endif
|
||||
|
||||
OPAL_THREAD_UNLOCK(&btl_usnic_lock);
|
||||
return &frag->sf_base.uf_base;
|
||||
}
|
||||
|
||||
|
@ -86,6 +86,9 @@
|
||||
|
||||
#define OPAL_BTL_USNIC_NUM_COMPLETIONS 500
|
||||
|
||||
/* MPI_THREAD_MULTIPLE_SUPPORT */
|
||||
opal_recursive_mutex_t btl_usnic_lock;
|
||||
|
||||
/* RNG buffer definition */
|
||||
opal_rng_buff_t opal_btl_usnic_rand_buff = {0};
|
||||
|
||||
@ -222,6 +225,8 @@ static int usnic_component_close(void)
|
||||
opal_btl_usnic_cleanup_tests();
|
||||
#endif
|
||||
|
||||
OBJ_DESTRUCT(&btl_usnic_lock);
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
@ -615,13 +620,22 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
|
||||
|
||||
*num_btl_modules = 0;
|
||||
|
||||
/* Currently refuse to run if MPI_THREAD_MULTIPLE is enabled */
|
||||
/* MPI_THREAD_MULTIPLE is only supported in 2.0+ */
|
||||
if (want_mpi_threads && !mca_btl_base_thread_multiple_override) {
|
||||
opal_output_verbose(5, USNIC_OUT,
|
||||
"btl:usnic: MPI_THREAD_MULTIPLE not supported; skipping this component");
|
||||
return NULL;
|
||||
|
||||
if (OMPI_MAJOR_VERSION >= 2) {
|
||||
opal_output_verbose(5, USNIC_OUT,
|
||||
"btl:usnic: MPI_THREAD_MULTIPLE support is in testing phase.");
|
||||
}
|
||||
else {
|
||||
opal_output_verbose(5, USNIC_OUT,
|
||||
"btl:usnic: MPI_THREAD_MULTIPLE is not supported in version < 2.");
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
OBJ_CONSTRUCT(&btl_usnic_lock, opal_recursive_mutex_t);
|
||||
|
||||
/* We only want providers named "usnic" that are of type EP_DGRAM */
|
||||
fabric_attr.prov_name = "usnic";
|
||||
ep_attr.type = FI_EP_DGRAM;
|
||||
@ -1151,6 +1165,8 @@ static int usnic_handle_completion(
|
||||
/* Make the completion be Valgrind-defined */
|
||||
opal_memchecker_base_mem_defined(seg, sizeof(*seg));
|
||||
|
||||
OPAL_THREAD_LOCK(&btl_usnic_lock);
|
||||
|
||||
/* Handle work completions */
|
||||
switch(seg->us_type) {
|
||||
|
||||
@ -1181,6 +1197,8 @@ static int usnic_handle_completion(
|
||||
BTL_ERROR(("Unhandled completion segment type %d", seg->us_type));
|
||||
break;
|
||||
}
|
||||
|
||||
OPAL_THREAD_UNLOCK(&btl_usnic_lock);
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -1086,6 +1086,7 @@ opal_btl_usnic_module_progress_sends(
|
||||
/*
|
||||
* Handle all the retransmits we can
|
||||
*/
|
||||
OPAL_THREAD_LOCK(&btl_usnic_lock);
|
||||
if (OPAL_UNLIKELY(!opal_list_is_empty(&module->pending_resend_segs))) {
|
||||
usnic_do_resends(module);
|
||||
}
|
||||
@ -1195,6 +1196,7 @@ opal_btl_usnic_module_progress_sends(
|
||||
|
||||
endpoint = next_endpoint;
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&btl_usnic_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1229,6 +1231,7 @@ usnic_send(
|
||||
opal_btl_usnic_module_t *module;
|
||||
opal_btl_usnic_send_segment_t *sseg;
|
||||
|
||||
OPAL_THREAD_LOCK(&btl_usnic_lock);
|
||||
endpoint = (opal_btl_usnic_endpoint_t *)base_endpoint;
|
||||
module = (opal_btl_usnic_module_t *)base_module;
|
||||
frag = (opal_btl_usnic_send_frag_t*) descriptor;
|
||||
@ -1337,6 +1340,7 @@ usnic_send(
|
||||
|
||||
++module->stats.pml_module_sends;
|
||||
|
||||
OPAL_THREAD_UNLOCK(&btl_usnic_lock);
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
@ -53,6 +53,12 @@
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/*
|
||||
* MPI_THREAD_MULTIPLE support
|
||||
*/
|
||||
extern opal_recursive_mutex_t btl_usnic_lock;
|
||||
|
||||
|
||||
/*
|
||||
* Forward declarations to avoid include loops
|
||||
*/
|
||||
|
@ -340,8 +340,9 @@ void opal_btl_usnic_recv_call(opal_btl_usnic_module_t *module,
|
||||
opal_output(0, " Received ACK for sequence number %" UDSEQ " from %s to %s\n",
|
||||
bseg->us_btl_header->ack_seq, remote_ip, local_ip);
|
||||
#endif
|
||||
OPAL_THREAD_LOCK(&btl_usnic_lock);
|
||||
opal_btl_usnic_handle_ack(endpoint, ack_seq);
|
||||
|
||||
OPAL_THREAD_UNLOCK(&btl_usnic_lock);
|
||||
goto repost;
|
||||
}
|
||||
|
||||
|
@ -157,8 +157,10 @@ opal_btl_usnic_check_rx_seq(
|
||||
#if MSGDEBUG1
|
||||
opal_output(0, "Handle piggy-packed ACK seq %"UDSEQ"\n", seg->rs_base.us_btl_header->ack_seq);
|
||||
#endif
|
||||
OPAL_THREAD_LOCK(&btl_usnic_lock);
|
||||
opal_btl_usnic_handle_ack(endpoint,
|
||||
seg->rs_base.us_btl_header->ack_seq);
|
||||
OPAL_THREAD_UNLOCK(&btl_usnic_lock);
|
||||
}
|
||||
|
||||
/* Do we have room in the endpoint's receiver window?
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user