Added MPI_THREAD_MULTIPLE support for btl/usnic.
Этот коммит содержится в:
родитель
80e362de52
Коммит
ee85204c12
@ -335,3 +335,40 @@ libfabric abstractions:
|
|||||||
fi_fabric: corresponds to a VIC PF
|
fi_fabric: corresponds to a VIC PF
|
||||||
fi_domain: corresponds to a VIC VF
|
fi_domain: corresponds to a VIC VF
|
||||||
fi_endpoint: resources inside the VIC VF (basically a QP)
|
fi_endpoint: resources inside the VIC VF (basically a QP)
|
||||||
|
|
||||||
|
======================================
|
||||||
|
|
||||||
|
MPI_THREAD_MULTIPLE support
|
||||||
|
|
||||||
|
In order to make the usnic BTL thread-safe, mutex locks are used
|
||||||
|
to protect the critical path, i.e., libfabric routines, bookkeeping, etc.
|
||||||
|
|
||||||
|
The said lock is btl_usnic_lock. It is a RECURSIVE lock, meaning that
|
||||||
|
the same thread can take the lock again even if it already holds it. This is to
|
||||||
|
allow the callback function to post another segment right away if we know
|
||||||
|
that the current segment is completed inline. (So we can call send in send
|
||||||
|
without deadlocking.)
|
||||||
|
|
||||||
|
The following two functions take care of hotel checkin/checkout, and we
|
||||||
|
have to protect that part. So we take the mutex lock before we enter the
|
||||||
|
function.
|
||||||
|
|
||||||
|
- opal_btl_usnic_check_rts()
|
||||||
|
- opal_btl_usnic_handle_ack()
|
||||||
|
|
||||||
|
The following functions, which call libfabric routines,
|
||||||
|
|
||||||
|
- opal_btl_usnic_endpoint_send_segment() (fi_send)
|
||||||
|
- opal_btl_usnic_recv_call() (fi_recvmsg)
|
||||||
|
|
||||||
|
have to be protected as well.
|
||||||
|
|
||||||
|
Also, the cclient connectivity check (opal_btl_usnic_connectivity_ping) has to be
|
||||||
|
protected. This happens only at the beginning, but the cclient communicates with the cagent
|
||||||
|
through opal_fd_read/write(), and if two or more clients call opal_fd_write() at the
|
||||||
|
same time, the data might be corrupted.
|
||||||
|
|
||||||
|
With this concept, many functions in btl/usnic that make calls to the
|
||||||
|
listed functions are protected by the OPAL_THREAD_LOCK macro, which will only
|
||||||
|
be active if the user calls MPI_Init_thread() requesting MPI_THREAD_MULTIPLE
|
||||||
|
support.
|
||||||
|
@ -228,7 +228,8 @@ int opal_btl_usnic_connectivity_ping(uint32_t src_ipv4_addr, int src_port,
|
|||||||
uint32_t dest_netmask, int dest_port,
|
uint32_t dest_netmask, int dest_port,
|
||||||
char *dest_nodename,
|
char *dest_nodename,
|
||||||
size_t max_msg_size)
|
size_t max_msg_size)
|
||||||
{
|
{
|
||||||
|
OPAL_THREAD_LOCK(&btl_usnic_lock);
|
||||||
/* If connectivity checking is not enabled, do nothing */
|
/* If connectivity checking is not enabled, do nothing */
|
||||||
if (!mca_btl_usnic_component.connectivity_enabled) {
|
if (!mca_btl_usnic_component.connectivity_enabled) {
|
||||||
return OPAL_SUCCESS;
|
return OPAL_SUCCESS;
|
||||||
@ -259,6 +260,7 @@ int opal_btl_usnic_connectivity_ping(uint32_t src_ipv4_addr, int src_port,
|
|||||||
ABORT("usnic connectivity client IPC write failed");
|
ABORT("usnic connectivity client IPC write failed");
|
||||||
/* Will not return */
|
/* Will not return */
|
||||||
}
|
}
|
||||||
|
OPAL_THREAD_UNLOCK(&btl_usnic_lock);
|
||||||
|
|
||||||
return OPAL_SUCCESS;
|
return OPAL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
@ -509,6 +509,7 @@ opal_btl_usnic_prepare_src(
|
|||||||
size_t* size,
|
size_t* size,
|
||||||
uint32_t flags)
|
uint32_t flags)
|
||||||
{
|
{
|
||||||
|
OPAL_THREAD_LOCK(&btl_usnic_lock);
|
||||||
opal_btl_usnic_module_t *module = (opal_btl_usnic_module_t*) base_module;
|
opal_btl_usnic_module_t *module = (opal_btl_usnic_module_t*) base_module;
|
||||||
opal_btl_usnic_send_frag_t *frag;
|
opal_btl_usnic_send_frag_t *frag;
|
||||||
uint32_t payload_len;
|
uint32_t payload_len;
|
||||||
@ -552,6 +553,7 @@ opal_btl_usnic_prepare_src(
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
OPAL_THREAD_UNLOCK(&btl_usnic_lock);
|
||||||
return &frag->sf_base.uf_base;
|
return &frag->sf_base.uf_base;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -86,6 +86,9 @@
|
|||||||
|
|
||||||
#define OPAL_BTL_USNIC_NUM_COMPLETIONS 500
|
#define OPAL_BTL_USNIC_NUM_COMPLETIONS 500
|
||||||
|
|
||||||
|
/* MPI_THREAD_MULTIPLE_SUPPORT */
|
||||||
|
opal_recursive_mutex_t btl_usnic_lock;
|
||||||
|
|
||||||
/* RNG buffer definition */
|
/* RNG buffer definition */
|
||||||
opal_rng_buff_t opal_btl_usnic_rand_buff = {0};
|
opal_rng_buff_t opal_btl_usnic_rand_buff = {0};
|
||||||
|
|
||||||
@ -222,6 +225,8 @@ static int usnic_component_close(void)
|
|||||||
opal_btl_usnic_cleanup_tests();
|
opal_btl_usnic_cleanup_tests();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
OBJ_DESTRUCT(&btl_usnic_lock);
|
||||||
|
|
||||||
return OPAL_SUCCESS;
|
return OPAL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -615,13 +620,22 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
|
|||||||
|
|
||||||
*num_btl_modules = 0;
|
*num_btl_modules = 0;
|
||||||
|
|
||||||
/* Currently refuse to run if MPI_THREAD_MULTIPLE is enabled */
|
/* MPI_THREAD_MULTIPLE is only supported in 2.0+ */
|
||||||
if (want_mpi_threads && !mca_btl_base_thread_multiple_override) {
|
if (want_mpi_threads && !mca_btl_base_thread_multiple_override) {
|
||||||
opal_output_verbose(5, USNIC_OUT,
|
|
||||||
"btl:usnic: MPI_THREAD_MULTIPLE not supported; skipping this component");
|
if (OMPI_MAJOR_VERSION >= 2) {
|
||||||
return NULL;
|
opal_output_verbose(5, USNIC_OUT,
|
||||||
|
"btl:usnic: MPI_THREAD_MULTIPLE support is in testing phase.");
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
opal_output_verbose(5, USNIC_OUT,
|
||||||
|
"btl:usnic: MPI_THREAD_MULTIPLE is not supported in version < 2.");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
OBJ_CONSTRUCT(&btl_usnic_lock, opal_recursive_mutex_t);
|
||||||
|
|
||||||
/* We only want providers named "usnic that are of type EP_DGRAM */
|
/* We only want providers named "usnic that are of type EP_DGRAM */
|
||||||
fabric_attr.prov_name = "usnic";
|
fabric_attr.prov_name = "usnic";
|
||||||
ep_attr.type = FI_EP_DGRAM;
|
ep_attr.type = FI_EP_DGRAM;
|
||||||
@ -1151,6 +1165,8 @@ static int usnic_handle_completion(
|
|||||||
/* Make the completion be Valgrind-defined */
|
/* Make the completion be Valgrind-defined */
|
||||||
opal_memchecker_base_mem_defined(seg, sizeof(*seg));
|
opal_memchecker_base_mem_defined(seg, sizeof(*seg));
|
||||||
|
|
||||||
|
OPAL_THREAD_LOCK(&btl_usnic_lock);
|
||||||
|
|
||||||
/* Handle work completions */
|
/* Handle work completions */
|
||||||
switch(seg->us_type) {
|
switch(seg->us_type) {
|
||||||
|
|
||||||
@ -1181,6 +1197,8 @@ static int usnic_handle_completion(
|
|||||||
BTL_ERROR(("Unhandled completion segment type %d", seg->us_type));
|
BTL_ERROR(("Unhandled completion segment type %d", seg->us_type));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
OPAL_THREAD_UNLOCK(&btl_usnic_lock);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1086,6 +1086,7 @@ opal_btl_usnic_module_progress_sends(
|
|||||||
/*
|
/*
|
||||||
* Handle all the retransmits we can
|
* Handle all the retransmits we can
|
||||||
*/
|
*/
|
||||||
|
OPAL_THREAD_LOCK(&btl_usnic_lock);
|
||||||
if (OPAL_UNLIKELY(!opal_list_is_empty(&module->pending_resend_segs))) {
|
if (OPAL_UNLIKELY(!opal_list_is_empty(&module->pending_resend_segs))) {
|
||||||
usnic_do_resends(module);
|
usnic_do_resends(module);
|
||||||
}
|
}
|
||||||
@ -1195,6 +1196,7 @@ opal_btl_usnic_module_progress_sends(
|
|||||||
|
|
||||||
endpoint = next_endpoint;
|
endpoint = next_endpoint;
|
||||||
}
|
}
|
||||||
|
OPAL_THREAD_UNLOCK(&btl_usnic_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1229,6 +1231,7 @@ usnic_send(
|
|||||||
opal_btl_usnic_module_t *module;
|
opal_btl_usnic_module_t *module;
|
||||||
opal_btl_usnic_send_segment_t *sseg;
|
opal_btl_usnic_send_segment_t *sseg;
|
||||||
|
|
||||||
|
OPAL_THREAD_LOCK(&btl_usnic_lock);
|
||||||
endpoint = (opal_btl_usnic_endpoint_t *)base_endpoint;
|
endpoint = (opal_btl_usnic_endpoint_t *)base_endpoint;
|
||||||
module = (opal_btl_usnic_module_t *)base_module;
|
module = (opal_btl_usnic_module_t *)base_module;
|
||||||
frag = (opal_btl_usnic_send_frag_t*) descriptor;
|
frag = (opal_btl_usnic_send_frag_t*) descriptor;
|
||||||
@ -1337,6 +1340,7 @@ usnic_send(
|
|||||||
|
|
||||||
++module->stats.pml_module_sends;
|
++module->stats.pml_module_sends;
|
||||||
|
|
||||||
|
OPAL_THREAD_UNLOCK(&btl_usnic_lock);
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -53,6 +53,12 @@
|
|||||||
|
|
||||||
BEGIN_C_DECLS
|
BEGIN_C_DECLS
|
||||||
|
|
||||||
|
/*
|
||||||
|
* MPI_THREAD_MULTIPLE support
|
||||||
|
*/
|
||||||
|
extern opal_recursive_mutex_t btl_usnic_lock;
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Forward declarations to avoid include loops
|
* Forward declarations to avoid include loops
|
||||||
*/
|
*/
|
||||||
|
@ -340,8 +340,9 @@ void opal_btl_usnic_recv_call(opal_btl_usnic_module_t *module,
|
|||||||
opal_output(0, " Received ACK for sequence number %" UDSEQ " from %s to %s\n",
|
opal_output(0, " Received ACK for sequence number %" UDSEQ " from %s to %s\n",
|
||||||
bseg->us_btl_header->ack_seq, remote_ip, local_ip);
|
bseg->us_btl_header->ack_seq, remote_ip, local_ip);
|
||||||
#endif
|
#endif
|
||||||
|
OPAL_THREAD_LOCK(&btl_usnic_lock);
|
||||||
opal_btl_usnic_handle_ack(endpoint, ack_seq);
|
opal_btl_usnic_handle_ack(endpoint, ack_seq);
|
||||||
|
OPAL_THREAD_UNLOCK(&btl_usnic_lock);
|
||||||
goto repost;
|
goto repost;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -157,8 +157,10 @@ opal_btl_usnic_check_rx_seq(
|
|||||||
#if MSGDEBUG1
|
#if MSGDEBUG1
|
||||||
opal_output(0, "Handle piggy-packed ACK seq %"UDSEQ"\n", seg->rs_base.us_btl_header->ack_seq);
|
opal_output(0, "Handle piggy-packed ACK seq %"UDSEQ"\n", seg->rs_base.us_btl_header->ack_seq);
|
||||||
#endif
|
#endif
|
||||||
|
OPAL_THREAD_LOCK(&btl_usnic_lock);
|
||||||
opal_btl_usnic_handle_ack(endpoint,
|
opal_btl_usnic_handle_ack(endpoint,
|
||||||
seg->rs_base.us_btl_header->ack_seq);
|
seg->rs_base.us_btl_header->ack_seq);
|
||||||
|
OPAL_THREAD_UNLOCK(&btl_usnic_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Do we have room in the endpoint's receiver window?
|
/* Do we have room in the endpoint's receiver window?
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user