1
1

Added MPI_THREAD_MULTIPLE support for btl/usnic.

Этот коммит содержится в:
Thananon Patinyasakdikul 2016-06-02 11:52:32 -07:00 коммит произвёл Thananon Patinyasakdikul
родитель 80e362de52
Коммит ee85204c12
8 изменённых файлов: 78 добавлений и 6 удалений

Просмотреть файл

@ -335,3 +335,40 @@ libfabric abstractions:
fi_fabric: corresponds to a VIC PF
fi_domain: corresponds to a VIC VF
fi_endpoint: resources inside the VIC VF (basically a QP)
======================================
MPI_THREAD_MULTIPLE support
In order to make the usnic BTL thread-safe, mutex locks are used
to protect the critical path, i.e., libfabric routines, bookkeeping, etc.
This lock is btl_usnic_lock. It is a RECURSIVE lock, meaning that
the same thread can take the lock again even if it already holds it. This
allows the callback function to post another segment right away if we know
that the current segment completed inline. (So we can call send from within
send without deadlocking.)
These two functions take care of hotel check-in/check-out, and we
have to protect that part, so we take the mutex lock before entering
them:
- opal_btl_usnic_check_rts()
- opal_btl_usnic_handle_ack()
The calls to the libfabric routines also have to be protected:
- opal_btl_usnic_endpoint_send_segment() (fi_send)
- opal_btl_usnic_recv_call() (fi_recvmsg)
The cclient connectivity checking (opal_btl_usnic_connectivity_ping) has to be
protected as well. This happens only at startup, but the cclient communicates with
the cagent through opal_fd_read/write(), and if two or more clients call
opal_fd_write() at the same time, the data might be corrupted.
With this scheme, many functions in btl/usnic that call the
functions listed above are protected by the OPAL_THREAD_LOCK macro, which is
only active if the user invoked MPI_Init_thread() with MPI_THREAD_MULTIPLE
support.

Просмотреть файл

@ -228,7 +228,8 @@ int opal_btl_usnic_connectivity_ping(uint32_t src_ipv4_addr, int src_port,
uint32_t dest_netmask, int dest_port,
char *dest_nodename,
size_t max_msg_size)
{
{
OPAL_THREAD_LOCK(&btl_usnic_lock);
/* If connectivity checking is not enabled, do nothing */
if (!mca_btl_usnic_component.connectivity_enabled) {
return OPAL_SUCCESS;
@ -259,6 +260,7 @@ int opal_btl_usnic_connectivity_ping(uint32_t src_ipv4_addr, int src_port,
ABORT("usnic connectivity client IPC write failed");
/* Will not return */
}
OPAL_THREAD_UNLOCK(&btl_usnic_lock);
return OPAL_SUCCESS;
}

Просмотреть файл

@ -509,6 +509,7 @@ opal_btl_usnic_prepare_src(
size_t* size,
uint32_t flags)
{
OPAL_THREAD_LOCK(&btl_usnic_lock);
opal_btl_usnic_module_t *module = (opal_btl_usnic_module_t*) base_module;
opal_btl_usnic_send_frag_t *frag;
uint32_t payload_len;
@ -552,6 +553,7 @@ opal_btl_usnic_prepare_src(
#endif
#endif
OPAL_THREAD_UNLOCK(&btl_usnic_lock);
return &frag->sf_base.uf_base;
}

Просмотреть файл

@ -86,6 +86,9 @@
#define OPAL_BTL_USNIC_NUM_COMPLETIONS 500
/* MPI_THREAD_MULTIPLE_SUPPORT */
opal_recursive_mutex_t btl_usnic_lock;
/* RNG buffer definition */
opal_rng_buff_t opal_btl_usnic_rand_buff = {0};
@ -222,6 +225,8 @@ static int usnic_component_close(void)
opal_btl_usnic_cleanup_tests();
#endif
OBJ_DESTRUCT(&btl_usnic_lock);
return OPAL_SUCCESS;
}
@ -615,13 +620,22 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
*num_btl_modules = 0;
/* Currently refuse to run if MPI_THREAD_MULTIPLE is enabled */
/* MPI_THREAD_MULTIPLE is only supported in 2.0+ */
if (want_mpi_threads && !mca_btl_base_thread_multiple_override) {
opal_output_verbose(5, USNIC_OUT,
"btl:usnic: MPI_THREAD_MULTIPLE not supported; skipping this component");
return NULL;
if (OMPI_MAJOR_VERSION >= 2) {
opal_output_verbose(5, USNIC_OUT,
"btl:usnic: MPI_THREAD_MULTIPLE support is in testing phase.");
}
else {
opal_output_verbose(5, USNIC_OUT,
"btl:usnic: MPI_THREAD_MULTIPLE is not supported in version < 2.");
return NULL;
}
}
OBJ_CONSTRUCT(&btl_usnic_lock, opal_recursive_mutex_t);
/* We only want providers named "usnic that are of type EP_DGRAM */
fabric_attr.prov_name = "usnic";
ep_attr.type = FI_EP_DGRAM;
@ -1151,6 +1165,8 @@ static int usnic_handle_completion(
/* Make the completion be Valgrind-defined */
opal_memchecker_base_mem_defined(seg, sizeof(*seg));
OPAL_THREAD_LOCK(&btl_usnic_lock);
/* Handle work completions */
switch(seg->us_type) {
@ -1181,6 +1197,8 @@ static int usnic_handle_completion(
BTL_ERROR(("Unhandled completion segment type %d", seg->us_type));
break;
}
OPAL_THREAD_UNLOCK(&btl_usnic_lock);
return 1;
}

Просмотреть файл

@ -1086,6 +1086,7 @@ opal_btl_usnic_module_progress_sends(
/*
* Handle all the retransmits we can
*/
OPAL_THREAD_LOCK(&btl_usnic_lock);
if (OPAL_UNLIKELY(!opal_list_is_empty(&module->pending_resend_segs))) {
usnic_do_resends(module);
}
@ -1195,6 +1196,7 @@ opal_btl_usnic_module_progress_sends(
endpoint = next_endpoint;
}
OPAL_THREAD_UNLOCK(&btl_usnic_lock);
}
/*
@ -1229,6 +1231,7 @@ usnic_send(
opal_btl_usnic_module_t *module;
opal_btl_usnic_send_segment_t *sseg;
OPAL_THREAD_LOCK(&btl_usnic_lock);
endpoint = (opal_btl_usnic_endpoint_t *)base_endpoint;
module = (opal_btl_usnic_module_t *)base_module;
frag = (opal_btl_usnic_send_frag_t*) descriptor;
@ -1337,6 +1340,7 @@ usnic_send(
++module->stats.pml_module_sends;
OPAL_THREAD_UNLOCK(&btl_usnic_lock);
return rc;
}

Просмотреть файл

@ -53,6 +53,12 @@
BEGIN_C_DECLS
/*
* MPI_THREAD_MULTIPLE support
*/
extern opal_recursive_mutex_t btl_usnic_lock;
/*
* Forward declarations to avoid include loops
*/

Просмотреть файл

@ -340,8 +340,9 @@ void opal_btl_usnic_recv_call(opal_btl_usnic_module_t *module,
opal_output(0, " Received ACK for sequence number %" UDSEQ " from %s to %s\n",
bseg->us_btl_header->ack_seq, remote_ip, local_ip);
#endif
OPAL_THREAD_LOCK(&btl_usnic_lock);
opal_btl_usnic_handle_ack(endpoint, ack_seq);
OPAL_THREAD_UNLOCK(&btl_usnic_lock);
goto repost;
}

Просмотреть файл

@ -157,8 +157,10 @@ opal_btl_usnic_check_rx_seq(
#if MSGDEBUG1
opal_output(0, "Handle piggy-packed ACK seq %"UDSEQ"\n", seg->rs_base.us_btl_header->ack_seq);
#endif
OPAL_THREAD_LOCK(&btl_usnic_lock);
opal_btl_usnic_handle_ack(endpoint,
seg->rs_base.us_btl_header->ack_seq);
OPAL_THREAD_UNLOCK(&btl_usnic_lock);
}
/* Do we have room in the endpoint's receiver window?