* Fix race condition in post/wait/start/complete synchronization where one
epoch's control data could overwrite the previous epoch's data because we were reusing data structures between PW and SC. Instead, we now have explicit post_msg and complete_msg counters for completion. refs trac:354 * Only register the rdma osc callback once, as it turns out that some btls (MX) do something more than update a table during the register call, and each register call sucks up valuable fragments... This commit was SVN r11745. The following Trac tickets were found above: Ticket 354 --> https://svn.open-mpi.org/trac/ompi/ticket/354
Этот коммит содержится в:
родитель
443af2a259
Коммит
2ec0c4f593
@ -95,20 +95,26 @@ struct ompi_osc_pt2pt_module_t {
|
||||
|
||||
/** For MPI_Fence synchronization, the number of messages to send
|
||||
in epoch. For Start/Complete, the number of updates for this
|
||||
Complete. For Post/Wait (poorly named), the number of
|
||||
Complete counters we're waiting for. For lock, the number of
|
||||
Complete. For lock, the number of
|
||||
messages waiting for completion on the origin side. Not
|
||||
protected by p2p_lock - must use atomic counter operations. */
|
||||
volatile int32_t p2p_num_pending_out;
|
||||
|
||||
/** For MPI_Fence synchronization, the number of expected incoming
|
||||
messages. For Start/Complete, the number of expected Post
|
||||
messages. For Post/Wait, the number of expected updates from
|
||||
complete. For lock, the number of messages on the passive side
|
||||
we are waiting for. Not protected by p2p_lock - must use
|
||||
atomic counter operations. */
|
||||
volatile int32_t p2p_num_pending_in;
|
||||
|
||||
/** Number of "ping" messages from the remote post group we've
|
||||
received */
|
||||
volatile int32_t p2p_num_post_msgs;
|
||||
|
||||
/** Number of "count" messages from the remote complete group
|
||||
we've received */
|
||||
volatile int32_t p2p_num_complete_msgs;
|
||||
|
||||
/** cyclic counter for a unique tag for long messages. Not
|
||||
protected by the p2p_lock - must use create_send_tag() to
|
||||
create a send tag */
|
||||
|
@ -279,6 +279,8 @@ ompi_osc_pt2pt_component_select(ompi_win_t *win,
|
||||
|
||||
module->p2p_num_pending_out = 0;
|
||||
module->p2p_num_pending_in = 0;
|
||||
module->p2p_num_post_msgs = 0;
|
||||
module->p2p_num_complete_msgs = 0;
|
||||
module->p2p_tag_counter = 0;
|
||||
|
||||
OBJ_CONSTRUCT(&(module->p2p_long_msgs), opal_list_t);
|
||||
@ -533,7 +535,7 @@ ompi_osc_pt2pt_component_fragment_cb(ompi_osc_pt2pt_module_t *module,
|
||||
|
||||
assert(module == ompi_osc_pt2pt_windx_to_module(header->hdr_windx));
|
||||
|
||||
OPAL_THREAD_ADD32(&(module->p2p_num_pending_in), -1);
|
||||
OPAL_THREAD_ADD32(&(module->p2p_num_post_msgs), -1);
|
||||
}
|
||||
break;
|
||||
case OMPI_OSC_PT2PT_HDR_COMPLETE:
|
||||
@ -552,7 +554,7 @@ ompi_osc_pt2pt_component_fragment_cb(ompi_osc_pt2pt_module_t *module,
|
||||
|
||||
/* we've heard from one more place, and have value reqs to
|
||||
process */
|
||||
OPAL_THREAD_ADD32(&(module->p2p_num_pending_out), -1);
|
||||
OPAL_THREAD_ADD32(&(module->p2p_num_complete_msgs), -1);
|
||||
OPAL_THREAD_ADD32(&(module->p2p_num_pending_in), header->hdr_value[0]);
|
||||
}
|
||||
break;
|
||||
|
@ -204,7 +204,7 @@ ompi_osc_pt2pt_module_start(ompi_group_t *group,
|
||||
|
||||
/* possible we've already received a couple in messages, so
|
||||
atomically add however many we're going to wait for */
|
||||
OPAL_THREAD_ADD32(&(P2P_MODULE(win)->p2p_num_pending_in),
|
||||
OPAL_THREAD_ADD32(&(P2P_MODULE(win)->p2p_num_post_msgs),
|
||||
ompi_group_size(P2P_MODULE(win)->p2p_sc_group));
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
@ -220,7 +220,7 @@ ompi_osc_pt2pt_module_complete(ompi_win_t *win)
|
||||
opal_list_item_t *item;
|
||||
|
||||
/* wait for all the post messages */
|
||||
while (0 != P2P_MODULE(win)->p2p_num_pending_in) {
|
||||
while (0 != P2P_MODULE(win)->p2p_num_post_msgs) {
|
||||
ompi_osc_pt2pt_progress_long(P2P_MODULE(win));
|
||||
}
|
||||
|
||||
@ -318,7 +318,7 @@ ompi_osc_pt2pt_module_post(ompi_group_t *group,
|
||||
ompi_win_set_mode(win, OMPI_WIN_EXPOSE_EPOCH | OMPI_WIN_POSTED);
|
||||
|
||||
/* list how many complete counters we're still waiting on */
|
||||
OPAL_THREAD_ADD32(&(P2P_MODULE(win)->p2p_num_pending_out),
|
||||
OPAL_THREAD_ADD32(&(P2P_MODULE(win)->p2p_num_complete_msgs),
|
||||
ompi_group_size(P2P_MODULE(win)->p2p_pw_group));
|
||||
|
||||
/* send a hello counter to everyone in group */
|
||||
@ -338,7 +338,7 @@ ompi_osc_pt2pt_module_wait(ompi_win_t *win)
|
||||
ompi_group_t *group;
|
||||
|
||||
while (0 != (P2P_MODULE(win)->p2p_num_pending_in) ||
|
||||
0 != (P2P_MODULE(win)->p2p_num_pending_out)) {
|
||||
0 != (P2P_MODULE(win)->p2p_num_complete_msgs)) {
|
||||
ompi_osc_pt2pt_progress_long(P2P_MODULE(win));
|
||||
}
|
||||
|
||||
@ -364,10 +364,10 @@ ompi_osc_pt2pt_module_test(ompi_win_t *win,
|
||||
ompi_group_t *group;
|
||||
|
||||
if (0 != (P2P_MODULE(win)->p2p_num_pending_in) ||
|
||||
0 != (P2P_MODULE(win)->p2p_num_pending_out)) {
|
||||
0 != (P2P_MODULE(win)->p2p_num_complete_msgs)) {
|
||||
ompi_osc_pt2pt_progress_long(P2P_MODULE(win));
|
||||
if (0 != (P2P_MODULE(win)->p2p_num_pending_in) ||
|
||||
0 != (P2P_MODULE(win)->p2p_num_pending_out)) {
|
||||
0 != (P2P_MODULE(win)->p2p_num_complete_msgs)) {
|
||||
*flag = 0;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
@ -79,20 +79,26 @@ struct ompi_osc_rdma_module_t {
|
||||
|
||||
/** For MPI_Fence synchronization, the number of messages to send
|
||||
in epoch. For Start/Complete, the number of updates for this
|
||||
Complete. For Post/Wait (poorly named), the number of
|
||||
Complete counters we're waiting for. For lock, the number of
|
||||
Complete. For lock, the number of
|
||||
messages waiting for completion on the origin side. Not
|
||||
protected by p2p_lock - must use atomic counter operations. */
|
||||
volatile int32_t p2p_num_pending_out;
|
||||
|
||||
/** For MPI_Fence synchronization, the number of expected incoming
|
||||
messages. For Start/Complete, the number of expected Post
|
||||
messages. For Post/Wait, the number of expected updates from
|
||||
complete. For lock, the number of messages on the passive side
|
||||
we are waiting for. Not protected by p2p_lock - must use
|
||||
atomic counter operations. */
|
||||
volatile int32_t p2p_num_pending_in;
|
||||
|
||||
/** Number of "ping" messages from the remote post group we've
|
||||
received */
|
||||
volatile int32_t p2p_num_post_msgs;
|
||||
|
||||
/** Number of "count" messages from the remote complete group
|
||||
we've received */
|
||||
volatile int32_t p2p_num_complete_msgs;
|
||||
|
||||
/** cyclic counter for a unique tag for long messages. Not
|
||||
protected by the p2p_lock - must use create_send_tag() to
|
||||
create a send tag */
|
||||
|
@ -36,6 +36,7 @@
|
||||
#include "ompi/datatype/dt_arch.h"
|
||||
|
||||
static int ompi_osc_rdma_component_open(void);
|
||||
static int32_t registered_callback = 0;
|
||||
|
||||
ompi_osc_rdma_component_t mca_osc_rdma_component = {
|
||||
{ /* ompi_osc_base_component_t */
|
||||
@ -278,6 +279,8 @@ ompi_osc_rdma_component_select(ompi_win_t *win,
|
||||
|
||||
module->p2p_num_pending_out = 0;
|
||||
module->p2p_num_pending_in = 0;
|
||||
module->p2p_num_post_msgs = 0;
|
||||
module->p2p_num_complete_msgs = 0;
|
||||
module->p2p_tag_counter = 0;
|
||||
|
||||
OBJ_CONSTRUCT(&(module->p2p_long_msgs), opal_list_t);
|
||||
@ -376,10 +379,12 @@ ompi_osc_rdma_component_select(ompi_win_t *win,
|
||||
/* sync memory - make sure all initialization completed */
|
||||
opal_atomic_mb();
|
||||
|
||||
/* register to receive fragment callbacks */
|
||||
ret = mca_bml.bml_register(MCA_BTL_TAG_OSC_RDMA,
|
||||
ompi_osc_rdma_component_fragment_cb,
|
||||
NULL);
|
||||
/* register to receive fragment callbacks, if not already done */
|
||||
if (OPAL_THREAD_ADD32(®istered_callback, 1) <= 1) {
|
||||
ret = mca_bml.bml_register(MCA_BTL_TAG_OSC_RDMA,
|
||||
ompi_osc_rdma_component_fragment_cb,
|
||||
NULL);
|
||||
}
|
||||
|
||||
|
||||
if (module->p2p_eager_send) {
|
||||
@ -558,7 +563,7 @@ ompi_osc_rdma_component_fragment_cb(struct mca_btl_base_module_t *btl,
|
||||
module = ompi_osc_rdma_windx_to_module(header->hdr_windx);
|
||||
if (NULL == module) return;
|
||||
|
||||
OPAL_THREAD_ADD32(&(module->p2p_num_pending_in), -1);
|
||||
OPAL_THREAD_ADD32(&(module->p2p_num_post_msgs), -1);
|
||||
}
|
||||
break;
|
||||
case OMPI_OSC_RDMA_HDR_COMPLETE:
|
||||
@ -579,7 +584,7 @@ ompi_osc_rdma_component_fragment_cb(struct mca_btl_base_module_t *btl,
|
||||
|
||||
/* we've heard from one more place, and have value reqs to
|
||||
process */
|
||||
OPAL_THREAD_ADD32(&(module->p2p_num_pending_out), -1);
|
||||
OPAL_THREAD_ADD32(&(module->p2p_num_complete_msgs), -1);
|
||||
OPAL_THREAD_ADD32(&(module->p2p_num_pending_in), header->hdr_value[0]);
|
||||
}
|
||||
break;
|
||||
|
@ -239,7 +239,7 @@ ompi_osc_rdma_module_start(ompi_group_t *group,
|
||||
|
||||
/* possible we've already received a couple in messages, so
|
||||
atomically add however many we're going to wait for */
|
||||
OPAL_THREAD_ADD32(&(P2P_MODULE(win)->p2p_num_pending_in),
|
||||
OPAL_THREAD_ADD32(&(P2P_MODULE(win)->p2p_num_post_msgs),
|
||||
ompi_group_size(P2P_MODULE(win)->p2p_sc_group));
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
@ -255,7 +255,7 @@ ompi_osc_rdma_module_complete(ompi_win_t *win)
|
||||
opal_list_item_t *item;
|
||||
|
||||
/* wait for all the post messages */
|
||||
while (0 != P2P_MODULE(win)->p2p_num_pending_in) {
|
||||
while (0 != P2P_MODULE(win)->p2p_num_post_msgs) {
|
||||
ompi_osc_rdma_progress(P2P_MODULE(win));
|
||||
}
|
||||
|
||||
@ -353,7 +353,7 @@ ompi_osc_rdma_module_post(ompi_group_t *group,
|
||||
ompi_win_set_mode(win, OMPI_WIN_EXPOSE_EPOCH | OMPI_WIN_POSTED);
|
||||
|
||||
/* list how many complete counters we're still waiting on */
|
||||
OPAL_THREAD_ADD32(&(P2P_MODULE(win)->p2p_num_pending_out),
|
||||
OPAL_THREAD_ADD32(&(P2P_MODULE(win)->p2p_num_complete_msgs),
|
||||
ompi_group_size(P2P_MODULE(win)->p2p_pw_group));
|
||||
|
||||
/* send a hello counter to everyone in group */
|
||||
@ -373,7 +373,7 @@ ompi_osc_rdma_module_wait(ompi_win_t *win)
|
||||
ompi_group_t *group;
|
||||
|
||||
while (0 != (P2P_MODULE(win)->p2p_num_pending_in) ||
|
||||
0 != (P2P_MODULE(win)->p2p_num_pending_out)) {
|
||||
0 != (P2P_MODULE(win)->p2p_num_complete_msgs)) {
|
||||
ompi_osc_rdma_progress(P2P_MODULE(win));
|
||||
}
|
||||
|
||||
@ -399,10 +399,10 @@ ompi_osc_rdma_module_test(ompi_win_t *win,
|
||||
ompi_group_t *group;
|
||||
|
||||
if (0 != (P2P_MODULE(win)->p2p_num_pending_in) ||
|
||||
0 != (P2P_MODULE(win)->p2p_num_pending_out)) {
|
||||
0 != (P2P_MODULE(win)->p2p_num_complete_msgs)) {
|
||||
ompi_osc_rdma_progress(P2P_MODULE(win));
|
||||
if (0 != (P2P_MODULE(win)->p2p_num_pending_in) ||
|
||||
0 != (P2P_MODULE(win)->p2p_num_pending_out)) {
|
||||
0 != (P2P_MODULE(win)->p2p_num_complete_msgs)) {
|
||||
*flag = 0;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user