More One-sided fixes:

* Fix a counter roll-over issue that could result from a large (but not
  excessive) number of outstanding put/get/accumulate calls during a single
  synchronization epoch (see the sketch below).  (Refs trac:506)
* Fix an epoch issue with the rdma component that would affect PWSC
  (post/wait, start/complete) synchronization.  (Refs trac:507)

This commit was SVN r12673.

The following Trac tickets were found above:
  Ticket 506 --> https://svn.open-mpi.org/trac/ompi/ticket/506
  Ticket 507 --> https://svn.open-mpi.org/trac/ompi/ticket/507
This commit is contained in:
Parent: 59cfee0cd2
Commit: 0c25f7be09
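To illustrate the roll-over behind the first fix: the per-peer pending-request counters in both osc components were 16-bit (short / int16_t), so an access epoch that accumulates more outstanding sendreqs than SHRT_MAX, either on one peer or in the totals summed at fence time, wraps the counter and the completion accounting never drains. A minimal standalone sketch, not code from this commit, with illustrative variable names:

    /* Illustrative only: a 16-bit counter wraps once a large (but not
     * excessive) number of outstanding requests is counted in one epoch,
     * while an unsigned int counter stays exact.  The out-of-range
     * conversion back to short is implementation-defined in C; on the
     * platforms in question it simply wraps. */
    #include <limits.h>
    #include <stdio.h>

    int main(void)
    {
        short narrow = 0;              /* counter width before this commit */
        unsigned int wide = 0;         /* counter width after this commit */
        const int outstanding = 40000; /* more requests than SHRT_MAX (32767) */

        for (int i = 0; i < outstanding; ++i) {
            ++narrow;                  /* rolls over past SHRT_MAX */
            ++wide;                    /* still exact */
        }

        printf("short counter:        %d (SHRT_MAX = %d)\n", narrow, SHRT_MAX);
        printf("unsigned int counter: %u\n", wide);
        return 0;
    }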
@@ -87,10 +87,10 @@ struct ompi_osc_pt2pt_module_t {
         started.  p2p_lock must be held when modifying this field. */
     opal_list_t p2p_pending_sendreqs;
 
-    /** list of int16_t counters for the number of requests to a
+    /** list of unsigned int counters for the number of requests to a
         particular rank in p2p_comm for this access epoc.  p2p_lock
         must be held when modifying this field */
-    short *p2p_num_pending_sendreqs;
+    unsigned int *p2p_num_pending_sendreqs;
 
     /** For MPI_Fence synchronization, the number of messages to send
         in epoch.  For Start/Complete, the number of updates for this
@@ -124,15 +124,15 @@ struct ompi_osc_pt2pt_module_t {
     opal_list_t p2p_long_msgs;
 
     opal_list_t p2p_copy_pending_sendreqs;
-    short *p2p_copy_num_pending_sendreqs;
+    unsigned int *p2p_copy_num_pending_sendreqs;
 
     /* ********************* FENCE data ************************ */
     /* an array of <sizeof(p2p_comm)> ints, each containing the value
        1. */
     int *p2p_fence_coll_counts;
-    /* an array of <sizeof(p2p_comm)> shorts, for use in experimenting
+    /* an array of <sizeof(p2p_comm)> unsigned ints, for use in experimenting
       with different synchronization costs */
-    short *p2p_fence_coll_results;
+    unsigned int *p2p_fence_coll_results;
 
     /* ********************* PWSC data ************************ */
 
@@ -257,7 +257,7 @@ ompi_osc_pt2pt_component_select(ompi_win_t *win,
     OBJ_CONSTRUCT(&module->p2p_pending_control_sends, opal_list_t);
 
     OBJ_CONSTRUCT(&module->p2p_pending_sendreqs, opal_list_t);
-    module->p2p_num_pending_sendreqs = (short*)malloc(sizeof(short) *
+    module->p2p_num_pending_sendreqs = (unsigned int*)malloc(sizeof(unsigned int) *
                                        ompi_comm_size(module->p2p_comm));
     if (NULL == module->p2p_num_pending_sendreqs) {
         OBJ_DESTRUCT(&module->p2p_pending_sendreqs);
@@ -268,7 +268,7 @@ ompi_osc_pt2pt_component_select(ompi_win_t *win,
         return ret;
     }
     memset(module->p2p_num_pending_sendreqs, 0,
-           sizeof(short) * ompi_comm_size(module->p2p_comm));
+           sizeof(unsigned int) * ompi_comm_size(module->p2p_comm));
 
     module->p2p_num_pending_out = 0;
     module->p2p_num_pending_in = 0;
@@ -279,7 +279,7 @@ ompi_osc_pt2pt_component_select(ompi_win_t *win,
     OBJ_CONSTRUCT(&(module->p2p_long_msgs), opal_list_t);
 
     OBJ_CONSTRUCT(&(module->p2p_copy_pending_sendreqs), opal_list_t);
-    module->p2p_copy_num_pending_sendreqs = (short*)malloc(sizeof(short) *
+    module->p2p_copy_num_pending_sendreqs = (unsigned int*)malloc(sizeof(unsigned int) *
                                             ompi_comm_size(module->p2p_comm));
     if (NULL == module->p2p_copy_num_pending_sendreqs) {
         OBJ_DESTRUCT(&module->p2p_copy_pending_sendreqs);
@@ -293,7 +293,7 @@ ompi_osc_pt2pt_component_select(ompi_win_t *win,
         return ret;
     }
     memset(module->p2p_num_pending_sendreqs, 0,
-           sizeof(short) * ompi_comm_size(module->p2p_comm));
+           sizeof(unsigned int) * ompi_comm_size(module->p2p_comm));
 
     /* fence data */
     module->p2p_fence_coll_counts = (int*)malloc(sizeof(int) *
@@ -314,7 +314,7 @@ ompi_osc_pt2pt_component_select(ompi_win_t *win,
         module->p2p_fence_coll_counts[i] = 1;
     }
 
-    module->p2p_fence_coll_results = (short*)malloc(sizeof(short) *
+    module->p2p_fence_coll_results = (unsigned int*)malloc(sizeof(unsigned short) *
                                      ompi_comm_size(module->p2p_comm));
     if (NULL == module->p2p_fence_coll_results) {
         free(module->p2p_fence_coll_counts);
@@ -264,7 +264,7 @@ ompi_osc_pt2pt_sendreq_send(ompi_osc_pt2pt_module_t *module,
 #endif
 
     /* send fragment */
-    opal_output_verbose(50, ompi_osc_base_output,
+    opal_output_verbose(51, ompi_osc_base_output,
                         "%d sending sendreq to %d",
                         sendreq->req_module->p2p_comm->c_my_rank,
                         sendreq->req_target_rank);
@@ -63,7 +63,7 @@ ompi_osc_pt2pt_progress_long(ompi_osc_pt2pt_module_t *module)
 static inline void
 ompi_osc_pt2pt_flip_sendreqs(ompi_osc_pt2pt_module_t *module)
 {
-    short *tmp;
+    unsigned int *tmp;
 
     OPAL_THREAD_LOCK(&(module->p2p_lock));
 
@@ -72,7 +72,7 @@ ompi_osc_pt2pt_flip_sendreqs(ompi_osc_pt2pt_module_t *module)
         module->p2p_num_pending_sendreqs;
     module->p2p_num_pending_sendreqs = tmp;
     memset(module->p2p_num_pending_sendreqs, 0,
-           sizeof(short) * ompi_comm_size(module->p2p_comm));
+           sizeof(unsigned int) * ompi_comm_size(module->p2p_comm));
 
     /* Copy in all the pending requests */
     opal_list_join(&module->p2p_copy_pending_sendreqs,
@@ -86,7 +86,7 @@ ompi_osc_pt2pt_flip_sendreqs(ompi_osc_pt2pt_module_t *module)
 int
 ompi_osc_pt2pt_module_fence(int assert, ompi_win_t *win)
 {
-    short incoming_reqs;
+    unsigned int incoming_reqs;
     int ret = OMPI_SUCCESS, i;
 
     if (0 != (assert & MPI_MODE_NOPRECEDE)) {
@@ -116,7 +116,7 @@ ompi_osc_pt2pt_module_fence(int assert, ompi_win_t *win)
             c_coll.coll_reduce_scatter(P2P_MODULE(win)->p2p_copy_num_pending_sendreqs,
                                        &incoming_reqs,
                                        P2P_MODULE(win)->p2p_fence_coll_counts,
-                                       MPI_SHORT,
+                                       MPI_UNSIGNED,
                                        MPI_SUM,
                                        P2P_MODULE(win)->p2p_comm);
 
@@ -200,6 +200,9 @@ ompi_osc_pt2pt_module_start(ompi_group_t *group,
     P2P_MODULE(win)->p2p_sc_group = group;
     OPAL_THREAD_UNLOCK(&(P2P_MODULE(win)->p2p_lock));
 
+    memset(P2P_MODULE(win)->p2p_sc_remote_active_ranks, 0,
+           sizeof(bool) * ompi_comm_size(P2P_MODULE(win)->p2p_comm));
+
     /* for each process in the specified group, find it's rank in our
        communicator, store those indexes, and set the true / false in
        the active ranks table */
@@ -263,11 +266,12 @@ ompi_osc_pt2pt_module_complete(ompi_win_t *win)
 
         OPAL_THREAD_ADD32(&(P2P_MODULE(win)->p2p_num_pending_out),
                           P2P_MODULE(win)->p2p_copy_num_pending_sendreqs[comm_rank]);
-        ompi_osc_pt2pt_control_send(P2P_MODULE(win),
-                                    P2P_MODULE(win)->p2p_sc_group->grp_proc_pointers[i],
-                                    OMPI_OSC_PT2PT_HDR_COMPLETE,
-                                    P2P_MODULE(win)->p2p_copy_num_pending_sendreqs[comm_rank],
-                                    0);
+        ret = ompi_osc_pt2pt_control_send(P2P_MODULE(win),
+                                          P2P_MODULE(win)->p2p_sc_group->grp_proc_pointers[i],
+                                          OMPI_OSC_PT2PT_HDR_COMPLETE,
+                                          P2P_MODULE(win)->p2p_copy_num_pending_sendreqs[comm_rank],
+                                          0);
+        assert(ret == OMPI_SUCCESS);
     }
 
     /* try to start all the requests.  We've copied everything we
@@ -72,10 +72,10 @@ struct ompi_osc_rdma_module_t {
        started.  p2p_lock must be held when modifying this field. */
     opal_list_t p2p_pending_sendreqs;
 
-    /** list of int16_t counters for the number of requests to a
+    /** list of unsigned int counters for the number of requests to a
        particular rank in p2p_comm for this access epoc.  p2p_lock
        must be held when modifying this field */
-    short *p2p_num_pending_sendreqs;
+    unsigned int *p2p_num_pending_sendreqs;
 
     /** For MPI_Fence synchronization, the number of messages to send
        in epoch.  For Start/Complete, the number of updates for this
@@ -109,7 +109,7 @@ struct ompi_osc_rdma_module_t {
     opal_list_t p2p_long_msgs;
 
     opal_list_t p2p_copy_pending_sendreqs;
-    short *p2p_copy_num_pending_sendreqs;
+    unsigned int *p2p_copy_num_pending_sendreqs;
 
     bool p2p_eager_send;
 
@@ -117,9 +117,9 @@ struct ompi_osc_rdma_module_t {
     /* an array of <sizeof(p2p_comm)> ints, each containing the value
        1. */
     int *p2p_fence_coll_counts;
-    /* an array of <sizeof(p2p_comm)> shorts, for use in experimenting
+    /* an array of <sizeof(p2p_comm)> unsigned ints, for use in experimenting
       with different synchronization costs */
-    short *p2p_fence_coll_results;
+    unsigned int *p2p_fence_coll_results;
 
     mca_osc_fence_sync_t p2p_fence_sync_type;
 
@@ -264,7 +264,7 @@ ompi_osc_rdma_component_select(ompi_win_t *win,
     }
 
     OBJ_CONSTRUCT(&module->p2p_pending_sendreqs, opal_list_t);
-    module->p2p_num_pending_sendreqs = (short*)malloc(sizeof(short) *
+    module->p2p_num_pending_sendreqs = (unsigned int*)malloc(sizeof(unsigned int) *
                                        ompi_comm_size(module->p2p_comm));
     if (NULL == module->p2p_num_pending_sendreqs) {
         OBJ_DESTRUCT(&module->p2p_pending_sendreqs);
@@ -275,7 +275,7 @@ ompi_osc_rdma_component_select(ompi_win_t *win,
         return ret;
     }
     memset(module->p2p_num_pending_sendreqs, 0,
-           sizeof(short) * ompi_comm_size(module->p2p_comm));
+           sizeof(unsigned int) * ompi_comm_size(module->p2p_comm));
 
     module->p2p_num_pending_out = 0;
     module->p2p_num_pending_in = 0;
@@ -286,7 +286,7 @@ ompi_osc_rdma_component_select(ompi_win_t *win,
     OBJ_CONSTRUCT(&(module->p2p_long_msgs), opal_list_t);
 
     OBJ_CONSTRUCT(&(module->p2p_copy_pending_sendreqs), opal_list_t);
-    module->p2p_copy_num_pending_sendreqs = (short*)malloc(sizeof(short) *
+    module->p2p_copy_num_pending_sendreqs = (unsigned int*)malloc(sizeof(unsigned int) *
                                             ompi_comm_size(module->p2p_comm));
     if (NULL == module->p2p_copy_num_pending_sendreqs) {
         OBJ_DESTRUCT(&module->p2p_copy_pending_sendreqs);
@@ -300,7 +300,7 @@ ompi_osc_rdma_component_select(ompi_win_t *win,
         return ret;
     }
     memset(module->p2p_num_pending_sendreqs, 0,
-           sizeof(short) * ompi_comm_size(module->p2p_comm));
+           sizeof(unsigned int) * ompi_comm_size(module->p2p_comm));
 
     module->p2p_eager_send = check_config_value_bool("eager_send", info);
 
@@ -323,7 +323,7 @@ ompi_osc_rdma_component_select(ompi_win_t *win,
         module->p2p_fence_coll_counts[i] = 1;
     }
 
-    module->p2p_fence_coll_results = (short*)malloc(sizeof(short) *
+    module->p2p_fence_coll_results = (unsigned int*)malloc(sizeof(unsigned int) *
                                      ompi_comm_size(module->p2p_comm));
     if (NULL == module->p2p_fence_coll_results) {
         free(module->p2p_fence_coll_counts);
@@ -92,6 +92,8 @@ ompi_osc_rdma_sendreq_send_cb(struct mca_btl_base_module_t* btl,
         (ompi_osc_rdma_sendreq_t*) descriptor->des_cbdata;
     ompi_osc_rdma_send_header_t *header =
         (ompi_osc_rdma_send_header_t*) descriptor->des_src[0].seg_addr.pval;
+    opal_list_item_t *item;
+    ompi_osc_rdma_module_t *module = sendreq->req_module;
 
     if (OMPI_SUCCESS != status) {
         /* requeue and return */
@@ -151,7 +153,26 @@ ompi_osc_rdma_sendreq_send_cb(struct mca_btl_base_module_t* btl,
     btl->btl_free(btl, descriptor);
 
     /* any other sendreqs to restart? */
-    /* BWB - FIX ME - implement sending the next sendreq here */
+    while (NULL !=
+           (item = opal_list_remove_first(&(module->p2p_copy_pending_sendreqs)))) {
+        ompi_osc_rdma_sendreq_t *req =
+            (ompi_osc_rdma_sendreq_t*) item;
+        int ret;
+
+        ret = ompi_osc_rdma_sendreq_send(module, req);
+
+        if (OMPI_SUCCESS != ret) {
+            opal_output_verbose(5, ompi_osc_base_output,
+                                "fence: failure in starting sendreq (%d).  Will try later.",
+                                ret);
+            opal_list_append(&(module->p2p_copy_pending_sendreqs), item);
+
+            if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == ret ||
+                OMPI_ERR_OUT_OF_RESOURCE == ret) {
+                break;
+            }
+        }
+    }
 }
 
 
@@ -64,7 +64,7 @@ ompi_osc_rdma_progress(ompi_osc_rdma_module_t *module)
 static inline void
 ompi_osc_rdma_flip_sendreqs(ompi_osc_rdma_module_t *module)
 {
-    short *tmp;
+    unsigned int *tmp;
 
     OPAL_THREAD_LOCK(&(module->p2p_lock));
 
@@ -73,7 +73,7 @@ ompi_osc_rdma_flip_sendreqs(ompi_osc_rdma_module_t *module)
         module->p2p_num_pending_sendreqs;
     module->p2p_num_pending_sendreqs = tmp;
     memset(module->p2p_num_pending_sendreqs, 0,
-           sizeof(short) * ompi_comm_size(module->p2p_comm));
+           sizeof(unsigned int) * ompi_comm_size(module->p2p_comm));
 
     /* Copy in all the pending requests */
     opal_list_join(&module->p2p_copy_pending_sendreqs,
@@ -87,7 +87,7 @@ ompi_osc_rdma_flip_sendreqs(ompi_osc_rdma_module_t *module)
 int
 ompi_osc_rdma_module_fence(int assert, ompi_win_t *win)
 {
-    short incoming_reqs;
+    unsigned int incoming_reqs;
     int ret = OMPI_SUCCESS, i;
 
     if (0 != (assert & MPI_MODE_NOPRECEDE)) {
@@ -120,7 +120,7 @@ ompi_osc_rdma_module_fence(int assert, ompi_win_t *win)
             c_coll.coll_reduce_scatter(P2P_MODULE(win)->p2p_copy_num_pending_sendreqs,
                                        &incoming_reqs,
                                        P2P_MODULE(win)->p2p_fence_coll_counts,
-                                       MPI_SHORT,
+                                       MPI_UNSIGNED,
                                        MPI_SUM,
                                        P2P_MODULE(win)->p2p_comm);
         break;
@@ -130,7 +130,7 @@ ompi_osc_rdma_module_fence(int assert, ompi_win_t *win)
             c_coll.coll_allreduce(P2P_MODULE(win)->p2p_copy_num_pending_sendreqs,
                                   P2P_MODULE(win)->p2p_fence_coll_results,
                                   ompi_comm_size(P2P_MODULE(win)->p2p_comm),
-                                  MPI_SHORT,
+                                  MPI_UNSIGNED,
                                   MPI_SUM,
                                   P2P_MODULE(win)->p2p_comm);
         incoming_reqs = P2P_MODULE(win)->
@@ -141,10 +141,10 @@ ompi_osc_rdma_module_fence(int assert, ompi_win_t *win)
         ret = P2P_MODULE(win)->p2p_comm->
             c_coll.coll_alltoall(P2P_MODULE(win)->p2p_copy_num_pending_sendreqs,
                                  1,
-                                 MPI_SHORT,
+                                 MPI_UNSIGNED,
                                  P2P_MODULE(win)->p2p_fence_coll_results,
                                  1,
-                                 MPI_SHORT,
+                                 MPI_UNSIGNED,
                                  P2P_MODULE(win)->p2p_comm);
         incoming_reqs = 0;
         for (i = 0 ; i < ompi_comm_size(P2P_MODULE(win)->p2p_comm) ; ++i) {
@@ -198,6 +198,11 @@ ompi_osc_rdma_module_fence(int assert, ompi_win_t *win)
                                     "fence: failure in starting sendreq (%d).  Will try later.",
                                     ret);
                 opal_list_append(&(P2P_MODULE(win)->p2p_copy_pending_sendreqs), item);
+
+                if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == ret ||
+                    OMPI_ERR_OUT_OF_RESOURCE == ret) {
+                    break;
+                }
             }
         }
 
@@ -301,11 +306,12 @@ ompi_osc_rdma_module_complete(ompi_win_t *win)
 
         OPAL_THREAD_ADD32(&(P2P_MODULE(win)->p2p_num_pending_out),
                           P2P_MODULE(win)->p2p_copy_num_pending_sendreqs[comm_rank]);
-        ompi_osc_rdma_control_send(P2P_MODULE(win),
-                                   P2P_MODULE(win)->p2p_sc_group->grp_proc_pointers[i],
-                                   OMPI_OSC_RDMA_HDR_COMPLETE,
-                                   P2P_MODULE(win)->p2p_copy_num_pending_sendreqs[comm_rank],
-                                   0);
+        ret = ompi_osc_rdma_control_send(P2P_MODULE(win),
+                                         P2P_MODULE(win)->p2p_sc_group->grp_proc_pointers[i],
+                                         OMPI_OSC_RDMA_HDR_COMPLETE,
+                                         P2P_MODULE(win)->p2p_copy_num_pending_sendreqs[comm_rank],
+                                         0);
+        assert(ret == OMPI_SUCCESS);
     }
 
     /* try to start all the requests.  We've copied everything we
@@ -327,6 +333,7 @@ ompi_osc_rdma_module_complete(ompi_win_t *win)
     }
 
     /* wait for all the requests */
+    ompi_osc_rdma_progress(P2P_MODULE(win));
     while (0 != P2P_MODULE(win)->p2p_num_pending_out) {
         ompi_osc_rdma_progress(P2P_MODULE(win));
     }
@@ -364,7 +371,7 @@ ompi_osc_rdma_module_post(ompi_group_t *group,
 
     /* Set our mode to expose w/ post */
     ompi_win_remove_mode(win, OMPI_WIN_FENCE);
-    ompi_win_set_mode(win, OMPI_WIN_EXPOSE_EPOCH | OMPI_WIN_POSTED);
+    ompi_win_append_mode(win, OMPI_WIN_EXPOSE_EPOCH | OMPI_WIN_POSTED);
 
     /* list how many complete counters we're still waiting on */
     OPAL_THREAD_ADD32(&(P2P_MODULE(win)->p2p_num_complete_msgs),
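A note on the MPI_SHORT to MPI_UNSIGNED changes in the fence paths above: the datatype handed to reduce_scatter / allreduce / alltoall has to describe the actual C element type of the counter buffers, so widening the counters to unsigned int forces the matching datatype change in every collective that exchanges them. A minimal standalone sketch of the fence-style reduction, with illustrative names rather than the module's fields:

    /* Sketch: sum per-peer pending-request counts so each rank learns how
     * many incoming requests to expect.  The MPI datatype (MPI_UNSIGNED)
     * must match the C element type (unsigned int) of the buffers, which is
     * why the commit changes both together. */
    #include <mpi.h>
    #include <stdlib.h>

    int main(int argc, char **argv)
    {
        int size, i;
        unsigned int *counts, incoming = 0;
        int *recvcounts;

        MPI_Init(&argc, &argv);
        MPI_Comm_size(MPI_COMM_WORLD, &size);

        /* counts[j] = number of requests this rank has queued for rank j */
        counts = calloc(size, sizeof(unsigned int));
        /* one result element per rank, mirroring p2p_fence_coll_counts */
        recvcounts = malloc(size * sizeof(int));
        for (i = 0; i < size; ++i) recvcounts[i] = 1;

        MPI_Reduce_scatter(counts, &incoming, recvcounts,
                           MPI_UNSIGNED, MPI_SUM, MPI_COMM_WORLD);

        free(counts);
        free(recvcounts);
        MPI_Finalize();
        return 0;
    }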