  * Fix a counter roll-over issue that could result from a large (but
    not excessive) number of outstanding put/get/accumulate calls
    during a single synchronization epoch (Refs trac:506); a short
    sketch of the roll-over follows the ticket list below
  * Fix an epoch issue with the rdma component that would affect PWSC
    (post/wait/start/complete) synchronization (Refs trac:507)

This commit was SVN r12673.

The following Trac tickets were found above:
  Ticket 506 --> https://svn.open-mpi.org/trac/ompi/ticket/506
  Ticket 507 --> https://svn.open-mpi.org/trac/ompi/ticket/507
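
To make the first fix concrete, here is a minimal standalone sketch (not taken from the commit) of the roll-over that ticket 506 describes: on typical ABIs where short is 16 bits, queuing more than 32767 requests to a single peer in one epoch wraps a counter of the old type of p2p_num_pending_sendreqs, while the new unsigned int counter does not.

    /* Hypothetical illustration only -- the counter types mirror the commit,
       but this program is not part of Open MPI. */
    #include <stdio.h>

    int main(void)
    {
        short old_counter = 0;        /* old counter type (16-bit on most ABIs) */
        unsigned int new_counter = 0; /* counter type after this commit */
        int i;

        /* queue 40000 put/get/accumulate requests to one target rank */
        for (i = 0; i < 40000; ++i) {
            ++old_counter;
            ++new_counter;
        }

        printf("short counter:        %d\n", old_counter);   /* typically wraps negative */
        printf("unsigned int counter: %u\n", new_counter);   /* 40000 as expected */
        return 0;
    }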
This commit is contained in:
Brian Barrett 2006-11-27 21:41:29 +00:00
parent 59cfee0cd2
commit 0c25f7be09
8 changed files with 76 additions and 44 deletions
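
Because every counter buffer becomes unsigned int, the MPI datatype passed to the fence collectives changes from MPI_SHORT to MPI_UNSIGNED, as the hunks below show. A minimal standalone sketch of that reduce_scatter pattern (illustrative only; the variables loosely mirror p2p_copy_num_pending_sendreqs and p2p_fence_coll_counts, but this is not Open MPI code):

    #include <stdio.h>
    #include <stdlib.h>
    #include <mpi.h>

    int main(int argc, char **argv)
    {
        int i, rank, size;
        unsigned int *pending, incoming = 0;
        int *counts;

        MPI_Init(&argc, &argv);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_size(MPI_COMM_WORLD, &size);

        /* per-peer counts of queued requests for this access epoch */
        pending = calloc(size, sizeof(unsigned int));
        pending[(rank + 1) % size] = 1;      /* pretend we target one peer */

        /* each rank receives exactly one summed counter */
        counts = malloc(size * sizeof(int));
        for (i = 0; i < size; ++i) counts[i] = 1;

        /* MPI_UNSIGNED matches unsigned int, so large counts no longer wrap */
        MPI_Reduce_scatter(pending, &incoming, counts,
                           MPI_UNSIGNED, MPI_SUM, MPI_COMM_WORLD);

        printf("rank %d expects %u incoming requests\n", rank, incoming);

        free(pending);
        free(counts);
        MPI_Finalize();
        return 0;
    }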

View file

@ -87,10 +87,10 @@ struct ompi_osc_pt2pt_module_t {
started. p2p_lock must be held when modifying this field. */
opal_list_t p2p_pending_sendreqs;
- /** list of int16_t counters for the number of requests to a
+ /** list of unsigned int counters for the number of requests to a
particular rank in p2p_comm for this access epoc. p2p_lock
must be held when modifying this field */
- short *p2p_num_pending_sendreqs;
+ unsigned int *p2p_num_pending_sendreqs;
/** For MPI_Fence synchronization, the number of messages to send
in epoch. For Start/Complete, the number of updates for this
@ -124,15 +124,15 @@ struct ompi_osc_pt2pt_module_t {
opal_list_t p2p_long_msgs;
opal_list_t p2p_copy_pending_sendreqs;
- short *p2p_copy_num_pending_sendreqs;
+ unsigned int *p2p_copy_num_pending_sendreqs;
/* ********************* FENCE data ************************ */
/* an array of <sizeof(p2p_comm)> ints, each containing the value
1. */
int *p2p_fence_coll_counts;
- /* an array of <sizeof(p2p_comm)> shorts, for use in experimenting
+ /* an array of <sizeof(p2p_comm)> unsigned ints, for use in experimenting
with different synchronization costs */
- short *p2p_fence_coll_results;
+ unsigned int *p2p_fence_coll_results;
/* ********************* PWSC data ************************ */

View file

@ -257,7 +257,7 @@ ompi_osc_pt2pt_component_select(ompi_win_t *win,
OBJ_CONSTRUCT(&module->p2p_pending_control_sends, opal_list_t);
OBJ_CONSTRUCT(&module->p2p_pending_sendreqs, opal_list_t);
- module->p2p_num_pending_sendreqs = (short*)malloc(sizeof(short) *
+ module->p2p_num_pending_sendreqs = (unsigned int*)malloc(sizeof(unsigned int) *
ompi_comm_size(module->p2p_comm));
if (NULL == module->p2p_num_pending_sendreqs) {
OBJ_DESTRUCT(&module->p2p_pending_sendreqs);
@ -268,7 +268,7 @@ ompi_osc_pt2pt_component_select(ompi_win_t *win,
return ret;
}
memset(module->p2p_num_pending_sendreqs, 0,
- sizeof(short) * ompi_comm_size(module->p2p_comm));
+ sizeof(unsigned int) * ompi_comm_size(module->p2p_comm));
module->p2p_num_pending_out = 0;
module->p2p_num_pending_in = 0;
@ -279,7 +279,7 @@ ompi_osc_pt2pt_component_select(ompi_win_t *win,
OBJ_CONSTRUCT(&(module->p2p_long_msgs), opal_list_t);
OBJ_CONSTRUCT(&(module->p2p_copy_pending_sendreqs), opal_list_t);
- module->p2p_copy_num_pending_sendreqs = (short*)malloc(sizeof(short) *
+ module->p2p_copy_num_pending_sendreqs = (unsigned int*)malloc(sizeof(unsigned int) *
ompi_comm_size(module->p2p_comm));
if (NULL == module->p2p_copy_num_pending_sendreqs) {
OBJ_DESTRUCT(&module->p2p_copy_pending_sendreqs);
@ -293,7 +293,7 @@ ompi_osc_pt2pt_component_select(ompi_win_t *win,
return ret;
}
memset(module->p2p_num_pending_sendreqs, 0,
- sizeof(short) * ompi_comm_size(module->p2p_comm));
+ sizeof(unsigned int) * ompi_comm_size(module->p2p_comm));
/* fence data */
module->p2p_fence_coll_counts = (int*)malloc(sizeof(int) *
@ -314,7 +314,7 @@ ompi_osc_pt2pt_component_select(ompi_win_t *win,
module->p2p_fence_coll_counts[i] = 1;
}
- module->p2p_fence_coll_results = (short*)malloc(sizeof(short) *
+ module->p2p_fence_coll_results = (unsigned int*)malloc(sizeof(unsigned int) *
ompi_comm_size(module->p2p_comm));
if (NULL == module->p2p_fence_coll_results) {
free(module->p2p_fence_coll_counts);

View file

@ -264,7 +264,7 @@ ompi_osc_pt2pt_sendreq_send(ompi_osc_pt2pt_module_t *module,
#endif
/* send fragment */
- opal_output_verbose(50, ompi_osc_base_output,
+ opal_output_verbose(51, ompi_osc_base_output,
"%d sending sendreq to %d",
sendreq->req_module->p2p_comm->c_my_rank,
sendreq->req_target_rank);

View file

@ -63,7 +63,7 @@ ompi_osc_pt2pt_progress_long(ompi_osc_pt2pt_module_t *module)
static inline void
ompi_osc_pt2pt_flip_sendreqs(ompi_osc_pt2pt_module_t *module)
{
- short *tmp;
+ unsigned int *tmp;
OPAL_THREAD_LOCK(&(module->p2p_lock));
@ -72,7 +72,7 @@ ompi_osc_pt2pt_flip_sendreqs(ompi_osc_pt2pt_module_t *module)
module->p2p_num_pending_sendreqs;
module->p2p_num_pending_sendreqs = tmp;
memset(module->p2p_num_pending_sendreqs, 0,
- sizeof(short) * ompi_comm_size(module->p2p_comm));
+ sizeof(unsigned int) * ompi_comm_size(module->p2p_comm));
/* Copy in all the pending requests */
opal_list_join(&module->p2p_copy_pending_sendreqs,
@ -86,7 +86,7 @@ ompi_osc_pt2pt_flip_sendreqs(ompi_osc_pt2pt_module_t *module)
int
ompi_osc_pt2pt_module_fence(int assert, ompi_win_t *win)
{
- short incoming_reqs;
+ unsigned int incoming_reqs;
int ret = OMPI_SUCCESS, i;
if (0 != (assert & MPI_MODE_NOPRECEDE)) {
@ -116,7 +116,7 @@ ompi_osc_pt2pt_module_fence(int assert, ompi_win_t *win)
c_coll.coll_reduce_scatter(P2P_MODULE(win)->p2p_copy_num_pending_sendreqs,
&incoming_reqs,
P2P_MODULE(win)->p2p_fence_coll_counts,
- MPI_SHORT,
+ MPI_UNSIGNED,
MPI_SUM,
P2P_MODULE(win)->p2p_comm);
@ -200,6 +200,9 @@ ompi_osc_pt2pt_module_start(ompi_group_t *group,
P2P_MODULE(win)->p2p_sc_group = group;
OPAL_THREAD_UNLOCK(&(P2P_MODULE(win)->p2p_lock));
+ memset(P2P_MODULE(win)->p2p_sc_remote_active_ranks, 0,
+     sizeof(bool) * ompi_comm_size(P2P_MODULE(win)->p2p_comm));
/* for each process in the specified group, find it's rank in our
communicator, store those indexes, and set the true / false in
the active ranks table */
@ -263,11 +266,12 @@ ompi_osc_pt2pt_module_complete(ompi_win_t *win)
OPAL_THREAD_ADD32(&(P2P_MODULE(win)->p2p_num_pending_out),
P2P_MODULE(win)->p2p_copy_num_pending_sendreqs[comm_rank]);
- ompi_osc_pt2pt_control_send(P2P_MODULE(win),
-     P2P_MODULE(win)->p2p_sc_group->grp_proc_pointers[i],
-     OMPI_OSC_PT2PT_HDR_COMPLETE,
-     P2P_MODULE(win)->p2p_copy_num_pending_sendreqs[comm_rank],
-     0);
+ ret = ompi_osc_pt2pt_control_send(P2P_MODULE(win),
+     P2P_MODULE(win)->p2p_sc_group->grp_proc_pointers[i],
+     OMPI_OSC_PT2PT_HDR_COMPLETE,
+     P2P_MODULE(win)->p2p_copy_num_pending_sendreqs[comm_rank],
+     0);
+ assert(ret == OMPI_SUCCESS);
}
/* try to start all the requests. We've copied everything we

View file

@ -72,10 +72,10 @@ struct ompi_osc_rdma_module_t {
started. p2p_lock must be held when modifying this field. */
opal_list_t p2p_pending_sendreqs;
- /** list of int16_t counters for the number of requests to a
+ /** list of unsigned int counters for the number of requests to a
particular rank in p2p_comm for this access epoc. p2p_lock
must be held when modifying this field */
- short *p2p_num_pending_sendreqs;
+ unsigned int *p2p_num_pending_sendreqs;
/** For MPI_Fence synchronization, the number of messages to send
in epoch. For Start/Complete, the number of updates for this
@ -109,7 +109,7 @@ struct ompi_osc_rdma_module_t {
opal_list_t p2p_long_msgs;
opal_list_t p2p_copy_pending_sendreqs;
- short *p2p_copy_num_pending_sendreqs;
+ unsigned int *p2p_copy_num_pending_sendreqs;
bool p2p_eager_send;
@ -117,9 +117,9 @@ struct ompi_osc_rdma_module_t {
/* an array of <sizeof(p2p_comm)> ints, each containing the value
1. */
int *p2p_fence_coll_counts;
- /* an array of <sizeof(p2p_comm)> shorts, for use in experimenting
+ /* an array of <sizeof(p2p_comm)> unsigned ints, for use in experimenting
with different synchronization costs */
- short *p2p_fence_coll_results;
+ unsigned int *p2p_fence_coll_results;
mca_osc_fence_sync_t p2p_fence_sync_type;

View file

@ -264,7 +264,7 @@ ompi_osc_rdma_component_select(ompi_win_t *win,
}
OBJ_CONSTRUCT(&module->p2p_pending_sendreqs, opal_list_t);
- module->p2p_num_pending_sendreqs = (short*)malloc(sizeof(short) *
+ module->p2p_num_pending_sendreqs = (unsigned int*)malloc(sizeof(unsigned int) *
ompi_comm_size(module->p2p_comm));
if (NULL == module->p2p_num_pending_sendreqs) {
OBJ_DESTRUCT(&module->p2p_pending_sendreqs);
@ -275,7 +275,7 @@ ompi_osc_rdma_component_select(ompi_win_t *win,
return ret;
}
memset(module->p2p_num_pending_sendreqs, 0,
- sizeof(short) * ompi_comm_size(module->p2p_comm));
+ sizeof(unsigned int) * ompi_comm_size(module->p2p_comm));
module->p2p_num_pending_out = 0;
module->p2p_num_pending_in = 0;
@ -286,7 +286,7 @@ ompi_osc_rdma_component_select(ompi_win_t *win,
OBJ_CONSTRUCT(&(module->p2p_long_msgs), opal_list_t);
OBJ_CONSTRUCT(&(module->p2p_copy_pending_sendreqs), opal_list_t);
- module->p2p_copy_num_pending_sendreqs = (short*)malloc(sizeof(short) *
+ module->p2p_copy_num_pending_sendreqs = (unsigned int*)malloc(sizeof(unsigned int) *
ompi_comm_size(module->p2p_comm));
if (NULL == module->p2p_copy_num_pending_sendreqs) {
OBJ_DESTRUCT(&module->p2p_copy_pending_sendreqs);
@ -300,7 +300,7 @@ ompi_osc_rdma_component_select(ompi_win_t *win,
return ret;
}
memset(module->p2p_num_pending_sendreqs, 0,
- sizeof(short) * ompi_comm_size(module->p2p_comm));
+ sizeof(unsigned int) * ompi_comm_size(module->p2p_comm));
module->p2p_eager_send = check_config_value_bool("eager_send", info);
@ -323,7 +323,7 @@ ompi_osc_rdma_component_select(ompi_win_t *win,
module->p2p_fence_coll_counts[i] = 1;
}
- module->p2p_fence_coll_results = (short*)malloc(sizeof(short) *
+ module->p2p_fence_coll_results = (unsigned int*)malloc(sizeof(unsigned int) *
ompi_comm_size(module->p2p_comm));
if (NULL == module->p2p_fence_coll_results) {
free(module->p2p_fence_coll_counts);

View file

@ -92,6 +92,8 @@ ompi_osc_rdma_sendreq_send_cb(struct mca_btl_base_module_t* btl,
(ompi_osc_rdma_sendreq_t*) descriptor->des_cbdata;
ompi_osc_rdma_send_header_t *header =
(ompi_osc_rdma_send_header_t*) descriptor->des_src[0].seg_addr.pval;
+ opal_list_item_t *item;
+ ompi_osc_rdma_module_t *module = sendreq->req_module;
if (OMPI_SUCCESS != status) {
/* requeue and return */
@ -151,7 +153,26 @@ ompi_osc_rdma_sendreq_send_cb(struct mca_btl_base_module_t* btl,
btl->btl_free(btl, descriptor);
/* any other sendreqs to restart? */
- /* BWB - FIX ME - implement sending the next sendreq here */
+ while (NULL !=
+        (item = opal_list_remove_first(&(module->p2p_copy_pending_sendreqs)))) {
+     ompi_osc_rdma_sendreq_t *req =
+         (ompi_osc_rdma_sendreq_t*) item;
+     int ret;
+     ret = ompi_osc_rdma_sendreq_send(module, req);
+     if (OMPI_SUCCESS != ret) {
+         opal_output_verbose(5, ompi_osc_base_output,
+             "fence: failure in starting sendreq (%d). Will try later.",
+             ret);
+         opal_list_append(&(module->p2p_copy_pending_sendreqs), item);
+         if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == ret ||
+             OMPI_ERR_OUT_OF_RESOURCE == ret) {
+             break;
+         }
+     }
+ }
}

View file

@ -64,7 +64,7 @@ ompi_osc_rdma_progress(ompi_osc_rdma_module_t *module)
static inline void
ompi_osc_rdma_flip_sendreqs(ompi_osc_rdma_module_t *module)
{
- short *tmp;
+ unsigned int *tmp;
OPAL_THREAD_LOCK(&(module->p2p_lock));
@ -73,7 +73,7 @@ ompi_osc_rdma_flip_sendreqs(ompi_osc_rdma_module_t *module)
module->p2p_num_pending_sendreqs;
module->p2p_num_pending_sendreqs = tmp;
memset(module->p2p_num_pending_sendreqs, 0,
- sizeof(short) * ompi_comm_size(module->p2p_comm));
+ sizeof(unsigned int) * ompi_comm_size(module->p2p_comm));
/* Copy in all the pending requests */
opal_list_join(&module->p2p_copy_pending_sendreqs,
@ -87,7 +87,7 @@ ompi_osc_rdma_flip_sendreqs(ompi_osc_rdma_module_t *module)
int
ompi_osc_rdma_module_fence(int assert, ompi_win_t *win)
{
- short incoming_reqs;
+ unsigned int incoming_reqs;
int ret = OMPI_SUCCESS, i;
if (0 != (assert & MPI_MODE_NOPRECEDE)) {
@ -120,7 +120,7 @@ ompi_osc_rdma_module_fence(int assert, ompi_win_t *win)
c_coll.coll_reduce_scatter(P2P_MODULE(win)->p2p_copy_num_pending_sendreqs,
&incoming_reqs,
P2P_MODULE(win)->p2p_fence_coll_counts,
- MPI_SHORT,
+ MPI_UNSIGNED,
MPI_SUM,
P2P_MODULE(win)->p2p_comm);
break;
@ -130,7 +130,7 @@ ompi_osc_rdma_module_fence(int assert, ompi_win_t *win)
c_coll.coll_allreduce(P2P_MODULE(win)->p2p_copy_num_pending_sendreqs,
P2P_MODULE(win)->p2p_fence_coll_results,
ompi_comm_size(P2P_MODULE(win)->p2p_comm),
- MPI_SHORT,
+ MPI_UNSIGNED,
MPI_SUM,
P2P_MODULE(win)->p2p_comm);
incoming_reqs = P2P_MODULE(win)->
@ -141,10 +141,10 @@ ompi_osc_rdma_module_fence(int assert, ompi_win_t *win)
ret = P2P_MODULE(win)->p2p_comm->
c_coll.coll_alltoall(P2P_MODULE(win)->p2p_copy_num_pending_sendreqs,
1,
- MPI_SHORT,
+ MPI_UNSIGNED,
P2P_MODULE(win)->p2p_fence_coll_results,
1,
- MPI_SHORT,
+ MPI_UNSIGNED,
P2P_MODULE(win)->p2p_comm);
incoming_reqs = 0;
for (i = 0 ; i < ompi_comm_size(P2P_MODULE(win)->p2p_comm) ; ++i) {
@ -198,6 +198,11 @@ ompi_osc_rdma_module_fence(int assert, ompi_win_t *win)
"fence: failure in starting sendreq (%d). Will try later.",
ret);
opal_list_append(&(P2P_MODULE(win)->p2p_copy_pending_sendreqs), item);
+ if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == ret ||
+     OMPI_ERR_OUT_OF_RESOURCE == ret) {
+     break;
+ }
}
}
@ -301,11 +306,12 @@ ompi_osc_rdma_module_complete(ompi_win_t *win)
OPAL_THREAD_ADD32(&(P2P_MODULE(win)->p2p_num_pending_out),
P2P_MODULE(win)->p2p_copy_num_pending_sendreqs[comm_rank]);
- ompi_osc_rdma_control_send(P2P_MODULE(win),
-     P2P_MODULE(win)->p2p_sc_group->grp_proc_pointers[i],
-     OMPI_OSC_RDMA_HDR_COMPLETE,
-     P2P_MODULE(win)->p2p_copy_num_pending_sendreqs[comm_rank],
-     0);
+ ret = ompi_osc_rdma_control_send(P2P_MODULE(win),
+     P2P_MODULE(win)->p2p_sc_group->grp_proc_pointers[i],
+     OMPI_OSC_RDMA_HDR_COMPLETE,
+     P2P_MODULE(win)->p2p_copy_num_pending_sendreqs[comm_rank],
+     0);
+ assert(ret == OMPI_SUCCESS);
}
/* try to start all the requests. We've copied everything we
@ -327,6 +333,7 @@ ompi_osc_rdma_module_complete(ompi_win_t *win)
}
/* wait for all the requests */
+ ompi_osc_rdma_progress(P2P_MODULE(win));
while (0 != P2P_MODULE(win)->p2p_num_pending_out) {
ompi_osc_rdma_progress(P2P_MODULE(win));
}
@ -364,7 +371,7 @@ ompi_osc_rdma_module_post(ompi_group_t *group,
/* Set our mode to expose w/ post */
ompi_win_remove_mode(win, OMPI_WIN_FENCE);
- ompi_win_set_mode(win, OMPI_WIN_EXPOSE_EPOCH | OMPI_WIN_POSTED);
+ ompi_win_append_mode(win, OMPI_WIN_EXPOSE_EPOCH | OMPI_WIN_POSTED);
/* list how many complete counters we're still waiting on */
OPAL_THREAD_ADD32(&(P2P_MODULE(win)->p2p_num_complete_msgs),