From 0d507ad970736abd3cd37262d44cb7fdab81feea Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Tue, 31 Jan 2006 02:44:08 +0000 Subject: [PATCH] * Fix a bunch of checks for proper access / exposure epochs * Add implementation of Post/Wait/Start/Complete synchronization This commit was SVN r8848. --- ompi/mca/osc/osc.h | 2 +- ompi/mca/osc/pt2pt/osc_pt2pt.h | 20 ++- ompi/mca/osc/pt2pt/osc_pt2pt_component.c | 30 ++++ ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c | 76 +++++++++- ompi/mca/osc/pt2pt/osc_pt2pt_data_move.h | 4 + ompi/mca/osc/pt2pt/osc_pt2pt_header.h | 16 ++- ompi/mca/osc/pt2pt/osc_pt2pt_replyreq.c | 1 - ompi/mca/osc/pt2pt/osc_pt2pt_sync.c | 175 +++++++++++++++++++++-- ompi/mpi/c/accumulate.c | 3 + ompi/mpi/c/get.c | 3 + ompi/mpi/c/put.c | 3 + ompi/mpi/c/win_complete.c | 4 +- ompi/mpi/c/win_fence.c | 5 +- ompi/mpi/c/win_lock.c | 5 + ompi/mpi/c/win_post.c | 5 + ompi/mpi/c/win_start.c | 5 + ompi/mpi/c/win_test.c | 4 +- ompi/mpi/c/win_unlock.c | 2 + ompi/mpi/c/win_wait.c | 2 + ompi/win/win.c | 1 + ompi/win/win.h | 19 ++- 21 files changed, 356 insertions(+), 29 deletions(-) diff --git a/ompi/mca/osc/osc.h b/ompi/mca/osc/osc.h index bc1998b932..68b62c3951 100644 --- a/ompi/mca/osc/osc.h +++ b/ompi/mca/osc/osc.h @@ -119,7 +119,7 @@ typedef int (*ompi_osc_base_module_post_fn_t)(struct ompi_group_t *group, typedef int (*ompi_osc_base_module_wait_fn_t)(struct ompi_win_t *win); typedef int (*ompi_osc_base_module_test_fn_t)(struct ompi_win_t *win, - int flag); + int *flag); typedef int (*ompi_osc_base_module_lock_fn_t)(int lock_type, int target, diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt.h b/ompi/mca/osc/pt2pt/osc_pt2pt.h index 00a80a27dd..b0005147d3 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt.h +++ b/ompi/mca/osc/pt2pt/osc_pt2pt.h @@ -70,12 +70,25 @@ struct ompi_osc_pt2pt_module_t { /** store weather user disabled locks for this window */ bool p2p_want_locks; - /** array of opal_list_ts, not a pointer to one of them */ + /** array of opal_list_ts, not a pointer to one of them. Array is + of size , although only the first + are used for PWSC synchronization */ opal_list_t *p2p_pending_out_sendreqs; + /* For MPI_Fence synchronization, the number of messages to send + in epoch. For Start/Complete, the number of updates for this + Complete. For Post/Wait (poorly named), the number of Complete + counters we're waiting for.*/ volatile int32_t p2p_num_pending_out; + /* For MPI_Fence synchronization, the number of expected incoming + messages. For Start/Complete, the number of expected Post + messages. For Post/Wait, the number of expected updates from + complete. */ volatile int32_t p2p_num_pending_in; + /* cyclic counter for a unique tage for long messages. Not + protected by the p2p_lock - must use create_send_tag() to + create a send tag */ volatile int32_t p2p_tag_counter; /** list of outstanding long messages that must be processes @@ -83,6 +96,9 @@ struct ompi_osc_pt2pt_module_t { opal_list_t p2p_long_msgs; /** number of outstanding long messages */ volatile int32_t p2p_num_long_msgs; + + struct ompi_group_t *pw_group; + struct ompi_group_t *sc_group; }; typedef struct ompi_osc_pt2pt_module_t ompi_osc_pt2pt_module_t; @@ -191,7 +207,7 @@ int ompi_osc_pt2pt_module_post(struct ompi_group_t *group, int ompi_osc_pt2pt_module_wait(struct ompi_win_t *win); int ompi_osc_pt2pt_module_test(struct ompi_win_t *win, - int flag); + int *flag); int ompi_osc_pt2pt_module_lock(int lock_type, int target, diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_component.c b/ompi/mca/osc/pt2pt/osc_pt2pt_component.c index dd42a60956..3c8eb3b461 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_component.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_component.c @@ -377,6 +377,36 @@ ompi_osc_pt2pt_component_fragment_cb(struct mca_btl_base_module_t *btl, ompi_osc_pt2pt_replyreq_recv(module, sendreq, header, payload); } break; + case OMPI_OSC_PT2PT_HDR_POST: + { + ompi_osc_pt2pt_control_header_t *header = + (ompi_osc_pt2pt_control_header_t*) + descriptor->des_dst[0].seg_addr.pval; + + /* get our module pointer */ + module = ompi_osc_pt2pt_windx_to_module(header->hdr_windx); + if (NULL == module) return; + + OPAL_THREAD_ADD32(&(module->p2p_num_pending_in), -1); + } + break; + case OMPI_OSC_PT2PT_HDR_COMPLETE: + { + ompi_osc_pt2pt_control_header_t *header = + (ompi_osc_pt2pt_control_header_t*) + descriptor->des_dst[0].seg_addr.pval; + + /* get our module pointer */ + module = ompi_osc_pt2pt_windx_to_module(header->hdr_windx); + if (NULL == module) return; + + /* we've heard from one more place, and have value reqs to + process */ + OPAL_THREAD_ADD32(&(module->p2p_num_pending_out), -1); + OPAL_THREAD_ADD32(&(module->p2p_num_pending_in), header->hdr_value); + } + break; + default: /* BWB - FIX ME - this sucks */ opal_output(0, "received packet for Window with unknown type"); diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c b/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c index 6545bfd034..17a6634735 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c @@ -484,7 +484,7 @@ ompi_osc_pt2pt_sendreq_recv_put(ompi_osc_pt2pt_module_t *module, OBJ_DESTRUCT(&convertor); OBJ_RELEASE(datatype); OPAL_THREAD_ADD32(&(module->p2p_num_pending_in), -1); - + } else { ompi_osc_pt2pt_longreq_t *longreq; ompi_osc_pt2pt_longreq_alloc(&longreq); @@ -702,3 +702,77 @@ ompi_osc_pt2pt_replyreq_recv(ompi_osc_pt2pt_module_t *module, return ret; } + + +/********************************************************************** + * + * Recveive a get on the origin side + * + **********************************************************************/ +static void +ompi_osc_pt2pt_control_send_cb(struct mca_btl_base_module_t* btl, + struct mca_btl_base_endpoint_t *endpoint, + struct mca_btl_base_descriptor_t* descriptor, + int status) +{ + /* release the descriptor and sendreq */ + btl->btl_free(btl, descriptor); +} + + +int +ompi_osc_pt2pt_control_send(ompi_osc_pt2pt_module_t *module, + ompi_proc_t *proc, + uint8_t type, int32_t value) +{ + int ret = OMPI_SUCCESS; + mca_bml_base_endpoint_t *endpoint = NULL; + mca_bml_base_btl_t *bml_btl = NULL; + mca_btl_base_descriptor_t *descriptor = NULL; + ompi_osc_pt2pt_control_header_t *header = NULL; + + /* Get a BTL and a fragment to go with it */ + endpoint = (mca_bml_base_endpoint_t*) proc->proc_pml; + bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager); + descriptor = bml_btl->btl_alloc(bml_btl->btl, + sizeof(ompi_osc_pt2pt_control_header_t)); + if (NULL == descriptor) { + ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE; + goto cleanup; + } + + /* verify at least enough space for header */ + if (descriptor->des_src[0].seg_len < sizeof(ompi_osc_pt2pt_control_header_t)) { + ret = OMPI_ERR_OUT_OF_RESOURCE; + goto cleanup; + } + + /* setup descriptor */ + descriptor->des_cbfunc = ompi_osc_pt2pt_control_send_cb; + descriptor->des_cbdata = NULL; + descriptor->des_flags = MCA_BTL_DES_FLAGS_PRIORITY; + descriptor->des_src[0].seg_len = sizeof(ompi_osc_pt2pt_control_header_t); + + /* pack header */ + header = (ompi_osc_pt2pt_control_header_t*) descriptor->des_src[0].seg_addr.pval; + header->hdr_type = type; + header->hdr_value = value; + header->hdr_windx = module->p2p_comm->c_contextid; + +#if 0 /* BWB - FIX ME */ + /* put in network byte order */ + OMPI_OSC_PT2PT_CONTROL_HDR_HTON(header); +#endif + + /* send fragment */ + ret = mca_bml_base_send(bml_btl, descriptor, MCA_BTL_TAG_OSC_PT2PT); + goto done; + + cleanup: + if (descriptor != NULL) { + mca_bml_base_free(bml_btl, descriptor); + } + + done: + return ret; +} diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.h b/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.h index 4b4304daa5..f3ebe2600e 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.h +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.h @@ -48,4 +48,8 @@ int ompi_osc_pt2pt_replyreq_recv(ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_reply_header_t *header, void *payload); +int ompi_osc_pt2pt_control_send(ompi_osc_pt2pt_module_t *module, + ompi_proc_t *proc, + uint8_t type, int32_t value); + #endif diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_header.h b/ompi/mca/osc/pt2pt/osc_pt2pt_header.h index b8200097b3..3cd2756530 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_header.h +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_header.h @@ -52,11 +52,19 @@ struct ompi_osc_pt2pt_reply_header_t { }; typedef struct ompi_osc_pt2pt_reply_header_t ompi_osc_pt2pt_reply_header_t; +struct ompi_osc_pt2pt_control_header_t { + uint8_t hdr_type; + int32_t hdr_windx; + int32_t hdr_value; +}; +typedef struct ompi_osc_pt2pt_control_header_t ompi_osc_pt2pt_control_header_t; -#define OMPI_OSC_PT2PT_HDR_PUT 0x0001 -#define OMPI_OSC_PT2PT_HDR_ACC 0x0002 -#define OMPI_OSC_PT2PT_HDR_GET 0x0004 -#define OMPI_OSC_PT2PT_HDR_REPLY 0x0008 +#define OMPI_OSC_PT2PT_HDR_PUT 0x0001 +#define OMPI_OSC_PT2PT_HDR_ACC 0x0002 +#define OMPI_OSC_PT2PT_HDR_GET 0x0004 +#define OMPI_OSC_PT2PT_HDR_REPLY 0x0008 +#define OMPI_OSC_PT2PT_HDR_POST 0x0010 +#define OMPI_OSC_PT2PT_HDR_COMPLETE 0x0020 /* * Convert a 64 bit value to network byte order. diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_replyreq.c b/ompi/mca/osc/pt2pt/osc_pt2pt_replyreq.c index 62c747b4a9..c7f5be12e2 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_replyreq.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_replyreq.c @@ -1,4 +1,3 @@ - /* * Copyright (c) 2004-2005 The Trustees of Indiana University. * All rights reserved. diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_sync.c b/ompi/mca/osc/pt2pt/osc_pt2pt_sync.c index 6653207160..a78738ebf9 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_sync.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_sync.c @@ -108,6 +108,7 @@ ompi_osc_pt2pt_module_fence(int assert, ompi_win_t *win) P2P_MODULE(win)->p2p_num_pending_in += incoming_reqs; + /* start all the requests */ for (i = 0 ; i < ompi_comm_size(P2P_MODULE(win)->p2p_comm) ; ++i) { opal_list_item_t *item; opal_list_t *req_list = @@ -154,9 +155,9 @@ ompi_osc_pt2pt_module_fence(int assert, ompi_win_t *win) /* all transfers are done - back to the real world we go */ if (0 == (assert & MPI_MODE_NOSUCCEED)) { - win->w_flags = OMPI_WIN_ACCESS_EPOCH | OMPI_WIN_EXPOSE_EPOCH; + win->w_mode = OMPI_WIN_ACCESS_EPOCH | OMPI_WIN_EXPOSE_EPOCH; } else { - win->w_flags = 0; + win->w_mode = 0; } cleanup: @@ -171,40 +172,190 @@ ompi_osc_pt2pt_module_fence(int assert, ompi_win_t *win) int ompi_osc_pt2pt_module_start(ompi_group_t *group, - int assert, - ompi_win_t *win) + int assert, + ompi_win_t *win) { - return OMPI_ERR_NOT_IMPLEMENTED; + OPAL_THREAD_LOCK(&(P2P_MODULE(win)->p2p_lock)); + + OBJ_RETAIN(group); + /* BWB - do I need this? */ + ompi_group_increment_proc_count(group); + P2P_MODULE(win)->sc_group = group; + + /* Set our mode to access w/ start */ + win->w_mode = OMPI_WIN_ACCESS_EPOCH | OMPI_WIN_STARTED; + + /* possible we've already received a couple in messages, so + atomicall add however many we're going to wait for */ + assert(P2P_MODULE(win)->p2p_num_pending_in == 0); + OPAL_THREAD_ADD32(&(P2P_MODULE(win)->p2p_num_pending_in), + ompi_group_size(P2P_MODULE(win)->sc_group)); + + OPAL_THREAD_UNLOCK(&(P2P_MODULE(win)->p2p_lock)); + + return OMPI_SUCCESS; } int ompi_osc_pt2pt_module_complete(ompi_win_t *win) { - return OMPI_ERR_NOT_IMPLEMENTED; + int i; + int ret = OMPI_SUCCESS; + + OPAL_THREAD_LOCK(&(P2P_MODULE(win)->p2p_lock)); + + /* wait for all the post messages */ + while (0 != P2P_MODULE(win)->p2p_num_pending_in) { + ompi_osc_pt2pt_progress(P2P_MODULE(win)); + } + + /* for each process in group, send a control message with number + of updates coming, then start all the requests */ + for (i = 0 ; i < ompi_group_size(P2P_MODULE(win)->sc_group) ; ++i) { + int comm_rank, j; + opal_list_item_t *item; + opal_list_t *req_list; + /* no need to increment ref count - the communicator isn't + going anywhere while we're here */ + ompi_group_t *comm_group = P2P_MODULE(win)->p2p_comm->c_local_group; + int32_t num_reqs; + + /* find the rank in the communicator associated with this windows */ + for (j = 0 ; + j < ompi_group_size(comm_group) ; + ++j) { + if (P2P_MODULE(win)->sc_group->grp_proc_pointers[i] == + comm_group->grp_proc_pointers[j]) { + comm_rank = j; + break; + } + } + + req_list = &(P2P_MODULE(win)->p2p_pending_out_sendreqs[comm_rank]); + + num_reqs = opal_list_get_size(req_list); + OPAL_THREAD_ADD32(&(P2P_MODULE(win)->p2p_num_pending_out), num_reqs); + ompi_osc_pt2pt_control_send(P2P_MODULE(win), + P2P_MODULE(win)->sc_group->grp_proc_pointers[i], + OMPI_OSC_PT2PT_HDR_COMPLETE, num_reqs); + + while (NULL != (item = opal_list_remove_first(req_list))) { + ompi_osc_pt2pt_sendreq_t *req = + (ompi_osc_pt2pt_sendreq_t*) item; + ret = ompi_osc_pt2pt_sendreq_send(P2P_MODULE(win), req); + + if (OMPI_SUCCESS != ret) { + opal_output(0, "complete: failure in starting sendreq"); + opal_list_prepend(req_list, item); + goto cleanup; + } + + } + } + + /* wait for all the requests */ + while (0 != P2P_MODULE(win)->p2p_num_pending_out) { + ompi_osc_pt2pt_progress(P2P_MODULE(win)); + } + + cleanup: + /* set our mode back to nothing */ + win->w_mode = 0; + + /* BWB - do I need this? */ + ompi_group_decrement_proc_count(P2P_MODULE(win)->sc_group); + OBJ_RELEASE(P2P_MODULE(win)->sc_group); + P2P_MODULE(win)->sc_group = NULL; + + OPAL_THREAD_UNLOCK(&(P2P_MODULE(win)->p2p_lock)); + + return ret; } int ompi_osc_pt2pt_module_post(ompi_group_t *group, - int assert, - ompi_win_t *win) + int assert, + ompi_win_t *win) { - return OMPI_ERR_NOT_IMPLEMENTED; + int i; + + OPAL_THREAD_LOCK(&(P2P_MODULE(win)->p2p_lock)); + + OBJ_RETAIN(group); + /* BWB - do I need this? */ + ompi_group_increment_proc_count(group); + P2P_MODULE(win)->pw_group = group; + + /* Set our mode to expose w/ post */ + win->w_mode = OMPI_WIN_EXPOSE_EPOCH | OMPI_WIN_POSTED; + + /* list how many complete counters we're still waiting on */ + OPAL_THREAD_ADD32(&(P2P_MODULE(win)->p2p_num_pending_out), + ompi_group_size(P2P_MODULE(win)->pw_group)); + + OPAL_THREAD_UNLOCK(&(P2P_MODULE(win)->p2p_lock)); + + /* send a hello counter to everyone in group */ + for (i = 0 ; i < ompi_group_size(P2P_MODULE(win)->pw_group) ; ++i) { + ompi_osc_pt2pt_control_send(P2P_MODULE(win), + group->grp_proc_pointers[i], + OMPI_OSC_PT2PT_HDR_POST, 1); + } + + return OMPI_SUCCESS; } int ompi_osc_pt2pt_module_wait(ompi_win_t *win) { - return OMPI_ERR_NOT_IMPLEMENTED; + while (0 != (P2P_MODULE(win)->p2p_num_pending_in) || + 0 != (P2P_MODULE(win)->p2p_num_pending_out)) { + ompi_osc_pt2pt_progress(P2P_MODULE(win)); + } + + OPAL_THREAD_LOCK(&(P2P_MODULE(win)->p2p_lock)); + win->w_mode = 0; + + /* BWB - do I need this? */ + ompi_group_decrement_proc_count(P2P_MODULE(win)->pw_group); + OBJ_RELEASE(P2P_MODULE(win)->pw_group); + P2P_MODULE(win)->pw_group = NULL; + + OPAL_THREAD_UNLOCK(&(P2P_MODULE(win)->p2p_lock)); + + return OMPI_SUCCESS; } int ompi_osc_pt2pt_module_test(ompi_win_t *win, - int flag) + int *flag) { - return OMPI_ERR_NOT_IMPLEMENTED; + if (0 != (P2P_MODULE(win)->p2p_num_pending_in) || + 0 != (P2P_MODULE(win)->p2p_num_pending_out)) { + ompi_osc_pt2pt_progress(P2P_MODULE(win)); + if (0 != (P2P_MODULE(win)->p2p_num_pending_in) || + 0 != (P2P_MODULE(win)->p2p_num_pending_out)) { + *flag = 0; + return OMPI_SUCCESS; + } + } + + *flag = 1; + + OPAL_THREAD_LOCK(&(P2P_MODULE(win)->p2p_lock)); + win->w_mode = 0; + + /* BWB - do I need this? */ + ompi_group_decrement_proc_count(P2P_MODULE(win)->pw_group); + OBJ_RELEASE(P2P_MODULE(win)->pw_group); + P2P_MODULE(win)->pw_group = NULL; + + OPAL_THREAD_UNLOCK(&(P2P_MODULE(win)->p2p_lock)); + + return OMPI_SUCCESS; } diff --git a/ompi/mpi/c/accumulate.c b/ompi/mpi/c/accumulate.c index cfbf8d1c03..247d4da6b7 100644 --- a/ompi/mpi/c/accumulate.c +++ b/ompi/mpi/c/accumulate.c @@ -61,6 +61,9 @@ int MPI_Accumulate(void *origin_addr, int origin_count, MPI_Datatype origin_data (target_datatype->id < DT_MAX_PREDEFINED && -1 == ompi_op_ddt_map[target_datatype->id])) { rc = MPI_ERR_OP; + } else if (0 == (win->w_mode & OMPI_WIN_ACCESS_EPOCH)) { + /* BWB - FIX ME - what error? */ + rc = MPI_ERR_RMA_CONFLICT; } else { OMPI_CHECK_DATATYPE_FOR_SEND(rc, origin_datatype, origin_count); } diff --git a/ompi/mpi/c/get.c b/ompi/mpi/c/get.c index 6cd94c7866..9bb56da650 100644 --- a/ompi/mpi/c/get.c +++ b/ompi/mpi/c/get.c @@ -53,6 +53,9 @@ int MPI_Get(void *origin_addr, int origin_count, rc = MPI_ERR_COUNT; } else if (ompi_win_peer_invalid(win, target_rank)) { rc = MPI_ERR_RANK; + } else if (0 == (win->w_mode & OMPI_WIN_ACCESS_EPOCH)) { + /* BWB - FIX ME - what error? */ + rc = MPI_ERR_RMA_CONFLICT; } else { OMPI_CHECK_DATATYPE_FOR_SEND(rc, origin_datatype, origin_count); } diff --git a/ompi/mpi/c/put.c b/ompi/mpi/c/put.c index efc886a7da..e5cab260ae 100644 --- a/ompi/mpi/c/put.c +++ b/ompi/mpi/c/put.c @@ -52,6 +52,9 @@ int MPI_Put(void *origin_addr, int origin_count, MPI_Datatype origin_datatype, rc = MPI_ERR_COUNT; } else if (ompi_win_peer_invalid(win, target_rank)) { rc = MPI_ERR_RANK; + } else if (0 == (win->w_mode & OMPI_WIN_ACCESS_EPOCH)) { + /* BWB - FIX ME - what error? */ + rc = MPI_ERR_RMA_CONFLICT; } else { OMPI_CHECK_DATATYPE_FOR_SEND(rc, origin_datatype, origin_count); } diff --git a/ompi/mpi/c/win_complete.c b/ompi/mpi/c/win_complete.c index c998991f3a..6bc3809ca7 100644 --- a/ompi/mpi/c/win_complete.c +++ b/ompi/mpi/c/win_complete.c @@ -42,9 +42,11 @@ int MPI_Win_complete(MPI_Win win) if (ompi_win_invalid(win)) { return OMPI_ERRHANDLER_INVOKE(win, MPI_ERR_WIN, FUNC_NAME); + } else if (0 == (win->w_mode & OMPI_WIN_STARTED)) { + return OMPI_ERRHANDLER_INVOKE(win, MPI_ERR_RMA_CONFLICT, FUNC_NAME); } } - rc = win->w_osc_module->osc_wait(win); + rc = win->w_osc_module->osc_complete(win); OMPI_ERRHANDLER_RETURN(rc, win, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/win_fence.c b/ompi/mpi/c/win_fence.c index 90ab4715aa..c6e620294b 100644 --- a/ompi/mpi/c/win_fence.c +++ b/ompi/mpi/c/win_fence.c @@ -45,7 +45,10 @@ int MPI_Win_fence(int assert, MPI_Win win) } else if (0 != (assert & ~(MPI_MODE_NOSTORE | MPI_MODE_NOPUT | MPI_MODE_NOPRECEDE | MPI_MODE_NOSUCCEED))) { return OMPI_ERRHANDLER_INVOKE(win, MPI_ERR_ASSERT, FUNC_NAME); - } + } else if (0 != (win->w_mode & (OMPI_WIN_POSTED | OMPI_WIN_STARTED))) { + /* If we're in a post or start, we can't be in a fence */ + return OMPI_ERRHANDLER_INVOKE(win, MPI_ERR_RMA_CONFLICT, FUNC_NAME); + } } rc = win->w_osc_module->osc_fence(assert, win); diff --git a/ompi/mpi/c/win_lock.c b/ompi/mpi/c/win_lock.c index 92976a97a9..41725e0177 100644 --- a/ompi/mpi/c/win_lock.c +++ b/ompi/mpi/c/win_lock.c @@ -49,6 +49,11 @@ int MPI_Win_lock(int lock_type, int rank, int assert, MPI_Win win) return OMPI_ERRHANDLER_INVOKE(win, MPI_ERR_RANK, FUNC_NAME); } else if (0 != (assert & ~(MPI_MODE_NOCHECK))) { return OMPI_ERRHANDLER_INVOKE(win, MPI_ERR_ASSERT, FUNC_NAME); + } else if (0 != win->w_mode) { + /* window can not be in use at all at this point */ + return OMPI_ERRHANDLER_INVOKE(win, MPI_ERR_RMA_CONFLICT, FUNC_NAME); + } else if (! ompi_win_allow_locks(win)) { + return OMPI_ERRHANDLER_INVOKE(win, MPI_ERR_RMA_SYNC, FUNC_NAME); } } diff --git a/ompi/mpi/c/win_post.c b/ompi/mpi/c/win_post.c index e0dc690f19..7ec596fe25 100644 --- a/ompi/mpi/c/win_post.c +++ b/ompi/mpi/c/win_post.c @@ -45,6 +45,11 @@ int MPI_Win_post(MPI_Group group, int assert, MPI_Win win) } else if (0 != (assert & ~(MPI_MODE_NOCHECK | MPI_MODE_NOSTORE | MPI_MODE_NOPUT))) { return OMPI_ERRHANDLER_INVOKE(win, MPI_ERR_ASSERT, FUNC_NAME); + } else if (0 != (win->w_mode & (OMPI_WIN_ACCESS_EPOCH | + OMPI_WIN_EXPOSE_EPOCH))) { + /* we can't already be in an an exposure or accesss epoch + when we start a post */ + return OMPI_ERRHANDLER_INVOKE(win, MPI_ERR_RMA_CONFLICT, FUNC_NAME); } } diff --git a/ompi/mpi/c/win_start.c b/ompi/mpi/c/win_start.c index d6a1fc95f2..eb58a0a093 100644 --- a/ompi/mpi/c/win_start.c +++ b/ompi/mpi/c/win_start.c @@ -44,6 +44,11 @@ int MPI_Win_start(MPI_Group group, int assert, MPI_Win win) return OMPI_ERRHANDLER_INVOKE(win, MPI_ERR_WIN, FUNC_NAME); } else if (0 != (assert & ~(MPI_MODE_NOCHECK))) { return OMPI_ERRHANDLER_INVOKE(win, MPI_ERR_ASSERT, FUNC_NAME); + } else if (0 != (win->w_mode & (OMPI_WIN_ACCESS_EPOCH | + OMPI_WIN_EXPOSE_EPOCH))) { + /* we can't already be in an an exposure or accesss epoch + when we start a start */ + return OMPI_ERRHANDLER_INVOKE(win, MPI_ERR_RMA_CONFLICT, FUNC_NAME); } } diff --git a/ompi/mpi/c/win_test.c b/ompi/mpi/c/win_test.c index 4025f45311..d227e13048 100644 --- a/ompi/mpi/c/win_test.c +++ b/ompi/mpi/c/win_test.c @@ -42,9 +42,11 @@ int MPI_Win_test(MPI_Win win, int *flag) if (ompi_win_invalid(win)) { return OMPI_ERRHANDLER_INVOKE(win, MPI_ERR_WIN, FUNC_NAME); + } else if (0 == (win->w_mode & OMPI_WIN_POSTED)) { + return OMPI_ERRHANDLER_INVOKE(win, MPI_ERR_RMA_CONFLICT, FUNC_NAME); } } - rc = win->w_osc_module->osc_wait(win); + rc = win->w_osc_module->osc_test(win, flag); OMPI_ERRHANDLER_RETURN(rc, win, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/win_unlock.c b/ompi/mpi/c/win_unlock.c index e1a165409f..0d08996d39 100644 --- a/ompi/mpi/c/win_unlock.c +++ b/ompi/mpi/c/win_unlock.c @@ -44,6 +44,8 @@ int MPI_Win_unlock(int rank, MPI_Win win) return OMPI_ERRHANDLER_INVOKE(win, MPI_ERR_WIN, FUNC_NAME); } else if (ompi_win_peer_invalid(win, rank)) { return OMPI_ERRHANDLER_INVOKE(win, MPI_ERR_RANK, FUNC_NAME); + } else if (0 == (win->w_mode & OMPI_WIN_LOCK_ACCESS)) { + return OMPI_ERRHANDLER_INVOKE(win, MPI_ERR_RMA_CONFLICT, FUNC_NAME); } } diff --git a/ompi/mpi/c/win_wait.c b/ompi/mpi/c/win_wait.c index 20759b45b6..7410234c52 100644 --- a/ompi/mpi/c/win_wait.c +++ b/ompi/mpi/c/win_wait.c @@ -42,6 +42,8 @@ int MPI_Win_wait(MPI_Win win) if (ompi_win_invalid(win)) { return OMPI_ERRHANDLER_INVOKE(win, MPI_ERR_WIN, FUNC_NAME); + } else if (0 == (win->w_mode & OMPI_WIN_POSTED)) { + return OMPI_ERRHANDLER_INVOKE(win, MPI_ERR_RMA_CONFLICT, FUNC_NAME); } } diff --git a/ompi/win/win.c b/ompi/win/win.c index 003fc9ed03..9d99643808 100644 --- a/ompi/win/win.c +++ b/ompi/win/win.c @@ -182,6 +182,7 @@ ompi_win_construct(ompi_win_t *win) win->w_disp_unit = 0; win->w_flags = 0; + win->w_mode = 0; win->w_baseptr = NULL; win->w_size = 0; win->w_osc_module = NULL; diff --git a/ompi/win/win.h b/ompi/win/win.h index 4ace72bf49..aab2d9fb88 100644 --- a/ompi/win/win.h +++ b/ompi/win/win.h @@ -34,13 +34,17 @@ extern "C" { #endif +/* flags */ #define OMPI_WIN_FREED 0x00000001 #define OMPI_WIN_INVALID 0x00000002 #define OMPI_WIN_NO_LOCKS 0x00000004 -#define OMPI_WIN_ACCESS_EPOCH 0x00000008 -#define OMPI_WIN_EXPOSE_EPOCH 0x00000010 -#define OMPI_WIN_POSTED 0x00000020 -#define OMPI_WIN_STARTED 0x00000040 + +/* mode */ +#define OMPI_WIN_ACCESS_EPOCH 0x00000001 +#define OMPI_WIN_EXPOSE_EPOCH 0x00000002 +#define OMPI_WIN_POSTED 0x00000010 +#define OMPI_WIN_STARTED 0x00000020 +#define OMPI_WIN_LOCK_ACCESS 0x00000040 struct ompi_win_t { opal_object_t w_base; @@ -66,7 +70,8 @@ struct ompi_win_t { /* displacement factor */ int w_disp_unit; - uint32_t w_flags; + uint16_t w_flags; + uint16_t w_mode; void *w_baseptr; long w_size; @@ -106,6 +111,10 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_win_t); return win->w_group->grp_my_rank; } + static inline bool ompi_win_allow_locks(ompi_win_t *win) { + return (0 != (win->w_flags & OMPI_WIN_NO_LOCKS)); + } + #if defined(c_plusplus) || defined(__cplusplus) } #endif