From 8fc278c3a37ef60e1ef91dd9da44cb39034c000f Mon Sep 17 00:00:00 2001
From: Brian Barrett
Date: Thu, 21 Sep 2006 20:49:15 +0000
Subject: [PATCH] Rest of the fix for #325. It uses a bit more space, but now
 we can reasonably tell if the remote proc should be in an exposure epoch or
 not.

Refs trac:325

This commit was SVN r11746.

The following Trac tickets were found above:
  Ticket 325 --> https://svn.open-mpi.org/trac/ompi/ticket/325
---
 ompi/mca/osc/pt2pt/osc_pt2pt.c           |  2 +
 ompi/mca/osc/pt2pt/osc_pt2pt.h           |  2 +
 ompi/mca/osc/pt2pt/osc_pt2pt_comm.c      | 15 +++++++
 ompi/mca/osc/pt2pt/osc_pt2pt_component.c | 35 ++++++++++++++-
 ompi/mca/osc/pt2pt/osc_pt2pt_sync.c      | 51 +++++++++++++---------
 ompi/mca/osc/rdma/osc_rdma.c             |  2 +
 ompi/mca/osc/rdma/osc_rdma.h             |  2 +
 ompi/mca/osc/rdma/osc_rdma_comm.c        | 15 +++++++
 ompi/mca/osc/rdma/osc_rdma_component.c   | 37 +++++++++++++++-
 ompi/mca/osc/rdma/osc_rdma_sync.c        | 54 +++++++++++++++---------
 10 files changed, 172 insertions(+), 43 deletions(-)

diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt.c b/ompi/mca/osc/pt2pt/osc_pt2pt.c
index 41637fb568..8ab07c7c74 100644
--- a/ompi/mca/osc/pt2pt/osc_pt2pt.c
+++ b/ompi/mca/osc/pt2pt/osc_pt2pt.c
@@ -56,6 +56,8 @@ ompi_osc_pt2pt_module_free(ompi_win_t *win)
 
     OBJ_DESTRUCT(&(module->p2p_locks_pending));
 
+    free(module->p2p_sc_remote_ranks);
+    free(module->p2p_sc_remote_active_ranks);
     assert(module->p2p_sc_group == NULL);
     assert(module->p2p_pw_group == NULL);
     free(module->p2p_fence_coll_counts);
diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt.h b/ompi/mca/osc/pt2pt/osc_pt2pt.h
index 42b69e76df..6adf3fcb7a 100644
--- a/ompi/mca/osc/pt2pt/osc_pt2pt.h
+++ b/ompi/mca/osc/pt2pt/osc_pt2pt.h
@@ -139,6 +139,8 @@ struct ompi_osc_pt2pt_module_t {
 
     struct ompi_group_t *p2p_pw_group;
     struct ompi_group_t *p2p_sc_group;
+    bool *p2p_sc_remote_active_ranks;
+    int *p2p_sc_remote_ranks;
 
     /* ********************* LOCK data ************************ */
     int32_t p2p_lock_status; /* one of 0, MPI_LOCK_EXCLUSIVE, MPI_LOCK_SHARED */
diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_comm.c b/ompi/mca/osc/pt2pt/osc_pt2pt_comm.c
index c2d26c559b..33ea966ebb 100644
--- a/ompi/mca/osc/pt2pt/osc_pt2pt_comm.c
+++ b/ompi/mca/osc/pt2pt/osc_pt2pt_comm.c
@@ -49,6 +49,11 @@ ompi_osc_pt2pt_module_accumulate(void *origin_addr, int origin_count,
     int ret;
     ompi_osc_pt2pt_sendreq_t *sendreq;
 
+    if ((OMPI_WIN_STARTED & ompi_win_get_mode(win)) &&
+        (!P2P_MODULE(win)->p2p_sc_remote_active_ranks[target])) {
+        return MPI_ERR_RMA_SYNC;
+    }
+
     if (OMPI_WIN_FENCE & ompi_win_get_mode(win)) {
         /* well, we're definitely in an access epoch now */
         ompi_win_set_mode(win, OMPI_WIN_FENCE | OMPI_WIN_ACCESS_EPOCH |
@@ -103,6 +108,11 @@ ompi_osc_pt2pt_module_get(void *origin_addr,
     int ret;
     ompi_osc_pt2pt_sendreq_t *sendreq;
 
+    if ((OMPI_WIN_STARTED & ompi_win_get_mode(win)) &&
+        (!P2P_MODULE(win)->p2p_sc_remote_active_ranks[target])) {
+        return MPI_ERR_RMA_SYNC;
+    }
+
     if (OMPI_WIN_FENCE & ompi_win_get_mode(win)) {
         /* well, we're definitely in an access epoch now */
         ompi_win_set_mode(win, OMPI_WIN_FENCE | OMPI_WIN_ACCESS_EPOCH |
@@ -143,6 +153,11 @@ ompi_osc_pt2pt_module_put(void *origin_addr, int origin_count,
     int ret;
     ompi_osc_pt2pt_sendreq_t *sendreq;
 
+    if ((OMPI_WIN_STARTED & ompi_win_get_mode(win)) &&
+        (!P2P_MODULE(win)->p2p_sc_remote_active_ranks[target])) {
+        return MPI_ERR_RMA_SYNC;
+    }
+
     if (OMPI_WIN_FENCE & ompi_win_get_mode(win)) {
         /* well, we're definitely in an access epoch now */
         ompi_win_set_mode(win, OMPI_WIN_FENCE | OMPI_WIN_ACCESS_EPOCH |
diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_component.c b/ompi/mca/osc/pt2pt/osc_pt2pt_component.c
index d46fa90465..9f5fdc7b1f 100644
--- a/ompi/mca/osc/pt2pt/osc_pt2pt_component.c
+++ b/ompi/mca/osc/pt2pt/osc_pt2pt_component.c
@@ -323,7 +323,7 @@ ompi_osc_pt2pt_component_select(ompi_win_t *win,
 
     module->p2p_fence_coll_results =
         (short*)malloc(sizeof(short) * ompi_comm_size(module->p2p_comm));
-    if (NULL == module->p2p_fence_coll_counts) {
+    if (NULL == module->p2p_fence_coll_results) {
         free(module->p2p_fence_coll_counts);
         free(module->p2p_copy_num_pending_sendreqs);
         OBJ_DESTRUCT(&module->p2p_copy_pending_sendreqs);
@@ -340,6 +340,39 @@ ompi_osc_pt2pt_component_select(ompi_win_t *win,
     /* pwsc data */
     module->p2p_pw_group = NULL;
     module->p2p_sc_group = NULL;
+    module->p2p_sc_remote_active_ranks =
+        malloc(sizeof(bool) * ompi_comm_size(module->p2p_comm));
+    if (NULL == module->p2p_sc_remote_active_ranks) {
+        free(module->p2p_fence_coll_results);
+        free(module->p2p_fence_coll_counts);
+        free(module->p2p_copy_num_pending_sendreqs);
+        OBJ_DESTRUCT(&module->p2p_copy_pending_sendreqs);
+        OBJ_DESTRUCT(&module->p2p_long_msgs);
+        free(module->p2p_num_pending_sendreqs);
+        OBJ_DESTRUCT(&module->p2p_pending_sendreqs);
+        ompi_comm_free(&comm);
+        OBJ_DESTRUCT(&(module->p2p_acc_lock));
+        OBJ_DESTRUCT(&(module->p2p_lock));
+        free(module);
+        return OMPI_ERROR;
+    }
+    module->p2p_sc_remote_ranks =
+        malloc(sizeof(int) * ompi_comm_size(module->p2p_comm));
+    if (NULL == module->p2p_sc_remote_ranks) {
+        free(module->p2p_sc_remote_active_ranks);
+        free(module->p2p_fence_coll_results);
+        free(module->p2p_fence_coll_counts);
+        free(module->p2p_copy_num_pending_sendreqs);
+        OBJ_DESTRUCT(&module->p2p_copy_pending_sendreqs);
+        OBJ_DESTRUCT(&module->p2p_long_msgs);
+        free(module->p2p_num_pending_sendreqs);
+        OBJ_DESTRUCT(&module->p2p_pending_sendreqs);
+        ompi_comm_free(&comm);
+        OBJ_DESTRUCT(&(module->p2p_acc_lock));
+        OBJ_DESTRUCT(&(module->p2p_lock));
+        free(module);
+        return OMPI_ERROR;
+    }
 
     /* lock data */
     module->p2p_lock_status = 0;
diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_sync.c b/ompi/mca/osc/pt2pt/osc_pt2pt_sync.c
index e57eb773f1..3200f46091 100644
--- a/ompi/mca/osc/pt2pt/osc_pt2pt_sync.c
+++ b/ompi/mca/osc/pt2pt/osc_pt2pt_sync.c
@@ -189,6 +189,8 @@ ompi_osc_pt2pt_module_start(ompi_group_t *group,
                             int assert, ompi_win_t *win)
 {
+    int i;
+
     OBJ_RETAIN(group);    /* BWB - do I need this? */
     ompi_group_increment_proc_count(group);
 
@@ -198,6 +200,34 @@ ompi_osc_pt2pt_module_start(ompi_group_t *group,
     P2P_MODULE(win)->p2p_sc_group = group;
     OPAL_THREAD_UNLOCK(&(P2P_MODULE(win)->p2p_lock));
 
+    /* for each process in the specified group, find it's rank in our
+       communicator, store those indexes, and set the true / false in
+       the active ranks table */
+    for (i = 0 ; i < ompi_group_size(group) ; i++) {
+        int comm_rank = -1, j;
+
+        /* no need to increment ref count - the communicator isn't
+           going anywhere while we're here */
+        ompi_group_t *comm_group = P2P_MODULE(win)->p2p_comm->c_local_group;
+
+        /* find the rank in the communicator associated with this windows */
+        for (j = 0 ;
+             j < ompi_group_size(comm_group) ;
+             ++j) {
+            if (P2P_MODULE(win)->p2p_sc_group->grp_proc_pointers[i] ==
+                comm_group->grp_proc_pointers[j]) {
+                comm_rank = j;
+                break;
+            }
+        }
+        if (comm_rank == -1) {
+            return MPI_ERR_RMA_SYNC;
+        }
+
+        P2P_MODULE(win)->p2p_sc_remote_active_ranks[comm_rank] = true;
+        P2P_MODULE(win)->p2p_sc_remote_ranks[i] = comm_rank;
+    }
+
     /* Set our mode to access w/ start */
     ompi_win_remove_mode(win, OMPI_WIN_FENCE);
     ompi_win_append_mode(win, OMPI_WIN_ACCESS_EPOCH | OMPI_WIN_STARTED);
@@ -229,25 +259,7 @@ ompi_osc_pt2pt_module_complete(ompi_win_t *win)
     /* for each process in group, send a control message with number
        of updates coming, then start all the requests */
     for (i = 0 ; i < ompi_group_size(P2P_MODULE(win)->p2p_sc_group) ; ++i) {
-        int comm_rank = -1, j;
-        /* no need to increment ref count - the communicator isn't
-           going anywhere while we're here */
-        ompi_group_t *comm_group = P2P_MODULE(win)->p2p_comm->c_local_group;
-
-        /* find the rank in the communicator associated with this windows */
-        for (j = 0 ;
-             j < ompi_group_size(comm_group) ;
-             ++j) {
-            if (P2P_MODULE(win)->p2p_sc_group->grp_proc_pointers[i] ==
-                comm_group->grp_proc_pointers[j]) {
-                comm_rank = j;
-                break;
-            }
-        }
-        if (comm_rank == -1) {
-            ret = MPI_ERR_RMA_SYNC;
-            goto cleanup;
-        }
+        int comm_rank = P2P_MODULE(win)->p2p_sc_remote_ranks[i];
 
         OPAL_THREAD_ADD32(&(P2P_MODULE(win)->p2p_num_pending_out),
                           P2P_MODULE(win)->p2p_copy_num_pending_sendreqs[comm_rank]);
@@ -281,7 +293,6 @@ ompi_osc_pt2pt_module_complete(ompi_win_t *win)
         ompi_osc_pt2pt_progress_long(P2P_MODULE(win));
     }
 
- cleanup:
     /* remove WIN_POSTED from our mode */
     ompi_win_remove_mode(win, OMPI_WIN_ACCESS_EPOCH | OMPI_WIN_STARTED);
 
diff --git a/ompi/mca/osc/rdma/osc_rdma.c b/ompi/mca/osc/rdma/osc_rdma.c
index af58c6e689..c43d5118de 100644
--- a/ompi/mca/osc/rdma/osc_rdma.c
+++ b/ompi/mca/osc/rdma/osc_rdma.c
@@ -51,6 +51,8 @@ ompi_osc_rdma_module_free(ompi_win_t *win)
 
     OBJ_DESTRUCT(&(module->p2p_locks_pending));
 
+    free(module->p2p_sc_remote_ranks);
+    free(module->p2p_sc_remote_active_ranks);
     assert(module->p2p_sc_group == NULL);
     assert(module->p2p_pw_group == NULL);
     free(module->p2p_fence_coll_counts);
diff --git a/ompi/mca/osc/rdma/osc_rdma.h b/ompi/mca/osc/rdma/osc_rdma.h
index 7887bfba4a..924aa38639 100644
--- a/ompi/mca/osc/rdma/osc_rdma.h
+++ b/ompi/mca/osc/rdma/osc_rdma.h
@@ -127,6 +127,8 @@ struct ompi_osc_rdma_module_t {
 
     struct ompi_group_t *p2p_pw_group;
     struct ompi_group_t *p2p_sc_group;
+    bool *p2p_sc_remote_active_ranks;
+    int *p2p_sc_remote_ranks;
 
     /* ********************* LOCK data ************************ */
     int32_t p2p_lock_status; /* one of 0, MPI_LOCK_EXCLUSIVE, MPI_LOCK_SHARED */
diff --git a/ompi/mca/osc/rdma/osc_rdma_comm.c b/ompi/mca/osc/rdma/osc_rdma_comm.c
index 98e6e5dfbb..e02665a62b 100644
--- a/ompi/mca/osc/rdma/osc_rdma_comm.c
+++ b/ompi/mca/osc/rdma/osc_rdma_comm.c
@@ -49,6 +49,11 @@ ompi_osc_rdma_module_accumulate(void *origin_addr, int origin_count,
     int ret;
     ompi_osc_rdma_sendreq_t *sendreq;
 
+    if ((OMPI_WIN_STARTED & ompi_win_get_mode(win)) &&
+        (!P2P_MODULE(win)->p2p_sc_remote_active_ranks[target])) {
+        return MPI_ERR_RMA_SYNC;
+    }
+
     if (OMPI_WIN_FENCE & ompi_win_get_mode(win)) {
         /* well, we're definitely in an access epoch now */
         ompi_win_set_mode(win, OMPI_WIN_FENCE | OMPI_WIN_ACCESS_EPOCH |
@@ -103,6 +108,11 @@ ompi_osc_rdma_module_get(void *origin_addr,
     int ret;
     ompi_osc_rdma_sendreq_t *sendreq;
 
+    if ((OMPI_WIN_STARTED & ompi_win_get_mode(win)) &&
+        (!P2P_MODULE(win)->p2p_sc_remote_active_ranks[target])) {
+        return MPI_ERR_RMA_SYNC;
+    }
+
     if (OMPI_WIN_FENCE & ompi_win_get_mode(win)) {
         /* well, we're definitely in an access epoch now */
         ompi_win_set_mode(win, OMPI_WIN_FENCE | OMPI_WIN_ACCESS_EPOCH |
@@ -161,6 +171,11 @@ ompi_osc_rdma_module_put(void *origin_addr, int origin_count,
     int ret;
     ompi_osc_rdma_sendreq_t *sendreq;
 
+    if ((OMPI_WIN_STARTED & ompi_win_get_mode(win)) &&
+        (!P2P_MODULE(win)->p2p_sc_remote_active_ranks[target])) {
+        return MPI_ERR_RMA_SYNC;
+    }
+
     if (OMPI_WIN_FENCE & ompi_win_get_mode(win)) {
         /* well, we're definitely in an access epoch now */
         ompi_win_set_mode(win, OMPI_WIN_FENCE | OMPI_WIN_ACCESS_EPOCH |
diff --git a/ompi/mca/osc/rdma/osc_rdma_component.c b/ompi/mca/osc/rdma/osc_rdma_component.c
index a24bc6c4b4..a39e8eae54 100644
--- a/ompi/mca/osc/rdma/osc_rdma_component.c
+++ b/ompi/mca/osc/rdma/osc_rdma_component.c
@@ -325,7 +325,7 @@ ompi_osc_rdma_component_select(ompi_win_t *win,
 
     module->p2p_fence_coll_results =
         (short*)malloc(sizeof(short) * ompi_comm_size(module->p2p_comm));
-    if (NULL == module->p2p_fence_coll_counts) {
+    if (NULL == module->p2p_fence_coll_results) {
         free(module->p2p_fence_coll_counts);
         free(module->p2p_copy_num_pending_sendreqs);
         OBJ_DESTRUCT(&module->p2p_copy_pending_sendreqs);
@@ -336,7 +336,7 @@ ompi_osc_rdma_component_select(ompi_win_t *win,
         OBJ_DESTRUCT(&(module->p2p_acc_lock));
         OBJ_DESTRUCT(&(module->p2p_lock));
         free(module);
-        return ret;
+        return OMPI_ERROR;
     }
 
     /* figure out what sync method to use */
@@ -356,6 +356,39 @@ ompi_osc_rdma_component_select(ompi_win_t *win,
     /* pwsc data */
     module->p2p_pw_group = NULL;
     module->p2p_sc_group = NULL;
+    module->p2p_sc_remote_active_ranks =
+        malloc(sizeof(bool) * ompi_comm_size(module->p2p_comm));
+    if (NULL == module->p2p_sc_remote_active_ranks) {
+        free(module->p2p_fence_coll_results);
+        free(module->p2p_fence_coll_counts);
+        free(module->p2p_copy_num_pending_sendreqs);
+        OBJ_DESTRUCT(&module->p2p_copy_pending_sendreqs);
+        OBJ_DESTRUCT(&module->p2p_long_msgs);
+        free(module->p2p_num_pending_sendreqs);
+        OBJ_DESTRUCT(&module->p2p_pending_sendreqs);
+        ompi_comm_free(&comm);
+        OBJ_DESTRUCT(&(module->p2p_acc_lock));
+        OBJ_DESTRUCT(&(module->p2p_lock));
+        free(module);
+        return OMPI_ERROR;
+    }
+    module->p2p_sc_remote_ranks =
+        malloc(sizeof(int) * ompi_comm_size(module->p2p_comm));
+    if (NULL == module->p2p_sc_remote_ranks) {
+        free(module->p2p_sc_remote_active_ranks);
+        free(module->p2p_fence_coll_results);
+        free(module->p2p_fence_coll_counts);
+        free(module->p2p_copy_num_pending_sendreqs);
+        OBJ_DESTRUCT(&module->p2p_copy_pending_sendreqs);
+        OBJ_DESTRUCT(&module->p2p_long_msgs);
+        free(module->p2p_num_pending_sendreqs);
+        OBJ_DESTRUCT(&module->p2p_pending_sendreqs);
+        ompi_comm_free(&comm);
+        OBJ_DESTRUCT(&(module->p2p_acc_lock));
+        OBJ_DESTRUCT(&(module->p2p_lock));
+        free(module);
+        return OMPI_ERROR;
+    }
 
     /* lock data */
     module->p2p_lock_status = 0;
diff --git a/ompi/mca/osc/rdma/osc_rdma_sync.c b/ompi/mca/osc/rdma/osc_rdma_sync.c
index aa05f51e3e..dc2a773d50 100644
--- a/ompi/mca/osc/rdma/osc_rdma_sync.c
+++ b/ompi/mca/osc/rdma/osc_rdma_sync.c
@@ -224,6 +224,8 @@ ompi_osc_rdma_module_start(ompi_group_t *group,
                            int assert, ompi_win_t *win)
 {
+    int i;
+
     OBJ_RETAIN(group);    /* BWB - do I need this? */
     ompi_group_increment_proc_count(group);
 
@@ -233,6 +235,37 @@ ompi_osc_rdma_module_start(ompi_group_t *group,
     P2P_MODULE(win)->p2p_sc_group = group;
     OPAL_THREAD_UNLOCK(&(P2P_MODULE(win)->p2p_lock));
 
+    memset(P2P_MODULE(win)->p2p_sc_remote_active_ranks, 0,
+           sizeof(bool) * ompi_comm_size(P2P_MODULE(win)->p2p_comm));
+
+    /* for each process in the specified group, find it's rank in our
+       communicator, store those indexes, and set the true / false in
+       the active ranks table */
+    for (i = 0 ; i < ompi_group_size(group) ; i++) {
+        int comm_rank = -1, j;
+
+        /* no need to increment ref count - the communicator isn't
+           going anywhere while we're here */
+        ompi_group_t *comm_group = P2P_MODULE(win)->p2p_comm->c_local_group;
+
+        /* find the rank in the communicator associated with this windows */
+        for (j = 0 ;
+             j < ompi_group_size(comm_group) ;
+             ++j) {
+            if (P2P_MODULE(win)->p2p_sc_group->grp_proc_pointers[i] ==
+                comm_group->grp_proc_pointers[j]) {
+                comm_rank = j;
+                break;
+            }
+        }
+        if (comm_rank == -1) {
+            return MPI_ERR_RMA_SYNC;
+        }
+
+        P2P_MODULE(win)->p2p_sc_remote_active_ranks[comm_rank] = true;
+        P2P_MODULE(win)->p2p_sc_remote_ranks[i] = comm_rank;
+    }
+
     /* Set our mode to access w/ start */
     ompi_win_remove_mode(win, OMPI_WIN_FENCE);
     ompi_win_append_mode(win, OMPI_WIN_ACCESS_EPOCH | OMPI_WIN_STARTED);
@@ -264,25 +297,7 @@ ompi_osc_rdma_module_complete(ompi_win_t *win)
     /* for each process in group, send a control message with number
        of updates coming, then start all the requests */
     for (i = 0 ; i < ompi_group_size(P2P_MODULE(win)->p2p_sc_group) ; ++i) {
-        int comm_rank = -1, j;
-        /* no need to increment ref count - the communicator isn't
-           going anywhere while we're here */
-        ompi_group_t *comm_group = P2P_MODULE(win)->p2p_comm->c_local_group;
-
-        /* find the rank in the communicator associated with this windows */
-        for (j = 0 ;
-             j < ompi_group_size(comm_group) ;
-             ++j) {
-            if (P2P_MODULE(win)->p2p_sc_group->grp_proc_pointers[i] ==
-                comm_group->grp_proc_pointers[j]) {
-                comm_rank = j;
-                break;
-            }
-        }
-        if (comm_rank == -1) {
-            ret = MPI_ERR_RMA_SYNC;
-            goto cleanup;
-        }
+        int comm_rank = P2P_MODULE(win)->p2p_sc_remote_ranks[i];
 
         OPAL_THREAD_ADD32(&(P2P_MODULE(win)->p2p_num_pending_out),
                           P2P_MODULE(win)->p2p_copy_num_pending_sendreqs[comm_rank]);
@@ -316,7 +331,6 @@ ompi_osc_rdma_module_complete(ompi_win_t *win)
         ompi_osc_rdma_progress(P2P_MODULE(win));
     }
 
- cleanup:
     /* remove WIN_POSTED from our mode */
     ompi_win_remove_mode(win, OMPI_WIN_ACCESS_EPOCH | OMPI_WIN_STARTED);
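
Note (not part of the patch): the sketch below restates, in a self-contained form, the bookkeeping this commit adds to the pt2pt and rdma components: at MPI_Win_start time, record which communicator ranks belong to the access group and cache the group-index-to-communicator-rank mapping; at put/get/accumulate time, reject any target that was not named at start. The names here (sketch_module, sketch_start, sketch_check_target, group_comm_ranks) are hypothetical stand-ins, not OMPI identifiers, and the rank lookup is passed in directly rather than derived by comparing proc pointers as the real code does.

    /* Hypothetical, simplified sketch of the technique -- not OMPI code. */
    #include <stdbool.h>
    #include <stdlib.h>
    #include <string.h>

    struct sketch_module {
        int   comm_size;               /* size of the window's communicator */
        bool *sc_remote_active_ranks;  /* indexed by communicator rank */
        int  *sc_remote_ranks;         /* start-group index -> communicator rank */
    };

    /* Analogue of module_start(): group_comm_ranks[i] is the communicator
       rank of the i-th member of the start group. */
    int sketch_start(struct sketch_module *m,
                     const int *group_comm_ranks, int group_size)
    {
        int i;

        memset(m->sc_remote_active_ranks, 0, sizeof(bool) * m->comm_size);
        for (i = 0; i < group_size; ++i) {
            int comm_rank = group_comm_ranks[i];
            if (comm_rank < 0 || comm_rank >= m->comm_size) {
                return -1;  /* rank not found in the window's communicator */
            }
            m->sc_remote_active_ranks[comm_rank] = true;  /* legal target */
            m->sc_remote_ranks[i] = comm_rank;            /* reused at complete */
        }
        return 0;
    }

    /* Analogue of the check added to put/get/accumulate: inside a STARTED
       access epoch, only ranks named at start time may be targeted. */
    int sketch_check_target(const struct sketch_module *m, int target)
    {
        return m->sc_remote_active_ranks[target] ? 0 : -1;
    }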