1
1

Rest of the fix for #325. It uses a bit more space, but now we can reasonably

tell if the remote proc should be in an exposure epoch or not.

Refs trac:325

This commit was SVN r11746.

The following Trac tickets were found above:
  Ticket 325 --> https://svn.open-mpi.org/trac/ompi/ticket/325
Этот коммит содержится в:
Brian Barrett 2006-09-21 20:49:15 +00:00
родитель 2ec0c4f593
Коммит 8fc278c3a3
10 изменённых файлов: 172 добавлений и 43 удалений

Просмотреть файл

@ -56,6 +56,8 @@ ompi_osc_pt2pt_module_free(ompi_win_t *win)
OBJ_DESTRUCT(&(module->p2p_locks_pending));
free(module->p2p_sc_remote_ranks);
free(module->p2p_sc_remote_active_ranks);
assert(module->p2p_sc_group == NULL);
assert(module->p2p_pw_group == NULL);
free(module->p2p_fence_coll_counts);

Просмотреть файл

@ -139,6 +139,8 @@ struct ompi_osc_pt2pt_module_t {
struct ompi_group_t *p2p_pw_group;
struct ompi_group_t *p2p_sc_group;
bool *p2p_sc_remote_active_ranks;
int *p2p_sc_remote_ranks;
/* ********************* LOCK data ************************ */
int32_t p2p_lock_status; /* one of 0, MPI_LOCK_EXCLUSIVE, MPI_LOCK_SHARED */

Просмотреть файл

@ -49,6 +49,11 @@ ompi_osc_pt2pt_module_accumulate(void *origin_addr, int origin_count,
int ret;
ompi_osc_pt2pt_sendreq_t *sendreq;
if ((OMPI_WIN_STARTED & ompi_win_get_mode(win)) &&
(!P2P_MODULE(win)->p2p_sc_remote_active_ranks[target])) {
return MPI_ERR_RMA_SYNC;
}
if (OMPI_WIN_FENCE & ompi_win_get_mode(win)) {
/* well, we're definitely in an access epoch now */
ompi_win_set_mode(win, OMPI_WIN_FENCE | OMPI_WIN_ACCESS_EPOCH |
@ -103,6 +108,11 @@ ompi_osc_pt2pt_module_get(void *origin_addr,
int ret;
ompi_osc_pt2pt_sendreq_t *sendreq;
if ((OMPI_WIN_STARTED & ompi_win_get_mode(win)) &&
(!P2P_MODULE(win)->p2p_sc_remote_active_ranks[target])) {
return MPI_ERR_RMA_SYNC;
}
if (OMPI_WIN_FENCE & ompi_win_get_mode(win)) {
/* well, we're definitely in an access epoch now */
ompi_win_set_mode(win, OMPI_WIN_FENCE | OMPI_WIN_ACCESS_EPOCH |
@ -143,6 +153,11 @@ ompi_osc_pt2pt_module_put(void *origin_addr, int origin_count,
int ret;
ompi_osc_pt2pt_sendreq_t *sendreq;
if ((OMPI_WIN_STARTED & ompi_win_get_mode(win)) &&
(!P2P_MODULE(win)->p2p_sc_remote_active_ranks[target])) {
return MPI_ERR_RMA_SYNC;
}
if (OMPI_WIN_FENCE & ompi_win_get_mode(win)) {
/* well, we're definitely in an access epoch now */
ompi_win_set_mode(win, OMPI_WIN_FENCE | OMPI_WIN_ACCESS_EPOCH |

Просмотреть файл

@ -323,7 +323,7 @@ ompi_osc_pt2pt_component_select(ompi_win_t *win,
module->p2p_fence_coll_results = (short*)malloc(sizeof(short) *
ompi_comm_size(module->p2p_comm));
if (NULL == module->p2p_fence_coll_counts) {
if (NULL == module->p2p_fence_coll_results) {
free(module->p2p_fence_coll_counts);
free(module->p2p_copy_num_pending_sendreqs);
OBJ_DESTRUCT(&module->p2p_copy_pending_sendreqs);
@ -340,6 +340,39 @@ ompi_osc_pt2pt_component_select(ompi_win_t *win,
/* pwsc data */
module->p2p_pw_group = NULL;
module->p2p_sc_group = NULL;
module->p2p_sc_remote_active_ranks =
malloc(sizeof(bool) * ompi_comm_size(module->p2p_comm));
if (NULL == module->p2p_sc_remote_active_ranks) {
free(module->p2p_fence_coll_results);
free(module->p2p_fence_coll_counts);
free(module->p2p_copy_num_pending_sendreqs);
OBJ_DESTRUCT(&module->p2p_copy_pending_sendreqs);
OBJ_DESTRUCT(&module->p2p_long_msgs);
free(module->p2p_num_pending_sendreqs);
OBJ_DESTRUCT(&module->p2p_pending_sendreqs);
ompi_comm_free(&comm);
OBJ_DESTRUCT(&(module->p2p_acc_lock));
OBJ_DESTRUCT(&(module->p2p_lock));
free(module);
return OMPI_ERROR;
}
module->p2p_sc_remote_ranks =
malloc(sizeof(int) * ompi_comm_size(module->p2p_comm));
if (NULL == module->p2p_sc_remote_ranks) {
free(module->p2p_sc_remote_active_ranks);
free(module->p2p_fence_coll_results);
free(module->p2p_fence_coll_counts);
free(module->p2p_copy_num_pending_sendreqs);
OBJ_DESTRUCT(&module->p2p_copy_pending_sendreqs);
OBJ_DESTRUCT(&module->p2p_long_msgs);
free(module->p2p_num_pending_sendreqs);
OBJ_DESTRUCT(&module->p2p_pending_sendreqs);
ompi_comm_free(&comm);
OBJ_DESTRUCT(&(module->p2p_acc_lock));
OBJ_DESTRUCT(&(module->p2p_lock));
free(module);
return OMPI_ERROR;
}
/* lock data */
module->p2p_lock_status = 0;

Просмотреть файл

@ -189,6 +189,8 @@ ompi_osc_pt2pt_module_start(ompi_group_t *group,
int assert,
ompi_win_t *win)
{
int i;
OBJ_RETAIN(group);
/* BWB - do I need this? */
ompi_group_increment_proc_count(group);
@ -198,6 +200,34 @@ ompi_osc_pt2pt_module_start(ompi_group_t *group,
P2P_MODULE(win)->p2p_sc_group = group;
OPAL_THREAD_UNLOCK(&(P2P_MODULE(win)->p2p_lock));
/* for each process in the specified group, find it's rank in our
communicator, store those indexes, and set the true / false in
the active ranks table */
for (i = 0 ; i < ompi_group_size(group) ; i++) {
int comm_rank = -1, j;
/* no need to increment ref count - the communicator isn't
going anywhere while we're here */
ompi_group_t *comm_group = P2P_MODULE(win)->p2p_comm->c_local_group;
/* find the rank in the communicator associated with this windows */
for (j = 0 ;
j < ompi_group_size(comm_group) ;
++j) {
if (P2P_MODULE(win)->p2p_sc_group->grp_proc_pointers[i] ==
comm_group->grp_proc_pointers[j]) {
comm_rank = j;
break;
}
}
if (comm_rank == -1) {
return MPI_ERR_RMA_SYNC;
}
P2P_MODULE(win)->p2p_sc_remote_active_ranks[comm_rank] = true;
P2P_MODULE(win)->p2p_sc_remote_ranks[i] = comm_rank;
}
/* Set our mode to access w/ start */
ompi_win_remove_mode(win, OMPI_WIN_FENCE);
ompi_win_append_mode(win, OMPI_WIN_ACCESS_EPOCH | OMPI_WIN_STARTED);
@ -229,25 +259,7 @@ ompi_osc_pt2pt_module_complete(ompi_win_t *win)
/* for each process in group, send a control message with number
of updates coming, then start all the requests */
for (i = 0 ; i < ompi_group_size(P2P_MODULE(win)->p2p_sc_group) ; ++i) {
int comm_rank = -1, j;
/* no need to increment ref count - the communicator isn't
going anywhere while we're here */
ompi_group_t *comm_group = P2P_MODULE(win)->p2p_comm->c_local_group;
/* find the rank in the communicator associated with this windows */
for (j = 0 ;
j < ompi_group_size(comm_group) ;
++j) {
if (P2P_MODULE(win)->p2p_sc_group->grp_proc_pointers[i] ==
comm_group->grp_proc_pointers[j]) {
comm_rank = j;
break;
}
}
if (comm_rank == -1) {
ret = MPI_ERR_RMA_SYNC;
goto cleanup;
}
int comm_rank = P2P_MODULE(win)->p2p_sc_remote_ranks[i];
OPAL_THREAD_ADD32(&(P2P_MODULE(win)->p2p_num_pending_out),
P2P_MODULE(win)->p2p_copy_num_pending_sendreqs[comm_rank]);
@ -281,7 +293,6 @@ ompi_osc_pt2pt_module_complete(ompi_win_t *win)
ompi_osc_pt2pt_progress_long(P2P_MODULE(win));
}
cleanup:
/* remove WIN_POSTED from our mode */
ompi_win_remove_mode(win, OMPI_WIN_ACCESS_EPOCH | OMPI_WIN_STARTED);

Просмотреть файл

@ -51,6 +51,8 @@ ompi_osc_rdma_module_free(ompi_win_t *win)
OBJ_DESTRUCT(&(module->p2p_locks_pending));
free(module->p2p_sc_remote_ranks);
free(module->p2p_sc_remote_active_ranks);
assert(module->p2p_sc_group == NULL);
assert(module->p2p_pw_group == NULL);
free(module->p2p_fence_coll_counts);

Просмотреть файл

@ -127,6 +127,8 @@ struct ompi_osc_rdma_module_t {
struct ompi_group_t *p2p_pw_group;
struct ompi_group_t *p2p_sc_group;
bool *p2p_sc_remote_active_ranks;
int *p2p_sc_remote_ranks;
/* ********************* LOCK data ************************ */
int32_t p2p_lock_status; /* one of 0, MPI_LOCK_EXCLUSIVE, MPI_LOCK_SHARED */

Просмотреть файл

@ -49,6 +49,11 @@ ompi_osc_rdma_module_accumulate(void *origin_addr, int origin_count,
int ret;
ompi_osc_rdma_sendreq_t *sendreq;
if ((OMPI_WIN_STARTED & ompi_win_get_mode(win)) &&
(!P2P_MODULE(win)->p2p_sc_remote_active_ranks[target])) {
return MPI_ERR_RMA_SYNC;
}
if (OMPI_WIN_FENCE & ompi_win_get_mode(win)) {
/* well, we're definitely in an access epoch now */
ompi_win_set_mode(win, OMPI_WIN_FENCE | OMPI_WIN_ACCESS_EPOCH |
@ -103,6 +108,11 @@ ompi_osc_rdma_module_get(void *origin_addr,
int ret;
ompi_osc_rdma_sendreq_t *sendreq;
if ((OMPI_WIN_STARTED & ompi_win_get_mode(win)) &&
(!P2P_MODULE(win)->p2p_sc_remote_active_ranks[target])) {
return MPI_ERR_RMA_SYNC;
}
if (OMPI_WIN_FENCE & ompi_win_get_mode(win)) {
/* well, we're definitely in an access epoch now */
ompi_win_set_mode(win, OMPI_WIN_FENCE | OMPI_WIN_ACCESS_EPOCH |
@ -161,6 +171,11 @@ ompi_osc_rdma_module_put(void *origin_addr, int origin_count,
int ret;
ompi_osc_rdma_sendreq_t *sendreq;
if ((OMPI_WIN_STARTED & ompi_win_get_mode(win)) &&
(!P2P_MODULE(win)->p2p_sc_remote_active_ranks[target])) {
return MPI_ERR_RMA_SYNC;
}
if (OMPI_WIN_FENCE & ompi_win_get_mode(win)) {
/* well, we're definitely in an access epoch now */
ompi_win_set_mode(win, OMPI_WIN_FENCE | OMPI_WIN_ACCESS_EPOCH |

Просмотреть файл

@ -325,7 +325,7 @@ ompi_osc_rdma_component_select(ompi_win_t *win,
module->p2p_fence_coll_results = (short*)malloc(sizeof(short) *
ompi_comm_size(module->p2p_comm));
if (NULL == module->p2p_fence_coll_counts) {
if (NULL == module->p2p_fence_coll_results) {
free(module->p2p_fence_coll_counts);
free(module->p2p_copy_num_pending_sendreqs);
OBJ_DESTRUCT(&module->p2p_copy_pending_sendreqs);
@ -336,7 +336,7 @@ ompi_osc_rdma_component_select(ompi_win_t *win,
OBJ_DESTRUCT(&(module->p2p_acc_lock));
OBJ_DESTRUCT(&(module->p2p_lock));
free(module);
return ret;
return OMPI_ERROR;
}
/* figure out what sync method to use */
@ -356,6 +356,39 @@ ompi_osc_rdma_component_select(ompi_win_t *win,
/* pwsc data */
module->p2p_pw_group = NULL;
module->p2p_sc_group = NULL;
module->p2p_sc_remote_active_ranks =
malloc(sizeof(bool) * ompi_comm_size(module->p2p_comm));
if (NULL == module->p2p_sc_remote_active_ranks) {
free(module->p2p_fence_coll_results);
free(module->p2p_fence_coll_counts);
free(module->p2p_copy_num_pending_sendreqs);
OBJ_DESTRUCT(&module->p2p_copy_pending_sendreqs);
OBJ_DESTRUCT(&module->p2p_long_msgs);
free(module->p2p_num_pending_sendreqs);
OBJ_DESTRUCT(&module->p2p_pending_sendreqs);
ompi_comm_free(&comm);
OBJ_DESTRUCT(&(module->p2p_acc_lock));
OBJ_DESTRUCT(&(module->p2p_lock));
free(module);
return OMPI_ERROR;
}
module->p2p_sc_remote_ranks =
malloc(sizeof(int) * ompi_comm_size(module->p2p_comm));
if (NULL == module->p2p_sc_remote_ranks) {
free(module->p2p_sc_remote_active_ranks);
free(module->p2p_fence_coll_results);
free(module->p2p_fence_coll_counts);
free(module->p2p_copy_num_pending_sendreqs);
OBJ_DESTRUCT(&module->p2p_copy_pending_sendreqs);
OBJ_DESTRUCT(&module->p2p_long_msgs);
free(module->p2p_num_pending_sendreqs);
OBJ_DESTRUCT(&module->p2p_pending_sendreqs);
ompi_comm_free(&comm);
OBJ_DESTRUCT(&(module->p2p_acc_lock));
OBJ_DESTRUCT(&(module->p2p_lock));
free(module);
return OMPI_ERROR;
}
/* lock data */
module->p2p_lock_status = 0;

Просмотреть файл

@ -224,6 +224,8 @@ ompi_osc_rdma_module_start(ompi_group_t *group,
int assert,
ompi_win_t *win)
{
int i;
OBJ_RETAIN(group);
/* BWB - do I need this? */
ompi_group_increment_proc_count(group);
@ -233,6 +235,37 @@ ompi_osc_rdma_module_start(ompi_group_t *group,
P2P_MODULE(win)->p2p_sc_group = group;
OPAL_THREAD_UNLOCK(&(P2P_MODULE(win)->p2p_lock));
memset(P2P_MODULE(win)->p2p_sc_remote_active_ranks, 0,
sizeof(bool) * ompi_comm_size(P2P_MODULE(win)->p2p_comm));
/* for each process in the specified group, find it's rank in our
communicator, store those indexes, and set the true / false in
the active ranks table */
for (i = 0 ; i < ompi_group_size(group) ; i++) {
int comm_rank = -1, j;
/* no need to increment ref count - the communicator isn't
going anywhere while we're here */
ompi_group_t *comm_group = P2P_MODULE(win)->p2p_comm->c_local_group;
/* find the rank in the communicator associated with this windows */
for (j = 0 ;
j < ompi_group_size(comm_group) ;
++j) {
if (P2P_MODULE(win)->p2p_sc_group->grp_proc_pointers[i] ==
comm_group->grp_proc_pointers[j]) {
comm_rank = j;
break;
}
}
if (comm_rank == -1) {
return MPI_ERR_RMA_SYNC;
}
P2P_MODULE(win)->p2p_sc_remote_active_ranks[comm_rank] = true;
P2P_MODULE(win)->p2p_sc_remote_ranks[i] = comm_rank;
}
/* Set our mode to access w/ start */
ompi_win_remove_mode(win, OMPI_WIN_FENCE);
ompi_win_append_mode(win, OMPI_WIN_ACCESS_EPOCH | OMPI_WIN_STARTED);
@ -264,25 +297,7 @@ ompi_osc_rdma_module_complete(ompi_win_t *win)
/* for each process in group, send a control message with number
of updates coming, then start all the requests */
for (i = 0 ; i < ompi_group_size(P2P_MODULE(win)->p2p_sc_group) ; ++i) {
int comm_rank = -1, j;
/* no need to increment ref count - the communicator isn't
going anywhere while we're here */
ompi_group_t *comm_group = P2P_MODULE(win)->p2p_comm->c_local_group;
/* find the rank in the communicator associated with this windows */
for (j = 0 ;
j < ompi_group_size(comm_group) ;
++j) {
if (P2P_MODULE(win)->p2p_sc_group->grp_proc_pointers[i] ==
comm_group->grp_proc_pointers[j]) {
comm_rank = j;
break;
}
}
if (comm_rank == -1) {
ret = MPI_ERR_RMA_SYNC;
goto cleanup;
}
int comm_rank = P2P_MODULE(win)->p2p_sc_remote_ranks[i];
OPAL_THREAD_ADD32(&(P2P_MODULE(win)->p2p_num_pending_out),
P2P_MODULE(win)->p2p_copy_num_pending_sendreqs[comm_rank]);
@ -316,7 +331,6 @@ ompi_osc_rdma_module_complete(ompi_win_t *win)
ompi_osc_rdma_progress(P2P_MODULE(win));
}
cleanup:
/* remove WIN_POSTED from our mode */
ompi_win_remove_mode(win, OMPI_WIN_ACCESS_EPOCH | OMPI_WIN_STARTED);