1
1

osc/pt2pt: fix regression in pscw sync on 0 size groups

Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
Этот коммит содержится в:
Nathan Hjelm 2015-09-22 17:09:00 -06:00
родитель f6920aa916
Коммит ee5810813b
2 изменённых файла: 24 добавления и 28 удалений

Просмотреть файл

@ -114,11 +114,13 @@ static ompi_osc_pt2pt_peer_t **ompi_osc_pt2pt_get_peers (ompi_osc_pt2pt_module_t
/**
 * Release every peer object referenced by a peer array, then free the array.
 *
 * @param peers   array of peer object pointers; a NULL array is accepted and
 *                results in no work being done
 * @param npeers  number of entries in @p peers
 */
static void ompi_osc_pt2pt_release_peers (ompi_osc_pt2pt_peer_t **peers, int npeers)
{
    if (NULL == peers) {
        /* no peer array was provided, so there is nothing to release */
        return;
    }

    /* release each entry in order before the array itself goes away */
    for (int idx = 0 ; idx < npeers ; ++idx) {
        OBJ_RELEASE(peers[idx]);
    }

    free (peers);
}
int ompi_osc_pt2pt_fence(int assert, ompi_win_t *win)
@ -228,20 +230,24 @@ int ompi_osc_pt2pt_start (ompi_group_t *group, int assert, ompi_win_t *win)
"ompi_osc_pt2pt_start entering with group size %d...",
sync->num_peers));
if (0 == ompi_group_size (group)) {
/* nothing more to do. this is an empty start epoch */
OPAL_THREAD_UNLOCK(&module->lock);
return OMPI_SUCCESS;
}
opal_atomic_wmb ();
sync->type = OMPI_OSC_PT2PT_SYNC_TYPE_PSCW;
/* prevent us from entering a passive-target, fence, or another pscw access epoch until
* the matching complete is called */
sync->epoch_active = true;
/* save the group */
OBJ_RETAIN(group);
if (0 == ompi_group_size (group)) {
/* nothing more to do. this is an empty start epoch */
sync->eager_send_active = true;
OPAL_THREAD_UNLOCK(&module->lock);
return OMPI_SUCCESS;
}
opal_atomic_wmb ();
/* translate the group ranks into the communicator */
sync->peer_list.peers = ompi_osc_pt2pt_get_peers (module, group);
if (NULL == sync->peer_list.peers) {
@ -249,10 +255,6 @@ int ompi_osc_pt2pt_start (ompi_group_t *group, int assert, ompi_win_t *win)
return OMPI_ERR_OUT_OF_RESOURCE;
}
/* save the group */
OBJ_RETAIN(group);
ompi_group_increment_proc_count(group);
if (!(assert & MPI_MODE_NOCHECK)) {
OPAL_THREAD_LOCK(&sync->lock);
for (int i = 0 ; i < sync->num_peers ; ++i) {
@ -318,12 +320,6 @@ int ompi_osc_pt2pt_complete (ompi_win_t *win)
group_size = sync->num_peers;
peers = sync->peer_list.peers;
if (NULL == peers) {
/* empty peer list */
OPAL_THREAD_UNLOCK(&(module->lock));
OBJ_RELEASE(group);
return OMPI_SUCCESS;
}
OPAL_THREAD_UNLOCK(&module->lock);
@ -383,8 +379,10 @@ int ompi_osc_pt2pt_complete (ompi_win_t *win)
module->epoch_outgoing_frag_count[rank] = 0;
}
if (peers) {
/* release our reference to peers in this group */
ompi_osc_pt2pt_release_peers (peers, group_size);
}
if (OMPI_SUCCESS != ret) {
return ret;
@ -403,7 +401,6 @@ int ompi_osc_pt2pt_complete (ompi_win_t *win)
OPAL_THREAD_UNLOCK(&module->lock);
/* phase 2 cleanup group */
ompi_group_decrement_proc_count(group);
OBJ_RELEASE(group);
OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
@ -439,7 +436,6 @@ int ompi_osc_pt2pt_post (ompi_group_t *group, int assert, ompi_win_t *win)
/* save the group */
OBJ_RETAIN(group);
ompi_group_increment_proc_count(group);
module->pw_group = group;
@ -523,7 +519,6 @@ int ompi_osc_pt2pt_wait (ompi_win_t *win)
module->pw_group = NULL;
OPAL_THREAD_UNLOCK(&module->lock);
ompi_group_decrement_proc_count(group);
OBJ_RELEASE(group);
OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
@ -561,7 +556,6 @@ int ompi_osc_pt2pt_test (ompi_win_t *win, int *flag)
OPAL_THREAD_UNLOCK(&(module->lock));
ompi_group_decrement_proc_count(group);
OBJ_RELEASE(group);
return OMPI_SUCCESS;

Просмотреть файл

@ -173,6 +173,8 @@ static inline void ompi_osc_pt2pt_sync_reset (ompi_osc_pt2pt_sync_t *sync)
sync->type = OMPI_OSC_PT2PT_SYNC_TYPE_NONE;
sync->eager_send_active = 0;
sync->epoch_active = 0;
sync->peer_list.peers = NULL;
sync->sync.pscw.group = NULL;
}
#endif /* OMPI_OSC_PT2PT_SYNC_H */