1
1

osc/rdma: ensure eager sends are active before checking for sync errors in self optimization

This addresses an issue found with the MPICH pscw_ordering test. Eager sends
were not yet active, which is fine for the standard path but not for the
self optimization. Fixed by waiting for all post messages to arrive before
checking the sync state.

Fixes trac:4724

Tracking the 1.8.2 issue in this CMR.

cmr=v1.8.2:reviewer=bbenton

This commit was SVN r32012.

The following Trac tickets were found above:
  Ticket 4724 --> https://svn.open-mpi.org/trac/ompi/ticket/4724
Этот коммит содержится в:
Nathan Hjelm 2014-06-17 04:53:47 +00:00
родитель 37ae430424
Коммит 2f96f16416

Просмотреть файл

@ -82,6 +82,15 @@ static inline int ompi_osc_rdma_put_self (void *source, int source_count, ompi_d
((unsigned long) target_disp * module->disp_unit); ((unsigned long) target_disp * module->disp_unit);
int ret; int ret;
/* if we are in active target mode wait until all post messages arrive */
if (module->sc_group && !module->active_eager_send_active) {
OPAL_THREAD_LOCK(&module->lock);
while (0 != module->num_post_msgs) {
opal_condition_wait(&module->cond, &module->lock);
}
OPAL_THREAD_UNLOCK(&module->lock);
}
if (!(module->passive_target_access_epoch || module->active_eager_send_active)) { if (!(module->passive_target_access_epoch || module->active_eager_send_active)) {
return OMPI_ERR_RMA_SYNC; return OMPI_ERR_RMA_SYNC;
} }
@ -107,6 +116,15 @@ static inline int ompi_osc_rdma_get_self (void *target, int target_count, ompi_d
((unsigned long) source_disp * module->disp_unit); ((unsigned long) source_disp * module->disp_unit);
int ret; int ret;
/* if we are in active target mode wait until all post messages arrive */
if (module->sc_group && !module->active_eager_send_active) {
OPAL_THREAD_LOCK(&module->lock);
while (0 != module->num_post_msgs) {
opal_condition_wait(&module->cond, &module->lock);
}
OPAL_THREAD_UNLOCK(&module->lock);
}
if (!(module->passive_target_access_epoch || module->active_eager_send_active)) { if (!(module->passive_target_access_epoch || module->active_eager_send_active)) {
return OMPI_ERR_RMA_SYNC; return OMPI_ERR_RMA_SYNC;
} }
@ -130,6 +148,15 @@ static inline int ompi_osc_rdma_cas_self (void *source, void *compare, void *res
void *target = (unsigned char*) module->baseptr + void *target = (unsigned char*) module->baseptr +
((unsigned long) target_disp * module->disp_unit); ((unsigned long) target_disp * module->disp_unit);
/* if we are in active target mode wait until all post messages arrive */
if (module->sc_group && !module->active_eager_send_active) {
OPAL_THREAD_LOCK(&module->lock);
while (0 != module->num_post_msgs) {
opal_condition_wait(&module->cond, &module->lock);
}
OPAL_THREAD_UNLOCK(&module->lock);
}
if (!(module->passive_target_access_epoch || module->active_eager_send_active)) { if (!(module->passive_target_access_epoch || module->active_eager_send_active)) {
return OMPI_ERR_RMA_SYNC; return OMPI_ERR_RMA_SYNC;
} }
@ -155,6 +182,15 @@ static inline int ompi_osc_rdma_acc_self (void *source, int source_count, ompi_d
((unsigned long) target_disp * module->disp_unit); ((unsigned long) target_disp * module->disp_unit);
int ret; int ret;
/* if we are in active target mode wait until all post messages arrive */
if (module->sc_group && !module->active_eager_send_active) {
OPAL_THREAD_LOCK(&module->lock);
while (0 != module->num_post_msgs) {
opal_condition_wait(&module->cond, &module->lock);
}
OPAL_THREAD_UNLOCK(&module->lock);
}
if (!(module->passive_target_access_epoch || module->active_eager_send_active)) { if (!(module->passive_target_access_epoch || module->active_eager_send_active)) {
return OMPI_ERR_RMA_SYNC; return OMPI_ERR_RMA_SYNC;
} }
@ -191,6 +227,15 @@ static inline int ompi_osc_rdma_gacc_self (void *source, int source_count, ompi_
((unsigned long) target_disp * module->disp_unit); ((unsigned long) target_disp * module->disp_unit);
int ret; int ret;
/* if we are in active target mode wait until all post messages arrive */
if (module->sc_group && !module->active_eager_send_active) {
OPAL_THREAD_LOCK(&module->lock);
while (0 != module->num_post_msgs) {
opal_condition_wait(&module->cond, &module->lock);
}
OPAL_THREAD_UNLOCK(&module->lock);
}
if (!(module->passive_target_access_epoch || module->active_eager_send_active)) { if (!(module->passive_target_access_epoch || module->active_eager_send_active)) {
return OMPI_ERR_RMA_SYNC; return OMPI_ERR_RMA_SYNC;
} }