osc/rdma: check for more types of window access violations
This commit adds a check to see if the target is in an access epoch. If
not, we return OMPI_ERR_RMA_SYNC. This fixes test_start3 in the onesided
test suite. The cost of this extra check is 1 byte/peer for the boolean
flag indicating that the peer is in an access epoch.

This commit also fixes a problem where multiple unexpected post messages
were not handled correctly.

cmr=v1.8.2:reviewer=jsquyres

This commit was SVN r32160.
This commit is contained in:
parent d63cf04d2e
commit b6abe68972
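For orientation before the diff: every communication path below is guarded by a single epoch check that consults either the window-wide flag (set by fence/lock_all) or the per-peer flag (set by start/lock). The following is a minimal, self-contained sketch of that pattern; the types and the error value are simplified stand-ins for the OMPI definitions, not the real ones.

/* Minimal model of the access-epoch guard added by this commit.
 * All types and values here are illustrative stand-ins. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define OMPI_SUCCESS        0
#define OMPI_ERR_RMA_SYNC (-17)   /* placeholder value, not the real constant */

typedef struct {
    uint32_t num_acks_pending;
    bool access_epoch;            /* the 1 byte/peer cost mentioned above */
} peer_t;

typedef struct {
    bool all_access_epoch;        /* window-wide epoch: fence, lock_all */
    peer_t *peers;
} module_t;

/* mirrors ompi_osc_rdma_check_access_epoch from the diff below */
static inline bool check_access_epoch (module_t *module, int rank)
{
    return module->all_access_epoch || module->peers[rank].access_epoch;
}

/* every RMA entry point (put, accumulate, compare-and-swap, get, ...)
 * performs this guard before touching the target */
static int example_put (module_t *module, int target)
{
    if (!check_access_epoch (module, target)) {
        return OMPI_ERR_RMA_SYNC;  /* target is not in an access epoch */
    }
    /* ... perform the actual transfer ... */
    return OMPI_SUCCESS;
}

int main (void)
{
    peer_t peers[2] = {{0, false}, {0, false}};
    module_t module = {false, peers};

    printf ("outside epoch: %d\n", example_put (&module, 1));  /* error   */
    module.peers[1].access_epoch = true;   /* e.g. MPI_Win_start/MPI_Win_lock */
    printf ("inside epoch:  %d\n", example_put (&module, 1));  /* success */
    return EXIT_SUCCESS;
}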
@@ -88,6 +88,7 @@ struct ompi_osc_rdma_peer_t {
     /** Number of acks pending. New requests can not be sent out if there are
      * acks pending (to fulfill the ordering constraints of accumulate) */
     uint32_t num_acks_pending;
+    bool access_epoch;
 };
 typedef struct ompi_osc_rdma_peer_t ompi_osc_rdma_peer_t;
 
@@ -166,6 +167,9 @@ struct ompi_osc_rdma_module_t {
     /** start sending data eagerly */
     bool active_eager_send_active;
 
+    /** Indicates the window is in an all access epoch (fence, lock_all) */
+    bool all_access_epoch;
+
     bool *passive_eager_send_active;
 
     /* ********************* PWSC data ************************ */
@@ -690,6 +694,11 @@ static inline void ompi_osc_rdma_accumulate_unlock (ompi_osc_rdma_module_t *modu
     }
 }
 
+static inline bool ompi_osc_rdma_check_access_epoch (ompi_osc_rdma_module_t *module, int rank)
+{
+    return module->all_access_epoch || module->peers[rank].access_epoch;
+}
+
 END_C_DECLS
 
 #endif /* OMPI_OSC_RDMA_H */

@@ -117,6 +117,7 @@ ompi_osc_rdma_fence(int assert, ompi_win_t *win)
     /* active sends are now active (we will close the epoch if NOSUCCEED is specified) */
     if (0 == (assert & MPI_MODE_NOSUCCEED)) {
         module->active_eager_send_active = true;
+        module->all_access_epoch = true;
     }
 
     /* short-circuit the noprecede case */
@@ -166,7 +167,8 @@ ompi_osc_rdma_fence(int assert, ompi_win_t *win)
             /* as specified in MPI-3 p 438 3-5 the fence can end an epoch. it isn't explicitly
              * stated that MPI_MODE_NOSUCCEED ends the epoch but it is a safe assumption. */
             module->active_eager_send_active = false;
         }
+        module->all_access_epoch = false;
     }
 
  cleanup:
     OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
@@ -185,13 +187,14 @@ ompi_osc_rdma_start(ompi_group_t *group,
 {
     ompi_osc_rdma_module_t *module = GET_MODULE(win);
     ompi_osc_rdma_pending_post_t *pending_post, *next;
-
-    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
-                         "ompi_osc_rdma_start entering..."));
+    ompi_osc_rdma_peer_t *peer;
+    int group_size;
+    int *ranks;
 
     OPAL_THREAD_LOCK(&module->lock);
 
-    /* ensure we're not already in a start */
+    /* ensure we're not already in a start or passive target. we cannot check for all
+     * access here due to fence */
     if (NULL != module->sc_group || module->passive_target_access_epoch) {
         OPAL_THREAD_UNLOCK(&module->lock);
         return OMPI_ERR_RMA_SYNC;
@@ -203,14 +206,32 @@ ompi_osc_rdma_start(ompi_group_t *group,
 
     module->sc_group = group;
 
+    /* mark all procs in this group as being in an access epoch */
+    group_size = ompi_group_size (module->sc_group);
+
+    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
+                         "ompi_osc_rdma_start entering with group size %d...",
+                         group_size));
+
+    ranks = get_comm_ranks(module, module->sc_group);
+    if (NULL == ranks) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
+
+    for (int i = 0 ; i < group_size ; ++i) {
+        /* when the post comes in we will be in an access epoch with this proc */
+        module->peers[ranks[i]].access_epoch = true;
+    }
+
+    free (ranks);
+
     OPAL_LIST_FOREACH_SAFE(pending_post, next, &module->pending_posts, ompi_osc_rdma_pending_post_t) {
         ompi_proc_t *pending_proc = ompi_comm_peer_lookup (module->comm, pending_post->rank);
 
         if (group_contains_proc (module->sc_group, pending_proc)) {
+            OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "Consumed unexpected post message from %d",
+                                 pending_post->rank));
             ++module->num_post_msgs;
             opal_list_remove_item (&module->pending_posts, &pending_post->super);
             OBJ_RELEASE(pending_post);
-            break;
         }
     }
 
@@ -219,13 +240,13 @@ ompi_osc_rdma_start(ompi_group_t *group,
        receive messages. */
     module->active_eager_send_active = false;
 
-    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
-                         "num_post_msgs = %d", module->num_post_msgs));
-
     /* possible we've already received a couple of messages, so
        add however many we're going to wait for */
     module->num_post_msgs -= ompi_group_size(module->sc_group);
 
+    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
+                         "num_post_msgs = %d", module->num_post_msgs));
+
     /* if we've already received all the post messages, we can eager
        send. Otherwise, eager send will be enabled when
        num_post_msgs reaches 0 */
@@ -246,10 +267,12 @@ ompi_osc_rdma_complete(ompi_win_t *win)
 {
     ompi_osc_rdma_module_t *module = GET_MODULE(win);
     ompi_osc_rdma_header_complete_t complete_req;
+    ompi_osc_rdma_peer_t *peer;
     int ret = OMPI_SUCCESS;
     int i;
     int *ranks = NULL;
     ompi_group_t *group;
+    int my_rank = ompi_comm_rank (module->comm);
 
     OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                          "ompi_osc_rdma_complete entering..."));
@@ -282,10 +305,21 @@ ompi_osc_rdma_complete(ompi_win_t *win)
        round. */
     OPAL_THREAD_UNLOCK(&module->lock);
     for (i = 0 ; i < ompi_group_size(module->sc_group) ; ++i) {
+        if (my_rank == ranks[i]) {
+            /* shortcut for self */
+            OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "ompi_osc_rdma_complete self complete"));
+            module->num_complete_msgs++;
+            continue;
+        }
+
         complete_req.base.type = OMPI_OSC_RDMA_HDR_TYPE_COMPLETE;
         complete_req.base.flags = OMPI_OSC_RDMA_HDR_FLAG_VALID;
         complete_req.frag_count = module->epoch_outgoing_frag_count[ranks[i]];
 
+        peer = module->peers + ranks[i];
+
+        peer->access_epoch = false;
+
         ret = ompi_osc_rdma_control_send(module,
                                          ranks[i],
                                          &complete_req,
@@ -344,14 +378,17 @@ ompi_osc_rdma_post(ompi_group_t *group,
     int ret = OMPI_SUCCESS;
     ompi_osc_rdma_module_t *module = GET_MODULE(win);
     ompi_osc_rdma_header_post_t post_req;
+    int my_rank = ompi_comm_rank(module->comm);
 
-    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
-                         "ompi_osc_rdma_post entering..."));
-
+    /* can't check for all access epoch here due to fence */
     if (module->pw_group) {
         return OMPI_ERR_RMA_SYNC;
     }
 
+    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
+                         "ompi_osc_rdma_post entering with group size %d...",
+                         ompi_group_size (group)));
+
     /* save the group */
     OBJ_RETAIN(group);
     ompi_group_increment_proc_count(group);
@@ -382,6 +419,15 @@ ompi_osc_rdma_post(ompi_group_t *group,
 
     /* send a hello counter to everyone in group */
     for (int i = 0 ; i < ompi_group_size(module->pw_group) ; ++i) {
+        OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "Sending post message to rank %d", ranks[i]));
+
+        /* shortcut for self */
+        if (my_rank == ranks[i]) {
+            OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "ompi_osc_rdma_post self post"));
+            osc_rdma_incoming_post (module, my_rank);
+            continue;
+        }
+
         post_req.base.type = OMPI_OSC_RDMA_HDR_TYPE_POST;
         post_req.base.flags = OMPI_OSC_RDMA_HDR_FLAG_VALID;
         post_req.windx = ompi_comm_get_cid(module->comm);
@@ -407,16 +453,19 @@ ompi_osc_rdma_wait(ompi_win_t *win)
     ompi_osc_rdma_module_t *module = GET_MODULE(win);
     ompi_group_t *group;
 
-    OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
-                         "ompi_osc_rdma_wait entering..."));
-
     if (NULL == module->pw_group) {
         return OMPI_ERR_RMA_SYNC;
     }
 
+    OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
+                         "ompi_osc_rdma_wait entering..."));
+
     OPAL_THREAD_LOCK(&module->lock);
     while (0 != module->num_complete_msgs ||
            module->active_incoming_frag_count < module->active_incoming_frag_signal_count) {
+        OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output,
+                             "num_complete_msgs = %d, active_incoming_frag_count = %d, active_incoming_frag_signal_count = %d",
+                             module->num_complete_msgs, module->active_incoming_frag_count, module->active_incoming_frag_signal_count));
         opal_condition_wait(&module->cond, &module->lock);
     }
 
@@ -487,6 +536,10 @@ int osc_rdma_incoming_post (ompi_osc_rdma_module_t *module, int source)
     if (!module->sc_group || !group_contains_proc (module->sc_group, source_proc)) {
         ompi_osc_rdma_pending_post_t *pending_post = OBJ_NEW(ompi_osc_rdma_pending_post_t);
 
+        OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
+                             "received unexpected post message from %d. module->sc_group = %p, size = %d",
+                             source, module->sc_group, module->sc_group ? ompi_group_size (module->sc_group) : 0));
+
         pending_post->rank = source;
 
         opal_list_append (&module->pending_posts, &pending_post->super);
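The queue manipulated above works together with the consume loop in the ompi_osc_rdma_start hunk earlier: posts that arrive before the matching start are appended to pending_posts, and start now consumes every queued entry whose rank is in its group instead of stopping at the first match. Below is a hedged, self-contained model of that protocol; a plain C list stands in for the opal_list machinery, and all names are illustrative.

/* Simplified model of the pending-post queue; not the OMPI implementation. */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct pending_post {
    int rank;
    struct pending_post *next;
} pending_post_t;

typedef struct {
    pending_post_t *pending_posts; /* posts that arrived before start() */
    int num_post_msgs;             /* start() subtracts the group size; 0 = all posts seen */
} module_t;

/* mirrors osc_rdma_incoming_post for a source outside the current group:
 * queue the post instead of dropping it */
static void incoming_post (module_t *module, int source)
{
    pending_post_t *post = malloc (sizeof (*post));
    if (NULL == post) {
        return;  /* a real implementation would report the error */
    }
    post->rank = source;
    post->next = module->pending_posts;
    module->pending_posts = post;
}

/* mirrors the consume loop in ompi_osc_rdma_start: walk the queue and
 * account for every queued post whose rank is in the start group */
static void start_consume (module_t *module, const int *group, int group_size)
{
    module->num_post_msgs -= group_size;
    pending_post_t **prev = &module->pending_posts;
    while (*prev) {
        pending_post_t *post = *prev;
        bool in_group = false;
        for (int i = 0 ; i < group_size ; ++i) {
            in_group = in_group || (group[i] == post->rank);
        }
        if (in_group) {
            ++module->num_post_msgs;   /* consumed one expected post */
            *prev = post->next;
            free (post);
        } else {
            prev = &post->next;
        }
    }
}

int main (void)
{
    module_t module = {NULL, 0};
    int group[2] = {1, 2};

    /* two posts race ahead of start(); both must be consumed */
    incoming_post (&module, 1);
    incoming_post (&module, 2);
    start_consume (&module, group, 2);
    printf ("num_post_msgs = %d\n", module.num_post_msgs); /* 0: epoch may begin */
    return EXIT_SUCCESS;
}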
@@ -301,6 +301,10 @@ static inline int ompi_osc_rdma_put_w_req (void *origin_addr, int origin_count,
                          origin_dt->name, target, (int) target_disp,
                          target_count, target_dt->name, win->w_name));
 
+    if (!ompi_osc_rdma_check_access_epoch (module, target)) {
+        return OMPI_ERR_RMA_SYNC;
+    }
+
     /* short-circuit case */
     if (0 == origin_count || 0 == target_count) {
         if (request) {
@@ -473,6 +477,10 @@ ompi_osc_rdma_accumulate_w_req (void *origin_addr, int origin_count,
                          target_count, target_dt->name, op->o_name,
                          win->w_name));
 
+    if (!ompi_osc_rdma_check_access_epoch (module, target)) {
+        return OMPI_ERR_RMA_SYNC;
+    }
+
     /* short-circuit case */
     if (0 == origin_count || 0 == target_count) {
         if (request) {
@@ -643,6 +651,10 @@ int ompi_osc_rdma_compare_and_swap (void *origin_addr, void *compare_addr,
                          (unsigned long) result_addr, dt->name, target, (int) target_disp,
                          win->w_name));
 
+    if (!ompi_osc_rdma_check_access_epoch (module, target)) {
+        return OMPI_ERR_RMA_SYNC;
+    }
+
     /* optimize self case. TODO: optimize local case */
     if (ompi_comm_rank (module->comm) == target) {
         return ompi_osc_rdma_cas_self (origin_addr, compare_addr, result_addr, dt, target_disp,
@@ -788,6 +800,10 @@ static inline int ompi_osc_rdma_rget_internal (void *origin_addr, int origin_cou
                          origin_dt->name, target, (int) target_disp,
                          target_count, target_dt->name, win->w_name));
 
+    if (!ompi_osc_rdma_check_access_epoch (module, target)) {
+        return OMPI_ERR_RMA_SYNC;
+    }
+
     /* gets are always request based, so that we know where to land the data */
     OMPI_OSC_RDMA_REQUEST_ALLOC(win, rdma_request);
     if (NULL == rdma_request) {
@@ -997,6 +1013,10 @@ int ompi_osc_rdma_rget_accumulate_internal (void *origin_addr, int origin_count,
                          target_rank, (int) target_disp, target_count, target_datatype->name,
                          op->o_name, win->w_name));
 
+    if (!ompi_osc_rdma_check_access_epoch (module, target_rank)) {
+        return OMPI_ERR_RMA_SYNC;
+    }
+
     /* get_accumulates are always request based, so that we know where to land the data */
     OMPI_OSC_RDMA_REQUEST_ALLOC(win, rdma_request);
     if (OPAL_UNLIKELY(NULL == rdma_request)) {

@@ -185,11 +185,12 @@ int ompi_osc_rdma_lock(int lock_type, int target, int assert, ompi_win_t *win)
 {
     ompi_osc_rdma_module_t *module = GET_MODULE(win);
     ompi_osc_rdma_outstanding_lock_t *lock;
+    ompi_osc_rdma_peer_t *peer = module->peers + target;
     int ret = OMPI_SUCCESS;
 
     /* Check if no_locks is set. TODO: we also need to track whether we are in an
      * active target epoch. Fence can make this tricky to track. */
-    if (NULL == module->passive_eager_send_active) {
+    if (NULL == module->passive_eager_send_active || module->sc_group) {
         return OMPI_ERR_RMA_SYNC;
     }
 
@@ -203,6 +204,9 @@ int ompi_osc_rdma_lock(int lock_type, int target, int assert, ompi_win_t *win)
     module->passive_eager_send_active[target] = false;
     module->passive_target_access_epoch = true;
 
+    /* when the lock ack returns we will be in an access epoch with this peer */
+    peer->access_epoch = true;
+
     /* create lock item */
     lock = OBJ_NEW(ompi_osc_rdma_outstanding_lock_t);
     if (OPAL_UNLIKELY(NULL == lock)) {
@@ -249,6 +253,7 @@ int ompi_osc_rdma_unlock(int target, ompi_win_t *win)
 {
     ompi_osc_rdma_module_t *module = GET_MODULE(win);
     ompi_osc_rdma_outstanding_lock_t *lock = NULL;
+    ompi_osc_rdma_peer_t *peer = module->peers + target;
     int ret = OMPI_SUCCESS;
 
     OPAL_THREAD_LOCK(&module->lock);
@@ -299,6 +304,8 @@ int ompi_osc_rdma_unlock(int target, ompi_win_t *win)
     module->epoch_outgoing_frag_count[target] = 0;
     module->passive_target_access_epoch = false;
 
+    peer->access_epoch = false;
+
     /* delete the lock */
     opal_list_remove_item (&module->outstanding_locks, &lock->super);
     OBJ_RELEASE(lock);
@@ -328,6 +335,7 @@ int ompi_osc_rdma_lock_all(int assert, struct ompi_win_t *win)
         module->passive_eager_send_active[i] = false;
     }
     module->passive_target_access_epoch = true;
+    module->all_access_epoch = true;
 
     /* create lock item */
     lock = OBJ_NEW(ompi_osc_rdma_outstanding_lock_t);
@@ -434,6 +442,7 @@ int ompi_osc_rdma_unlock_all (struct ompi_win_t *win)
     OBJ_RELEASE(lock);
 
     module->passive_target_access_epoch = false;
+    module->all_access_epoch = false;
 
     OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                          "ompi_osc_rdma_unlock_all complete"));