From b8ee05d352ebdf2644ba91b5f0d52109f098ff71 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Tue, 20 Oct 2015 15:19:51 -0600 Subject: [PATCH] osc/rdma: bug fixes This commit fixes several bugs in the osc/rdma component: - Complete aggregated requests immediately. Completion of RMA requests indicates local completion anyway. This fixes a hang in the c_reqops test. - Correctly mark Rget_accumulate requests. - Set the local base flag correctly on the local peer. - Clear or set the no locks flag on the window if the value is changed by MPI_Win_set_info. - Actually update the target when using MPI_OP_REPLACE. Fixes open-mpi/ompi#1010 Signed-off-by: Nathan Hjelm --- ompi/mca/osc/rdma/osc_rdma.h | 4 ++-- ompi/mca/osc/rdma/osc_rdma_accumulate.c | 7 +++++-- ompi/mca/osc/rdma/osc_rdma_comm.c | 9 ++------- ompi/mca/osc/rdma/osc_rdma_comm.h | 2 +- ompi/mca/osc/rdma/osc_rdma_component.c | 20 +++++++------------- ompi/mca/osc/rdma/osc_rdma_lock.h | 10 +++++----- ompi/mca/osc/rdma/osc_rdma_passive_target.c | 13 ++++++++++--- ompi/mca/osc/rdma/osc_rdma_types.h | 3 --- 8 files changed, 32 insertions(+), 36 deletions(-) diff --git a/ompi/mca/osc/rdma/osc_rdma.h b/ompi/mca/osc/rdma/osc_rdma.h index 6a8790e43f..f49687f531 100644 --- a/ompi/mca/osc/rdma/osc_rdma.h +++ b/ompi/mca/osc/rdma/osc_rdma.h @@ -342,7 +342,7 @@ static inline void _ompi_osc_rdma_deregister (ompi_osc_rdma_module_t *module, mc #define ompi_osc_rdma_deregister(...) _ompi_osc_rdma_deregister(__VA_ARGS__, __LINE__, __FILE__) static inline void ompi_osc_rdma_progress (ompi_osc_rdma_module_t *module) { - module->selected_btl->btl_component->btl_progress (); + opal_progress (); } /** @@ -475,7 +475,7 @@ static inline void ompi_osc_rdma_sync_rdma_complete (ompi_osc_rdma_sync_t *sync) } do { - module->selected_btl->btl_component->btl_progress (); + opal_progress (); } while (sync->outstanding_rdma); } diff --git a/ompi/mca/osc/rdma/osc_rdma_accumulate.c b/ompi/mca/osc/rdma/osc_rdma_accumulate.c index b2f998ea60..1b73cd23ae 100644 --- a/ompi/mca/osc/rdma/osc_rdma_accumulate.c +++ b/ompi/mca/osc/rdma/osc_rdma_accumulate.c @@ -146,6 +146,8 @@ static void ompi_osc_rdma_acc_get_complete (struct mca_btl_base_module_t *btl, s /* accumulate the data */ if (&ompi_mpi_op_replace.op != request->op) { ompi_op_reduce (request->op, request->origin_addr, (void *) source, request->origin_count, request->origin_dt); + } else { + memcpy ((void *) source, request->origin_addr, request->len); } /* initiate the put of the accumulated data */ @@ -214,7 +216,7 @@ static inline int ompi_osc_rdma_gacc_contig (ompi_osc_rdma_sync_t *sync, const v ompi_osc_rdma_sync_rdma_inc (sync); - if (&ompi_mpi_op_replace.op != op || result) { + if (&ompi_mpi_op_replace.op != op || OMPI_OSC_RDMA_TYPE_GET_ACC == request->type) { /* align the target address */ target_address = target_address & ~btl_alignment_mask; @@ -283,9 +285,10 @@ static inline int ompi_osc_rdma_gacc_master (ompi_osc_rdma_sync_t *sync, const v if (NULL == request) { OMPI_OSC_RDMA_REQUEST_ALLOC(module, peer, request); request->internal = true; - request->type = result_datatype ? OMPI_OSC_RDMA_TYPE_GET_ACC : OMPI_OSC_RDMA_TYPE_ACC; } + request->type = result_datatype ? OMPI_OSC_RDMA_TYPE_GET_ACC : OMPI_OSC_RDMA_TYPE_ACC; + if (source_datatype) { (void) ompi_datatype_get_extent (source_datatype, &lb, &extent); source_buffer = (void *)((intptr_t) source_buffer + lb); diff --git a/ompi/mca/osc/rdma/osc_rdma_comm.c b/ompi/mca/osc/rdma/osc_rdma_comm.c index 38feec464f..3fc1047915 100644 --- a/ompi/mca/osc/rdma/osc_rdma_comm.c +++ b/ompi/mca/osc/rdma/osc_rdma_comm.c @@ -365,12 +365,6 @@ static void ompi_osc_rdma_aggregate_put_complete (struct mca_btl_base_module_t * assert (OPAL_SUCCESS == status); ompi_osc_rdma_frag_complete (frag); - - OPAL_LIST_FOREACH_SAFE(request, next, &aggregation->requests, ompi_osc_rdma_request_t) { - opal_list_remove_item (&aggregation->requests, (opal_list_item_t *) request); - ompi_osc_rdma_request_complete (request, status); - } - ompi_osc_rdma_aggregation_return (aggregation); /* make sure the aggregation is returned before marking the operation as complete */ @@ -426,7 +420,8 @@ static void ompi_osc_rdma_aggregate_append (ompi_osc_rdma_aggregation_t *aggrega aggregation->buffer_used += size; if (request) { - opal_list_append (&aggregation->requests, (opal_list_item_t *) request); + /* the local buffer is now available */ + ompi_osc_rdma_request_complete (request, 0); } } diff --git a/ompi/mca/osc/rdma/osc_rdma_comm.h b/ompi/mca/osc/rdma/osc_rdma_comm.h index 8fb65b4226..b5fb5a6d98 100644 --- a/ompi/mca/osc/rdma/osc_rdma_comm.h +++ b/ompi/mca/osc/rdma/osc_rdma_comm.h @@ -77,7 +77,7 @@ static inline int osc_rdma_get_remote_segment (ompi_osc_rdma_module_t *module, o int disp_unit = (module->same_disp_unit) ? module->disp_unit : ex_peer->disp_unit; size_t size = (module->same_size) ? module->size : (size_t) ex_peer->size; - *remote_address = ex_peer->super.base +disp_unit * target_disp; + *remote_address = ex_peer->super.base + disp_unit * target_disp; if (OPAL_UNLIKELY(*remote_address + length > (ex_peer->super.base + size))) { OPAL_OUTPUT_VERBOSE((10, ompi_osc_base_framework.framework_output, "remote address range 0x%" PRIx64 " - 0x%" PRIx64 " is out of range. Valid address range is 0x%" PRIx64 " - 0x%" PRIx64 " (%" PRIu64 " bytes)", diff --git a/ompi/mca/osc/rdma/osc_rdma_component.c b/ompi/mca/osc/rdma/osc_rdma_component.c index 1f89629cb1..ff3986eb31 100644 --- a/ompi/mca/osc/rdma/osc_rdma_component.c +++ b/ompi/mca/osc/rdma/osc_rdma_component.c @@ -606,6 +606,10 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s ex_peer->size = temp[i].size; } + if (my_rank == peer_rank) { + peer->flags |= OMPI_OSC_RDMA_PEER_LOCAL_BASE; + } + if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) { if (temp[i].size) { ex_peer->super.base = (uint64_t) (uintptr_t) module->segment_base + offset; @@ -1172,6 +1176,7 @@ static int ompi_osc_rdma_set_info (struct ompi_win_t *win, struct ompi_info_t *i OBJ_CONSTRUCT(&module->outstanding_locks, opal_hash_table_t); module->no_locks = true; + win->w_flags |= OMPI_WIN_NO_LOCKS; } else if (!temp && module->no_locks) { int world_size = ompi_comm_size (module->comm); int init_limit = world_size > 256 ? 256 : world_size; @@ -1183,6 +1188,7 @@ static int ompi_osc_rdma_set_info (struct ompi_win_t *win, struct ompi_info_t *i } module->no_locks = false; + win->w_flags &= ~OMPI_WIN_NO_LOCKS; } /* enforce collectiveness... */ @@ -1204,16 +1210,4 @@ static int ompi_osc_rdma_get_info (struct ompi_win_t *win, struct ompi_info_t ** return OMPI_SUCCESS; } -static void ompi_osc_rdma_aggregation_construct (ompi_osc_rdma_aggregation_t *aggregation) -{ - OBJ_CONSTRUCT(&aggregation->requests, opal_list_t); - aggregation->buffer_used = 0; -} - -static void ompi_osc_rdma_aggregation_destruct (ompi_osc_rdma_aggregation_t *aggregation) -{ - OBJ_DESTRUCT(&aggregation->requests); -} - -OBJ_CLASS_INSTANCE(ompi_osc_rdma_aggregation_t, opal_list_item_t, ompi_osc_rdma_aggregation_construct, - ompi_osc_rdma_aggregation_destruct); +OBJ_CLASS_INSTANCE(ompi_osc_rdma_aggregation_t, opal_list_item_t, NULL, NULL); diff --git a/ompi/mca/osc/rdma/osc_rdma_lock.h b/ompi/mca/osc/rdma/osc_rdma_lock.h index c091d6e883..6e88a1211d 100644 --- a/ompi/mca/osc/rdma/osc_rdma_lock.h +++ b/ompi/mca/osc/rdma/osc_rdma_lock.h @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ * @@ -212,8 +212,8 @@ static inline int ompi_osc_rdma_lock_try_acquire_exclusive (ompi_osc_rdma_module } } - OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "got %lx when attempting compare and swap %" PRIx64 " complete %d", - (unsigned long) *temp, lock, atomic_complete)); + OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "got %lx when attempting compare and swap %" PRIx64 " on %p", + (unsigned long) *temp, lock, (void *) peer)); result = (*temp != 0); ompi_osc_rdma_frag_complete (frag); @@ -289,8 +289,8 @@ static inline int ompi_osc_rdma_lock_release_exclusive (ompi_osc_rdma_module_t * } } - OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "unlocked target lock %" PRIx64 " with value 0x%lx. old value 0x%" - PRIx64, lock, (unsigned long) -OMPI_OSC_RDMA_LOCK_EXCLUSIVE, ((uint64_t *) temp)[0])); + OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "unlocked target lock on peer %p %" PRIx64 " using value 0x%lx" + PRIx64, (void *) peer, lock, (unsigned long) -OMPI_OSC_RDMA_LOCK_EXCLUSIVE)); } else { ompi_osc_rdma_unlock_local ((volatile ompi_osc_rdma_lock_t *)(intptr_t) lock); } diff --git a/ompi/mca/osc/rdma/osc_rdma_passive_target.c b/ompi/mca/osc/rdma/osc_rdma_passive_target.c index f11611580b..fa59fc916b 100644 --- a/ompi/mca/osc/rdma/osc_rdma_passive_target.c +++ b/ompi/mca/osc/rdma/osc_rdma_passive_target.c @@ -182,6 +182,11 @@ int ompi_osc_rdma_lock_atomic (int lock_type, int target, int assert, ompi_win_t ompi_osc_rdma_sync_t *lock; int ret = OMPI_SUCCESS; + if (module->no_locks) { + OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "osc/rdma: attemoted to lock with no_locks set")); + return OMPI_ERR_RMA_SYNC; + } + OPAL_OUTPUT_VERBOSE((60, ompi_osc_base_framework.framework_output, "osc rdma: lock %d %d", target, lock_type)); if (module->all_sync.epoch_active && (OMPI_OSC_RDMA_SYNC_TYPE_LOCK != module->all_sync.type || MPI_LOCK_EXCLUSIVE == lock_type)) { @@ -275,10 +280,12 @@ int ompi_osc_rdma_lock_all_atomic (int assert, struct ompi_win_t *win) ompi_osc_rdma_sync_t *lock; int ret = OMPI_SUCCESS; - OPAL_THREAD_LOCK(&module->lock); + if (module->no_locks) { + OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "osc/rdma: attemoted to lock with no_locks set")); + return OMPI_ERR_RMA_SYNC; + } - /* Check if no_locks is set. TODO: we also need to track whether we are in an - * active target epoch. Fence can make this tricky to track. */ + OPAL_THREAD_LOCK(&module->lock); if (module->all_sync.epoch_active) { OPAL_OUTPUT_VERBOSE((25, ompi_osc_base_framework.framework_output, "osc/rdma: attempted " "to lock all when active target epoch is %s and lock all epoch is %s", diff --git a/ompi/mca/osc/rdma/osc_rdma_types.h b/ompi/mca/osc/rdma/osc_rdma_types.h index 7948fbc7a3..a6ac2467a2 100644 --- a/ompi/mca/osc/rdma/osc_rdma_types.h +++ b/ompi/mca/osc/rdma/osc_rdma_types.h @@ -202,9 +202,6 @@ struct ompi_osc_rdma_aggregation_t { /** type */ int type; - - /** list of associated requests */ - opal_list_t requests; }; typedef struct ompi_osc_rdma_aggregation_t ompi_osc_rdma_aggregation_t;