From e4989714c2160a11b9bf72e568da914b8ca6aa81 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Mon, 25 Jun 2018 09:47:44 -0600 Subject: [PATCH] osc/rdma: fix data race on teardown The osc/rdma module did not wait for all pending atomics to complete before tearing down. This could lead to weird issues as the target location may no longer be registered or allocated. This commit also fixes an offset calculation issue in ompi_osc_get_data_blocking (). Signed-off-by: Nathan Hjelm --- ompi/mca/osc/rdma/osc_rdma.h | 3 +++ ompi/mca/osc/rdma/osc_rdma_active_target.c | 5 +++++ ompi/mca/osc/rdma/osc_rdma_comm.c | 2 +- ompi/mca/osc/rdma/osc_rdma_lock.h | 10 ++++++++++ ompi/mca/osc/rdma/osc_rdma_module.c | 4 ++++ ompi/mca/osc/rdma/osc_rdma_types.h | 1 + 6 files changed, 24 insertions(+), 1 deletion(-) diff --git a/ompi/mca/osc/rdma/osc_rdma.h b/ompi/mca/osc/rdma/osc_rdma.h index d8192f1afd..277be1a1e9 100644 --- a/ompi/mca/osc/rdma/osc_rdma.h +++ b/ompi/mca/osc/rdma/osc_rdma.h @@ -269,6 +269,9 @@ struct ompi_osc_rdma_module_t { /** number of time a get had to be retried */ unsigned long get_retry_count; + + /** outstanding atomic operations */ + volatile int32_t pending_ops; }; typedef struct ompi_osc_rdma_module_t ompi_osc_rdma_module_t; OMPI_MODULE_DECLSPEC extern ompi_osc_rdma_component_t mca_osc_rdma_component; diff --git a/ompi/mca/osc/rdma/osc_rdma_active_target.c b/ompi/mca/osc/rdma/osc_rdma_active_target.c index 9ccb91b0dd..f677394da0 100644 --- a/ompi/mca/osc/rdma/osc_rdma_active_target.c +++ b/ompi/mca/osc/rdma/osc_rdma_active_target.c @@ -56,6 +56,7 @@ static void ompi_osc_rdma_pending_op_construct (ompi_osc_rdma_pending_op_t *pend pending_op->op_result = NULL; pending_op->op_complete = false; pending_op->cbfunc = NULL; + pending_op->module = NULL; } static void ompi_osc_rdma_pending_op_destruct (ompi_osc_rdma_pending_op_t *pending_op) @@ -64,6 +65,10 @@ static void ompi_osc_rdma_pending_op_destruct (ompi_osc_rdma_pending_op_t *pendi ompi_osc_rdma_frag_complete (pending_op->op_frag); } + if (NULL != pending_op->module) { + (void) opal_atomic_fetch_add_32 (&pending_op->module->pending_ops, -1); + } + ompi_osc_rdma_pending_op_construct (pending_op); } diff --git a/ompi/mca/osc/rdma/osc_rdma_comm.c b/ompi/mca/osc/rdma/osc_rdma_comm.c index fda90e9122..4e3736d951 100644 --- a/ompi/mca/osc/rdma/osc_rdma_comm.c +++ b/ompi/mca/osc/rdma/osc_rdma_comm.c @@ -63,7 +63,7 @@ int ompi_osc_get_data_blocking (ompi_osc_rdma_module_t *module, struct mca_btl_b ompi_osc_rdma_frag_t *frag = NULL; volatile bool read_complete = false; size_t aligned_len, offset; - uint64_t aligned_addr = (source_address + btl_alignment_mask) & ~btl_alignment_mask; + uint64_t aligned_addr = source_address & ~btl_alignment_mask; char *ptr = data; int ret; diff --git a/ompi/mca/osc/rdma/osc_rdma_lock.h b/ompi/mca/osc/rdma/osc_rdma_lock.h index bbc56bc182..70f0990879 100644 --- a/ompi/mca/osc/rdma/osc_rdma_lock.h +++ b/ompi/mca/osc/rdma/osc_rdma_lock.h @@ -47,6 +47,10 @@ static inline int ompi_osc_rdma_btl_fop (ompi_osc_rdma_module_t *module, struct if (wait_for_completion) { OBJ_RETAIN(pending_op); + } else { + /* NTH: need to keep track of pending ops to avoid a potential teardown problem */ + pending_op->module = module; + (void) opal_atomic_fetch_add_32 (&module->pending_ops, 1); } pending_op->op_result = (void *) result; @@ -130,6 +134,12 @@ static inline int ompi_osc_rdma_btl_op (ompi_osc_rdma_module_t *module, struct m pending_op->cbcontext = cbcontext; } + if (!wait_for_completion) { + /* NTH: need to keep track of pending ops to avoid a potential teardown problem */ + pending_op->module = module; + (void) opal_atomic_fetch_add_32 (&module->pending_ops, 1); + } + /* spin until the btl has accepted the operation */ do { ret = module->selected_btl->btl_atomic_op (module->selected_btl, endpoint, (intptr_t) address, address_handle, diff --git a/ompi/mca/osc/rdma/osc_rdma_module.c b/ompi/mca/osc/rdma/osc_rdma_module.c index 358c284f5d..e7d04fb96f 100644 --- a/ompi/mca/osc/rdma/osc_rdma_module.c +++ b/ompi/mca/osc/rdma/osc_rdma_module.c @@ -51,6 +51,10 @@ int ompi_osc_rdma_free(ompi_win_t *win) return OMPI_SUCCESS; } + while (module->pending_ops) { + ompi_osc_rdma_progress (module); + } + if (NULL != module->comm) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "rdma component destroying window with id %d", diff --git a/ompi/mca/osc/rdma/osc_rdma_types.h b/ompi/mca/osc/rdma/osc_rdma_types.h index 81d04a55eb..790b8802cb 100644 --- a/ompi/mca/osc/rdma/osc_rdma_types.h +++ b/ompi/mca/osc/rdma/osc_rdma_types.h @@ -209,6 +209,7 @@ typedef void (*ompi_osc_rdma_pending_op_cb_fn_t) (void *, void *, int); struct ompi_osc_rdma_pending_op_t { opal_list_item_t super; + struct ompi_osc_rdma_module_t *module; struct ompi_osc_rdma_frag_t *op_frag; void *op_buffer; void *op_result;