From 56e31f8206516d8ebbc07dbf45edc75a633d6cf4 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Thu, 23 Aug 2018 15:40:21 -0600 Subject: [PATCH] osc/rdma: clean out stale aggregation code The aggregation code in osc/rdma is currently broken and will likely not be reused. This commit cleans it out. Signed-off-by: Nathan Hjelm --- ompi/mca/osc/rdma/osc_rdma.h | 26 ----- ompi/mca/osc/rdma/osc_rdma_comm.c | 145 ------------------------- ompi/mca/osc/rdma/osc_rdma_component.c | 79 -------------- ompi/mca/osc/rdma/osc_rdma_peer.h | 12 +- ompi/mca/osc/rdma/osc_rdma_sync.c | 2 - ompi/mca/osc/rdma/osc_rdma_sync.h | 3 - ompi/mca/osc/rdma/osc_rdma_types.h | 36 +----- 7 files changed, 2 insertions(+), 301 deletions(-) diff --git a/ompi/mca/osc/rdma/osc_rdma.h b/ompi/mca/osc/rdma/osc_rdma.h index 277be1a1e9..b3743f261e 100644 --- a/ompi/mca/osc/rdma/osc_rdma.h +++ b/ompi/mca/osc/rdma/osc_rdma.h @@ -77,9 +77,6 @@ struct ompi_osc_rdma_component_t { /** RDMA component buffer size */ unsigned int buffer_size; - /** aggregation limit */ - unsigned int aggregation_limit; - /** List of requests that need to be freed */ opal_list_t request_gc; @@ -104,9 +101,6 @@ struct ompi_osc_rdma_component_t { /** Priority of the osc/rdma component */ unsigned int priority; - /** aggregation free list */ - opal_free_list_t aggregate; - /** directory where to place backing files */ char *backing_directory; }; @@ -569,16 +563,6 @@ static inline void ompi_osc_rdma_sync_rdma_dec (ompi_osc_rdma_sync_t *rdma_sync) */ static inline void ompi_osc_rdma_sync_rdma_complete (ompi_osc_rdma_sync_t *sync) { - if (opal_list_get_size (&sync->aggregations)) { - ompi_osc_rdma_aggregation_t *aggregation, *next; - - OPAL_THREAD_SCOPED_LOCK(&sync->lock, - OPAL_LIST_FOREACH_SAFE(aggregation, next, &sync->aggregations, ompi_osc_rdma_aggregation_t) { - fprintf (stderr, "Flushing aggregation %p, peer %p\n", (void*)aggregation, (void*)aggregation->peer); - ompi_osc_rdma_peer_aggregate_flush (aggregation->peer); - }); - } - #if !defined(BTL_VERSION) || (BTL_VERSION < 310) do { opal_progress (); @@ -611,16 +595,6 @@ static inline bool ompi_osc_rdma_access_epoch_active (ompi_osc_rdma_module_t *mo return (module->all_sync.epoch_active || ompi_osc_rdma_in_passive_epoch (module)); } -static inline void ompi_osc_rdma_aggregation_return (ompi_osc_rdma_aggregation_t *aggregation) -{ - if (aggregation->sync) { - opal_list_remove_item (&aggregation->sync->aggregations, (opal_list_item_t *) aggregation); - } - - opal_free_list_return(&mca_osc_rdma_component.aggregate, (opal_free_list_item_t *) aggregation); -} - - __opal_attribute_always_inline__ static inline bool ompi_osc_rdma_oor (int rc) { diff --git a/ompi/mca/osc/rdma/osc_rdma_comm.c b/ompi/mca/osc/rdma/osc_rdma_comm.c index 4e3736d951..1c16676778 100644 --- a/ompi/mca/osc/rdma/osc_rdma_comm.c +++ b/ompi/mca/osc/rdma/osc_rdma_comm.c @@ -434,28 +434,6 @@ static void ompi_osc_rdma_put_complete_flush (struct mca_btl_base_module_t *btl, } } -static void ompi_osc_rdma_aggregate_put_complete (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, - void *local_address, mca_btl_base_registration_handle_t *local_handle, - void *context, void *data, int status) -{ - ompi_osc_rdma_aggregation_t *aggregation = (ompi_osc_rdma_aggregation_t *) context; - ompi_osc_rdma_sync_t *sync = aggregation->sync; - ompi_osc_rdma_frag_t *frag = aggregation->frag; - - assert (OPAL_SUCCESS == status); - - OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "aggregate put complete %p on sync %p. local address %p. status %d", - (void *) aggregation, (void *) sync, local_address, status); - - ompi_osc_rdma_frag_complete (frag); - ompi_osc_rdma_aggregation_return (aggregation); - - /* make sure the aggregation is returned before marking the operation as complete */ - opal_atomic_wmb (); - - ompi_osc_rdma_sync_rdma_dec (sync); -} - static int ompi_osc_rdma_put_real (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_peer_t *peer, uint64_t target_address, mca_btl_base_registration_handle_t *target_handle, void *ptr, mca_btl_base_registration_handle_t *local_handle, size_t size, @@ -492,75 +470,11 @@ static int ompi_osc_rdma_put_real (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_pee return ret; } -#if 0 -static void ompi_osc_rdma_aggregate_append (ompi_osc_rdma_aggregation_t *aggregation, ompi_osc_rdma_request_t *request, - void *source_buffer, size_t size) -{ - size_t offset = aggregation->buffer_used; - - OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "appending %lu bytes of data from %p to aggregate fragment %p with start " - "address 0x%lx", (unsigned long) size, source_buffer, (void *) aggregation, - (unsigned long) aggregation->target_address); - - memcpy (aggregation->buffer + offset, source_buffer, size); - - aggregation->buffer_used += size; - - if (request) { - /* the local buffer is now available */ - ompi_osc_rdma_request_complete (request, 0); - } -} - -static int ompi_osc_rdma_aggregate_alloc (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_peer_t *peer, uint64_t target_address, - mca_btl_base_registration_handle_t *target_handle, void *source_buffer, size_t size, - ompi_osc_rdma_request_t *request, int type) -{ - ompi_osc_rdma_module_t *module = sync->module; - ompi_osc_rdma_aggregation_t *aggregation; - int ret; - - aggregation = (ompi_osc_rdma_aggregation_t *) opal_free_list_get (&mca_osc_rdma_component.aggregate); - if (OPAL_UNLIKELY(NULL == aggregation)) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - - ret = ompi_osc_rdma_frag_alloc (module, mca_osc_rdma_component.aggregation_limit, &aggregation->frag, - &aggregation->buffer); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - opal_free_list_return(&mca_osc_rdma_component.aggregate, (opal_free_list_item_t *) aggregation); - return ret; - } - - OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "allocated new aggregate fragment %p for target %d", (void *) aggregation, - peer->rank); - - peer->aggregate = aggregation; - - aggregation->target_address = target_address; - aggregation->target_handle = target_handle; - aggregation->buffer_size = mca_osc_rdma_component.aggregation_limit; - aggregation->sync = sync; - aggregation->peer = peer; - aggregation->type = type; - aggregation->buffer_used = 0; - - ompi_osc_rdma_aggregate_append (aggregation, request, source_buffer, size); - - opal_list_append (&sync->aggregations, (opal_list_item_t *) aggregation); - - return OMPI_SUCCESS; -} -#endif - int ompi_osc_rdma_put_contig (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_peer_t *peer, uint64_t target_address, mca_btl_base_registration_handle_t *target_handle, void *source_buffer, size_t size, ompi_osc_rdma_request_t *request) { ompi_osc_rdma_module_t *module = sync->module; -#if 0 - ompi_osc_rdma_aggregation_t *aggregation = peer->aggregate; -#endif mca_btl_base_registration_handle_t *local_handle = NULL; mca_btl_base_rdma_completion_fn_t cbfunc = NULL; ompi_osc_rdma_frag_t *frag = NULL; @@ -568,34 +482,6 @@ int ompi_osc_rdma_put_contig (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_peer_t * void *cbcontext; int ret; -#if 0 - if (aggregation) { - if (size <= (aggregation->buffer_size - aggregation->buffer_used) && (target_handle == aggregation->target_handle) && - (target_address == aggregation->target_address + aggregation->buffer_used)) { - assert (OMPI_OSC_RDMA_TYPE_PUT == aggregation->type); - ompi_osc_rdma_aggregate_append (aggregation, request, source_buffer, size); - return OMPI_SUCCESS; - } - - /* can't aggregate this operation. flush the previous segment */ - ret = ompi_osc_rdma_peer_aggregate_flush (peer); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - return ret; - } - } - - if (size <= (mca_osc_rdma_component.aggregation_limit >> 2)) { - ret = ompi_osc_rdma_aggregate_alloc (sync, peer, target_address, target_handle, source_buffer, size, request, - OMPI_OSC_RDMA_TYPE_PUT); - if (OPAL_LIKELY(OMPI_SUCCESS == ret)) { - if (request) { - - } - return ret; - } - } -#endif - if (module->selected_btl->btl_register_mem && size > module->selected_btl->btl_put_local_registration_threshold) { ret = ompi_osc_rdma_frag_alloc (module, size, &frag, &ptr); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { @@ -680,37 +566,6 @@ static void ompi_osc_rdma_get_complete (struct mca_btl_base_module_t *btl, struc ompi_osc_rdma_request_complete (request, status); } -int ompi_osc_rdma_peer_aggregate_flush (ompi_osc_rdma_peer_t *peer) -{ - ompi_osc_rdma_aggregation_t *aggregation = peer->aggregate; - int ret; - - if (NULL == aggregation) { - return OMPI_SUCCESS; - } - - OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "flusing aggregate fragment %p", (void *) aggregation); - - assert (OMPI_OSC_RDMA_TYPE_PUT == aggregation->type); - - ret = ompi_osc_rdma_put_real (aggregation->sync, peer, aggregation->target_address, aggregation->target_handle, - aggregation->buffer, aggregation->frag->handle, aggregation->buffer_used, - ompi_osc_rdma_aggregate_put_complete, (void *) aggregation, NULL); - - peer->aggregate = NULL; - - if (OPAL_UNLIKELY(OMPI_SUCCESS == ret)) { - return OMPI_SUCCESS; - } - - ompi_osc_rdma_cleanup_rdma (aggregation->sync, false, aggregation->frag, NULL, NULL); - - ompi_osc_rdma_aggregation_return (aggregation); - - return ret; - -} - static int ompi_osc_rdma_get_partial (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_peer_t *peer, uint64_t source_address, mca_btl_base_registration_handle_t *source_handle, void *target_buffer, size_t size, ompi_osc_rdma_request_t *request) { diff --git a/ompi/mca/osc/rdma/osc_rdma_component.c b/ompi/mca/osc/rdma/osc_rdma_component.c index bf6c1a84bb..ef0fad453c 100644 --- a/ompi/mca/osc/rdma/osc_rdma_component.c +++ b/ompi/mca/osc/rdma/osc_rdma_component.c @@ -223,16 +223,6 @@ static int ompi_osc_rdma_component_register (void) MCA_BASE_VAR_SCOPE_GROUP, &mca_osc_rdma_component.max_attach); free(description_str); - mca_osc_rdma_component.aggregation_limit = 1024; - asprintf(&description_str, "Maximum size of an aggregated put/get. Messages are aggregated for consecutive" - "put and get operations. In some cases this may lead to higher latency but " - "should also lead to higher bandwidth utilization. Set to 0 to disable (default: %d)", - mca_osc_rdma_component.aggregation_limit); - (void) mca_base_component_var_register (&mca_osc_rdma_component.super.osc_version, "aggregation_limit", - description_str, MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, 0, OPAL_INFO_LVL_3, - MCA_BASE_VAR_SCOPE_GROUP, &mca_osc_rdma_component.aggregation_limit); - free(description_str); - mca_osc_rdma_component.priority = 101; asprintf(&description_str, "Priority of the osc/rdma component (default: %d)", mca_osc_rdma_component.priority); @@ -336,24 +326,6 @@ static int ompi_osc_rdma_component_init (bool enable_progress_threads, __FILE__, __LINE__, ret); } - OBJ_CONSTRUCT(&mca_osc_rdma_component.aggregate, opal_free_list_t); - - if (!enable_mpi_threads && mca_osc_rdma_component.aggregation_limit) { - ret = opal_free_list_init (&mca_osc_rdma_component.aggregate, - sizeof(ompi_osc_rdma_aggregation_t), 8, - OBJ_CLASS(ompi_osc_rdma_aggregation_t), 0, 0, - 32, 128, 32, NULL, 0, NULL, NULL, NULL); - - if (OPAL_SUCCESS != ret) { - opal_output_verbose(1, ompi_osc_base_framework.framework_output, - "%s:%d: opal_free_list_init failed: %d\n", - __FILE__, __LINE__, ret); - } - } else { - /* only enable put aggregation when not using threads */ - mca_osc_rdma_component.aggregation_limit = 0; - } - return ret; } @@ -373,7 +345,6 @@ int ompi_osc_rdma_component_finalize (void) OBJ_DESTRUCT(&mca_osc_rdma_component.requests); OBJ_DESTRUCT(&mca_osc_rdma_component.request_gc); OBJ_DESTRUCT(&mca_osc_rdma_component.buffer_gc); - OBJ_DESTRUCT(&mca_osc_rdma_component.aggregate); return OMPI_SUCCESS; } @@ -1355,53 +1326,3 @@ static char* ompi_osc_rdma_set_no_lock_info(opal_infosubscriber_t *obj, char *ke */ return module->no_locks ? "true" : "false"; } - -#if 0 // stale code? -static int ompi_osc_rdma_set_info (struct ompi_win_t *win, struct opal_info_t *info) -{ - ompi_osc_rdma_module_t *module = GET_MODULE(win); - bool temp; - - temp = check_config_value_bool ("no_locks", info); - if (temp && !module->no_locks) { - /* clean up the lock hash. it is up to the user to ensure no lock is - * outstanding from this process when setting the info key */ - OBJ_DESTRUCT(&module->outstanding_locks); - OBJ_CONSTRUCT(&module->outstanding_locks, opal_hash_table_t); - - module->no_locks = true; - win->w_flags |= OMPI_WIN_NO_LOCKS; - } else if (!temp && module->no_locks) { - int world_size = ompi_comm_size (module->comm); - int init_limit = world_size > 256 ? 256 : world_size; - int ret; - - ret = opal_hash_table_init (&module->outstanding_locks, init_limit); - if (OPAL_SUCCESS != ret) { - return ret; - } - - module->no_locks = false; - win->w_flags &= ~OMPI_WIN_NO_LOCKS; - } - - /* enforce collectiveness... */ - return module->comm->c_coll->coll_barrier(module->comm, - module->comm->c_coll->coll_barrier_module); -} - - -static int ompi_osc_rdma_get_info (struct ompi_win_t *win, struct opal_info_t **info_used) -{ - opal_info_t *info = OBJ_NEW(opal_info_t); - - if (NULL == info) { - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - *info_used = info; - - return OMPI_SUCCESS; -} -#endif -OBJ_CLASS_INSTANCE(ompi_osc_rdma_aggregation_t, opal_list_item_t, NULL, NULL); diff --git a/ompi/mca/osc/rdma/osc_rdma_peer.h b/ompi/mca/osc/rdma/osc_rdma_peer.h index 0e46ec6dfc..a0db4c4a7f 100644 --- a/ompi/mca/osc/rdma/osc_rdma_peer.h +++ b/ompi/mca/osc/rdma/osc_rdma_peer.h @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ * @@ -44,9 +44,6 @@ struct ompi_osc_rdma_peer_t { /** peer flags */ volatile int32_t flags; - - /** aggregation support */ - ompi_osc_rdma_aggregation_t *aggregate; }; typedef struct ompi_osc_rdma_peer_t ompi_osc_rdma_peer_t; @@ -164,13 +161,6 @@ int ompi_osc_rdma_new_peer (struct ompi_osc_rdma_module_t *module, int peer_id, */ struct ompi_osc_rdma_peer_t *ompi_osc_rdma_peer_lookup (struct ompi_osc_rdma_module_t *module, int peer_id); -/** - * @brief flush queued aggregated operation - * - * @param[in] peer osc rdma peer - */ -int ompi_osc_rdma_peer_aggregate_flush (ompi_osc_rdma_peer_t *peer); - /** * @brief lookup the btl endpoint for a peer * diff --git a/ompi/mca/osc/rdma/osc_rdma_sync.c b/ompi/mca/osc/rdma/osc_rdma_sync.c index f07ea4f783..49aae970ad 100644 --- a/ompi/mca/osc/rdma/osc_rdma_sync.c +++ b/ompi/mca/osc/rdma/osc_rdma_sync.c @@ -17,14 +17,12 @@ static void ompi_osc_rdma_sync_constructor (ompi_osc_rdma_sync_t *rdma_sync) rdma_sync->type = OMPI_OSC_RDMA_SYNC_TYPE_NONE; rdma_sync->epoch_active = false; rdma_sync->outstanding_rdma.counter = 0; - OBJ_CONSTRUCT(&rdma_sync->aggregations, opal_list_t); OBJ_CONSTRUCT(&rdma_sync->lock, opal_mutex_t); OBJ_CONSTRUCT(&rdma_sync->demand_locked_peers, opal_list_t); } static void ompi_osc_rdma_sync_destructor (ompi_osc_rdma_sync_t *rdma_sync) { - OBJ_DESTRUCT(&rdma_sync->aggregations); OBJ_DESTRUCT(&rdma_sync->lock); OBJ_DESTRUCT(&rdma_sync->demand_locked_peers); } diff --git a/ompi/mca/osc/rdma/osc_rdma_sync.h b/ompi/mca/osc/rdma/osc_rdma_sync.h index e33b32d437..202bf79265 100644 --- a/ompi/mca/osc/rdma/osc_rdma_sync.h +++ b/ompi/mca/osc/rdma/osc_rdma_sync.h @@ -97,9 +97,6 @@ struct ompi_osc_rdma_sync_t { /** outstanding rdma operations on epoch */ ompi_osc_rdma_sync_aligned_counter_t outstanding_rdma __opal_attribute_aligned__(64); - /** aggregated operations in this epoch */ - opal_list_t aggregations; - /** lock to protect sync structure members */ opal_mutex_t lock; }; diff --git a/ompi/mca/osc/rdma/osc_rdma_types.h b/ompi/mca/osc/rdma/osc_rdma_types.h index 790b8802cb..4acb40154d 100644 --- a/ompi/mca/osc/rdma/osc_rdma_types.h +++ b/ompi/mca/osc/rdma/osc_rdma_types.h @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ * @@ -171,40 +171,6 @@ struct ompi_osc_rdma_state_t { }; typedef struct ompi_osc_rdma_state_t ompi_osc_rdma_state_t; -struct ompi_osc_rdma_aggregation_t { - opal_list_item_t super; - - /** associated peer */ - struct ompi_osc_rdma_peer_t *peer; - - /** aggregation buffer frag */ - struct ompi_osc_rdma_frag_t *frag; - - /** synchronization object */ - struct ompi_osc_rdma_sync_t *sync; - - /** aggregation buffer */ - char *buffer; - - /** target for the operation */ - osc_rdma_base_t target_address; - - /** handle for target memory address */ - mca_btl_base_registration_handle_t *target_handle; - - /** buffer size */ - size_t buffer_size; - - /** buffer used */ - size_t buffer_used; - - /** type */ - int type; -}; -typedef struct ompi_osc_rdma_aggregation_t ompi_osc_rdma_aggregation_t; - -OBJ_CLASS_DECLARATION(ompi_osc_rdma_aggregation_t); - typedef void (*ompi_osc_rdma_pending_op_cb_fn_t) (void *, void *, int); struct ompi_osc_rdma_pending_op_t {