From 69a80fce9f0eb551e982d19c1cf8668851c55b95 Mon Sep 17 00:00:00 2001 From: Xin Zhao Date: Wed, 13 Mar 2019 04:39:26 +0200 Subject: [PATCH] ompi/oshmem/spml/ucx: use lockfree array to optimize spml_ucx_progress/delete oshmem_barrier in shmem_ctx_destroy ompi/oshmem/spml/ucx: optimize spml ucx progress Signed-off-by: Tomislav Janjusic (cherry picked from commit 9c3d00b144641d2929f830279dcc9d163c38e9e1) --- opal/mca/common/ucx/common_ucx.c | 1 + oshmem/mca/spml/ucx/spml_ucx.c | 109 ++++++++++++++--------- oshmem/mca/spml/ucx/spml_ucx.h | 22 +++-- oshmem/mca/spml/ucx/spml_ucx_component.c | 93 ++++++++++--------- 4 files changed, 130 insertions(+), 95 deletions(-) diff --git a/opal/mca/common/ucx/common_ucx.c b/opal/mca/common/ucx/common_ucx.c index 086c2dd6d9..7ccde24bc6 100644 --- a/opal/mca/common/ucx/common_ucx.c +++ b/opal/mca/common/ucx/common_ucx.c @@ -151,6 +151,7 @@ void opal_common_ucx_mca_proc_added(void) } } #endif +} OPAL_DECLSPEC int opal_common_ucx_mca_pmix_fence_nb(int *fenced) { diff --git a/oshmem/mca/spml/ucx/spml_ucx.c b/oshmem/mca/spml/ucx/spml_ucx.c index eb1f50da27..0522ba0966 100644 --- a/oshmem/mca/spml/ucx/spml_ucx.c +++ b/oshmem/mca/spml/ucx/spml_ucx.c @@ -80,8 +80,6 @@ mca_spml_ucx_t mca_spml_ucx = { .get_mkey_slow = NULL }; -OBJ_CLASS_INSTANCE(mca_spml_ucx_ctx_list_item_t, opal_list_item_t, NULL, NULL); - mca_spml_ucx_ctx_t mca_spml_ucx_ctx_default = { .ucp_worker = NULL, .ucp_peers = NULL, @@ -246,7 +244,7 @@ int mca_spml_ucx_add_procs(ompi_proc_t** procs, size_t nprocs) goto error; } - opal_progress_register(spml_ucx_progress); + opal_progress_register(spml_ucx_default_progress); mca_spml_ucx.remote_addrs_tbl = (char **)calloc(nprocs, sizeof(char *)); memset(mca_spml_ucx.remote_addrs_tbl, 0, nprocs * sizeof(char *)); @@ -514,9 +512,45 @@ int mca_spml_ucx_deregister(sshmem_mkey_t *mkeys) return OSHMEM_SUCCESS; } +static inline void _ctx_add(mca_spml_ucx_ctx_array_t *array, mca_spml_ucx_ctx_t *ctx) +{ + int i; + + if (array->ctxs_count < array->ctxs_num) { + array->ctxs[array->ctxs_count] = ctx; + } else { + array->ctxs = realloc(array->ctxs, (array->ctxs_num + MCA_SPML_UCX_CTXS_ARRAY_INC) * sizeof(mca_spml_ucx_ctx_t *)); + opal_atomic_wmb (); + for (i = array->ctxs_num; i < array->ctxs_num + MCA_SPML_UCX_CTXS_ARRAY_INC; i++) { + array->ctxs[i] = NULL; + } + array->ctxs[array->ctxs_num] = ctx; + array->ctxs_num += MCA_SPML_UCX_CTXS_ARRAY_INC; + } + + opal_atomic_wmb (); + array->ctxs_count++; +} + +static inline void _ctx_remove(mca_spml_ucx_ctx_array_t *array, mca_spml_ucx_ctx_t *ctx) +{ + int i; + + for (i = 0; i < array->ctxs_count; i++) { + if (array->ctxs[i] == ctx) { + array->ctxs[i] = array->ctxs[array->ctxs_count-1]; + array->ctxs[array->ctxs_count-1] = NULL; + break; + } + } + + array->ctxs_count--; + opal_atomic_wmb (); +} + int mca_spml_ucx_ctx_create(long options, shmem_ctx_t *ctx) { - mca_spml_ucx_ctx_list_item_t *ctx_item; + mca_spml_ucx_ctx_t *ucx_ctx; ucp_worker_params_t params; ucp_ep_params_t ep_params; size_t i, j, nprocs = oshmem_num_procs(); @@ -527,8 +561,8 @@ int mca_spml_ucx_ctx_create(long options, shmem_ctx_t *ctx) sshmem_mkey_t *mkey; int rc = OSHMEM_ERROR; - ctx_item = OBJ_NEW(mca_spml_ucx_ctx_list_item_t); - ctx_item->ctx.options = options; + ucx_ctx = malloc(sizeof(mca_spml_ucx_ctx_t)); + ucx_ctx->options = options; params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE; if (oshmem_mpi_thread_provided == SHMEM_THREAD_SINGLE || options & SHMEM_CTX_PRIVATE || options & SHMEM_CTX_SERIALIZED) { @@ -538,22 +572,26 @@ int mca_spml_ucx_ctx_create(long options, shmem_ctx_t *ctx) } err = ucp_worker_create(mca_spml_ucx.ucp_context, ¶ms, - &ctx_item->ctx.ucp_worker); + &ucx_ctx->ucp_worker); if (UCS_OK != err) { - OBJ_RELEASE(ctx_item); + free(ucx_ctx); return OSHMEM_ERROR; } - ctx_item->ctx.ucp_peers = (ucp_peer_t *) calloc(nprocs, sizeof(*(ctx_item->ctx.ucp_peers))); - if (NULL == ctx_item->ctx.ucp_peers) { + ucx_ctx->ucp_peers = (ucp_peer_t *) calloc(nprocs, sizeof(*(ucx_ctx->ucp_peers))); + if (NULL == ucx_ctx->ucp_peers) { goto error; } + if (mca_spml_ucx.active_array.ctxs_count == 0) { + opal_progress_register(spml_ucx_ctx_progress); + } + for (i = 0; i < nprocs; i++) { ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS; ep_params.address = (ucp_address_t *)(mca_spml_ucx.remote_addrs_tbl[i]); - err = ucp_ep_create(ctx_item->ctx.ucp_worker, &ep_params, - &ctx_item->ctx.ucp_peers[i].ucp_conn); + err = ucp_ep_create(ucx_ctx->ucp_worker, &ep_params, + &ucx_ctx->ucp_peers[i].ucp_conn); if (UCS_OK != err) { SPML_ERROR("ucp_ep_create(proc=%d/%d) failed: %s", i, nprocs, ucs_status_string(err)); @@ -562,41 +600,38 @@ int mca_spml_ucx_ctx_create(long options, shmem_ctx_t *ctx) for (j = 0; j < MCA_MEMHEAP_SEG_COUNT; j++) { mkey = &memheap_map->mem_segs[j].mkeys_cache[i][0]; - ucx_mkey = &ctx_item->ctx.ucp_peers[i].mkeys[j].key; - err = ucp_ep_rkey_unpack(ctx_item->ctx.ucp_peers[i].ucp_conn, + ucx_mkey = &ucx_ctx->ucp_peers[i].mkeys[j].key; + err = ucp_ep_rkey_unpack(ucx_ctx->ucp_peers[i].ucp_conn, mkey->u.data, &ucx_mkey->rkey); if (UCS_OK != err) { SPML_UCX_ERROR("failed to unpack rkey"); goto error2; } - mca_spml_ucx_cache_mkey(&ctx_item->ctx, mkey, j, i); + mca_spml_ucx_cache_mkey(ucx_ctx, mkey, j, i); } } SHMEM_MUTEX_LOCK(mca_spml_ucx.internal_mutex); - - opal_list_append(&(mca_spml_ucx.ctx_list), &ctx_item->super); - + _ctx_add(&mca_spml_ucx.active_array, ucx_ctx); SHMEM_MUTEX_UNLOCK(mca_spml_ucx.internal_mutex); - (*ctx) = (shmem_ctx_t)(&ctx_item->ctx); - + (*ctx) = (shmem_ctx_t)ucx_ctx; return OSHMEM_SUCCESS; error2: for (i = 0; i < nprocs; i++) { - if (ctx_item->ctx.ucp_peers[i].ucp_conn) { - ucp_ep_destroy(ctx_item->ctx.ucp_peers[i].ucp_conn); + if (ucx_ctx->ucp_peers[i].ucp_conn) { + ucp_ep_destroy(ucx_ctx->ucp_peers[i].ucp_conn); } } - if (ctx_item->ctx.ucp_peers) - free(ctx_item->ctx.ucp_peers); + if (ucx_ctx->ucp_peers) + free(ucx_ctx->ucp_peers); error: - ucp_worker_destroy(ctx_item->ctx.ucp_worker); - OBJ_RELEASE(ctx_item); + ucp_worker_destroy(ucx_ctx->ucp_worker); + free(ucx_ctx); rc = OSHMEM_ERR_OUT_OF_RESOURCE; SPML_ERROR("ctx create FAILED rc=%d", rc); return rc; @@ -604,26 +639,16 @@ int mca_spml_ucx_ctx_create(long options, shmem_ctx_t *ctx) void mca_spml_ucx_ctx_destroy(shmem_ctx_t ctx) { - mca_spml_ucx_ctx_list_item_t *ctx_item, *next; - size_t i, j, nprocs = oshmem_num_procs(); - MCA_SPML_CALL(quiet(ctx)); - oshmem_shmem_barrier(); - SHMEM_MUTEX_LOCK(mca_spml_ucx.internal_mutex); - - /* delete context object from list */ - OPAL_LIST_FOREACH_SAFE(ctx_item, next, &(mca_spml_ucx.ctx_list), - mca_spml_ucx_ctx_list_item_t) { - if ((shmem_ctx_t)(&ctx_item->ctx) == ctx) { - opal_list_remove_item(&(mca_spml_ucx.ctx_list), &ctx_item->super); - opal_list_append(&(mca_spml_ucx.idle_ctx_list), &ctx_item->super); - break; - } - } - + _ctx_remove(&mca_spml_ucx.active_array, (mca_spml_ucx_ctx_t *)ctx); + _ctx_add(&mca_spml_ucx.idle_array, (mca_spml_ucx_ctx_t *)ctx); SHMEM_MUTEX_UNLOCK(mca_spml_ucx.internal_mutex); + + if (!mca_spml_ucx.active_array.ctxs_count) { + opal_progress_unregister(spml_ucx_ctx_progress); + } } int mca_spml_ucx_get(shmem_ctx_t ctx, void *src_addr, size_t size, void *dst_addr, int src) diff --git a/oshmem/mca/spml/ucx/spml_ucx.h b/oshmem/mca/spml/ucx/spml_ucx.h index cf9c50c030..6c2424ba76 100644 --- a/oshmem/mca/spml/ucx/spml_ucx.h +++ b/oshmem/mca/spml/ucx/spml_ucx.h @@ -75,14 +75,14 @@ typedef struct mca_spml_ucx_ctx mca_spml_ucx_ctx_t; extern mca_spml_ucx_ctx_t mca_spml_ucx_ctx_default; -struct mca_spml_ucx_ctx_list_item { - opal_list_item_t super; - mca_spml_ucx_ctx_t ctx; -}; -typedef struct mca_spml_ucx_ctx_list_item mca_spml_ucx_ctx_list_item_t; - typedef spml_ucx_mkey_t * (*mca_spml_ucx_get_mkey_slow_fn_t)(shmem_ctx_t ctx, int pe, void *va, void **rva); +typedef struct mca_spml_ucx_ctx_array { + int ctxs_count; + int ctxs_num; + mca_spml_ucx_ctx_t **ctxs; +} mca_spml_ucx_ctx_array_t; + struct mca_spml_ucx { mca_spml_base_module_t super; ucp_context_h ucp_context; @@ -91,8 +91,8 @@ struct mca_spml_ucx { bool enabled; mca_spml_ucx_get_mkey_slow_fn_t get_mkey_slow; char **remote_addrs_tbl; - opal_list_t ctx_list; - opal_list_t idle_ctx_list; + mca_spml_ucx_ctx_array_t active_array; + mca_spml_ucx_ctx_array_t idle_array; int priority; /* component priority */ shmem_internal_mutex_t internal_mutex; }; @@ -152,7 +152,8 @@ extern int mca_spml_ucx_add_procs(ompi_proc_t** procs, size_t nprocs); extern int mca_spml_ucx_del_procs(ompi_proc_t** procs, size_t nprocs); extern int mca_spml_ucx_fence(shmem_ctx_t ctx); extern int mca_spml_ucx_quiet(shmem_ctx_t ctx); -extern int spml_ucx_progress(void); +extern int spml_ucx_default_progress(void); +extern int spml_ucx_ctx_progress(void); static void mca_spml_ucx_cache_mkey(mca_spml_ucx_ctx_t *ucx_ctx, sshmem_mkey_t *mkey, uint32_t segno, int dst_pe) { @@ -193,6 +194,9 @@ static inline int ucx_status_to_oshmem_nb(ucs_status_t status) #endif } +#define MCA_SPML_UCX_CTXS_ARRAY_SIZE 64 +#define MCA_SPML_UCX_CTXS_ARRAY_INC 64 + END_C_DECLS #endif diff --git a/oshmem/mca/spml/ucx/spml_ucx_component.c b/oshmem/mca/spml/ucx/spml_ucx_component.c index 009a00dbf8..720dbf88f6 100644 --- a/oshmem/mca/spml/ucx/spml_ucx_component.c +++ b/oshmem/mca/spml/ucx/spml_ucx_component.c @@ -109,16 +109,18 @@ static int mca_spml_ucx_component_register(void) return OSHMEM_SUCCESS; } -int spml_ucx_progress(void) +int spml_ucx_ctx_progress(void) { - mca_spml_ucx_ctx_list_item_t *ctx_item, *next; - ucp_worker_progress(mca_spml_ucx_ctx_default.ucp_worker); - SHMEM_MUTEX_LOCK(mca_spml_ucx.internal_mutex); - OPAL_LIST_FOREACH_SAFE(ctx_item, next, &(mca_spml_ucx.ctx_list), - mca_spml_ucx_ctx_list_item_t) { - ucp_worker_progress(ctx_item->ctx.ucp_worker); + int i; + for (i = 0; i < mca_spml_ucx.active_array.ctxs_count; i++) { + ucp_worker_progress(mca_spml_ucx.active_array.ctxs[i]->ucp_worker); } - SHMEM_MUTEX_UNLOCK(mca_spml_ucx.internal_mutex); + return 1; +} + +int spml_ucx_default_progress(void) +{ + ucp_worker_progress(mca_spml_ucx_ctx_default.ucp_worker); return 1; } @@ -175,8 +177,13 @@ static int spml_ucx_init(void) oshmem_mpi_thread_provided = SHMEM_THREAD_SINGLE; } - OBJ_CONSTRUCT(&(mca_spml_ucx.ctx_list), opal_list_t); - OBJ_CONSTRUCT(&(mca_spml_ucx.idle_ctx_list), opal_list_t); + mca_spml_ucx.active_array.ctxs_count = mca_spml_ucx.idle_array.ctxs_count = 0; + mca_spml_ucx.active_array.ctxs_num = mca_spml_ucx.idle_array.ctxs_num = MCA_SPML_UCX_CTXS_ARRAY_SIZE; + mca_spml_ucx.active_array.ctxs = calloc(mca_spml_ucx.active_array.ctxs_num, + sizeof(mca_spml_ucx_ctx_t *)); + mca_spml_ucx.idle_array.ctxs = calloc(mca_spml_ucx.idle_array.ctxs_num, + sizeof(mca_spml_ucx_ctx_t *)); + SHMEM_MUTEX_INIT(mca_spml_ucx.internal_mutex); wkr_params.field_mask = UCP_WORKER_PARAM_FIELD_THREAD_MODE; @@ -225,7 +232,7 @@ mca_spml_ucx_component_init(int* priority, return &mca_spml_ucx.super; } -static void _ctx_cleanup(mca_spml_ucx_ctx_list_item_t *ctx_item) +static void _ctx_cleanup(mca_spml_ucx_ctx_t *ctx) { int i, j, nprocs = oshmem_num_procs(); opal_common_ucx_del_proc_t *del_procs; @@ -234,43 +241,43 @@ static void _ctx_cleanup(mca_spml_ucx_ctx_list_item_t *ctx_item) for (i = 0; i < nprocs; ++i) { for (j = 0; j < MCA_MEMHEAP_SEG_COUNT; j++) { - if (ctx_item->ctx.ucp_peers[i].mkeys[j].key.rkey != NULL) { - ucp_rkey_destroy(ctx_item->ctx.ucp_peers[i].mkeys[j].key.rkey); + if (ctx->ucp_peers[i].mkeys[j].key.rkey != NULL) { + ucp_rkey_destroy(ctx->ucp_peers[i].mkeys[j].key.rkey); } } - del_procs[i].ep = ctx_item->ctx.ucp_peers[i].ucp_conn; + del_procs[i].ep = ctx->ucp_peers[i].ucp_conn; del_procs[i].vpid = i; - ctx_item->ctx.ucp_peers[i].ucp_conn = NULL; + ctx->ucp_peers[i].ucp_conn = NULL; } opal_common_ucx_del_procs_nofence(del_procs, nprocs, oshmem_my_proc_id(), mca_spml_ucx.num_disconnect, - ctx_item->ctx.ucp_worker); + ctx->ucp_worker); free(del_procs); - free(ctx_item->ctx.ucp_peers); + free(ctx->ucp_peers); } static int mca_spml_ucx_component_fini(void) { - mca_spml_ucx_ctx_list_item_t *ctx_item, *next; - int fenced = 0; + int fenced = 0, i; int ret = OSHMEM_SUCCESS; - opal_progress_unregister(spml_ucx_progress); + opal_progress_unregister(spml_ucx_default_progress); + if (mca_spml_ucx.active_array.ctxs_count) { + opal_progress_unregister(spml_ucx_ctx_progress); + } if(!mca_spml_ucx.enabled) return OSHMEM_SUCCESS; /* never selected.. return success.. */ /* delete context objects from list */ - OPAL_LIST_FOREACH_SAFE(ctx_item, next, &(mca_spml_ucx.idle_ctx_list), - mca_spml_ucx_ctx_list_item_t) { - _ctx_cleanup(ctx_item); + for (i = 0; i < mca_spml_ucx.active_array.ctxs_count; i++) { + _ctx_cleanup(mca_spml_ucx.active_array.ctxs[i]); } - OPAL_LIST_FOREACH_SAFE(ctx_item, next, &(mca_spml_ucx.ctx_list), - mca_spml_ucx_ctx_list_item_t) { - _ctx_cleanup(ctx_item); + for (i = 0; i < mca_spml_ucx.idle_array.ctxs_count; i++) { + _ctx_cleanup(mca_spml_ucx.idle_array.ctxs[i]); } ret = opal_common_ucx_mca_pmix_fence_nb(&fenced); @@ -279,29 +286,26 @@ static int mca_spml_ucx_component_fini(void) } while (!fenced) { - OPAL_LIST_FOREACH_SAFE(ctx_item, next, &(mca_spml_ucx.ctx_list), - mca_spml_ucx_ctx_list_item_t) { - ucp_worker_progress(ctx_item->ctx.ucp_worker); + for (i = 0; i < mca_spml_ucx.active_array.ctxs_count; i++) { + ucp_worker_progress(mca_spml_ucx.active_array.ctxs[i]->ucp_worker); } - OPAL_LIST_FOREACH_SAFE(ctx_item, next, &(mca_spml_ucx.idle_ctx_list), - mca_spml_ucx_ctx_list_item_t) { - ucp_worker_progress(ctx_item->ctx.ucp_worker); + + for (i = 0; i < mca_spml_ucx.idle_array.ctxs_count; i++) { + ucp_worker_progress(mca_spml_ucx.idle_array.ctxs[i]->ucp_worker); } + ucp_worker_progress(mca_spml_ucx_ctx_default.ucp_worker); } /* delete all workers */ - OPAL_LIST_FOREACH_SAFE(ctx_item, next, &(mca_spml_ucx.idle_ctx_list), - mca_spml_ucx_ctx_list_item_t) { - opal_list_remove_item(&(mca_spml_ucx.idle_ctx_list), &ctx_item->super); - ucp_worker_destroy(ctx_item->ctx.ucp_worker); - OBJ_RELEASE(ctx_item); + for (i = 0; i < mca_spml_ucx.active_array.ctxs_count; i++) { + ucp_worker_destroy(mca_spml_ucx.active_array.ctxs[i]->ucp_worker); + free(mca_spml_ucx.active_array.ctxs[i]); } - OPAL_LIST_FOREACH_SAFE(ctx_item, next, &(mca_spml_ucx.ctx_list), - mca_spml_ucx_ctx_list_item_t) { - opal_list_remove_item(&(mca_spml_ucx.ctx_list), &ctx_item->super); - ucp_worker_destroy(ctx_item->ctx.ucp_worker); - OBJ_RELEASE(ctx_item); + + for (i = 0; i < mca_spml_ucx.idle_array.ctxs_count; i++) { + ucp_worker_destroy(mca_spml_ucx.idle_array.ctxs[i]->ucp_worker); + free(mca_spml_ucx.idle_array.ctxs[i]); } if (mca_spml_ucx_ctx_default.ucp_worker) { @@ -310,8 +314,9 @@ static int mca_spml_ucx_component_fini(void) mca_spml_ucx.enabled = false; /* not anymore */ - OBJ_DESTRUCT(&(mca_spml_ucx.ctx_list)); - OBJ_DESTRUCT(&(mca_spml_ucx.idle_ctx_list)); + free(mca_spml_ucx.active_array.ctxs); + free(mca_spml_ucx.idle_array.ctxs); + SHMEM_MUTEX_DESTROY(mca_spml_ucx.internal_mutex); if (mca_spml_ucx.ucp_context) {