From a081fba0465e0e03472fc45c9a4a7154f539e3f6 Mon Sep 17 00:00:00 2001 From: Sergey Oblomov Date: Wed, 18 Jul 2018 16:59:13 +0300 Subject: [PATCH 1/4] OSC/UCX: fixed hang on OSC init - there worked progress was missed on startup which caused hang on one of ranks Signed-off-by: Sergey Oblomov --- ompi/mca/osc/ucx/osc_ucx.h | 1 + ompi/mca/osc/ucx/osc_ucx_component.c | 20 ++++++++++---------- ompi/mca/osc/ucx/osc_ucx_request.c | 1 + 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/ompi/mca/osc/ucx/osc_ucx.h b/ompi/mca/osc/ucx/osc_ucx.h index 095de34c27..f4ffbf17bd 100644 --- a/ompi/mca/osc/ucx/osc_ucx.h +++ b/ompi/mca/osc/ucx/osc_ucx.h @@ -38,6 +38,7 @@ typedef struct ompi_osc_ucx_component { opal_free_list_t requests; /* request free list for the r* communication variants */ bool env_initialized; /* UCX environment is initialized or not */ int num_incomplete_req_ops; + int init_in_progress; unsigned int priority; } ompi_osc_ucx_component_t; diff --git a/ompi/mca/osc/ucx/osc_ucx_component.c b/ompi/mca/osc/ucx/osc_ucx_component.c index dc6c5f2e44..9090b9e677 100644 --- a/ompi/mca/osc/ucx/osc_ucx_component.c +++ b/ompi/mca/osc/ucx/osc_ucx_component.c @@ -45,7 +45,12 @@ ompi_osc_ucx_component_t mca_osc_ucx_component = { .osc_query = component_query, .osc_select = component_select, .osc_finalize = component_finalize, - } + }, + .ucp_context = NULL, + .ucp_worker = NULL, + .env_initialized = false, + .num_incomplete_req_ops = 0, + .init_in_progress = 1 }; ompi_osc_ucx_module_t ompi_osc_ucx_module_template = { @@ -105,24 +110,19 @@ static int component_register(void) { } static int progress_callback(void) { - if (mca_osc_ucx_component.ucp_worker != NULL && - mca_osc_ucx_component.num_incomplete_req_ops > 0) { + if ((mca_osc_ucx_component.ucp_worker != NULL) && + (mca_osc_ucx_component.num_incomplete_req_ops + + mca_osc_ucx_component.init_in_progress > 0)) { ucp_worker_progress(mca_osc_ucx_component.ucp_worker); } return 0; } static int component_init(bool enable_progress_threads, bool enable_mpi_threads) { - int ret = OMPI_SUCCESS; - - mca_osc_ucx_component.ucp_context = NULL; - mca_osc_ucx_component.ucp_worker = NULL; mca_osc_ucx_component.enable_mpi_threads = enable_mpi_threads; - mca_osc_ucx_component.env_initialized = false; - mca_osc_ucx_component.num_incomplete_req_ops = 0; opal_common_ucx_mca_register(); - return ret; + return OMPI_SUCCESS; } static int component_finalize(void) { diff --git a/ompi/mca/osc/ucx/osc_ucx_request.c b/ompi/mca/osc/ucx/osc_ucx_request.c index efbd9c38cc..146111f948 100644 --- a/ompi/mca/osc/ucx/osc_ucx_request.c +++ b/ompi/mca/osc/ucx/osc_ucx_request.c @@ -57,6 +57,7 @@ void req_completion(void *request, ucs_status_t status) { ompi_request_complete(&(req->external_req->super), true); ucp_request_release(req); mca_osc_ucx_component.num_incomplete_req_ops--; + mca_osc_ucx_component.init_in_progress = 0; assert(mca_osc_ucx_component.num_incomplete_req_ops >= 0); } } From 55b934bacf42f78de004feb60f8d4f97557cacce Mon Sep 17 00:00:00 2001 From: Sergey Oblomov Date: Wed, 18 Jul 2018 17:52:30 +0300 Subject: [PATCH 2/4] OSC/UCX: enable progress when at least one window is allocated Signed-off-by: Sergey Oblomov --- ompi/mca/osc/ucx/osc_ucx.h | 2 +- ompi/mca/osc/ucx/osc_ucx_component.c | 9 ++++++--- ompi/mca/osc/ucx/osc_ucx_request.c | 1 - 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/ompi/mca/osc/ucx/osc_ucx.h b/ompi/mca/osc/ucx/osc_ucx.h index f4ffbf17bd..44dff95a84 100644 --- a/ompi/mca/osc/ucx/osc_ucx.h +++ b/ompi/mca/osc/ucx/osc_ucx.h @@ -38,7 +38,7 @@ typedef struct ompi_osc_ucx_component { opal_free_list_t requests; /* request free list for the r* communication variants */ bool env_initialized; /* UCX environment is initialized or not */ int num_incomplete_req_ops; - int init_in_progress; + int num_modules; unsigned int priority; } ompi_osc_ucx_component_t; diff --git a/ompi/mca/osc/ucx/osc_ucx_component.c b/ompi/mca/osc/ucx/osc_ucx_component.c index 9090b9e677..237858a26d 100644 --- a/ompi/mca/osc/ucx/osc_ucx_component.c +++ b/ompi/mca/osc/ucx/osc_ucx_component.c @@ -50,7 +50,7 @@ ompi_osc_ucx_component_t mca_osc_ucx_component = { .ucp_worker = NULL, .env_initialized = false, .num_incomplete_req_ops = 0, - .init_in_progress = 1 + .num_modules = 0 }; ompi_osc_ucx_module_t ompi_osc_ucx_module_template = { @@ -111,8 +111,7 @@ static int component_register(void) { static int progress_callback(void) { if ((mca_osc_ucx_component.ucp_worker != NULL) && - (mca_osc_ucx_component.num_incomplete_req_ops + - mca_osc_ucx_component.init_in_progress > 0)) { + mca_osc_ucx_component.num_modules) { ucp_worker_progress(mca_osc_ucx_component.ucp_worker); } return 0; @@ -362,6 +361,8 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in goto error_nomem; } + mca_osc_ucx_component.num_modules++; + /* fill in the function pointer part */ memcpy(module, &ompi_osc_ucx_module_template, sizeof(ompi_osc_base_module_t)); @@ -645,6 +646,7 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in } if (progress_registered) opal_progress_unregister(progress_callback); if (module) free(module); + mca_osc_ucx_component.num_modules--; error_nomem: if (env_initialized == true) { @@ -812,6 +814,7 @@ int ompi_osc_ucx_free(struct ompi_win_t *win) { ompi_comm_free(&module->comm); free(module); + mca_osc_ucx_component.num_modules--; return ret; } diff --git a/ompi/mca/osc/ucx/osc_ucx_request.c b/ompi/mca/osc/ucx/osc_ucx_request.c index 146111f948..efbd9c38cc 100644 --- a/ompi/mca/osc/ucx/osc_ucx_request.c +++ b/ompi/mca/osc/ucx/osc_ucx_request.c @@ -57,7 +57,6 @@ void req_completion(void *request, ucs_status_t status) { ompi_request_complete(&(req->external_req->super), true); ucp_request_release(req); mca_osc_ucx_component.num_incomplete_req_ops--; - mca_osc_ucx_component.init_in_progress = 0; assert(mca_osc_ucx_component.num_incomplete_req_ops >= 0); } } From 6f0a7a2005023e1cfd7a7a17fa862ef60be5cdcc Mon Sep 17 00:00:00 2001 From: Sergey Oblomov Date: Thu, 19 Jul 2018 12:07:26 +0300 Subject: [PATCH 3/4] OSC/UCX: opal progress register/unregister optimization Signed-off-by: Sergey Oblomov --- ompi/mca/osc/ucx/osc_ucx_component.c | 44 ++++++++++++++++++---------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/ompi/mca/osc/ucx/osc_ucx_component.c b/ompi/mca/osc/ucx/osc_ucx_component.c index 237858a26d..a0bd7be80a 100644 --- a/ompi/mca/osc/ucx/osc_ucx_component.c +++ b/ompi/mca/osc/ucx/osc_ucx_component.c @@ -110,10 +110,7 @@ static int component_register(void) { } static int progress_callback(void) { - if ((mca_osc_ucx_component.ucp_worker != NULL) && - mca_osc_ucx_component.num_modules) { - ucp_worker_progress(mca_osc_ucx_component.ucp_worker); - } + ucp_worker_progress(mca_osc_ucx_component.ucp_worker); return 0; } @@ -140,7 +137,6 @@ static int component_finalize(void) { assert(mca_osc_ucx_component.num_incomplete_req_ops == 0); if (mca_osc_ucx_component.env_initialized == true) { OBJ_DESTRUCT(&mca_osc_ucx_component.requests); - opal_progress_unregister(progress_callback); ucp_cleanup(mca_osc_ucx_component.ucp_context); mca_osc_ucx_component.env_initialized = false; } @@ -250,7 +246,7 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in ucs_status_t status; int i, comm_size = ompi_comm_size(comm); int is_eps_ready; - bool progress_registered = false, eps_created = false, env_initialized = false; + bool eps_created = false, env_initialized = false; ucp_address_t *my_addr = NULL; size_t my_addr_len; char *recv_buf = NULL; @@ -327,13 +323,6 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in goto error_nomem; } - ret = opal_progress_register(progress_callback); - progress_registered = true; - if (OMPI_SUCCESS != ret) { - OSC_UCX_VERBOSE(1, "opal_progress_register failed: %d", ret); - goto error; - } - /* query UCP worker attributes */ worker_attr.field_mask = UCP_WORKER_ATTR_FIELD_THREAD_MODE; status = ucp_worker_query(mca_osc_ucx_component.ucp_worker, &worker_attr); @@ -617,6 +606,14 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in goto error; } + OSC_UCX_ASSERT(mca_osc_ucx_component.num_modules > 0); + if (1 == mca_osc_ucx_component.num_modules) { + ret = opal_progress_register(progress_callback); + if (OMPI_SUCCESS != ret) { + OSC_UCX_VERBOSE(1, "opal_progress_register failed: %d", ret); + goto error; + } + } return ret; error: @@ -644,9 +641,17 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in ucp_ep_destroy(ep); } } - if (progress_registered) opal_progress_unregister(progress_callback); - if (module) free(module); - mca_osc_ucx_component.num_modules--; + if (module) { + free(module); + mca_osc_ucx_component.num_modules--; + OSC_UCX_ASSERT(mca_osc_ucx_component.num_modules >= 0); + if (0 == mca_osc_ucx_component.num_modules) { + ret = opal_progress_unregister(progress_callback); + if (OMPI_SUCCESS != ret) { + OSC_UCX_VERBOSE(1, "opal_progress_unregister failed: %d", ret); + } + } + } error_nomem: if (env_initialized == true) { @@ -815,6 +820,13 @@ int ompi_osc_ucx_free(struct ompi_win_t *win) { free(module); mca_osc_ucx_component.num_modules--; + OSC_UCX_ASSERT(mca_osc_ucx_component.num_modules >= 0); + if (0 == mca_osc_ucx_component.num_modules) { + ret = opal_progress_unregister(progress_callback); + if (OMPI_SUCCESS != ret) { + OSC_UCX_VERBOSE(1, "opal_progress_unregister failed: %d", ret); + } + } return ret; } From fa33e322e70412f01cdf0f0f96cb62578b0786d1 Mon Sep 17 00:00:00 2001 From: Sergey Oblomov Date: Thu, 19 Jul 2018 12:39:15 +0300 Subject: [PATCH 4/4] OSC/UCX: code deduplication Signed-off-by: Sergey Oblomov --- ompi/mca/osc/ucx/osc_ucx_component.c | 33 ++++++++++++++-------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/ompi/mca/osc/ucx/osc_ucx_component.c b/ompi/mca/osc/ucx/osc_ucx_component.c index a0bd7be80a..ad604fb873 100644 --- a/ompi/mca/osc/ucx/osc_ucx_component.c +++ b/ompi/mca/osc/ucx/osc_ucx_component.c @@ -26,6 +26,7 @@ static int component_query(struct ompi_win_t *win, void **base, size_t size, int static int component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit, struct ompi_communicator_t *comm, struct opal_info_t *info, int flavor, int *model); +static void ompi_osc_ucx_unregister_progress(void); ompi_osc_ucx_component_t mca_osc_ucx_component = { { /* ompi_osc_base_component_t */ @@ -236,6 +237,20 @@ static inline int mem_map(void **base, size_t size, ucp_mem_h *memh_ptr, return ret; } +static void ompi_osc_ucx_unregister_progress() +{ + int ret; + + mca_osc_ucx_component.num_modules--; + OSC_UCX_ASSERT(mca_osc_ucx_component.num_modules >= 0); + if (0 == mca_osc_ucx_component.num_modules) { + ret = opal_progress_unregister(progress_callback); + if (OMPI_SUCCESS != ret) { + OSC_UCX_VERBOSE(1, "opal_progress_unregister failed: %d", ret); + } + } +} + static int component_select(struct ompi_win_t *win, void **base, size_t size, int disp_unit, struct ompi_communicator_t *comm, struct opal_info_t *info, int flavor, int *model) { @@ -643,14 +658,7 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in } if (module) { free(module); - mca_osc_ucx_component.num_modules--; - OSC_UCX_ASSERT(mca_osc_ucx_component.num_modules >= 0); - if (0 == mca_osc_ucx_component.num_modules) { - ret = opal_progress_unregister(progress_callback); - if (OMPI_SUCCESS != ret) { - OSC_UCX_VERBOSE(1, "opal_progress_unregister failed: %d", ret); - } - } + ompi_osc_ucx_unregister_progress(); } error_nomem: @@ -819,14 +827,7 @@ int ompi_osc_ucx_free(struct ompi_win_t *win) { ompi_comm_free(&module->comm); free(module); - mca_osc_ucx_component.num_modules--; - OSC_UCX_ASSERT(mca_osc_ucx_component.num_modules >= 0); - if (0 == mca_osc_ucx_component.num_modules) { - ret = opal_progress_unregister(progress_callback); - if (OMPI_SUCCESS != ret) { - OSC_UCX_VERBOSE(1, "opal_progress_unregister failed: %d", ret); - } - } + ompi_osc_ucx_unregister_progress(); return ret; }