From 72585a905fecd22cd18456ab786fa3dbe14d55c1 Mon Sep 17 00:00:00 2001
From: Artem Polyakov
Date: Mon, 11 Jul 2016 09:35:27 +0300
Subject: [PATCH] opal/pmix: add blocking Fence to SLURM components.

Blocking fence is used in yalla del proc. Native pmix exposes this
functionality. We need to expose it for SLURM's s1/s2 components as well.

This commit also fixes the uninitialized `rc` in the fencenb functions
of both components.
---
 opal/mca/pmix/s1/pmix_s1.c | 33 ++++++++++++++++++++++++++++++++-
 opal/mca/pmix/s2/pmix_s2.c | 33 ++++++++++++++++++++++++++++++++-
 2 files changed, 64 insertions(+), 2 deletions(-)

diff --git a/opal/mca/pmix/s1/pmix_s1.c b/opal/mca/pmix/s1/pmix_s1.c
index a7e1e660e0..b2791b4490 100644
--- a/opal/mca/pmix/s1/pmix_s1.c
+++ b/opal/mca/pmix/s1/pmix_s1.c
@@ -38,6 +38,7 @@ static int s1_abort(int flag, const char msg[],
 static int s1_commit(void);
 static int s1_fencenb(opal_list_t *procs, int collect_data,
                       opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
+static int s1_fence(opal_list_t *procs, int collect_data);
 static int s1_put(opal_pmix_scope_t scope,
                   opal_value_t *kv);
 static int s1_get(const opal_process_name_t *id,
@@ -61,6 +62,7 @@ const opal_pmix_base_module_t opal_pmix_s1_module = {
     .abort = s1_abort,
     .commit = s1_commit,
     .fence_nb = s1_fencenb,
+    .fence = s1_fence,
     .put = s1_put,
     .get = s1_get,
     .publish = s1_publish,
@@ -527,7 +529,7 @@ static int s1_commit(void)
 static void fencenb(int sd, short args, void *cbdata)
 {
     pmi_opcaddy_t *op = (pmi_opcaddy_t*)cbdata;
-    int rc;
+    int rc = OPAL_SUCCESS;
     int32_t i;
     opal_value_t *kp, kvn;
     opal_hwloc_locality_t locality;
@@ -617,6 +619,35 @@ static int s1_fencenb(opal_list_t *procs, int collect_data,
     return OPAL_SUCCESS;
 }
 
+#define S1_WAIT_FOR_COMPLETION(a) \
+    do { \
+        while ((a)) { \
+            usleep(10); \
+        } \
+    } while (0)
+
+struct fence_result {
+    volatile int flag;
+    int status;
+};
+
+static void fence_release(int status, void *cbdata)
+{
+    struct fence_result *res = (struct fence_result*)cbdata;
+    res->status = status;
+    opal_atomic_wmb();
+    res->flag = 0;
+}
+
+static int s1_fence(opal_list_t *procs, int collect_data)
+{
+    struct fence_result result = { 1, OPAL_SUCCESS };
+    s1_fencenb(procs, collect_data, fence_release, (void*)&result);
+    S1_WAIT_FOR_COMPLETION(result.flag);
+    return result.status;
+}
+
+
 static int s1_get(const opal_process_name_t *id,
                   const char *key, opal_list_t *info,
                   opal_value_t **kv)
diff --git a/opal/mca/pmix/s2/pmix_s2.c b/opal/mca/pmix/s2/pmix_s2.c
index a51147d17c..d0cdb78dfa 100644
--- a/opal/mca/pmix/s2/pmix_s2.c
+++ b/opal/mca/pmix/s2/pmix_s2.c
@@ -45,6 +45,7 @@ static int s2_abort(int flag, const char msg[],
 static int s2_commit(void);
 static int s2_fencenb(opal_list_t *procs, int collect_data,
                       opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
+static int s2_fence(opal_list_t *procs, int collect_data);
 static int s2_put(opal_pmix_scope_t scope,
                   opal_value_t *kv);
 static int s2_get(const opal_process_name_t *id,
@@ -68,6 +69,7 @@ const opal_pmix_base_module_t opal_pmix_s2_module = {
     .abort = s2_abort,
     .commit = s2_commit,
     .fence_nb = s2_fencenb,
+    .fence = s2_fence,
     .put = s2_put,
     .get = s2_get,
     .publish = s2_publish,
@@ -545,7 +547,7 @@ static int s2_commit(void)
 static void fencenb(int sd, short args, void *cbdata)
 {
     pmi_opcaddy_t *op = (pmi_opcaddy_t*)cbdata;
-    int rc;
+    int rc = OPAL_SUCCESS;
     int32_t i;
     opal_value_t *kp, kvn;
     opal_hwloc_locality_t locality;
@@ -635,6 +637,35 @@ static int s2_fencenb(opal_list_t *procs, int collect_data,
     return OPAL_SUCCESS;
 }
 
+#define S2_WAIT_FOR_COMPLETION(a) \
+    do { \
+        while ((a)) { \
+            usleep(10); \
+        } \
+    } while (0)
+
+struct fence_result {
+    volatile int flag;
+    int status;
+};
+
+static void fence_release(int status, void *cbdata)
+{
+    struct fence_result *res = (struct fence_result*)cbdata;
+    res->status = status;
+    opal_atomic_wmb();
+    res->flag = 0;
+}
+
+static int s2_fence(opal_list_t *procs, int collect_data)
+{
+    struct fence_result result = { 1, OPAL_SUCCESS };
+    s2_fencenb(procs, collect_data, fence_release, (void*)&result);
+    S2_WAIT_FOR_COMPLETION(result.flag);
+    return result.status;
+}
+
+
 static int s2_get(const opal_process_name_t *id,
                   const char *key, opal_list_t *info,
                   opal_value_t **kv)