1
1

opal/pmix: add blocking Fence to SLURM components.

A blocking fence is used in yalla del proc. The native pmix component already
exposes this functionality, so we need to expose it in SLURM's s1/s2 components as well.

This commit also fixes the uninitialized `rc` in the fencenb functions of both
components.
Этот коммит содержится в:
Artem Polyakov 2016-07-11 09:35:27 +03:00
родитель a55d57406b
Коммит 72585a905f
2 изменённых файла: 64 добавления и 2 удаления

Просмотреть файл

@ -38,6 +38,7 @@ static int s1_abort(int flag, const char msg[],
static int s1_commit(void); static int s1_commit(void);
static int s1_fencenb(opal_list_t *procs, int collect_data, static int s1_fencenb(opal_list_t *procs, int collect_data,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata); opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
static int s1_fence(opal_list_t *procs, int collect_data);
static int s1_put(opal_pmix_scope_t scope, static int s1_put(opal_pmix_scope_t scope,
opal_value_t *kv); opal_value_t *kv);
static int s1_get(const opal_process_name_t *id, static int s1_get(const opal_process_name_t *id,
@ -61,6 +62,7 @@ const opal_pmix_base_module_t opal_pmix_s1_module = {
.abort = s1_abort, .abort = s1_abort,
.commit = s1_commit, .commit = s1_commit,
.fence_nb = s1_fencenb, .fence_nb = s1_fencenb,
.fence = s1_fence,
.put = s1_put, .put = s1_put,
.get = s1_get, .get = s1_get,
.publish = s1_publish, .publish = s1_publish,
@ -527,7 +529,7 @@ static int s1_commit(void)
static void fencenb(int sd, short args, void *cbdata) static void fencenb(int sd, short args, void *cbdata)
{ {
pmi_opcaddy_t *op = (pmi_opcaddy_t*)cbdata; pmi_opcaddy_t *op = (pmi_opcaddy_t*)cbdata;
int rc; int rc = OPAL_SUCCESS;
int32_t i; int32_t i;
opal_value_t *kp, kvn; opal_value_t *kp, kvn;
opal_hwloc_locality_t locality; opal_hwloc_locality_t locality;
@ -617,6 +619,35 @@ static int s1_fencenb(opal_list_t *procs, int collect_data,
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
/*
 * Poll until the expression `a` becomes false, sleeping for 10 microseconds
 * after every check that finds it still true. `a` is re-evaluated on each
 * pass, so it must be an expression whose value can change while waiting.
 */
#define S1_WAIT_FOR_COMPLETION(a)       \
    do {                                \
        for (; (a); usleep(10)) {       \
        }                               \
    } while (0)
/*
 * Completion record shared between s1_fence() and fence_release().
 * s1_fence() initializes it positionally, so the member order matters.
 */
struct fence_result {
/* Non-zero while the fence is pending; fence_release() sets it to 0. */
volatile int flag;
/* Status code stored by fence_release() before it clears flag. */
int status;
};
/*
 * Completion callback handed to s1_fencenb() by s1_fence().
 *
 * status: result code of the fence operation.
 * cbdata: pointer to the struct fence_result owned by the waiting caller.
 *
 * The status is stored first and a write barrier is issued before the flag
 * is cleared, so the status store is ordered ahead of the flag store.
 */
static void fence_release(int status, void *cbdata)
{
    struct fence_result *waiter = cbdata;

    waiter->status = status;
    opal_atomic_wmb();
    waiter->flag = 0;
}
/*
 * Blocking fence implemented on top of s1_fencenb().
 *
 * Starts the non-blocking fence with fence_release() as the completion
 * callback and a stack-allocated fence_result as its cbdata, then polls
 * result.flag with S1_WAIT_FOR_COMPLETION until the callback clears it.
 *
 * procs, collect_data: forwarded unchanged to s1_fencenb().
 *
 * Returns the status handed to the completion callback, or the error code
 * returned by s1_fencenb() when the request could not be started.
 */
static int s1_fence(opal_list_t *procs, int collect_data)
{
    struct fence_result result = { .flag = 1, .status = OPAL_SUCCESS };
    int rc;

    rc = s1_fencenb(procs, collect_data, fence_release, (void*)&result);
    if (OPAL_SUCCESS != rc) {
        /* Assumes a failed s1_fencenb() never invokes the callback (TODO
         * confirm); waiting on the flag would then spin forever. */
        return rc;
    }

    S1_WAIT_FOR_COMPLETION(result.flag);
    /* NOTE(review): fence_release() issues a write barrier before clearing
     * the flag; consider a matching read barrier before reading status. */
    return result.status;
}
static int s1_get(const opal_process_name_t *id, static int s1_get(const opal_process_name_t *id,
const char *key, opal_list_t *info, const char *key, opal_list_t *info,
opal_value_t **kv) opal_value_t **kv)

Просмотреть файл

@ -45,6 +45,7 @@ static int s2_abort(int flag, const char msg[],
static int s2_commit(void); static int s2_commit(void);
static int s2_fencenb(opal_list_t *procs, int collect_data, static int s2_fencenb(opal_list_t *procs, int collect_data,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata); opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
static int s2_fence(opal_list_t *procs, int collect_data);
static int s2_put(opal_pmix_scope_t scope, static int s2_put(opal_pmix_scope_t scope,
opal_value_t *kv); opal_value_t *kv);
static int s2_get(const opal_process_name_t *id, static int s2_get(const opal_process_name_t *id,
@ -68,6 +69,7 @@ const opal_pmix_base_module_t opal_pmix_s2_module = {
.abort = s2_abort, .abort = s2_abort,
.commit = s2_commit, .commit = s2_commit,
.fence_nb = s2_fencenb, .fence_nb = s2_fencenb,
.fence = s2_fence,
.put = s2_put, .put = s2_put,
.get = s2_get, .get = s2_get,
.publish = s2_publish, .publish = s2_publish,
@ -545,7 +547,7 @@ static int s2_commit(void)
static void fencenb(int sd, short args, void *cbdata) static void fencenb(int sd, short args, void *cbdata)
{ {
pmi_opcaddy_t *op = (pmi_opcaddy_t*)cbdata; pmi_opcaddy_t *op = (pmi_opcaddy_t*)cbdata;
int rc; int rc = OPAL_SUCCESS;
int32_t i; int32_t i;
opal_value_t *kp, kvn; opal_value_t *kp, kvn;
opal_hwloc_locality_t locality; opal_hwloc_locality_t locality;
@ -635,6 +637,35 @@ static int s2_fencenb(opal_list_t *procs, int collect_data,
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
/*
 * Poll until the expression `a` becomes false, sleeping for 10 microseconds
 * after every check that finds it still true. `a` is re-evaluated on each
 * pass, so it must be an expression whose value can change while waiting.
 */
#define S2_WAIT_FOR_COMPLETION(a)       \
    do {                                \
        for (; (a); usleep(10)) {       \
        }                               \
    } while (0)
/*
 * Completion record shared between s2_fence() and fence_release().
 * s2_fence() initializes it positionally, so the member order matters.
 */
struct fence_result {
/* Non-zero while the fence is pending; fence_release() sets it to 0. */
volatile int flag;
/* Status code stored by fence_release() before it clears flag. */
int status;
};
/*
 * Completion callback handed to s2_fencenb() by s2_fence().
 *
 * status: result code of the fence operation.
 * cbdata: pointer to the struct fence_result owned by the waiting caller.
 *
 * The status is stored first and a write barrier is issued before the flag
 * is cleared, so the status store is ordered ahead of the flag store.
 */
static void fence_release(int status, void *cbdata)
{
    struct fence_result *waiter = cbdata;

    waiter->status = status;
    opal_atomic_wmb();
    waiter->flag = 0;
}
/*
 * Blocking fence implemented on top of s2_fencenb().
 *
 * Starts the non-blocking fence with fence_release() as the completion
 * callback and a stack-allocated fence_result as its cbdata, then polls
 * result.flag with S2_WAIT_FOR_COMPLETION until the callback clears it.
 *
 * procs, collect_data: forwarded unchanged to s2_fencenb().
 *
 * Returns the status handed to the completion callback, or the error code
 * returned by s2_fencenb() when the request could not be started.
 */
static int s2_fence(opal_list_t *procs, int collect_data)
{
    struct fence_result result = { .flag = 1, .status = OPAL_SUCCESS };
    int rc;

    rc = s2_fencenb(procs, collect_data, fence_release, (void*)&result);
    if (OPAL_SUCCESS != rc) {
        /* Assumes a failed s2_fencenb() never invokes the callback (TODO
         * confirm); waiting on the flag would then spin forever. */
        return rc;
    }

    S2_WAIT_FOR_COMPLETION(result.flag);
    /* NOTE(review): fence_release() issues a write barrier before clearing
     * the flag; consider a matching read barrier before reading status. */
    return result.status;
}
static int s2_get(const opal_process_name_t *id, static int s2_get(const opal_process_name_t *id,
const char *key, opal_list_t *info, const char *key, opal_list_t *info,
opal_value_t **kv) opal_value_t **kv)