pmix/cray: define fence method for cray pmix
Turns out UCX PML calls opal_pmix.fence in its del procs method without checking whether or not the fence method for the pmix component was defined. Rather than patch UCX PML, actually define a fence method for the cray pmix. Signed-off-by: Howard Pritchard <howardp@lanl.gov>
Этот коммит содержится в:
родитель
a76a61b2c9
Коммит
e8bfd494e7
@ -53,6 +53,7 @@ static int cray_resolve_peers(const char *nodename,
|
||||
opal_list_t *procs);
|
||||
static int cray_resolve_nodes(opal_jobid_t jobid, char **nodelist);
|
||||
static int cray_put(opal_pmix_scope_t scope, opal_value_t *kv);
|
||||
static int cray_fence(opal_list_t *procs, int collect_data);
|
||||
static int cray_fencenb(opal_list_t *procs, int collect_data,
|
||||
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
|
||||
static int cray_commit(void);
|
||||
@ -87,7 +88,7 @@ const opal_pmix_base_module_t opal_pmix_cray_module = {
|
||||
.initialized = cray_initialized,
|
||||
.abort = cray_abort,
|
||||
.commit = cray_commit,
|
||||
.fence = NULL,
|
||||
.fence = cray_fence,
|
||||
.fence_nb = cray_fencenb,
|
||||
.put = cray_put,
|
||||
.get = cray_get,
|
||||
@ -127,6 +128,11 @@ static OBJ_CLASS_INSTANCE(pmi_opcaddy_t,
|
||||
opal_object_t,
|
||||
NULL, NULL);
|
||||
|
||||
struct fence_result {
|
||||
volatile int flag;
|
||||
int status;
|
||||
};
|
||||
|
||||
// PMI constant values:
|
||||
static int pmix_kvslen_max = 0;
|
||||
static int pmix_keylen_max = 0;
|
||||
@ -154,6 +160,13 @@ static char* pmix_error(int pmix_err);
|
||||
pmix_error(pmi_err)); \
|
||||
} while(0);
|
||||
|
||||
#define CRAY_WAIT_FOR_COMPLETION(a) \
|
||||
do { \
|
||||
while ((a)) { \
|
||||
usleep(10); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static void cray_get_more_info(void)
|
||||
{
|
||||
int alps_status = 0, i;
|
||||
@ -901,6 +914,23 @@ fn_exit:
|
||||
return;
|
||||
}
|
||||
|
||||
static void fence_release(int status, void *cbdata)
|
||||
{
|
||||
struct fence_result *res = (struct fence_result*)cbdata;
|
||||
res->status = status;
|
||||
opal_atomic_wmb();
|
||||
res->flag = 0;
|
||||
}
|
||||
|
||||
static int cray_fence(opal_list_t *procs, int collect_data)
|
||||
{
|
||||
struct fence_result result = { 1, OPAL_SUCCESS };
|
||||
cray_fencenb(procs, collect_data, fence_release, (void*)&result);
|
||||
CRAY_WAIT_FOR_COMPLETION(result.flag);
|
||||
return result.status;
|
||||
}
|
||||
|
||||
|
||||
static int cray_fencenb(opal_list_t *procs, int collect_data,
|
||||
opal_pmix_op_cbfunc_t cbfunc, void *cbdata)
|
||||
{
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user