543c16b28d
Signed-off-by: Ralph Castain <rhc@open-mpi.org>
507 строки
16 KiB
C
507 строки
16 KiB
C
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
|
/*
|
|
* Copyright (c) 2016-2017 Intel, Inc. All rights reserved.
|
|
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All
|
|
* rights reserved.
|
|
* Copyright (c) 2016 Cisco Systems, Inc. All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
#include "opal_config.h"
|
|
#include "opal/constants.h"
|
|
#include "opal/types.h"
|
|
|
|
#ifdef HAVE_STRING_H
|
|
#include <string.h>
|
|
#endif
|
|
#ifdef HAVE_UNISTD_H
|
|
#include <unistd.h>
|
|
#endif
|
|
|
|
#include "opal/dss/dss.h"
|
|
#include "opal/mca/event/event.h"
|
|
#include "opal/mca/hwloc/base/base.h"
|
|
#include "opal/runtime/opal.h"
|
|
#include "opal/runtime/opal_progress_threads.h"
|
|
#include "opal/util/argv.h"
|
|
#include "opal/util/error.h"
|
|
#include "opal/util/output.h"
|
|
#include "opal/util/proc.h"
|
|
#include "opal/util/show_help.h"
|
|
|
|
#include "pmix_isolated.h"
|
|
#include "opal/mca/pmix/base/base.h"
|
|
#include "opal/mca/pmix/base/pmix_base_hash.h"
|
|
|
|
|
|
static int isolated_init(opal_list_t *ilist);
|
|
static int isolated_fini(void);
|
|
static int isolated_initialized(void);
|
|
static int isolated_abort(int flat, const char *msg,
|
|
opal_list_t *procs);
|
|
static int isolated_spawn(opal_list_t *jobinfo, opal_list_t *apps, opal_jobid_t *jobid);
|
|
static int isolated_spawn_nb(opal_list_t *jobinfo, opal_list_t *apps,
|
|
opal_pmix_spawn_cbfunc_t cbfunc,
|
|
void *cbdata);
|
|
static int isolated_job_connect(opal_list_t *procs);
|
|
static int isolated_job_disconnect(opal_list_t *procs);
|
|
static int isolated_job_disconnect_nb(opal_list_t *procs,
|
|
opal_pmix_op_cbfunc_t cbfunc,
|
|
void *cbdata);
|
|
static int isolated_resolve_peers(const char *nodename,
|
|
opal_jobid_t jobid,
|
|
opal_list_t *procs);
|
|
static int isolated_resolve_nodes(opal_jobid_t jobid, char **nodelist);
|
|
static int isolated_put(opal_pmix_scope_t scope, opal_value_t *kv);
|
|
static int isolated_fence(opal_list_t *procs, int collect_data);
|
|
static int isolated_fence_nb(opal_list_t *procs, int collect_data,
|
|
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
|
|
static int isolated_commit(void);
|
|
static int isolated_get(const opal_process_name_t *id,
|
|
const char *key, opal_list_t *info,
|
|
opal_value_t **kv);
|
|
static int isolated_get_nb(const opal_process_name_t *id, const char *key,
|
|
opal_list_t *info,
|
|
opal_pmix_value_cbfunc_t cbfunc, void *cbdata);
|
|
static int isolated_publish(opal_list_t *info);
|
|
static int isolated_publish_nb(opal_list_t *info,
|
|
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
|
|
static int isolated_lookup(opal_list_t *data, opal_list_t *info);
|
|
static int isolated_lookup_nb(char **keys, opal_list_t *info,
|
|
opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata);
|
|
static int isolated_unpublish(char **keys, opal_list_t *info);
|
|
static int isolated_unpublish_nb(char **keys, opal_list_t *info,
|
|
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
|
|
static const char *isolated_get_version(void);
|
|
static int isolated_store_local(const opal_process_name_t *proc,
|
|
opal_value_t *val);
|
|
static const char *isolated_get_nspace(opal_jobid_t jobid);
|
|
static void isolated_register_jobid(opal_jobid_t jobid, const char *nspace);
|
|
|
|
const opal_pmix_base_module_t opal_pmix_isolated_module = {
|
|
.init = isolated_init,
|
|
.finalize = isolated_fini,
|
|
.initialized = isolated_initialized,
|
|
.abort = isolated_abort,
|
|
.commit = isolated_commit,
|
|
.fence = isolated_fence,
|
|
.fence_nb = isolated_fence_nb,
|
|
.put = isolated_put,
|
|
.get = isolated_get,
|
|
.get_nb = isolated_get_nb,
|
|
.publish = isolated_publish,
|
|
.publish_nb = isolated_publish_nb,
|
|
.lookup = isolated_lookup,
|
|
.lookup_nb = isolated_lookup_nb,
|
|
.unpublish = isolated_unpublish,
|
|
.unpublish_nb = isolated_unpublish_nb,
|
|
.spawn = isolated_spawn,
|
|
.spawn_nb = isolated_spawn_nb,
|
|
.connect = isolated_job_connect,
|
|
.disconnect = isolated_job_disconnect,
|
|
.disconnect_nb = isolated_job_disconnect_nb,
|
|
.resolve_peers = isolated_resolve_peers,
|
|
.resolve_nodes = isolated_resolve_nodes,
|
|
.get_version = isolated_get_version,
|
|
.register_evhandler = opal_pmix_base_register_handler,
|
|
.deregister_evhandler = opal_pmix_base_deregister_handler,
|
|
.notify_event = opal_pmix_base_notify_event,
|
|
.store_local = isolated_store_local,
|
|
.get_nspace = isolated_get_nspace,
|
|
.register_jobid = isolated_register_jobid
|
|
};
|
|
|
|
static int isolated_init_count = 0;
|
|
static opal_process_name_t isolated_pname;
|
|
|
|
/*
 * Initialize the isolated component.
 *
 * The call is reference counted: only the first call does any work, later
 * calls just bump isolated_init_count and return OPAL_SUCCESS.  The first
 * call assigns this process the temporary name {jobid 1, vpid 0},
 * initializes the base hash store and seeds it with the values describing
 * a one-process job (job size 1, appnum 0, universe size 1, max procs 1,
 * jobid 1, local size 1, local peers "0", local leader 0, local rank 0,
 * node rank 0).
 *
 * @param ilist  not used by this component
 * @return OPAL_SUCCESS, or the error code returned by
 *         opal_pmix_base_store() for the first value that fails to store
 */
static int isolated_init(opal_list_t *ilist)
{
    int rc;
    opal_value_t kv;
    opal_process_name_t wildcard;

    OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
    ++isolated_init_count;
    if (1 < isolated_init_count) {
        /* already initialized by an earlier call */
        OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
        return OPAL_SUCCESS;
    }

    wildcard.jobid = 1;
    wildcard.vpid = OPAL_VPID_WILDCARD;

    /* store our name in the opal_proc_t so that
     * debug messages will make sense - an upper
     * layer will eventually overwrite it, but that
     * won't do any harm */
    isolated_pname.jobid = 1;
    isolated_pname.vpid = 0;
    opal_proc_set_name(&isolated_pname);
    opal_output_verbose(10, opal_pmix_base_framework.framework_output,
                        "%s pmix:isolated: assigned tmp name %d %d",
                        OPAL_NAME_PRINT(isolated_pname),isolated_pname.jobid,isolated_pname.vpid);

    // setup hash table
    opal_pmix_base_hash_init();

    /* save the job size */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_JOB_SIZE);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = 1;
    if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(rc);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* save the appnum */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_APPNUM);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = 0;
    if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(rc);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* save the universe size */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_UNIV_SIZE);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = 1;
    if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(rc);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* save the max procs - the only value stored under the wildcard name */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_MAX_PROCS);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = 1;
    if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&wildcard, &kv))) {
        OPAL_ERROR_LOG(rc);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* save the jobid */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_JOBID);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = 1;
    if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(rc);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* save the local size */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_LOCAL_SIZE);
    kv.type = OPAL_UINT32;
    kv.data.uint32 = 1;
    if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(rc);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* save the local peers */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_LOCAL_PEERS);
    kv.type = OPAL_STRING;
    kv.data.string = strdup("0");
    if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(rc);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    /* save the local leader */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_LOCALLDR);
    kv.type = OPAL_UINT64;
    kv.data.uint64 = 0;
    if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(rc);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    /* release kv before it is re-constructed below, exactly as every
     * other entry does; otherwise the strdup'd key is lost */
    OBJ_DESTRUCT(&kv);

    /* save our local rank */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_LOCAL_RANK);
    kv.type = OPAL_UINT16;
    kv.data.uint16 = 0;
    if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(rc);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    /* same as above: release before re-constructing kv */
    OBJ_DESTRUCT(&kv);

    /* and our node rank */
    OBJ_CONSTRUCT(&kv, opal_value_t);
    kv.key = strdup(OPAL_PMIX_NODE_RANK);
    kv.type = OPAL_UINT16;
    kv.data.uint16 = 0;
    if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) {
        OPAL_ERROR_LOG(rc);
        OBJ_DESTRUCT(&kv);
        goto err_exit;
    }
    OBJ_DESTRUCT(&kv);

    OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
    return OPAL_SUCCESS;

  err_exit:
    OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
    return rc;
}
|
|
|
|
static int isolated_fini(void)
|
|
{
|
|
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
|
|
--opal_pmix_base.initialized;
|
|
|
|
if (0 == isolated_init_count) {
|
|
opal_pmix_base_hash_finalize();
|
|
}
|
|
|
|
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
|
|
return OPAL_SUCCESS;
|
|
}
|
|
|
|
static int isolated_initialized(void)
|
|
{
|
|
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
|
|
if (0 < isolated_init_count) {
|
|
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
|
|
return 1;
|
|
}
|
|
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
|
|
return 0;
|
|
}
|
|
|
|
static int isolated_abort(int flag, const char *msg,
|
|
opal_list_t *procs)
|
|
{
|
|
return OPAL_SUCCESS;
|
|
}
|
|
|
|
/*
 * Blocking spawn: not supported by this component.
 *
 * @return OPAL_ERR_NOT_SUPPORTED always; all arguments are ignored
 */
static int isolated_spawn(opal_list_t *jobinfo, opal_list_t *apps,
                          opal_jobid_t *jobid)
{
    (void)jobinfo;
    (void)apps;
    (void)jobid;

    return OPAL_ERR_NOT_SUPPORTED;
}
|
|
|
|
static int isolated_spawn_nb(opal_list_t *jobinfo, opal_list_t *apps,
|
|
opal_pmix_spawn_cbfunc_t cbfunc,
|
|
void *cbdata)
|
|
{
|
|
return OPAL_ERR_NOT_SUPPORTED;
|
|
}
|
|
|
|
/*
 * Connect: not supported by this component.
 *
 * @return OPAL_ERR_NOT_SUPPORTED always; procs is ignored
 */
static int isolated_job_connect(opal_list_t *procs)
{
    (void)procs;

    return OPAL_ERR_NOT_SUPPORTED;
}
|
|
|
|
/*
 * Disconnect: not supported by this component.
 *
 * @return OPAL_ERR_NOT_SUPPORTED always; procs is ignored
 */
static int isolated_job_disconnect(opal_list_t *procs)
{
    (void)procs;

    return OPAL_ERR_NOT_SUPPORTED;
}
|
|
|
|
static int isolated_job_disconnect_nb(opal_list_t *procs,
|
|
opal_pmix_op_cbfunc_t cbfunc,
|
|
void *cbdata)
|
|
{
|
|
return OPAL_ERR_NOT_SUPPORTED;
|
|
}
|
|
|
|
static int isolated_resolve_peers(const char *nodename,
|
|
opal_jobid_t jobid,
|
|
opal_list_t *procs)
|
|
{
|
|
return OPAL_ERR_NOT_IMPLEMENTED;
|
|
}
|
|
|
|
/*
 * Resolve the nodes of a job: not implemented by this component.
 *
 * @return OPAL_ERR_NOT_IMPLEMENTED always; nodelist is left untouched
 */
static int isolated_resolve_nodes(opal_jobid_t jobid, char **nodelist)
{
    (void)jobid;
    (void)nodelist;

    return OPAL_ERR_NOT_IMPLEMENTED;
}
|
|
|
|
static int isolated_put(opal_pmix_scope_t scope,
|
|
opal_value_t *kv)
|
|
{
|
|
int rc;
|
|
|
|
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
|
"%s pmix:isolated isolated_put key %s scope %d",
|
|
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), kv->key, scope);
|
|
|
|
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
|
|
if (0 == isolated_init_count) {
|
|
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
|
|
return OPAL_ERROR;
|
|
}
|
|
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
|
|
|
|
rc = opal_pmix_base_store(&isolated_pname, kv);
|
|
|
|
return rc;
|
|
}
|
|
|
|
static int isolated_commit(void)
|
|
{
|
|
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
|
"%s pmix:isolated isolated commit",
|
|
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
|
|
|
|
return OPAL_SUCCESS;
|
|
}
|
|
|
|
/*
 * Blocking fence: emits a verbose trace message and returns immediately.
 *
 * @return OPAL_SUCCESS always; both arguments are ignored
 */
static int isolated_fence(opal_list_t *procs, int collect_data)
{
    (void)procs;
    (void)collect_data;

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:isolated isolated fence",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));

    return OPAL_SUCCESS;
}
|
|
|
|
static int isolated_fence_nb(opal_list_t *procs, int collect_data,
|
|
opal_pmix_op_cbfunc_t cbfunc, void *cbdata)
|
|
{
|
|
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
|
"%s pmix:isolated isolated fence_nb",
|
|
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
|
|
if (NULL != cbfunc) {
|
|
cbfunc(OPAL_SUCCESS, cbdata);
|
|
}
|
|
return OPAL_SUCCESS;
|
|
}
|
|
|
|
static int isolated_get(const opal_process_name_t *id,
|
|
const char *key, opal_list_t *info,
|
|
opal_value_t **kv)
|
|
{
|
|
int rc;
|
|
opal_list_t vals;
|
|
|
|
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
|
"%s pmix:isolated getting value for proc %s key %s",
|
|
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
|
|
OPAL_NAME_PRINT(*id), key);
|
|
|
|
OBJ_CONSTRUCT(&vals, opal_list_t);
|
|
rc = opal_pmix_base_fetch(id, key, &vals);
|
|
if (OPAL_SUCCESS == rc) {
|
|
*kv = (opal_value_t*)opal_list_remove_first(&vals);
|
|
return OPAL_SUCCESS;
|
|
} else {
|
|
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
|
"%s pmix:isolated fetch from dstore failed: %d",
|
|
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), rc);
|
|
}
|
|
OPAL_LIST_DESTRUCT(&vals);
|
|
|
|
return rc;
|
|
}
|
|
static int isolated_get_nb(const opal_process_name_t *id, const char *key,
|
|
opal_list_t *info, opal_pmix_value_cbfunc_t cbfunc, void *cbdata)
|
|
{
|
|
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
|
"%s pmix:isolated isolated get_nb",
|
|
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
|
|
return OPAL_ERR_NOT_IMPLEMENTED;
|
|
}
|
|
|
|
/*
 * Publish: not supported by this component.  Emits a verbose trace
 * message only.
 *
 * @return OPAL_ERR_NOT_SUPPORTED always; info is ignored
 */
static int isolated_publish(opal_list_t *info)
{
    (void)info;

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:isolated isolated publish",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));

    return OPAL_ERR_NOT_SUPPORTED;
}
|
|
|
|
static int isolated_publish_nb(opal_list_t *info,
|
|
opal_pmix_op_cbfunc_t cbfunc, void *cbdata)
|
|
{
|
|
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
|
"%s pmix:isolated isolated publish_nb",
|
|
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
|
|
return OPAL_ERR_NOT_SUPPORTED;
|
|
}
|
|
|
|
/*
 * Lookup: not supported by this component.  Emits a verbose trace
 * message only.
 *
 * @return OPAL_ERR_NOT_SUPPORTED always; both arguments are ignored
 */
static int isolated_lookup(opal_list_t *data, opal_list_t *info)
{
    (void)data;
    (void)info;

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:isolated isolated lookup",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));

    return OPAL_ERR_NOT_SUPPORTED;
}
|
|
|
|
static int isolated_lookup_nb(char **keys, opal_list_t *info,
|
|
opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata)
|
|
{
|
|
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
|
"%s pmix:isolated isolated lookup_nb",
|
|
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
|
|
return OPAL_ERR_NOT_SUPPORTED;
|
|
}
|
|
|
|
/*
 * Unpublish: not supported by this component.  Emits a verbose trace
 * message only.
 *
 * @return OPAL_ERR_NOT_SUPPORTED always; both arguments are ignored
 */
static int isolated_unpublish(char **keys, opal_list_t *info)
{
    (void)keys;
    (void)info;

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:isolated isolated unpublish",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));

    return OPAL_ERR_NOT_SUPPORTED;
}
|
|
|
|
static int isolated_unpublish_nb(char **keys, opal_list_t *info,
|
|
opal_pmix_op_cbfunc_t cbfunc, void *cbdata)
|
|
{
|
|
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
|
"%s pmix:isolated isolated unpublish_nb",
|
|
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
|
|
return OPAL_ERR_NOT_SUPPORTED;
|
|
}
|
|
|
|
/*
 * Report the version string of this component.
 *
 * @return the constant string "N/A"
 */
static const char *isolated_get_version(void)
{
    const char *version = "N/A";

    return version;
}
|
|
|
|
static int isolated_store_local(const opal_process_name_t *proc,
|
|
opal_value_t *val)
|
|
{
|
|
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
|
"%s pmix:isolated isolated store_local",
|
|
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
|
|
|
|
opal_pmix_base_store(proc, val);
|
|
|
|
return OPAL_SUCCESS;
|
|
}
|
|
|
|
/*
 * Report the namespace for a jobid.
 *
 * @return the constant string "N/A"; jobid is ignored
 */
static const char *isolated_get_nspace(opal_jobid_t jobid)
{
    const char *nspace = "N/A";

    (void)jobid;

    return nspace;
}
|
|
|
|
/*
 * Register a jobid/namespace pair: this component does nothing with it.
 * Both arguments are ignored.
 */
static void isolated_register_jobid(opal_jobid_t jobid, const char *nspace)
{
    (void)jobid;
    (void)nspace;
}
|