Merge pull request #3934 from rhc54/topic/singleton
Fix the isolated pmix component. Cleanup the ess/singleton component …
Этот коммит содержится в:
Коммит
fca68b070b
@ -122,12 +122,18 @@ static int isolated_init(opal_list_t *ilist)
|
||||
{
|
||||
int rc;
|
||||
opal_value_t kv;
|
||||
opal_process_name_t wildcard;
|
||||
|
||||
if (0 < isolated_init_count) {
|
||||
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
|
||||
++isolated_init_count;
|
||||
if (1 < isolated_init_count) {
|
||||
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
++isolated_init_count;
|
||||
|
||||
wildcard.jobid = 1;
|
||||
wildcard.vpid = OPAL_VPID_WILDCARD;
|
||||
|
||||
/* store our name in the opal_proc_t so that
|
||||
* debug messages will make sense - an upper
|
||||
@ -178,6 +184,17 @@ static int isolated_init(opal_list_t *ilist)
|
||||
}
|
||||
OBJ_DESTRUCT(&kv);
|
||||
|
||||
OBJ_CONSTRUCT(&kv, opal_value_t);
|
||||
kv.key = strdup(OPAL_PMIX_MAX_PROCS);
|
||||
kv.type = OPAL_UINT32;
|
||||
kv.data.uint32 = 1;
|
||||
if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&wildcard, &kv))) {
|
||||
OPAL_ERROR_LOG(rc);
|
||||
OBJ_DESTRUCT(&kv);
|
||||
goto err_exit;
|
||||
}
|
||||
OBJ_DESTRUCT(&kv);
|
||||
|
||||
OBJ_CONSTRUCT(&kv, opal_value_t);
|
||||
kv.key = strdup(OPAL_PMIX_JOBID);
|
||||
kv.type = OPAL_UINT32;
|
||||
@ -246,30 +263,35 @@ static int isolated_init(opal_list_t *ilist)
|
||||
}
|
||||
OBJ_DESTRUCT(&kv);
|
||||
|
||||
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
|
||||
return OPAL_SUCCESS;
|
||||
|
||||
err_exit:
|
||||
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int isolated_fini(void)
|
||||
{
|
||||
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
|
||||
--opal_pmix_base.initialized;
|
||||
|
||||
if (0 == isolated_init_count) {
|
||||
return OPAL_SUCCESS;
|
||||
opal_pmix_base_hash_finalize();
|
||||
}
|
||||
|
||||
if (0 != --isolated_init_count) {
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
opal_pmix_base_hash_finalize();
|
||||
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
static int isolated_initialized(void)
|
||||
{
|
||||
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
|
||||
if (0 < isolated_init_count) {
|
||||
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
|
||||
return 1;
|
||||
}
|
||||
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -325,13 +347,16 @@ static int isolated_put(opal_pmix_scope_t scope,
|
||||
{
|
||||
int rc;
|
||||
|
||||
opal_output_verbose(10, opal_pmix_base_framework.framework_output,
|
||||
"%s pmix:isolated isolated_put key %s scope %d\n",
|
||||
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
||||
"%s pmix:isolated isolated_put key %s scope %d",
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), kv->key, scope);
|
||||
|
||||
if (!isolated_init_count) {
|
||||
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
|
||||
if (0 == isolated_init_count) {
|
||||
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
|
||||
return OPAL_ERROR;
|
||||
}
|
||||
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
|
||||
|
||||
rc = opal_pmix_base_store(&isolated_pname, kv);
|
||||
|
||||
@ -340,18 +365,31 @@ static int isolated_put(opal_pmix_scope_t scope,
|
||||
|
||||
static int isolated_commit(void)
|
||||
{
|
||||
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
||||
"%s pmix:isolated isolated commit",
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
/*
 * Blocking fence for the isolated component: emits a level-2 verbose
 * trace line and returns OPAL_SUCCESS. Neither procs nor collect_data
 * is consulted.
 */
static int isolated_fence(opal_list_t *procs, int collect_data)
{
    const int status = OPAL_SUCCESS;

    (void)procs;
    (void)collect_data;

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:isolated isolated fence",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
    return status;
}
|
||||
|
||||
static int isolated_fence_nb(opal_list_t *procs, int collect_data,
|
||||
opal_pmix_op_cbfunc_t cbfunc, void *cbdata)
|
||||
{
|
||||
return OPAL_ERR_NOT_IMPLEMENTED;
|
||||
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
||||
"%s pmix:isolated isolated fence_nb",
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
|
||||
if (NULL != cbfunc) {
|
||||
cbfunc(OPAL_SUCCESS, cbdata);
|
||||
}
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
static int isolated_get(const opal_process_name_t *id,
|
||||
@ -383,39 +421,60 @@ static int isolated_get(const opal_process_name_t *id,
|
||||
static int isolated_get_nb(const opal_process_name_t *id, const char *key,
|
||||
opal_list_t *info, opal_pmix_value_cbfunc_t cbfunc, void *cbdata)
|
||||
{
|
||||
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
||||
"%s pmix:isolated isolated get_nb",
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
|
||||
return OPAL_ERR_NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
/*
 * Publish entry point for the isolated component: emits a level-2
 * verbose trace line and returns OPAL_ERR_NOT_SUPPORTED. info is not
 * consulted.
 */
static int isolated_publish(opal_list_t *info)
{
    const int status = OPAL_ERR_NOT_SUPPORTED;

    (void)info;

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:isolated isolated publish",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
    return status;
}
|
||||
|
||||
static int isolated_publish_nb(opal_list_t *info,
|
||||
opal_pmix_op_cbfunc_t cbfunc, void *cbdata)
|
||||
{
|
||||
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
||||
"%s pmix:isolated isolated publish_nb",
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
|
||||
return OPAL_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
/*
 * Lookup entry point for the isolated component: emits a level-2
 * verbose trace line and returns OPAL_ERR_NOT_SUPPORTED. Neither data
 * nor info is consulted.
 */
static int isolated_lookup(opal_list_t *data, opal_list_t *info)
{
    const int status = OPAL_ERR_NOT_SUPPORTED;

    (void)data;
    (void)info;

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:isolated isolated lookup",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
    return status;
}
|
||||
|
||||
static int isolated_lookup_nb(char **keys, opal_list_t *info,
|
||||
opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata)
|
||||
{
|
||||
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
||||
"%s pmix:isolated isolated lookup_nb",
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
|
||||
return OPAL_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
/*
 * Unpublish entry point for the isolated component: emits a level-2
 * verbose trace line and returns OPAL_ERR_NOT_SUPPORTED. Neither keys
 * nor info is consulted.
 */
static int isolated_unpublish(char **keys, opal_list_t *info)
{
    const int status = OPAL_ERR_NOT_SUPPORTED;

    (void)keys;
    (void)info;

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:isolated isolated unpublish",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
    return status;
}
|
||||
|
||||
static int isolated_unpublish_nb(char **keys, opal_list_t *info,
|
||||
opal_pmix_op_cbfunc_t cbfunc, void *cbdata)
|
||||
{
|
||||
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
||||
"%s pmix:isolated isolated unpublish_nb",
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
|
||||
return OPAL_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
@ -427,6 +486,10 @@ static const char *isolated_get_version(void)
|
||||
static int isolated_store_local(const opal_process_name_t *proc,
|
||||
opal_value_t *val)
|
||||
{
|
||||
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
||||
"%s pmix:isolated isolated store_local",
|
||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
|
||||
|
||||
opal_pmix_base_store(proc, val);
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
|
@ -84,8 +84,6 @@ static int rte_init(void)
|
||||
{
|
||||
int rc, ret;
|
||||
char *error = NULL;
|
||||
opal_value_t *kv;
|
||||
char *val = NULL;
|
||||
int u32, *u32ptr;
|
||||
uint16_t u16, *u16ptr;
|
||||
orte_process_name_t name;
|
||||
@ -159,7 +157,7 @@ static int rte_init(void)
|
||||
} else if (NULL != getenv("SINGULARITY_CONTAINER") ||
|
||||
mca_ess_singleton_component.isolated) {
|
||||
/* ensure we use the isolated pmix component */
|
||||
opal_setenv (OPAL_MCA_PREFIX"pmix", "isolated", true, &environ);
|
||||
opal_setenv(OPAL_MCA_PREFIX"pmix", "isolated", true, &environ);
|
||||
} else {
|
||||
/* we want to use PMIX_NAMESPACE that will be sent by the hnp as a jobid */
|
||||
opal_setenv(OPAL_MCA_PREFIX"orte_launch", "1", true, &environ);
|
||||
@ -169,7 +167,7 @@ static int rte_init(void)
|
||||
return rc;
|
||||
}
|
||||
/* our name was given to us by the HNP */
|
||||
opal_setenv (OPAL_MCA_PREFIX"pmix", "^s1,s2,cray,isolated", true, &environ);
|
||||
opal_setenv(OPAL_MCA_PREFIX"pmix", "^s1,s2,cray,isolated", true, &environ);
|
||||
}
|
||||
|
||||
/* get an async event base - we use the opal_async one so
|
||||
@ -265,69 +263,13 @@ static int rte_init(void)
|
||||
* we can use the jobfam and stepid as unique keys
|
||||
* because they are unique values assigned by the RM
|
||||
*/
|
||||
assert (NULL != getenv(OPAL_MCA_PREFIX"orte_precondition_transports"));
|
||||
|
||||
/* retrieve our topology */
|
||||
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_TOPO,
|
||||
&name, &val, OPAL_STRING);
|
||||
if (OPAL_SUCCESS == ret && NULL != val) {
|
||||
/* load the topology */
|
||||
if (0 != hwloc_topology_init(&opal_hwloc_topology)) {
|
||||
ret = OPAL_ERROR;
|
||||
free(val);
|
||||
error = "setting topology";
|
||||
goto error;
|
||||
if (NULL == getenv(OPAL_MCA_PREFIX"orte_precondition_transports")) {
|
||||
char *key;
|
||||
ret = orte_pre_condition_transports(NULL, &key);
|
||||
if (ORTE_SUCCESS == ret) {
|
||||
opal_setenv(OPAL_MCA_PREFIX"orte_precondition_transports", key, true, &environ);
|
||||
free(key);
|
||||
}
|
||||
if (0 != hwloc_topology_set_xmlbuffer(opal_hwloc_topology, val, strlen(val))) {
|
||||
ret = OPAL_ERROR;
|
||||
free(val);
|
||||
hwloc_topology_destroy(opal_hwloc_topology);
|
||||
error = "setting topology";
|
||||
goto error;
|
||||
}
|
||||
/* since we are loading this from an external source, we have to
|
||||
* explicitly set a flag so hwloc sets things up correctly
|
||||
*/
|
||||
if (0 != hwloc_topology_set_flags(opal_hwloc_topology,
|
||||
(HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM |
|
||||
HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM |
|
||||
HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) {
|
||||
ret = OPAL_ERROR;
|
||||
hwloc_topology_destroy(opal_hwloc_topology);
|
||||
free(val);
|
||||
error = "setting topology";
|
||||
goto error;
|
||||
}
|
||||
/* now load the topology */
|
||||
if (0 != hwloc_topology_load(opal_hwloc_topology)) {
|
||||
ret = OPAL_ERROR;
|
||||
hwloc_topology_destroy(opal_hwloc_topology);
|
||||
free(val);
|
||||
error = "setting topology";
|
||||
goto error;
|
||||
}
|
||||
free(val);
|
||||
} else {
|
||||
/* it wasn't passed down to us, so go get it */
|
||||
if (OPAL_SUCCESS != (ret = opal_hwloc_base_get_topology())) {
|
||||
error = "topology discovery";
|
||||
goto error;
|
||||
}
|
||||
/* push it into the PMIx database in case someone
|
||||
* tries to retrieve it so we avoid an attempt to
|
||||
* get it again */
|
||||
kv = OBJ_NEW(opal_value_t);
|
||||
kv->key = strdup(OPAL_PMIX_LOCAL_TOPO);
|
||||
kv->type = OPAL_STRING;
|
||||
if (0 != (ret = hwloc_topology_export_xmlbuffer(opal_hwloc_topology, &kv->data.string, &u32))) {
|
||||
error = "topology export";
|
||||
goto error;
|
||||
}
|
||||
if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_NAME, kv))) {
|
||||
error = "topology store";
|
||||
goto error;
|
||||
}
|
||||
OBJ_RELEASE(kv);
|
||||
}
|
||||
|
||||
/* use the std app init to complete the procedure */
|
||||
|
@ -332,7 +332,7 @@ void orte_plm_base_setup_job(int fd, short args, void *cbdata)
|
||||
}
|
||||
free(key);
|
||||
} else {
|
||||
if (ORTE_SUCCESS != (rc = orte_pre_condition_transports(caddy->jdata))) {
|
||||
if (ORTE_SUCCESS != (rc = orte_pre_condition_transports(caddy->jdata, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
|
||||
OBJ_RELEASE(caddy);
|
||||
@ -342,7 +342,7 @@ void orte_plm_base_setup_job(int fd, short args, void *cbdata)
|
||||
} else {
|
||||
/* this will also record the transport key attribute in the job object, and
|
||||
* adds the key envar to each app */
|
||||
if (ORTE_SUCCESS != (rc = orte_pre_condition_transports(caddy->jdata))) {
|
||||
if (ORTE_SUCCESS != (rc = orte_pre_condition_transports(caddy->jdata, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
|
||||
OBJ_RELEASE(caddy);
|
||||
|
@ -578,7 +578,7 @@ int orte_daemon(int argc, char *argv[])
|
||||
ORTE_FLAG_SET(proc, ORTE_PROC_FLAG_LOCAL);
|
||||
|
||||
/* set the ORTE_JOB_TRANSPORT_KEY from the environment */
|
||||
orte_pre_condition_transports(jdata);
|
||||
orte_pre_condition_transports(jdata, NULL);
|
||||
|
||||
/* register the singleton's nspace with our PMIx server */
|
||||
if (ORTE_SUCCESS != (ret = orte_pmix_server_register_nspace(jdata, false))) {
|
||||
|
33
orte/test/mpi/hellocycle.pl
Исполняемый файл
33
orte/test/mpi/hellocycle.pl
Исполняемый файл
@ -0,0 +1,33 @@
|
||||
#!/usr/bin/env perl
#
# Launch ./hello repeatedly and report the achieved launch rate
# (launches per second). A progress line ("<remaining>: <local time>")
# is printed before the first launch, whenever the remaining count is a
# multiple of 1000, and once more after the loop.
#
use strict;
use warnings;
# NOTE(review): nothing below references Date::Parse - confirm whether
# this dependency is still required.
use Date::Parse;

# Limit the launched program to the "self" BTL via the MCA environment.
$ENV{OMPI_MCA_btl} = "self";

# Print the given count followed by the current local time.
sub prtime {
    my $count = shift;
    my $str = localtime;
    print "$count: $str\n";
}

my $totalcount = 5000;
my $count = $totalcount;
my $failed = 0;

prtime($count);
my $start = time();
while ($count > 0) {
    # system() returns non-zero when the launch or the program fails;
    # count those so a broken ./hello does not go unnoticed
    if (system("./hello > /dev/null 2>&1") != 0) {
        $failed++;
    }
    $count--;

    if ($count % 1000 == 0) {
        prtime($count);
    }
}
prtime($count);

my $stop = time();

if ($failed > 0) {
    print STDERR "Warning: $failed of $totalcount launches failed\n";
}

# time() has one-second resolution, so a very fast run can yield zero
# elapsed seconds; do not divide by zero in that case
my $elapsed = $stop - $start;
if ($elapsed > 0) {
    my $rate = $totalcount / $elapsed;
    print "Rate: $rate\n";
} else {
    print "Rate: n/a (elapsed time under one second)\n";
}
|
@ -12,7 +12,7 @@
|
||||
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2016-2017 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -130,7 +130,7 @@ char* orte_pre_condition_transports_print(uint64_t *unique_key)
|
||||
}
|
||||
|
||||
|
||||
int orte_pre_condition_transports(orte_job_t *jdata)
|
||||
int orte_pre_condition_transports(orte_job_t *jdata, char **key)
|
||||
{
|
||||
uint64_t unique_key[2];
|
||||
int n;
|
||||
@ -164,23 +164,28 @@ int orte_pre_condition_transports(orte_job_t *jdata)
|
||||
}
|
||||
|
||||
/* record it in case this job executes a dynamic spawn */
|
||||
orte_set_attribute(&jdata->attributes, ORTE_JOB_TRANSPORT_KEY, ORTE_ATTR_LOCAL, string_key, OPAL_STRING);
|
||||
if (NULL != jdata) {
|
||||
orte_set_attribute(&jdata->attributes, ORTE_JOB_TRANSPORT_KEY, ORTE_ATTR_LOCAL, string_key, OPAL_STRING);
|
||||
|
||||
if (OPAL_SUCCESS != mca_base_var_env_name ("orte_precondition_transports", &cs_env)) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
free(string_key);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
for (n=0; n < jdata->apps->size; n++) {
|
||||
if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, n))) {
|
||||
continue;
|
||||
if (OPAL_SUCCESS != mca_base_var_env_name ("orte_precondition_transports", &cs_env)) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
free(string_key);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
opal_setenv(cs_env, string_key, true, &app->env);
|
||||
}
|
||||
|
||||
free(cs_env);
|
||||
free(string_key);
|
||||
for (n=0; n < jdata->apps->size; n++) {
|
||||
if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, n))) {
|
||||
continue;
|
||||
}
|
||||
opal_setenv(cs_env, string_key, true, &app->env);
|
||||
}
|
||||
free(cs_env);
|
||||
free(string_key);
|
||||
} else if (NULL != key) {
|
||||
*key = string_key;
|
||||
} else {
|
||||
free(string_key);
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
@ -9,6 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2017 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -32,7 +33,7 @@
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/**
 * Generate the transport pre-conditioning key string.
 *
 * @param jdata  when non-NULL, the key is recorded as the job's
 *               ORTE_JOB_TRANSPORT_KEY attribute and exported into the
 *               environment of each of the job's apps; key is then not
 *               consulted
 * @param key    used only when jdata is NULL: if non-NULL, *key is set
 *               to the key string, which the caller must free(); if
 *               NULL, the generated string is discarded
 *
 * @return ORTE_SUCCESS, or an ORTE error code on failure
 */
ORTE_DECLSPEC int orte_pre_condition_transports(orte_job_t *jdata, char **key);
|
||||
|
||||
ORTE_DECLSPEC char* orte_pre_condition_transports_print(uint64_t *unique_key);
|
||||
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user