1
1

Merge pull request #3934 from rhc54/topic/singleton

Fix the isolated pmix component. Clean up the ess/singleton component …
Этот коммит содержится в:
Ralph Castain 2017-07-19 16:02:37 -05:00 коммит произвёл GitHub
родитель 6cbea90209 543c16b28d
Коммит fca68b070b
7 изменённых файлов: 141 добавлений и 97 удалений

Просмотреть файл

@ -122,12 +122,18 @@ static int isolated_init(opal_list_t *ilist)
{ {
int rc; int rc;
opal_value_t kv; opal_value_t kv;
opal_process_name_t wildcard;
if (0 < isolated_init_count) { OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
++isolated_init_count;
if (1 < isolated_init_count) {
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
++isolated_init_count;
wildcard.jobid = 1;
wildcard.vpid = OPAL_VPID_WILDCARD;
/* store our name in the opal_proc_t so that /* store our name in the opal_proc_t so that
* debug messages will make sense - an upper * debug messages will make sense - an upper
@ -178,6 +184,17 @@ static int isolated_init(opal_list_t *ilist)
} }
OBJ_DESTRUCT(&kv); OBJ_DESTRUCT(&kv);
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(OPAL_PMIX_MAX_PROCS);
kv.type = OPAL_UINT32;
kv.data.uint32 = 1;
if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&wildcard, &kv))) {
OPAL_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
goto err_exit;
}
OBJ_DESTRUCT(&kv);
OBJ_CONSTRUCT(&kv, opal_value_t); OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(OPAL_PMIX_JOBID); kv.key = strdup(OPAL_PMIX_JOBID);
kv.type = OPAL_UINT32; kv.type = OPAL_UINT32;
@ -246,30 +263,35 @@ static int isolated_init(opal_list_t *ilist)
} }
OBJ_DESTRUCT(&kv); OBJ_DESTRUCT(&kv);
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
return OPAL_SUCCESS; return OPAL_SUCCESS;
err_exit: err_exit:
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
return rc; return rc;
} }
static int isolated_fini(void) static int isolated_fini(void)
{ {
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
--opal_pmix_base.initialized;
if (0 == isolated_init_count) { if (0 == isolated_init_count) {
return OPAL_SUCCESS; opal_pmix_base_hash_finalize();
} }
if (0 != --isolated_init_count) { OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
return OPAL_SUCCESS;
}
opal_pmix_base_hash_finalize();
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
static int isolated_initialized(void) static int isolated_initialized(void)
{ {
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
if (0 < isolated_init_count) { if (0 < isolated_init_count) {
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
return 1; return 1;
} }
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
return 0; return 0;
} }
@ -325,13 +347,16 @@ static int isolated_put(opal_pmix_scope_t scope,
{ {
int rc; int rc;
opal_output_verbose(10, opal_pmix_base_framework.framework_output, opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s pmix:isolated isolated_put key %s scope %d\n", "%s pmix:isolated isolated_put key %s scope %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), kv->key, scope); OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), kv->key, scope);
if (!isolated_init_count) { OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
if (0 == isolated_init_count) {
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
return OPAL_ERROR; return OPAL_ERROR;
} }
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
rc = opal_pmix_base_store(&isolated_pname, kv); rc = opal_pmix_base_store(&isolated_pname, kv);
@ -340,18 +365,31 @@ static int isolated_put(opal_pmix_scope_t scope,
static int isolated_commit(void) static int isolated_commit(void)
{ {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s pmix:isolated isolated commit",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
static int isolated_fence(opal_list_t *procs, int collect_data) static int isolated_fence(opal_list_t *procs, int collect_data)
{ {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s pmix:isolated isolated fence",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
static int isolated_fence_nb(opal_list_t *procs, int collect_data, static int isolated_fence_nb(opal_list_t *procs, int collect_data,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata) opal_pmix_op_cbfunc_t cbfunc, void *cbdata)
{ {
return OPAL_ERR_NOT_IMPLEMENTED; opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s pmix:isolated isolated fence_nb",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
if (NULL != cbfunc) {
cbfunc(OPAL_SUCCESS, cbdata);
}
return OPAL_SUCCESS;
} }
static int isolated_get(const opal_process_name_t *id, static int isolated_get(const opal_process_name_t *id,
@ -383,39 +421,60 @@ static int isolated_get(const opal_process_name_t *id,
static int isolated_get_nb(const opal_process_name_t *id, const char *key, static int isolated_get_nb(const opal_process_name_t *id, const char *key,
opal_list_t *info, opal_pmix_value_cbfunc_t cbfunc, void *cbdata) opal_list_t *info, opal_pmix_value_cbfunc_t cbfunc, void *cbdata)
{ {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s pmix:isolated isolated get_nb",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
return OPAL_ERR_NOT_IMPLEMENTED; return OPAL_ERR_NOT_IMPLEMENTED;
} }
static int isolated_publish(opal_list_t *info) static int isolated_publish(opal_list_t *info)
{ {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s pmix:isolated isolated publish",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
return OPAL_ERR_NOT_SUPPORTED; return OPAL_ERR_NOT_SUPPORTED;
} }
static int isolated_publish_nb(opal_list_t *info, static int isolated_publish_nb(opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata) opal_pmix_op_cbfunc_t cbfunc, void *cbdata)
{ {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s pmix:isolated isolated publish_nb",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
return OPAL_ERR_NOT_SUPPORTED; return OPAL_ERR_NOT_SUPPORTED;
} }
static int isolated_lookup(opal_list_t *data, opal_list_t *info) static int isolated_lookup(opal_list_t *data, opal_list_t *info)
{ {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s pmix:isolated isolated lookup",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
return OPAL_ERR_NOT_SUPPORTED; return OPAL_ERR_NOT_SUPPORTED;
} }
static int isolated_lookup_nb(char **keys, opal_list_t *info, static int isolated_lookup_nb(char **keys, opal_list_t *info,
opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata) opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata)
{ {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s pmix:isolated isolated lookup_nb",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
return OPAL_ERR_NOT_SUPPORTED; return OPAL_ERR_NOT_SUPPORTED;
} }
static int isolated_unpublish(char **keys, opal_list_t *info) static int isolated_unpublish(char **keys, opal_list_t *info)
{ {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s pmix:isolated isolated unpublish",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
return OPAL_ERR_NOT_SUPPORTED; return OPAL_ERR_NOT_SUPPORTED;
} }
static int isolated_unpublish_nb(char **keys, opal_list_t *info, static int isolated_unpublish_nb(char **keys, opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata) opal_pmix_op_cbfunc_t cbfunc, void *cbdata)
{ {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s pmix:isolated isolated unpublish_nb",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
return OPAL_ERR_NOT_SUPPORTED; return OPAL_ERR_NOT_SUPPORTED;
} }
@ -427,6 +486,10 @@ static const char *isolated_get_version(void)
static int isolated_store_local(const opal_process_name_t *proc, static int isolated_store_local(const opal_process_name_t *proc,
opal_value_t *val) opal_value_t *val)
{ {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s pmix:isolated isolated store_local",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
opal_pmix_base_store(proc, val); opal_pmix_base_store(proc, val);
return OPAL_SUCCESS; return OPAL_SUCCESS;

Просмотреть файл

@ -84,8 +84,6 @@ static int rte_init(void)
{ {
int rc, ret; int rc, ret;
char *error = NULL; char *error = NULL;
opal_value_t *kv;
char *val = NULL;
int u32, *u32ptr; int u32, *u32ptr;
uint16_t u16, *u16ptr; uint16_t u16, *u16ptr;
orte_process_name_t name; orte_process_name_t name;
@ -159,7 +157,7 @@ static int rte_init(void)
} else if (NULL != getenv("SINGULARITY_CONTAINER") || } else if (NULL != getenv("SINGULARITY_CONTAINER") ||
mca_ess_singleton_component.isolated) { mca_ess_singleton_component.isolated) {
/* ensure we use the isolated pmix component */ /* ensure we use the isolated pmix component */
opal_setenv (OPAL_MCA_PREFIX"pmix", "isolated", true, &environ); opal_setenv(OPAL_MCA_PREFIX"pmix", "isolated", true, &environ);
} else { } else {
/* we want to use PMIX_NAMESPACE that will be sent by the hnp as a jobid */ /* we want to use PMIX_NAMESPACE that will be sent by the hnp as a jobid */
opal_setenv(OPAL_MCA_PREFIX"orte_launch", "1", true, &environ); opal_setenv(OPAL_MCA_PREFIX"orte_launch", "1", true, &environ);
@ -169,7 +167,7 @@ static int rte_init(void)
return rc; return rc;
} }
/* our name was given to us by the HNP */ /* our name was given to us by the HNP */
opal_setenv (OPAL_MCA_PREFIX"pmix", "^s1,s2,cray,isolated", true, &environ); opal_setenv(OPAL_MCA_PREFIX"pmix", "^s1,s2,cray,isolated", true, &environ);
} }
/* get an async event base - we use the opal_async one so /* get an async event base - we use the opal_async one so
@ -265,69 +263,13 @@ static int rte_init(void)
* we can use the jobfam and stepid as unique keys * we can use the jobfam and stepid as unique keys
* because they are unique values assigned by the RM * because they are unique values assigned by the RM
*/ */
assert (NULL != getenv(OPAL_MCA_PREFIX"orte_precondition_transports")); if (NULL == getenv(OPAL_MCA_PREFIX"orte_precondition_transports")) {
char *key;
/* retrieve our topology */ ret = orte_pre_condition_transports(NULL, &key);
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_TOPO, if (ORTE_SUCCESS == ret) {
&name, &val, OPAL_STRING); opal_setenv(OPAL_MCA_PREFIX"orte_precondition_transports", key, true, &environ);
if (OPAL_SUCCESS == ret && NULL != val) { free(key);
/* load the topology */
if (0 != hwloc_topology_init(&opal_hwloc_topology)) {
ret = OPAL_ERROR;
free(val);
error = "setting topology";
goto error;
} }
if (0 != hwloc_topology_set_xmlbuffer(opal_hwloc_topology, val, strlen(val))) {
ret = OPAL_ERROR;
free(val);
hwloc_topology_destroy(opal_hwloc_topology);
error = "setting topology";
goto error;
}
/* since we are loading this from an external source, we have to
* explicitly set a flag so hwloc sets things up correctly
*/
if (0 != hwloc_topology_set_flags(opal_hwloc_topology,
(HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM |
HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM |
HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) {
ret = OPAL_ERROR;
hwloc_topology_destroy(opal_hwloc_topology);
free(val);
error = "setting topology";
goto error;
}
/* now load the topology */
if (0 != hwloc_topology_load(opal_hwloc_topology)) {
ret = OPAL_ERROR;
hwloc_topology_destroy(opal_hwloc_topology);
free(val);
error = "setting topology";
goto error;
}
free(val);
} else {
/* it wasn't passed down to us, so go get it */
if (OPAL_SUCCESS != (ret = opal_hwloc_base_get_topology())) {
error = "topology discovery";
goto error;
}
/* push it into the PMIx database in case someone
* tries to retrieve it so we avoid an attempt to
* get it again */
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_LOCAL_TOPO);
kv->type = OPAL_STRING;
if (0 != (ret = hwloc_topology_export_xmlbuffer(opal_hwloc_topology, &kv->data.string, &u32))) {
error = "topology export";
goto error;
}
if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_NAME, kv))) {
error = "topology store";
goto error;
}
OBJ_RELEASE(kv);
} }
/* use the std app init to complete the procedure */ /* use the std app init to complete the procedure */

Просмотреть файл

@ -332,7 +332,7 @@ void orte_plm_base_setup_job(int fd, short args, void *cbdata)
} }
free(key); free(key);
} else { } else {
if (ORTE_SUCCESS != (rc = orte_pre_condition_transports(caddy->jdata))) { if (ORTE_SUCCESS != (rc = orte_pre_condition_transports(caddy->jdata, NULL))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
OBJ_RELEASE(caddy); OBJ_RELEASE(caddy);
@ -342,7 +342,7 @@ void orte_plm_base_setup_job(int fd, short args, void *cbdata)
} else { } else {
/* this will also record the transport key attribute in the job object, and /* this will also record the transport key attribute in the job object, and
* adds the key envar to each app */ * adds the key envar to each app */
if (ORTE_SUCCESS != (rc = orte_pre_condition_transports(caddy->jdata))) { if (ORTE_SUCCESS != (rc = orte_pre_condition_transports(caddy->jdata, NULL))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
OBJ_RELEASE(caddy); OBJ_RELEASE(caddy);

Просмотреть файл

@ -578,7 +578,7 @@ int orte_daemon(int argc, char *argv[])
ORTE_FLAG_SET(proc, ORTE_PROC_FLAG_LOCAL); ORTE_FLAG_SET(proc, ORTE_PROC_FLAG_LOCAL);
/* set the ORTE_JOB_TRANSPORT_KEY from the environment */ /* set the ORTE_JOB_TRANSPORT_KEY from the environment */
orte_pre_condition_transports(jdata); orte_pre_condition_transports(jdata, NULL);
/* register the singleton's nspace with our PMIx server */ /* register the singleton's nspace with our PMIx server */
if (ORTE_SUCCESS != (ret = orte_pmix_server_register_nspace(jdata, false))) { if (ORTE_SUCCESS != (ret = orte_pmix_server_register_nspace(jdata, false))) {

33
orte/test/mpi/hellocycle.pl Исполняемый файл
Просмотреть файл

@ -0,0 +1,33 @@
#!/usr/bin/env perl
#
# Launch-rate stress test: runs ./hello $totalcount times back to back,
# prints a timestamped progress line every 1000 launches, and finally
# reports the average number of launches per second.
#
# Output (stdout):
#   "<remaining>: <local time>"   progress lines
#   "Rate: <launches per second>" summary line
# Diagnostics (stderr):
#   a warning if any launch returned a non-zero status.
#
use strict;
use warnings;
# NOTE(review): nothing below calls a Date::Parse function (localtime and
# time are Perl builtins); the import is kept unchanged.
use Date::Parse;

# Exported to every ./hello child through the environment.
# Presumably restricts Open MPI to its "self" BTL - confirm against the
# MCA parameter documentation.
$ENV{OMPI_MCA_btl} = "self";

# Print the number of launches still to go together with the current
# local time (scalar-context localtime yields a ctime-style string).
sub prtime {
    my ($remaining) = @_;
    my $stamp = localtime;
    print "$remaining: $stamp\n";
}

my $totalcount = 5000;
my $count      = $totalcount;
my $failures   = 0;

prtime($count);
my $start = time();

while ($count > 0) {
    # Output of the child is discarded; only its exit status matters here.
    my $status = system("./hello > /dev/null 2>&1");

    # system() returns -1 when the command could not be started and the
    # wait status otherwise, so any non-zero value is a failed launch.
    $failures++ if $status != 0;

    $count--;
    if ($count % 1000 == 0) {
        prtime($count);
    }
}

prtime($count);
my $stop = time();

# time() has one-second resolution, so a very fast run (for example when
# ./hello is missing and every launch fails immediately) can give an
# elapsed time of zero; avoid dying with "Illegal division by zero".
my $elapsed = $stop - $start;
if ($elapsed > 0) {
    my $rate = $totalcount / $elapsed;
    print "Rate: $rate\n";
} else {
    print "Rate: unavailable (run finished in under one second)\n";
}

# A rate computed from failed launches is meaningless; say so on stderr
# without changing what is written to stdout.
if ($failures > 0) {
    warn "$failures of $totalcount launches of ./hello returned a non-zero status\n";
}

Просмотреть файл

@ -12,7 +12,7 @@
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science * Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved. * and Technology (RIST). All rights reserved.
* Copyright (c) 2016 Intel, Inc. All rights reserved. * Copyright (c) 2016-2017 Intel, Inc. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -130,7 +130,7 @@ char* orte_pre_condition_transports_print(uint64_t *unique_key)
} }
int orte_pre_condition_transports(orte_job_t *jdata) int orte_pre_condition_transports(orte_job_t *jdata, char **key)
{ {
uint64_t unique_key[2]; uint64_t unique_key[2];
int n; int n;
@ -164,6 +164,7 @@ int orte_pre_condition_transports(orte_job_t *jdata)
} }
/* record it in case this job executes a dynamic spawn */ /* record it in case this job executes a dynamic spawn */
if (NULL != jdata) {
orte_set_attribute(&jdata->attributes, ORTE_JOB_TRANSPORT_KEY, ORTE_ATTR_LOCAL, string_key, OPAL_STRING); orte_set_attribute(&jdata->attributes, ORTE_JOB_TRANSPORT_KEY, ORTE_ATTR_LOCAL, string_key, OPAL_STRING);
if (OPAL_SUCCESS != mca_base_var_env_name ("orte_precondition_transports", &cs_env)) { if (OPAL_SUCCESS != mca_base_var_env_name ("orte_precondition_transports", &cs_env)) {
@ -178,9 +179,13 @@ int orte_pre_condition_transports(orte_job_t *jdata)
} }
opal_setenv(cs_env, string_key, true, &app->env); opal_setenv(cs_env, string_key, true, &app->env);
} }
free(cs_env); free(cs_env);
free(string_key); free(string_key);
} else if (NULL != key) {
*key = string_key;
} else {
free(string_key);
}
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }

Просмотреть файл

@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved. * University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2017 Intel, Inc. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -32,7 +33,7 @@
BEGIN_C_DECLS BEGIN_C_DECLS
ORTE_DECLSPEC int orte_pre_condition_transports(orte_job_t *jdata); ORTE_DECLSPEC int orte_pre_condition_transports(orte_job_t *jdata, char **key);
ORTE_DECLSPEC char* orte_pre_condition_transports_print(uint64_t *unique_key); ORTE_DECLSPEC char* orte_pre_condition_transports_print(uint64_t *unique_key);