Fix orte-submit so it allows application procs to select the correct ess component. Protect orte_data_server from multiple calls to finalize.
Этот коммит содержится в:
родитель
ef6cf50687
Коммит
f872e99315
@ -601,6 +601,13 @@ static int rte_init(void)
|
||||
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_SHOW_HELP,
|
||||
ORTE_RML_PERSISTENT, orte_show_help_recv, NULL);
|
||||
|
||||
/* setup the data server */
|
||||
if (ORTE_SUCCESS != (ret = orte_data_server_init())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_data_server_init";
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (orte_create_session_dirs) {
|
||||
/* set the opal_output hnp file location to be in the
|
||||
* proc-specific session directory. */
|
||||
@ -814,6 +821,8 @@ static int rte_finalize(void)
|
||||
/* shutdown the pmix server */
|
||||
pmix_server_finalize();
|
||||
(void) mca_base_framework_close(&opal_pmix_base_framework);
|
||||
/* cleanup our data server */
|
||||
orte_data_server_finalize();
|
||||
|
||||
(void) mca_base_framework_close(&orte_schizo_base_framework);
|
||||
(void) mca_base_framework_close(&orte_dfs_base_framework);
|
||||
|
@ -104,11 +104,17 @@ OBJ_CLASS_INSTANCE(orte_data_req_t,
|
||||
/* local globals */
|
||||
static opal_pointer_array_t orte_data_server_store;
|
||||
static opal_list_t pending;
|
||||
static bool initialized = false;
|
||||
|
||||
int orte_data_server_init(void)
|
||||
{
|
||||
int rc;
|
||||
|
||||
if (initialized) {
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
initialized = true;
|
||||
|
||||
OBJ_CONSTRUCT(&orte_data_server_store, opal_pointer_array_t);
|
||||
if (ORTE_SUCCESS != (rc = opal_pointer_array_init(&orte_data_server_store,
|
||||
1,
|
||||
@ -134,6 +140,11 @@ void orte_data_server_finalize(void)
|
||||
orte_std_cntr_t i;
|
||||
orte_data_object_t *data;
|
||||
|
||||
if (!initialized) {
|
||||
return;
|
||||
}
|
||||
initialized = false;
|
||||
|
||||
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DATA_SERVER);
|
||||
|
||||
for (i=0; i < orte_data_server_store.size; i++) {
|
||||
|
@ -96,7 +96,6 @@ static struct {
|
||||
bool help;
|
||||
bool version;
|
||||
char *report_uri;
|
||||
char *basename;
|
||||
char *prefix;
|
||||
bool run_as_root;
|
||||
} myglobals;
|
||||
@ -150,7 +149,7 @@ int main(int argc, char *argv[])
|
||||
memset(&myglobals, 0, sizeof(myglobals));
|
||||
/* find our basename (the name of the executable) so that we can
|
||||
use it in pretty-print error messages */
|
||||
myglobals.basename = opal_basename(argv[0]);
|
||||
orte_basename = opal_basename(argv[0]);
|
||||
|
||||
opal_cmd_line_create(&cmd_line, cmd_line_init);
|
||||
mca_base_cmd_line_setup(&cmd_line);
|
||||
@ -174,7 +173,7 @@ int main(int argc, char *argv[])
|
||||
OPAL_REPO_REV);
|
||||
if (NULL != str) {
|
||||
fprintf(stdout, "%s %s\n\nReport bugs to %s\n",
|
||||
myglobals.basename, str, PACKAGE_BUGREPORT);
|
||||
orte_basename, str, PACKAGE_BUGREPORT);
|
||||
free(str);
|
||||
}
|
||||
exit(0);
|
||||
@ -187,10 +186,10 @@ int main(int argc, char *argv[])
|
||||
if (0 == geteuid() && !myglobals.run_as_root) {
|
||||
fprintf(stderr, "--------------------------------------------------------------------------\n");
|
||||
if (myglobals.help) {
|
||||
fprintf(stderr, "%s cannot provide the help message when run as root\n", myglobals.basename);
|
||||
fprintf(stderr, "%s cannot provide the help message when run as root\n", orte_basename);
|
||||
} else {
|
||||
/* show_help is not yet available, so print an error manually */
|
||||
fprintf(stderr, "%s has detected an attempt to run as root.\n", myglobals.basename);
|
||||
fprintf(stderr, "%s has detected an attempt to run as root.\n", orte_basename);
|
||||
}
|
||||
fprintf(stderr, " This is *strongly* discouraged as any mistake (e.g., in defining TMPDIR) or bug can\n");
|
||||
fprintf(stderr, "result in catastrophic damage to the OS file system, leaving\n");
|
||||
@ -222,15 +221,15 @@ int main(int argc, char *argv[])
|
||||
if (myglobals.help) {
|
||||
char *str, *args = NULL;
|
||||
char *project_name = NULL;
|
||||
if (0 == strcmp(myglobals.basename, "mpirun")) {
|
||||
if (0 == strcmp(orte_basename, "mpirun")) {
|
||||
project_name = "Open MPI";
|
||||
} else {
|
||||
project_name = "OpenRTE";
|
||||
}
|
||||
args = opal_cmd_line_get_usage_msg(&cmd_line);
|
||||
str = opal_show_help_string("help-orterun.txt", "orterun:usage", false,
|
||||
myglobals.basename, project_name, OPAL_VERSION,
|
||||
myglobals.basename, args,
|
||||
orte_basename, project_name, OPAL_VERSION,
|
||||
orte_basename, args,
|
||||
PACKAGE_BUGREPORT);
|
||||
if (NULL != str) {
|
||||
printf("%s", str);
|
||||
@ -245,6 +244,14 @@ int main(int argc, char *argv[])
|
||||
/* Setup MCA params */
|
||||
orte_register_params();
|
||||
|
||||
/* save the environment for launch purposes. This MUST be
|
||||
* done so that we can pass it to any local procs we
|
||||
* spawn - otherwise, those local procs won't see any
|
||||
* non-MCA envars that were set in the environment prior to calling
|
||||
* orterun
|
||||
*/
|
||||
orte_launch_environ = opal_argv_copy(environ);
|
||||
|
||||
/* Initialize our Open RTE environment */
|
||||
if (ORTE_SUCCESS != (rc = orte_init(&argc, &argv, ORTE_PROC_MASTER))) {
|
||||
/* cannot call ORTE_ERROR_LOG as it could be the errmgr
|
||||
@ -273,7 +280,7 @@ int main(int argc, char *argv[])
|
||||
fp = fopen(ptr, "w");
|
||||
if (NULL == fp) {
|
||||
orte_show_help("help-orterun.txt", "orterun:write_file", false,
|
||||
myglobals.basename, "pid", ptr);
|
||||
orte_basename, "pid", ptr);
|
||||
exit(0);
|
||||
}
|
||||
fprintf(fp, "%s\n", uri);
|
||||
@ -282,7 +289,7 @@ int main(int argc, char *argv[])
|
||||
fp = fopen(myglobals.report_uri, "w");
|
||||
if (NULL == fp) {
|
||||
orte_show_help("help-orterun.txt", "orterun:write_file", false,
|
||||
myglobals.basename, "pid", myglobals.report_uri);
|
||||
orte_basename, "pid", myglobals.report_uri);
|
||||
exit(0);
|
||||
}
|
||||
fprintf(fp, "%s\n", uri);
|
||||
@ -296,13 +303,13 @@ int main(int argc, char *argv[])
|
||||
/* get the daemon job object - was created by ess/hnp component */
|
||||
if (NULL == (jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
|
||||
orte_show_help("help-orterun.txt", "bad-job-object", true,
|
||||
myglobals.basename);
|
||||
orte_basename);
|
||||
exit(0);
|
||||
}
|
||||
/* also should have created a daemon "app" */
|
||||
if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, 0))) {
|
||||
orte_show_help("help-orterun.txt", "bad-app-object", true,
|
||||
myglobals.basename);
|
||||
orte_basename);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
@ -326,7 +333,7 @@ int main(int argc, char *argv[])
|
||||
}
|
||||
if (0 != strcmp(param, value)) {
|
||||
orte_show_help("help-orterun.txt", "orterun:app-prefix-conflict",
|
||||
true, myglobals.basename, value, param);
|
||||
true, orte_basename, value, param);
|
||||
/* let the global-level prefix take precedence since we
|
||||
* know that one is being used
|
||||
*/
|
||||
@ -352,7 +359,7 @@ int main(int argc, char *argv[])
|
||||
param_len--;
|
||||
if (0 == param_len) {
|
||||
orte_show_help("help-orterun.txt", "orterun:empty-prefix",
|
||||
true, myglobals.basename, myglobals.basename);
|
||||
true, orte_basename, orte_basename);
|
||||
return ORTE_ERR_FATAL;
|
||||
}
|
||||
}
|
||||
@ -368,7 +375,7 @@ int main(int argc, char *argv[])
|
||||
if (0 < (j = opal_cmd_line_get_ninsts(&cmd_line, "hostfile"))) {
|
||||
if(1 < j) {
|
||||
orte_show_help("help-orterun.txt", "orterun:multiple-hostfiles",
|
||||
true, myglobals.basename, NULL);
|
||||
true, orte_basename, NULL);
|
||||
return ORTE_ERR_FATAL;
|
||||
} else {
|
||||
value = opal_cmd_line_get_param(&cmd_line, "hostfile", 0, 0);
|
||||
@ -378,7 +385,7 @@ int main(int argc, char *argv[])
|
||||
if (0 < (j = opal_cmd_line_get_ninsts(&cmd_line, "machinefile"))) {
|
||||
if(1 < j || orte_get_attribute(&app->attributes, ORTE_APP_HOSTFILE, NULL, OPAL_STRING)) {
|
||||
orte_show_help("help-orterun.txt", "orterun:multiple-hostfiles",
|
||||
true, myglobals.basename, NULL);
|
||||
true, orte_basename, NULL);
|
||||
return ORTE_ERR_FATAL;
|
||||
} else {
|
||||
value = opal_cmd_line_get_param(&cmd_line, "machinefile", 0, 0);
|
||||
|
@ -127,7 +127,6 @@ static struct {
|
||||
char *path;
|
||||
bool enable_recovery;
|
||||
char *personality;
|
||||
char *basename;
|
||||
char *prefix;
|
||||
bool terminate;
|
||||
bool nolocal;
|
||||
@ -333,7 +332,7 @@ static void spawn_recv(int status, orte_process_name_t* sender,
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int rc, i;
|
||||
int rc;
|
||||
opal_cmd_line_t cmd_line;
|
||||
char *param;
|
||||
orte_job_t *jdata=NULL;
|
||||
@ -344,7 +343,7 @@ int main(int argc, char *argv[])
|
||||
memset(&myglobals, 0, sizeof(myglobals));
|
||||
/* find our basename (the name of the executable) so that we can
|
||||
use it in pretty-print error messages */
|
||||
myglobals.basename = opal_basename(argv[0]);
|
||||
orte_basename = opal_basename(argv[0]);
|
||||
|
||||
|
||||
opal_cmd_line_create(&cmd_line, cmd_line_init);
|
||||
@ -369,7 +368,7 @@ int main(int argc, char *argv[])
|
||||
OPAL_REPO_REV);
|
||||
if (NULL != str) {
|
||||
fprintf(stdout, "%s %s\n\nReport bugs to %s\n",
|
||||
myglobals.basename, str, PACKAGE_BUGREPORT);
|
||||
orte_basename, str, PACKAGE_BUGREPORT);
|
||||
free(str);
|
||||
}
|
||||
exit(0);
|
||||
@ -382,10 +381,10 @@ int main(int argc, char *argv[])
|
||||
if (0 == geteuid() && !myglobals.run_as_root) {
|
||||
fprintf(stderr, "--------------------------------------------------------------------------\n");
|
||||
if (myglobals.help) {
|
||||
fprintf(stderr, "%s cannot provide the help message when run as root\n", myglobals.basename);
|
||||
fprintf(stderr, "%s cannot provide the help message when run as root\n", orte_basename);
|
||||
} else {
|
||||
/* show_help is not yet available, so print an error manually */
|
||||
fprintf(stderr, "%s has detected an attempt to run as root.\n", myglobals.basename);
|
||||
fprintf(stderr, "%s has detected an attempt to run as root.\n", orte_basename);
|
||||
}
|
||||
fprintf(stderr, " This is *strongly* discouraged as any mistake (e.g., in defining TMPDIR) or bug can\n");
|
||||
fprintf(stderr, "result in catastrophic damage to the OS file system, leaving\n");
|
||||
@ -427,7 +426,7 @@ int main(int argc, char *argv[])
|
||||
char *str, *args = NULL;
|
||||
char *project_name = NULL;
|
||||
opal_output(0, "GETTING HELP");
|
||||
if (0 == strcmp(myglobals.basename, "mpirun")) {
|
||||
if (0 == strcmp(orte_basename, "mpirun")) {
|
||||
project_name = "Open MPI";
|
||||
} else {
|
||||
project_name = "OpenRTE";
|
||||
@ -435,8 +434,8 @@ int main(int argc, char *argv[])
|
||||
args = opal_cmd_line_get_usage_msg(&cmd_line);
|
||||
opal_output(0, "CMD LINE %s", args);
|
||||
str = opal_show_help_string("help-orterun.txt", "orterun:usage", false,
|
||||
myglobals.basename, project_name, OPAL_VERSION,
|
||||
myglobals.basename, args,
|
||||
orte_basename, project_name, OPAL_VERSION,
|
||||
orte_basename, args,
|
||||
PACKAGE_BUGREPORT);
|
||||
if (NULL != str) {
|
||||
printf("%s", str);
|
||||
@ -529,11 +528,9 @@ int main(int argc, char *argv[])
|
||||
*/
|
||||
opal_finalize();
|
||||
|
||||
for (i=0; NULL != environ[i]; i++) {
|
||||
if (0 == strncmp(environ[i], "OMPI", 4)) {
|
||||
fprintf(stderr, "%s\n", environ[i]);
|
||||
}
|
||||
}
|
||||
/* clear the ess param from the environment so our children
|
||||
* don't pick it up */
|
||||
opal_unsetenv("OMPI_MCA_ess", &environ);
|
||||
|
||||
/* set the info in our contact table */
|
||||
orte_rml.set_contact_info(orte_process_info.my_hnp_uri);
|
||||
@ -659,7 +656,7 @@ int main(int argc, char *argv[])
|
||||
/* This should never happen -- this case should be caught in
|
||||
create_app(), but let's just double check... */
|
||||
orte_show_help("help-orterun.txt", "orterun:nothing-to-do",
|
||||
true, myglobals.basename);
|
||||
true, orte_basename);
|
||||
exit(ORTE_ERROR_DEFAULT_EXIT_CODE);
|
||||
}
|
||||
|
||||
@ -765,7 +762,7 @@ static int parse_globals(int argc, char* argv[], opal_cmd_line_t *cmd_line)
|
||||
fp = fopen(myglobals.report_pid, "w");
|
||||
if (NULL == fp) {
|
||||
orte_show_help("help-orterun.txt", "orterun:write_file", false,
|
||||
myglobals.basename, "pid", myglobals.report_pid);
|
||||
orte_basename, "pid", myglobals.report_pid);
|
||||
exit(0);
|
||||
}
|
||||
fprintf(fp, "%d\n", (int)getpid());
|
||||
@ -1009,7 +1006,7 @@ static int create_app(int argc, char* argv[],
|
||||
|
||||
if (0 == count) {
|
||||
orte_show_help("help-orterun.txt", "orterun:executable-not-specified",
|
||||
true, myglobals.basename, myglobals.basename);
|
||||
true, orte_basename, orte_basename);
|
||||
rc = ORTE_ERR_NOT_FOUND;
|
||||
goto cleanup;
|
||||
}
|
||||
@ -1099,7 +1096,7 @@ static int create_app(int argc, char* argv[],
|
||||
}
|
||||
if (0 != strcmp(param, value)) {
|
||||
orte_show_help("help-orterun.txt", "orterun:app-prefix-conflict",
|
||||
true, myglobals.basename, value, param);
|
||||
true, orte_basename, value, param);
|
||||
/* let the global-level prefix take precedence since we
|
||||
* know that one is being used
|
||||
*/
|
||||
@ -1125,7 +1122,7 @@ static int create_app(int argc, char* argv[],
|
||||
param_len--;
|
||||
if (0 == param_len) {
|
||||
orte_show_help("help-orterun.txt", "orterun:empty-prefix",
|
||||
true, myglobals.basename, myglobals.basename);
|
||||
true, orte_basename, orte_basename);
|
||||
free(param);
|
||||
return ORTE_ERR_FATAL;
|
||||
}
|
||||
@ -1143,7 +1140,7 @@ static int create_app(int argc, char* argv[],
|
||||
if (0 < (j = opal_cmd_line_get_ninsts(&cmd_line, "hostfile"))) {
|
||||
if(1 < j) {
|
||||
orte_show_help("help-orterun.txt", "orterun:multiple-hostfiles",
|
||||
true, myglobals.basename, NULL);
|
||||
true, orte_basename, NULL);
|
||||
return ORTE_ERR_FATAL;
|
||||
} else {
|
||||
value = opal_cmd_line_get_param(&cmd_line, "hostfile", 0, 0);
|
||||
@ -1153,7 +1150,7 @@ static int create_app(int argc, char* argv[],
|
||||
if (0 < (j = opal_cmd_line_get_ninsts(&cmd_line, "machinefile"))) {
|
||||
if(1 < j || orte_get_attribute(&app->attributes, ORTE_APP_HOSTFILE, NULL, OPAL_STRING)) {
|
||||
orte_show_help("help-orterun.txt", "orterun:multiple-hostfiles",
|
||||
true, myglobals.basename, NULL);
|
||||
true, orte_basename, NULL);
|
||||
return ORTE_ERR_FATAL;
|
||||
} else {
|
||||
value = opal_cmd_line_get_param(&cmd_line, "machinefile", 0, 0);
|
||||
@ -1177,7 +1174,7 @@ static int create_app(int argc, char* argv[],
|
||||
/* check for bozo error */
|
||||
if (0 > myglobals.num_procs) {
|
||||
orte_show_help("help-orterun.txt", "orterun:negative-nprocs",
|
||||
true, myglobals.basename, app->argv[0],
|
||||
true, orte_basename, app->argv[0],
|
||||
myglobals.num_procs, NULL);
|
||||
return ORTE_ERR_FATAL;
|
||||
}
|
||||
@ -1216,7 +1213,7 @@ static int create_app(int argc, char* argv[],
|
||||
app->app = strdup(app->argv[0]);
|
||||
if (NULL == app->app) {
|
||||
orte_show_help("help-orterun.txt", "orterun:call-failed",
|
||||
true, myglobals.basename, "library", "strdup returned NULL", errno);
|
||||
true, orte_basename, "library", "strdup returned NULL", errno);
|
||||
rc = ORTE_ERR_NOT_FOUND;
|
||||
goto cleanup;
|
||||
}
|
||||
|
@ -105,7 +105,6 @@
|
||||
#include "orte/runtime/runtime.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/runtime/orte_wait.h"
|
||||
#include "orte/runtime/orte_data_server.h"
|
||||
#include "orte/runtime/orte_locks.h"
|
||||
#include "orte/runtime/orte_quit.h"
|
||||
|
||||
@ -1011,13 +1010,6 @@ int orterun(int argc, char *argv[])
|
||||
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DAEMON,
|
||||
ORTE_RML_PERSISTENT, orte_daemon_recv, NULL);
|
||||
|
||||
/* setup the data server */
|
||||
if (ORTE_SUCCESS != (rc = orte_data_server_init())) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
|
||||
goto DONE;
|
||||
}
|
||||
|
||||
/* setup for debugging */
|
||||
orte_debugger_init_before_spawn(jdata);
|
||||
orte_state.add_job_state(ORTE_JOB_STATE_READY_FOR_DEBUGGERS,
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user