From 80f4e3b872735b0a967a0b8d39dd44151b816c95 Mon Sep 17 00:00:00 2001 From: Ralph Castain <rhc@open-mpi.org> Date: Mon, 23 May 2016 21:09:44 -0700 Subject: [PATCH] Fix the --tune problem by searching the argv for MCA params in advance of opal_init_util. Only search the first app_context as we historically have done - we can debate whether or not to search all app_contexts --- opal/runtime/opal_init.c | 14 +- orte/bindings/python/src/orte-cffi/build.py | 4 +- orte/orted/orted_submit.c | 135 ++++++++++---------- orte/orted/orted_submit.h | 2 +- 4 files changed, 80 insertions(+), 75 deletions(-) diff --git a/opal/runtime/opal_init.c b/opal/runtime/opal_init.c index a6f97990a8..e8d2220d84 100644 --- a/opal/runtime/opal_init.c +++ b/opal/runtime/opal_init.c @@ -334,6 +334,13 @@ opal_init_util(int* pargc, char*** pargv) goto return_error; } + /* read any param files that were provided */ + if (OPAL_SUCCESS != (ret = mca_base_var_cache_files(false))) { + error = "failed to cache files"; + goto return_error; + } + + /* register params for opal */ if (OPAL_SUCCESS != (ret = opal_register_params())) { error = "opal_register_params"; @@ -415,13 +422,6 @@ opal_init(int* pargc, char*** pargv) return ret; } - /* read any param files that were provided */ - if (OPAL_SUCCESS != (ret = mca_base_var_cache_files(false))) { - error = "failed to cache files"; - goto return_error; - } - - /* open hwloc - since this is a static framework, no * select is required */ diff --git a/orte/bindings/python/src/orte-cffi/build.py b/orte/bindings/python/src/orte-cffi/build.py index 18dfd6cdf7..64bd0e33cd 100644 --- a/orte/bindings/python/src/orte-cffi/build.py +++ b/orte/bindings/python/src/orte-cffi/build.py @@ -118,11 +118,11 @@ ffi.set_source("orte_cffi", """ ffi.cdef(""" /* Types */ typedef ... orte_job_t; -typedef ... opal_cmd_line_t; +typedef ... 
opal_cmd_line_init_t; typedef void (*orte_submit_cbfunc_t)(int index, orte_job_t *jdata, int ret, void *cbdata); /* Functions */ -int orte_submit_init(int argc, char *argv[], opal_cmd_line_t *opts); +int orte_submit_init(int argc, char *argv[], opal_cmd_line_init_t *opts); int orte_submit_job(char *cmd[], int *index, orte_submit_cbfunc_t launch_cb, void *launch_cbdata, orte_submit_cbfunc_t complete_cb, void *complete_cbdata); diff --git a/orte/orted/orted_submit.c b/orte/orted/orted_submit.c index 41d3bf1d16..de90a63cf4 100644 --- a/orte/orted/orted_submit.c +++ b/orte/orted/orted_submit.c @@ -199,7 +199,7 @@ static OBJ_CLASS_INSTANCE(trackr_t, tcon, tdes); int orte_submit_init(int argc, char *argv[], - opal_cmd_line_t *opts) + opal_cmd_line_init_t *opts) { int rc, i; char *param; @@ -211,51 +211,32 @@ int orte_submit_init(int argc, char *argv[], use it in pretty-print error messages */ orte_basename = opal_basename(argv[0]); - /* see if print version is requested. Do this before - * check for help so that --version --help works as - * one might expect. 
*/ - for (i=0; NULL != argv[i]; i++) { - if (0 == strcmp(argv[i], "--version") || - 0 == strcmp(argv[i], "-V")) { - char *str, *project_name = NULL; - if (0 == strcmp(orte_basename, "mpirun")) { - project_name = "Open MPI"; - } else { - project_name = "OpenRTE"; - } - str = opal_info_make_version_str("all", - OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, - OPAL_RELEASE_VERSION, - OPAL_GREEK_VERSION, - OPAL_REPO_REV); - if (NULL != str) { - fprintf(stdout, "%s (%s) %s\n\nReport bugs to %s\n", - orte_basename, project_name, str, PACKAGE_BUGREPORT); - free(str); - } - exit(0); + /* search the argv for MCA params */ + for (i=0; NULL != argv[i]; i++) { + if (':' == argv[i][0] || + NULL == argv[i+1] || NULL == argv[i+2]) { + break; + } + if (0 == strncmp(argv[i], "-"OPAL_MCA_CMD_LINE_ID, strlen("-"OPAL_MCA_CMD_LINE_ID)) || + 0 == strncmp(argv[i], "--"OPAL_MCA_CMD_LINE_ID, strlen("--"OPAL_MCA_CMD_LINE_ID)) || + 0 == strncmp(argv[i], "-g"OPAL_MCA_CMD_LINE_ID, strlen("-g"OPAL_MCA_CMD_LINE_ID)) || + 0 == strncmp(argv[i], "--g"OPAL_MCA_CMD_LINE_ID, strlen("--g"OPAL_MCA_CMD_LINE_ID))) { + (void) mca_base_var_env_name (argv[i+1], &param); + opal_setenv(param, argv[i+2], true, &environ); + free(param); + } else if (0 == strcmp(argv[i], "-am") || + 0 == strcmp(argv[i], "--am")) { + (void)mca_base_var_env_name("mca_base_param_file_prefix", &param); + opal_setenv(param, argv[i+1], true, &environ); + free(param); + } else if (0 == strcmp(argv[i], "-tune") || + 0 == strcmp(argv[i], "--tune")) { + (void)mca_base_var_env_name("mca_base_envar_file_prefix", &param); + opal_setenv(param, argv[i+1], true, &environ); + free(param); } } - /* need to parse mca options *before* opal_init_util() */ - orte_cmd_line = OBJ_NEW(opal_cmd_line_t); - mca_base_cmd_line_setup (orte_cmd_line); - - /* parse the result to get values */ - if (OPAL_SUCCESS != (rc = opal_cmd_line_parse(orte_cmd_line, - true, true, argc, argv)) ) { - if (OPAL_ERR_SILENT != rc) { - fprintf(stderr, "%s: command line error (%s)\n", argv[0], - 
opal_strerror(rc)); - } - return rc; - } - - if (OPAL_SUCCESS != (rc = mca_base_cmd_line_process_args(orte_cmd_line, &environ, &environ))) { - return rc; - } - - /* init only the util portion of OPAL */ if (OPAL_SUCCESS != (rc = opal_init_util(&argc, &argv))) { return rc; @@ -273,6 +254,10 @@ int orte_submit_init(int argc, char *argv[], OBJ_CONSTRUCT(&tool_jobs, opal_pointer_array_t); opal_pointer_array_init(&tool_jobs, 256, INT_MAX, 128); + + /* setup the cmd line */ + orte_cmd_line = OBJ_NEW(opal_cmd_line_t); + /* if they were provided, add the opts */ if (NULL != opts) { if (OPAL_SUCCESS != (rc = opal_cmd_line_add(orte_cmd_line, opts))) { @@ -298,6 +283,29 @@ int orte_submit_init(int argc, char *argv[], return rc; } + /* see if print version is requested. Do this before + * check for help so that --version --help works as + * one might expect. */ + if (orte_cmd_options.version) { + char *str, *project_name = NULL; + if (0 == strcmp(orte_basename, "mpirun")) { + project_name = "Open MPI"; + } else { + project_name = "OpenRTE"; + } + str = opal_info_make_version_str("all", + OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, + OPAL_RELEASE_VERSION, + OPAL_GREEK_VERSION, + OPAL_REPO_REV); + if (NULL != str) { + fprintf(stdout, "%s (%s) %s\n\nReport bugs to %s\n", + orte_basename, project_name, str, PACKAGE_BUGREPORT); + free(str); + } + exit(0); + } + /* check if we are running as root - if we are, then only allow * us to proceed if the allow-run-as-root flag was given. 
Otherwise, * exit with a giant warning flag @@ -332,29 +340,26 @@ int orte_submit_init(int argc, char *argv[], } /* Check for help request */ - for (i=0; NULL != argv[i]; i++) { - if (0 == strcmp(argv[i], "--help") || - 0 == strcmp(argv[i], "-h")) { - char *str, *args = NULL; - char *project_name = NULL; - if (0 == strcmp(orte_basename, "mpirun")) { - project_name = "Open MPI"; - } else { - project_name = "OpenRTE"; - } - args = opal_cmd_line_get_usage_msg(orte_cmd_line); - str = opal_show_help_string("help-orterun.txt", "orterun:usage", false, - orte_basename, project_name, OPAL_VERSION, - orte_basename, args, - PACKAGE_BUGREPORT); - if (NULL != str) { - printf("%s", str); - free(str); - } - free(args); - /* If someone asks for help, that should be all we do */ - exit(0); + if (orte_cmd_options.help) { + char *str, *args = NULL; + char *project_name = NULL; + if (0 == strcmp(orte_basename, "mpirun")) { + project_name = "Open MPI"; + } else { + project_name = "OpenRTE"; } + args = opal_cmd_line_get_usage_msg(orte_cmd_line); + str = opal_show_help_string("help-orterun.txt", "orterun:usage", false, + orte_basename, project_name, OPAL_VERSION, + orte_basename, args, + PACKAGE_BUGREPORT); + if (NULL != str) { + printf("%s", str); + free(str); + } + free(args); + /* If someone asks for help, that should be all we do */ + exit(0); } /* set the flags - if they gave us a -hnp option, then diff --git a/orte/orted/orted_submit.h b/orte/orted/orted_submit.h index 1daf1b0ecb..f26b14aa9b 100644 --- a/orte/orted/orted_submit.h +++ b/orte/orted/orted_submit.h @@ -23,7 +23,7 @@ BEGIN_C_DECLS typedef void (*orte_submit_cbfunc_t)(int index, orte_job_t *jdata, int ret, void *cbdata); ORTE_DECLSPEC int orte_submit_init(int argc, char *argv[], - opal_cmd_line_t *opts); + opal_cmd_line_init_t *opts); ORTE_DECLSPEC int orte_submit_cancel(int index); ORTE_DECLSPEC void orte_submit_finalize(void); ORTE_DECLSPEC int orte_submit_job(char *cmd[], int *index,