1
1

Make use of "instant-on" feature optional

The PMIx support for "instant-on" remains experimental, so disable it by default. Provide an MCA parameter and a corresponding command-line option to enable it at runtime.

Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
Ralph Castain 2018-06-17 02:40:12 -07:00
родитель fa18ba395d
Коммит 795140e590
5 изменённых файлов: 28 добавлений и 18 удалений

23
orte/mca/odls/base/odls_base_default_fns.c Исполняемый файл → Обычный файл
Просмотреть файл

@ -100,7 +100,6 @@
#include "orte/mca/odls/base/base.h"
#include "orte/mca/odls/base/odls_private.h"
#if 0
static void setup_cbfunc(int status,
opal_list_t *info,
void *provided_cbdata,
@ -132,9 +131,8 @@ static void setup_cbfunc(int status,
/* move to next stage */
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_SEND_LAUNCH_MSG);
}
#endif
}
/* IT IS CRITICAL THAT ANY CHANGE IN THE ORDER OF THE INFO PACKED IN
* THIS FUNCTION BE REFLECTED IN THE CONSTRUCT_CHILD_LIST PARSER BELOW
*/
@ -433,8 +431,7 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer,
}
/* get any application prep info */
#if 0
if (NULL != opal_pmix.server_setup_application) {
if (orte_enable_instant_on_support && NULL != opal_pmix.server_setup_application) {
/* we don't want to block here because it could
* take some indeterminate time to get the info */
if (OPAL_SUCCESS != (rc = opal_pmix.server_setup_application(jdata->jobid, NULL, setup_cbfunc, jdata))) {
@ -442,7 +439,6 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer,
}
return rc;
}
#endif
/* move to next stage */
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_SEND_LAUNCH_MSG);
@ -457,13 +453,11 @@ static void fm_release(void *cbdata)
OBJ_RELEASE(bptr);
}
#if 0
/* Callback passed to opal_pmix.server_setup_local_support in this file.
 * cbdata is treated as an opal_pmix_lock_t* (the caller passes &lock) and is
 * handed to OPAL_PMIX_WAKEUP_THREAD; the status argument is not examined. */
static void ls_cbunc(int status, void *cbdata)
{
opal_pmix_lock_t *lock = (opal_pmix_lock_t*)cbdata;
OPAL_PMIX_WAKEUP_THREAD(lock);
}
#endif
int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
orte_jobid_t *job)
@ -801,11 +795,11 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
goto REPORT_ERROR;
}
#if 0
/* if we have local support setup info, then execute it here - we
* have to do so AFTER we register the nspace so the PMIx server
* has the nspace info it needs */
if (0 < opal_list_get_size(&local_support) &&
if (orte_enable_instant_on_support &&
0 < opal_list_get_size(&local_support) &&
NULL != opal_pmix.server_setup_local_support) {
if (OPAL_SUCCESS != (rc = opal_pmix.server_setup_local_support(jdata->jobid, &local_support,
ls_cbunc, &lock))) {
@ -815,8 +809,6 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
} else {
lock.active = false; // we won't get a callback
}
#endif
lock.active = false; // we won't get a callback
/* if we have a file map, then we need to load it */
if (orte_get_attribute(&jdata->attributes, ORTE_JOB_FILE_MAPS, (void**)&bptr, OPAL_BUFFER)) {
@ -1067,11 +1059,10 @@ void orte_odls_base_spawn_proc(int fd, short sd, void *cbdata)
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&child->name));
// if (15 < opal_output_get_verbosity(orte_odls_base_framework.framework_output)) {
if (15 < opal_output_get_verbosity(orte_odls_base_framework.framework_output)) {
/* dump what is going to be exec'd */
opal_dss.dump(0, app, ORTE_APP_CONTEXT);
// }
exit(1);
opal_dss.dump(orte_odls_base_framework.framework_output, app, ORTE_APP_CONTEXT);
}
if (ORTE_SUCCESS != (rc = cd->fork_local(cd))) {
/* error message already output */

Просмотреть файл

@ -509,6 +509,12 @@ static opal_cmd_line_init_t cmd_line_init[] = {
"Forward mpirun port to compute node daemons so all will use it",
OPAL_CMD_LINE_OTYPE_LAUNCH },
/* enable instant-on support */
{ "orte_enable_instant_on_support", '\0', "enable-instant-on-support", "enable-instant-on-support", 0,
NULL, OPAL_CMD_LINE_TYPE_BOOL,
"Enable PMIx-based instant on launch support (experimental)",
OPAL_CMD_LINE_OTYPE_LAUNCH },
/* End of list */
{ NULL, '\0', NULL, NULL, 0,
NULL, OPAL_CMD_LINE_TYPE_NULL, NULL }
@ -874,7 +880,7 @@ static int setup_fork(orte_job_t *jdata,
tmp_app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, 0);
assert (NULL != tmp_app);
orte_get_attribute(&tmp_app->attributes, ORTE_APP_PREFIX_DIR, (void**)&param, OPAL_STRING);
}
}
for (i = 0; NULL != param && NULL != app->env && NULL != app->env[i]; ++i) {
char *newenv;

Просмотреть файл

@ -205,6 +205,9 @@ char *orte_job_ident = NULL;
bool orte_execute_quiet = false;
bool orte_report_silent_errors = false;
/* enable PMIx-based "instant on" support */
bool orte_enable_instant_on_support = false;
/* See comment in orte/tools/orterun/debuggers.c about this MCA
param */
bool orte_in_parallel_debugger = false;

Просмотреть файл

@ -588,6 +588,9 @@ ORTE_DECLSPEC extern char *orte_daemon_cores;
/* Max time to wait for stack traces to return */
ORTE_DECLSPEC extern int orte_stack_trace_wait_timeout;
/* enable PMIx-based "instant on" support */
ORTE_DECLSPEC extern bool orte_enable_instant_on_support;
END_C_DECLS
#endif /* ORTE_RUNTIME_ORTE_GLOBALS_H */

Просмотреть файл

@ -13,7 +13,7 @@
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012-2013 Los Alamos National Security, LLC.
* All rights reserved
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2017 IBM Corporation. All rights reserved.
@ -790,5 +790,12 @@ int orte_register_params(void)
OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_ALL,
&orte_data_server_uri);
orte_enable_instant_on_support = false;
(void) mca_base_var_register ("orte", "orte", NULL, "enable_instant_on_support",
"Enable PMIx-based instant on launch support (experimental)",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
&orte_enable_instant_on_support);
return ORTE_SUCCESS;
}