Make use of "instant-on" feature optional
The PMIx support for "instant on" remains experimental, so disable it by default. Provide an MCA param and corresponding command line option to enable it at runtime. Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
родитель
fa18ba395d
Коммит
795140e590
23
orte/mca/odls/base/odls_base_default_fns.c
Исполняемый файл → Обычный файл
23
orte/mca/odls/base/odls_base_default_fns.c
Исполняемый файл → Обычный файл
@ -100,7 +100,6 @@
|
||||
#include "orte/mca/odls/base/base.h"
|
||||
#include "orte/mca/odls/base/odls_private.h"
|
||||
|
||||
#if 0
|
||||
static void setup_cbfunc(int status,
|
||||
opal_list_t *info,
|
||||
void *provided_cbdata,
|
||||
@ -132,9 +131,8 @@ static void setup_cbfunc(int status,
|
||||
|
||||
/* move to next stage */
|
||||
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_SEND_LAUNCH_MSG);
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
/* IT IS CRITICAL THAT ANY CHANGE IN THE ORDER OF THE INFO PACKED IN
|
||||
* THIS FUNCTION BE REFLECTED IN THE CONSTRUCT_CHILD_LIST PARSER BELOW
|
||||
*/
|
||||
@ -433,8 +431,7 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer,
|
||||
}
|
||||
|
||||
/* get any application prep info */
|
||||
#if 0
|
||||
if (NULL != opal_pmix.server_setup_application) {
|
||||
if (orte_enable_instant_on_support && NULL != opal_pmix.server_setup_application) {
|
||||
/* we don't want to block here because it could
|
||||
* take some indeterminate time to get the info */
|
||||
if (OPAL_SUCCESS != (rc = opal_pmix.server_setup_application(jdata->jobid, NULL, setup_cbfunc, jdata))) {
|
||||
@ -442,7 +439,6 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer,
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* move to next stage */
|
||||
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_SEND_LAUNCH_MSG);
|
||||
@ -457,13 +453,11 @@ static void fm_release(void *cbdata)
|
||||
OBJ_RELEASE(bptr);
|
||||
}
|
||||
|
||||
#if 0
|
||||
static void ls_cbunc(int status, void *cbdata)
|
||||
{
|
||||
opal_pmix_lock_t *lock = (opal_pmix_lock_t*)cbdata;
|
||||
OPAL_PMIX_WAKEUP_THREAD(lock);
|
||||
}
|
||||
#endif
|
||||
|
||||
int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
|
||||
orte_jobid_t *job)
|
||||
@ -801,11 +795,11 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
|
||||
goto REPORT_ERROR;
|
||||
}
|
||||
|
||||
#if 0
|
||||
/* if we have local support setup info, then execute it here - we
|
||||
* have to do so AFTER we register the nspace so the PMIx server
|
||||
* has the nspace info it needs */
|
||||
if (0 < opal_list_get_size(&local_support) &&
|
||||
if (orte_enable_instant_on_support &&
|
||||
0 < opal_list_get_size(&local_support) &&
|
||||
NULL != opal_pmix.server_setup_local_support) {
|
||||
if (OPAL_SUCCESS != (rc = opal_pmix.server_setup_local_support(jdata->jobid, &local_support,
|
||||
ls_cbunc, &lock))) {
|
||||
@ -815,8 +809,6 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
|
||||
} else {
|
||||
lock.active = false; // we won't get a callback
|
||||
}
|
||||
#endif
|
||||
lock.active = false; // we won't get a callback
|
||||
|
||||
/* if we have a file map, then we need to load it */
|
||||
if (orte_get_attribute(&jdata->attributes, ORTE_JOB_FILE_MAPS, (void**)&bptr, OPAL_BUFFER)) {
|
||||
@ -1067,11 +1059,10 @@ void orte_odls_base_spawn_proc(int fd, short sd, void *cbdata)
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&child->name));
|
||||
|
||||
// if (15 < opal_output_get_verbosity(orte_odls_base_framework.framework_output)) {
|
||||
if (15 < opal_output_get_verbosity(orte_odls_base_framework.framework_output)) {
|
||||
/* dump what is going to be exec'd */
|
||||
opal_dss.dump(0, app, ORTE_APP_CONTEXT);
|
||||
// }
|
||||
exit(1);
|
||||
opal_dss.dump(orte_odls_base_framework.framework_output, app, ORTE_APP_CONTEXT);
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS != (rc = cd->fork_local(cd))) {
|
||||
/* error message already output */
|
||||
|
@ -509,6 +509,12 @@ static opal_cmd_line_init_t cmd_line_init[] = {
|
||||
"Forward mpirun port to compute node daemons so all will use it",
|
||||
OPAL_CMD_LINE_OTYPE_LAUNCH },
|
||||
|
||||
/* enable instant-on support */
|
||||
{ "orte_enable_instant_on_support", '\0', "enable-instant-on-support", "enable-instant-on-support", 0,
|
||||
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
||||
"Enable PMIx-based instant on launch support (experimental)",
|
||||
OPAL_CMD_LINE_OTYPE_LAUNCH },
|
||||
|
||||
/* End of list */
|
||||
{ NULL, '\0', NULL, NULL, 0,
|
||||
NULL, OPAL_CMD_LINE_TYPE_NULL, NULL }
|
||||
@ -874,7 +880,7 @@ static int setup_fork(orte_job_t *jdata,
|
||||
tmp_app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, 0);
|
||||
assert (NULL != tmp_app);
|
||||
orte_get_attribute(&tmp_app->attributes, ORTE_APP_PREFIX_DIR, (void**)&param, OPAL_STRING);
|
||||
}
|
||||
}
|
||||
for (i = 0; NULL != param && NULL != app->env && NULL != app->env[i]; ++i) {
|
||||
char *newenv;
|
||||
|
||||
|
@ -205,6 +205,9 @@ char *orte_job_ident = NULL;
|
||||
bool orte_execute_quiet = false;
|
||||
bool orte_report_silent_errors = false;
|
||||
|
||||
/* enable PMIx-based "instant on" support */
|
||||
bool orte_enable_instant_on_support = false;
|
||||
|
||||
/* See comment in orte/tools/orterun/debuggers.c about this MCA
|
||||
param */
|
||||
bool orte_in_parallel_debugger = false;
|
||||
|
@ -588,6 +588,9 @@ ORTE_DECLSPEC extern char *orte_daemon_cores;
|
||||
/* Max time to wait for stack traces to return */
|
||||
ORTE_DECLSPEC extern int orte_stack_trace_wait_timeout;
|
||||
|
||||
/* enable PMIx-based "instant on" support */
|
||||
ORTE_DECLSPEC extern bool orte_enable_instant_on_support;
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* ORTE_RUNTIME_ORTE_GLOBALS_H */
|
||||
|
@ -13,7 +13,7 @@
|
||||
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved
|
||||
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2018 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2017 IBM Corporation. All rights reserved.
|
||||
@ -790,5 +790,12 @@ int orte_register_params(void)
|
||||
OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_ALL,
|
||||
&orte_data_server_uri);
|
||||
|
||||
orte_enable_instant_on_support = false;
|
||||
(void) mca_base_var_register ("orte", "orte", NULL, "enable_instant_on_support",
|
||||
"Enable PMIx-based instant on launch support (experimental)",
|
||||
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&orte_enable_instant_on_support);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user