Handle the case of someone specifying a directory for the application. Ensure we get a non-zero exit status and clarify the error message.
cmr:v1.7 This commit was SVN r28119.
Этот коммит содержится в:
родитель
f36312ee6f
Коммит
347df93cd4
@ -1389,6 +1389,7 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata)
|
|||||||
if (opal_sys_limits.num_files < limit) {
|
if (opal_sys_limits.num_files < limit) {
|
||||||
if (2 < caddy->retries) {
|
if (2 < caddy->retries) {
|
||||||
/* tried enough - give up */
|
/* tried enough - give up */
|
||||||
|
child->exit_code = ORTE_PROC_STATE_FAILED_TO_LAUNCH;
|
||||||
ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_LAUNCH);
|
ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_LAUNCH);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -1478,6 +1479,7 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata)
|
|||||||
*/
|
*/
|
||||||
if (ORTE_SUCCESS != (rc = setup_child(child, jobdat, app))) {
|
if (ORTE_SUCCESS != (rc = setup_child(child, jobdat, app))) {
|
||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
|
child->exit_code = rc;
|
||||||
ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_LAUNCH);
|
ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_LAUNCH);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -1499,6 +1501,7 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata)
|
|||||||
&(app->argv),
|
&(app->argv),
|
||||||
&(app->env) ) ) ) {
|
&(app->env) ) ) ) {
|
||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
|
child->exit_code = ORTE_PROC_STATE_FAILED_TO_LAUNCH;
|
||||||
ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_LAUNCH);
|
ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_LAUNCH);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -1539,6 +1542,7 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata)
|
|||||||
* across the entire cluster. Instead, we let orterun
|
* across the entire cluster. Instead, we let orterun
|
||||||
* output a consolidated error message for us
|
* output a consolidated error message for us
|
||||||
*/
|
*/
|
||||||
|
child->exit_code = ORTE_ERR_SILENT; /* error message already output */
|
||||||
ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_START);
|
ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_FAILED_TO_START);
|
||||||
continue;
|
continue;
|
||||||
} else {
|
} else {
|
||||||
|
@ -22,13 +22,15 @@
|
|||||||
#
|
#
|
||||||
[execve error]
|
[execve error]
|
||||||
Open MPI tried to fork a new process via the "execve" system call but
|
Open MPI tried to fork a new process via the "execve" system call but
|
||||||
failed. This is an unusual error because Open MPI checks many things
|
failed. Open MPI checks many things before attempting to launch a
|
||||||
before attempting to launch a child process. This error may be
|
child process, but nothing is perfect. This error may be indicative
|
||||||
indicative of another problem on the target host. Your job will now
|
of another problem on the target host, or even something as silly as
|
||||||
|
having specified a directory for your application. Your job will now
|
||||||
abort.
|
abort.
|
||||||
|
|
||||||
Local host: %s
|
Local host: %s
|
||||||
Application name: %s
|
Application name: %s
|
||||||
|
Error: %s
|
||||||
#
|
#
|
||||||
[binding not supported]
|
[binding not supported]
|
||||||
Open MPI tried to bind a new process, but process binding is not
|
Open MPI tried to bind a new process, but process binding is not
|
||||||
|
@ -627,7 +627,7 @@ static int do_child(orte_app_context_t* context,
|
|||||||
execve(context->app, context->argv, environ_copy);
|
execve(context->app, context->argv, environ_copy);
|
||||||
send_error_show_help(write_fd, 1,
|
send_error_show_help(write_fd, 1,
|
||||||
"help-orte-odls-default.txt", "execve error",
|
"help-orte-odls-default.txt", "execve error",
|
||||||
context->app, strerror(errno));
|
orte_process_info.nodename, context->app, strerror(errno));
|
||||||
/* Does not return */
|
/* Does not return */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user