2005-04-15 21:23:25 +00:00
|
|
|
/*
|
2007-03-16 23:11:45 +00:00
|
|
|
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
2005-11-05 19:57:48 +00:00
|
|
|
* University Research and Technology
|
|
|
|
* Corporation. All rights reserved.
|
2008-11-01 00:39:46 +00:00
|
|
|
* Copyright (c) 2004-2008 The University of Tennessee and The University
|
2005-11-05 19:57:48 +00:00
|
|
|
* of Tennessee Research Foundation. All rights
|
|
|
|
* reserved.
|
2005-04-15 21:23:25 +00:00
|
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
|
|
* University of Stuttgart. All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
|
|
* All rights reserved.
|
2008-06-09 14:53:58 +00:00
|
|
|
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
|
2005-04-15 21:23:25 +00:00
|
|
|
* $COPYRIGHT$
|
|
|
|
*
|
|
|
|
* Additional copyrights may follow
|
|
|
|
*
|
|
|
|
* $HEADER$
|
|
|
|
*
|
|
|
|
* These symbols are in a file by themselves to provide nice linker
|
|
|
|
* semantics. Since linkers generally pull in symbols by object
|
|
|
|
* files, keeping these symbols as the only symbols in this file
|
|
|
|
* prevents utility programs such as "ompi_info" from having to import
|
|
|
|
* entire components just to query their version and parameters.
|
|
|
|
*/
|
|
|
|
|
2006-02-12 01:33:29 +00:00
|
|
|
#include "orte_config.h"
|
2008-02-28 01:57:57 +00:00
|
|
|
#include "orte/constants.h"
|
2005-04-15 21:23:25 +00:00
|
|
|
|
|
|
|
#include <stdlib.h>
|
2005-04-18 21:17:56 +00:00
|
|
|
#ifdef HAVE_UNISTD_H
|
2005-04-15 21:23:25 +00:00
|
|
|
#include <unistd.h>
|
2005-04-18 21:17:56 +00:00
|
|
|
#endif
|
2005-04-15 21:23:25 +00:00
|
|
|
#include <errno.h>
|
|
|
|
#include <sys/types.h>
|
2005-04-18 21:17:56 +00:00
|
|
|
#ifdef HAVE_SYS_WAIT_H
|
2005-04-15 21:23:25 +00:00
|
|
|
#include <sys/wait.h>
|
2005-04-18 21:17:56 +00:00
|
|
|
#endif
|
2005-04-15 21:23:25 +00:00
|
|
|
#include <signal.h>
|
|
|
|
#ifdef HAVE_UTIL_H
|
|
|
|
#include <util.h>
|
|
|
|
#endif
|
|
|
|
#ifdef HAVE_PTY_H
|
|
|
|
#include <pty.h>
|
|
|
|
#endif
|
2005-06-01 19:23:23 +00:00
|
|
|
#ifdef HAVE_FCNTL_H
|
|
|
|
#include <fcntl.h>
|
|
|
|
#endif
|
2005-06-10 08:06:07 +00:00
|
|
|
#ifdef HAVE_TERMIOS_H
|
|
|
|
#include <termios.h>
|
2005-06-13 20:28:44 +00:00
|
|
|
# ifdef HAVE_TERMIO_H
|
|
|
|
# include <termio.h>
|
|
|
|
# endif
|
2005-06-10 08:06:07 +00:00
|
|
|
#endif
|
2005-10-13 15:41:25 +00:00
|
|
|
#ifdef HAVE_LIBUTIL_H
|
|
|
|
#include <libutil.h>
|
|
|
|
#endif
|
2005-04-15 21:23:25 +00:00
|
|
|
|
2006-03-11 02:35:40 +00:00
|
|
|
#include "opal/util/opal_pty.h"
|
2008-06-09 14:53:58 +00:00
|
|
|
#include "opal/util/opal_environ.h"
|
2008-02-28 01:57:57 +00:00
|
|
|
|
2006-02-12 01:33:29 +00:00
|
|
|
#include "orte/mca/errmgr/errmgr.h"
|
2008-02-28 01:57:57 +00:00
|
|
|
|
2006-02-12 01:33:29 +00:00
|
|
|
#include "orte/mca/iof/iof.h"
|
2008-02-28 01:57:57 +00:00
|
|
|
#include "orte/mca/iof/base/iof_base_setup.h"
|
2005-04-15 21:23:25 +00:00
|
|
|
|
|
|
|
int
|
2005-04-17 17:50:39 +00:00
|
|
|
orte_iof_base_setup_prefork(orte_iof_base_io_conf_t *opts)
|
2005-04-15 21:23:25 +00:00
|
|
|
{
|
2008-10-31 18:05:28 +00:00
|
|
|
int ret = -1;
|
2005-04-15 21:23:25 +00:00
|
|
|
|
|
|
|
fflush(stdout);
|
2005-06-02 19:17:32 +00:00
|
|
|
|
2008-10-31 18:05:28 +00:00
|
|
|
/* first check to make sure we can do ptys */
|
2006-03-11 02:35:40 +00:00
|
|
|
#if OMPI_ENABLE_PTY_SUPPORT
|
2005-04-15 21:23:25 +00:00
|
|
|
if (opts->usepty) {
|
2008-10-31 18:05:28 +00:00
|
|
|
/**
|
|
|
|
* It has been reported that on MAC OS X 10.4 and prior one cannot
|
|
|
|
* safely close the writing side of a pty before completly reading
|
|
|
|
* all data inside.
|
|
|
|
* There seems to be two issues: first all pending data is
|
|
|
|
* discarded, and second it randomly generate kernel panics.
|
|
|
|
* Apparently this issue was fixed in 10.5 so by now we use the
|
|
|
|
* pty exactly as we use the pipes.
|
|
|
|
* This comment is here as a reminder.
|
|
|
|
*/
|
2006-03-11 02:35:40 +00:00
|
|
|
ret = opal_openpty(&(opts->p_stdout[0]), &(opts->p_stdout[1]),
|
2006-08-23 03:32:36 +00:00
|
|
|
(char*)NULL, (struct termios*)NULL, (struct winsize*)NULL);
|
2005-04-15 21:23:25 +00:00
|
|
|
}
|
|
|
|
#else
|
2008-10-31 18:05:28 +00:00
|
|
|
opts->usepty = 0;
|
2005-04-15 21:23:25 +00:00
|
|
|
#endif
|
2005-04-18 21:17:56 +00:00
|
|
|
|
2005-12-12 20:04:00 +00:00
|
|
|
#if defined(__WINDOWS__)
|
2005-04-18 21:17:56 +00:00
|
|
|
/* Windows doesn't have a 'pipe' function.
|
|
|
|
* So we need to do something a bit more complex */
|
2007-03-16 23:11:45 +00:00
|
|
|
/*
|
2005-04-18 21:17:56 +00:00
|
|
|
* http://www-106.ibm.com/developerworks/linux/library/l-rt4/?open&t=grl,l=252,p=pipes
|
|
|
|
*/
|
|
|
|
#else
|
2005-04-15 21:23:25 +00:00
|
|
|
if (ret < 0) {
|
|
|
|
if (pipe(opts->p_stdout) < 0) {
|
2007-04-24 19:28:37 +00:00
|
|
|
ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_PIPES);
|
|
|
|
return ORTE_ERR_SYS_LIMITS_PIPES;
|
2005-04-15 21:23:25 +00:00
|
|
|
}
|
2006-02-03 20:43:20 +00:00
|
|
|
}
|
2008-06-09 14:53:58 +00:00
|
|
|
if (pipe(opts->p_stdin) < 0) {
|
|
|
|
ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_PIPES);
|
|
|
|
return ORTE_ERR_SYS_LIMITS_PIPES;
|
|
|
|
}
|
2005-04-15 21:23:25 +00:00
|
|
|
if (pipe(opts->p_stderr) < 0) {
|
2007-04-24 19:28:37 +00:00
|
|
|
ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_PIPES);
|
|
|
|
return ORTE_ERR_SYS_LIMITS_PIPES;
|
2005-04-15 21:23:25 +00:00
|
|
|
}
|
2008-06-09 14:53:58 +00:00
|
|
|
if (pipe(opts->p_internal) < 0) {
|
|
|
|
ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_PIPES);
|
|
|
|
return ORTE_ERR_SYS_LIMITS_PIPES;
|
|
|
|
}
|
2005-04-18 21:17:56 +00:00
|
|
|
#endif
|
2005-04-15 21:23:25 +00:00
|
|
|
|
2005-08-15 18:25:35 +00:00
|
|
|
return ORTE_SUCCESS;
|
2005-04-15 21:23:25 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
int
|
2008-06-09 14:53:58 +00:00
|
|
|
orte_iof_base_setup_child(orte_iof_base_io_conf_t *opts, char ***env)
|
2005-04-15 21:23:25 +00:00
|
|
|
{
|
|
|
|
int ret;
|
2008-06-09 14:53:58 +00:00
|
|
|
char *str;
|
2005-04-15 21:23:25 +00:00
|
|
|
|
2006-02-03 20:43:20 +00:00
|
|
|
close(opts->p_stdin[1]);
|
2008-10-31 18:05:28 +00:00
|
|
|
close(opts->p_stdout[0]);
|
2005-04-15 21:23:25 +00:00
|
|
|
close(opts->p_stderr[0]);
|
2008-06-09 14:53:58 +00:00
|
|
|
close(opts->p_internal[0]);
|
2005-04-15 21:23:25 +00:00
|
|
|
|
|
|
|
if (opts->usepty) {
|
2005-12-12 20:04:00 +00:00
|
|
|
#ifndef __WINDOWS__
|
2005-11-28 14:58:53 +00:00
|
|
|
/* disable echo */
|
|
|
|
struct termios term_attrs;
|
|
|
|
if (tcgetattr(opts->p_stdout[1], &term_attrs) < 0) {
|
2007-04-24 19:28:37 +00:00
|
|
|
return ORTE_ERR_PIPE_SETUP_FAILURE;
|
2005-11-28 14:58:53 +00:00
|
|
|
}
|
|
|
|
term_attrs.c_lflag &= ~ (ECHO | ECHOE | ECHOK |
|
|
|
|
ECHOCTL | ECHOKE | ECHONL);
|
|
|
|
term_attrs.c_iflag &= ~ (ICRNL | INLCR | ISTRIP | INPCK | IXON);
|
2005-12-02 18:24:59 +00:00
|
|
|
term_attrs.c_oflag &= ~ (
|
|
|
|
#ifdef OCRNL
|
|
|
|
/* OS X 10.3 does not have this
|
|
|
|
value defined */
|
|
|
|
OCRNL |
|
|
|
|
#endif
|
|
|
|
ONLCR);
|
2005-11-28 14:58:53 +00:00
|
|
|
if (tcsetattr(opts->p_stdout[1], TCSANOW, &term_attrs) == -1) {
|
2007-04-24 19:28:37 +00:00
|
|
|
return ORTE_ERR_PIPE_SETUP_FAILURE;
|
2005-11-28 14:58:53 +00:00
|
|
|
}
|
|
|
|
#endif
|
2005-04-15 21:23:25 +00:00
|
|
|
ret = dup2(opts->p_stdout[1], fileno(stdout));
|
2007-04-24 19:28:37 +00:00
|
|
|
if (ret < 0) return ORTE_ERR_PIPE_SETUP_FAILURE;
|
2005-06-02 21:15:26 +00:00
|
|
|
|
2005-04-15 21:23:25 +00:00
|
|
|
} else {
|
|
|
|
if(opts->p_stdout[1] != fileno(stdout)) {
|
2005-04-15 21:36:32 +00:00
|
|
|
ret = dup2(opts->p_stdout[1], fileno(stdout));
|
2007-04-24 19:28:37 +00:00
|
|
|
if (ret < 0) return ORTE_ERR_PIPE_SETUP_FAILURE;
|
2005-04-15 21:23:25 +00:00
|
|
|
close(opts->p_stdout[1]);
|
|
|
|
}
|
2006-02-03 20:43:20 +00:00
|
|
|
}
|
|
|
|
if (opts->connect_stdin) {
|
|
|
|
if(opts->p_stdin[0] != fileno(stdin)) {
|
|
|
|
ret = dup2(opts->p_stdin[0], fileno(stdin));
|
2007-04-24 19:28:37 +00:00
|
|
|
if (ret < 0) return ORTE_ERR_PIPE_SETUP_FAILURE;
|
2006-02-03 20:43:20 +00:00
|
|
|
close(opts->p_stdin[0]);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
int fd;
|
2005-06-01 19:23:23 +00:00
|
|
|
|
2006-02-03 20:43:20 +00:00
|
|
|
close(opts->p_stdin[0]);
|
|
|
|
/* connect input to /dev/null */
|
2006-08-23 03:32:36 +00:00
|
|
|
fd = open("/dev/null", O_RDONLY, 0);
|
2006-02-03 20:43:20 +00:00
|
|
|
if(fd > fileno(stdin)) {
|
|
|
|
dup2(fd, fileno(stdin));
|
|
|
|
close(fd);
|
2005-04-15 21:23:25 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
if(opts->p_stderr[1] != fileno(stderr)) {
|
2005-04-15 21:36:32 +00:00
|
|
|
ret = dup2(opts->p_stderr[1], fileno(stderr));
|
2007-04-24 19:28:37 +00:00
|
|
|
if (ret < 0) return ORTE_ERR_PIPE_SETUP_FAILURE;
|
2005-04-15 21:23:25 +00:00
|
|
|
close(opts->p_stderr[1]);
|
|
|
|
}
|
2005-06-02 21:15:26 +00:00
|
|
|
|
2008-06-09 14:53:58 +00:00
|
|
|
/* Set an environment variable that the new child process can use
|
|
|
|
to get the fd of the pipe connected to the INTERNAL IOF tag. */
|
|
|
|
asprintf(&str, "%d", opts->p_internal[1]);
|
|
|
|
if (NULL != str) {
|
|
|
|
opal_setenv("OPAL_OUTPUT_STDERR_FD", str, true, env);
|
|
|
|
free(str);
|
|
|
|
}
|
|
|
|
|
2005-08-15 18:25:35 +00:00
|
|
|
return ORTE_SUCCESS;
|
2005-04-15 21:23:25 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
int
|
2005-04-17 17:50:39 +00:00
|
|
|
orte_iof_base_setup_parent(const orte_process_name_t* name,
|
|
|
|
orte_iof_base_io_conf_t *opts)
|
2005-04-15 21:23:25 +00:00
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
|
2006-08-23 03:32:36 +00:00
|
|
|
close(opts->p_stdin[0]);
|
2008-10-31 18:05:28 +00:00
|
|
|
close(opts->p_stdout[1]);
|
2005-04-15 21:23:25 +00:00
|
|
|
close(opts->p_stderr[1]);
|
2008-06-09 14:53:58 +00:00
|
|
|
close(opts->p_internal[1]);
|
2005-04-15 21:23:25 +00:00
|
|
|
|
2005-06-02 21:15:26 +00:00
|
|
|
/* connect stdin endpoint */
|
2005-06-01 19:23:23 +00:00
|
|
|
if (opts->connect_stdin) {
|
2005-11-28 14:58:53 +00:00
|
|
|
/* and connect the pty to stdin */
|
Roll in the revamped IOF subsystem. Per the devel mailing list email, this is a complete rewrite of the iof framework designed to simplify the code for maintainability, and to support features we had planned to do, but were too difficult to implement in the old code. Specifically, the new code:
1. completely and cleanly separates responsibilities between the HNP, orted, and tool components.
2. removes all wireup messaging during launch and shutdown.
3. maintains flow control for stdin to avoid large-scale consumption of memory by orteds when large input files are forwarded. This is done using an xon/xoff protocol.
4. enables specification of stdin recipients on the mpirun cmd line. Allowed options include rank, "all", or "none". Default is rank 0.
5. creates a new MPI_Info key "ompi_stdin_target" that supports the above options for child jobs. Default is "none".
6. adds a new tool "orte-iof" that can connect to a running mpirun and display the output. Cmd line options allow selection of any combination of stdout, stderr, and stddiag. Default is stdout.
7. adds a new mpirun and orte-iof cmd line option "tag-output" that will tag each line of output with process name and stream ident. For example, "[1,0]<stdout>this is output"
This is not intended for the 1.3 release as it is a major change requiring considerable soak time.
This commit was SVN r19767.
2008-10-18 00:00:49 +00:00
|
|
|
ret = orte_iof.pull(name, ORTE_IOF_STDIN, opts->p_stdin[1]);
|
2005-06-01 19:23:23 +00:00
|
|
|
if(ORTE_SUCCESS != ret) {
|
|
|
|
ORTE_ERROR_LOG(ret);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
} else {
|
2006-08-23 03:32:36 +00:00
|
|
|
close(opts->p_stdin[1]);
|
2005-06-01 19:23:23 +00:00
|
|
|
}
|
|
|
|
|
2008-06-09 14:53:58 +00:00
|
|
|
/* connect read ends to IOF */
|
Roll in the revamped IOF subsystem. Per the devel mailing list email, this is a complete rewrite of the iof framework designed to simplify the code for maintainability, and to support features we had planned to do, but were too difficult to implement in the old code. Specifically, the new code:
1. completely and cleanly separates responsibilities between the HNP, orted, and tool components.
2. removes all wireup messaging during launch and shutdown.
3. maintains flow control for stdin to avoid large-scale consumption of memory by orteds when large input files are forwarded. This is done using an xon/xoff protocol.
4. enables specification of stdin recipients on the mpirun cmd line. Allowed options include rank, "all", or "none". Default is rank 0.
5. creates a new MPI_Info key "ompi_stdin_target" that supports the above options for child jobs. Default is "none".
6. adds a new tool "orte-iof" that can connect to a running mpirun and display the output. Cmd line options allow selection of any combination of stdout, stderr, and stddiag. Default is stdout.
7. adds a new mpirun and orte-iof cmd line option "tag-output" that will tag each line of output with process name and stream ident. For example, "[1,0]<stdout>this is output"
This is not intended for the 1.3 release as it is a major change requiring considerable soak time.
This commit was SVN r19767.
2008-10-18 00:00:49 +00:00
|
|
|
ret = orte_iof.push(name, ORTE_IOF_STDOUT, opts->p_stdout[0]);
|
2005-04-15 21:23:25 +00:00
|
|
|
if(ORTE_SUCCESS != ret) {
|
|
|
|
ORTE_ERROR_LOG(ret);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
Roll in the revamped IOF subsystem. Per the devel mailing list email, this is a complete rewrite of the iof framework designed to simplify the code for maintainability, and to support features we had planned to do, but were too difficult to implement in the old code. Specifically, the new code:
1. completely and cleanly separates responsibilities between the HNP, orted, and tool components.
2. removes all wireup messaging during launch and shutdown.
3. maintains flow control for stdin to avoid large-scale consumption of memory by orteds when large input files are forwarded. This is done using an xon/xoff protocol.
4. enables specification of stdin recipients on the mpirun cmd line. Allowed options include rank, "all", or "none". Default is rank 0.
5. creates a new MPI_Info key "ompi_stdin_target" that supports the above options for child jobs. Default is "none".
6. adds a new tool "orte-iof" that can connect to a running mpirun and display the output. Cmd line options allow selection of any combination of stdout, stderr, and stddiag. Default is stdout.
7. adds a new mpirun and orte-iof cmd line option "tag-output" that will tag each line of output with process name and stream ident. For example, "[1,0]<stdout>this is output"
This is not intended for the 1.3 release as it is a major change requiring considerable soak time.
This commit was SVN r19767.
2008-10-18 00:00:49 +00:00
|
|
|
ret = orte_iof.push(name, ORTE_IOF_STDERR, opts->p_stderr[0]);
|
2005-04-15 21:23:25 +00:00
|
|
|
if(ORTE_SUCCESS != ret) {
|
|
|
|
ORTE_ERROR_LOG(ret);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
Roll in the revamped IOF subsystem. Per the devel mailing list email, this is a complete rewrite of the iof framework designed to simplify the code for maintainability, and to support features we had planned to do, but were too difficult to implement in the old code. Specifically, the new code:
1. completely and cleanly separates responsibilities between the HNP, orted, and tool components.
2. removes all wireup messaging during launch and shutdown.
3. maintains flow control for stdin to avoid large-scale consumption of memory by orteds when large input files are forwarded. This is done using an xon/xoff protocol.
4. enables specification of stdin recipients on the mpirun cmd line. Allowed options include rank, "all", or "none". Default is rank 0.
5. creates a new MPI_Info key "ompi_stdin_target" that supports the above options for child jobs. Default is "none".
6. adds a new tool "orte-iof" that can connect to a running mpirun and display the output. Cmd line options allow selection of any combination of stdout, stderr, and stddiag. Default is stdout.
7. adds a new mpirun and orte-iof cmd line option "tag-output" that will tag each line of output with process name and stream ident. For example, "[1,0]<stdout>this is output"
This is not intended for the 1.3 release as it is a major change requiring considerable soak time.
This commit was SVN r19767.
2008-10-18 00:00:49 +00:00
|
|
|
ret = orte_iof.push(name, ORTE_IOF_STDDIAG, opts->p_internal[0]);
|
2008-06-09 14:53:58 +00:00
|
|
|
if(ORTE_SUCCESS != ret) {
|
|
|
|
ORTE_ERROR_LOG(ret);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2005-08-15 18:25:35 +00:00
|
|
|
return ORTE_SUCCESS;
|
2005-04-15 21:23:25 +00:00
|
|
|
}
|