1
1

Fix a race condition in the IOF and add some new user-requested features:

1. fix a race condition whereby a proc's output could trigger an event prior to the other outputs being set up, thus causing the IOF to declare the proc "terminated" too early. This was really rare, but could happen.

2. add a new "timestamp-output" option that timestamps each line of output

3. add a new "output-filename" option that redirects each proc's output to a separate rank-named file.

4. add a new "xterm" option that redirects the output of the specified ranks to a separate xterm window.

This commit was SVN r20392.
Этот коммит содержится в:
Ralph Castain 2009-01-30 22:47:30 +00:00
родитель 0704b98668
Коммит 2966206f58
24 изменённых файлов: 586 добавлений и 132 удалений

Просмотреть файл

@ -71,15 +71,6 @@ typedef struct {
} orte_iof_write_event_t;
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_iof_write_event_t);
struct orte_iof_base_t {
int iof_output;
opal_list_t iof_components_opened;
opal_mutex_t iof_write_output_lock;
orte_iof_write_event_t iof_write_stdout;
orte_iof_write_event_t iof_write_stderr;
};
typedef struct orte_iof_base_t orte_iof_base_t;
typedef struct {
opal_list_item_t super;
orte_process_name_t name;
@ -122,14 +113,25 @@ typedef struct {
} orte_iof_write_output_t;
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_iof_write_output_t);
/* the iof globals struct
 *
 * Holds the state shared by the IOF base code: the verbose-output
 * stream, the list of opened components, and the sinks used to write
 * forwarded output to this process's own stdout/stderr.
 */
struct orte_iof_base_t {
/* output stream id passed to OPAL_OUTPUT_VERBOSE by the IOF code */
int iof_output;
/* list of IOF components that have been opened */
opal_list_t iof_components_opened;
/* mutex held while queued output is being written or flushed */
opal_mutex_t iof_write_output_lock;
/* sink for fd 1 (stdout); only defined when this process is not a daemon */
orte_iof_sink_t *iof_write_stdout;
/* sink for fd 2 (stderr); only defined when this process is not a daemon */
orte_iof_sink_t *iof_write_stderr;
};
typedef struct orte_iof_base_t orte_iof_base_t;
#if OMPI_ENABLE_DEBUG
#define ORTE_IOF_SINK_DEFINE(snk, nm, fid, tg, wrthndlr, eplist) \
do { \
orte_iof_sink_t *ep; \
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output, \
"defining endpoint: %s %d", \
__FILE__, __LINE__)); \
"defining endpt: file %s line %d fd %d",\
__FILE__, __LINE__, (fid))); \
ep = OBJ_NEW(orte_iof_sink_t); \
ep->name.jobid = (nm)->jobid; \
ep->name.vpid = (nm)->vpid; \
@ -138,9 +140,11 @@ ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_iof_write_output_t);
ep->wev->fd = (fid); \
opal_event_set(&(ep->wev->ev), ep->wev->fd, \
OPAL_EV_WRITE, \
wrthndlr, ep) ; \
wrthndlr, ep); \
} \
if (NULL != (eplist)) { \
opal_list_append((eplist), &ep->super); \
} \
opal_list_append((eplist), &ep->super); \
*(snk) = ep; \
ep->file = strdup(__FILE__); \
ep->line = __LINE__; \

Просмотреть файл

@ -36,6 +36,7 @@ int orte_iof_base_close(void)
bool dump;
opal_list_item_t *item;
orte_iof_write_output_t *output;
orte_iof_write_event_t *wev;
int num_written;
/* shutdown any remaining opened components */
@ -48,13 +49,14 @@ int orte_iof_base_close(void)
OPAL_THREAD_LOCK(&orte_iof_base.iof_write_output_lock);
if (!orte_process_info.daemon) {
/* check if anything is still trying to be written out */
if (!opal_list_is_empty(&orte_iof_base.iof_write_stdout.outputs)) {
wev = orte_iof_base.iof_write_stdout->wev;
if (!opal_list_is_empty(&wev->outputs)) {
dump = false;
/* make one last attempt to write this out */
while (NULL != (item = opal_list_remove_first(&orte_iof_base.iof_write_stdout.outputs))) {
while (NULL != (item = opal_list_remove_first(&wev->outputs))) {
output = (orte_iof_write_output_t*)item;
if (!dump) {
num_written = write(orte_iof_base.iof_write_stdout.fd, output->data, output->numbytes);
num_written = write(wev->fd, output->data, output->numbytes);
if (num_written < output->numbytes) {
/* don't retry - just cleanout the list and dump it */
dump = true;
@ -63,14 +65,15 @@ int orte_iof_base_close(void)
OBJ_RELEASE(output);
}
}
OBJ_DESTRUCT(&orte_iof_base.iof_write_stdout);
if (!opal_list_is_empty(&orte_iof_base.iof_write_stderr.outputs)) {
OBJ_RELEASE(orte_iof_base.iof_write_stdout);
wev = orte_iof_base.iof_write_stderr->wev;
if (!opal_list_is_empty(&wev->outputs)) {
dump = false;
/* make one last attempt to write this out */
while (NULL != (item = opal_list_remove_first(&orte_iof_base.iof_write_stderr.outputs))) {
while (NULL != (item = opal_list_remove_first(&wev->outputs))) {
output = (orte_iof_write_output_t*)item;
if (!dump) {
num_written = write(orte_iof_base.iof_write_stderr.fd, output->data, output->numbytes);
num_written = write(wev->fd, output->data, output->numbytes);
if (num_written < output->numbytes) {
/* don't retry - just cleanout the list and dump it */
dump = true;
@ -79,7 +82,7 @@ int orte_iof_base_close(void)
OBJ_RELEASE(output);
}
}
OBJ_DESTRUCT(&orte_iof_base.iof_write_stderr);
OBJ_RELEASE(orte_iof_base.iof_write_stderr);
}
OPAL_THREAD_UNLOCK(&orte_iof_base.iof_write_output_lock);

Просмотреть файл

@ -25,6 +25,8 @@
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/util/os_dirpath.h"
#include "opal/util/basename.h"
#include "orte/util/show_help.h"
#include "orte/util/proc_info.h"
@ -84,6 +86,8 @@ OBJ_CLASS_INSTANCE(orte_iof_proc_t,
/* Constructor for orte_iof_sink_t objects.
 *
 * Marks the sink's target daemon as "not set" and allocates the write
 * event that carries the sink's file descriptor and pending outputs.
 */
static void orte_iof_base_sink_construct(orte_iof_sink_t* ptr)
{
/* an invalid daemon jobid is how the read/recv handlers recognize a
 * sink that has no remote target
 */
ptr->daemon.jobid = ORTE_JOBID_INVALID;
ptr->daemon.vpid = ORTE_VPID_INVALID;
/* every sink gets its own write event object */
ptr->wev = OBJ_NEW(orte_iof_write_event_t);
}
static void orte_iof_base_sink_destruct(orte_iof_sink_t* ptr)
@ -163,31 +167,38 @@ orte_iof_base_t orte_iof_base;
*/
int orte_iof_base_open(void)
{
int rc;
/* Initialize globals */
OBJ_CONSTRUCT(&orte_iof_base.iof_components_opened, opal_list_t);
OBJ_CONSTRUCT(&orte_iof_base.iof_write_output_lock, opal_mutex_t);
/* did the user request we print output to files? */
if (NULL != orte_output_filename) {
/* we will setup the files themselves as needed in the iof
* module. For now, let's see if the filename contains a
* path, or just a name
*/
char *path;
path = opal_dirname(orte_output_filename);
if (0 != strcmp(path, orte_output_filename)) {
/* there is a path in this name - ensure that the directory
* exists, and create it if not
*/
if (ORTE_SUCCESS != (rc = opal_os_dirpath_create(path, S_IRWXU))) {
return rc;
}
}
}
/* daemons do not need to do this as they do not write out stdout/err */
if (!orte_process_info.daemon) {
/* setup the stdout event */
OBJ_CONSTRUCT(&orte_iof_base.iof_write_stdout, orte_iof_write_event_t);
orte_iof_base.iof_write_stdout.fd = 1;
/* create the write event, but don't add it until we need it */
opal_event_set(&orte_iof_base.iof_write_stdout.ev,
orte_iof_base.iof_write_stdout.fd,
OPAL_EV_WRITE,
orte_iof_base_write_handler,
&orte_iof_base.iof_write_stdout);
ORTE_IOF_SINK_DEFINE(&orte_iof_base.iof_write_stdout, ORTE_PROC_MY_NAME,
1, ORTE_IOF_STDOUT, orte_iof_base_write_handler, NULL);
/* setup the stderr event */
OBJ_CONSTRUCT(&orte_iof_base.iof_write_stderr, orte_iof_write_event_t);
orte_iof_base.iof_write_stderr.fd = 2;
/* create the write event, but don't add it until we need it */
opal_event_set(&orte_iof_base.iof_write_stderr.ev,
orte_iof_base.iof_write_stderr.fd,
OPAL_EV_WRITE,
orte_iof_base_write_handler,
&orte_iof_base.iof_write_stderr);
ORTE_IOF_SINK_DEFINE(&orte_iof_base.iof_write_stderr, ORTE_PROC_MY_NAME,
2, ORTE_IOF_STDERR, orte_iof_base_write_handler, NULL);
/* do NOT set these file descriptors to non-blocking. If we do so,
* we set the file descriptor to non-blocking for everyone that has
* that file descriptor, which includes everyone else in our shell

Просмотреть файл

@ -30,6 +30,9 @@
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_TIME_H
#include <time.h>
#endif
#include <errno.h>
#include "orte/util/name_fns.h"
@ -47,17 +50,26 @@ int orte_iof_base_write_output(orte_process_name_t *name, orte_iof_tag_t stream,
int i, j, k, starttaglen, endtaglen, num_buffered;
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
"%s write:output setting up to write %d bytes to %s of %s",
"%s write:output setting up to write %d bytes to %s for %s on fd %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
(ORTE_IOF_STDIN & stream) ? "stdin" : ((ORTE_IOF_STDOUT & stream) ? "stdout" : ((ORTE_IOF_STDERR & stream) ? "stderr" : "stddiag")),
ORTE_NAME_PRINT(name)));
ORTE_NAME_PRINT(name), channel->fd));
/* setup output object */
output = OBJ_NEW(orte_iof_write_output_t);
/* write output data to the corresponding tag */
if (ORTE_IOF_STDIN & stream) {
suffix = NULL;
/* copy over the data to be written */
if (0 < numbytes) {
/* don't copy 0 bytes - we just need to pass
* the zero bytes so the fd can be closed
* after it writes everything out
*/
memcpy(output->data, data, numbytes);
}
output->numbytes = numbytes;
goto process;
} else if (ORTE_IOF_STDOUT & stream) {
/* write the bytes to stdout */
suffix = "stdout";
@ -74,59 +86,92 @@ int orte_iof_base_write_output(orte_process_name_t *name, orte_iof_tag_t stream,
"%s stream %0x", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), stream));
return ORTE_ERR_VALUE_OUT_OF_BOUNDS;
}
/* see if data is to be tagged */
if (orte_tag_output && NULL != suffix) {
/* if this is to be xml tagged, create a tag with the correct syntax */
if (orte_xml_output) {
snprintf(starttag, ORTE_IOF_BASE_TAG_MAX, "<%s rank=\"%s\">", suffix, ORTE_VPID_PRINT(name->vpid));
snprintf(endtag, ORTE_IOF_BASE_TAG_MAX, "</%s>", suffix);
} else {
snprintf(starttag, ORTE_IOF_BASE_TAG_MAX, "[%s,%s]<%s>",
ORTE_LOCAL_JOBID_PRINT(name->jobid),
ORTE_VPID_PRINT(name->vpid), suffix);
memset(endtag, '\0', ORTE_IOF_BASE_TAG_MAX);
}
starttaglen = strlen(starttag);
endtaglen = strlen(endtag);
/* start with the tag */
for (j=0, k=0; j < starttaglen && k < ORTE_IOF_BASE_TAGGED_OUT_MAX; j++) {
output->data[k++] = starttag[j];
}
/* cycle through the data looking for <cr>
* and replace those with the tag
*/
for (i=0; i < numbytes && k < ORTE_IOF_BASE_TAGGED_OUT_MAX; i++) {
if ('\n' == data[i]) {
/* we need to break the line with the end tag */
for (j=0; j < endtaglen && k < ORTE_IOF_BASE_TAGGED_OUT_MAX; j++) {
output->data[k++] = endtag[j];
}
/* move the <cr> over */
output->data[k++] = '\n';
/* if this isn't the end of the line, add a new start tag */
if (i < numbytes-1) {
for (j=0; j < starttaglen && k < ORTE_IOF_BASE_TAGGED_OUT_MAX; j++) {
output->data[k++] = starttag[j];
}
}
} else {
output->data[k++] = data[i];
}
}
output->numbytes = k;
} else {
/* copy over the data to be written */
if (0 < numbytes) {
/* don't copy 0 bytes - we just need to pass
* the zero bytes so the fd can be closed
* after it writes everything out
*/
memcpy(output->data, data, numbytes);
}
output->numbytes = numbytes;
/* if this is to be xml tagged, create a tag with the correct syntax - we do not allow
* timestamping of xml output
*/
if (orte_xml_output) {
snprintf(starttag, ORTE_IOF_BASE_TAG_MAX, "<%s rank=\"%s\">", suffix, ORTE_VPID_PRINT(name->vpid));
snprintf(endtag, ORTE_IOF_BASE_TAG_MAX, "</%s>", suffix);
goto construct;
}
/* if we are to timestamp output, start the tag with that */
if (orte_timestamp_output) {
time_t mytime;
char *cptr;
/* get the timestamp */
time(&mytime);
cptr = ctime(&mytime);
cptr[strlen(cptr)-1] = '\0'; /* remove trailing newline */
if (orte_tag_output) {
/* if we want it tagged as well, use both */
snprintf(starttag, ORTE_IOF_BASE_TAG_MAX, "%s[%s,%s]<%s>:",
cptr, ORTE_LOCAL_JOBID_PRINT(name->jobid),
ORTE_VPID_PRINT(name->vpid), suffix);
} else {
/* only use timestamp */
snprintf(starttag, ORTE_IOF_BASE_TAG_MAX, "%s<%s>:", cptr, suffix);
}
/* no endtag for this option */
memset(endtag, '\0', ORTE_IOF_BASE_TAG_MAX);
goto construct;
}
if (orte_tag_output) {
snprintf(starttag, ORTE_IOF_BASE_TAG_MAX, "[%s,%s]<%s>:",
ORTE_LOCAL_JOBID_PRINT(name->jobid),
ORTE_VPID_PRINT(name->vpid), suffix);
/* no endtag for this option */
memset(endtag, '\0', ORTE_IOF_BASE_TAG_MAX);
goto construct;
}
/* if we get here, then the data is not to be tagged - just copy it
* and move on to processing
*/
if (0 < numbytes) {
/* don't copy 0 bytes - we just need to pass
* the zero bytes so the fd can be closed
* after it writes everything out
*/
memcpy(output->data, data, numbytes);
}
output->numbytes = numbytes;
goto process;
construct:
starttaglen = strlen(starttag);
endtaglen = strlen(endtag);
/* start with the tag */
for (j=0, k=0; j < starttaglen && k < ORTE_IOF_BASE_TAGGED_OUT_MAX; j++) {
output->data[k++] = starttag[j];
}
/* cycle through the data looking for <cr>
* and replace those with the tag
*/
for (i=0; i < numbytes && k < ORTE_IOF_BASE_TAGGED_OUT_MAX; i++) {
if ('\n' == data[i]) {
/* we need to break the line with the end tag */
for (j=0; j < endtaglen && k < ORTE_IOF_BASE_TAGGED_OUT_MAX; j++) {
output->data[k++] = endtag[j];
}
/* move the <cr> over */
output->data[k++] = '\n';
/* if this isn't the end of the line, add a new start tag */
if (i < numbytes-1) {
for (j=0; j < starttaglen && k < ORTE_IOF_BASE_TAGGED_OUT_MAX; j++) {
output->data[k++] = starttag[j];
}
}
} else {
output->data[k++] = data[i];
}
}
output->numbytes = k;
process:
/* lock us up to protect global operations */
OPAL_THREAD_LOCK(&orte_iof_base.iof_write_output_lock);
@ -154,7 +199,8 @@ int orte_iof_base_write_output(orte_process_name_t *name, orte_iof_tag_t stream,
void orte_iof_base_write_handler(int fd, short event, void *cbdata)
{
orte_iof_write_event_t *wev = (orte_iof_write_event_t*)cbdata;
orte_iof_sink_t *sink = (orte_iof_sink_t*)cbdata;
orte_iof_write_event_t *wev = sink->wev;
opal_list_item_t *item;
orte_iof_write_output_t *output;
int num_written;

Просмотреть файл

@ -41,6 +41,8 @@
#include "orte/mca/oob/base/base.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/util/name_fns.h"
#include "orte/mca/odls/odls_types.h"
#include "orte/mca/iof/base/base.h"
#include "iof_hnp.h"
@ -99,6 +101,10 @@ static int hnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag,
orte_iof_proc_t *proct;
opal_list_item_t *item;
int flags;
char *outfile;
int fdout;
orte_odls_job_t *jobdat;
int np, numdigs;
int rc;
/* don't do this if the dst vpid is invalid or the fd is negative! */
@ -138,18 +144,64 @@ static int hnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag,
proct->name.jobid = dst_name->jobid;
proct->name.vpid = dst_name->vpid;
opal_list_append(&mca_iof_hnp_component.procs, &proct->super);
/* see if we are to output to a file */
if (NULL != orte_output_filename) {
/* get the local jobdata for this proc */
for (item = opal_list_get_first(&orte_local_jobdata);
item != opal_list_get_end(&orte_local_jobdata);
item = opal_list_get_next(item)) {
jobdat = (orte_odls_job_t*)item;
if (jobdat->jobid == proct->name.jobid) {
break;
}
}
np = jobdat->num_procs / 10;
/* determine the number of digits required for max vpid */
numdigs = 1;
while (np > 0) {
numdigs++;
np = np / 10;
}
/* construct the filename */
asprintf(&outfile, "%s.%*0lu", orte_output_filename, numdigs, (unsigned long)proct->name.vpid);
/* create the file */
fdout = open(outfile, O_CREAT|O_RDWR|O_TRUNC, 0644);
free(outfile);
if (fdout < 0) {
/* couldn't be opened */
ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE);
return ORTE_ERR_FILE_OPEN_FAILURE;
}
/* define a sink to that file descriptor */
ORTE_IOF_SINK_DEFINE(&sink, dst_name, fdout, ORTE_IOF_STDOUTALL,
orte_iof_base_write_handler,
&mca_iof_hnp_component.sinks);
}
SETUP:
/* define a read event and activate it */
if (src_tag & ORTE_IOF_STDOUT) {
ORTE_IOF_READ_EVENT(&proct->revstdout, dst_name, fd, ORTE_IOF_STDOUT,
orte_iof_hnp_read_local_handler, true);
orte_iof_hnp_read_local_handler, false);
} else if (src_tag & ORTE_IOF_STDERR) {
ORTE_IOF_READ_EVENT(&proct->revstderr, dst_name, fd, ORTE_IOF_STDERR,
orte_iof_hnp_read_local_handler, true);
orte_iof_hnp_read_local_handler, false);
} else if (src_tag & ORTE_IOF_STDDIAG) {
ORTE_IOF_READ_EVENT(&proct->revstddiag, dst_name, fd, ORTE_IOF_STDDIAG,
orte_iof_hnp_read_local_handler, true);
orte_iof_hnp_read_local_handler, false);
}
/* if -all- of the readevents for this proc have been defined, then
* activate them. Otherwise, we can think that the proc is complete
* because one of the readevents fires -prior- to all of them having
* been defined!
*/
if (NULL != proct->revstdout && NULL != proct->revstderr && NULL != proct->revstddiag) {
proct->revstdout->active = true;
opal_event_add(&(proct->revstdout->ev), 0);
proct->revstderr->active = true;
opal_event_add(&(proct->revstderr->ev), 0);
proct->revstddiag->active = true;
opal_event_add(&(proct->revstddiag->ev), 0);
}
return ORTE_SUCCESS;
}

Просмотреть файл

@ -109,6 +109,7 @@ static int orte_iof_hnp_close(void)
}
OBJ_DESTRUCT(&mca_iof_hnp_component.procs);
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_IOF_HNP);
/* release and cleanup the lock */
OPAL_THREAD_UNLOCK(&mca_iof_hnp_component.lock);
OBJ_DESTRUCT(&mca_iof_hnp_component.lock);
}
@ -156,7 +157,7 @@ static int orte_iof_hnp_query(mca_base_module_t **module, int *priority)
OBJ_CONSTRUCT(&mca_iof_hnp_component.sinks, opal_list_t);
OBJ_CONSTRUCT(&mca_iof_hnp_component.procs, opal_list_t);
mca_iof_hnp_component.stdinev = NULL;
/* we must be selected */
*priority = 100;
*module = (mca_base_module_t *) &orte_iof_hnp_module;

Просмотреть файл

@ -203,7 +203,11 @@ void orte_iof_hnp_read_local_handler(int fd, short event, void *cbdata)
item != opal_list_get_end(&mca_iof_hnp_component.sinks);
item = opal_list_get_next(item)) {
orte_iof_sink_t *sink = (orte_iof_sink_t*)item;
if (sink->tag & rev->tag &&
/* if the target isn't set, then this sink is for another purpose - ignore it */
if (ORTE_JOBID_INVALID == sink->daemon.jobid) {
continue;
}
if ((sink->tag & rev->tag) &&
sink->name.jobid == rev->name.jobid &&
(ORTE_VPID_WILDCARD == sink->name.vpid || sink->name.vpid == rev->name.vpid)) {
/* need to send the data to the remote endpoint - if
@ -275,17 +279,45 @@ void orte_iof_hnp_read_local_handler(int fd, short event, void *cbdata)
break;
}
}
} else {
if (ORTE_IOF_STDOUT & rev->tag) {
orte_iof_base_write_output(&rev->name, rev->tag, data, numbytes, &orte_iof_base.iof_write_stdout);
} else {
orte_iof_base_write_output(&rev->name, rev->tag, data, numbytes, &orte_iof_base.iof_write_stderr);
}
/* re-add the event */
opal_event_add(&rev->ev, 0);
OPAL_THREAD_UNLOCK(&mca_iof_hnp_component.lock);
return;
}
/* see if the user wanted the output directed to files */
if (NULL != orte_output_filename) {
/* find the sink for this rank */
for (item = opal_list_get_first(&mca_iof_hnp_component.sinks);
item != opal_list_get_end(&mca_iof_hnp_component.sinks);
item = opal_list_get_next(item)) {
orte_iof_sink_t *sink = (orte_iof_sink_t*)item;
/* if the target is set, then this sink is for another purpose - ignore it */
if (ORTE_JOBID_INVALID != sink->daemon.jobid) {
continue;
}
/* if this sink isn't for output, ignore it */
if (ORTE_IOF_STDIN & sink->tag) {
continue;
}
/* is this the desired proc? */
if (sink->name.jobid == rev->name.jobid &&
sink->name.vpid == rev->name.vpid) {
/* output to the corresponding file */
orte_iof_base_write_output(&rev->name, rev->tag, data, numbytes, sink->wev);
/* done */
break;
}
}
} else {
/* output this to our local output */
if (ORTE_IOF_STDOUT & rev->tag) {
orte_iof_base_write_output(&rev->name, rev->tag, data, numbytes, orte_iof_base.iof_write_stdout->wev);
} else {
orte_iof_base_write_output(&rev->name, rev->tag, data, numbytes, orte_iof_base.iof_write_stderr->wev);
}
}
/* re-add the event */
opal_event_add(&rev->ev, 0);
OPAL_THREAD_UNLOCK(&mca_iof_hnp_component.lock);
return;

Просмотреть файл

@ -27,6 +27,13 @@
#ifdef HAVE_STRING_H
#include <string.h>
#endif /* HAVE_STRING_H */
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#else
#ifdef HAVE_SYS_FCNTL_H
#include <sys/fcntl.h>
#endif
#endif
#include "orte/util/show_help.h"
@ -129,9 +136,12 @@ static void process_msg(int fd, short event, void *cbdata)
while (item != opal_list_get_end(&mca_iof_hnp_component.sinks)) {
next = opal_list_get_next(item);
sink = (orte_iof_sink_t*)item;
/* if the target isn't set, then this sink is for another purpose - ignore it */
if (ORTE_JOBID_INVALID == sink->daemon.jobid) {
continue;
}
/* if this sink is the designated one, then remove it from list */
if (stream & sink->tag &&
if ((stream & sink->tag) &&
sink->name.jobid == origin.jobid &&
(ORTE_VPID_WILDCARD == sink->name.vpid ||
ORTE_VPID_WILDCARD == origin.vpid ||
@ -161,19 +171,23 @@ static void process_msg(int fd, short event, void *cbdata)
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
ORTE_NAME_PRINT(&origin)));
/* write the output locally */
/* output this to our local output */
if (ORTE_IOF_STDOUT & stream) {
orte_iof_base_write_output(&origin, stream, data, numbytes, &orte_iof_base.iof_write_stdout);
orte_iof_base_write_output(&origin, stream, data, numbytes, orte_iof_base.iof_write_stdout->wev);
} else {
orte_iof_base_write_output(&origin, stream, data, numbytes, &orte_iof_base.iof_write_stderr);
orte_iof_base_write_output(&origin, stream, data, numbytes, orte_iof_base.iof_write_stderr->wev);
}
/* cycle through the endpoints to see if someone else wants a copy */
for (item = opal_list_get_first(&mca_iof_hnp_component.sinks);
item != opal_list_get_end(&mca_iof_hnp_component.sinks);
item = opal_list_get_next(item)) {
orte_iof_sink_t* sink = (orte_iof_sink_t*)item;
if (stream & sink->tag &&
sink = (orte_iof_sink_t*)item;
/* if the target isn't set, then this sink is for another purpose - ignore it */
if (ORTE_JOBID_INVALID == sink->daemon.jobid) {
continue;
}
if ((stream & sink->tag) &&
sink->name.jobid == origin.jobid &&
(ORTE_VPID_WILDCARD == sink->name.vpid ||
ORTE_VPID_WILDCARD == origin.vpid ||

Просмотреть файл

@ -37,6 +37,8 @@ typedef uint8_t orte_iof_tag_t;
#define ORTE_IOF_STDOUT 0x02
#define ORTE_IOF_STDERR 0x04
#define ORTE_IOF_STDDIAG 0x08
#define ORTE_IOF_STDOUTALL 0x0e
/* flow control flags */
#define ORTE_IOF_XON 0x10
#define ORTE_IOF_XOFF 0x20

Просмотреть файл

@ -42,6 +42,7 @@
#include "orte/mca/errmgr/errmgr.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/odls/odls_types.h"
#include "orte/mca/iof/iof.h"
#include "orte/mca/iof/base/base.h"
@ -91,6 +92,11 @@ static int orted_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_ta
int flags;
opal_list_item_t *item;
orte_iof_proc_t *proct;
orte_iof_sink_t *sink;
char *outfile;
int fdout;
orte_odls_job_t *jobdat;
int np, numdigs;
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
"%s iof:orted pushing fd %d for process %s",
@ -124,20 +130,65 @@ static int orted_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_ta
proct->name.jobid = dst_name->jobid;
proct->name.vpid = dst_name->vpid;
opal_list_append(&mca_iof_orted_component.procs, &proct->super);
/* see if we are to output to a file */
if (NULL != orte_output_filename) {
/* get the local jobdata for this proc */
for (item = opal_list_get_first(&orte_local_jobdata);
item != opal_list_get_end(&orte_local_jobdata);
item = opal_list_get_next(item)) {
jobdat = (orte_odls_job_t*)item;
if (jobdat->jobid == proct->name.jobid) {
break;
}
}
np = jobdat->num_procs / 10;
/* determine the number of digits required for max vpid */
numdigs = 1;
while (np > 0) {
numdigs++;
np = np / 10;
}
/* construct the filename */
asprintf(&outfile, "%s.%*0lu", orte_output_filename, numdigs, (unsigned long)proct->name.vpid);
/* create the file */
fdout = open(outfile, O_CREAT|O_RDWR|O_TRUNC, 0644);
free(outfile);
if (fdout < 0) {
/* couldn't be opened */
ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE);
return ORTE_ERR_FILE_OPEN_FAILURE;
}
/* define a sink to that file descriptor */
ORTE_IOF_SINK_DEFINE(&sink, dst_name, fdout, ORTE_IOF_STDOUTALL,
orte_iof_base_write_handler,
&mca_iof_orted_component.sinks);
}
SETUP:
/* define a read event and activate it */
if (src_tag & ORTE_IOF_STDOUT) {
ORTE_IOF_READ_EVENT(&proct->revstdout, dst_name, fd, ORTE_IOF_STDOUT,
orte_iof_orted_read_handler, true);
orte_iof_orted_read_handler, false);
} else if (src_tag & ORTE_IOF_STDERR) {
ORTE_IOF_READ_EVENT(&proct->revstderr, dst_name, fd, ORTE_IOF_STDERR,
orte_iof_orted_read_handler, true);
orte_iof_orted_read_handler, false);
} else if (src_tag & ORTE_IOF_STDDIAG) {
ORTE_IOF_READ_EVENT(&proct->revstddiag, dst_name, fd, ORTE_IOF_STDDIAG,
orte_iof_orted_read_handler, true);
orte_iof_orted_read_handler, false);
}
/* if -all- of the readevents for this proc have been defined, then
* activate them. Otherwise, we can think that the proc is complete
* because one of the readevents fires -prior- to all of them having
* been defined!
*/
if (NULL != proct->revstdout && NULL != proct->revstderr && NULL != proct->revstddiag) {
proct->revstdout->active = true;
opal_event_add(&(proct->revstdout->ev), 0);
proct->revstderr->active = true;
opal_event_add(&(proct->revstderr->ev), 0);
proct->revstddiag->active = true;
opal_event_add(&(proct->revstddiag->ev), 0);
}
return ORTE_SUCCESS;
}

Просмотреть файл

@ -103,6 +103,33 @@ void orte_iof_orted_read_handler(int fd, short event, void *cbdata)
goto CLEAN_RETURN;
}
/* see if the user wanted the output directed to files */
if (NULL != orte_output_filename) {
/* find the sink for this rank */
for (item = opal_list_get_first(&mca_iof_orted_component.sinks);
item != opal_list_get_end(&mca_iof_orted_component.sinks);
item = opal_list_get_next(item)) {
orte_iof_sink_t *sink = (orte_iof_sink_t*)item;
/* if the target is set, then this sink is for another purpose - ignore it */
if (ORTE_JOBID_INVALID != sink->daemon.jobid) {
continue;
}
/* if this sink isn't for output, ignore it */
if (ORTE_IOF_STDIN & sink->tag) {
continue;
}
/* is this the desired proc? */
if (sink->name.jobid == rev->name.jobid &&
sink->name.vpid == rev->name.vpid) {
/* output to the corresponding file */
orte_iof_base_write_output(&rev->name, rev->tag, data, numbytes, sink->wev);
/* done */
break;
}
}
goto RESTART;
}
/* prep the buffer */
buf = OBJ_NEW(opal_buffer_t);
@ -134,6 +161,7 @@ void orte_iof_orted_read_handler(int fd, short event, void *cbdata)
orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_IOF_HNP,
0, send_cb, NULL);
RESTART:
/* re-add the event */
opal_event_add(&rev->ev, 0);

Просмотреть файл

@ -96,9 +96,9 @@ static void process_msg(int fd, short event, void *cbdata)
if (0 < numbytes) {
/* write the output locally */
if (ORTE_IOF_STDOUT & stream) {
orte_iof_base_write_output(&origin, stream, data, numbytes, &orte_iof_base.iof_write_stdout);
orte_iof_base_write_output(&origin, stream, data, numbytes, orte_iof_base.iof_write_stdout->wev);
} else {
orte_iof_base_write_output(&origin, stream, data, numbytes, &orte_iof_base.iof_write_stderr);
orte_iof_base_write_output(&origin, stream, data, numbytes, orte_iof_base.iof_write_stderr->wev);
}
}

Просмотреть файл

@ -31,3 +31,21 @@ Fileset: %s
Will continue attempting to launch the process.
#
[orte-odls-base:xterm-neg-rank]
The xterm option was given a negative rank to display:
Rank: %d
Note that a value of -1 represents "all", but all other values
must range from 0 to #procs-1.
#
[orte-odls-base:xterm-rank-out-of-bounds]
The xterm option was asked to display a rank that is larger
than the number of procs in the job:
Rank: %d
#procs: %d
Note that ranks start with 0, not 1, and must be specified
accordingly.

Просмотреть файл

@ -23,6 +23,7 @@
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/class/opal_list.h"
#include "orte/mca/odls/odls.h"
#include "orte/mca/odls/base/base.h"
@ -31,9 +32,15 @@
int orte_odls_base_close(void)
{
opal_list_item_t *item;
/* cleanup ODLS globals */
OBJ_DESTRUCT(&orte_odls_globals.mutex);
OBJ_DESTRUCT(&orte_odls_globals.cond);
while (NULL != (item = opal_list_remove_first(&orte_odls_globals.xterm_ranks))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&orte_odls_globals.xterm_ranks);
if (NULL != orte_odls_globals.dmap && NULL != orte_odls_globals.dmap->bytes) {
free(orte_odls_globals.dmap->bytes);
free(orte_odls_globals.dmap);

Просмотреть файл

@ -904,6 +904,8 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
char dir[MAXPATHLEN];
char **argvptr;
char *full_search;
char **argvsav=NULL;
int inm;
/* protect operations involving the global list of children */
OPAL_THREAD_LOCK(&orte_odls_globals.mutex);
@ -1141,7 +1143,57 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
continue;
}
/* setup the rest of the environment with the proc-specific items - these
/* did the user request we display output in xterms? */
if (NULL != orte_xterm) {
opal_list_item_t *nmitem;
orte_namelist_t *nm;
/* see if this rank is one of those requested */
for (nmitem = opal_list_get_first(&orte_odls_globals.xterm_ranks);
nmitem != opal_list_get_end(&orte_odls_globals.xterm_ranks);
nmitem = opal_list_get_next(nmitem)) {
nm = (orte_namelist_t*)nmitem;
/* check for bozo case */
if (jobdat->num_procs <= nm->name.vpid) {
/* can't be done! */
orte_show_help("help-odls-base.txt",
"orte-odls-base:xterm-rank-out-of-bounds",
true, nm->name.vpid, jobdat->num_procs);
rc = ORTE_ERR_VALUE_OUT_OF_BOUNDS;
goto CLEANUP;
}
if (ORTE_VPID_WILDCARD == nm->name.vpid ||
child->name->vpid == nm->name.vpid) {
/* we want this one - modify the app's command to include
* the orte xterm cmd. Need to be careful, though, that we
* don't modify the app for ALL ranks that use it! So we
* will create a copy of the argv so we can restore it later
*/
argvsav = opal_argv_copy(app->argv);
/* free the argv */
opal_argv_free(app->argv);
app->argv = NULL;
/* now create a new one that starts with the xtermcmd */
for (inm=0; inm < opal_argv_count(orte_odls_globals.xtermcmd); inm++) {
opal_argv_append_nosize(&app->argv, orte_odls_globals.xtermcmd[inm]);
}
/* insert the rank into the correct place as a window title */
free(app->argv[2]);
asprintf(&app->argv[2], "Rank %s", ORTE_VPID_PRINT(child->name->vpid));
/* add back the original argv */
for (inm=0; inm < opal_argv_count(argvsav); inm++) {
opal_argv_append_nosize(&app->argv, argvsav[inm]);
}
/* the app exe name itself is in the argvsav array, so
* we can recover it from there later
*/
free(app->app);
app->app = strdup(orte_odls_globals.xtermcmd[0]);
break;
}
}
}
/* setup the rest of the environment with the proc-specific items - these
* will be overwritten for each child
*/
if (ORTE_SUCCESS != (rc = orte_util_convert_jobid_to_string(&job_str, child->name->jobid))) {
@ -1168,6 +1220,7 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
}
opal_setenv(param, vpid_str, true, &app->env);
free(param);
/* although the vpid IS the process' rank within the job, users
* would appreciate being given a public environmental variable
* that also represents this value - something MPI specific - so
@ -1179,7 +1232,7 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
opal_setenv("OMPI_COMM_WORLD_RANK", vpid_str, true, &app->env);
free(vpid_str); /* done with this now */
/* users would appreciate being given a public environmental variable
/* users would appreciate being given a public environmental variable
* that also represents the local rank value - something MPI specific - so
* do that here.
*
@ -1195,7 +1248,7 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
opal_setenv("OMPI_COMM_WORLD_LOCAL_RANK", value, true, &app->env);
free(value);
param = mca_base_param_environ_variable("opal", NULL, "paffinity_base_slot_list");
param = mca_base_param_environ_variable("opal", NULL, "paffinity_base_slot_list");
if ( NULL != child->slot_list ) {
asprintf(&value, "%s", child->slot_list);
opal_setenv(param, value, true, &app->env);
@ -1205,7 +1258,7 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
}
free(param);
/* if we are timing things, record when we are going to launch this proc */
/* if we are timing things, record when we are going to launch this proc */
if (orte_timing) {
gettimeofday(&child->starttime, NULL);
}
@ -1266,6 +1319,17 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
}
/* move to next processor */
proc_rank++;
/* reset the exe name, if necessary */
if (NULL != argvsav) {
/* release the current argv array */
opal_argv_free(app->argv);
/* restore the original one */
app->argv = argvsav;
argvsav = NULL;
/* the app exe name itself is now in the argv[0] posn */
free(app->app);
app->app = strdup(app->argv[0]);
}
} /* complete launching all children for this app */
/* reset our working directory back to our default location - if we
* don't do this, then we will be looking for relative paths starting

Просмотреть файл

@ -25,6 +25,7 @@
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/util/trace.h"
#include "opal/util/path.h"
#include "opal/util/argv.h"
#include "opal/class/opal_value_array.h"
#include "opal/class/opal_pointer_array.h"
@ -35,6 +36,7 @@
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "orte/util/show_help.h"
#include "orte/util/parse_options.h"
#include "orte/mca/odls/base/odls_private.h"
@ -155,6 +157,10 @@ orte_odls_globals_t orte_odls_globals;
*/
int orte_odls_base_open(void)
{
char **ranks=NULL, *tmp;
int i, rank;
orte_namelist_t *nm;
/* Debugging / verbose output. Always have stream open, with
verbose set by the mca open system... */
orte_odls_globals.output = opal_output_open(NULL);
@ -166,10 +172,51 @@ int orte_odls_base_open(void)
/* initialize ODLS globals */
OBJ_CONSTRUCT(&orte_odls_globals.mutex, opal_mutex_t);
OBJ_CONSTRUCT(&orte_odls_globals.cond, opal_condition_t);
OBJ_CONSTRUCT(&orte_odls_globals.xterm_ranks, opal_list_t);
orte_odls_globals.xtermcmd = NULL;
orte_odls_globals.dmap = NULL;
orte_odls_globals.debugger = NULL;
orte_odls_globals.debugger_launched = false;
/* check if the user requested that we display output in xterms */
if (NULL != orte_xterm) {
/* construct a list of ranks to be displayed */
orte_util_parse_range_options(orte_xterm, &ranks);
for (i=0; i < opal_argv_count(ranks); i++) {
nm = OBJ_NEW(orte_namelist_t);
rank = strtol(ranks[i], NULL, 10);
if (-1 == rank) {
/* wildcard */
nm->name.vpid = ORTE_VPID_WILDCARD;
} else if (rank < 0) {
/* error out on bozo case */
orte_show_help("help-odls-base.txt",
"orte-odls-base:xterm-neg-rank",
true, rank);
return ORTE_ERROR;
} else {
/* we can't check here if the rank is out of
* range as we don't yet know how many ranks
* will be in the job - we'll check later
*/
nm->name.vpid = rank;
}
opal_list_append(&orte_odls_globals.xterm_ranks, &nm->item);
}
opal_argv_free(ranks);
/* construct the xtermcmd */
orte_odls_globals.xtermcmd = NULL;
tmp = opal_find_absolute_path("xterm");
if (NULL == tmp) {
return ORTE_ERROR;
}
opal_argv_append_nosize(&orte_odls_globals.xtermcmd, tmp);
free(tmp);
opal_argv_append_nosize(&orte_odls_globals.xtermcmd, "-T");
opal_argv_append_nosize(&orte_odls_globals.xtermcmd, "save");
opal_argv_append_nosize(&orte_odls_globals.xtermcmd, "-e");
}
/* Open up all available components */
if (ORTE_SUCCESS !=

Просмотреть файл

@ -63,6 +63,10 @@ typedef struct {
orte_odls_job_t *debugger;
/* debugger launched */
bool debugger_launched;
/* list of ranks to be displayed on separate xterms */
opal_list_t xterm_ranks;
/* the xterm cmd to be used */
char **xtermcmd;
} orte_odls_globals_t;
ORTE_DECLSPEC extern orte_odls_globals_t orte_odls_globals;

Просмотреть файл

@ -55,7 +55,6 @@ bool orte_do_not_launch = false;
bool orted_spin_flag = false;
bool orte_static_ports = false;
bool orte_keep_fqdn_hostnames = false;
bool orte_tag_output;
bool orte_show_resolved_nodenames;
int orted_debug_failure;
int orted_debug_failure_delay;
@ -110,6 +109,13 @@ opal_list_t orte_local_children;
/* list of job data for local children on a daemon */
opal_list_t orte_local_jobdata;
/* IOF controls */
bool orte_tag_output;
bool orte_timestamp_output;
char *orte_output_filename;
/* generate new xterm windows to display output from specified ranks */
char *orte_xterm;
/* whether or not to forward SIGTSTP and SIGCONT signals */
bool orte_forward_job_control;

Просмотреть файл

@ -427,7 +427,6 @@ ORTE_DECLSPEC extern bool orted_spin_flag;
ORTE_DECLSPEC extern bool orte_static_ports;
ORTE_DECLSPEC extern int32_t orte_contiguous_nodes;
ORTE_DECLSPEC extern bool orte_keep_fqdn_hostnames;
ORTE_DECLSPEC extern bool orte_tag_output;
ORTE_DECLSPEC extern bool orte_show_resolved_nodenames;
ORTE_DECLSPEC extern int orted_debug_failure;
ORTE_DECLSPEC extern int orted_debug_failure_delay;
@ -485,6 +484,12 @@ ORTE_DECLSPEC extern opal_list_t orte_local_jobdata;
/* whether or not to forward SIGTSTP and SIGCONT signals */
ORTE_DECLSPEC extern bool orte_forward_job_control;
/* IOF controls */
ORTE_DECLSPEC extern bool orte_tag_output;
ORTE_DECLSPEC extern bool orte_timestamp_output;
ORTE_DECLSPEC extern char *orte_output_filename;
/* generate new xterm windows to display output from specified ranks */
ORTE_DECLSPEC extern char *orte_xterm;
#endif /* ORTE_DISABLE_FULL_SUPPORT */

Просмотреть файл

@ -211,6 +211,7 @@ int orte_register_params(void)
"Number of nodes after which contiguous nodename encoding will automatically be used [default: INT_MAX]",
false, false, INT32_MAX, &orte_contiguous_nodes);
/* whether to tag output */
mca_base_param_reg_int_name("orte", "tag_output",
"Tag all output with [job,rank] (default: false)",
false, false, (int) false, &value);
@ -224,7 +225,18 @@ int orte_register_params(void)
if (orte_xml_output) {
orte_tag_output = true;
}
/* whether to timestamp output */
mca_base_param_reg_int_name("orte", "timestamp_output",
"Timestamp all application process output (default: false)",
false, false, (int) false, &value);
orte_timestamp_output = OPAL_INT_TO_BOOL(value);
/* redirect output into files */
mca_base_param_reg_string_name("orte", "output_filename",
"Redirect output from application processes into filename.rank [default: NULL]",
false, false, NULL, &orte_output_filename);
mca_base_param_reg_int_name("orte", "show_resolved_nodenames",
"Display any node names that are resolved to a different name (default: false)",
false, false, (int) false, &value);
@ -246,6 +258,11 @@ int orte_register_params(void)
false, false, (int)false, &value);
orte_allocation_required = OPAL_INT_TO_BOOL(value);
/* generate new terminal windows to display output from specified ranks */
mca_base_param_reg_string_name("orte", "xterm",
"Create a new xterm window and display output from the specified ranks there [default: none]",
false, false, NULL, &orte_xterm);
/* whether or not to forward SIGTSTP and SIGCONT signals */
mca_base_param_reg_int_name("orte", "forward_job_control",
"Forward SIGTSTP (after converting to SIGSTOP) and SIGCONT signals to the application procs [default: no]",

Просмотреть файл

@ -265,6 +265,14 @@ is 10 seconds.
.
.
.TP
.B -output-filename\fR,\fP --output-filename \fR<filename>\fP
Redirect the stdout, stderr, and stddiag of all ranks to a rank-unique version of
the specified filename. Any directories in the filename will automatically be created.
Each output file will consist of filename.rank, where the rank will be left-filled with
zeros for correct ordering in listings.
.
.
.TP
.B -path\fR,\fP --path \fR<path>\fP
<path> that will be used when attempting to locate the requested
executables. This is used prior to using the local PATH setting.
@ -341,11 +349,16 @@ indicating that no ranks are to receive stdin.
.
.TP
.B -tag-output\fR,\fP --tag-output
Tag each line output to stdout, stderr, and stddiag with \fB[jobid, rank]<stdxxx>\fP indicating the process jobid
Tag each line of output to stdout, stderr, and stddiag with \fB[jobid, rank]<stdxxx>\fP indicating the process jobid
and rank that generated the output, and the channel which generated it.
.
.
.TP
.B -timestamp-output\fR,\fP --timestamp-output
Timestamp each line of output to stdout, stderr, and stddiag.
.
.
.TP
.B --tmpdir \fR<dir>\fP
Set the root for the session directory tree for mpirun only.
.
@ -377,7 +390,10 @@ See the "Current Working Directory" section for notes on relative paths.
.B Note:
If the \fI-wdir\fP option appears both on the command line and in an
application context, the context will take precedence over the command
line.
line. Relative paths are converted to absolute paths on the node where
mpirun is executed. Thus, if the path to the desired wdir is different
on the backend nodes, then it must be specified as an absolute path that
is correct for the backend node.
.
.
.TP
@ -396,6 +412,20 @@ then use \fI-x\fP to export (not define) them.
Provide all output to stdout, stderr, and stddiag in an xml format.
.
.
.TP
.B -xterm\fR,\fP --xterm \fR<ranks>\fP
Display the output of the specified ranks in separate xterm windows. The ranks are specified
as a comma-separated list of ranges, with a -1 indicating all. A separate
window will be created for each specified rank.
.B Note:
In some environments, xterm may require that the executable be in the user's
path, or be specified in absolute or relative terms. Thus, it may be necessary
to specify a local executable as "./foo" instead of just "foo". If xterm fails to
find the executable, mpirun will hang, but still respond correctly to a ctrl-c.
If this happens, please check that the executable is being specified correctly
and try again.
.
.
.P
The following options are useful for developers; they are not generally
useful to most ORTE and/or MPI users:

Просмотреть файл

@ -63,6 +63,7 @@
#include "opal/version.h"
#include "opal/runtime/opal.h"
#include "opal/util/os_dirpath.h"
#include "opal/util/os_path.h"
#include "opal/util/path.h"
#include "opal/class/opal_pointer_array.h"
@ -73,6 +74,7 @@
#include "orte/util/session_dir.h"
#include "orte/util/name_fns.h"
#include "orte/util/hnp_contact.h"
#include "orte/util/parse_options.h"
#include "orte/mca/odls/odls.h"
#include "orte/mca/plm/plm.h"
@ -162,7 +164,16 @@ static opal_cmd_line_init_t cmd_line_init[] = {
{ "orte", "tag", "output", '\0', "tag-output", "tag-output", 0,
NULL, OPAL_CMD_LINE_TYPE_BOOL,
"Tag all output with [job,rank]" },
{ "orte", "timestamp", "output", '\0', "timestamp-output", "timestamp-output", 0,
NULL, OPAL_CMD_LINE_TYPE_BOOL,
"Timestamp all application process output" },
{ "orte", "output", "filename", '\0', "output-filename", "output-filename", 1,
NULL, OPAL_CMD_LINE_TYPE_STRING,
"Redirect output from application processes into filename.rank" },
{ "orte", "xterm", NULL, '\0', "xterm", "xterm", 1,
NULL, OPAL_CMD_LINE_TYPE_STRING,
"Create a new xterm window and display output from the specified ranks there" },
/* select stdin option */
{ NULL, NULL, NULL, '\0', "stdin", "stdin", 1,
&orterun_globals.stdin_target, OPAL_CMD_LINE_TYPE_STRING,

Просмотреть файл

@ -64,7 +64,6 @@ ORTE_DECLSPEC orte_proc_info_t orte_process_info = {
/* .sock_stderr = */ NULL
};
#define ORTE_MAX_HOSTNAME_SIZE 512
static bool init=false;
int orte_proc_info(void)

Просмотреть файл

@ -37,6 +37,8 @@
BEGIN_C_DECLS
#define ORTE_MAX_HOSTNAME_SIZE 512
/**
* Process information structure
*