
1. completely and cleanly separates responsibilities between the HNP, orted, and tool components. 2. removes all wireup messaging during launch and shutdown. 3. maintains flow control for stdin to avoid large-scale consumption of memory by orteds when large input files are forwarded. This is done using an xon/xoff protocol. 4. enables specification of stdin recipients on the mpirun cmd line. Allowed options include rank, "all", or "none". Default is rank 0. 5. creates a new MPI_Info key "ompi_stdin_target" that supports the above options for child jobs. Default is "none". 6. adds a new tool "orte-iof" that can connect to a running mpirun and display the output. Cmd line options allow selection of any combination of stdout, stderr, and stddiag. Default is stdout. 7. adds a new mpirun and orte-iof cmd line option "tag-output" that will tag each line of output with process name and stream ident. For example: "[1,0]<stdout>this is output". This is not intended for the 1.3 release, as it is a major change requiring considerable soak time. This commit was SVN r19767.
92 строки
3.3 KiB
C
92 строки
3.3 KiB
C
/*
|
|
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
|
* University Research and Technology
|
|
* Corporation. All rights reserved.
|
|
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
* of Tennessee Research Foundation. All rights
|
|
* reserved.
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
* University of Stuttgart. All rights reserved.
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
* All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
#include "orte_config.h"
|
|
#include "orte/constants.h"
|
|
|
|
#include <stdio.h>
|
|
|
|
#include "opal/event/event.h"
|
|
#include "opal/mca/mca.h"
|
|
#include "opal/mca/base/base.h"
|
|
|
|
#include "orte/util/proc_info.h"
|
|
|
|
#include "orte/mca/iof/iof.h"
|
|
#include "orte/mca/iof/base/base.h"
|
|
|
|
|
|
int orte_iof_base_close(void)
|
|
{
|
|
bool dump;
|
|
opal_list_item_t *item;
|
|
orte_iof_write_output_t *output;
|
|
int num_written;
|
|
|
|
/* shutdown any remaining opened components */
|
|
if (0 != opal_list_get_size(&orte_iof_base.iof_components_opened)) {
|
|
mca_base_components_close(orte_iof_base.iof_output,
|
|
&orte_iof_base.iof_components_opened, NULL);
|
|
}
|
|
OBJ_DESTRUCT(&orte_iof_base.iof_components_opened);
|
|
|
|
OPAL_THREAD_LOCK(&orte_iof_base.iof_write_output_lock);
|
|
if (!orte_process_info.daemon) {
|
|
/* check if anything is still trying to be written out */
|
|
if (!opal_list_is_empty(&orte_iof_base.iof_write_stdout.outputs)) {
|
|
dump = false;
|
|
/* make one last attempt to write this out */
|
|
while (NULL != (item = opal_list_remove_first(&orte_iof_base.iof_write_stdout.outputs))) {
|
|
output = (orte_iof_write_output_t*)item;
|
|
if (!dump) {
|
|
num_written = write(orte_iof_base.iof_write_stdout.fd, output->data, output->numbytes);
|
|
if (num_written < output->numbytes) {
|
|
/* don't retry - just cleanout the list and dump it */
|
|
dump = true;
|
|
}
|
|
}
|
|
OBJ_RELEASE(output);
|
|
}
|
|
}
|
|
OBJ_DESTRUCT(&orte_iof_base.iof_write_stdout);
|
|
if (!opal_list_is_empty(&orte_iof_base.iof_write_stderr.outputs)) {
|
|
dump = false;
|
|
/* make one last attempt to write this out */
|
|
while (NULL != (item = opal_list_remove_first(&orte_iof_base.iof_write_stderr.outputs))) {
|
|
output = (orte_iof_write_output_t*)item;
|
|
if (!dump) {
|
|
num_written = write(orte_iof_base.iof_write_stderr.fd, output->data, output->numbytes);
|
|
if (num_written < output->numbytes) {
|
|
/* don't retry - just cleanout the list and dump it */
|
|
dump = true;
|
|
}
|
|
}
|
|
OBJ_RELEASE(output);
|
|
}
|
|
}
|
|
OBJ_DESTRUCT(&orte_iof_base.iof_write_stderr);
|
|
}
|
|
OPAL_THREAD_UNLOCK(&orte_iof_base.iof_write_output_lock);
|
|
|
|
OBJ_DESTRUCT(&orte_iof_base.iof_write_output_lock);
|
|
|
|
|
|
return ORTE_SUCCESS;
|
|
}
|
|
|