diff --git a/orte/mca/ess/hnp/ess_hnp_module.c b/orte/mca/ess/hnp/ess_hnp_module.c index 187be8cc7d..49021d08ed 100644 --- a/orte/mca/ess/hnp/ess_hnp_module.c +++ b/orte/mca/ess/hnp/ess_hnp_module.c @@ -119,6 +119,12 @@ static int rte_init(void) goto error; } + /* if we are using xml for output, put an mpirun start tag */ + if (orte_xml_output) { + fprintf(orte_xml_fp, "\n"); + fflush(orte_xml_fp); + } + /* open and setup the opal_pstat framework so we can provide * process stats if requested */ @@ -587,6 +593,15 @@ static int rte_finalize(void) free(orte_job_ident); } + /* close the xml output file, if open */ + if (orte_xml_output) { + fprintf(orte_xml_fp, "\n"); + fflush(orte_xml_fp); + if (stdout != orte_xml_fp) { + fclose(orte_xml_fp); + } + } + return ORTE_SUCCESS; } diff --git a/orte/mca/iof/base/iof_base_close.c b/orte/mca/iof/base/iof_base_close.c index a5168887c8..c8b4ef0bc5 100644 --- a/orte/mca/iof/base/iof_base_close.c +++ b/orte/mca/iof/base/iof_base_close.c @@ -66,23 +66,26 @@ int orte_iof_base_close(void) } } OBJ_RELEASE(orte_iof_base.iof_write_stdout); - wev = orte_iof_base.iof_write_stderr->wev; - if (!opal_list_is_empty(&wev->outputs)) { - dump = false; - /* make one last attempt to write this out */ - while (NULL != (item = opal_list_remove_first(&wev->outputs))) { - output = (orte_iof_write_output_t*)item; - if (!dump) { - num_written = write(wev->fd, output->data, output->numbytes); - if (num_written < output->numbytes) { - /* don't retry - just cleanout the list and dump it */ - dump = true; + if (!orte_xml_output) { + /* we only opened stderr channel if we are NOT doing xml output */ + wev = orte_iof_base.iof_write_stderr->wev; + if (!opal_list_is_empty(&wev->outputs)) { + dump = false; + /* make one last attempt to write this out */ + while (NULL != (item = opal_list_remove_first(&wev->outputs))) { + output = (orte_iof_write_output_t*)item; + if (!dump) { + num_written = write(wev->fd, output->data, output->numbytes); + if (num_written < 
output->numbytes) { + /* don't retry - just cleanout the list and dump it */ + dump = true; + } } + OBJ_RELEASE(output); } - OBJ_RELEASE(output); } + OBJ_RELEASE(orte_iof_base.iof_write_stderr); } - OBJ_RELEASE(orte_iof_base.iof_write_stderr); } OPAL_THREAD_UNLOCK(&orte_iof_base.iof_write_output_lock); diff --git a/orte/mca/iof/base/iof_base_open.c b/orte/mca/iof/base/iof_base_open.c index e6e75ca549..5ac8754469 100644 --- a/orte/mca/iof/base/iof_base_open.c +++ b/orte/mca/iof/base/iof_base_open.c @@ -140,6 +140,15 @@ static void orte_iof_base_write_event_destruct(orte_iof_write_event_t* wev) if (wev->pending) { opal_event_del(&wev->ev); } + if (ORTE_PROC_IS_HNP) { + int xmlfd = fileno(orte_xml_fp); + if (xmlfd == wev->fd) { + /* don't close this one - will get it later */ + OBJ_DESTRUCT(&wev->outputs); + return; + } + } + if (2 < wev->fd) { OPAL_OUTPUT_VERBOSE((20, orte_iof_base.iof_output, "%s iof: closing fd %d for write event", @@ -170,7 +179,7 @@ orte_iof_base_t orte_iof_base; */ int orte_iof_base_open(void) { - int rc; + int rc, xmlfd; /* Initialize globals */ OBJ_CONSTRUCT(&orte_iof_base.iof_components_opened, opal_list_t); @@ -196,12 +205,28 @@ int orte_iof_base_open(void) /* daemons do not need to do this as they do not write out stdout/err */ if (!ORTE_PROC_IS_DAEMON) { - /* setup the stdout event */ - ORTE_IOF_SINK_DEFINE(&orte_iof_base.iof_write_stdout, ORTE_PROC_MY_NAME, - 1, ORTE_IOF_STDOUT, orte_iof_base_write_handler, NULL); - /* setup the stderr event */ - ORTE_IOF_SINK_DEFINE(&orte_iof_base.iof_write_stderr, ORTE_PROC_MY_NAME, - 2, ORTE_IOF_STDERR, orte_iof_base_write_handler, NULL); + if (orte_xml_output) { + if (NULL != orte_xml_fp) { + /* user wants all xml-formatted output sent to file */ + xmlfd = fileno(orte_xml_fp); + } else { + xmlfd = 1; + } + /* setup the stdout event */ + ORTE_IOF_SINK_DEFINE(&orte_iof_base.iof_write_stdout, ORTE_PROC_MY_NAME, + xmlfd, ORTE_IOF_STDOUT, orte_iof_base_write_handler, NULL); + /* don't create a stderr event 
- all output will go to + * the stdout channel + */ + } else { + /* setup the stdout event */ + ORTE_IOF_SINK_DEFINE(&orte_iof_base.iof_write_stdout, ORTE_PROC_MY_NAME, + 1, ORTE_IOF_STDOUT, orte_iof_base_write_handler, NULL); + /* setup the stderr event */ + ORTE_IOF_SINK_DEFINE(&orte_iof_base.iof_write_stderr, ORTE_PROC_MY_NAME, + 2, ORTE_IOF_STDERR, orte_iof_base_write_handler, NULL); + } + /* do NOT set these file descriptors to non-blocking. If we do so, * we set the file descriptor to non-blocking for everyone that has * that file descriptor, which includes everyone else in our shell diff --git a/orte/mca/ras/base/ras_base_allocate.c b/orte/mca/ras/base/ras_base_allocate.c index ce6307ab4c..4bbc0fab0d 100644 --- a/orte/mca/ras/base/ras_base_allocate.c +++ b/orte/mca/ras/base/ras_base_allocate.c @@ -70,7 +70,8 @@ static void display_alloc(void) } } if (orte_xml_output) { - opal_output(orte_clean_output, "%s\n", tmp); + fprintf(orte_xml_fp, "%s\n", tmp); + fflush(orte_xml_fp); } else { opal_output(orte_clean_output, "%s\n\n=================================================================\n", tmp); } diff --git a/orte/mca/rmaps/base/rmaps_base_map_job.c b/orte/mca/rmaps/base/rmaps_base_map_job.c index f813752183..4d5bb04599 100644 --- a/orte/mca/rmaps/base/rmaps_base_map_job.c +++ b/orte/mca/rmaps/base/rmaps_base_map_job.c @@ -93,7 +93,12 @@ int orte_rmaps_base_map_job(orte_job_t *jdata) if (jdata->map->display_map) { char *output; opal_dss.print(&output, NULL, jdata->map, ORTE_JOB_MAP); - opal_output(orte_clean_output, "%s", output); + if (orte_xml_output) { + fprintf(orte_xml_fp, "%s\n", output); + fflush(orte_xml_fp); + } else { + opal_output(orte_clean_output, "%s", output); + } free(output); } diff --git a/orte/runtime/orte_globals.c b/orte/runtime/orte_globals.c index e33ad98da0..e0759c0f30 100644 --- a/orte/runtime/orte_globals.c +++ b/orte/runtime/orte_globals.c @@ -156,6 +156,7 @@ orte_mapping_policy_t orte_default_mapping_policy = 0; int 
orte_debug_output = -1; bool orte_debug_daemons_flag = false; bool orte_xml_output = false; +FILE *orte_xml_fp = NULL; char *orte_job_ident = NULL; /* See comment in orte/tools/orterun/debuggers.c about this MCA diff --git a/orte/runtime/orte_globals.h b/orte/runtime/orte_globals.h index 80eabcef41..0cab7a7f6c 100644 --- a/orte/runtime/orte_globals.h +++ b/orte/runtime/orte_globals.h @@ -49,6 +49,7 @@ BEGIN_C_DECLS ORTE_DECLSPEC extern int orte_debug_verbosity; /* instantiated in orte/runtime/orte_init.c */ ORTE_DECLSPEC extern char *orte_prohibited_session_dirs; /* instantiated in orte/runtime/orte_init.c */ ORTE_DECLSPEC extern bool orte_xml_output; /* instantiated in orte/runtime/orte_globals.c */ +ORTE_DECLSPEC extern FILE *orte_xml_fp; /* instantiated in orte/runtime/orte_globals.c */ ORTE_DECLSPEC extern bool orte_help_want_aggregate; /* instantiated in orte/util/show_help.c */ ORTE_DECLSPEC extern char *orte_job_ident; /* instantiated in orte/runtime/orte_globals.c */ diff --git a/orte/runtime/orte_mca_params.c b/orte/runtime/orte_mca_params.c index c2774b8fb4..819ac04213 100644 --- a/orte/runtime/orte_mca_params.c +++ b/orte/runtime/orte_mca_params.c @@ -224,16 +224,36 @@ int orte_register_params(void) "Tag all output with [job,rank] (default: false)", false, false, (int) false, &value); orte_tag_output = OPAL_INT_TO_BOOL(value); mca_base_param_reg_int_name("orte", "xml_output", "Display all output in XML format (default: false)", false, false, (int) false, &value); orte_xml_output = OPAL_INT_TO_BOOL(value); /* if we requested xml output, be sure to tag the output as well */ if (orte_xml_output) { orte_tag_output = true; } + mca_base_param_reg_string_name("orte", "xml_file", + "Provide all output in XML format to the specified file", + false, false, NULL,
&strval); + if (NULL != strval) { + if (ORTE_PROC_IS_HNP && NULL == orte_xml_fp) { + /* only the HNP opens this file! Make sure it only happens once */ + orte_xml_fp = fopen(strval, "w"); + if (NULL == orte_xml_fp) { + opal_output(0, "Could not open specified xml output file: %s", strval); + return ORTE_ERROR; + } + } + /* ensure we set the flags to tag output */ + orte_xml_output = true; + orte_tag_output = true; + } else { + /* default to stdout */ + orte_xml_fp = stdout; + } + /* whether to timestamp output */ mca_base_param_reg_int_name("orte", "timestamp_output", "Timestamp all application process output (default: false)", diff --git a/orte/tools/orterun/orterun.c b/orte/tools/orterun/orterun.c index 0cfed39141..bbee624294 100644 --- a/orte/tools/orterun/orterun.c +++ b/orte/tools/orterun/orterun.c @@ -160,7 +160,10 @@ static opal_cmd_line_init_t cmd_line_init[] = { { "orte", "xml", "output", '\0', "xml", "xml", 0, NULL, OPAL_CMD_LINE_TYPE_BOOL, "Provide all output in XML format" }, - + { "orte", "xml", "file", '\0', "xml-file", "xml-file", 1, + NULL, OPAL_CMD_LINE_TYPE_STRING, + "Provide all output in XML format to the specified file" }, + /* tag output */ { "orte", "tag", "output", '\0', "tag-output", "tag-output", 0, NULL, OPAL_CMD_LINE_TYPE_BOOL, @@ -568,13 +571,7 @@ int orterun(int argc, char *argv[]) ORTE_ERROR_LOG(rc); return rc; } - - /* if we are using xml for output, put an mpirun start tag */ - if (orte_xml_output) { - fprintf(stdout, "\n"); - fflush(stdout); - } - + /* check for request to report uri */ if (NULL != orterun_globals.report_uri) { FILE *fp; @@ -830,36 +827,36 @@ static void job_completed(int trigpipe, short event, void *arg) show a followup message here */ if (num_failed_start > 1) { if (orte_xml_output) { - printf(""); + fprintf(orte_xml_fp, ""); } - printf("%d total process%s failed to start", + fprintf(orte_xml_fp, "%d total process%s failed to start", num_failed_start, ((num_failed_start > 1) ? 
"es" : "")); if (orte_xml_output) { - printf(" "); + fprintf(orte_xml_fp, " "); } - printf("\n"); + fprintf(orte_xml_fp, "\n"); } if (num_aborted > 1) { if (orte_xml_output) { - printf(""); + fprintf(orte_xml_fp, ""); } - printf("%d total process%s aborted", + fprintf(orte_xml_fp, "%d total process%s aborted", num_aborted, ((num_aborted > 1) ? "es" : "")); if (orte_xml_output) { - printf(" "); + fprintf(orte_xml_fp, " "); } - printf("\n"); + fprintf(orte_xml_fp, "\n"); } if (num_killed > 1) { if (orte_xml_output) { - printf(""); + fprintf(orte_xml_fp, ""); } - printf("%d total process%s killed (some possibly by %s during cleanup)", + fprintf(orte_xml_fp, "%d total process%s killed (some possibly by %s during cleanup)", num_killed, ((num_killed > 1) ? "es" : ""), orterun_basename); if (orte_xml_output) { - printf(" "); + fprintf(orte_xml_fp, " "); } - printf("\n"); + fprintf(orte_xml_fp, "\n"); } } @@ -922,12 +919,6 @@ static void just_quit(int fd, short ign, void *arg) /* cleanup and leave */ orte_finalize(); - - /* if we are using xml output, terminate the output */ - if (orte_xml_output) { - fprintf(stdout, "\n"); - fflush(stdout); - } free(orterun_basename); if (orte_debug_flag) { diff --git a/orte/util/show_help.c b/orte/util/show_help.c index baaad8fcab..3aab4f284c 100644 --- a/orte/util/show_help.c +++ b/orte/util/show_help.c @@ -322,7 +322,7 @@ static void show_accumulated_duplicates(int fd, short event, void *context) tli->tli_filename, tli->tli_topic); output = xml_format((unsigned char*)tmp); free(tmp); - fprintf(stdout, "%s", output); + fprintf(orte_xml_fp, "%s", output); free(output); } else { opal_output(0, "%d more process%s sent help message %s / %s", @@ -334,8 +334,8 @@ static void show_accumulated_duplicates(int fd, short event, void *context) if (first) { if (orte_xml_output) { - fprintf(stdout, "Set MCA parameter \"orte_base_help_aggregate\" to 0 to see all help / error messages\n"); - fflush(stdout); + fprintf(orte_xml_fp, "Set MCA parameter 
\"orte_base_help_aggregate\" to 0 to see all help / error messages\n"); + fflush(orte_xml_fp); } else { opal_output(0, "Set MCA parameter \"orte_base_help_aggregate\" to 0 to see all help / error messages"); } @@ -406,8 +406,8 @@ static int show_help(const char *filename, const char *topic, if (orte_xml_output) { char *tmp; tmp = xml_format((unsigned char*)output); - fprintf(stdout, "%s", tmp); - fflush(stdout); + fprintf(orte_xml_fp, "%s", tmp); + fflush(orte_xml_fp); free(tmp); } else { fprintf(stderr, "%s", output);