Extend the iof by adding two new components to support map-reduce IO chaining. Add a mapreduce tool for running such applications.
Fix the state machine to support multiple jobs being simultaneously launched as this is not only required for mapreduce, but can happen under comm-spawn applications as well. This commit was SVN r26380.
Этот коммит содержится в:
родитель
40c2fc5f55
Коммит
b2f77bf08f
@ -4,7 +4,7 @@
|
||||
# Copyright (c) 2009-2010 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2011 Los Alamos National Security, LLC. All rights
|
||||
# Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
# reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
@ -32,5 +32,6 @@ AC_DEFUN([ORTE_CONFIG_FILES],[
|
||||
orte/tools/orte-top/Makefile
|
||||
orte/tools/orte-migrate/Makefile
|
||||
orte/tools/orte-info/Makefile
|
||||
orte/tools/mapreduce/Makefile
|
||||
])
|
||||
])
|
||||
|
@ -86,7 +86,7 @@ void orte_grpcomm_base_xcast_recv(int status, orte_process_name_t* sender,
|
||||
* knows what to do - it will also free the bytes in the bo
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base.output,
|
||||
"%s grpcomm:base:xcast updating nidmap",
|
||||
"%s grpcomm:base:xcast updating daemon nidmap",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
if (ORTE_SUCCESS != (ret = orte_ess.update_nidmap(bo))) {
|
||||
|
@ -10,6 +10,8 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -42,7 +44,10 @@
|
||||
#include <signal.h>
|
||||
#endif
|
||||
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/class/opal_bitmap.h"
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/mca/event/event.h"
|
||||
|
||||
#include "orte/mca/iof/iof.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
@ -53,6 +58,14 @@ ORTE_DECLSPEC int orte_iof_base_open(void);
|
||||
|
||||
#if !ORTE_DISABLE_FULL_SUPPORT
|
||||
|
||||
/* track xon/xoff of processes */
|
||||
/* Per-job bookkeeping object used by the IOF to track flow control
 * (xon/xoff) state for the procs of a job.
 */
typedef struct {
|
||||
/* base class - instances are managed via OBJ_NEW/OBJ_RELEASE */
opal_object_t super;
|
||||
/* job being tracked; released (if non-NULL) by the class destructor */
orte_job_t *jdata;
|
||||
/* bitmap of procs in the job - presumably a set bit marks a proc
 * whose stdin is currently "xoff"; confirm against the users
 */
opal_bitmap_t xoff;
|
||||
} orte_iof_job_t;
|
||||
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_iof_job_t);
|
||||
|
||||
/*
|
||||
* Maximum size of single msg
|
||||
*/
|
||||
@ -76,10 +89,7 @@ typedef struct {
|
||||
orte_process_name_t daemon;
|
||||
orte_iof_tag_t tag;
|
||||
orte_iof_write_event_t *wev;
|
||||
#if OPAL_ENABLE_DEBUG
|
||||
char *file;
|
||||
int line;
|
||||
#endif
|
||||
bool xoff;
|
||||
} orte_iof_sink_t;
|
||||
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_iof_sink_t);
|
||||
|
||||
@ -90,10 +100,6 @@ typedef struct {
|
||||
int fd;
|
||||
orte_iof_tag_t tag;
|
||||
bool active;
|
||||
#if OPAL_ENABLE_DEBUG
|
||||
char *file;
|
||||
int line;
|
||||
#endif
|
||||
} orte_iof_read_event_t;
|
||||
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_iof_read_event_t);
|
||||
|
||||
@ -103,6 +109,7 @@ typedef struct {
|
||||
orte_iof_read_event_t *revstdout;
|
||||
orte_iof_read_event_t *revstderr;
|
||||
orte_iof_read_event_t *revstddiag;
|
||||
orte_iof_sink_t *sink;
|
||||
} orte_iof_proc_t;
|
||||
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_iof_proc_t);
|
||||
|
||||
@ -116,6 +123,7 @@ ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_iof_write_output_t);
|
||||
/* the iof globals struct */
|
||||
struct orte_iof_base_t {
|
||||
int iof_output;
|
||||
char *input_files;
|
||||
opal_list_t iof_components_opened;
|
||||
opal_mutex_t iof_write_output_lock;
|
||||
orte_iof_sink_t *iof_write_stdout;
|
||||
@ -124,8 +132,6 @@ struct orte_iof_base_t {
|
||||
typedef struct orte_iof_base_t orte_iof_base_t;
|
||||
|
||||
|
||||
#if OPAL_ENABLE_DEBUG
|
||||
|
||||
#define ORTE_IOF_SINK_DEFINE(snk, nm, fid, tg, wrthndlr, eplist) \
|
||||
do { \
|
||||
orte_iof_sink_t *ep; \
|
||||
@ -147,8 +153,6 @@ typedef struct orte_iof_base_t orte_iof_base_t;
|
||||
opal_list_append((eplist), &ep->super); \
|
||||
} \
|
||||
*(snk) = ep; \
|
||||
ep->file = strdup(__FILE__); \
|
||||
ep->line = __LINE__; \
|
||||
} while(0);
|
||||
|
||||
/* add list of structs that has name of proc + orte_iof_tag_t - when
|
||||
@ -171,8 +175,6 @@ typedef struct orte_iof_base_t orte_iof_base_t;
|
||||
rev->tag = (tg); \
|
||||
rev->fd = (fid); \
|
||||
*(rv) = rev; \
|
||||
rev->file = strdup(__FILE__); \
|
||||
rev->line = __LINE__; \
|
||||
opal_event_set(orte_event_base, \
|
||||
rev->ev, (fid), \
|
||||
OPAL_EV_READ, \
|
||||
@ -184,49 +186,6 @@ typedef struct orte_iof_base_t orte_iof_base_t;
|
||||
} while(0);
|
||||
|
||||
|
||||
#else
|
||||
|
||||
#define ORTE_IOF_SINK_DEFINE(snk, nm, fid, tg, wrthndlr, eplist) \
|
||||
do { \
|
||||
orte_iof_sink_t *ep; \
|
||||
ep = OBJ_NEW(orte_iof_sink_t); \
|
||||
ep->name.jobid = (nm)->jobid; \
|
||||
ep->name.vpid = (nm)->vpid; \
|
||||
ep->tag = (tg); \
|
||||
if (0 <= (fid)) { \
|
||||
ep->wev->fd = (fid); \
|
||||
opal_event_set(orte_event_base, \
|
||||
ep->wev->ev, ep->wev->fd, \
|
||||
OPAL_EV_WRITE, \
|
||||
wrthndlr, ep); \
|
||||
} \
|
||||
if (NULL != (eplist)) { \
|
||||
opal_list_append((eplist), &ep->super); \
|
||||
} \
|
||||
*(snk) = ep; \
|
||||
} while(0);
|
||||
|
||||
#define ORTE_IOF_READ_EVENT(rv, nm, fid, tg, cbfunc, actv) \
|
||||
do { \
|
||||
orte_iof_read_event_t *rev; \
|
||||
rev = OBJ_NEW(orte_iof_read_event_t); \
|
||||
rev->name.jobid = (nm)->jobid; \
|
||||
rev->name.vpid = (nm)->vpid; \
|
||||
rev->tag = (tg); \
|
||||
rev->fd = (fid); \
|
||||
*(rv) = rev; \
|
||||
opal_event_set(orte_event_base, \
|
||||
rev->ev, (fid), \
|
||||
OPAL_EV_READ, \
|
||||
(cbfunc), rev); \
|
||||
if ((actv)) { \
|
||||
rev->active = true; \
|
||||
opal_event_add(rev->ev, 0); \
|
||||
} \
|
||||
} while(0);
|
||||
|
||||
#endif
|
||||
|
||||
ORTE_DECLSPEC int orte_iof_base_close(void);
|
||||
ORTE_DECLSPEC int orte_iof_base_select(void);
|
||||
ORTE_DECLSPEC int orte_iof_base_flush(void);
|
||||
|
@ -63,11 +63,29 @@ int orte_iof_base_open(void)
|
||||
#else
|
||||
|
||||
/* class instances */
|
||||
/* Constructor for orte_iof_job_t: starts with no job attached and an
 * empty (constructed but not yet sized) xoff bitmap.
 */
static void orte_iof_job_construct(orte_iof_job_t *job)
{
    OBJ_CONSTRUCT(&job->xoff, opal_bitmap_t);
    job->jdata = NULL;
}

/* Destructor for orte_iof_job_t: tears down the xoff bitmap and drops
 * the reference held on the tracked job, if one was attached.
 */
static void orte_iof_job_destruct(orte_iof_job_t *job)
{
    OBJ_DESTRUCT(&job->xoff);

    if (NULL == job->jdata) {
        return;
    }
    OBJ_RELEASE(job->jdata);
}

OBJ_CLASS_INSTANCE(orte_iof_job_t,
                   opal_object_t,
                   orte_iof_job_construct,
                   orte_iof_job_destruct);
|
||||
|
||||
/* Constructor for orte_iof_proc_t: a freshly created proc tracker has
 * no stdin sink and no read events for any of its output streams.
 */
static void orte_iof_base_proc_construct(orte_iof_proc_t* proc)
{
    /* stdin side */
    proc->sink = NULL;

    /* output side: stdout, stderr, stddiag */
    proc->revstdout  = NULL;
    proc->revstderr  = NULL;
    proc->revstddiag = NULL;
}
|
||||
static void orte_iof_base_proc_destruct(orte_iof_proc_t* ptr)
|
||||
{
|
||||
@ -92,6 +110,7 @@ static void orte_iof_base_sink_construct(orte_iof_sink_t* ptr)
|
||||
ptr->daemon.jobid = ORTE_JOBID_INVALID;
|
||||
ptr->daemon.vpid = ORTE_VPID_INVALID;
|
||||
ptr->wev = OBJ_NEW(orte_iof_write_event_t);
|
||||
ptr->xoff = false;
|
||||
}
|
||||
static void orte_iof_base_sink_destruct(orte_iof_sink_t* ptr)
|
||||
{
|
||||
@ -205,6 +224,11 @@ int orte_iof_base_open(void)
|
||||
}
|
||||
}
|
||||
|
||||
/* check for files to be sent to stdin of procs */
|
||||
mca_base_param_reg_string_name("iof", "base_input_files",
|
||||
"Comma-separated list of input files to be read and sent to stdin of procs (default: NULL)",
|
||||
false, false, NULL, &orte_iof_base.input_files);
|
||||
|
||||
/* daemons do not need to do this as they do not write out stdout/err */
|
||||
if (!ORTE_PROC_IS_DAEMON ||
|
||||
(ORTE_PROC_IS_DAEMON && ORTE_PROC_IS_CM)) {
|
||||
|
@ -60,7 +60,8 @@ int orte_iof_base_write_output(orte_process_name_t *name, orte_iof_tag_t stream,
|
||||
"%s write:output setting up to write %d bytes to %s for %s on fd %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
|
||||
(ORTE_IOF_STDIN & stream) ? "stdin" : ((ORTE_IOF_STDOUT & stream) ? "stdout" : ((ORTE_IOF_STDERR & stream) ? "stderr" : "stddiag")),
|
||||
ORTE_NAME_PRINT(name), channel->fd));
|
||||
ORTE_NAME_PRINT(name),
|
||||
(NULL == channel) ? -1 : channel->fd));
|
||||
|
||||
/* setup output object */
|
||||
output = OBJ_NEW(orte_iof_write_output_t);
|
||||
@ -251,9 +252,6 @@ construct:
|
||||
output->numbytes = k;
|
||||
|
||||
process:
|
||||
/* lock us up to protect global operations */
|
||||
OPAL_THREAD_LOCK(&orte_iof_base.iof_write_output_lock);
|
||||
|
||||
/* add this data to the write list for this fd */
|
||||
opal_list_append(&channel->outputs, &output->super);
|
||||
|
||||
@ -270,9 +268,6 @@ process:
|
||||
channel->pending = true;
|
||||
}
|
||||
|
||||
/* unlock and go */
|
||||
OPAL_THREAD_UNLOCK(&orte_iof_base.iof_write_output_lock);
|
||||
|
||||
return num_buffered;
|
||||
}
|
||||
|
||||
@ -289,11 +284,13 @@ void orte_iof_base_write_handler(int fd, short event, void *cbdata)
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
wev->fd));
|
||||
|
||||
/* lock us up to protect global operations */
|
||||
OPAL_THREAD_LOCK(&orte_iof_base.iof_write_output_lock);
|
||||
|
||||
while (NULL != (item = opal_list_remove_first(&wev->outputs))) {
|
||||
output = (orte_iof_write_output_t*)item;
|
||||
if (0 == output->numbytes) {
|
||||
/* indicates we are to close this stream */
|
||||
OBJ_RELEASE(sink);
|
||||
return;
|
||||
}
|
||||
num_written = write(wev->fd, output->data, output->numbytes);
|
||||
if (num_written < 0) {
|
||||
if (EAGAIN == errno || EINTR == errno) {
|
||||
@ -302,7 +299,7 @@ void orte_iof_base_write_handler(int fd, short event, void *cbdata)
|
||||
/* leave the write event running so it will call us again
|
||||
* when the fd is ready.
|
||||
*/
|
||||
goto DEPART;
|
||||
return;
|
||||
}
|
||||
/* otherwise, something bad happened so all we can do is abort
|
||||
* this attempt
|
||||
@ -312,12 +309,12 @@ void orte_iof_base_write_handler(int fd, short event, void *cbdata)
|
||||
} else if (num_written < output->numbytes) {
|
||||
/* incomplete write - adjust data to avoid duplicate output */
|
||||
memmove(output->data, &output->data[num_written], output->numbytes - num_written);
|
||||
/* push this item back on the front of the list */
|
||||
/* push this item back on the front of the list */
|
||||
opal_list_prepend(&wev->outputs, item);
|
||||
/* leave the write event running so it will call us again
|
||||
* when the fd is ready
|
||||
*/
|
||||
goto DEPART;
|
||||
return;
|
||||
}
|
||||
OBJ_RELEASE(output);
|
||||
}
|
||||
@ -325,7 +322,4 @@ ABORT:
|
||||
opal_event_del(wev->ev);
|
||||
wev->pending = false;
|
||||
|
||||
DEPART:
|
||||
/* unlock and go */
|
||||
OPAL_THREAD_UNLOCK(&orte_iof_base.iof_write_output_lock);
|
||||
}
|
||||
|
@ -57,8 +57,11 @@
|
||||
|
||||
#include "opal/util/opal_pty.h"
|
||||
#include "opal/util/opal_environ.h"
|
||||
#include "opal/util/output.h"
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
|
||||
#include "orte/mca/iof/iof.h"
|
||||
#include "orte/mca/iof/base/iof_base_setup.h"
|
||||
|
@ -84,6 +84,7 @@ orte_iof_base_module_t orte_iof_hnp_module = {
|
||||
hnp_push,
|
||||
hnp_pull,
|
||||
hnp_close,
|
||||
NULL,
|
||||
finalize,
|
||||
hnp_ft_event
|
||||
};
|
||||
|
@ -181,6 +181,10 @@ void orte_iof_hnp_read_local_handler(int fd, short event, void *cbdata)
|
||||
OPAL_THREAD_UNLOCK(&mca_iof_hnp_component.lock);
|
||||
return;
|
||||
}
|
||||
if (0 < numbytes && numbytes < (int)sizeof(data)) {
|
||||
/* need to write a 0-byte event to clear the stream and close it */
|
||||
orte_iof_base_write_output(&rev->name, ORTE_IOF_STDIN, data, 0, sink->wev);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
@ -207,8 +211,8 @@ void orte_iof_hnp_read_local_handler(int fd, short event, void *cbdata)
|
||||
}
|
||||
}
|
||||
}
|
||||
/* if num_bytes was zero, then we need to terminate the event */
|
||||
if (0 == numbytes) {
|
||||
/* if num_bytes was zero, or we read the last piece of the file, then we need to terminate the event */
|
||||
if (0 == numbytes || numbytes < (int)sizeof(data)) {
|
||||
/* this will also close our stdin file descriptor */
|
||||
OBJ_RELEASE(mca_iof_hnp_component.stdinev);
|
||||
} else {
|
||||
|
@ -10,6 +10,8 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -119,6 +121,7 @@
|
||||
#include "opal/mca/crs/crs.h"
|
||||
#include "opal/mca/crs/base/base.h"
|
||||
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
|
||||
#include "iof_types.h"
|
||||
|
||||
@ -157,6 +160,9 @@ typedef int (*orte_iof_base_pull_fn_t)(const orte_process_name_t* peer,
|
||||
typedef int (*orte_iof_base_close_fn_t)(const orte_process_name_t* peer,
|
||||
orte_iof_tag_t source_tag);
|
||||
|
||||
/* Flag that a job is complete */
|
||||
typedef void (*orte_iof_base_complete_fn_t)(const orte_job_t *jdata);
|
||||
|
||||
/* finalize the selected module */
|
||||
typedef int (*orte_iof_base_finalize_fn_t)(void);
|
||||
|
||||
@ -173,6 +179,7 @@ struct orte_iof_base_module_2_0_0_t {
|
||||
orte_iof_base_push_fn_t push;
|
||||
orte_iof_base_pull_fn_t pull;
|
||||
orte_iof_base_close_fn_t close;
|
||||
orte_iof_base_complete_fn_t complete;
|
||||
orte_iof_base_finalize_fn_t finalize;
|
||||
orte_iof_base_ft_event_fn_t ft_event;
|
||||
};
|
||||
|
40
orte/mca/iof/mr_hnp/Makefile.am
Обычный файл
40
orte/mca/iof/mr_hnp/Makefile.am
Обычный файл
@ -0,0 +1,40 @@
|
||||
#
|
||||
# Copyright (c) 2012 Los Alamos National Security, LLC.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
EXTRA_DIST = .windows
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
# (for static builds).
|
||||
|
||||
if MCA_BUILD_orte_iof_mr_hnp_DSO
|
||||
component_noinst =
|
||||
component_install = mca_iof_mr_hnp.la
|
||||
else
|
||||
component_noinst = libmca_iof_mr_hnp.la
|
||||
component_install =
|
||||
endif
|
||||
|
||||
mr_hnp_SOURCES = \
|
||||
iof_mrhnp.c \
|
||||
iof_mrhnp.h \
|
||||
iof_mrhnp_component.c \
|
||||
iof_mrhnp_read.c \
|
||||
iof_mrhnp_receive.c
|
||||
|
||||
mcacomponentdir = $(pkglibdir)
|
||||
mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_iof_mr_hnp_la_SOURCES = $(mr_hnp_SOURCES)
|
||||
mca_iof_mr_hnp_la_LDFLAGS = -module -avoid-version
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_iof_mr_hnp_la_SOURCES = $(mr_hnp_SOURCES)
|
||||
libmca_iof_mr_hnp_la_LIBADD =
|
||||
libmca_iof_mr_hnp_la_LDFLAGS = -module -avoid-version
|
19
orte/mca/iof/mr_hnp/configure.m4
Обычный файл
19
orte/mca/iof/mr_hnp/configure.m4
Обычный файл
@ -0,0 +1,19 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2012 Los Alamos National Security, LLC.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
# MCA_orte_iof_mr_hnp_CONFIG([action-if-found], [action-if-not-found])
|
||||
# -----------------------------------------------------------
|
||||
AC_DEFUN([MCA_orte_iof_mr_hnp_CONFIG], [
|
||||
AC_CONFIG_FILES([orte/mca/iof/mr_hnp/Makefile])
|
||||
|
||||
AS_IF([test "$orte_without_full_support" = 0],
|
||||
[$1],
|
||||
[$2])
|
||||
])
|
700
orte/mca/iof/mr_hnp/iof_mrhnp.c
Обычный файл
700
orte/mca/iof/mr_hnp/iof_mrhnp.c
Обычный файл
@ -0,0 +1,700 @@
|
||||
/*
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
#include "orte_config.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "orte/constants.h"
|
||||
|
||||
#include <errno.h>
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif /* HAVE_UNISTD_H */
|
||||
#ifdef HAVE_STRING_H
|
||||
#include <string.h>
|
||||
#endif /* HAVE_STRING_H */
|
||||
|
||||
#ifdef HAVE_FCNTL_H
|
||||
#include <fcntl.h>
|
||||
#else
|
||||
#ifdef HAVE_SYS_FCNTL_H
|
||||
#include <sys/fcntl.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "opal/mca/event/event.h"
|
||||
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/mca/odls/odls_types.h"
|
||||
|
||||
#include "orte/mca/iof/base/base.h"
|
||||
#include "iof_mrhnp.h"
|
||||
|
||||
/* LOCAL FUNCTIONS */
|
||||
static void stdin_write_handler(int fd, short event, void *cbdata);
|
||||
|
||||
/* API FUNCTIONS */
|
||||
static int init(void);
|
||||
|
||||
static int mrhnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag, int fd);
|
||||
|
||||
static int mrhnp_pull(const orte_process_name_t* src_name,
|
||||
orte_iof_tag_t src_tag,
|
||||
int fd);
|
||||
|
||||
static int mrhnp_close(const orte_process_name_t* peer,
|
||||
orte_iof_tag_t source_tag);
|
||||
|
||||
static void mrhnp_complete(const orte_job_t *jdata);
|
||||
|
||||
static int finalize(void);
|
||||
|
||||
static int mrhnp_ft_event(int state);
|
||||
|
||||
/* The API's in this module are solely used to support LOCAL
|
||||
* procs - i.e., procs that are co-located to the HNP. Remote
|
||||
* procs interact with the HNP's IOF via the HNP's receive function,
|
||||
* which operates independently and is in the iof_mrhnp_receive.c file
|
||||
*/
|
||||
|
||||
/* Function table for the mr_hnp IOF module. The initializer is
 * positional, so the entries must stay in the same order as the
 * fields of the module struct declared in iof.h.
 */
orte_iof_base_module_t orte_iof_mrhnp_module = {
|
||||
init,            /* module initialization */
|
||||
mrhnp_push,      /* push */
|
||||
mrhnp_pull,      /* pull */
|
||||
mrhnp_close,     /* close */
|
||||
mrhnp_complete,  /* complete - called when a job is flagged complete */
|
||||
finalize,        /* finalize */
|
||||
mrhnp_ft_event   /* ft_event */
|
||||
};
|
||||
|
||||
/* Initialize the module */
|
||||
static int init(void)
|
||||
{
|
||||
int rc;
|
||||
|
||||
/* post non-blocking recv to catch forwarded IO from
|
||||
* the orteds
|
||||
*/
|
||||
if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
|
||||
ORTE_RML_TAG_IOF_HNP,
|
||||
ORTE_RML_PERSISTENT,
|
||||
orte_iof_mrhnp_recv,
|
||||
NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
|
||||
}
|
||||
|
||||
OBJ_CONSTRUCT(&mca_iof_mr_hnp_component.sinks, opal_list_t);
|
||||
OBJ_CONSTRUCT(&mca_iof_mr_hnp_component.procs, opal_list_t);
|
||||
mca_iof_mr_hnp_component.stdinev = NULL;
|
||||
OBJ_CONSTRUCT(&mca_iof_mr_hnp_component.stdin_jobs, opal_pointer_array_t);
|
||||
opal_pointer_array_init(&mca_iof_mr_hnp_component.stdin_jobs, 1, INT_MAX, 1);
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/* Setup to read from stdin.
|
||||
*/
|
||||
/* Register a file descriptor with the IOF on behalf of dst_name.
 *
 * For an output tag (stdout/stderr/stddiag) of a LOCAL proc: make the fd
 * non-blocking, find or create the proc's tracking object, optionally
 * create a per-proc output file sink (when orte_output_filename is set),
 * and define a read event for the stream. The read events are only
 * activated once all three streams of the proc have been defined.
 *
 * For the stdin tag: if the target job is a mapper, record it in the
 * component's list of stdin-receiving jobs, then (once only) define the
 * read event on our stdin, activating it immediately unless we are on a
 * tty and the stdin check says we should wait.
 *
 * Returns ORTE_SUCCESS (also when dst_name's vpid is invalid or fd is
 * negative, in which case nothing is done), ORTE_ERR_NOT_FOUND if the
 * job object cannot be located, ORTE_ERR_OUT_OF_RESOURCE if the output
 * filename cannot be built, or ORTE_ERR_FILE_OPEN_FAILURE if the output
 * file cannot be created.
 */
static int mrhnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag, int fd)
{
    orte_job_t *jdata;
    orte_iof_sink_t *sink;
    orte_iof_proc_t *proct;
    opal_list_item_t *item;
    int flags;
    char *outfile;
    int fdout;
    int np, numdigs;
    orte_ns_cmp_bitmask_t mask;
    orte_iof_job_t *jptr;
    int j;
    bool found;

    /* don't do this if the dst vpid is invalid or the fd is negative! */
    if (ORTE_VPID_INVALID == dst_name->vpid || fd < 0) {
        return ORTE_SUCCESS;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                         "%s iof:mrhnp pushing fd %d for process %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         fd, ORTE_NAME_PRINT(dst_name)));

    /* we get a push for stdout, stderr, and stddiag on every LOCAL process, so
     * setup to read those streams and forward them to the next app_context
     */
    if (!(src_tag & ORTE_IOF_STDIN)) {
        /* set the file descriptor to non-blocking - do this before we setup
         * and activate the read event in case it fires right away
         */
        if ((flags = fcntl(fd, F_GETFL, 0)) < 0) {
            opal_output(orte_iof_base.iof_output, "[%s:%d]: fcntl(F_GETFL) failed with errno=%d\n",
                        __FILE__, __LINE__, errno);
        } else {
            flags |= O_NONBLOCK;
            fcntl(fd, F_SETFL, flags);
        }
        /* do we already have this process in our list? */
        for (item = opal_list_get_first(&mca_iof_mr_hnp_component.procs);
             item != opal_list_get_end(&mca_iof_mr_hnp_component.procs);
             item = opal_list_get_next(item)) {
            proct = (orte_iof_proc_t*)item;
            mask = ORTE_NS_CMP_ALL;
            if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &proct->name, dst_name)) {
                /* found it */
                goto SETUP;
            }
        }
        /* if we get here, then we don't yet have this proc in our list */
        proct = OBJ_NEW(orte_iof_proc_t);
        proct->name.jobid = dst_name->jobid;
        proct->name.vpid = dst_name->vpid;
        opal_list_append(&mca_iof_mr_hnp_component.procs, &proct->super);
        /* see if we are to output to a file */
        if (NULL != orte_output_filename) {
            /* get the jobdata for this proc */
            if (NULL == (jdata = orte_get_job_data_object(dst_name->jobid))) {
                ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
                return ORTE_ERR_NOT_FOUND;
            }
            np = jdata->num_procs / 10;
            /* determine the number of digits required for max vpid */
            numdigs = 1;
            while (np > 0) {
                numdigs++;
                np = np / 10;
            }
            /* construct the filename - on failure the contents of outfile
             * are undefined, so it must not be passed to open() or free()
             */
            outfile = NULL;
            if (0 > asprintf(&outfile, "%s.%d.%0*lu", orte_output_filename,
                             (int)ORTE_LOCAL_JOBID(proct->name.jobid),
                             numdigs, (unsigned long)proct->name.vpid)) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                return ORTE_ERR_OUT_OF_RESOURCE;
            }
            /* create the file */
            fdout = open(outfile, O_CREAT|O_RDWR|O_TRUNC, 0644);
            free(outfile);
            if (fdout < 0) {
                /* couldn't be opened */
                ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE);
                return ORTE_ERR_FILE_OPEN_FAILURE;
            }
            /* define a sink to that file descriptor */
            ORTE_IOF_SINK_DEFINE(&sink, dst_name, fdout, ORTE_IOF_STDOUTALL,
                                 orte_iof_base_write_handler,
                                 &mca_iof_mr_hnp_component.sinks);
        }

    SETUP:
        /* define a read event but don't activate it */
        if (src_tag & ORTE_IOF_STDOUT) {
            ORTE_IOF_READ_EVENT(&proct->revstdout, dst_name, fd, ORTE_IOF_STDOUT,
                                orte_iof_mrhnp_read_local_handler, false);
        } else if (src_tag & ORTE_IOF_STDERR) {
            ORTE_IOF_READ_EVENT(&proct->revstderr, dst_name, fd, ORTE_IOF_STDERR,
                                orte_iof_mrhnp_read_local_handler, false);
        } else if (src_tag & ORTE_IOF_STDDIAG) {
            ORTE_IOF_READ_EVENT(&proct->revstddiag, dst_name, fd, ORTE_IOF_STDDIAG,
                                orte_iof_mrhnp_read_local_handler, false);
        }
        /* only activate the read events once -all- of them have been
         * defined for this proc. Otherwise, one of them could fire -prior-
         * to the others being defined and we would wrongly conclude that
         * the proc is complete
         */
        if (NULL != proct->revstdout && NULL != proct->revstderr && NULL != proct->revstddiag) {
            /* now activate read events */
            proct->revstdout->active = true;
            opal_event_add(proct->revstdout->ev, 0);
            proct->revstderr->active = true;
            opal_event_add(proct->revstderr->ev, 0);
            proct->revstddiag->active = true;
            opal_event_add(proct->revstddiag->ev, 0);
        }
        return ORTE_SUCCESS;
    }

    /*** HANDLE STDIN PUSH ***/

    /* get the job object for this proc and check to see if it
     * is a mapper - if so, add it to the jobs that receive
     * our stdin
     */
    if (NULL == (jdata = orte_get_job_data_object(dst_name->jobid))) {
        /* unknown job - report it rather than dereferencing NULL */
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        return ORTE_ERR_NOT_FOUND;
    }
    if (ORTE_JOB_CONTROL_MAPPER & jdata->controls) {
        /* see if we already have it */
        found = false;
        for (j=0; j < mca_iof_mr_hnp_component.stdin_jobs.size; j++) {
            if (NULL == (jptr = (orte_iof_job_t*)opal_pointer_array_get_item(&mca_iof_mr_hnp_component.stdin_jobs, j))) {
                continue;
            }
            if (jptr->jdata->jobid == jdata->jobid) {
                found = true;
                break;
            }
        }
        if (!found) {
            /* the tracker holds its own reference on the job object */
            jptr = OBJ_NEW(orte_iof_job_t);
            OBJ_RETAIN(jdata);
            jptr->jdata = jdata;
            opal_bitmap_init(&jptr->xoff, jdata->num_procs);
            opal_pointer_array_add(&mca_iof_mr_hnp_component.stdin_jobs, jptr);
        }
    }

    /* now setup the read - but check to only do this once */
    if (NULL == mca_iof_mr_hnp_component.stdinev) {
        /* Since we are the HNP, we don't want to set nonblocking on our
         * stdio stream.  If we do so, we set the file descriptor to
         * non-blocking for everyone that has that file descriptor, which
         * includes everyone else in our shell pipeline chain.  (See
         * http://lists.freebsd.org/pipermail/freebsd-hackers/2005-January/009742.html).
         * This causes things like "mpirun -np 1 big_app | cat" to lose
         * output, because cat's stdout is then ALSO non-blocking and cat
         * isn't built to deal with that case (same with almost all other
         * unix text utils).
         */
        if (0 != fd) {
            if ((flags = fcntl(fd, F_GETFL, 0)) < 0) {
                opal_output(orte_iof_base.iof_output, "[%s:%d]: fcntl(F_GETFL) failed with errno=%d\n",
                            __FILE__, __LINE__, errno);
            } else {
                flags |= O_NONBLOCK;
                fcntl(fd, F_SETFL, flags);
            }
        }
        if (isatty(fd)) {
            /* We should avoid trying to read from stdin if we
             * have a terminal, but are backgrounded.  Catch the
             * signals that are commonly used when we switch
             * between being backgrounded and not.  If the
             * filedescriptor is not a tty, don't worry about it
             * and always stay connected.
             */
            opal_event_signal_set(orte_event_base, &mca_iof_mr_hnp_component.stdinsig,
                                  SIGCONT, orte_iof_mrhnp_stdin_cb,
                                  NULL);

            /* setup a read event to read stdin, but don't activate it yet. The
             * dst_name indicates who should receive the stdin. If that recipient
             * doesn't do a corresponding pull, however, then the stdin will
             * be dropped upon receipt at the local daemon
             */
            ORTE_IOF_READ_EVENT(&mca_iof_mr_hnp_component.stdinev,
                                dst_name, fd, ORTE_IOF_STDIN,
                                orte_iof_mrhnp_read_local_handler, false);

            /* check to see if we want the stdin read event to be
             * active - we will always at least define the event,
             * but may delay its activation. Note that src_tag is
             * known to include ORTE_IOF_STDIN on this path, so the
             * stdin check alone decides
             */
            if (orte_iof_mrhnp_stdin_check(fd)) {
                mca_iof_mr_hnp_component.stdinev->active = true;
                opal_event_add(mca_iof_mr_hnp_component.stdinev->ev, 0);
            }
        } else {
            /* if we are not looking at a tty, just setup a read event
             * and activate it
             */
            ORTE_IOF_READ_EVENT(&mca_iof_mr_hnp_component.stdinev,
                                dst_name, fd, ORTE_IOF_STDIN,
                                orte_iof_mrhnp_read_local_handler, true);
        }
    }
    return ORTE_SUCCESS;
}
|
||||
|
||||
|
||||
/*
|
||||
* Since we are the HNP, the only "pull" call comes from a local
|
||||
* process so we can record the file descriptor for its stdin.
|
||||
*/
|
||||
|
||||
static int mrhnp_pull(const orte_process_name_t* dst_name,
|
||||
orte_iof_tag_t src_tag,
|
||||
int fd)
|
||||
{
|
||||
orte_iof_sink_t *sink;
|
||||
int flags, j;
|
||||
orte_iof_proc_t *ptr, *proct;
|
||||
opal_list_item_t *item;
|
||||
orte_job_t *jdata;
|
||||
orte_iof_job_t *jptr;
|
||||
bool found;
|
||||
|
||||
/* this is a local call - only stdin is supported */
|
||||
if (ORTE_IOF_STDIN != src_tag) {
|
||||
return ORTE_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s iof:mrhnp pulling fd %d for process %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
fd, ORTE_NAME_PRINT(dst_name)));
|
||||
|
||||
/* get the job object for this proc and check to see if it
|
||||
* is a mapper - if so, add it to the jobs that receive
|
||||
* our stdin
|
||||
*/
|
||||
jdata = orte_get_job_data_object(dst_name->jobid);
|
||||
if (ORTE_JOB_CONTROL_MAPPER & jdata->controls) {
|
||||
/* see if we already have it */
|
||||
found = false;
|
||||
for (j=0; j < mca_iof_mr_hnp_component.stdin_jobs.size; j++) {
|
||||
if (NULL == (jptr = (orte_iof_job_t*)opal_pointer_array_get_item(&mca_iof_mr_hnp_component.stdin_jobs, j))) {
|
||||
continue;
|
||||
}
|
||||
if (jptr->jdata->jobid == jdata->jobid) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
jptr = OBJ_NEW(orte_iof_job_t);
|
||||
OBJ_RETAIN(jdata);
|
||||
jptr->jdata = jdata;
|
||||
opal_bitmap_init(&jptr->xoff, jdata->num_procs);
|
||||
opal_pointer_array_add(&mca_iof_mr_hnp_component.stdin_jobs, jptr);
|
||||
}
|
||||
}
|
||||
|
||||
/* set the file descriptor to non-blocking - do this before we setup
|
||||
* the sink in case it fires right away
|
||||
*/
|
||||
if((flags = fcntl(fd, F_GETFL, 0)) < 0) {
|
||||
opal_output(orte_iof_base.iof_output, "[%s:%d]: fcntl(F_GETFL) failed with errno=%d\n",
|
||||
__FILE__, __LINE__, errno);
|
||||
} else {
|
||||
flags |= O_NONBLOCK;
|
||||
fcntl(fd, F_SETFL, flags);
|
||||
}
|
||||
|
||||
ORTE_IOF_SINK_DEFINE(&sink, dst_name, fd, ORTE_IOF_STDIN,
|
||||
stdin_write_handler, NULL);
|
||||
sink->daemon.jobid = ORTE_PROC_MY_NAME->jobid;
|
||||
sink->daemon.vpid = ORTE_PROC_MY_NAME->vpid;
|
||||
|
||||
/* find the proct for this proc */
|
||||
proct = NULL;
|
||||
for (item = opal_list_get_first(&mca_iof_mr_hnp_component.procs);
|
||||
item != opal_list_get_end(&mca_iof_mr_hnp_component.procs);
|
||||
item = opal_list_get_next(item)) {
|
||||
ptr = (orte_iof_proc_t*)item;
|
||||
if (ptr->name.jobid == dst_name->jobid &&
|
||||
ptr->name.vpid == dst_name->vpid) {
|
||||
proct = ptr;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (NULL == proct) {
|
||||
/* we don't yet have this proc in our list */
|
||||
proct = OBJ_NEW(orte_iof_proc_t);
|
||||
proct->name.jobid = dst_name->jobid;
|
||||
proct->name.vpid = dst_name->vpid;
|
||||
opal_list_append(&mca_iof_mr_hnp_component.procs, &proct->super);
|
||||
}
|
||||
proct->sink = sink;
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* One of our local procs wants us to close the specified
|
||||
* stream(s), thus terminating any potential io to/from it.
|
||||
*/
|
||||
static int mrhnp_close(const orte_process_name_t* peer,
|
||||
orte_iof_tag_t source_tag)
|
||||
{
|
||||
opal_list_item_t *item, *next_item;
|
||||
orte_iof_sink_t* sink;
|
||||
orte_ns_cmp_bitmask_t mask;
|
||||
|
||||
for (item = opal_list_get_first(&mca_iof_mr_hnp_component.sinks);
|
||||
item != opal_list_get_end(&mca_iof_mr_hnp_component.sinks);
|
||||
item = next_item ) {
|
||||
sink = (orte_iof_sink_t*)item;
|
||||
next_item = opal_list_get_next(item);
|
||||
|
||||
mask = ORTE_NS_CMP_ALL;
|
||||
|
||||
if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &sink->name, peer) &&
|
||||
(source_tag & sink->tag)) {
|
||||
|
||||
/* No need to delete the event or close the file
|
||||
* descriptor - the destructor will automatically
|
||||
* do it for us.
|
||||
*/
|
||||
opal_list_remove_item(&mca_iof_mr_hnp_component.sinks, item);
|
||||
OBJ_RELEASE(item);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static void send_data(orte_process_name_t *name, orte_iof_tag_t tag,
|
||||
orte_jobid_t jobid,
|
||||
unsigned char *data, int32_t nbytes)
|
||||
{
|
||||
opal_buffer_t *buf;
|
||||
int rc;
|
||||
|
||||
buf = OBJ_NEW(opal_buffer_t);
|
||||
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.pack(buf, &tag, 1, ORTE_IOF_TAG))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return;
|
||||
}
|
||||
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.pack(buf, &jobid, 1, ORTE_JOBID))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return;
|
||||
}
|
||||
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.pack(buf, data, nbytes, OPAL_BYTE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return;
|
||||
}
|
||||
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(name, buf, ORTE_RML_TAG_IOF_PROXY,
|
||||
0, orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(buf);
|
||||
}
|
||||
}
|
||||
|
||||
static void mrhnp_complete(const orte_job_t *jdata)
|
||||
{
|
||||
orte_job_t *jptr;
|
||||
orte_job_map_t *map;
|
||||
orte_proc_t *daemon;
|
||||
orte_iof_proc_t *proct;
|
||||
unsigned char data[1];
|
||||
opal_list_item_t *item;
|
||||
int i;
|
||||
orte_node_t *node;
|
||||
|
||||
if (ORTE_JOBID_INVALID == jdata->stdout_target) {
|
||||
/* nothing to do */
|
||||
return;
|
||||
}
|
||||
|
||||
/* the job is complete - close out the stdin
|
||||
* of any procs it was feeding
|
||||
*/
|
||||
jptr = orte_get_job_data_object(jdata->stdout_target);
|
||||
map = jptr->map;
|
||||
/* cycle thru the map to find any node that has at least
|
||||
* one proc from this job
|
||||
*/
|
||||
for (i=0; i < map->nodes->size; i++) {
|
||||
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
|
||||
continue;
|
||||
}
|
||||
daemon = node->daemon;
|
||||
if (daemon->name.vpid == ORTE_PROC_MY_NAME->vpid) {
|
||||
for (item = opal_list_get_first(&mca_iof_mr_hnp_component.procs);
|
||||
item != opal_list_get_end(&mca_iof_mr_hnp_component.procs);
|
||||
item = opal_list_get_next(item)) {
|
||||
proct = (orte_iof_proc_t*)item;
|
||||
if (proct->name.jobid == jptr->jobid) {
|
||||
if (NULL != proct->sink) {
|
||||
/* need to write a 0-byte event to clear the stream and close it */
|
||||
orte_iof_base_write_output(&proct->name, ORTE_IOF_STDIN, data, 0, proct->sink->wev);
|
||||
proct->sink = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s sending close stdin to daemon %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&daemon->name)));
|
||||
|
||||
/* need to send a 0-byte message to clear the stream and close it */
|
||||
send_data(&daemon->name, ORTE_IOF_STDIN, jptr->jobid, data, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int finalize(void)
|
||||
{
|
||||
opal_list_item_t* item;
|
||||
orte_iof_write_output_t *output;
|
||||
orte_iof_write_event_t *wev;
|
||||
int num_written;
|
||||
bool dump;
|
||||
int i;
|
||||
orte_job_t *jdata;
|
||||
|
||||
/* check if anything is still trying to be written out */
|
||||
wev = orte_iof_base.iof_write_stdout->wev;
|
||||
if (!opal_list_is_empty(&wev->outputs)) {
|
||||
dump = false;
|
||||
/* make one last attempt to write this out */
|
||||
while (NULL != (item = opal_list_remove_first(&wev->outputs))) {
|
||||
output = (orte_iof_write_output_t*)item;
|
||||
if (!dump) {
|
||||
num_written = write(wev->fd, output->data, output->numbytes);
|
||||
if (num_written < output->numbytes) {
|
||||
/* don't retry - just cleanout the list and dump it */
|
||||
dump = true;
|
||||
}
|
||||
}
|
||||
OBJ_RELEASE(output);
|
||||
}
|
||||
}
|
||||
if (!orte_xml_output) {
|
||||
/* we only opened stderr channel if we are NOT doing xml output */
|
||||
wev = orte_iof_base.iof_write_stderr->wev;
|
||||
if (!opal_list_is_empty(&wev->outputs)) {
|
||||
dump = false;
|
||||
/* make one last attempt to write this out */
|
||||
while (NULL != (item = opal_list_remove_first(&wev->outputs))) {
|
||||
output = (orte_iof_write_output_t*)item;
|
||||
if (!dump) {
|
||||
num_written = write(wev->fd, output->data, output->numbytes);
|
||||
if (num_written < output->numbytes) {
|
||||
/* don't retry - just cleanout the list and dump it */
|
||||
dump = true;
|
||||
}
|
||||
}
|
||||
OBJ_RELEASE(output);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_IOF_HNP);
|
||||
|
||||
/* clear our stdin job array */
|
||||
for (i=0; i < mca_iof_mr_hnp_component.stdin_jobs.size; i++) {
|
||||
if (NULL == (jdata = (orte_job_t*)opal_pointer_array_get_item(&mca_iof_mr_hnp_component.stdin_jobs, i))) {
|
||||
continue;
|
||||
}
|
||||
OBJ_RELEASE(jdata);
|
||||
}
|
||||
OBJ_DESTRUCT(&mca_iof_mr_hnp_component.stdin_jobs);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
int mrhnp_ft_event(int state) {
|
||||
/*
|
||||
* Replica doesn't need to do anything for a checkpoint
|
||||
*/
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static void stdin_write_handler(int fd, short event, void *cbdata)
|
||||
{
|
||||
orte_iof_sink_t *sink = (orte_iof_sink_t*)cbdata;
|
||||
orte_iof_write_event_t *wev = sink->wev;
|
||||
opal_list_item_t *item;
|
||||
orte_iof_write_output_t *output;
|
||||
int num_written;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s mrhnp:stdin:write:handler writing data to %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
wev->fd));
|
||||
|
||||
wev->pending = false;
|
||||
|
||||
while (NULL != (item = opal_list_remove_first(&wev->outputs))) {
|
||||
output = (orte_iof_write_output_t*)item;
|
||||
/* if an abnormal termination has occurred, just dump
|
||||
* this data as we are aborting
|
||||
*/
|
||||
if (orte_abnormal_term_ordered) {
|
||||
OBJ_RELEASE(output);
|
||||
continue;
|
||||
}
|
||||
if (0 == output->numbytes) {
|
||||
/* this indicates we are to close the fd - there is
|
||||
* nothing to write
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((20, orte_iof_base.iof_output,
|
||||
"%s iof:mrhnp closing fd %d on write event due to zero bytes output",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), wev->fd));
|
||||
OBJ_RELEASE(wev);
|
||||
sink->wev = NULL;
|
||||
/* just leave - we don't want to restart the
|
||||
* read event!
|
||||
*/
|
||||
return;
|
||||
}
|
||||
num_written = write(wev->fd, output->data, output->numbytes);
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s mrhnp:stdin:write:handler wrote %d bytes",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
num_written));
|
||||
if (num_written < 0) {
|
||||
if (EAGAIN == errno || EINTR == errno) {
|
||||
/* push this item back on the front of the list */
|
||||
opal_list_prepend(&wev->outputs, item);
|
||||
/* leave the write event running so it will call us again
|
||||
* when the fd is ready.
|
||||
*/
|
||||
wev->pending = true;
|
||||
opal_event_add(wev->ev, 0);
|
||||
goto CHECK;
|
||||
}
|
||||
/* otherwise, something bad happened so all we can do is declare an
|
||||
* error and abort
|
||||
*/
|
||||
OBJ_RELEASE(output);
|
||||
OPAL_OUTPUT_VERBOSE((20, orte_iof_base.iof_output,
|
||||
"%s iof:mrhnp closing fd %d on write event due to negative bytes written",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), wev->fd));
|
||||
OBJ_RELEASE(wev);
|
||||
sink->wev = NULL;
|
||||
return;
|
||||
} else if (num_written < output->numbytes) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s mrhnp:stdin:write:handler incomplete write %d - adjusting data",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), num_written));
|
||||
/* incomplete write - adjust data to avoid duplicate output */
|
||||
memmove(output->data, &output->data[num_written], output->numbytes - num_written);
|
||||
/* push this item back on the front of the list */
|
||||
opal_list_prepend(&wev->outputs, item);
|
||||
/* leave the write event running so it will call us again
|
||||
* when the fd is ready.
|
||||
*/
|
||||
wev->pending = true;
|
||||
opal_event_add(wev->ev, 0);
|
||||
goto CHECK;
|
||||
}
|
||||
OBJ_RELEASE(output);
|
||||
}
|
||||
|
||||
CHECK:
|
||||
if (NULL != mca_iof_mr_hnp_component.stdinev &&
|
||||
!orte_abnormal_term_ordered &&
|
||||
!mca_iof_mr_hnp_component.stdinev->active) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"read event is off - checking if okay to restart"));
|
||||
/* if we have turned off the read event, check to
|
||||
* see if the output list has shrunk enough to
|
||||
* turn it back on
|
||||
*
|
||||
* RHC: Note that when multiple procs want stdin, we
|
||||
* can get into a fight between a proc turnin stdin
|
||||
* back "on" and other procs turning it "off". There
|
||||
* is no clear way to resolve this as different procs
|
||||
* may take input at different rates.
|
||||
*/
|
||||
if (opal_list_get_size(&wev->outputs) < ORTE_IOF_MAX_INPUT_BUFFERS) {
|
||||
/* restart the read */
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"restarting read event"));
|
||||
mca_iof_mr_hnp_component.stdinev->active = true;
|
||||
opal_event_add(mca_iof_mr_hnp_component.stdinev->ev, 0);
|
||||
}
|
||||
}
|
||||
}
|
64
orte/mca/iof/mr_hnp/iof_mrhnp.h
Обычный файл
64
orte/mca/iof/mr_hnp/iof_mrhnp.h
Обычный файл
@ -0,0 +1,64 @@
|
||||
/*
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef ORTE_IOF_MRHNP_H
|
||||
#define ORTE_IOF_MRHNP_H
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif /* HAVE_SYS_TYPES_H */
|
||||
#ifdef HAVE_SYS_UIO_H
|
||||
#include <sys/uio.h>
|
||||
#endif /* HAVE_SYS_UIO_H */
|
||||
#ifdef HAVE_NET_UIO_H
|
||||
#include <net/uio.h>
|
||||
#endif /* HAVE_NET_UIO_H */
|
||||
|
||||
#include "orte/mca/iof/iof.h"
|
||||
#include "orte/mca/iof/base/base.h"
|
||||
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/**
 * IOF MR_HNP Component
 *
 * Component-wide state shared by the mr_hnp IOF module, its read
 * handlers and its receive callback.
 */
typedef struct {
    /* base IOF component descriptor */
    orte_iof_base_component_t super;
    /* list of orte_iof_sink_t entries */
    opal_list_t sinks;
    /* list of orte_iof_proc_t entries, one per proc being tracked */
    opal_list_t procs;
    /* read event for our own stdin; NULL once it has been released */
    orte_iof_read_event_t *stdinev;
    /* NOTE(review): presumably a signal event tied to stdin handling -
     * no use is visible in this part of the code; confirm
     */
    opal_event_t stdinsig;
    /* NULL-able argv-style array produced by splitting
     * orte_iof_base.input_files on ',' in the component query
     */
    char **input_files;
    /* jobs that are to receive our stdin; the stdin read handler reads
     * the entries as orte_iof_job_t
     */
    opal_pointer_array_t stdin_jobs;
} orte_iof_mrhnp_component_t;
|
||||
|
||||
/* the single instance of the mr_hnp component */
ORTE_MODULE_DECLSPEC extern orte_iof_mrhnp_component_t mca_iof_mr_hnp_component;
/* the module handed back by the component query when mr_hnp is selected */
extern orte_iof_base_module_t orte_iof_mrhnp_module;

/* RML receive callback: handles XON/XOFF flow control for stdin and
 * writes output forwarded by remote procs to our stdout/stderr
 */
void orte_iof_mrhnp_recv(int status, orte_process_name_t* sender,
                         opal_buffer_t* buffer, orte_rml_tag_t tag,
                         void* cbdata);

/* read-event callback for our stdin and for the stdout/stderr/stddiag
 * of our local child procs; cbdata is the orte_iof_read_event_t
 */
void orte_iof_mrhnp_read_local_handler(int fd, short event, void *cbdata);
/* event callback that turns the stdin read event on or off depending
 * on the result of orte_iof_mrhnp_stdin_check(0)
 */
void orte_iof_mrhnp_stdin_cb(int fd, short event, void *cbdata);
/* return true if we should read stdin from fd, false otherwise */
bool orte_iof_mrhnp_stdin_check(int fd);

/* NOTE(review): this prototype carries the plain "hnp" prefix and no
 * definition is visible in the mr_hnp sources shown here - confirm it
 * is intended to be declared in this header
 */
int orte_iof_hnp_send_data_to_endpoint(orte_process_name_t *host,
                                       orte_process_name_t *target,
                                       orte_iof_tag_t tag,
                                       unsigned char *data, int numbytes);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
96
orte/mca/iof/mr_hnp/iof_mrhnp_component.c
Обычный файл
96
orte/mca/iof/mr_hnp/iof_mrhnp_component.c
Обычный файл
@ -0,0 +1,96 @@
|
||||
/*
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
|
||||
#include "orte/util/proc_info.h"
|
||||
|
||||
#include "orte/mca/iof/base/base.h"
|
||||
#include "iof_mrhnp.h"
|
||||
|
||||
/*
|
||||
* Local functions
|
||||
*/
|
||||
static int mrhnp_open(void);
|
||||
static int mrhnp_close(void);
|
||||
static int mrhnp_query(mca_base_module_t **module, int *priority);
|
||||
|
||||
/*
|
||||
* Public string showing the iof hnp component version number
|
||||
*/
|
||||
const char *mca_iof_mr_hnp_component_version_string =
|
||||
"Open MPI mr_hnp iof MCA component version " ORTE_VERSION;
|
||||
|
||||
/* The mr_hnp component instance. Only the base component descriptor is
 * initialized explicitly; the members that follow it in
 * orte_iof_mrhnp_component_t are left to default (zero) initialization.
 */
orte_iof_mrhnp_component_t mca_iof_mr_hnp_component = {
    {
        /* First, the mca_base_component_t struct containing meta
           information about the component itself */

        {
            ORTE_IOF_BASE_VERSION_2_0_0,

            "mr_hnp", /* MCA component name */
            ORTE_MAJOR_VERSION,  /* MCA component major version */
            ORTE_MINOR_VERSION,  /* MCA component minor version */
            ORTE_RELEASE_VERSION,  /* MCA component release version */

            /* Component open, close, and query functions */
            mrhnp_open,
            mrhnp_close,
            mrhnp_query
        },
        {
            /* The component is checkpoint ready */
            MCA_BASE_METADATA_PARAM_CHECKPOINT
        },

    }
};
|
||||
|
||||
/**
|
||||
* component open/close/init function
|
||||
*/
|
||||
static int mrhnp_open(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static int mrhnp_close(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Module query
|
||||
*/
|
||||
|
||||
static int mrhnp_query(mca_base_module_t **module, int *priority)
|
||||
{
|
||||
mca_iof_mr_hnp_component.input_files = NULL;
|
||||
|
||||
/* select if we are HNP and map-reduce mode is operational */
|
||||
if (ORTE_PROC_IS_HNP && orte_map_reduce) {
|
||||
*priority = 1000;
|
||||
*module = (mca_base_module_t *) &orte_iof_mrhnp_module;
|
||||
if (NULL != orte_iof_base.input_files) {
|
||||
mca_iof_mr_hnp_component.input_files = opal_argv_split(orte_iof_base.input_files, ',');
|
||||
}
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
*priority = -1;
|
||||
*module = NULL;
|
||||
return ORTE_ERROR;
|
||||
}
|
383
orte/mca/iof/mr_hnp/iof_mrhnp_read.c
Обычный файл
383
orte/mca/iof/mr_hnp/iof_mrhnp_read.c
Обычный файл
@ -0,0 +1,383 @@
|
||||
/*
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
|
||||
#include <errno.h>
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif /* HAVE_UNISTD_H */
|
||||
#ifdef HAVE_STRING_H
|
||||
#include <string.h>
|
||||
#endif /* HAVE_STRING_H */
|
||||
|
||||
#include "opal/dss/dss.h"
|
||||
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/odls/odls_types.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/mca/state/state.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
|
||||
#include "orte/mca/iof/iof.h"
|
||||
#include "orte/mca/iof/base/base.h"
|
||||
|
||||
#include "iof_mrhnp.h"
|
||||
|
||||
static void send_data(orte_process_name_t *name, orte_iof_tag_t tag,
|
||||
orte_jobid_t jobid,
|
||||
unsigned char *data, int32_t nbytes);
|
||||
|
||||
static void restart_stdin(int fd, short event, void *cbdata)
|
||||
{
|
||||
orte_timer_t *tm = (orte_timer_t*)cbdata;
|
||||
|
||||
opal_output(0, "RESTART STDIN");
|
||||
if (NULL != mca_iof_mr_hnp_component.stdinev &&
|
||||
!orte_job_term_ordered &&
|
||||
!mca_iof_mr_hnp_component.stdinev->active) {
|
||||
mca_iof_mr_hnp_component.stdinev->active = true;
|
||||
opal_event_add(mca_iof_mr_hnp_component.stdinev->ev, 0);
|
||||
}
|
||||
|
||||
/* if this was a timer callback, then release the timer */
|
||||
if (NULL != tm) {
|
||||
OBJ_RELEASE(tm);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Decide whether stdin should be read from fd.
 *
 * Returns false on Windows. Where tcgetpgrp() is available, returns false
 * when fd is a terminal whose foreground process group is not ours.
 * Returns true in every other case.
 */
bool orte_iof_mrhnp_stdin_check(int fd)
{
    bool ok_to_read = true;

#if defined(__WINDOWS__)
    ok_to_read = false;
#elif defined(HAVE_TCGETPGRP)
    if (isatty(fd) && (getpgrp() != tcgetpgrp(fd))) {
        ok_to_read = false;
    }
#endif

    return ok_to_read;
}
|
||||
|
||||
void orte_iof_mrhnp_stdin_cb(int fd, short event, void *cbdata)
|
||||
{
|
||||
bool should_process = orte_iof_mrhnp_stdin_check(0);
|
||||
|
||||
if (should_process) {
|
||||
mca_iof_mr_hnp_component.stdinev->active = true;
|
||||
opal_event_add(mca_iof_mr_hnp_component.stdinev->ev, 0);
|
||||
} else {
|
||||
opal_event_del(mca_iof_mr_hnp_component.stdinev->ev);
|
||||
mca_iof_mr_hnp_component.stdinev->active = false;
|
||||
}
|
||||
}
|
||||
|
||||
/* this is the read handler for my own child procs and stdin
 *
 * cbdata is the orte_iof_read_event_t that fired; its tag says which
 * stream (stdin, stdout, stderr or stddiag) the fd belongs to and its
 * name identifies the proc. One chunk of up to ORTE_IOF_BASE_MSG_MAX
 * bytes is read per invocation and then:
 *   - stdin data is fanned out to every job on the stdin_jobs array;
 *   - stdout data is either forwarded as stdin to the job named by the
 *     source job's stdout_target, or written to our own stdout when
 *     there is no target;
 *   - any other non-stdin stream is written to our stderr channel;
 *   - a 0-byte read on an output stream releases that stream's read
 *     event and, once all three are gone, retires the proc.
 */
void orte_iof_mrhnp_read_local_handler(int fd, short event, void *cbdata)
{
    orte_iof_read_event_t *rev = (orte_iof_read_event_t*)cbdata;
    unsigned char data[ORTE_IOF_BASE_MSG_MAX];
    int32_t numbytes;
    opal_list_item_t *item;
    orte_iof_proc_t *proct;
    int i, j;
    orte_ns_cmp_bitmask_t mask;
    orte_job_t *jdata;
    orte_iof_job_t *iofjob;
    orte_node_t *node;
    orte_proc_t *daemon;
    orte_job_map_t *map;
    /* set only when stdout data has no downstream job and must be printed */
    bool write_out=false;

    /* read up to the fragment size */
#if !defined(__WINDOWS__)
    numbytes = read(fd, data, sizeof(data));
#else
    {
        /* NOTE(review): the ReadFile result is not checked here */
        DWORD readed;
        HANDLE handle = (HANDLE)_get_osfhandle(fd);
        ReadFile(handle, data, sizeof(data), &readed, NULL);
        numbytes = (int)readed;
    }
#endif  /* !defined(__WINDOWS__) */

    /* NOTE(review): errno is examined below, after this logging call -
     * confirm the logging cannot disturb errno
     */
    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                         "%s iof:mrhnp:read handler read %d bytes from %s:%d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                         ORTE_NAME_PRINT(&rev->name), fd));

    if (numbytes < 0) {
        /* either we have a connection error or it was a non-blocking read */

        /* non-blocking, retry */
        if (EAGAIN == errno || EINTR == errno) {
            opal_event_add(rev->ev, 0);
            return;
        }

        OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                             "%s iof:mrhnp:read handler %s Error on connection:%d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(&rev->name), fd));
        /* Un-recoverable error. Allow the code to flow as usual in order
         * to send the zero bytes message up the stream, and then close the
         * file descriptor and delete the event.
         */
        numbytes = 0;
    }

    /* if job termination has been ordered, just ignore the
     * data and delete the stdin read event, if that is what fired
     */
    if (orte_job_term_ordered) {
        if (ORTE_IOF_STDIN & rev->tag) {
            OBJ_RELEASE(mca_iof_mr_hnp_component.stdinev);
        }
        return;
    }

    if (ORTE_IOF_STDIN & rev->tag) {
        /* The event has fired, so it's no longer active until we
         * re-add it
         */
        mca_iof_mr_hnp_component.stdinev->active = false;
        /* if this was read from my stdin, I need to send this input to all
         * daemons who host mapper procs
         */
        for (j=0; j < mca_iof_mr_hnp_component.stdin_jobs.size; j++) {
            if (NULL == (iofjob = (orte_iof_job_t*)opal_pointer_array_get_item(&mca_iof_mr_hnp_component.stdin_jobs, j))) {
                continue;
            }
            jdata = iofjob->jdata;
            OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                                 "%s read %d bytes from stdin - writing to job %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                                 ORTE_JOBID_PRINT(jdata->jobid)));
            /* NOTE(review): jdata->map and node->daemon are dereferenced
             * below without NULL checks - confirm they are always set
             * by the time stdin is being read
             */
            map = jdata->map;
            for (i=0; i < map->nodes->size; i++) {
                if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
                    continue;
                }
                daemon = node->daemon;

                if (daemon->name.vpid == ORTE_PROC_MY_NAME->vpid) {
                    /* if it is me, then send the bytes down the stdin pipe
                     * for every local proc (they are all on my proct list) - we even send 0 byte events
                     * down the pipe so it forces out any preceding data before
                     * closing the output stream. We add a 0 byte message if
                     * numbytes < sizeof(data) as this means the chunk we read
                     * was the end of the file.
                     */
                    for (item = opal_list_get_first(&mca_iof_mr_hnp_component.procs);
                         item != opal_list_get_end(&mca_iof_mr_hnp_component.procs);
                         item = opal_list_get_next(item)) {
                        proct = (orte_iof_proc_t*)item;
                        if (proct->name.jobid == jdata->jobid) {
                            if (NULL == proct->sink) {
                                opal_output(0, "NULL SINK FOR PROC %s", ORTE_NAME_PRINT(&proct->name));
                                continue;
                            }
                            if (ORTE_IOF_MAX_INPUT_BUFFERS < orte_iof_base_write_output(&proct->name, ORTE_IOF_STDIN, data, numbytes, proct->sink->wev)) {
                                /* getting too backed up - stop the read event for now if it is still active */
                                /* NOTE(review): active was cleared at the top of this
                                 * branch, so this test looks like it can only be true
                                 * if something called in between re-armed stdin - confirm
                                 */
                                if (mca_iof_mr_hnp_component.stdinev->active) {
                                    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                                                         "buffer backed up - holding"));
                                    mca_iof_mr_hnp_component.stdinev->active = false;
                                }
                                /* leave without re-arming the read event and without
                                 * visiting the remaining procs, nodes and jobs
                                 */
                                return;
                            }
                            if (0 < numbytes && numbytes < (int)sizeof(data)) {
                                /* need to write a 0-byte event to clear the stream and close it */
                                orte_iof_base_write_output(&proct->name, ORTE_IOF_STDIN, data, 0, proct->sink->wev);
                                proct->sink = NULL;
                            }
                        }
                    }
                } else {
                    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                                         "%s sending %d bytes from stdin to daemon %s",
                                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                                         ORTE_NAME_PRINT(&daemon->name)));

                    /* send the data to the daemon so it can
                     * write it to all local procs from this job.
                     * If the connection closed,
                     * numbytes will be zero so zero bytes will be
                     * sent - this will tell the daemon to close
                     * the fd for stdin to that proc
                     */
                    send_data(&daemon->name, ORTE_IOF_STDIN, jdata->jobid, data, numbytes);
                    if (0 < numbytes && numbytes < (int)sizeof(data)) {
                        /* need to send a 0-byte message to clear the stream and close it */
                        send_data(&daemon->name, ORTE_IOF_STDIN, jdata->jobid, data, 0);
                    }
                }
            }
        }
        /* if num_bytes was zero, then we need to terminate the event */
        /* a short read (fewer bytes than the buffer holds) is treated as
         * end of input here as well
         */
        if (0 == numbytes || numbytes < (int)sizeof(data)) {
            /* this will also close our stdin file descriptor */
            if (NULL != mca_iof_mr_hnp_component.stdinev) {
                OBJ_RELEASE(mca_iof_mr_hnp_component.stdinev);
            }
        } else {
            /* if we are looking at a tty, then we just go ahead and restart the
             * read event assuming we are not backgrounded
             */
            if (orte_iof_mrhnp_stdin_check(fd)) {
                restart_stdin(fd, 0, NULL);
            } else {
                /* delay for awhile and then restart */
                ORTE_TIMER_EVENT(0, 10000, restart_stdin, ORTE_INFO_PRI);
            }
        }
        return;
    }

    if (ORTE_IOF_STDOUT & rev->tag && 0 < numbytes) {
        /* see if we need to forward this output */
        /* NOTE(review): the result of orte_get_job_data_object() is used
         * without a NULL check, here and for the target job below - confirm
         * both jobs are guaranteed to exist while output is flowing
         */
        jdata = orte_get_job_data_object(rev->name.jobid);
        if (ORTE_JOBID_INVALID == jdata->stdout_target) {
            /* end of the chain - just output the info */
            write_out = true;
            goto PROCESS;
        }
        /* it goes to the next job in the chain */
        jdata = orte_get_job_data_object(jdata->stdout_target);
        map = jdata->map;
        for (i=0; i < map->nodes->size; i++) {
            if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
                continue;
            }
            daemon = node->daemon;

            if (daemon->name.vpid == ORTE_PROC_MY_NAME->vpid) {
                /* if it is me, then send the bytes down the stdin pipe
                 * for every local proc (they are all on my proct list)
                 */
                for (item = opal_list_get_first(&mca_iof_mr_hnp_component.procs);
                     item != opal_list_get_end(&mca_iof_mr_hnp_component.procs);
                     item = opal_list_get_next(item)) {
                    proct = (orte_iof_proc_t*)item;
                    if (proct->name.jobid == jdata->jobid) {
                        if (NULL == proct->sink) {
                            opal_output(0, "NULL SINK FOR PROC %s", ORTE_NAME_PRINT(&proct->name));
                            continue;
                        }
                        orte_iof_base_write_output(&proct->name, ORTE_IOF_STDIN, data, numbytes, proct->sink->wev);
                    }
                }
            } else {
                OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                                     "%s sending %d bytes from stdout of %s to daemon %s",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                                     ORTE_NAME_PRINT(&rev->name),
                                     ORTE_NAME_PRINT(&daemon->name)));

                /* send the data to the daemon so it can
                 * write it to all local procs from this job
                 */
                send_data(&daemon->name, ORTE_IOF_STDIN, jdata->jobid, data, numbytes);
            }
        }
    }

 PROCESS:
    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                         "%s read %d bytes from %s of %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                         (ORTE_IOF_STDOUT & rev->tag) ? "stdout" : ((ORTE_IOF_STDERR & rev->tag) ? "stderr" : "stddiag"),
                         ORTE_NAME_PRINT(&rev->name)));

    if (0 == numbytes) {
        /* if we read 0 bytes from the stdout/err/diag, find this proc
         * on our list and
         * release the appropriate event. This will delete the
         * read event and close the file descriptor
         */
        for (item = opal_list_get_first(&mca_iof_mr_hnp_component.procs);
             item != opal_list_get_end(&mca_iof_mr_hnp_component.procs);
             item = opal_list_get_next(item)) {
            proct = (orte_iof_proc_t*)item;
            mask = ORTE_NS_CMP_ALL;
            if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &proct->name, &rev->name)) {
                /* found it - release corresponding event. This deletes
                 * the read event and closes the file descriptor
                 */
                if (rev->tag & ORTE_IOF_STDOUT) {
                    OBJ_RELEASE(proct->revstdout);
                } else if (rev->tag & ORTE_IOF_STDERR) {
                    OBJ_RELEASE(proct->revstderr);
                } else if (rev->tag & ORTE_IOF_STDDIAG) {
                    OBJ_RELEASE(proct->revstddiag);
                }
                /* check to see if they are all done */
                if (NULL == proct->revstdout &&
                    NULL == proct->revstderr &&
                    NULL == proct->revstddiag) {
                    /* this proc's iof is complete */
                    opal_list_remove_item(&mca_iof_mr_hnp_component.procs, item);
                    ORTE_ACTIVATE_PROC_STATE(&proct->name, ORTE_PROC_STATE_IOF_COMPLETE);
                    OBJ_RELEASE(proct);
                }
                /* safe to stop here even though the item may have been
                 * removed - the loop is not advanced again
                 */
                break;
            }
        }
        return;
    } else {
        /* output this to our local output */
        if (ORTE_IOF_STDOUT & rev->tag) {
            /* stdout that was forwarded to another job is not echoed locally */
            if (write_out) {
                orte_iof_base_write_output(&rev->name, rev->tag, data, numbytes, orte_iof_base.iof_write_stdout->wev);
            }
        } else {
            orte_iof_base_write_output(&rev->name, rev->tag, data, numbytes, orte_iof_base.iof_write_stderr->wev);
        }
    }

    /* re-add the event */
    opal_event_add(rev->ev, 0);

    return;
}
|
||||
|
||||
static void send_data(orte_process_name_t *name, orte_iof_tag_t tag,
|
||||
orte_jobid_t jobid,
|
||||
unsigned char *data, int32_t nbytes)
|
||||
{
|
||||
opal_buffer_t *buf;
|
||||
int rc;
|
||||
|
||||
buf = OBJ_NEW(opal_buffer_t);
|
||||
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.pack(buf, &tag, 1, ORTE_IOF_TAG))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return;
|
||||
}
|
||||
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.pack(buf, &jobid, 1, ORTE_JOBID))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return;
|
||||
}
|
||||
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.pack(buf, data, nbytes, OPAL_BYTE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return;
|
||||
}
|
||||
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(name, buf, ORTE_RML_TAG_IOF_PROXY,
|
||||
0, orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(buf);
|
||||
}
|
||||
}
|
106
orte/mca/iof/mr_hnp/iof_mrhnp_receive.c
Обычный файл
106
orte/mca/iof/mr_hnp/iof_mrhnp_receive.c
Обычный файл
@ -0,0 +1,106 @@
|
||||
/*
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
|
||||
#include <errno.h>
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif /* HAVE_UNISTD_H */
|
||||
#ifdef HAVE_STRING_H
|
||||
#include <string.h>
|
||||
#endif /* HAVE_STRING_H */
|
||||
#ifdef HAVE_FCNTL_H
|
||||
#include <fcntl.h>
|
||||
#else
|
||||
#ifdef HAVE_SYS_FCNTL_H
|
||||
#include <sys/fcntl.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
|
||||
#include "orte/mca/iof/iof.h"
|
||||
#include "orte/mca/iof/base/base.h"
|
||||
|
||||
#include "iof_mrhnp.h"
|
||||
|
||||
|
||||
void orte_iof_mrhnp_recv(int status, orte_process_name_t* sender,
|
||||
opal_buffer_t* buffer, orte_rml_tag_t tag,
|
||||
void* cbdata)
|
||||
{
|
||||
orte_process_name_t origin;
|
||||
unsigned char data[ORTE_IOF_BASE_MSG_MAX];
|
||||
orte_iof_tag_t stream;
|
||||
int32_t count, numbytes;
|
||||
int rc;
|
||||
|
||||
|
||||
/* unpack the stream first as this may be flow control info */
|
||||
count = 1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &stream, &count, ORTE_IOF_TAG))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto CLEAN_RETURN;
|
||||
}
|
||||
|
||||
if (ORTE_IOF_XON & stream) {
|
||||
/* re-start the stdin read event */
|
||||
if (NULL != mca_iof_mr_hnp_component.stdinev &&
|
||||
!orte_job_term_ordered &&
|
||||
!mca_iof_mr_hnp_component.stdinev->active) {
|
||||
mca_iof_mr_hnp_component.stdinev->active = true;
|
||||
opal_event_add(mca_iof_mr_hnp_component.stdinev->ev, 0);
|
||||
}
|
||||
goto CLEAN_RETURN;
|
||||
} else if (ORTE_IOF_XOFF & stream) {
|
||||
/* stop the stdin read event */
|
||||
if (NULL != mca_iof_mr_hnp_component.stdinev &&
|
||||
!mca_iof_mr_hnp_component.stdinev->active) {
|
||||
opal_event_del(mca_iof_mr_hnp_component.stdinev->ev);
|
||||
mca_iof_mr_hnp_component.stdinev->active = false;
|
||||
}
|
||||
goto CLEAN_RETURN;
|
||||
}
|
||||
|
||||
/* get name of the process whose io we are discussing */
|
||||
count = 1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &origin, &count, ORTE_NAME))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto CLEAN_RETURN;
|
||||
}
|
||||
|
||||
/* this must have come from a daemon forwarding output - unpack the data */
|
||||
numbytes=ORTE_IOF_BASE_MSG_MAX;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, data, &numbytes, OPAL_BYTE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto CLEAN_RETURN;
|
||||
}
|
||||
/* numbytes will contain the actual #bytes that were sent */
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s unpacked %d bytes from remote proc %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
|
||||
ORTE_NAME_PRINT(&origin)));
|
||||
|
||||
/* output this to our local output */
|
||||
if (ORTE_IOF_STDOUT & stream || orte_xml_output) {
|
||||
orte_iof_base_write_output(&origin, stream, data, numbytes, orte_iof_base.iof_write_stdout->wev);
|
||||
} else {
|
||||
orte_iof_base_write_output(&origin, stream, data, numbytes, orte_iof_base.iof_write_stderr->wev);
|
||||
}
|
||||
|
||||
CLEAN_RETURN:
|
||||
return;
|
||||
}
|
40
orte/mca/iof/mr_orted/Makefile.am
Обычный файл
40
orte/mca/iof/mr_orted/Makefile.am
Обычный файл
@ -0,0 +1,40 @@
|
||||
#
|
||||
# Copyright (c) 2012 Los Alamos National Security, LLC.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
EXTRA_DIST = .windows
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
# (for static builds).
|
||||
|
||||
if MCA_BUILD_orte_iof_mr_orted_DSO
|
||||
component_noinst =
|
||||
component_install = mca_iof_mr_orted.la
|
||||
else
|
||||
component_noinst = libmca_iof_mr_orted.la
|
||||
component_install =
|
||||
endif
|
||||
|
||||
mr_orted_SOURCES = \
|
||||
iof_mrorted.c \
|
||||
iof_mrorted.h \
|
||||
iof_mrorted_component.c \
|
||||
iof_mrorted_read.c \
|
||||
iof_mrorted_receive.c
|
||||
|
||||
mcacomponentdir = $(pkglibdir)
|
||||
mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_iof_mr_orted_la_SOURCES = $(mr_orted_SOURCES)
|
||||
mca_iof_mr_orted_la_LDFLAGS = -module -avoid-version
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_iof_mr_orted_la_SOURCES = $(mr_orted_SOURCES)
|
||||
libmca_iof_mr_orted_la_LIBADD =
|
||||
libmca_iof_mr_orted_la_LDFLAGS = -module -avoid-version
|
19
orte/mca/iof/mr_orted/configure.m4
Обычный файл
19
orte/mca/iof/mr_orted/configure.m4
Обычный файл
@ -0,0 +1,19 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2012 Los Alamos National Security, LLC.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
# MCA_orte_iof_mr_orted_CONFIG([action-if-found], [action-if-not-found])
|
||||
# -----------------------------------------------------------
|
||||
AC_DEFUN([MCA_orte_iof_mr_orted_CONFIG], [
|
||||
AC_CONFIG_FILES([orte/mca/iof/mr_orted/Makefile])
|
||||
|
||||
AS_IF([test "$orte_without_full_support" = 0],
|
||||
[$1],
|
||||
[$2])
|
||||
])
|
464
orte/mca/iof/mr_orted/iof_mrorted.c
Обычный файл
464
orte/mca/iof/mr_orted/iof_mrorted.c
Обычный файл
@ -0,0 +1,464 @@
|
||||
/*
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "orte/constants.h"
|
||||
|
||||
#include <errno.h>
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif /* HAVE_UNISTD_H */
|
||||
#ifdef HAVE_STRING_H
|
||||
#include <string.h>
|
||||
#endif /* HAVE_STRING_H */
|
||||
|
||||
#ifdef HAVE_FCNTL_H
|
||||
#include <fcntl.h>
|
||||
#else
|
||||
#ifdef HAVE_SYS_FCNTL_H
|
||||
#include <sys/fcntl.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/odls/odls_types.h"
|
||||
#include "orte/mca/rml/rml.h"
|
||||
|
||||
#include "orte/mca/iof/iof.h"
|
||||
#include "orte/mca/iof/base/base.h"
|
||||
|
||||
#include "iof_mrorted.h"
|
||||
|
||||
|
||||
/* LOCAL FUNCTIONS */
|
||||
static void stdin_write_handler(int fd, short event, void *cbdata);
|
||||
|
||||
|
||||
/* API FUNCTIONS */
|
||||
static int init(void);
|
||||
|
||||
static int mrorted_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag, int fd);
|
||||
|
||||
static int mrorted_pull(const orte_process_name_t* src_name,
|
||||
orte_iof_tag_t src_tag,
|
||||
int fd);
|
||||
|
||||
static int mrorted_close(const orte_process_name_t* peer,
|
||||
orte_iof_tag_t source_tag);
|
||||
|
||||
static void mrorted_complete(const orte_job_t *jdata);
|
||||
|
||||
static int finalize(void);
|
||||
|
||||
static int mrorted_ft_event(int state);
|
||||
|
||||
/* The APIs in this module serve LOCAL procs only - i.e., procs that
 * are co-located with this daemon. mrorted_push registers a local
 * proc's output descriptors for reading, mrorted_pull registers the
 * descriptor feeding a local proc's stdin, mrorted_close drops a
 * matching sink, and mrorted_complete queues a zero-byte write to
 * the stdin sink of each tracked proc in a finished job's
 * stdout_target job.
 *
 * The table below lists the handlers positionally; the meaning of
 * each slot is fixed by orte_iof_base_module_t, which is declared
 * elsewhere - presumably init, push, pull, close, complete,
 * finalize, ft_event in that order (TODO confirm against iof.h).
 */
|
||||
|
||||
orte_iof_base_module_t orte_iof_mrorted_module = {
|
||||
init,
|
||||
mrorted_push,
|
||||
mrorted_pull,
|
||||
mrorted_close,
|
||||
mrorted_complete,
|
||||
finalize,
|
||||
mrorted_ft_event
|
||||
};
|
||||
|
||||
static int init(void)
|
||||
{
|
||||
int rc;
|
||||
|
||||
/* post a non-blocking RML receive to get messages
|
||||
from the HNP IOF component */
|
||||
if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
|
||||
ORTE_RML_TAG_IOF_PROXY,
|
||||
ORTE_RML_PERSISTENT,
|
||||
orte_iof_mrorted_recv,
|
||||
NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
|
||||
}
|
||||
|
||||
/* setup the local global variables */
|
||||
OBJ_CONSTRUCT(&mca_iof_mr_orted_component.sinks, opal_list_t);
|
||||
OBJ_CONSTRUCT(&mca_iof_mr_orted_component.procs, opal_list_t);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Push data from the specified file descriptor
|
||||
* to the HNP
|
||||
*/
|
||||
|
||||
static int mrorted_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag, int fd)
|
||||
{
|
||||
int flags;
|
||||
opal_list_item_t *item;
|
||||
orte_iof_proc_t *proct;
|
||||
orte_iof_sink_t *sink;
|
||||
char *outfile;
|
||||
int fdout;
|
||||
orte_job_t *jobdat=NULL;
|
||||
int np, numdigs;
|
||||
orte_ns_cmp_bitmask_t mask;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s iof:mrorted pushing fd %d for process %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
fd, ORTE_NAME_PRINT(dst_name)));
|
||||
|
||||
/* set the file descriptor to non-blocking - do this before we setup
|
||||
* and activate the read event in case it fires right away
|
||||
*/
|
||||
if ((flags = fcntl(fd, F_GETFL, 0)) < 0) {
|
||||
opal_output(orte_iof_base.iof_output, "[%s:%d]: fcntl(F_GETFL) failed with errno=%d\n",
|
||||
__FILE__, __LINE__, errno);
|
||||
} else {
|
||||
flags |= O_NONBLOCK;
|
||||
fcntl(fd, F_SETFL, flags);
|
||||
}
|
||||
|
||||
/* do we already have this process in our list? */
|
||||
for (item = opal_list_get_first(&mca_iof_mr_orted_component.procs);
|
||||
item != opal_list_get_end(&mca_iof_mr_orted_component.procs);
|
||||
item = opal_list_get_next(item)) {
|
||||
proct = (orte_iof_proc_t*)item;
|
||||
mask = ORTE_NS_CMP_ALL;
|
||||
if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &proct->name, dst_name)) {
|
||||
/* found it */
|
||||
goto SETUP;
|
||||
}
|
||||
}
|
||||
/* if we get here, then we don't yet have this proc in our list */
|
||||
proct = OBJ_NEW(orte_iof_proc_t);
|
||||
proct->name.jobid = dst_name->jobid;
|
||||
proct->name.vpid = dst_name->vpid;
|
||||
opal_list_append(&mca_iof_mr_orted_component.procs, &proct->super);
|
||||
/* see if we are to output to a file */
|
||||
if (NULL != orte_output_filename) {
|
||||
/* get the local jobdata for this proc */
|
||||
if (NULL == (jobdat = orte_get_job_data_object(proct->name.jobid))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
np = jobdat->num_procs / 10;
|
||||
/* determine the number of digits required for max vpid */
|
||||
numdigs = 1;
|
||||
while (np > 0) {
|
||||
numdigs++;
|
||||
np = np / 10;
|
||||
}
|
||||
/* construct the filename */
|
||||
asprintf(&outfile, "%s.%d.%0*lu", orte_output_filename,
|
||||
(int)ORTE_LOCAL_JOBID(proct->name.jobid),
|
||||
numdigs, (unsigned long)proct->name.vpid);
|
||||
/* create the file */
|
||||
fdout = open(outfile, O_CREAT|O_RDWR|O_TRUNC, 0644);
|
||||
free(outfile);
|
||||
if (fdout < 0) {
|
||||
/* couldn't be opened */
|
||||
ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE);
|
||||
return ORTE_ERR_FILE_OPEN_FAILURE;
|
||||
}
|
||||
/* define a sink to that file descriptor */
|
||||
ORTE_IOF_SINK_DEFINE(&sink, dst_name, fdout, ORTE_IOF_STDOUTALL,
|
||||
orte_iof_base_write_handler,
|
||||
&mca_iof_mr_orted_component.sinks);
|
||||
}
|
||||
|
||||
SETUP:
|
||||
/* define a read event but don't activate it */
|
||||
if (src_tag & ORTE_IOF_STDOUT) {
|
||||
ORTE_IOF_READ_EVENT(&proct->revstdout, dst_name, fd, ORTE_IOF_STDOUT,
|
||||
orte_iof_mrorted_read_handler, false);
|
||||
} else if (src_tag & ORTE_IOF_STDERR) {
|
||||
ORTE_IOF_READ_EVENT(&proct->revstderr, dst_name, fd, ORTE_IOF_STDERR,
|
||||
orte_iof_mrorted_read_handler, false);
|
||||
} else if (src_tag & ORTE_IOF_STDDIAG) {
|
||||
ORTE_IOF_READ_EVENT(&proct->revstddiag, dst_name, fd, ORTE_IOF_STDDIAG,
|
||||
orte_iof_mrorted_read_handler, false);
|
||||
}
|
||||
/* if -all- of the readevents for this proc have been defined, then
|
||||
* activate them. Otherwise, we can think that the proc is complete
|
||||
* because one of the readevents fires -prior- to all of them having
|
||||
* been defined!
|
||||
*/
|
||||
if (NULL != proct->revstdout && NULL != proct->revstderr && NULL != proct->revstddiag) {
|
||||
proct->revstdout->active = true;
|
||||
opal_event_add(proct->revstdout->ev, 0);
|
||||
proct->revstderr->active = true;
|
||||
opal_event_add(proct->revstderr->ev, 0);
|
||||
proct->revstddiag->active = true;
|
||||
opal_event_add(proct->revstddiag->ev, 0);
|
||||
}
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Pull for a daemon tells
|
||||
* us that any info we receive from someone that is targeted
|
||||
* for stdin of the specified process should be fed down the
|
||||
* indicated file descriptor. Thus, all we need to do here
|
||||
* is define a local endpoint so we know where to feed anything
|
||||
* that comes to us
|
||||
*/
|
||||
|
||||
static int mrorted_pull(const orte_process_name_t* dst_name,
|
||||
orte_iof_tag_t src_tag,
|
||||
int fd)
|
||||
{
|
||||
orte_iof_sink_t *sink;
|
||||
int flags;
|
||||
orte_iof_proc_t *proct, *ptr;
|
||||
opal_list_item_t *item;
|
||||
|
||||
/* this is a local call - only stdin is supported */
|
||||
if (ORTE_IOF_STDIN != src_tag) {
|
||||
return ORTE_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s iof:mrorted pulling fd %d for process %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
fd, ORTE_NAME_PRINT(dst_name)));
|
||||
|
||||
/* set the file descriptor to non-blocking - do this before we setup
|
||||
* the sink in case it fires right away
|
||||
*/
|
||||
if((flags = fcntl(fd, F_GETFL, 0)) < 0) {
|
||||
opal_output(orte_iof_base.iof_output, "[%s:%d]: fcntl(F_GETFL) failed with errno=%d\n",
|
||||
__FILE__, __LINE__, errno);
|
||||
} else {
|
||||
flags |= O_NONBLOCK;
|
||||
fcntl(fd, F_SETFL, flags);
|
||||
}
|
||||
|
||||
ORTE_IOF_SINK_DEFINE(&sink, dst_name, fd, ORTE_IOF_STDIN,
|
||||
stdin_write_handler, NULL);
|
||||
|
||||
sink->daemon.jobid = ORTE_PROC_MY_NAME->jobid;
|
||||
sink->daemon.vpid = ORTE_PROC_MY_NAME->vpid;
|
||||
|
||||
/* find the proct for this proc */
|
||||
proct = NULL;
|
||||
for (item = opal_list_get_first(&mca_iof_mr_orted_component.procs);
|
||||
item != opal_list_get_end(&mca_iof_mr_orted_component.procs);
|
||||
item = opal_list_get_next(item)) {
|
||||
ptr = (orte_iof_proc_t*)item;
|
||||
if (ptr->name.jobid == dst_name->jobid &&
|
||||
ptr->name.vpid == dst_name->vpid) {
|
||||
proct = ptr;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (NULL == proct) {
|
||||
/* we don't yet have this proc in our list */
|
||||
proct = OBJ_NEW(orte_iof_proc_t);
|
||||
proct->name.jobid = dst_name->jobid;
|
||||
proct->name.vpid = dst_name->vpid;
|
||||
opal_list_append(&mca_iof_mr_orted_component.procs, &proct->super);
|
||||
}
|
||||
proct->sink = sink;
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* One of our local procs wants us to close the specifed
|
||||
* stream(s), thus terminating any potential io to/from it.
|
||||
* For the orted, this just means closing the local fd
|
||||
*/
|
||||
static int mrorted_close(const orte_process_name_t* peer,
|
||||
orte_iof_tag_t source_tag)
|
||||
{
|
||||
opal_list_item_t *item, *next_item;
|
||||
orte_iof_sink_t* sink;
|
||||
orte_ns_cmp_bitmask_t mask;
|
||||
|
||||
for(item = opal_list_get_first(&mca_iof_mr_orted_component.sinks);
|
||||
item != opal_list_get_end(&mca_iof_mr_orted_component.sinks);
|
||||
item = next_item ) {
|
||||
sink = (orte_iof_sink_t*)item;
|
||||
next_item = opal_list_get_next(item);
|
||||
|
||||
mask = ORTE_NS_CMP_ALL;
|
||||
|
||||
if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &sink->name, peer) &&
|
||||
(source_tag & sink->tag)) {
|
||||
|
||||
/* No need to delete the event or close the file
|
||||
* descriptor - the destructor will automatically
|
||||
* do it for us.
|
||||
*/
|
||||
opal_list_remove_item(&mca_iof_mr_orted_component.sinks, item);
|
||||
OBJ_RELEASE(item);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static void mrorted_complete(const orte_job_t *jdata)
|
||||
{
|
||||
orte_iof_proc_t *proct;
|
||||
unsigned char data[1];
|
||||
opal_list_item_t *item;
|
||||
|
||||
/* the job is complete - close out the stdin
|
||||
* of any procs it was feeding
|
||||
*/
|
||||
for (item = opal_list_get_first(&mca_iof_mr_orted_component.procs);
|
||||
item != opal_list_get_end(&mca_iof_mr_orted_component.procs);
|
||||
item = opal_list_get_next(item)) {
|
||||
proct = (orte_iof_proc_t*)item;
|
||||
if (proct->name.jobid == jdata->stdout_target) {
|
||||
if (NULL == proct->sink) {
|
||||
opal_output(0, "NULL SINK FOR PROC %s", ORTE_NAME_PRINT(&proct->name));
|
||||
continue;
|
||||
} else {
|
||||
/* need to write a 0-byte event to clear the stream and close it */
|
||||
orte_iof_base_write_output(&proct->name, ORTE_IOF_STDIN, data, 0, proct->sink->wev);
|
||||
proct->sink = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static int finalize(void)
|
||||
{
|
||||
int rc;
|
||||
opal_list_item_t *item;
|
||||
|
||||
while ((item = opal_list_remove_first(&mca_iof_mr_orted_component.sinks)) != NULL) {
|
||||
OBJ_RELEASE(item);
|
||||
}
|
||||
OBJ_DESTRUCT(&mca_iof_mr_orted_component.sinks);
|
||||
while ((item = opal_list_remove_first(&mca_iof_mr_orted_component.procs)) != NULL) {
|
||||
OBJ_RELEASE(item);
|
||||
}
|
||||
OBJ_DESTRUCT(&mca_iof_mr_orted_component.procs);
|
||||
/* Cancel the RML receive */
|
||||
rc = orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_IOF_PROXY);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
* FT event
|
||||
*/
|
||||
|
||||
static int mrorted_ft_event(int state)
|
||||
{
|
||||
return ORTE_ERR_NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
static void stdin_write_handler(int fd, short event, void *cbdata)
|
||||
{
|
||||
orte_iof_sink_t *sink = (orte_iof_sink_t*)cbdata;
|
||||
orte_iof_write_event_t *wev = sink->wev;
|
||||
opal_list_item_t *item;
|
||||
orte_iof_write_output_t *output;
|
||||
int num_written;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s mrorted:stdin:write:handler writing data to %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
wev->fd));
|
||||
|
||||
wev->pending = false;
|
||||
|
||||
while (NULL != (item = opal_list_remove_first(&wev->outputs))) {
|
||||
output = (orte_iof_write_output_t*)item;
|
||||
if (0 == output->numbytes) {
|
||||
/* this indicates we are to close the fd - there is
|
||||
* nothing to write
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((20, orte_iof_base.iof_output,
|
||||
"%s iof:mrorted closing fd %d on write event due to zero bytes output",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), wev->fd));
|
||||
OBJ_RELEASE(wev);
|
||||
sink->wev = NULL;
|
||||
return;
|
||||
}
|
||||
num_written = write(wev->fd, output->data, output->numbytes);
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s mrorted:stdin:write:handler wrote %d bytes",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
num_written));
|
||||
if (num_written < 0) {
|
||||
if (EAGAIN == errno || EINTR == errno) {
|
||||
/* push this item back on the front of the list */
|
||||
opal_list_prepend(&wev->outputs, item);
|
||||
/* leave the write event running so it will call us again
|
||||
* when the fd is ready.
|
||||
*/
|
||||
wev->pending = true;
|
||||
opal_event_add(wev->ev, 0);
|
||||
goto CHECK;
|
||||
}
|
||||
/* otherwise, something bad happened so all we can do is declare an error */
|
||||
OBJ_RELEASE(output);
|
||||
OPAL_OUTPUT_VERBOSE((20, orte_iof_base.iof_output,
|
||||
"%s iof:mrorted closing fd %d on write event due to negative bytes written",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), wev->fd));
|
||||
OBJ_RELEASE(wev);
|
||||
sink->wev = NULL;
|
||||
return;
|
||||
} else if (num_written < output->numbytes) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s mrorted:stdin:write:handler incomplete write %d - adjusting data",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), num_written));
|
||||
/* incomplete write - adjust data to avoid duplicate output */
|
||||
memmove(output->data, &output->data[num_written], output->numbytes - num_written);
|
||||
/* push this item back on the front of the list */
|
||||
opal_list_prepend(&wev->outputs, item);
|
||||
/* leave the write event running so it will call us again
|
||||
* when the fd is ready.
|
||||
*/
|
||||
wev->pending = true;
|
||||
opal_event_add(wev->ev, 0);
|
||||
goto CHECK;
|
||||
}
|
||||
OBJ_RELEASE(output);
|
||||
}
|
||||
|
||||
CHECK:
|
||||
if (sink->xoff) {
|
||||
/* if we have told the HNP to stop reading stdin, see if
|
||||
* the proc has absorbed enough to justify restart
|
||||
*
|
||||
* RHC: Note that when multiple procs want stdin, we
|
||||
* can get into a fight between a proc turnin stdin
|
||||
* back "on" and other procs turning it "off". There
|
||||
* is no clear way to resolve this as different procs
|
||||
* may take input at different rates.
|
||||
*/
|
||||
if (opal_list_get_size(&wev->outputs) < ORTE_IOF_MAX_INPUT_BUFFERS) {
|
||||
/* restart the read */
|
||||
sink->xoff = false;
|
||||
orte_iof_mrorted_send_xonxoff(&sink->name, ORTE_IOF_XON);
|
||||
}
|
||||
}
|
||||
}
|
45
orte/mca/iof/mr_orted/iof_mrorted.h
Обычный файл
45
orte/mca/iof/mr_orted/iof_mrorted.h
Обычный файл
@ -0,0 +1,45 @@
|
||||
/*
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
#ifndef ORTE_IOF_MR_ORTED_H
|
||||
#define ORTE_IOF_MR_ORTED_H
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "opal/class/opal_list.h"
|
||||
|
||||
#include "orte/mca/rml/rml_types.h"
|
||||
|
||||
#include "orte/mca/iof/iof.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/**
 * IOF MR_ORTED Component
 *
 * Extends the base IOF component with the two lists this component
 * keeps. Both are constructed in the module's init() and torn down
 * in its finalize().
 */
|
||||
typedef struct {
|
||||
orte_iof_base_component_t super; /* base IOF component - must come first */
|
||||
opal_list_t sinks; /* list handed to ORTE_IOF_SINK_DEFINE for per-proc output-file sinks */
|
||||
opal_list_t procs; /* orte_iof_proc_t tracking records, one per known local proc */
|
||||
} orte_iof_mrorted_component_t;
|
||||
|
||||
ORTE_MODULE_DECLSPEC extern orte_iof_mrorted_component_t mca_iof_mr_orted_component;
|
||||
extern orte_iof_base_module_t orte_iof_mrorted_module;
|
||||
|
||||
void orte_iof_mrorted_recv(int status, orte_process_name_t* sender,
|
||||
opal_buffer_t* buffer, orte_rml_tag_t tag,
|
||||
void* cbdata);
|
||||
|
||||
void orte_iof_mrorted_read_handler(int fd, short event, void *data);
|
||||
void orte_iof_mrorted_send_xonxoff(orte_process_name_t *name, orte_iof_tag_t tag);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
85
orte/mca/iof/mr_orted/iof_mrorted_component.c
Обычный файл
85
orte/mca/iof/mr_orted/iof_mrorted_component.c
Обычный файл
@ -0,0 +1,85 @@
|
||||
/*
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
|
||||
#include "orte/util/proc_info.h"
|
||||
|
||||
#include "iof_mrorted.h"
|
||||
|
||||
/*
|
||||
* Local functions
|
||||
*/
|
||||
static int mr_orted_open(void);
|
||||
static int mr_orted_close(void);
|
||||
static int mr_orted_query(mca_base_module_t **module, int *priority);
|
||||
|
||||
|
||||
/*
|
||||
* Public string showing the iof mr_orted component version number
|
||||
*/
|
||||
const char *mca_iof_mr_orted_component_version_string =
|
||||
"Open MPI mr_orted iof MCA component version " ORTE_VERSION;
|
||||
|
||||
|
||||
/*
 * The component instance. Only the base-component part is
 * initialized here (version, name, and the open/close/query entry
 * points, followed by the metadata); the sinks and procs lists are
 * left for the module's init() to construct.
 */
orte_iof_mrorted_component_t mca_iof_mr_orted_component = {
|
||||
{
|
||||
{
|
||||
ORTE_IOF_BASE_VERSION_2_0_0,
|
||||
|
||||
"mr_orted", /* MCA component name */
|
||||
ORTE_MAJOR_VERSION, /* MCA component major version */
|
||||
ORTE_MINOR_VERSION, /* MCA component minor version */
|
||||
ORTE_RELEASE_VERSION, /* MCA component release version */
|
||||
|
||||
/* Component open, close, and query functions */
|
||||
mr_orted_open,
|
||||
mr_orted_close,
|
||||
mr_orted_query
|
||||
},
|
||||
{
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* component open/close/init function
|
||||
*/
|
||||
static int mr_orted_open(void)
|
||||
{
|
||||
/* Nothing to do */
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static int mr_orted_close(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Component query: offer this module only when the process is a
 * daemon AND map-reduce mode (orte_map_reduce) is on.
 *
 * @param module    receives the module pointer, or NULL if declined
 * @param priority  receives 1000 when selected, -1 when declined
 *
 * @return ORTE_SUCCESS when the module is offered, ORTE_ERROR otherwise
 */
static int mr_orted_query(mca_base_module_t **module, int *priority)
{
    /* anything other than a map-reduce daemon must not pick us */
    if (!(ORTE_PROC_IS_DAEMON && orte_map_reduce)) {
        *priority = -1;
        *module = NULL;
        return ORTE_ERROR;
    }

    *priority = 1000;
    *module = (mca_base_module_t *) &orte_iof_mrorted_module;
    return ORTE_SUCCESS;
}
|
||||
|
281
orte/mca/iof/mr_orted/iof_mrorted_read.c
Обычный файл
281
orte/mca/iof/mr_orted/iof_mrorted_read.c
Обычный файл
@ -0,0 +1,281 @@
|
||||
/*
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
|
||||
#include <errno.h>
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif /* HAVE_UNISTD_H */
|
||||
#ifdef HAVE_STRING_H
|
||||
#include <string.h>
|
||||
#endif /* HAVE_STRING_H */
|
||||
|
||||
#include "opal/dss/dss.h"
|
||||
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/odls/odls_types.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/mca/state/state.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
|
||||
#include "orte/mca/iof/iof.h"
|
||||
#include "orte/mca/iof/base/base.h"
|
||||
|
||||
#include "iof_mrorted.h"
|
||||
|
||||
static void send_data(orte_process_name_t *name, orte_iof_tag_t tag,
|
||||
orte_jobid_t jobid,
|
||||
unsigned char *data, int32_t nbytes);
|
||||
|
||||
void orte_iof_mrorted_read_handler(int fd, short event, void *cbdata)
|
||||
{
|
||||
orte_iof_read_event_t *rev = (orte_iof_read_event_t*)cbdata;
|
||||
unsigned char data[ORTE_IOF_BASE_MSG_MAX];
|
||||
opal_buffer_t *buf=NULL;
|
||||
int rc;
|
||||
int32_t numbytes;
|
||||
opal_list_item_t *item;
|
||||
orte_iof_proc_t *proct;
|
||||
orte_ns_cmp_bitmask_t mask;
|
||||
orte_job_t *jdata;
|
||||
orte_job_map_t *map;
|
||||
int i;
|
||||
bool write_out=false;
|
||||
orte_node_t *node;
|
||||
orte_proc_t *daemon;
|
||||
|
||||
/* read up to the fragment size */
|
||||
#if !defined(__WINDOWS__)
|
||||
numbytes = read(fd, data, sizeof(data));
|
||||
#else
|
||||
{
|
||||
DWORD readed;
|
||||
HANDLE handle = (HANDLE)_get_osfhandle(fd);
|
||||
ReadFile(handle, data, sizeof(data), &readed, NULL);
|
||||
numbytes = (int)readed;
|
||||
}
|
||||
#endif /* !defined(__WINDOWS__) */
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s iof:mrorted:read handler read %d bytes from %s, fd %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
numbytes, ORTE_NAME_PRINT(&rev->name), fd));
|
||||
|
||||
if (numbytes <= 0) {
|
||||
if (0 > numbytes) {
|
||||
/* either we have a connection error or it was a non-blocking read */
|
||||
if (EAGAIN == errno || EINTR == errno) {
|
||||
/* non-blocking, retry */
|
||||
opal_event_add(rev->ev, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s iof:mrorted:read handler %s Error on connection:%d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&rev->name), fd));
|
||||
}
|
||||
/* numbytes must have been zero, so go down and close the fd etc */
|
||||
goto CLEAN_RETURN;
|
||||
}
|
||||
|
||||
/* see if the user wanted the output directed to files */
|
||||
if (NULL != orte_output_filename) {
|
||||
/* find the sink for this rank */
|
||||
for (item = opal_list_get_first(&mca_iof_mr_orted_component.sinks);
|
||||
item != opal_list_get_end(&mca_iof_mr_orted_component.sinks);
|
||||
item = opal_list_get_next(item)) {
|
||||
orte_iof_sink_t *sink = (orte_iof_sink_t*)item;
|
||||
/* if the target is set, then this sink is for another purpose - ignore it */
|
||||
if (ORTE_JOBID_INVALID != sink->daemon.jobid) {
|
||||
continue;
|
||||
}
|
||||
/* if this sink isn't for output, ignore it */
|
||||
if (ORTE_IOF_STDIN & sink->tag) {
|
||||
continue;
|
||||
}
|
||||
|
||||
mask = ORTE_NS_CMP_ALL;
|
||||
|
||||
/* is this the desired proc? */
|
||||
if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &sink->name, &rev->name)) {
|
||||
/* output to the corresponding file */
|
||||
orte_iof_base_write_output(&rev->name, rev->tag, data, numbytes, sink->wev);
|
||||
/* done */
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (ORTE_IOF_STDOUT & rev->tag) {
|
||||
/* see if we need to forward this output */
|
||||
jdata = orte_get_job_data_object(rev->name.jobid);
|
||||
if (ORTE_JOBID_INVALID == jdata->stdout_target) {
|
||||
/* end of the chain - just output the info */
|
||||
write_out = true;
|
||||
goto PROCESS;
|
||||
}
|
||||
/* it goes to the next job in the chain */
|
||||
jdata = orte_get_job_data_object(jdata->stdout_target);
|
||||
map = jdata->map;
|
||||
for (i=0; i < map->nodes->size; i++) {
|
||||
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
|
||||
continue;
|
||||
}
|
||||
daemon = node->daemon;
|
||||
if (daemon->name.vpid == ORTE_PROC_MY_NAME->vpid) {
|
||||
/* if it is me, then send the bytes down the stdin pipe
|
||||
* for every local proc (they are all on my proct list)
|
||||
*/
|
||||
for (item = opal_list_get_first(&mca_iof_mr_orted_component.procs);
|
||||
item != opal_list_get_end(&mca_iof_mr_orted_component.procs);
|
||||
item = opal_list_get_next(item)) {
|
||||
proct = (orte_iof_proc_t*)item;
|
||||
if (proct->name.jobid == jdata->jobid) {
|
||||
if (NULL == proct->sink) {
|
||||
opal_output(0, "NULL SINK FOR PROC %s", ORTE_NAME_PRINT(&proct->name));
|
||||
continue;
|
||||
}
|
||||
orte_iof_base_write_output(&proct->name, ORTE_IOF_STDIN, data, numbytes, proct->sink->wev);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s sending %d bytes from stdout of %s to daemon %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
|
||||
ORTE_NAME_PRINT(&rev->name),
|
||||
ORTE_NAME_PRINT(&daemon->name)));
|
||||
|
||||
/* send the data to the daemon so it can
|
||||
* write it to all local procs from this job
|
||||
*/
|
||||
send_data(&daemon->name, ORTE_IOF_STDIN, jdata->jobid, data, numbytes);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PROCESS:
|
||||
if (write_out) {
|
||||
/* prep the buffer */
|
||||
buf = OBJ_NEW(opal_buffer_t);
|
||||
|
||||
/* pack the stream first - we do this so that flow control messages can
|
||||
* consist solely of the tag
|
||||
*/
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &rev->tag, 1, ORTE_IOF_TAG))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto CLEAN_RETURN;
|
||||
}
|
||||
|
||||
/* pack name of process that gave us this data */
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &rev->name, 1, ORTE_NAME))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto CLEAN_RETURN;
|
||||
}
|
||||
|
||||
/* pack the data - only pack the #bytes we read! */
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &data, numbytes, OPAL_BYTE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto CLEAN_RETURN;
|
||||
}
|
||||
|
||||
/* start non-blocking RML call to forward received data */
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s iof:mrorted:read handler sending %d bytes to HNP",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes));
|
||||
|
||||
orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_IOF_HNP,
|
||||
0, orte_rml_send_callback, NULL);
|
||||
}
|
||||
|
||||
/* re-add the event */
|
||||
opal_event_add(rev->ev, 0);
|
||||
|
||||
return;
|
||||
|
||||
CLEAN_RETURN:
|
||||
/* must be an error, or zero bytes were read indicating that the
|
||||
* proc terminated this IOF channel - either way, find this proc
|
||||
* on our list and clean up
|
||||
*/
|
||||
for (item = opal_list_get_first(&mca_iof_mr_orted_component.procs);
|
||||
item != opal_list_get_end(&mca_iof_mr_orted_component.procs);
|
||||
item = opal_list_get_next(item)) {
|
||||
proct = (orte_iof_proc_t*)item;
|
||||
mask = ORTE_NS_CMP_ALL;
|
||||
if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &proct->name, &rev->name)) {
|
||||
/* found it - release corresponding event. This deletes
|
||||
* the read event and closes the file descriptor
|
||||
*/
|
||||
if (rev->tag & ORTE_IOF_STDOUT) {
|
||||
if( NULL != proct->revstdout ) {
|
||||
OBJ_RELEASE(proct->revstdout);
|
||||
}
|
||||
} else if (rev->tag & ORTE_IOF_STDERR) {
|
||||
if( NULL != proct->revstderr ) {
|
||||
OBJ_RELEASE(proct->revstderr);
|
||||
}
|
||||
} else if (rev->tag & ORTE_IOF_STDDIAG) {
|
||||
if( NULL != proct->revstddiag ) {
|
||||
OBJ_RELEASE(proct->revstddiag);
|
||||
}
|
||||
}
|
||||
/* check to see if they are all done */
|
||||
if (NULL == proct->revstdout &&
|
||||
NULL == proct->revstderr &&
|
||||
NULL == proct->revstddiag) {
|
||||
/* this proc's iof is complete */
|
||||
opal_list_remove_item(&mca_iof_mr_orted_component.procs, item);
|
||||
ORTE_ACTIVATE_PROC_STATE(&proct->name, ORTE_PROC_STATE_IOF_COMPLETE);
|
||||
OBJ_RELEASE(proct);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (NULL != buf) {
|
||||
OBJ_RELEASE(buf);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
static void send_data(orte_process_name_t *name, orte_iof_tag_t tag,
|
||||
orte_jobid_t jobid,
|
||||
unsigned char *data, int32_t nbytes)
|
||||
{
|
||||
opal_buffer_t *buf;
|
||||
int rc;
|
||||
|
||||
buf = OBJ_NEW(opal_buffer_t);
|
||||
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.pack(buf, &tag, 1, ORTE_IOF_TAG))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return;
|
||||
}
|
||||
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.pack(buf, &jobid, 1, ORTE_JOBID))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return;
|
||||
}
|
||||
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.pack(buf, data, nbytes, OPAL_BYTE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return;
|
||||
}
|
||||
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(name, buf, ORTE_RML_TAG_IOF_PROXY,
|
||||
0, orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(buf);
|
||||
}
|
||||
}
|
162
orte/mca/iof/mr_orted/iof_mrorted_receive.c
Обычный файл
162
orte/mca/iof/mr_orted/iof_mrorted_receive.c
Обычный файл
@ -0,0 +1,162 @@
|
||||
/*
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
|
||||
#include <errno.h>
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif /* HAVE_UNISTD_H */
|
||||
#ifdef HAVE_STRING_H
|
||||
#include <string.h>
|
||||
#endif /* HAVE_STRING_H */
|
||||
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/mca/rml/rml_types.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
|
||||
#include "orte/mca/iof/iof_types.h"
|
||||
#include "orte/mca/iof/base/base.h"
|
||||
|
||||
#include "iof_mrorted.h"
|
||||
|
||||
static void send_cb(int status, orte_process_name_t *peer,
|
||||
opal_buffer_t *buf, orte_rml_tag_t tag,
|
||||
void *cbdata)
|
||||
{
|
||||
/* nothing to do here - just release buffer and return */
|
||||
OBJ_RELEASE(buf);
|
||||
}
|
||||
|
||||
/*
 * Send a flow-control message (ORTE_IOF_XON or ORTE_IOF_XOFF) to the
 * HNP on the IOF HNP tag.
 *
 * The message carries the tag first, followed by the name of the
 * proc the request is about. Failures are logged and the buffer
 * released; nothing is reported back to the caller.
 *
 * name  proc whose stdin flow is being controlled
 * tag   ORTE_IOF_XON or ORTE_IOF_XOFF
 */
void orte_iof_mrorted_send_xonxoff(orte_process_name_t *name, orte_iof_tag_t tag)
{
    opal_buffer_t *buf;
    int rc;

    buf = OBJ_NEW(opal_buffer_t);

    /* pack the tag - we do this first so that flow control messages can
     * be recognized from the tag alone
     */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &tag, 1, ORTE_IOF_TAG))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(buf);
        return;
    }
    /* add the name of the proc */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, name, 1, ORTE_NAME))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(buf);
        return;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                         "%s sending %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (ORTE_IOF_XON == tag) ? "xon" : "xoff"));

    /* send the buffer to the HNP */
    if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_IOF_HNP,
                                          0, send_cb, NULL))) {
        /* send_cb will never run for a send that was not queued,
         * so release the buffer here
         */
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(buf);
    }
}
|
||||
|
||||
/*
|
||||
* The only messages coming to an orted are either:
|
||||
*
|
||||
* (a) stdin, which is to be copied to whichever local
|
||||
* procs "pull'd" a copy
|
||||
*
|
||||
* (b) flow control messages
|
||||
*/
|
||||
void orte_iof_mrorted_recv(int status, orte_process_name_t* sender,
|
||||
opal_buffer_t* buffer, orte_rml_tag_t tag,
|
||||
void* cbdata)
|
||||
{
|
||||
unsigned char data[ORTE_IOF_BASE_MSG_MAX];
|
||||
orte_iof_tag_t stream;
|
||||
int32_t count, numbytes;
|
||||
orte_jobid_t jobid;
|
||||
opal_list_item_t *item;
|
||||
int rc;
|
||||
|
||||
/* see what stream generated this data */
|
||||
count = 1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &stream, &count, ORTE_IOF_TAG))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto CLEAN_RETURN;
|
||||
}
|
||||
|
||||
/* if this isn't stdin, then we have an error */
|
||||
if (ORTE_IOF_STDIN != stream) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
|
||||
goto CLEAN_RETURN;
|
||||
}
|
||||
|
||||
/* unpack the intended target */
|
||||
count = 1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &jobid, &count, ORTE_JOBID))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto CLEAN_RETURN;
|
||||
}
|
||||
|
||||
/* unpack the data */
|
||||
numbytes=ORTE_IOF_BASE_MSG_MAX;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, data, &numbytes, OPAL_BYTE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto CLEAN_RETURN;
|
||||
}
|
||||
/* numbytes will contain the actual #bytes that were sent */
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s unpacked %d bytes for local job %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
|
||||
ORTE_JOBID_PRINT(jobid)));
|
||||
|
||||
/* cycle through our list of procs */
|
||||
for (item = opal_list_get_first(&mca_iof_mr_orted_component.procs);
|
||||
item != opal_list_get_end(&mca_iof_mr_orted_component.procs);
|
||||
item = opal_list_get_next(item)) {
|
||||
orte_iof_proc_t* sink = (orte_iof_proc_t*)item;
|
||||
|
||||
/* is this intended for this jobid? */
|
||||
if (jobid == sink->name.jobid) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
|
||||
"%s writing data to local proc %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&sink->name)));
|
||||
if (NULL == sink->sink->wev || sink->sink->wev->fd < 0) {
|
||||
/* this sink was already closed - ignore this data */
|
||||
goto CLEAN_RETURN;
|
||||
}
|
||||
/* send the bytes down the pipe - we even send 0 byte events
|
||||
* down the pipe so it forces out any preceding data before
|
||||
* closing the output stream
|
||||
*/
|
||||
if (ORTE_IOF_MAX_INPUT_BUFFERS < orte_iof_base_write_output(&sink->name, stream, data, numbytes, sink->sink->wev)) {
|
||||
/* getting too backed up - tell the HNP to hold off any more input if we
|
||||
* haven't already told it
|
||||
*/
|
||||
if (!sink->sink->xoff) {
|
||||
sink->sink->xoff = true;
|
||||
orte_iof_mrorted_send_xonxoff(&sink->name, ORTE_IOF_XOFF);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
CLEAN_RETURN:
|
||||
return;
|
||||
}
|
@ -84,6 +84,7 @@ orte_iof_base_module_t orte_iof_orted_module = {
|
||||
orted_push,
|
||||
orted_pull,
|
||||
orted_close,
|
||||
NULL,
|
||||
finalize,
|
||||
orted_ft_event
|
||||
};
|
||||
|
@ -10,7 +10,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -62,6 +62,7 @@ orte_iof_base_module_t orte_iof_tool_module = {
|
||||
tool_push,
|
||||
tool_pull,
|
||||
tool_close,
|
||||
NULL,
|
||||
finalize,
|
||||
tool_ft_event
|
||||
};
|
||||
|
@ -229,12 +229,6 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *data,
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* pack the number of nodes involved in this job */
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &map->num_nodes, 1, ORTE_STD_CNTR))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* pack the number of procs in this launch */
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &jdata->num_procs, 1, ORTE_VPID))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
@ -267,6 +261,12 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *data,
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* pack the stdout target */
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &jdata->stdout_target, 1, ORTE_JOBID))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* pack whether or not process recovery is allowed for this job */
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &jdata->enable_recovery, 1, OPAL_BOOL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
@ -538,15 +538,10 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data,
|
||||
goto REPORT_ERROR;
|
||||
}
|
||||
|
||||
/* unpack the number of nodes involved in this job */
|
||||
/* ensure the map object is present */
|
||||
if (NULL == jdata->map) {
|
||||
jdata->map = OBJ_NEW(orte_job_map_t);
|
||||
}
|
||||
cnt=1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jdata->map->num_nodes, &cnt, ORTE_STD_CNTR))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto REPORT_ERROR;
|
||||
}
|
||||
/* unpack the number of procs in this launch */
|
||||
cnt=1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jdata->num_procs, &cnt, ORTE_VPID))) {
|
||||
@ -579,6 +574,12 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data,
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto REPORT_ERROR;
|
||||
}
|
||||
/* unpack the stdout target for the job */
|
||||
cnt=1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jdata->stdout_target, &cnt, ORTE_JOBID))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto REPORT_ERROR;
|
||||
}
|
||||
/* unpack whether or not process recovery is allowed for this job */
|
||||
cnt=1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jdata->enable_recovery, &cnt, OPAL_BOOL))) {
|
||||
@ -1114,7 +1115,6 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata)
|
||||
char **argvsav=NULL;
|
||||
int inm, j, idx;
|
||||
int total_num_local_procs = 0;
|
||||
orte_nid_t *nid;
|
||||
orte_node_t *node;
|
||||
orte_odls_launch_local_t *caddy = (orte_odls_launch_local_t*)cbdata;
|
||||
orte_job_t *jobdat;
|
||||
@ -1145,25 +1145,13 @@ void orte_odls_base_default_launch_local(int fd, short sd, void *cbdata)
|
||||
|
||||
/* see if the mapper thinks we are oversubscribed */
|
||||
oversubscribed = false;
|
||||
if (ORTE_PROC_IS_HNP) {
|
||||
/* just fake it - we don't keep a local nidmap */
|
||||
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
ORTE_ACTIVATE_JOB_STATE(jobdat, ORTE_JOB_STATE_FAILED_TO_LAUNCH);
|
||||
goto ERROR_OUT;
|
||||
}
|
||||
if (node->oversubscribed) {
|
||||
oversubscribed = true;
|
||||
}
|
||||
} else {
|
||||
/* RHC: the nidmap will eventually disappear, so for now just
|
||||
* make this a non-fatal error
|
||||
*/
|
||||
if (NULL != (nid = orte_util_lookup_nid(ORTE_PROC_MY_NAME))) {
|
||||
if (nid->oversubscribed) {
|
||||
oversubscribed = true;
|
||||
}
|
||||
}
|
||||
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
ORTE_ACTIVATE_JOB_STATE(jobdat, ORTE_JOB_STATE_FAILED_TO_LAUNCH);
|
||||
goto ERROR_OUT;
|
||||
}
|
||||
if (node->oversubscribed) {
|
||||
oversubscribed = true;
|
||||
}
|
||||
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
@ -1745,7 +1733,7 @@ void orte_odls_base_setup_singleton_jobdat(orte_jobid_t jobid)
|
||||
opal_dss.pack(&buffer, &vpid1, 1, ORTE_VPID); /* num_procs */
|
||||
#if OPAL_HAVE_HWLOC
|
||||
bind_level = OPAL_HWLOC_NODE_LEVEL;
|
||||
opal_dss.pack(&buffer, &bind_level, 1, OPAL_HWLOC_LEVEL_T); /* num_procs */
|
||||
opal_dss.pack(&buffer, &bind_level, 1, OPAL_HWLOC_LEVEL_T); /* binding level */
|
||||
#endif
|
||||
one32 = 0;
|
||||
opal_dss.pack(&buffer, &one32, 1, OPAL_INT32); /* node index */
|
||||
@ -2095,6 +2083,9 @@ void odls_base_default_wait_local_proc(pid_t pid, int status, void* cbdata)
|
||||
if (NULL == (cptr = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i))) {
|
||||
continue;
|
||||
}
|
||||
if (cptr->name.jobid != proc->name.jobid) {
|
||||
continue;
|
||||
}
|
||||
if (cptr->registered) {
|
||||
/* someone has registered, and we didn't before
|
||||
* terminating - this is an abnormal termination
|
||||
|
@ -191,6 +191,7 @@ static void launch_daemons(int fd, short args, void *cbdata)
|
||||
* do it - no new daemons will be launched
|
||||
*/
|
||||
if (ORTE_JOB_CONTROL_DEBUGGER_DAEMON & state->jdata->controls) {
|
||||
state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
|
||||
ORTE_ACTIVATE_JOB_STATE(state->jdata, ORTE_JOB_STATE_DAEMONS_REPORTED);
|
||||
OBJ_RELEASE(state);
|
||||
return;
|
||||
@ -213,7 +214,7 @@ static void launch_daemons(int fd, short args, void *cbdata)
|
||||
* job to move to the following step
|
||||
*/
|
||||
state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
|
||||
ORTE_ACTIVATE_JOB_STATE(daemons, ORTE_JOB_STATE_DAEMONS_REPORTED);
|
||||
ORTE_ACTIVATE_JOB_STATE(state->jdata, ORTE_JOB_STATE_DAEMONS_REPORTED);
|
||||
OBJ_RELEASE(state);
|
||||
return;
|
||||
}
|
||||
@ -234,7 +235,9 @@ static void launch_daemons(int fd, short args, void *cbdata)
|
||||
"%s plm:alps: no new daemons to launch",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
|
||||
ORTE_ACTIVATE_JOB_STATE(daemons, ORTE_JOB_STATE_DAEMONS_REPORTED);
|
||||
if (ORTE_JOB_STATE_DAEMONS_REPORTED == daemons->state) {
|
||||
ORTE_ACTIVATE_JOB_STATE(state->jdata, ORTE_JOB_STATE_DAEMONS_REPORTED);
|
||||
}
|
||||
OBJ_RELEASE(state);
|
||||
return;
|
||||
}
|
||||
@ -404,6 +407,7 @@ static void launch_daemons(int fd, short args, void *cbdata)
|
||||
|
||||
/* indicate that the daemons for this job were launched */
|
||||
state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
|
||||
daemons->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
|
||||
|
||||
/* flag that launch was successful, so far as we currently know */
|
||||
failed_launch = false;
|
||||
|
@ -83,6 +83,7 @@ ORTE_DECLSPEC void orte_plm_base_app_report_launch(int fd, short event, void *da
|
||||
ORTE_DECLSPEC void orte_plm_base_receive_process_msg(int fd, short event, void *data);
|
||||
|
||||
ORTE_DECLSPEC void orte_plm_base_setup_job(int fd, short args, void *cbdata);
|
||||
ORTE_DECLSPEC void orte_plm_base_setup_job_complete(int fd, short args, void *cbdata);
|
||||
ORTE_DECLSPEC void orte_plm_base_complete_setup(int fd, short args, void *cbdata);
|
||||
ORTE_DECLSPEC void orte_plm_base_daemons_reported(int fd, short args, void *cbdata);
|
||||
ORTE_DECLSPEC void orte_plm_base_daemons_launched(int fd, short args, void *cbdata);
|
||||
|
@ -80,8 +80,6 @@
|
||||
void orte_plm_base_daemons_reported(int fd, short args, void *cbdata)
|
||||
{
|
||||
orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
|
||||
int i;
|
||||
orte_job_t *jdata;
|
||||
|
||||
#if OPAL_HAVE_HWLOC
|
||||
{
|
||||
@ -106,21 +104,17 @@ void orte_plm_base_daemons_reported(int fd, short args, void *cbdata)
|
||||
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
|
||||
continue;
|
||||
}
|
||||
node->topology = t;
|
||||
if (NULL == node->topology) {
|
||||
node->topology = t;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* progress all jobs whose daemons have launched */
|
||||
for (i=1; i < orte_job_data->size; i++) {
|
||||
if (NULL == (jdata = (orte_job_t*)opal_pointer_array_get_item(orte_job_data, i))) {
|
||||
continue;
|
||||
}
|
||||
if (ORTE_JOB_STATE_DAEMONS_LAUNCHED == jdata->state) {
|
||||
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP);
|
||||
}
|
||||
}
|
||||
/* progress the job */
|
||||
caddy->jdata->state = ORTE_JOB_STATE_DAEMONS_REPORTED;
|
||||
ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_MAP);
|
||||
|
||||
/* cleanup */
|
||||
OBJ_RELEASE(caddy);
|
||||
@ -213,12 +207,21 @@ void orte_plm_base_setup_job(int fd, short args, void *cbdata)
|
||||
free(bar2_val);
|
||||
|
||||
/* set the job state to the next position */
|
||||
ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_ALLOCATE);
|
||||
ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_INIT_COMPLETE);
|
||||
|
||||
/* cleanup */
|
||||
OBJ_RELEASE(caddy);
|
||||
}
|
||||
|
||||
void orte_plm_base_setup_job_complete(int fd, short args, void *cbdata)
|
||||
{
|
||||
orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
|
||||
|
||||
/* nothing to do here but move along */
|
||||
ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_ALLOCATE);
|
||||
OBJ_RELEASE(caddy);
|
||||
}
|
||||
|
||||
void orte_plm_base_complete_setup(int fd, short args, void *cbdata)
|
||||
{
|
||||
orte_job_t *jdata, *jdatorted;
|
||||
@ -510,12 +513,12 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender,
|
||||
opal_buffer_t *buffer,
|
||||
orte_rml_tag_t tag, void *cbdata)
|
||||
{
|
||||
orte_process_name_t peer;
|
||||
char *rml_uri = NULL, *ptr;
|
||||
int rc, idx;
|
||||
orte_proc_t *daemon=NULL;
|
||||
char *nodename;
|
||||
orte_node_t *node;
|
||||
orte_job_t *jdata;
|
||||
|
||||
/* get the daemon job, if necessary */
|
||||
if (NULL == jdatorted) {
|
||||
@ -562,7 +565,7 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender,
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:orted_report_launch from daemon %s on node %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer), nodename));
|
||||
ORTE_NAME_PRINT(sender), nodename));
|
||||
|
||||
/* look this node up, if necessary */
|
||||
if (!orte_plm_globals.daemon_nodes_assigned_at_launch) {
|
||||
@ -593,16 +596,29 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender,
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:orted_report_launch attempting to assign daemon %s to node %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer), nodename));
|
||||
ORTE_NAME_PRINT(sender), nodename));
|
||||
for (idx=0; idx < orte_node_pool->size; idx++) {
|
||||
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, idx))) {
|
||||
continue;
|
||||
}
|
||||
if (NULL != node->daemon) {
|
||||
if (node->location_verified) {
|
||||
/* already assigned */
|
||||
continue;
|
||||
}
|
||||
if (0 == strcmp(nodename, node->name)) {
|
||||
/* flag that we verified the location */
|
||||
node->location_verified = true;
|
||||
if (node == daemon->node) {
|
||||
/* it wound up right where it should */
|
||||
break;
|
||||
}
|
||||
/* remove the prior association */
|
||||
if (NULL != daemon->node) {
|
||||
OBJ_RELEASE(daemon->node);
|
||||
}
|
||||
if (NULL != node->daemon) {
|
||||
OBJ_RELEASE(node->daemon);
|
||||
}
|
||||
/* associate this daemon with the node */
|
||||
node->daemon = daemon;
|
||||
OBJ_RETAIN(daemon);
|
||||
@ -687,8 +703,18 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender,
|
||||
} else {
|
||||
jdatorted->num_reported++;
|
||||
if (jdatorted->num_procs == jdatorted->num_reported) {
|
||||
/* activate the daemons_reported state */
|
||||
ORTE_ACTIVATE_JOB_STATE(jdatorted, ORTE_JOB_STATE_DAEMONS_REPORTED);
|
||||
jdatorted->state = ORTE_JOB_STATE_DAEMONS_REPORTED;
|
||||
/* activate the daemons_reported state for all jobs
|
||||
* whose daemons were launched
|
||||
*/
|
||||
for (idx=1; idx < orte_job_data->size; idx++) {
|
||||
if (NULL == (jdata = (orte_job_t*)opal_pointer_array_get_item(orte_job_data, idx))) {
|
||||
continue;
|
||||
}
|
||||
if (ORTE_JOB_STATE_DAEMONS_LAUNCHED == jdata->state) {
|
||||
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DAEMONS_REPORTED);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -776,6 +802,9 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv,
|
||||
opal_argv_append(argc, argv, "1");
|
||||
}
|
||||
#endif
|
||||
if (orte_map_reduce) {
|
||||
opal_argv_append(argc, argv, "--mapreduce");
|
||||
}
|
||||
|
||||
/* the following two are not mca params */
|
||||
if ((int)ORTE_VPID_INVALID != orted_debug_failure) {
|
||||
@ -1116,7 +1145,6 @@ int orte_plm_base_setup_virtual_machine(orte_job_t *jdata)
|
||||
node = (orte_node_t*)item;
|
||||
/* if this node is already in the map, skip it */
|
||||
if (NULL != node->daemon) {
|
||||
OBJ_RELEASE(node);
|
||||
continue;
|
||||
}
|
||||
/* add the node to the map */
|
||||
@ -1146,19 +1174,22 @@ int orte_plm_base_setup_virtual_machine(orte_job_t *jdata)
|
||||
return rc;
|
||||
}
|
||||
++daemons->num_procs;
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:setup_vm assigning new daemon %s to node %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&proc->name),
|
||||
node->name));
|
||||
/* point the node to the daemon */
|
||||
node->daemon = proc;
|
||||
OBJ_RETAIN(proc); /* maintain accounting */
|
||||
/* point the proc to the node and maintain accounting */
|
||||
proc->node = node;
|
||||
proc->nodename = node->name;
|
||||
OBJ_RETAIN(node);
|
||||
if (orte_plm_globals.daemon_nodes_assigned_at_launch) {
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:setup_vm assigning new daemon %s to node %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&proc->name),
|
||||
node->name));
|
||||
/* point the node to the daemon */
|
||||
node->daemon = proc;
|
||||
OBJ_RETAIN(proc); /* maintain accounting */
|
||||
/* point the proc to the node and maintain accounting */
|
||||
proc->node = node;
|
||||
proc->nodename = node->name;
|
||||
OBJ_RETAIN(node);
|
||||
node->location_verified = true;
|
||||
} else {
|
||||
node->location_verified = false;
|
||||
}
|
||||
/* track number of daemons to be launched */
|
||||
++map->num_new_daemons;
|
||||
|
@ -192,7 +192,7 @@ static void launch_daemons(int fd, short args, void *cbdata)
|
||||
* job to move to the following step
|
||||
*/
|
||||
state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
|
||||
ORTE_ACTIVATE_JOB_STATE(daemons, ORTE_JOB_STATE_DAEMONS_REPORTED);
|
||||
ORTE_ACTIVATE_JOB_STATE(state->jdata, ORTE_JOB_STATE_DAEMONS_REPORTED);
|
||||
OBJ_RELEASE(state);
|
||||
return;
|
||||
}
|
||||
@ -219,7 +219,9 @@ static void launch_daemons(int fd, short args, void *cbdata)
|
||||
"%s plm:lsf: no new daemons to launch",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
|
||||
ORTE_ACTIVATE_JOB_STATE(daemons, ORTE_JOB_STATE_DAEMONS_REPORTED);
|
||||
if (ORTE_JOB_STATE_DAEMONS_REPORTED == daemons->state) {
|
||||
ORTE_ACTIVATE_JOB_STATE(state->jdata, ORTE_JOB_STATE_DAEMONS_REPORTED);
|
||||
}
|
||||
OBJ_RELEASE(state);
|
||||
return;
|
||||
}
|
||||
@ -349,6 +351,7 @@ static void launch_daemons(int fd, short args, void *cbdata)
|
||||
|
||||
/* indicate that the daemons for this job were launched */
|
||||
state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
|
||||
daemons->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
|
||||
|
||||
/* flag that launch was successful, so far as we currently know */
|
||||
failed_launch = false;
|
||||
|
@ -97,18 +97,19 @@ typedef int32_t orte_job_state_t;
|
||||
|
||||
#define ORTE_JOB_STATE_UNDEF 0
|
||||
#define ORTE_JOB_STATE_INIT 1 /* ready to be assigned id */
|
||||
#define ORTE_JOB_STATE_ALLOCATE 2 /* ready to be allocated */
|
||||
#define ORTE_JOB_STATE_MAP 3 /* ready to be mapped */
|
||||
#define ORTE_JOB_STATE_SYSTEM_PREP 4 /* ready for final sanity check and system values updated */
|
||||
#define ORTE_JOB_STATE_LAUNCH_DAEMONS 5 /* ready to launch daemons */
|
||||
#define ORTE_JOB_STATE_DAEMONS_LAUNCHED 6 /* daemons for this job have been launched */
|
||||
#define ORTE_JOB_STATE_DAEMONS_REPORTED 7 /* all launched daemons have reported */
|
||||
#define ORTE_JOB_STATE_LAUNCH_APPS 8 /* ready to launch apps */
|
||||
#define ORTE_JOB_STATE_RUNNING 9 /* all procs have been fork'd */
|
||||
#define ORTE_JOB_STATE_SUSPENDED 10 /* job has been suspended */
|
||||
#define ORTE_JOB_STATE_REGISTERED 11 /* all procs registered for sync */
|
||||
#define ORTE_JOB_STATE_READY_FOR_DEBUGGERS 12 /* job ready for debugger init after spawn */
|
||||
#define ORTE_JOB_STATE_LOCAL_LAUNCH_COMPLETE 13 /* all local procs have attempted launch */
|
||||
#define ORTE_JOB_STATE_INIT_COMPLETE 2 /* jobid assigned and setup */
|
||||
#define ORTE_JOB_STATE_ALLOCATE 3 /* ready to be allocated */
|
||||
#define ORTE_JOB_STATE_MAP 4 /* ready to be mapped */
|
||||
#define ORTE_JOB_STATE_SYSTEM_PREP 5 /* ready for final sanity check and system values updated */
|
||||
#define ORTE_JOB_STATE_LAUNCH_DAEMONS 6 /* ready to launch daemons */
|
||||
#define ORTE_JOB_STATE_DAEMONS_LAUNCHED 7 /* daemons for this job have been launched */
|
||||
#define ORTE_JOB_STATE_DAEMONS_REPORTED 8 /* all launched daemons have reported */
|
||||
#define ORTE_JOB_STATE_LAUNCH_APPS 9 /* ready to launch apps */
|
||||
#define ORTE_JOB_STATE_RUNNING 10 /* all procs have been fork'd */
|
||||
#define ORTE_JOB_STATE_SUSPENDED 11 /* job has been suspended */
|
||||
#define ORTE_JOB_STATE_REGISTERED 12 /* all procs registered for sync */
|
||||
#define ORTE_JOB_STATE_READY_FOR_DEBUGGERS 13 /* job ready for debugger init after spawn */
|
||||
#define ORTE_JOB_STATE_LOCAL_LAUNCH_COMPLETE 14 /* all local procs have attempted launch */
|
||||
|
||||
/*
|
||||
* Define a "boundary" so we can easily and quickly determine
|
||||
|
@ -1109,7 +1109,7 @@ static void launch_daemons(int fd, short args, void *cbdata)
|
||||
* job to move to the following step
|
||||
*/
|
||||
state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
|
||||
ORTE_ACTIVATE_JOB_STATE(daemons, ORTE_JOB_STATE_DAEMONS_REPORTED);
|
||||
ORTE_ACTIVATE_JOB_STATE(state->jdata, ORTE_JOB_STATE_DAEMONS_REPORTED);
|
||||
OBJ_RELEASE(state);
|
||||
return;
|
||||
}
|
||||
@ -1127,7 +1127,9 @@ static void launch_daemons(int fd, short args, void *cbdata)
|
||||
* job to move to the following step
|
||||
*/
|
||||
state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
|
||||
ORTE_ACTIVATE_JOB_STATE(daemons, ORTE_JOB_STATE_DAEMONS_REPORTED);
|
||||
if (ORTE_JOB_STATE_DAEMONS_REPORTED == daemons->state) {
|
||||
ORTE_ACTIVATE_JOB_STATE(state->jdata, ORTE_JOB_STATE_DAEMONS_REPORTED);
|
||||
}
|
||||
OBJ_RELEASE(state);
|
||||
return;
|
||||
}
|
||||
@ -1410,7 +1412,8 @@ static void launch_daemons(int fd, short args, void *cbdata)
|
||||
|
||||
/* set the job state to indicate the daemons are launched */
|
||||
state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
|
||||
|
||||
daemons->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
|
||||
|
||||
/* trigger the event to start processing the launch list */
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
"%s plm:process: activating launch event",
|
||||
|
@ -957,6 +957,7 @@ static void launch_daemons(int fd, short args, void *cbdata)
|
||||
* do it - no new daemons will be launched
|
||||
*/
|
||||
if (ORTE_JOB_CONTROL_DEBUGGER_DAEMON & state->jdata->controls) {
|
||||
state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
|
||||
ORTE_ACTIVATE_JOB_STATE(state->jdata, ORTE_JOB_STATE_DAEMONS_REPORTED);
|
||||
OBJ_RELEASE(state);
|
||||
return;
|
||||
@ -979,7 +980,7 @@ static void launch_daemons(int fd, short args, void *cbdata)
|
||||
* job to move to the following step
|
||||
*/
|
||||
state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
|
||||
ORTE_ACTIVATE_JOB_STATE(daemons, ORTE_JOB_STATE_DAEMONS_REPORTED);
|
||||
ORTE_ACTIVATE_JOB_STATE(state->jdata, ORTE_JOB_STATE_DAEMONS_REPORTED);
|
||||
OBJ_RELEASE(state);
|
||||
return;
|
||||
}
|
||||
@ -997,7 +998,9 @@ static void launch_daemons(int fd, short args, void *cbdata)
|
||||
* job to move to the following step
|
||||
*/
|
||||
state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
|
||||
ORTE_ACTIVATE_JOB_STATE(daemons, ORTE_JOB_STATE_DAEMONS_REPORTED);
|
||||
if (ORTE_JOB_STATE_DAEMONS_REPORTED == daemons->state) {
|
||||
ORTE_ACTIVATE_JOB_STATE(state->jdata, ORTE_JOB_STATE_DAEMONS_REPORTED);
|
||||
}
|
||||
OBJ_RELEASE(state);
|
||||
return;
|
||||
}
|
||||
|
@ -199,6 +199,7 @@ static void launch_daemons(int fd, short args, void *cbdata)
|
||||
* do it - no new daemons will be launched
|
||||
*/
|
||||
if (ORTE_JOB_CONTROL_DEBUGGER_DAEMON & state->jdata->controls) {
|
||||
state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
|
||||
ORTE_ACTIVATE_JOB_STATE(state->jdata, ORTE_JOB_STATE_DAEMONS_REPORTED);
|
||||
OBJ_RELEASE(state);
|
||||
return;
|
||||
@ -221,7 +222,7 @@ static void launch_daemons(int fd, short args, void *cbdata)
|
||||
* job to move to the following step
|
||||
*/
|
||||
state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
|
||||
ORTE_ACTIVATE_JOB_STATE(daemons, ORTE_JOB_STATE_DAEMONS_REPORTED);
|
||||
ORTE_ACTIVATE_JOB_STATE(state->jdata, ORTE_JOB_STATE_DAEMONS_REPORTED);
|
||||
OBJ_RELEASE(state);
|
||||
return;
|
||||
}
|
||||
@ -242,7 +243,9 @@ static void launch_daemons(int fd, short args, void *cbdata)
|
||||
"%s plm:slurm: no new daemons to launch",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
|
||||
ORTE_ACTIVATE_JOB_STATE(daemons, ORTE_JOB_STATE_DAEMONS_REPORTED);
|
||||
if (ORTE_JOB_STATE_DAEMONS_REPORTED == daemons->state) {
|
||||
ORTE_ACTIVATE_JOB_STATE(state->jdata, ORTE_JOB_STATE_DAEMONS_REPORTED);
|
||||
}
|
||||
OBJ_RELEASE(state);
|
||||
return;
|
||||
}
|
||||
@ -407,6 +410,7 @@ static void launch_daemons(int fd, short args, void *cbdata)
|
||||
|
||||
/* indicate that the daemons for this job were launched */
|
||||
state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
|
||||
daemons->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
|
||||
|
||||
/* flag that launch was successful, so far as we currently know */
|
||||
failed_launch = false;
|
||||
|
@ -195,6 +195,7 @@ static void launch_daemons(int fd, short args, void *cbdata)
|
||||
* do it - no new daemons will be launched
|
||||
*/
|
||||
if (ORTE_JOB_CONTROL_DEBUGGER_DAEMON & jdata->controls) {
|
||||
jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
|
||||
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DAEMONS_REPORTED);
|
||||
OBJ_RELEASE(state);
|
||||
return;
|
||||
@ -217,7 +218,7 @@ static void launch_daemons(int fd, short args, void *cbdata)
|
||||
* job to move to the following step
|
||||
*/
|
||||
jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
|
||||
ORTE_ACTIVATE_JOB_STATE(daemons, ORTE_JOB_STATE_DAEMONS_REPORTED);
|
||||
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DAEMONS_REPORTED);
|
||||
OBJ_RELEASE(state);
|
||||
return;
|
||||
}
|
||||
@ -235,7 +236,9 @@ static void launch_daemons(int fd, short args, void *cbdata)
|
||||
* job to move to the following step
|
||||
*/
|
||||
jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
|
||||
ORTE_ACTIVATE_JOB_STATE(daemons, ORTE_JOB_STATE_DAEMONS_REPORTED);
|
||||
if (ORTE_JOB_STATE_DAEMONS_REPORTED == daemons->state) {
|
||||
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DAEMONS_REPORTED);
|
||||
}
|
||||
OBJ_RELEASE(state);
|
||||
return;
|
||||
}
|
||||
@ -408,6 +411,7 @@ static void launch_daemons(int fd, short args, void *cbdata)
|
||||
|
||||
/* indicate that the daemons for this job were launched */
|
||||
state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
|
||||
daemons->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
|
||||
|
||||
/* flag that launch was successful, so far as we currently know */
|
||||
failed_launch = false;
|
||||
|
@ -43,10 +43,10 @@ void orte_state_base_activate_job_state(orte_job_t *jdata,
|
||||
}
|
||||
if (s->job_state == state) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_state_base_output,
|
||||
"%s ACTIVATING JOB %s STATE %s",
|
||||
"%s ACTIVATING JOB %s STATE %s PRI %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(NULL == jdata) ? "NULL" : ORTE_JOBID_PRINT(jdata->jobid),
|
||||
orte_job_state_to_str(state)));
|
||||
orte_job_state_to_str(state), s->priority));
|
||||
if (NULL == s->cbfunc) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_state_base_output,
|
||||
"%s NULL CBFUNC FOR JOB %s STATE %s",
|
||||
@ -90,6 +90,11 @@ void orte_state_base_activate_job_state(orte_job_t *jdata,
|
||||
caddy->job_state = state;
|
||||
OBJ_RETAIN(jdata);
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_state_base_output,
|
||||
"%s ACTIVATING JOB %s STATE %s PRI %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(NULL == jdata) ? "NULL" : ORTE_JOBID_PRINT(jdata->jobid),
|
||||
orte_job_state_to_str(state), s->priority));
|
||||
opal_event_set(orte_event_base, &caddy->ev, -1, OPAL_EV_WRITE, s->cbfunc, caddy);
|
||||
opal_event_set_priority(&caddy->ev, s->priority);
|
||||
opal_event_active(&caddy->ev, OPAL_EV_WRITE, 1);
|
||||
@ -217,10 +222,10 @@ void orte_state_base_activate_proc_state(orte_process_name_t *proc,
|
||||
}
|
||||
if (s->proc_state == state) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_state_base_output,
|
||||
"%s ACTIVATING PROC %s STATE %s",
|
||||
"%s ACTIVATING PROC %s STATE %s PRI %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_proc_state_to_str(state)));
|
||||
orte_proc_state_to_str(state), s->priority));
|
||||
if (NULL == s->cbfunc) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_state_base_output,
|
||||
"%s NULL CBFUNC FOR PROC %s STATE %s",
|
||||
@ -258,6 +263,11 @@ void orte_state_base_activate_proc_state(orte_process_name_t *proc,
|
||||
caddy = OBJ_NEW(orte_state_caddy_t);
|
||||
caddy->name = *proc;
|
||||
caddy->proc_state = state;
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_state_base_output,
|
||||
"%s ACTIVATING PROC %s STATE %s PRI %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_proc_state_to_str(state), s->priority));
|
||||
opal_event_set(orte_event_base, &caddy->ev, -1, OPAL_EV_WRITE, s->cbfunc, caddy);
|
||||
opal_event_set_priority(&caddy->ev, s->priority);
|
||||
opal_event_active(&caddy->ev, OPAL_EV_WRITE, 1);
|
||||
|
@ -86,6 +86,7 @@ static void report_progress(int fd, short argc, void *cbdata);
|
||||
*/
|
||||
static orte_job_state_t launch_states[] = {
|
||||
ORTE_JOB_STATE_INIT,
|
||||
ORTE_JOB_STATE_INIT_COMPLETE,
|
||||
ORTE_JOB_STATE_ALLOCATE,
|
||||
ORTE_JOB_STATE_DAEMONS_LAUNCHED,
|
||||
ORTE_JOB_STATE_DAEMONS_REPORTED,
|
||||
@ -102,6 +103,7 @@ static orte_job_state_t launch_states[] = {
|
||||
};
|
||||
static orte_state_cbfunc_t launch_callbacks[] = {
|
||||
orte_plm_base_setup_job,
|
||||
orte_plm_base_setup_job_complete,
|
||||
orte_ras_base_allocate,
|
||||
orte_plm_base_daemons_launched,
|
||||
orte_plm_base_daemons_reported,
|
||||
@ -372,6 +374,11 @@ static void check_all_complete(int fd, short args, void *cbdata)
|
||||
/* turn off any sensor monitors on this job */
|
||||
orte_sensor.stop(jdata->jobid);
|
||||
|
||||
/* tell the IOF that the job is complete */
|
||||
if (NULL != orte_iof.complete) {
|
||||
orte_iof.complete(jdata);
|
||||
}
|
||||
|
||||
if (0 < jdata->num_non_zero_exit && !orte_abort_non_zero_exit) {
|
||||
if (!orte_report_child_jobs_separately || 1 == ORTE_LOCAL_JOBID(jdata->jobid)) {
|
||||
/* update the exit code */
|
||||
|
@ -160,6 +160,8 @@ static void track_jobs(int fd, short argc, void *cbdata)
|
||||
int rc;
|
||||
|
||||
if (ORTE_JOB_STATE_LOCAL_LAUNCH_COMPLETE == caddy->job_state) {
|
||||
opal_output(0, "%s state:orted:track_jobs sending local launch complete for job %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(caddy->jdata->jobid));
|
||||
/* update the HNP with all proc states for this job */
|
||||
alert = OBJ_NEW(opal_buffer_t);
|
||||
/* pack update state command */
|
||||
@ -281,14 +283,6 @@ static void track_procs(int fd, short argc, void *cbdata)
|
||||
* while we are still trying to notify the HNP of
|
||||
* successful launch for short-lived procs
|
||||
*/
|
||||
/* Release only the stdin IOF file descriptor for this child, if one
|
||||
* was defined. File descriptors for the other IOF channels - stdout,
|
||||
* stderr, and stddiag - were released when their associated pipes
|
||||
* were cleared and closed due to termination of the process
|
||||
*/
|
||||
if (NULL != orte_iof.close) {
|
||||
orte_iof.close(proc, ORTE_IOF_STDIN);
|
||||
}
|
||||
pdata->iof_complete = true;
|
||||
if (pdata->waitpid_recvd) {
|
||||
/* the proc has terminated */
|
||||
@ -325,6 +319,16 @@ static void track_procs(int fd, short argc, void *cbdata)
|
||||
}
|
||||
}
|
||||
}
|
||||
/* Release the stdin IOF file descriptor for this child, if one
|
||||
* was defined. File descriptors for the other IOF channels - stdout,
|
||||
* stderr, and stddiag - were released when their associated pipes
|
||||
* were cleared and closed due to termination of the process
|
||||
* Do this after we handle termination in case the IOF needs
|
||||
* to check to see if all procs from the job are actually terminated
|
||||
*/
|
||||
if (NULL != orte_iof.close) {
|
||||
orte_iof.close(proc, ORTE_IOF_STDIN);
|
||||
}
|
||||
} else if (ORTE_PROC_STATE_WAITPID_FIRED == state) {
|
||||
/* do NOT update the proc state as this can hit
|
||||
* while we are still trying to notify the HNP of
|
||||
|
@ -116,6 +116,7 @@ static struct {
|
||||
int fail;
|
||||
int fail_delay;
|
||||
bool abort;
|
||||
bool mapreduce;
|
||||
} orted_globals;
|
||||
|
||||
/*
|
||||
@ -205,6 +206,10 @@ opal_cmd_line_init_t orte_cmd_line_opts[] = {
|
||||
"Nodes in cluster may differ in topology, so send the topology back from each node [Default = false]" },
|
||||
#endif
|
||||
|
||||
{ NULL, NULL, NULL, '\0', "mapreduce", "mapreduce", 0,
|
||||
&orted_globals.mapreduce, OPAL_CMD_LINE_TYPE_BOOL,
|
||||
"Whether to operate in map-reduce mode" },
|
||||
|
||||
/* End of list */
|
||||
{ NULL, NULL, NULL, '\0', NULL, NULL, 0,
|
||||
NULL, OPAL_CMD_LINE_TYPE_NULL, NULL }
|
||||
@ -327,6 +332,11 @@ int orte_daemon(int argc, char *argv[])
|
||||
#endif
|
||||
tmp_env_var = NULL; /* Silence compiler warning */
|
||||
|
||||
/* if mapreduce set, flag it */
|
||||
if (orted_globals.mapreduce) {
|
||||
orte_map_reduce = true;
|
||||
}
|
||||
|
||||
/* Set the flag telling OpenRTE that I am NOT a
|
||||
* singleton, but am "infrastructure" - prevents setting
|
||||
* up incorrect infrastructure that only a singleton would
|
||||
|
@ -124,6 +124,7 @@ opal_pointer_array_t *orte_job_data;
|
||||
opal_pointer_array_t *orte_node_pool;
|
||||
opal_pointer_array_t *orte_node_topologies;
|
||||
opal_pointer_array_t *orte_local_children;
|
||||
uint16_t orte_num_jobs = 0;
|
||||
|
||||
/* Nidmap and job maps */
|
||||
opal_pointer_array_t orte_nidmap;
|
||||
@ -166,9 +167,6 @@ bool orte_do_not_barrier = false;
|
||||
bool orte_enable_recovery;
|
||||
int32_t orte_max_restarts;
|
||||
|
||||
/* comm fn for updating state */
|
||||
orte_default_comm_fn_t orte_comm;
|
||||
|
||||
/* exit status reporting */
|
||||
bool orte_report_child_jobs_separately;
|
||||
struct timeval orte_child_time_to_exit;
|
||||
@ -183,6 +181,9 @@ char *orte_forward_envars = NULL;
|
||||
/* preload binaries */
|
||||
bool orte_preload_binaries = false;
|
||||
|
||||
/* map-reduce mode */
|
||||
bool orte_map_reduce = false;
|
||||
|
||||
/* map stddiag output to stderr so it isn't forwarded to mpirun */
|
||||
bool orte_map_stddiag_to_stderr = false;
|
||||
|
||||
@ -637,6 +638,7 @@ static void orte_job_construct(orte_job_t* job)
|
||||
job->num_apps = 0;
|
||||
job->controls = ORTE_JOB_CONTROL_FORWARD_OUTPUT;
|
||||
job->stdin_target = ORTE_VPID_INVALID;
|
||||
job->stdout_target = ORTE_JOBID_INVALID;
|
||||
job->total_slots_alloc = 0;
|
||||
job->num_procs = 0;
|
||||
job->procs = OBJ_NEW(opal_pointer_array_t);
|
||||
@ -758,6 +760,7 @@ static void orte_node_construct(orte_node_t* node)
|
||||
node->index = -1;
|
||||
node->daemon = NULL;
|
||||
node->daemon_launched = false;
|
||||
node->location_verified = false;
|
||||
node->launch_id = -1;
|
||||
|
||||
node->num_procs = 0;
|
||||
|
@ -208,7 +208,10 @@ typedef uint16_t orte_job_controls_t;
|
||||
#define ORTE_JOB_CONTROL_SPIN_FOR_DEBUG 0x0100
|
||||
#define ORTE_JOB_CONTROL_RESTART 0x0200
|
||||
#define ORTE_JOB_CONTROL_PROCS_MIGRATING 0x0400
|
||||
|
||||
#define ORTE_JOB_CONTROL_MAPPER 0x0800
|
||||
#define ORTE_JOB_CONTROL_REDUCER 0x1000
|
||||
#define ORTE_JOB_CONTROL_COMBINER 0x2000
|
||||
|
||||
/* global type definitions used by RTE - instanced in orte_globals.c */
|
||||
|
||||
/************
|
||||
@ -293,6 +296,11 @@ typedef struct {
|
||||
struct orte_proc_t *daemon;
|
||||
/* whether or not this daemon has been launched */
|
||||
bool daemon_launched;
|
||||
/* whether or not the location has been verified - used
|
||||
* for environments where the daemon's final destination
|
||||
* is uncertain
|
||||
*/
|
||||
bool location_verified;
|
||||
/** Launch id - needed by some systems to launch a proc on this node */
|
||||
int32_t launch_id;
|
||||
/** number of procs on this node */
|
||||
@ -359,6 +367,8 @@ typedef struct {
|
||||
* (wildcard), or none (invalid)
|
||||
*/
|
||||
orte_vpid_t stdin_target;
|
||||
/* job that is to receive the stdout (on its stdin) from this one */
|
||||
orte_jobid_t stdout_target;
|
||||
/* collective ids */
|
||||
orte_grpcomm_coll_id_t peer_modex;
|
||||
orte_grpcomm_coll_id_t peer_init_barrier;
|
||||
@ -635,6 +645,7 @@ ORTE_DECLSPEC extern opal_pointer_array_t *orte_job_data;
|
||||
ORTE_DECLSPEC extern opal_pointer_array_t *orte_node_pool;
|
||||
ORTE_DECLSPEC extern opal_pointer_array_t *orte_node_topologies;
|
||||
ORTE_DECLSPEC extern opal_pointer_array_t *orte_local_children;
|
||||
ORTE_DECLSPEC extern uint16_t orte_num_jobs;
|
||||
|
||||
/* Nidmap and job maps */
|
||||
ORTE_DECLSPEC extern opal_pointer_array_t orte_nidmap;
|
||||
@ -673,14 +684,6 @@ ORTE_DECLSPEC extern int32_t orte_max_restarts;
|
||||
/* barrier control */
|
||||
ORTE_DECLSPEC extern bool orte_do_not_barrier;
|
||||
|
||||
/* comm interface */
|
||||
typedef void (*orte_default_cbfunc_t)(int fd, short event, void *data);
|
||||
|
||||
typedef int (*orte_default_comm_fn_t)(orte_process_name_t *recipient,
|
||||
opal_buffer_t *buf,
|
||||
orte_rml_tag_t tag,
|
||||
orte_default_cbfunc_t cbfunc);
|
||||
|
||||
/* exit status reporting */
|
||||
ORTE_DECLSPEC extern bool orte_report_child_jobs_separately;
|
||||
ORTE_DECLSPEC extern struct timeval orte_child_time_to_exit;
|
||||
@ -695,6 +698,9 @@ ORTE_DECLSPEC extern char *orte_forward_envars;
|
||||
/* preload binaries */
|
||||
ORTE_DECLSPEC extern bool orte_preload_binaries;
|
||||
|
||||
/* map-reduce mode */
|
||||
ORTE_DECLSPEC extern bool orte_map_reduce;
|
||||
|
||||
/* map stddiag output to stderr so it isn't forwarded to mpirun */
|
||||
ORTE_DECLSPEC extern bool orte_map_stddiag_to_stderr;
|
||||
|
||||
|
@ -35,7 +35,8 @@ SUBDIRS += \
|
||||
tools/wrappers \
|
||||
tools/orte-top \
|
||||
tools/orte-info \
|
||||
tools/orte-migrate
|
||||
tools/orte-migrate \
|
||||
tools/mapreduce
|
||||
|
||||
DIST_SUBDIRS += \
|
||||
tools/orte-checkpoint \
|
||||
@ -47,5 +48,6 @@ DIST_SUBDIRS += \
|
||||
tools/wrappers \
|
||||
tools/orte-top \
|
||||
tools/orte-info \
|
||||
tools/orte-migrate
|
||||
tools/orte-migrate \
|
||||
tools/mapreduce
|
||||
|
||||
|
40
orte/tools/mapreduce/Makefile.am
Обычный файл
40
orte/tools/mapreduce/Makefile.am
Обычный файл
@ -0,0 +1,40 @@
|
||||
#
|
||||
# Copyright (c) 2012 Los Alamos National Security, LLC. All rights
|
||||
# reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
include $(top_srcdir)/Makefile.man-page-rules
|
||||
|
||||
man_pages = mapreduce.1
|
||||
EXTRA_DIST = $(man_pages:.1=.1in)
|
||||
|
||||
if !ORTE_DISABLE_FULL_SUPPORT
|
||||
if OMPI_INSTALL_BINARIES
|
||||
|
||||
bin_PROGRAMS = mapreduce
|
||||
|
||||
nodist_man_MANS = $(man_pages)
|
||||
|
||||
# Ensure that the man pages are rebuilt if the opal_config.h file
|
||||
# changes; a "good enough" way to know if configure was run again (and
|
||||
# therefore the release date or version may have changed)
|
||||
$(nodist_man_MANS): $(top_builddir)/opal/include/opal_config.h
|
||||
|
||||
dist_pkgdata_DATA = help-mapreduce.txt
|
||||
|
||||
endif # OMPI_INSTALL_BINARIES
|
||||
|
||||
mapreduce_SOURCES = \
|
||||
mapreduce.c
|
||||
|
||||
mapreduce_LDADD = $(top_builddir)/orte/libopen-rte.la
|
||||
|
||||
endif # !ORTE_DISABLE_FULL_SUPPORT
|
||||
|
||||
distclean-local:
|
||||
rm -f $(man_pages)
|
627
orte/tools/mapreduce/help-mapreduce.txt
Обычный файл
627
orte/tools/mapreduce/help-mapreduce.txt
Обычный файл
@ -0,0 +1,627 @@
|
||||
# -*- text -*-
|
||||
#
|
||||
# Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
# This is the US/English general help file for Open RTE's mapreduce tool.
|
||||
#
|
||||
[orterun:init-failure]
|
||||
Open RTE was unable to initialize properly.  The error occurred while
|
||||
attempting to %s. Returned value %d instead of ORTE_SUCCESS.
|
||||
[orterun:usage]
|
||||
%s (%s) %s
|
||||
|
||||
Usage: %s [OPTION]... [PROGRAM]...
|
||||
Start the given program using Open RTE
|
||||
|
||||
%s
|
||||
|
||||
Report bugs to %s
|
||||
[orterun:version]
|
||||
%s (%s) %s
|
||||
|
||||
Report bugs to %s
|
||||
[orterun:allocate-resources]
|
||||
%s was unable to allocate enough resources to start your application.
|
||||
This might be a transient error (too many nodes in the cluster were
|
||||
unavailable at the time of the request) or a permanent error (you
|
||||
requested more nodes than exist in your cluster).
|
||||
|
||||
While probably only useful to Open RTE developers, the error returned
|
||||
was %d.
|
||||
[orterun:error-spawning]
|
||||
%s was unable to start the specified application. An attempt has been
|
||||
made to clean up all processes that did start. The error returned was
|
||||
%d.
|
||||
[orterun:appfile-not-found]
|
||||
Unable to open the appfile:
|
||||
|
||||
%s
|
||||
|
||||
Double check that this file exists and is readable.
|
||||
[orterun:executable-not-specified]
|
||||
No executable was specified on the %s command line.
|
||||
|
||||
Aborting.
|
||||
[orterun:multi-apps-and-zero-np]
|
||||
%s found multiple applications specified on the command line, with
|
||||
at least one that failed to specify the number of processes to execute.
|
||||
When specifying multiple applications, you must specify how many processes
|
||||
of each to launch via the -np argument.
|
||||
[orterun:nothing-to-do]
|
||||
%s could not find anything to do.
|
||||
|
||||
It is possible that you forgot to specify how many processes to run
|
||||
via the "-np" argument.
|
||||
[orterun:call-failed]
|
||||
%s encountered a %s call failure. This should not happen, and
|
||||
usually indicates an error within the operating system itself.
|
||||
Specifically, the following error occurred:
|
||||
|
||||
%s
|
||||
|
||||
The only other available information that may be helpful is the errno
|
||||
that was returned: %d.
|
||||
[orterun:environ]
|
||||
%s was unable to set
|
||||
%s = %s
|
||||
in the environment. Returned value %d instead of ORTE_SUCCESS.
|
||||
[orterun:precondition]
|
||||
%s was unable to precondition transports
|
||||
Returned value %d instead of ORTE_SUCCESS.
|
||||
[orterun:attr-failed]
|
||||
%s was unable to define an attribute
|
||||
Returned value %d instead of ORTE_SUCCESS.
|
||||
#
|
||||
[orterun:proc-ordered-abort]
|
||||
%s has exited due to process rank %lu with PID %lu on
|
||||
node %s calling "abort". This may have caused other processes
|
||||
in the application to be terminated by signals sent by %s
|
||||
(as reported here).
|
||||
#
|
||||
[orterun:proc-exit-no-sync]
|
||||
%s has exited due to process rank %lu with PID %lu on
|
||||
node %s exiting improperly. There are three reasons this could occur:
|
||||
|
||||
1. this process did not call "init" before exiting, but others in
|
||||
the job did. This can cause a job to hang indefinitely while it waits
|
||||
for all processes to call "init". By rule, if one process calls "init",
|
||||
then ALL processes must call "init" prior to termination.
|
||||
|
||||
2. this process called "init", but exited without calling "finalize".
|
||||
By rule, all processes that call "init" MUST call "finalize" prior to
|
||||
exiting or it will be considered an "abnormal termination"
|
||||
|
||||
3. this process called "MPI_Abort" or "orte_abort" and the mca parameter
|
||||
orte_create_session_dirs is set to false. In this case, the run-time cannot
|
||||
detect that the abort call was an abnormal termination. Hence, the only
|
||||
error message you will receive is this one.
|
||||
|
||||
This may have caused other processes in the application to be
|
||||
terminated by signals sent by %s (as reported here).
|
||||
|
||||
You can avoid this message by specifying -quiet on the %s command line.
|
||||
|
||||
#
|
||||
[orterun:proc-exit-no-sync-unknown]
|
||||
%s has exited due to a process exiting without calling "finalize",
|
||||
but has no info as to the process that caused that situation. This
|
||||
may have caused other processes in the application to be
|
||||
terminated by signals sent by %s (as reported here).
|
||||
#
|
||||
[orterun:proc-aborted]
|
||||
%s noticed that process rank %lu with PID %lu on node %s exited on signal %d.
|
||||
#
|
||||
[orterun:proc-aborted-unknown]
|
||||
%s noticed that the job aborted, but has no info as to the process
|
||||
that caused that situation.
|
||||
#
|
||||
[orterun:proc-aborted-signal-unknown]
|
||||
%s noticed that the job aborted by signal, but has no info as
|
||||
to the process that caused that situation.
|
||||
#
|
||||
[orterun:proc-aborted-strsignal]
|
||||
%s noticed that process rank %lu with PID %lu on node %s exited on signal %d (%s).
|
||||
#
|
||||
[orterun:abnormal-exit]
|
||||
WARNING: %s has exited before it received notification that all
|
||||
started processes had terminated. You should double check and ensure
|
||||
that there are no runaway processes still executing.
|
||||
#
|
||||
[orterun:sigint-while-processing]
|
||||
WARNING: %s is in the process of killing a job, but has detected an
|
||||
interruption (probably control-C).
|
||||
|
||||
It is dangerous to interrupt %s while it is killing a job (proper
|
||||
termination may not be guaranteed). Hit control-C again within 1
|
||||
second if you really want to kill %s immediately.
|
||||
#
|
||||
[orterun:double-prefix]
|
||||
Both a prefix was supplied to %s and the absolute path to %s was
|
||||
given:
|
||||
|
||||
Prefix: %s
|
||||
Path: %s
|
||||
|
||||
Only one should be specified to avoid potential version
|
||||
confusion. Operation will continue, but the -prefix option will be
|
||||
used. This is done to allow you to select a different prefix for
|
||||
the backend computation nodes than used on the frontend for %s.
|
||||
#
|
||||
[orterun:app-prefix-conflict]
|
||||
Both a prefix or absolute path was given for %s, and a different
|
||||
prefix provided for the first app_context:
|
||||
|
||||
Mpirun prefix: %s
|
||||
App prefix: %s
|
||||
|
||||
Only one should be specified to avoid potential version
|
||||
confusion. Operation will continue, but the application's prefix
|
||||
option will be ignored.
|
||||
#
|
||||
[orterun:empty-prefix]
|
||||
A prefix was supplied to %s that only contained slashes.
|
||||
|
||||
This is a fatal error; %s will now abort. No processes were launched.
|
||||
#
|
||||
[debugger-mca-param-not-found]
|
||||
Internal error -- the orte_base_user_debugger MCA parameter was not able to
|
||||
be found. Please contact the Open RTE developers; this should not
|
||||
happen.
|
||||
#
|
||||
[debugger-orte_base_user_debugger-empty]
|
||||
The MCA parameter "orte_base_user_debugger" was empty, indicating that
|
||||
no user-level debuggers have been defined. Please set this MCA
|
||||
parameter to a value and try again.
|
||||
#
|
||||
[debugger-not-found]
|
||||
A suitable debugger could not be found in your PATH. Check the values
|
||||
specified in the orte_base_user_debugger MCA parameter for the list of
|
||||
debuggers that was searched.
|
||||
#
|
||||
[debugger-exec-failed]
|
||||
%s was unable to launch the specified debugger. This is what was
|
||||
launched:
|
||||
|
||||
%s
|
||||
|
||||
Things to check:
|
||||
|
||||
- Ensure that the debugger is installed properly
|
||||
- Ensure that the "%s" executable is in your path
|
||||
- Ensure that any required licenses are available to run the debugger
|
||||
#
|
||||
[orterun:sys-limit-pipe]
|
||||
%s was unable to launch the specified application as it encountered an error:
|
||||
|
||||
Error: system limit exceeded on number of pipes that can be open
|
||||
Node: %s
|
||||
|
||||
when attempting to start process rank %lu.
|
||||
|
||||
This can be resolved by setting the mca parameter opal_set_max_sys_limits to 1,
|
||||
increasing your limit descriptor setting (using limit or ulimit commands),
|
||||
asking the system administrator for that node to increase the system limit, or
|
||||
by rearranging your processes to place fewer of them on that node.
|
||||
#
|
||||
[orterun:sys-limit-sockets]
|
||||
Error: system limit exceeded on number of network connections that can be open
|
||||
|
||||
This can be resolved by setting the mca parameter opal_set_max_sys_limits to 1,
|
||||
increasing your limit descriptor setting (using limit or ulimit commands),
|
||||
or asking the system administrator to increase the system limit.
|
||||
#
|
||||
[orterun:pipe-setup-failure]
|
||||
%s was unable to launch the specified application as it encountered an error:
|
||||
|
||||
Error: pipe function call failed when setting up I/O forwarding subsystem
|
||||
Node: %s
|
||||
|
||||
while attempting to start process rank %lu.
|
||||
#
|
||||
[orterun:sys-limit-children]
|
||||
%s was unable to launch the specified application as it encountered an error:
|
||||
|
||||
Error: system limit exceeded on number of processes that can be started
|
||||
Node: %s
|
||||
|
||||
when attempting to start process rank %lu.
|
||||
|
||||
This can be resolved by either asking the system administrator for that node to
|
||||
increase the system limit, or by rearranging your processes to place fewer of them
|
||||
on that node.
|
||||
#
|
||||
[orterun:failed-term-attrs]
|
||||
%s was unable to launch the specified application as it encountered an error:
|
||||
|
||||
Error: reading tty attributes function call failed while setting up I/O forwarding system
|
||||
Node: %s
|
||||
|
||||
while attempting to start process rank %lu.
|
||||
#
|
||||
[orterun:wdir-not-found]
|
||||
%s was unable to launch the specified application as it could not
|
||||
change to the specified working directory:
|
||||
|
||||
Working directory: %s
|
||||
Node: %s
|
||||
|
||||
while attempting to start process rank %lu.
|
||||
#
|
||||
[orterun:exe-not-found]
|
||||
%s was unable to find the specified executable file, and therefore
|
||||
did not launch the job. This error was first reported for process
|
||||
rank %lu; it may have occurred for other processes as well.
|
||||
|
||||
NOTE: A common cause for this error is misspelling a %s command
|
||||
line parameter option (remember that %s interprets the first
|
||||
unrecognized command line token as the executable).
|
||||
|
||||
Node: %s
|
||||
Executable: %s
|
||||
#
|
||||
[orterun:exe-not-accessible]
|
||||
%s was unable to launch the specified application as it could not access
|
||||
or execute an executable:
|
||||
|
||||
Executable: %s
|
||||
Node: %s
|
||||
|
||||
while attempting to start process rank %lu.
|
||||
#
|
||||
[orterun:pipe-read-failure]
|
||||
%s was unable to launch the specified application as it encountered an error:
|
||||
|
||||
Error: reading from a pipe function call failed while spawning a local process
|
||||
Node: %s
|
||||
|
||||
while attempting to start process rank %lu.
|
||||
#
|
||||
[orterun:proc-failed-to-start]
|
||||
%s was unable to start the specified application as it encountered an
|
||||
error:
|
||||
|
||||
Error name: %s
|
||||
Node: %s
|
||||
|
||||
when attempting to start process rank %lu.
|
||||
#
|
||||
[orterun:proc-socket-not-avail]
|
||||
%s was unable to start the specified application as it encountered an
|
||||
error:
|
||||
|
||||
Error name: %s
|
||||
Node: %s
|
||||
|
||||
when attempting to start process rank %lu.
|
||||
#
|
||||
[orterun:proc-failed-to-start-no-status]
|
||||
%s was unable to start the specified application as it encountered an
|
||||
error on node %s. More information may be available above.
|
||||
#
|
||||
[orterun:proc-failed-to-start-no-status-no-node]
|
||||
%s was unable to start the specified application as it encountered an
|
||||
error. More information may be available above.
|
||||
#
|
||||
[debugger requires -np]
|
||||
The number of MPI processes to launch was not specified on the command
|
||||
line.
|
||||
|
||||
The %s debugger requires that you specify a number of MPI processes to
|
||||
launch on the command line via the "-np" command line parameter. For
|
||||
example:
|
||||
|
||||
%s -np 4 %s
|
||||
|
||||
Skipping the %s debugger for now.
|
||||
#
|
||||
[debugger requires executable]
|
||||
The %s debugger requires that you specify an executable on the %s
|
||||
command line; you cannot specify application context files when
|
||||
launching this job in the %s debugger. For example:
|
||||
|
||||
%s -np 4 my_mpi_executable
|
||||
|
||||
Skipping the %s debugger for now.
|
||||
#
|
||||
[debugger only accepts single app]
|
||||
The %s debugger only accepts SPMD-style launching; specifying an
|
||||
MPMD-style launch (with multiple applications separated via ':') is
|
||||
not permitted.
|
||||
|
||||
Skipping the %s debugger for now.
|
||||
#
|
||||
[orterun:daemon-died-during-execution]
|
||||
%s has detected that a required daemon terminated during execution
|
||||
of the application with a non-zero status. This is a fatal error.
|
||||
A best-effort attempt has been made to clean up. However, it is
|
||||
-strongly- recommended that you execute the orte-clean utility
|
||||
to ensure full cleanup is accomplished.
|
||||
#
|
||||
[orterun:no-orted-object-exit]
|
||||
%s was unable to determine the status of the daemons used to
|
||||
launch this application. Additional manual cleanup may be required.
|
||||
Please refer to the "orte-clean" tool for assistance.
|
||||
#
|
||||
[orterun:unclean-exit]
|
||||
%s was unable to cleanly terminate the daemons on the nodes shown
|
||||
below. Additional manual cleanup may be required - please refer to
|
||||
the "orte-clean" tool for assistance.
|
||||
#
|
||||
[orterun:event-def-failed]
|
||||
%s was unable to define an event required for proper operation of
|
||||
the system. The reason for this error was:
|
||||
|
||||
Error: %s
|
||||
|
||||
Please report this to the Open MPI mailing list users@open-mpi.org.
|
||||
#
|
||||
[orterun:ompi-server-filename-bad]
|
||||
%s was unable to parse the filename where contact info for the
|
||||
ompi-server was to be found. The option we were given was:
|
||||
|
||||
--ompi-server %s
|
||||
|
||||
This appears to be missing the required ':' following the
|
||||
keyword "file". Please remember that the correct format for this
|
||||
command line option is:
|
||||
|
||||
--ompi-server file:path-to-file
|
||||
|
||||
where path-to-file can be either relative to the cwd or absolute.
|
||||
#
|
||||
[orterun:ompi-server-filename-missing]
|
||||
%s was unable to parse the filename where contact info for the
|
||||
ompi-server was to be found. The option we were given was:
|
||||
|
||||
--ompi-server %s
|
||||
|
||||
This appears to be missing a filename following the ':'. Please
|
||||
remember that the correct format for this command line option is:
|
||||
|
||||
--ompi-server file:path-to-file
|
||||
|
||||
where path-to-file can be either relative to the cwd or absolute.
|
||||
#
|
||||
[orterun:ompi-server-filename-access]
|
||||
%s was unable to access the filename where contact info for the
|
||||
ompi-server was to be found. The option we were given was:
|
||||
|
||||
--ompi-server %s
|
||||
|
||||
Please remember that the correct format for this command line option is:
|
||||
|
||||
--ompi-server file:path-to-file
|
||||
|
||||
where path-to-file can be either relative to the cwd or absolute, and that
|
||||
you must have read access permissions to that file.
|
||||
#
|
||||
[orterun:ompi-server-file-bad]
|
||||
%s was unable to read the ompi-server's contact info from the
|
||||
given filename. The filename we were given was:
|
||||
|
||||
FILE: %s
|
||||
|
||||
Please remember that the correct format for this command line option is:
|
||||
|
||||
--ompi-server file:path-to-file
|
||||
|
||||
where path-to-file can be either relative to the cwd or absolute, and that
|
||||
the file must have a single line in it that contains the Open MPI
|
||||
uri for the ompi-server. Note that this is *not* a standard uri, but
|
||||
a special format used internally by Open MPI for communications. It can
|
||||
best be generated by simply directing the ompi-server to put its
|
||||
uri in a file, and then giving %s that filename.
|
||||
[orterun:multiple-hostfiles]
|
||||
Error: More than one hostfile was passed for a single application
|
||||
context, which is not supported at this time.
|
||||
#
|
||||
[orterun:conflicting-params]
|
||||
%s has detected multiple instances of an MCA param being specified on
|
||||
the command line, with conflicting values:
|
||||
|
||||
MCA param: %s
|
||||
Value 1: %s
|
||||
Value 2: %s
|
||||
|
||||
This MCA param does not support multiple values, and the system is unable
|
||||
to identify which value was intended. If this was done in error, please
|
||||
re-issue the command with only one value. You may wish to review the
|
||||
output from ompi_info for guidance on accepted values for this param.
|
||||
|
||||
[orterun:server-not-found]
|
||||
%s was instructed to wait for the requested ompi-server, but was unable to
|
||||
establish contact with the server during the specified wait time:
|
||||
|
||||
Server uri: %s
|
||||
Timeout time: %ld
|
||||
|
||||
Error received: %s
|
||||
|
||||
Please check to ensure that the requested server matches the actual server
|
||||
information, and that the server is in operation.
|
||||
#
|
||||
[orterun:ompi-server-pid-bad]
|
||||
%s was unable to parse the PID of the %s to be used as the ompi-server.
|
||||
The option we were given was:
|
||||
|
||||
--ompi-server %s
|
||||
|
||||
Please remember that the correct format for this command line option is:
|
||||
|
||||
--ompi-server PID:pid-of-%s
|
||||
|
||||
where PID can be either "PID" or "pid".
|
||||
#
|
||||
[orterun:ompi-server-could-not-get-hnp-list]
|
||||
%s was unable to search the list of local %s contact files to find the
|
||||
specified pid. You might check to see if your local session directory
|
||||
is available and that you have read permissions on the top of that
|
||||
directory tree.
|
||||
#
|
||||
[orterun:ompi-server-pid-not-found]
|
||||
%s was unable to find an %s with the specified pid of %d that was to
|
||||
be used as the ompi-server. The option we were given was:
|
||||
|
||||
--ompi-server %s
|
||||
|
||||
Please remember that the correct format for this command line option is:
|
||||
|
||||
--ompi-server PID:pid-of-%s
|
||||
|
||||
where PID can be either "PID" or "pid".
|
||||
#
|
||||
[orterun:write_file]
|
||||
%s was unable to open a file to printout %s as requested. The file
|
||||
name given was:
|
||||
|
||||
File: %s
|
||||
#
|
||||
[orterun:multiple-paffinity-schemes]
|
||||
Multiple processor affinity schemes were specified (can only specify
|
||||
one):
|
||||
|
||||
Slot list: %s
|
||||
opal_paffinity_alone: true
|
||||
|
||||
Please specify only the one desired method.
|
||||
#
|
||||
[orterun:slot-list-failed]
|
||||
We were unable to successfully process/set the requested processor
|
||||
affinity settings:
|
||||
|
||||
Specified slot list: %s
|
||||
Error: %s
|
||||
|
||||
This could mean that a non-existent processor was specified, or
|
||||
that the specification had improper syntax.
|
||||
#
|
||||
[orterun:invalid-node-rank]
|
||||
An invalid node rank was obtained - this is probably something
|
||||
that should be reported to the OMPI developers.
|
||||
#
|
||||
[orterun:invalid-local-rank]
|
||||
An invalid local rank was obtained - this is probably something
|
||||
that should be reported to the OMPI developers.
|
||||
#
|
||||
[orterun:invalid-phys-cpu]
|
||||
An invalid physical processor id was returned when attempting to
|
||||
set processor affinity - please check to ensure that your system
|
||||
supports such functionality. If so, then this is probably something
|
||||
that should be reported to the OMPI developers.
|
||||
#
|
||||
[orterun:failed-set-paff]
|
||||
An attempt to set processor affinity has failed - please check to
|
||||
ensure that your system supports such functionality. If so, then
|
||||
this is probably something that should be reported to the OMPI
|
||||
developers.
|
||||
#
|
||||
[orterun:topo-not-supported]
|
||||
An attempt was made to bind a process to a specific hardware topology
|
||||
mapping (e.g., binding to a socket) but the operating system does not
|
||||
support such topology-aware actions. Talk to your local system
|
||||
administrator to find out if your system can support topology-aware
|
||||
functionality (e.g., Linux Kernels newer than v2.6.18).
|
||||
|
||||
Systems that do not support processor topology-aware functionality
|
||||
cannot use "bind to socket" and other related functionality.
|
||||
|
||||
Local host: %s
|
||||
Action attempted: %s %s
|
||||
Application name: %s
|
||||
#
|
||||
[orterun:binding-not-avail]
|
||||
A request to bind the processes if the operating system supports such
|
||||
an operation was made, but the OS does not support this operation:
|
||||
|
||||
Local host: %s
|
||||
Action requested: %s
|
||||
Application name: %s
|
||||
|
||||
Because the request was made on an "if-available" basis, the job was
|
||||
launched without taking the requested action. If this is not the
|
||||
desired behavior, talk to your local system administrator to find out
|
||||
if your system can support the requested action.
|
||||
#
|
||||
[orterun:not-enough-resources]
|
||||
Not enough %s were found on the local host to meet the requested
|
||||
binding action:
|
||||
|
||||
Local host: %s
|
||||
Action requested: %s
|
||||
Application name: %s
|
||||
|
||||
Please revise the request and try again.
|
||||
#
|
||||
[orterun:paffinity-missing-module]
|
||||
A request to bind processes was made, but no paffinity module
|
||||
was found:
|
||||
|
||||
Local host: %s
|
||||
|
||||
This is potentially a configuration problem. You can rerun your job without
|
||||
requesting binding, or check the configuration.
|
||||
#
|
||||
[orterun:invalid-slot-list-range]
|
||||
A slot list was provided that exceeds the boundaries on available
|
||||
resources:
|
||||
|
||||
Local host: %s
|
||||
Slot list: %s
|
||||
|
||||
Please check your boundaries and try again.
|
||||
#
|
||||
[orterun:proc-comm-failed]
|
||||
A critical communication path was lost to:
|
||||
|
||||
My name: %s
|
||||
Process name: %s
|
||||
Node: %s
|
||||
#
|
||||
[orterun:proc-mem-exceeded]
|
||||
A process exceeded memory limits:
|
||||
|
||||
Process name: %s
|
||||
Node: %s
|
||||
#
|
||||
[orterun:proc-stalled]
|
||||
One or more processes appear to have stalled - a monitored file
|
||||
failed to show the required activity.
|
||||
#
|
||||
[orterun:proc-sensor-exceeded]
|
||||
One or more processes have exceeded a specified sensor limit, but
|
||||
no further info is available.
|
||||
#
|
||||
[orterun:proc-called-abort]
|
||||
%s detected that one or more processes called %s_abort, thus causing
|
||||
the job to be terminated.
|
||||
#
|
||||
[orterun:proc-heartbeat-failed]
|
||||
%s failed to receive scheduled heartbeat communications from a remote
|
||||
process:
|
||||
|
||||
Process name: %s
|
||||
Node: %s
|
||||
#
|
||||
[orterun:non-zero-exit]
|
||||
%s detected that one or more processes exited with non-zero status, thus causing
|
||||
the job to be terminated. The first process to do so was:
|
||||
|
||||
Process name: %s
|
||||
Exit code: %d
|
||||
#
|
1293
orte/tools/mapreduce/mapreduce.1in
Обычный файл
1293
orte/tools/mapreduce/mapreduce.1in
Обычный файл
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
2138
orte/tools/mapreduce/mapreduce.c
Обычный файл
2138
orte/tools/mapreduce/mapreduce.c
Обычный файл
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
@ -621,3 +621,12 @@ the job to be terminated. The first process to do so was:
|
||||
Process name: %s
|
||||
Exit code: %d
|
||||
#
|
||||
[orterun:unrecognized-mr-type]
|
||||
%s does not recognize the type of job. This should not happen and
|
||||
indicates an ORTE internal problem.
|
||||
#
|
||||
[multiple-combiners]
|
||||
More than one combiner was specified. The combiner takes the output
|
||||
from the final reducer in each chain to produce a single, combined
|
||||
result. Thus, there can only be one combiner for a job. Please
|
||||
review your command line and try again.
|
||||
|
@ -203,6 +203,8 @@ const char *orte_job_state_to_str(orte_job_state_t state)
|
||||
return "UNDEFINED";
|
||||
case ORTE_JOB_STATE_INIT:
|
||||
return "PENDING INIT";
|
||||
case ORTE_JOB_STATE_INIT_COMPLETE:
|
||||
return "INIT_COMPLETE";
|
||||
case ORTE_JOB_STATE_ALLOCATE:
|
||||
return "PENDING ALLOCATION";
|
||||
case ORTE_JOB_STATE_MAP:
|
||||
|
@ -9,6 +9,9 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -1103,13 +1106,15 @@ int orte_util_decode_daemon_pidmap(opal_byte_object_t *bo)
|
||||
#endif
|
||||
orte_std_cntr_t n;
|
||||
opal_buffer_t buf;
|
||||
int rc, j;
|
||||
int rc, j, k;
|
||||
orte_job_t *jdata;
|
||||
orte_proc_t *proc, *pptr;
|
||||
orte_node_t *node;
|
||||
orte_node_t *node, *nptr;
|
||||
orte_proc_state_t *states=NULL;
|
||||
orte_app_idx_t *app_idx=NULL;
|
||||
int32_t *restarts=NULL;
|
||||
orte_job_map_t *map;
|
||||
bool found;
|
||||
|
||||
/* xfer the byte object to a buffer for unpacking */
|
||||
OBJ_CONSTRUCT(&buf, opal_buffer_t);
|
||||
@ -1212,6 +1217,11 @@ int orte_util_decode_daemon_pidmap(opal_byte_object_t *bo)
|
||||
}
|
||||
|
||||
/* xfer the data */
|
||||
map = jdata->map;
|
||||
if (NULL == map) {
|
||||
jdata->map = OBJ_NEW(orte_job_map_t);
|
||||
map = jdata->map;
|
||||
}
|
||||
for (i=0; i < num_procs; i++) {
|
||||
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, i))) {
|
||||
proc = OBJ_NEW(orte_proc_t);
|
||||
@ -1231,6 +1241,21 @@ int orte_util_decode_daemon_pidmap(opal_byte_object_t *bo)
|
||||
OBJ_RELEASE(pptr);
|
||||
opal_pointer_array_set_item(proc->node->procs, j, NULL);
|
||||
proc->node->num_procs--;
|
||||
if (0 == proc->node->num_procs) {
|
||||
/* remove node from the map */
|
||||
for (k=0; k < map->nodes->size; k++) {
|
||||
if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(map->nodes, k))) {
|
||||
continue;
|
||||
}
|
||||
if (nptr == proc->node) {
|
||||
/* maintain accounting */
|
||||
OBJ_RELEASE(nptr);
|
||||
opal_pointer_array_set_item(map->nodes, k, NULL);
|
||||
map->num_nodes--;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -1242,6 +1267,21 @@ int orte_util_decode_daemon_pidmap(opal_byte_object_t *bo)
|
||||
node = OBJ_NEW(orte_node_t);
|
||||
opal_pointer_array_set_item(orte_node_pool, nodes[i], node);
|
||||
}
|
||||
/* see if this node is already in the map */
|
||||
found = false;
|
||||
for (j=0; j < map->nodes->size; j++) {
|
||||
if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(map->nodes, j))) {
|
||||
continue;
|
||||
}
|
||||
if (nptr == node) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
opal_pointer_array_add(map->nodes, node);
|
||||
map->num_nodes++;
|
||||
}
|
||||
/* add the node to the proc */
|
||||
OBJ_RETAIN(node);
|
||||
proc->node = node;
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user