1
1

Roll in the revamped IOF subsystem. Per the email to the devel mailing list, this is a complete rewrite of the IOF framework, designed to simplify the code for maintainability and to support features we had planned but that were too difficult to implement in the old code. Specifically, the new code:

1. completely and cleanly separates responsibilities between the HNP, orted, and tool components.

2. removes all wireup messaging during launch and shutdown.

3. maintains flow control for stdin to avoid large-scale consumption of memory by orteds when large input files are forwarded. This is done using an xon/xoff protocol.

4. enables specification of stdin recipients on the mpirun cmd line. Allowed options include rank, "all", or "none". Default is rank 0.

5. creates a new MPI_Info key "ompi_stdin_target" that supports the above options for child jobs. Default is "none".

6. adds a new tool "orte-iof" that can connect to a running mpirun and display the output. Cmd line options allow selection of any combination of stdout, stderr, and stddiag. Default is stdout.

7. adds a new mpirun and orte-iof cmd line option "tag-output" that will tag each line of output with process name and stream ident. For example, "[1,0]<stdout>this is output"

This is not intended for the 1.3 release as it is a major change requiring considerable soak time.

This commit was SVN r19767.
Этот коммит содержится в:
Ralph Castain 2008-10-18 00:00:49 +00:00
родитель 4858c9b43c
Коммит 6e5d844c36
91 изменённых файлов: 4209 добавлений и 4336 удалений

Просмотреть файл

@ -1315,6 +1315,7 @@ AC_CONFIG_FILES([
orte/tools/wrappers/ortecc-wrapper-data.txt
orte/tools/wrappers/ortec++-wrapper-data.txt
orte/tools/orte-checkpoint/Makefile
orte/tools/orte-iof/Makefile
orte/tools/orte-restart/Makefile
orte/tools/orte-ps/Makefile
orte/tools/orte-clean/Makefile

Просмотреть файл

@ -501,6 +501,7 @@ static int spawn(int count, char **array_of_commands,
char cwd[OMPI_PATH_MAX];
char host[OMPI_PATH_MAX]; /*** should define OMPI_HOST_MAX ***/
char prefix[OMPI_PATH_MAX];
char stdin_target[OMPI_PATH_MAX];
orte_job_t *jdata;
orte_app_context_t *app;
@ -665,6 +666,20 @@ static int spawn(int count, char **array_of_commands,
if (non_mpi) {
jdata->controls |= ORTE_JOB_CONTROL_NON_ORTE_JOB;
}
/* see if user specified what to do with stdin - defaults to
* not forwarding stdin to child processes
*/
ompi_info_get (array_of_info[i], "ompi_stdin_target", valuelen, stdin_target, &flag);
if ( flag ) {
if (0 == strcmp(stdin_target, "all")) {
jdata->stdin_target = ORTE_VPID_WILDCARD;
} else if (0 == strcmp(stdin_target, "none")) {
jdata->stdin_target = ORTE_VPID_INVALID;
} else {
jdata->stdin_target = strtoul(stdin_target, NULL, 10);
}
}
}
/* default value: If the user did not tell us where to look for the

Просмотреть файл

@ -159,6 +159,9 @@ typedef void* orte_iov_base_ptr_t;
/* GRPCOMM types */
#define ORTE_GRPCOMM_MODE (OPAL_DSS_ID_DYNAMIC + 19)
/* IOF types */
#define ORTE_IOF_TAG (OPAL_DSS_ID_DYNAMIC + 20)
#endif /* !ORTE_DISABLE_FULL_SUPPORT */
#endif

Просмотреть файл

@ -226,7 +226,7 @@ int orte_ess_base_app_finalize(void)
orte_rml_base_close();
orte_session_dir_finalize(ORTE_PROC_MY_NAME);
return ORTE_SUCCESS;
}

Просмотреть файл

@ -190,8 +190,7 @@ int orte_ess_base_orted_setup(void)
goto error;
}
/*
* setup I/O forwarding system - must come after we init routes */
/* setup I/O forwarding system - must come after we init routes */
if (ORTE_SUCCESS != (ret = orte_iof_base_open())) {
ORTE_ERROR_LOG(ret);
error = "orte_iof_base_open";
@ -303,7 +302,7 @@ int orte_ess_base_orted_finalize(void)
orte_wait_finalize();
orte_iof_base_close();
/* finalize selected modules */
if (plm_in_use) {
orte_plm_base_close();

Просмотреть файл

@ -36,6 +36,7 @@
#include "orte/mca/rml/base/base.h"
#include "orte/mca/routed/base/base.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/iof/base/base.h"
#if OPAL_ENABLE_FT == 1
#include "orte/mca/snapc/base/base.h"
#endif
@ -105,6 +106,27 @@ int orte_ess_base_tool_setup(void)
goto error;
}
/* setup the routed info - the selected routed component
* will know what to do.
*/
if (ORTE_SUCCESS != (ret = orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, NULL))) {
ORTE_ERROR_LOG(ret);
error = "orte_routed.init_routes";
goto error;
}
/* setup I/O forwarding system - must come after we init routes */
if (ORTE_SUCCESS != (ret = orte_iof_base_open())) {
ORTE_ERROR_LOG(ret);
error = "orte_iof_base_open";
goto error;
}
if (ORTE_SUCCESS != (ret = orte_iof_base_select())) {
ORTE_ERROR_LOG(ret);
error = "orte_iof_base_select";
goto error;
}
#if OPAL_ENABLE_FT == 1
/*
* Setup the SnapC
@ -140,6 +162,7 @@ int orte_ess_base_tool_finalize(void)
* a very small subset of orte_init - ensure that
* I only back those elements out
*/
orte_iof_base_close();
orte_routed_base_close();
orte_rml_base_close();

Просмотреть файл

@ -73,18 +73,24 @@
static int rte_init(char flags);
static int rte_finalize(void);
static void rte_abort(int status, bool report) __opal_attribute_noreturn__;
static bool proc_is_local(orte_process_name_t *proc);
static char* proc_get_hostname(orte_process_name_t *proc);
static uint32_t proc_get_arch(orte_process_name_t *proc);
static orte_local_rank_t proc_get_local_rank(orte_process_name_t *proc);
static orte_node_rank_t proc_get_node_rank(orte_process_name_t *proc);
static int update_arch(orte_process_name_t *proc, uint32_t arch);
/*
 * ESS module table for the HNP.
 *
 * The table is filled positionally.  The proc_* and update_arch slots
 * point at the static helpers prototyped above; the stale NULL
 * placeholders that used to occupy those slots are gone so that every
 * slot receives exactly one initializer.  ft_event stays unset.
 */
orte_ess_base_module_t orte_ess_hnp_module = {
    rte_init,
    rte_finalize,
    rte_abort,
    proc_is_local,
    proc_get_hostname,
    proc_get_arch,
    proc_get_local_rank,
    proc_get_node_rank,
    update_arch,
    NULL /* ft_event */
};
@ -341,8 +347,7 @@ static int rte_init(char flags)
goto error;
}
/*
* setup I/O forwarding system - must come after we init routes */
/* setup I/O forwarding system - must come after we init routes */
if (ORTE_SUCCESS != (ret = orte_iof_base_open())) {
ORTE_ERROR_LOG(ret);
error = "orte_iof_base_open";
@ -510,3 +515,143 @@ static void rte_abort(int status, bool report)
exit(status);
}
/*
 * Report whether a process is one of the procs recorded on the HNP's
 * own node (node 0 of orte_node_pool).
 *
 * @param proc  name (jobid/vpid) of the process to look up
 * @return true when a proc with the same jobid and vpid is found on
 *         node 0, false otherwise
 */
static bool proc_is_local(orte_process_name_t *proc)
{
    orte_node_t **nodes;
    orte_proc_t **procs;
    orte_vpid_t i;

    /* the HNP is always on node=0 of the node array */
    nodes = (orte_node_t**)orte_node_pool->addr;
    procs = (orte_proc_t**)nodes[0]->procs->addr;

    /* cycle through the array of local procs */
    for (i = 0; i < nodes[0]->num_procs; i++) {
        /* tolerate an unset slot instead of dereferencing NULL */
        if (NULL == procs[i]) {
            continue;
        }
        if (procs[i]->name.jobid == proc->jobid &&
            procs[i]->name.vpid == proc->vpid) {
            OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
                                 "%s ess:hnp: proc %s is LOCAL",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(proc)));
            return true;
        }
    }

    /* tag the message with this component (hnp), matching the LOCAL case */
    OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
                         "%s ess:hnp: proc %s is REMOTE",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(proc)));
    return false;
}
/*
 * Locate the proc object for a process name.
 *
 * @param proc  name (jobid/vpid) of the process to look up
 * @return the entry stored at index proc->vpid of the job's proc array,
 *         or NULL when the job is unknown or the vpid is not a valid
 *         index for that job
 */
static orte_proc_t* find_proc(orte_process_name_t *proc)
{
    orte_job_t *jdata;
    orte_proc_t **procs;

    if (NULL == (jdata = orte_get_job_data_object(proc->jobid))) {
        return NULL;
    }

    /* valid indices are 0 .. num_procs-1, so vpid == num_procs must be
     * rejected as well (the previous "<" test let it through) */
    if (proc->vpid >= jdata->num_procs) {
        return NULL;
    }

    procs = (orte_proc_t**)jdata->procs->addr;
    return procs[proc->vpid];
}
/*
 * Return the name of the node hosting the given process.
 *
 * @param proc  name of the process to look up
 * @return the node's name string as stored in the node object, or NULL
 *         (after logging ORTE_ERR_NOT_FOUND) when find_proc() has no entry
 */
static char* proc_get_hostname(orte_process_name_t *proc)
{
    orte_proc_t *entry = find_proc(proc);

    if (NULL != entry) {
        OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
                             "%s ess:hnp: proc %s is on host %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(proc),
                             entry->node->name));
        return entry->node->name;
    }

    ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
    return NULL;
}
/*
 * Return the architecture value recorded on the node hosting the process.
 *
 * @param proc  name of the process to look up
 * @return the node's arch field, or 0 (after logging ORTE_ERR_NOT_FOUND)
 *         when find_proc() has no entry
 */
static uint32_t proc_get_arch(orte_process_name_t *proc)
{
    orte_proc_t *entry = find_proc(proc);

    if (NULL != entry) {
        OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
                             "%s ess:hnp: proc %s has arch %0x",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(proc),
                             entry->node->arch));
        return entry->node->arch;
    }

    ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
    return 0;
}
/*
 * Overwrite the architecture value stored on the node hosting the process.
 *
 * @param proc  name of the process whose node is updated
 * @param arch  new architecture value
 * @return ORTE_SUCCESS, or ORTE_ERR_NOT_FOUND (also logged) when
 *         find_proc() has no entry
 */
static int update_arch(orte_process_name_t *proc, uint32_t arch)
{
    orte_proc_t *entry = find_proc(proc);

    if (NULL != entry) {
        OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
                             "%s ess:hnp: updating proc %s to arch %0x",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(proc),
                             arch));
        entry->node->arch = arch;
        return ORTE_SUCCESS;
    }

    ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
    return ORTE_ERR_NOT_FOUND;
}
/*
 * Return the local rank stored on the proc object.
 *
 * @param proc  name of the process to look up
 * @return the proc's local_rank, or UINT8_MAX (after logging
 *         ORTE_ERR_NOT_FOUND) when find_proc() has no entry
 */
static orte_local_rank_t proc_get_local_rank(orte_process_name_t *proc)
{
    orte_proc_t *entry = find_proc(proc);

    if (NULL != entry) {
        OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
                             "%s ess:hnp: proc %s has local rank %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(proc),
                             (int)entry->local_rank));
        return entry->local_rank;
    }

    ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
    return UINT8_MAX;
}
/*
 * Return the node rank stored on the proc object.
 *
 * @param proc  name of the process to look up
 * @return the proc's node_rank, or UINT8_MAX (after logging
 *         ORTE_ERR_NOT_FOUND) when find_proc() has no entry
 */
static orte_node_rank_t proc_get_node_rank(orte_process_name_t *proc)
{
    orte_proc_t *entry = find_proc(proc);

    if (NULL != entry) {
        OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
                             "%s ess:hnp: proc %s has node rank %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(proc),
                             (int)entry->node_rank));
        return entry->node_rank;
    }

    ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
    return UINT8_MAX;
}

Просмотреть файл

@ -25,17 +25,12 @@ libmca_iof_la_SOURCES += \
if !ORTE_DISABLE_FULL_SUPPORT
headers += \
base/iof_base_header.h \
base/iof_base_endpoint.h \
base/iof_base_fragment.h \
base/iof_base_setup.h
libmca_iof_la_SOURCES += \
base/iof_base_close.c \
base/iof_base_select.c \
base/iof_base_flush.c \
base/iof_base_endpoint.c \
base/iof_base_fragment.c \
base/iof_base_output.c \
base/iof_base_setup.c
endif

Просмотреть файл

@ -37,6 +37,9 @@
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_SIGNAL_H
#include <signal.h>
#endif
#include "opal/class/opal_free_list.h"
#include "opal/threads/condition.h"
@ -50,21 +53,150 @@ ORTE_DECLSPEC int orte_iof_base_open(void);
#if !ORTE_DISABLE_FULL_SUPPORT
/*
* Maximum size of single msg
*/
#define ORTE_IOF_BASE_MSG_MAX 1024
#define ORTE_IOF_BASE_TAG_MAX 50
#define ORTE_IOF_BASE_TAGGED_OUT_MAX 2048
#define ORTE_IOF_MAX_INPUT_BUFFERS 1024
/*
 * Write-side event record: an event object, the descriptor it targets,
 * and a list of queued output items.
 */
typedef struct {
opal_list_item_t super;
/* NOTE(review): presumably true while ev is armed for writing - confirm against the write handler */
bool pending;
/* event object; ORTE_IOF_SINK_DEFINE sets it up for OPAL_EV_WRITE|OPAL_EV_PERSIST on fd */
opal_event_t ev;
/* descriptor that queued data is written to */
int fd;
/* queue of orte_iof_write_output_t items (orte_iof_base_close drains it as such) */
opal_list_t outputs;
} orte_iof_write_event_t;
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_iof_write_event_t);
/*
 * Global state of the IOF base framework.
 *
 * iof_output and iof_components_opened were each listed twice, which is
 * a duplicate-member error; every member now appears exactly once.  No
 * distinct member was dropped, so code that references any of the
 * fields below is unaffected.
 */
struct orte_iof_base_t {
    /* output stream id passed to the opal_output/verbose calls */
    int iof_output;
    /* components opened by the framework; closed in orte_iof_base_close */
    opal_list_t iof_components_opened;
    /* when true, orte_iof_base_close calls the module's flush first */
    bool iof_flush;
    /* list of endpoint objects, guarded by iof_lock */
    opal_list_t iof_endpoints;
    opal_mutex_t iof_lock;
    /* signalled by the endpoint write handler once a sink has drained */
    opal_condition_t iof_condition;
    /* non-zero when someone is waiting on iof_condition */
    size_t iof_waiting;
    opal_free_list_t iof_fragments;
    /* max un-acked bytes before an endpoint's read event is disabled */
    size_t iof_window_size;
    /* peer that the endpoint read handler forwards fragments to */
    orte_process_name_t iof_service;
    /* guards the two write-event records below during close */
    opal_mutex_t iof_write_output_lock;
    orte_iof_write_event_t iof_write_stdout;
    orte_iof_write_event_t iof_write_stderr;
};
typedef struct orte_iof_base_t orte_iof_base_t;
/*
 * Sink descriptor: a named process, a stream tag, and the write-event
 * record used to deliver data to it.
 */
typedef struct {
opal_list_item_t super;
/* process name; filled from the nm argument of ORTE_IOF_SINK_DEFINE */
orte_process_name_t name;
/* NOTE(review): presumably the daemon hosting `name`; the sink macro does not set it - confirm */
orte_process_name_t daemon;
/* stream tag; filled from the tg argument of ORTE_IOF_SINK_DEFINE */
orte_iof_tag_t tag;
/* write event and output queue for this sink */
orte_iof_write_event_t wev;
#if OMPI_ENABLE_DEBUG
/* debug builds only: source location where the sink was defined */
char *file;
int line;
#endif
} orte_iof_sink_t;
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_iof_sink_t);
/*
 * Read-side event record: a named process, a stream tag, and the event
 * object that ORTE_IOF_READ_EVENT sets up for OPAL_EV_READ.
 */
typedef struct {
opal_list_item_t super;
/* process name; filled from the nm argument of ORTE_IOF_READ_EVENT */
orte_process_name_t name;
/* event object registered for OPAL_EV_READ|OPAL_EV_PERSIST */
opal_event_t ev;
/* stream tag; filled from the tg argument of ORTE_IOF_READ_EVENT */
orte_iof_tag_t tag;
/* NOTE(review): presumably tracks whether ev is currently added; ORTE_IOF_READ_EVENT does not assign it - confirm */
bool active;
#if OMPI_ENABLE_DEBUG
/* debug builds only: source location where the read event was defined */
char *file;
int line;
#endif
} orte_iof_read_event_t;
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_iof_read_event_t);
/*
 * One queued chunk of output waiting to be written to a descriptor.
 */
typedef struct {
opal_list_item_t super;
/* payload buffer, at most ORTE_IOF_BASE_TAGGED_OUT_MAX bytes */
char data[ORTE_IOF_BASE_TAGGED_OUT_MAX];
/* number of bytes of data to write (used as the write() length in orte_iof_base_close) */
int numbytes;
#if OMPI_ENABLE_DEBUG
/* debug builds only: source location */
char *file;
int line;
#endif
} orte_iof_write_output_t;
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_iof_write_output_t);
#if OMPI_ENABLE_DEBUG
/*
 * ORTE_IOF_SINK_DEFINE (debug build)
 *
 * Allocates an orte_iof_sink_t, copies jobid/vpid from nm, stores tg as
 * the tag and fid as the write-event descriptor, sets up (but does not
 * add) a persistent write event whose callback is wrthndlr and whose
 * callback argument is the sink's write-event record, appends the sink
 * to eplist, and stores the new sink through snk.  The debug variant
 * also logs and records the defining __FILE__/__LINE__ on the sink.
 *
 *   snk      - address of an orte_iof_sink_t* that receives the new sink
 *   nm       - pointer to the process name to copy
 *   fid      - file descriptor to write to
 *   tg       - stream tag
 *   wrthndlr - event callback
 *   eplist   - list the sink is appended to
 *
 * NOTE(review): the expansion ends in "while(0);" so the macro supplies
 * its own terminating semicolon; using it as the sole body of an
 * if/else without braces will not compile as expected.
 */
#define ORTE_IOF_SINK_DEFINE(snk, nm, fid, tg, wrthndlr, eplist) \
do { \
orte_iof_sink_t *ep; \
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output, \
"defining endpoint: %s %d", \
__FILE__, __LINE__)); \
ep = OBJ_NEW(orte_iof_sink_t); \
ep->name.jobid = (nm)->jobid; \
ep->name.vpid = (nm)->vpid; \
ep->tag = (tg); \
ep->wev.fd = (fid); \
opal_event_set(&(ep->wev.ev), ep->wev.fd, \
OPAL_EV_WRITE|OPAL_EV_PERSIST, \
wrthndlr, &(ep->wev)); \
opal_list_append((eplist), &ep->super); \
*(snk) = ep; \
ep->file = strdup(__FILE__); \
ep->line = __LINE__; \
} while(0);
/*
 * ORTE_IOF_READ_EVENT (debug build)
 *
 * Allocates an orte_iof_read_event_t, copies jobid/vpid from nm, stores
 * tg as the tag, and sets up a persistent read event on fid whose
 * callback is cbfunc and whose callback argument is the new record.
 * When actv is true the event is added immediately and the record is
 * appended to revlist; otherwise the event is left un-added and the
 * record is prepended to revlist.  The debug variant also logs and
 * records the defining __FILE__/__LINE__.
 *
 * The macro does not assign the record's `active` member.
 *
 * NOTE(review): the expansion ends in "while(0);" so the macro supplies
 * its own terminating semicolon.
 */
#define ORTE_IOF_READ_EVENT(nm, fid, tg, cbfunc, revlist, actv) \
do { \
orte_iof_read_event_t *rev; \
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output, \
"%s defining read event for %s: %s %d", \
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
ORTE_NAME_PRINT((nm)), \
__FILE__, __LINE__)); \
rev = OBJ_NEW(orte_iof_read_event_t); \
rev->name.jobid = (nm)->jobid; \
rev->name.vpid = (nm)->vpid; \
rev->tag = (tg); \
rev->file = strdup(__FILE__); \
rev->line = __LINE__; \
opal_event_set(&rev->ev, (fid), \
OPAL_EV_READ | OPAL_EV_PERSIST, \
(cbfunc), rev); \
if ((actv)) { \
opal_event_add(&rev->ev, 0); \
opal_list_append((revlist), &rev->super); \
} else { \
opal_list_prepend((revlist), &rev->super); \
} \
} while(0);
#else
/*
 * ORTE_IOF_SINK_DEFINE (non-debug build)
 *
 * Same contract and parameter list as the debug variant, minus the
 * logging and the file/line bookkeeping.  The previous non-debug form
 * took a different argument list and wrote ep->fd and ep->local, which
 * are not members of orte_iof_sink_t, so non-debug builds could not
 * compile call sites written against the debug form.
 *
 *   snk      - address of an orte_iof_sink_t* that receives the new sink
 *   nm       - pointer to the process name to copy
 *   fid      - file descriptor to write to
 *   tg       - stream tag
 *   wrthndlr - event callback (set up, not added)
 *   eplist   - list the sink is appended to
 *
 * The trailing semicolon after while(0) is kept so both build flavours
 * expand identically at existing call sites.
 */
#define ORTE_IOF_SINK_DEFINE(snk, nm, fid, tg, wrthndlr, eplist)    \
    do {                                                            \
        orte_iof_sink_t *ep;                                        \
        ep = OBJ_NEW(orte_iof_sink_t);                              \
        ep->name.jobid = (nm)->jobid;                               \
        ep->name.vpid = (nm)->vpid;                                 \
        ep->tag = (tg);                                             \
        ep->wev.fd = (fid);                                         \
        opal_event_set(&(ep->wev.ev), ep->wev.fd,                   \
                       OPAL_EV_WRITE|OPAL_EV_PERSIST,               \
                       wrthndlr, &(ep->wev));                       \
        opal_list_append((eplist), &ep->super);                     \
        *(snk) = ep;                                                \
    } while(0);
/*
 * ORTE_IOF_READ_EVENT (non-debug build)
 *
 * Allocates an orte_iof_read_event_t, copies jobid/vpid from nm, stores
 * tg as the tag, and sets up a persistent read event on fid with cbfunc
 * as callback and the new record as callback argument.  When actv is
 * true the event is added and the record appended to revlist; otherwise
 * the event is left un-added and the record is prepended.
 *
 * Fixes the "$rev->super" typo in the prepend branch, which is not
 * valid C; it now takes the address with "&" like the append branch.
 *
 * The trailing semicolon after while(0) is kept so both build flavours
 * expand identically at existing call sites.
 */
#define ORTE_IOF_READ_EVENT(nm, fid, tg, cbfunc, revlist, actv)     \
    do {                                                            \
        orte_iof_read_event_t *rev;                                 \
        rev = OBJ_NEW(orte_iof_read_event_t);                       \
        rev->name.jobid = (nm)->jobid;                              \
        rev->name.vpid = (nm)->vpid;                                \
        rev->tag = (tg);                                            \
        opal_event_set(&rev->ev, (fid),                             \
                       OPAL_EV_READ | OPAL_EV_PERSIST,              \
                       (cbfunc), rev);                              \
        if ((actv)) {                                               \
            opal_event_add(&rev->ev, 0);                            \
            opal_list_append((revlist), &rev->super);               \
        } else {                                                    \
            opal_list_prepend((revlist), &rev->super);              \
        }                                                           \
    } while(0);
#endif
ORTE_DECLSPEC int orte_iof_base_close(void);
ORTE_DECLSPEC int orte_iof_base_select(void);
@ -72,6 +204,12 @@ ORTE_DECLSPEC int orte_iof_base_flush(void);
ORTE_DECLSPEC extern orte_iof_base_t orte_iof_base;
/* base functions */
ORTE_DECLSPEC int orte_iof_base_write_output(orte_process_name_t *name, orte_iof_tag_t stream,
unsigned char *data, int numbytes,
orte_iof_write_event_t *channel);
ORTE_DECLSPEC void orte_iof_base_write_handler(int fd, short event, void *cbdata);
#endif /* ORTE_DISABLE_FULL_SUPPORT */
END_C_DECLS

Просмотреть файл

@ -24,47 +24,67 @@
#include "opal/event/event.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "orte/util/proc_info.h"
#include "orte/mca/iof/iof.h"
#include "orte/mca/iof/base/base.h"
#include "orte/mca/iof/base/iof_base_endpoint.h"
/*
 * Shut down the IOF base framework.
 *
 * In order, as written: flush through the selected module when
 * iof_flush is set, call the module's finalize hook when one is
 * provided, close any components still open, release every endpoint and
 * destruct the endpoint-era members under iof_lock, then - under
 * iof_write_output_lock and only when this process is not a daemon -
 * make one last write() attempt for each item still queued for stdout
 * and stderr before destructing those write-event records.
 *
 * Always returns ORTE_SUCCESS.
 *
 * NOTE(review): this body appears to carry both the endpoint/fragment
 * teardown and the write-event teardown; confirm both halves are meant
 * to be present together.
 */
int orte_iof_base_close(void)
{
opal_list_item_t* item;
/* We only need to flush if an iof component was successfully
selected */
if (orte_iof_base.iof_flush) {
orte_iof.iof_flush();
orte_iof_base.iof_flush = false;
}
/* finalize component */
if (NULL != orte_iof.iof_finalize) {
orte_iof.iof_finalize();
}
bool dump;
opal_list_item_t *item;
orte_iof_write_output_t *output;
int num_written;
/* shutdown any remaining opened components */
if (0 != opal_list_get_size(&orte_iof_base.iof_components_opened)) {
mca_base_components_close(orte_iof_base.iof_output,
&orte_iof_base.iof_components_opened, NULL);
}
/* final cleanup of resources */
OPAL_THREAD_LOCK(&orte_iof_base.iof_lock);
while((item = opal_list_remove_first(&orte_iof_base.iof_endpoints)) != NULL) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&orte_iof_base.iof_components_opened);
OBJ_DESTRUCT(&orte_iof_base.iof_endpoints);
OBJ_DESTRUCT(&orte_iof_base.iof_condition);
OBJ_DESTRUCT(&orte_iof_base.iof_fragments);
OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
OBJ_DESTRUCT(&orte_iof_base.iof_lock);
OPAL_THREAD_LOCK(&orte_iof_base.iof_write_output_lock);
/* the stdout/stderr write-event records are only drained and destructed when not a daemon */
if (!orte_process_info.daemon) {
/* check if anything is still trying to be written out */
if (!opal_list_is_empty(&orte_iof_base.iof_write_stdout.outputs)) {
dump = false;
/* make one last attempt to write this out */
while (NULL != (item = opal_list_remove_first(&orte_iof_base.iof_write_stdout.outputs))) {
output = (orte_iof_write_output_t*)item;
if (!dump) {
num_written = write(orte_iof_base.iof_write_stdout.fd, output->data, output->numbytes);
/* a short write (or a negative error return) stops further write attempts */
if (num_written < output->numbytes) {
/* don't retry - just cleanout the list and dump it */
dump = true;
}
}
/* every queued item is released whether or not it was written */
OBJ_RELEASE(output);
}
}
OBJ_DESTRUCT(&orte_iof_base.iof_write_stdout);
if (!opal_list_is_empty(&orte_iof_base.iof_write_stderr.outputs)) {
dump = false;
/* make one last attempt to write this out */
while (NULL != (item = opal_list_remove_first(&orte_iof_base.iof_write_stderr.outputs))) {
output = (orte_iof_write_output_t*)item;
if (!dump) {
num_written = write(orte_iof_base.iof_write_stderr.fd, output->data, output->numbytes);
/* a short write (or a negative error return) stops further write attempts */
if (num_written < output->numbytes) {
/* don't retry - just cleanout the list and dump it */
dump = true;
}
}
/* every queued item is released whether or not it was written */
OBJ_RELEASE(output);
}
}
OBJ_DESTRUCT(&orte_iof_base.iof_write_stderr);
}
OPAL_THREAD_UNLOCK(&orte_iof_base.iof_write_output_lock);
OBJ_DESTRUCT(&orte_iof_base.iof_write_output_lock);
return ORTE_SUCCESS;
}

Просмотреть файл

@ -1,826 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include <stdlib.h>
#include <string.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <errno.h>
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#else
#ifdef HAVE_SYS_FCNTL_H
#include <sys/fcntl.h>
#endif
#endif
#ifdef HAVE_NETINET_IN_H
#include <netinet/in.h>
#endif
#ifdef HAVE_SIGNAL_H
#include <signal.h>
#endif /* HAVE_SIGNAL_H */
#include "orte/util/show_help.h"
#include "orte/mca/rml/rml.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/iof/base/base.h"
#include "orte/mca/iof/base/iof_base_endpoint.h"
#include "orte/mca/iof/base/iof_base_fragment.h"
/*
* Globals
*/
static bool sigpipe_event_initialized = false;
static struct opal_event sigpipe_event;
/*
 * SIGPIPE handler that deliberately does nothing.  Having a handler
 * installed is what keeps the process from dying on SIGPIPE; no action
 * is required when the signal arrives.
 */
static void sigpipe_signal_callback(int fd, short event, void *arg)
{
    /* arguments are intentionally ignored */
    (void)fd;
    (void)event;
    (void)arg;
}
/**
 * Endpoint constructor: no descriptor yet (-1), zeroed sequence/ack
 * counters, SOURCE mode by default, a cleared event object, and three
 * empty lists (source frags, sink frags, callbacks).
 */
static void orte_iof_base_endpoint_construct(orte_iof_base_endpoint_t* endpoint)
{
    /* scalar state */
    endpoint->ep_fd = -1;
    endpoint->ep_ack = 0;
    endpoint->ep_seq = 0;
    endpoint->ep_mode = ORTE_IOF_SOURCE; /* default value */

    /* event object starts out all-zero */
    memset(&endpoint->ep_event, 0, sizeof(endpoint->ep_event));

    /* embedded lists */
    OBJ_CONSTRUCT(&endpoint->ep_source_frags, opal_list_t);
    OBJ_CONSTRUCT(&endpoint->ep_sink_frags, opal_list_t);
    OBJ_CONSTRUCT(&endpoint->ep_callbacks, opal_list_t);
}
/*
 * Endpoint destructor: removes the endpoint's event when a descriptor
 * is still attached (ep_fd >= 0), then tears down the three embedded
 * lists.  The descriptor itself is not closed here.
 */
static void orte_iof_base_endpoint_destruct(orte_iof_base_endpoint_t* endpoint)
{
    if (0 <= endpoint->ep_fd) {
        opal_event_del(&endpoint->ep_event);
    }

    OBJ_DESTRUCT(&endpoint->ep_source_frags);
    OBJ_DESTRUCT(&endpoint->ep_sink_frags);
    OBJ_DESTRUCT(&endpoint->ep_callbacks);
}
OBJ_CLASS_INSTANCE(
orte_iof_base_endpoint_t,
opal_list_item_t,
orte_iof_base_endpoint_construct,
orte_iof_base_endpoint_destruct);
/**
 * Callback-record constructor: starts with no function and no user data.
 */
static void orte_iof_base_callback_construct(orte_iof_base_callback_t* cb)
{
    cb->cb_data = NULL;
    cb->cb_func = 0;
}
OBJ_CLASS_INSTANCE(
orte_iof_base_callback_t,
opal_list_item_t,
orte_iof_base_callback_construct,
NULL);
/*
 * Completion callback for the non-blocking RML send started by the
 * endpoint read handler.  cbdata is the fragment that was sent: it is
 * unlinked from its owner's source-fragment list, handed back through
 * ORTE_IOF_BASE_FRAG_RETURN, and the owner's reference taken when the
 * send was started is dropped.
 */
static void orte_iof_base_endpoint_send_cb(
    int status,
    orte_process_name_t* peer,
    struct iovec* msg,
    int count,
    orte_rml_tag_t tag,
    void* cbdata)
{
    orte_iof_base_frag_t* sent = (orte_iof_base_frag_t*)cbdata;
    orte_iof_base_endpoint_t* owner = sent->frag_owner;

    opal_list_remove_item(&owner->ep_source_frags, &sent->super.super);
    opal_output_verbose(1, orte_iof_base.iof_output,
                        "iof_base_endpoint: send cb, source_frags list len: %d",
                        (int) opal_list_get_size(&owner->ep_source_frags));

    ORTE_IOF_BASE_FRAG_RETURN(sent);

    /* balances the OBJ_RETAIN done in
       orte_iof_base_endpoint_read_handler() before the send */
    OBJ_RELEASE(owner);
}
/*
 * Event callback: data is available to read on a source endpoint.
 *
 * fd     - descriptor to read from
 * flags  - event flags (not used by this function)
 * cbdata - the orte_iof_base_endpoint_t this event belongs to
 *
 * Reads up to one fragment's worth of data, wraps it in a message
 * header, queues the fragment on the endpoint's source list, and starts
 * a non-blocking RML send to orte_iof_base.iof_service.  EOF and hard
 * read errors are both forwarded as a zero-length message after the
 * endpoint has been marked closed.  When the number of un-acked bytes
 * exceeds iof_window_size the read event is removed.
 *
 * NOTE(review): the return value of orte_rml.send_nb() is stored in rc
 * but never examined.
 * NOTE(review): in the __WINDOWS__ branch the result of ReadFile() is
 * not checked before `readed` is used.
 */
static void orte_iof_base_endpoint_read_handler(int fd, short flags, void *cbdata)
{
orte_iof_base_endpoint_t* endpoint = (orte_iof_base_endpoint_t*)cbdata;
orte_iof_base_frag_t* frag;
orte_iof_base_header_t* hdr;
int rc;
/* allocate a fragment */
ORTE_IOF_BASE_FRAG_ALLOC(frag,rc);
if(NULL == frag) {
/* JMS shouldn't we do something here? */
return;
}
OPAL_THREAD_LOCK(&orte_iof_base.iof_lock);
/* read up to the fragment size */
#if !defined(__WINDOWS__)
rc = read(fd, frag->frag_data, sizeof(frag->frag_data));
#else
{
DWORD readed;
HANDLE handle = (HANDLE)_get_osfhandle(fd);
ReadFile(handle, frag->frag_data, sizeof(frag->frag_data), &readed, NULL);
rc = (int)readed;
}
#endif /* !defined(__WINDOWS__) */
if (rc < 0) {
/* non-blocking, retry */
if (EAGAIN == errno || EINTR == errno) {
ORTE_IOF_BASE_FRAG_RETURN(frag);
OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
return;
}
/* Error on the connection */
opal_output_verbose(1, orte_iof_base.iof_output, "iof_base_endpoint: read handler, error on read");
orte_iof_base_endpoint_closed(endpoint);
/* Fall through to send 0 byte message to other side
indicating that the endpoint is now closed. */
rc = 0;
} else if (rc == 0) {
/* peer has closed connection (will fall through to send a 0
byte message, therefore telling the RML side that the fd
side has closed its connection) */
opal_output_verbose(1, orte_iof_base.iof_output, "iof_base_endpoint: read handler, peer closed fd");
orte_iof_base_endpoint_closed(endpoint);
}
/* Do not append the fragment before we know that we have some
data (even a 0 byte mesage is OK -- that indicates that the
file descriptor has closed) */
frag->frag_owner = endpoint;
opal_list_append(&endpoint->ep_source_frags, &frag->super.super);
opal_output_verbose(1, orte_iof_base.iof_output, "iof_base_endpoint: read handler, source_frags list len: %d",
(int) opal_list_get_size(&endpoint->ep_source_frags));
/* the second iovec entry carries the payload; its length is the byte count just read */
frag->frag_iov[1].iov_len = frag->frag_len = rc;
/* fill in the header */
hdr = &frag->frag_hdr;
hdr->hdr_common.hdr_type = ORTE_IOF_BASE_HDR_MSG;
hdr->hdr_msg.msg_origin = endpoint->ep_origin;
hdr->hdr_msg.msg_proxy = *ORTE_PROC_MY_NAME;
hdr->hdr_msg.msg_tag = endpoint->ep_tag;
hdr->hdr_msg.msg_seq = endpoint->ep_seq;
hdr->hdr_msg.msg_len = frag->frag_len;
ORTE_IOF_BASE_HDR_MSG_HTON(hdr->hdr_msg);
/* if window size has been exceeded - disable forwarding */
endpoint->ep_seq += frag->frag_len;
if (ORTE_IOF_BASE_SEQDIFF(endpoint->ep_seq,endpoint->ep_ack) > orte_iof_base.iof_window_size) {
opal_output_verbose(1, orte_iof_base.iof_output, "iof_base_endpoint read handler: window exceeded -- reading disabled");
opal_event_del(&endpoint->ep_event);
}
OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
/* Increment the refcount on the endpoint so that it doesn't get
deleted before the frag */
OBJ_RETAIN(endpoint);
/* start non-blocking RML call to forward received data */
opal_output_verbose(1, orte_iof_base.iof_output, "iof_base_endpoint read handler: sending data to svc");
rc = orte_rml.send_nb(
&orte_iof_base.iof_service,
frag->frag_iov,
2,
ORTE_RML_TAG_IOF_SVC,
0,
orte_iof_base_endpoint_send_cb,
frag);
}
/**
 * Event callback: a sink endpoint's descriptor is writable.
 *
 * sd    - descriptor reported by the event (the function writes to
 *         endpoint->ep_fd, not to sd)
 * flags - event flags (not used by this function)
 * user  - the orte_iof_base_endpoint_t this event belongs to
 *
 * Drains the endpoint's queue of sink fragments in order.  A zero-length
 * fragment means the peer closed: the endpoint is marked closed and the
 * handler returns.  EAGAIN stops draining until the next event, EINTR
 * retries the same fragment, and any other write error closes the
 * endpoint and sends an ack-and-close for the fragment.  A partial write
 * advances the fragment's pointer/length and stops.  Fully written
 * fragments are removed from the queue and acked with the lock dropped.
 * When the queue is empty the write event is removed and, if anyone is
 * waiting, iof_condition is signalled.
 *
 * NOTE(review): on the write-error path orte_iof_base_frag_ack() is
 * called while iof_lock is still held, whereas the success path drops
 * the lock around the ack - confirm this asymmetry is intended.
 */
static void orte_iof_base_endpoint_write_handler(int sd, short flags, void *user)
{
int errno_save;
orte_iof_base_endpoint_t* endpoint = (orte_iof_base_endpoint_t*)user;
/*
* step through the list of queued fragments and attempt to write
* until the output descriptor would block
*/
OPAL_THREAD_LOCK(&orte_iof_base.iof_lock);
while(opal_list_get_size(&endpoint->ep_sink_frags)) {
orte_iof_base_frag_t* frag = (orte_iof_base_frag_t*)opal_list_get_first(&endpoint->ep_sink_frags);
int rc;
/* close connection on zero byte message */
if(frag->frag_len == 0) {
opal_output_verbose(1, orte_iof_base.iof_output, "iof_base_endpoint: write handler, peer closed fd");
orte_iof_base_endpoint_closed(endpoint);
OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
return;
}
/* progress pending messages */
rc = write(endpoint->ep_fd, frag->frag_ptr, frag->frag_len);
/* capture errno right away, before any later call can overwrite it */
errno_save = errno;
if (rc < 0) {
if (EAGAIN == errno_save) {
break;
}
if (EINTR == errno_save) {
continue;
}
/* All other errors -- to include sigpipe -- mean that
Something Bad happened and we should abort in
despair. */
opal_output_verbose(1, orte_iof_base.iof_output, "iof_base_endpoint: write handler, error on fd");
orte_iof_base_endpoint_closed(endpoint);
/* Send a ACK-AND-CLOSE back to the service so that it
knows not to wait for any further ACKs */
orte_iof_base_frag_ack(frag, true);
OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
return;
}
/* account for what was written; a remainder means the descriptor took only part of it */
frag->frag_len -= rc;
frag->frag_ptr += rc;
if(frag->frag_len > 0) {
break;
}
opal_list_remove_item(&endpoint->ep_sink_frags, &frag->super.super);
/* the ack is issued with the lock released, then the lock is re-taken for the next iteration */
OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
orte_iof_base_frag_ack(frag, false);
OPAL_THREAD_LOCK(&orte_iof_base.iof_lock);
}
/* is there anything left to write? */
if(opal_list_get_size(&endpoint->ep_sink_frags) == 0) {
opal_event_del(&endpoint->ep_event);
if(orte_iof_base.iof_waiting) {
opal_condition_signal(&orte_iof_base.iof_condition);
}
}
OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
}
/*
 * Decide whether stdin should be read from fd right now.
 *
 * Returns false only when tcgetpgrp() is available (and we are not on
 * Windows), fd is a terminal, and our process group is not the
 * terminal's foreground process group.  Returns true in every other
 * case.
 */
static bool orte_iof_base_endpoint_stdin_check(int fd)
{
    bool readable = true;

#if !defined(__WINDOWS__) && defined(HAVE_TCGETPGRP)
    if (isatty(fd)) {
        if (getpgrp() != tcgetpgrp(fd)) {
            readable = false;
        }
    }
#endif /* !defined(__WINDOWS__) */

    return readable;
}
/*
 * Signal callback registered for SIGCONT on a tty stdin endpoint.
 * Re-evaluates whether stdin should be read and adds or removes the
 * endpoint's read event accordingly.  `user` is the endpoint.
 */
static void orte_iof_base_endpoint_stdin_cb(int sd, short flags, void *user)
{
    orte_iof_base_endpoint_t* ep = (orte_iof_base_endpoint_t*)user;

    if (!orte_iof_base_endpoint_stdin_check(ep->ep_fd)) {
        /* not allowed to read at the moment: stop listening */
        opal_event_del(&ep->ep_event);
        return;
    }

    opal_event_add(&ep->ep_event, 0);
}
/*
 * Find an already-registered endpoint with the same origin name, tag
 * and mode as the ones that would be passed to create.
 *
 * Returns the first match with its reference count incremented, or
 * NULL when no endpoint on orte_iof_base.iof_endpoints matches.
 */
static orte_iof_base_endpoint_t* orte_iof_base_endpoint_lookup(
    const orte_process_name_t* proc,
    orte_iof_base_mode_t mode,
    int tag)
{
    opal_list_item_t* cursor = opal_list_get_first(&orte_iof_base.iof_endpoints);

    while (cursor != opal_list_get_end(&orte_iof_base.iof_endpoints)) {
        orte_iof_base_endpoint_t* candidate = (orte_iof_base_endpoint_t*)cursor;

        if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL,proc,&candidate->ep_origin) &&
            candidate->ep_tag == tag && candidate->ep_mode == mode) {
            OBJ_RETAIN(candidate);
            return candidate;
        }
        cursor = opal_list_get_next(cursor);
    }

    return NULL;
}
/*
 * Create a local endpoint for (proc, mode, tag) on descriptor fd and
 * register it on orte_iof_base.iof_endpoints.
 *
 * @param proc  origin process name copied into the endpoint
 * @param mode  ORTE_IOF_SOURCE (read from fd) or ORTE_IOF_SINK (write to fd)
 * @param tag   stream tag
 * @param fd    descriptor the endpoint reads from / writes to
 * @return ORTE_SUCCESS when the endpoint exists on return (newly created
 *         or already present), ORTE_ERR_OUT_OF_RESOURCE when allocation
 *         fails, ORTE_ERR_BAD_PARAM for an unknown mode, or the error
 *         returned by opal_event_add().
 *
 * Changes relative to the previous version:
 *  - an unknown mode is rejected before the lock is taken and before fd
 *    is touched (it used to return with iof_lock held and the freshly
 *    allocated endpoint leaked);
 *  - a failing opal_event_add() now releases the endpoint and drops
 *    iof_lock before returning (it used to return with the lock held);
 *  - the SIGCONT handler for a tty stdin is registered only after the
 *    read event has been set up and (if applicable) added, so the
 *    failure path never leaves a signal handler pointing at a released
 *    endpoint, and the handler can never run against an un-set event.
 */
int orte_iof_base_endpoint_create(
    const orte_process_name_t* proc,
    orte_iof_base_mode_t mode,
    int tag,
    int fd)
{
    orte_iof_base_endpoint_t* endpoint;
    int flags;
    int rc;

    if (ORTE_IOF_SOURCE != mode && ORTE_IOF_SINK != mode) {
        opal_output_verbose(1, orte_iof_base.iof_output, "orte_iof_base_endpoint_create: invalid mode %d\n", mode);
        return ORTE_ERR_BAD_PARAM;
    }

    OPAL_THREAD_LOCK(&orte_iof_base.iof_lock);

#if !defined(__WINDOWS__)
    /* If we haven't initialized the event yet, do so now */
    if (!sigpipe_event_initialized) {
        opal_signal_set(&sigpipe_event, SIGPIPE,
                        sigpipe_signal_callback, &sigpipe_event);
        opal_signal_add(&sigpipe_event, NULL);
        sigpipe_event_initialized = true;
    }
#endif  /* !defined(__WINDOWS__) */

    if((endpoint = orte_iof_base_endpoint_lookup(proc,mode,tag)) != NULL) {
        /* NOTE(review): lookup already returned a retained reference,
           so this adds a second one per call - kept as-is, confirm it
           is intended */
        OBJ_RETAIN(endpoint);
        OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
        return ORTE_SUCCESS;
    }

    endpoint = OBJ_NEW(orte_iof_base_endpoint_t);
    if(NULL == endpoint) {
        OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }
    endpoint->ep_origin = *proc;
    endpoint->ep_mode = mode;
    endpoint->ep_tag = tag;
    endpoint->ep_fd = fd;

    /* If it looks like we're on the mpirun side of a standard IO
       stream (like we're a SOURCE and tag is STDIN and we're mucking
       with fd 0), we don't want to set nonblocking.  If we do so, we
       set the file descriptor to non-blocking for everyone that has
       that file descriptor, which includes everyone else in our shell
       pipeline chain.  (See
       http://lists.freebsd.org/pipermail/freebsd-hackers/2005-January/009742.html).
       This causes things like "mpirun -np 1 big_app | cat" to lose
       output, because cat's stdout is then ALSO non-blocking and cat
       isn't built to deal with that case (same with almost all other
       unix text utils).
       Otherwise, we're probably on the non-mpirun end of things, and
       should be non-blocking.
    */
    if ( ! ((ORTE_IOF_SOURCE == mode && ORTE_IOF_STDIN == tag && 0 == fd) ||
            (ORTE_IOF_SINK == mode && ORTE_IOF_STDOUT == tag && 1 == fd) ||
            (ORTE_IOF_SINK == mode && ORTE_IOF_STDERR == tag && 2 == fd) ||
            (ORTE_IOF_SINK == mode && ORTE_IOF_INTERNAL == tag))) {
        if((flags = fcntl(fd, F_GETFL, 0)) < 0) {
            opal_output_verbose(1, orte_iof_base.iof_output, "[%s:%d]: fcntl(F_GETFL) failed with errno=%d\n",
                                __FILE__, __LINE__, errno);
        } else {
            flags |= O_NONBLOCK;
            fcntl(fd, F_SETFL, flags);
        }
    }

    /* setup event handler */
    if (ORTE_IOF_SOURCE == mode) {
        /* always setup the event, but only add it if we should be
           reading from stdin right now (per rules below) */
        opal_event_set(
            &endpoint->ep_event,
            endpoint->ep_fd,
            OPAL_EV_READ|OPAL_EV_PERSIST,
            orte_iof_base_endpoint_read_handler,
            endpoint);
        if (tag != ORTE_IOF_STDIN ||
            orte_iof_base_endpoint_stdin_check(endpoint->ep_fd)) {
            rc = opal_event_add(&endpoint->ep_event, 0);
            if (ORTE_SUCCESS != rc) {
                /* the event never got armed, so there is nothing for
                   the destructor to remove: detach the fd first */
                endpoint->ep_fd = -1;
                OBJ_RELEASE(endpoint);
                OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
                return rc;
            }
        }

        if (tag == ORTE_IOF_STDIN && isatty(endpoint->ep_fd)) {
            /* We should avoid trying to read from stdin if we
               have a terminal, but are backgrounded.  Catch the
               signals that are commonly used when we switch
               between being backgrounded and not.  If the
               filedescriptor is not a tty, don't worry about it
               and always stay connected. */
#if !defined(__WINDOWS__)
            opal_signal_set(&(endpoint->ep_stdin_event),
                            SIGCONT,
                            orte_iof_base_endpoint_stdin_cb,
                            endpoint);
            opal_signal_add(&(endpoint->ep_stdin_event), NULL);
#endif  /* !defined(__WINDOWS__) */
        }
    } else {
        /* ORTE_IOF_SINK: create the event for use later; don't add it now */
        opal_event_set(
            &endpoint->ep_event,
            endpoint->ep_fd,
            OPAL_EV_WRITE|OPAL_EV_PERSIST,
            orte_iof_base_endpoint_write_handler,
            endpoint);
    }

    opal_list_append(&orte_iof_base.iof_endpoints, &endpoint->super);
    OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
    return ORTE_SUCCESS;
}
/*
 * Remove and release the first registered endpoint whose origin matches
 * proc under the given comparison mask and whose tag is compatible
 * (equal tags, or ORTE_IOF_ANY on either side).
 *
 * Returns ORTE_SUCCESS after removing one endpoint, ORTE_ERR_NOT_FOUND
 * when nothing matched.  Despite the list walk, at most one endpoint is
 * removed per call.
 */
int orte_iof_base_endpoint_delete(
    const orte_process_name_t* proc,
    orte_ns_cmp_bitmask_t mask,
    int tag)
{
    opal_list_item_t* cursor;
    opal_list_item_t* following;

    OPAL_THREAD_LOCK(&orte_iof_base.iof_lock);

    for (cursor = opal_list_get_first(&orte_iof_base.iof_endpoints);
         cursor != opal_list_get_end(&orte_iof_base.iof_endpoints);
         cursor = following) {
        orte_iof_base_endpoint_t* candidate = (orte_iof_base_endpoint_t*)cursor;

        /* grab the successor up front, before the item can be unlinked */
        following = opal_list_get_next(cursor);

        if (OPAL_EQUAL != orte_util_compare_name_fields(mask,proc,&candidate->ep_origin)) {
            continue;
        }
        if (candidate->ep_tag != tag &&
            ORTE_IOF_ANY != candidate->ep_tag &&
            ORTE_IOF_ANY != tag) {
            continue;
        }

        opal_list_remove_item(&orte_iof_base.iof_endpoints,&candidate->super);
        OBJ_RELEASE(candidate);
        OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
        return ORTE_SUCCESS;
    }

    OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
    return ORTE_ERR_NOT_FOUND;
}
/*
 * The connection behind an endpoint has gone away: discard pending sink
 * data and, unless the endpoint is one of the special local sinks,
 * remove its event and close its descriptor.
 */
void orte_iof_base_endpoint_closed(orte_iof_base_endpoint_t* endpoint)
{
    const bool is_sink = (ORTE_IOF_SINK == endpoint->ep_mode);

    opal_output_verbose(1, orte_iof_base.iof_output,
                        "orte_iof_base_endpoint_closed: mode %s, origin [%s], tag %d",
                        (ORTE_IOF_SOURCE == endpoint->ep_mode) ? "SOURCE" : "SINK",
                        ORTE_NAME_PRINT(&endpoint->ep_origin), endpoint->ep_tag);

    if (is_sink) {
        /* Nothing will read from the fd any more, so drop every
           fragment that was still queued for it.  The upper layer is
           responsible for signalling any waiting condition variable. */
        for (;;) {
            if (NULL == opal_list_remove_first(&(endpoint->ep_sink_frags))) {
                break;
            }
        }

        /* A sink for stdout, stderr or the internal stream must stay
           open: a remote process closing its stream is no reason to
           close ours.  (Other endpoints, e.g. a stdin source, are closed
           below.) */
        if (ORTE_IOF_STDOUT == endpoint->ep_tag ||
            ORTE_IOF_STDERR == endpoint->ep_tag ||
            ORTE_IOF_INTERNAL == endpoint->ep_tag) {
            return;
        }
    }

    /* stop watching the descriptor, then close it and mark it gone */
    opal_event_del(&endpoint->ep_event);
    close(endpoint->ep_fd);
    endpoint->ep_fd = -1;
}
/*
 * Find the first endpoint whose origin matches target_name under
 * target_mask and whose tag is compatible with target_tag (equal, or
 * either side is ORTE_IOF_ANY).
 *
 * The endpoint is returned with an extra reference (OBJ_RETAIN); the
 * caller owns that reference.  Returns NULL when nothing matches.
 */
orte_iof_base_endpoint_t* orte_iof_base_endpoint_match(
    const orte_process_name_t* target_name,
    orte_ns_cmp_bitmask_t target_mask,
    int target_tag)
{
    orte_iof_base_endpoint_t* found = NULL;
    opal_list_item_t* cur;

    OPAL_THREAD_LOCK(&orte_iof_base.iof_lock);
    for (cur = opal_list_get_first(&orte_iof_base.iof_endpoints);
         cur != opal_list_get_end(&orte_iof_base.iof_endpoints);
         cur = opal_list_get_next(cur)) {
        orte_iof_base_endpoint_t* candidate = (orte_iof_base_endpoint_t*)cur;

        if (OPAL_EQUAL != orte_util_compare_name_fields(target_mask, target_name, &candidate->ep_origin)) {
            continue;
        }
        if (candidate->ep_tag == target_tag ||
            candidate->ep_tag == ORTE_IOF_ANY ||
            target_tag == ORTE_IOF_ANY) {
            /* hand the caller its own reference */
            OBJ_RETAIN(candidate);
            found = candidate;
            break;
        }
    }
    OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
    return found;
}
/*
 * Forward one message out a SINK endpoint.
 *
 * Registered callbacks are invoked with the payload first.  If the
 * endpoint has an fd and nothing is queued, the payload is written
 * directly; whatever could not be written (or the whole message if
 * other fragments are already queued) is copied into a fragment and
 * appended to ep_sink_frags, and the endpoint's event is added so the
 * remainder is progressed from the event loop.  A zero-length message
 * means "peer closed": it either closes the endpoint immediately
 * (empty queue) or is queued behind the pending fragments.
 *
 * @param endpoint  SINK endpoint to forward through.
 * @param origin    Process the fragment's ACK is sent back to.
 * @param hdr       Message header; hdr->msg_len is the payload length.
 * @param data      Payload, hdr->msg_len bytes.
 *
 * @return ORTE_ERR_BAD_PARAM if the endpoint is not a SINK or the
 *         payload is larger than a fragment can hold;
 *         ORTE_ERR_OUT_OF_RESOURCE if no fragment is available;
 *         ORTE_SUCCESS otherwise (including after a write error,
 *         which closes the endpoint and sends an ACK-and-close).
 */
int orte_iof_base_endpoint_forward(
    orte_iof_base_endpoint_t* endpoint,
    const orte_process_name_t* origin,
    orte_iof_base_msg_header_t* hdr,
    const unsigned char* data)
{
    opal_list_item_t* item;
    orte_iof_base_frag_t* frag;
    size_t len = hdr->msg_len;
    size_t written = 0;   /* payload bytes already pushed down the fd */
    int rc = 0;

    if (endpoint->ep_mode != ORTE_IOF_SINK) {
        return ORTE_ERR_BAD_PARAM;
    }

    /* Unwritten payload is staged in frag->frag_data, a fixed
       ORTE_IOF_BASE_MSG_MAX-byte buffer; refuse a length that would
       overrun it instead of trusting the header. */
    if (len > ORTE_IOF_BASE_MSG_MAX) {
        return ORTE_ERR_BAD_PARAM;
    }

    /* allocate and initialize a fragment */
    ORTE_IOF_BASE_FRAG_ALLOC(frag, rc);
    if (NULL == frag) {
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    OPAL_THREAD_LOCK(&orte_iof_base.iof_lock);
    frag->frag_owner = endpoint;
    frag->frag_src = *origin;
    frag->frag_hdr.hdr_msg = *hdr;
    frag->frag_len = len;

    /* call any registered callbacks */
    for (item = opal_list_get_first(&endpoint->ep_callbacks);
         item != opal_list_get_end(&endpoint->ep_callbacks);
         item = opal_list_get_next(item)) {
        orte_iof_base_callback_t* cb = (orte_iof_base_callback_t*)item;
        cb->cb_func(
            &hdr->msg_origin,
            hdr->msg_tag,
            cb->cb_data,
            data,
            hdr->msg_len);
    }

    if (endpoint->ep_fd < 0) {
        /* No descriptor behind this endpoint (callbacks only). */
        OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
        if (0 == len) {
            /* The ACK routine sends nothing for a 0-length message,
               so its send callback would never hand the fragment
               back; return it here to avoid leaking it. */
            ORTE_IOF_BASE_FRAG_RETURN(frag);
        } else {
            /* acknowledge fragment */
            orte_iof_base_frag_ack(frag, false);
        }
        return ORTE_SUCCESS;
    }

    /* try to write w/out copying data */
    if (opal_list_get_size(&endpoint->ep_sink_frags) == 0) {
        if (len == 0) {
            /* No ACK required because the frag is of 0 length
               (ACKs are based on fragment length; an ACK of 0
               bytes would do nothing) */
            ORTE_IOF_BASE_FRAG_RETURN(frag);
            opal_output_verbose(1, orte_iof_base.iof_output, "iof_base_endpoint: forward: peer closed fd");
            orte_iof_base_endpoint_closed(endpoint);
            OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
            return ORTE_SUCCESS;
        }
        rc = write(endpoint->ep_fd, data, len);
        if (rc < 0) {
            if (errno != EAGAIN && errno != EINTR) {
                opal_output_verbose(1, orte_iof_base.iof_output, "iof_base_endpoint: forward: write error");
                orte_iof_base_endpoint_closed(endpoint);
                /* Send a ACK-AND-CLOSE back to the service so
                   that it knows not to wait for any further
                   ACKs */
                orte_iof_base_frag_ack(frag, true);
                OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
                return ORTE_SUCCESS;
            }
            /* EAGAIN / EINTR: nothing was written; queue it all */
        } else {
            written = (size_t)rc;
        }
        frag->frag_len -= written;
    }

    /* Ensure to handle both cases:
       1. When ep_sink_frags was not empty (regardless of frag_len)
       2. When ep_sink_frags was empty, but we fell through from above */
    if (frag->frag_len > 0 || 0 == len) {
        /* handle incomplete write - also queue up 0 byte message
         * and recognize this as a request to close the descriptor
         * when all pending operations complete
         */
        frag->frag_ptr = frag->frag_data;
        memcpy(frag->frag_ptr, data + written, frag->frag_len);
        opal_list_append(&endpoint->ep_sink_frags, &frag->super.super);
        /* If we're the first frag to be put on the sink_frags
           list, then enable the event that will tell us when the
           fd becomes writeable */
        if (opal_list_get_size(&endpoint->ep_sink_frags) == 1) {
            opal_output_verbose(1, orte_iof_base.iof_output, "iof_base_endpoint forwarding frag; re-enabled reading for endpoint");
            opal_event_add(&endpoint->ep_event, 0);
        }
        OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
    } else {
        OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
        /* everything went out in one write: acknowledge fragment */
        orte_iof_base_frag_ack(frag, false);
    }
    return ORTE_SUCCESS;
}
/**
 * Register a callback on the SINK endpoint for (proc, tag).
 *
 * If no such endpoint exists yet, a descriptor-less one (ep_fd == -1)
 * is created and put on the global endpoint list; an existing
 * endpoint gets an extra reference instead.  The callback is appended
 * to the endpoint's ep_callbacks list.
 *
 * @param proc    Process name the endpoint belongs to.
 * @param tag     Tag of the endpoint.
 * @param cbfunc  Function to invoke for each forwarded message.
 * @param cbdata  Opaque pointer passed back to cbfunc.
 *
 * @return ORTE_SUCCESS, or ORTE_ERR_OUT_OF_RESOURCE if the callback
 *         or the endpoint could not be allocated.
 */
int orte_iof_base_callback_create(
    const orte_process_name_t* proc,
    int tag,
    orte_iof_base_callback_fn_t cbfunc,
    void *cbdata)
{
    orte_iof_base_callback_t* cb = OBJ_NEW(orte_iof_base_callback_t);
    orte_iof_base_endpoint_t* endpoint;

    if (NULL == cb) {
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    OPAL_THREAD_LOCK(&orte_iof_base.iof_lock);
    endpoint = orte_iof_base_endpoint_lookup(proc, ORTE_IOF_SINK, tag);
    if (NULL == endpoint) {
        endpoint = OBJ_NEW(orte_iof_base_endpoint_t);
        if (NULL == endpoint) {
            OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
            /* the callback object was never attached to anything;
               release it so this failure path does not leak it */
            OBJ_RELEASE(cb);
            return ORTE_ERR_OUT_OF_RESOURCE;
        }
        endpoint->ep_origin = *proc;
        endpoint->ep_mode = ORTE_IOF_SINK;
        endpoint->ep_tag = tag;
        endpoint->ep_fd = -1;
        opal_list_append(&orte_iof_base.iof_endpoints, &endpoint->super);
    } else {
        OBJ_RETAIN(endpoint);
    }

    cb->cb_func = cbfunc;
    cb->cb_data = cbdata;
    opal_list_append(&endpoint->ep_callbacks, (opal_list_item_t*)cb);
    OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
    return ORTE_SUCCESS;
}
/**
 * Remove every callback registered on the SINK endpoint for
 * (proc, tag) and drop one reference on that endpoint.
 *
 * @return ORTE_SUCCESS, or ORTE_ERR_NOT_FOUND if no such endpoint
 *         exists.
 */
int orte_iof_base_callback_delete(
    const orte_process_name_t* proc,
    int tag)
{
    orte_iof_base_endpoint_t* endpoint;
    opal_list_item_t* cb_item;
    int status = ORTE_ERR_NOT_FOUND;

    OPAL_THREAD_LOCK(&orte_iof_base.iof_lock);
    endpoint = orte_iof_base_endpoint_lookup(proc, ORTE_IOF_SINK, tag);
    if (NULL != endpoint) {
        /* drain and release the whole callback list */
        for (cb_item = opal_list_remove_first(&endpoint->ep_callbacks);
             NULL != cb_item;
             cb_item = opal_list_remove_first(&endpoint->ep_callbacks)) {
            OBJ_RELEASE(cb_item);
        }
        OBJ_RELEASE(endpoint);
        status = ORTE_SUCCESS;
    }
    OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
    return status;
}
/**
 * Record a newly acknowledged sequence number for an endpoint.
 *
 * The flow-control window is evaluated before and after ep_ack is
 * updated.  If it was closed (outstanding bytes >= iof_window_size)
 * and the new ACK opened it again, the endpoint's event is re-added
 * so forwarding resumes.  If somebody is waiting for output to drain
 * and everything sent has now been ACKed, the condition variable is
 * signalled.
 *
 * @return ORTE_SUCCESS always.
 */
int orte_iof_base_endpoint_ack(
    orte_iof_base_endpoint_t* endpoint,
    uint32_t seq)
{
    bool was_closed;
    bool is_open;

    OPAL_THREAD_LOCK(&orte_iof_base.iof_lock);

    /* window state with the old ACK value ... */
    was_closed =
        ORTE_IOF_BASE_SEQDIFF(endpoint->ep_seq,endpoint->ep_ack) >= orte_iof_base.iof_window_size;
    endpoint->ep_ack = seq;
    /* ... and with the new one */
    is_open =
        ORTE_IOF_BASE_SEQDIFF(endpoint->ep_seq,endpoint->ep_ack) < orte_iof_base.iof_window_size;

    /* wake a flusher once everything sent has been acknowledged */
    if (orte_iof_base.iof_waiting && endpoint->ep_seq == endpoint->ep_ack) {
        opal_condition_signal(&orte_iof_base.iof_condition);
    }

    /* closed -> open transition: resume forwarding */
    if (was_closed) {
        if (is_open) {
            opal_output_verbose(1, orte_iof_base.iof_output, "iof_base_endpoint ack; re-enabled reading for endpoint");
            opal_event_add(&endpoint->ep_event, 0);
        }
    }

    OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
    return ORTE_SUCCESS;
}
/*
 * True when the endpoint still has fragments queued: ep_source_frags
 * is consulted for a SOURCE endpoint, ep_sink_frags for anything
 * else.  See also the description in iof_base_endpoint.h.
 */
bool orte_iof_base_endpoint_have_pending_frags(
    orte_iof_base_endpoint_t* endpoint)
{
    opal_list_t* queue = &endpoint->ep_sink_frags;

    if (ORTE_IOF_SOURCE == endpoint->ep_mode) {
        queue = &endpoint->ep_source_frags;
    }
    return !opal_list_is_empty(queue);
}
/*
 * Despite the name, this returns true when NO acknowledgements are
 * outstanding: for a SOURCE endpoint that is ep_seq == ep_ack; any
 * other endpoint always yields true.  See also the description in
 * iof_base_endpoint.h.
 */
bool orte_iof_base_endpoint_have_pending_acks(
    orte_iof_base_endpoint_t* endpoint)
{
    if (ORTE_IOF_SOURCE != endpoint->ep_mode) {
        /* only SOURCE endpoints track ep_seq / ep_ack here */
        return true;
    }
    return (endpoint->ep_seq == endpoint->ep_ack);
}

Просмотреть файл

@ -1,219 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef _IOF_BASE_ENDPOINT_
#define _IOF_BASE_ENDPOINT_
#include "orte_config.h"
#include "opal/class/opal_list.h"
#include "opal/event/event.h"
#include "orte/mca/iof/iof.h"
#include "orte/mca/iof/base/iof_base_header.h"
BEGIN_C_DECLS
/**
 * One registered callback.  Instances are kept on an endpoint's
 * ep_callbacks list; each forwarded message is passed to cb_func
 * together with cb_data.
 */
struct orte_iof_base_callback_t {
/** Parent: lets the callback be linked into an opal_list_t */
opal_list_item_t super;
/** Function invoked for each message forwarded to the endpoint */
orte_iof_base_callback_fn_t cb_func;
/** Opaque pointer passed back to cb_func on every invocation */
void* cb_data;
};
typedef struct orte_iof_base_callback_t orte_iof_base_callback_t;
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_iof_base_callback_t);
/**
 * Structure that represents a published endpoint.
 */
struct orte_iof_base_endpoint_t {
/** Parent */
opal_list_item_t super;
/** ORTE_IOF_SOURCE or ORTE_IOF_SINK */
orte_iof_base_mode_t ep_mode;
/** The origin process for this endpoint.  Will either be myself
(i.e., it's an fd that represents a source or a sink in my
process) or another process (i.e., this process is acting as a
proxy for another process and [typically] has a pipe/fd open
to that process to get their stdin, stdout, or stderr). */
orte_process_name_t ep_origin;
/** Predefined tags: ORTE_IOF_ANY, ORTE_IOF_STDIN, ORTE_IOF_STDOUT,
ORTE_IOF_STDERR */
int ep_tag;
/** File descriptor to read or write from (or -1 if it has been
closed, or if the endpoint exists only to carry callbacks) */
int ep_fd;
/** Rollover byte count of what has been forwarded from the fd to
other targets */
uint32_t ep_seq;
/** Minimum byte count of what has been ACK'ed from all the targets
that are listening to this endpoint */
uint32_t ep_ack;
/** Event library event for this file descriptor */
opal_event_t ep_event;
/** Special event library event for the case of stdin */
opal_event_t ep_stdin_event;
/** The list for fragments that are in-flight from a SOURCE
endpoint */
opal_list_t ep_source_frags;
/** The list for fragments that are in-flight from a SINK
endpoint (queued because they could not yet be written to the fd) */
opal_list_t ep_sink_frags;
/** List of callbacks (orte_iof_base_callback_t) for subscriptions */
opal_list_t ep_callbacks;
};
typedef struct orte_iof_base_endpoint_t orte_iof_base_endpoint_t;
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_iof_base_endpoint_t);
/*
 * Number of bytes by which sequence number s1 is ahead of s2,
 * allowing for rollover of the 32-bit counters.
 *
 * Unsigned subtraction is defined modulo 2^32, so (s1 - s2) truncated
 * to uint32_t is already the rollover-aware distance.  The previous
 * form added (ULONG_MAX - s2), which is off by one and, where
 * unsigned long is wider than 32 bits, yields a huge value instead of
 * the wrapped difference.  Each argument is evaluated exactly once.
 */
#define ORTE_IOF_BASE_SEQDIFF(s1,s2) \
    ((uint32_t)((uint32_t)(s1) - (uint32_t)(s2)))
/**
* Create a local endpoint.
*
* @param name Origin process name corresponding to endpoint.
* @param mode Source or sink of data (exclusive).
* @param tag Logical tag for matching.
* @param fd Local file descriptor corresponding to endpoint. If the
* endpoint originates in this process, it'll be an fd in this
* process. If this process is acting as a proxy for another process,
* then the fd will be a pipe to that other process (e.g., the origin
* process' stdin, stdout, or stderr).
*/
ORTE_DECLSPEC int orte_iof_base_endpoint_create(
const orte_process_name_t* name,
orte_iof_base_mode_t mode,
int tag,
int fd);
/**
 * Associate a callback on receipt of data.
 *
 * @param name   Process name corresponding to endpoint.
 * @param tag    Logical tag for matching.
 * @param cbfunc Function to invoke when data is forwarded to the
 *               endpoint.
 * @param cbdata Opaque pointer passed back to cbfunc.
 */
ORTE_DECLSPEC int orte_iof_base_callback_create(
const orte_process_name_t *name,
int tag,
orte_iof_base_callback_fn_t cbfunc,
void* cbdata);
/**
 * Remove all callbacks registered for the endpoint matching the
 * given process name and tag.
 *
 * @param name   Process name corresponding to endpoint.
 * @param tag    Logical tag for matching.
 */
ORTE_DECLSPEC int orte_iof_base_callback_delete(
const orte_process_name_t *name,
int tag);
/**
 * Delete all local endpoints matching the specified origin / mask /
 * tag parameters.
 *
 * @param name Origin process name corresponding to one or more endpoint(s).
 * @param mask Mask used for name comparisons.
 * @param tag  Tag for matching endpoints.
 */
ORTE_DECLSPEC int orte_iof_base_endpoint_delete(
const orte_process_name_t* name,
orte_ns_cmp_bitmask_t mask,
int tag);
/**
* Disable forwarding through the specified endpoint.
*/
ORTE_DECLSPEC int orte_iof_base_endpoint_close(
orte_iof_base_endpoint_t* endpoint);
/**
* Attempt to match an endpoint based on the origin process name /
* mask / tag.
*/
ORTE_DECLSPEC orte_iof_base_endpoint_t* orte_iof_base_endpoint_match(
const orte_process_name_t* target_name,
orte_ns_cmp_bitmask_t target_mask,
int target_tag);
/**
* Forward the specified message out the endpoint.
*/
ORTE_DECLSPEC int orte_iof_base_endpoint_forward(
orte_iof_base_endpoint_t* endpoint,
const orte_process_name_t* origin,
orte_iof_base_msg_header_t* hdr,
const unsigned char* data);
/*
* Close the file descriptor associated with an endpoint and perform
* any necessary cleanup.
*/
ORTE_DECLSPEC void orte_iof_base_endpoint_closed(
orte_iof_base_endpoint_t* endpoint);
/**
* Callback when the next set of bytes has been acknowledged.
*/
ORTE_DECLSPEC int orte_iof_base_endpoint_ack(
orte_iof_base_endpoint_t* endpoint,
uint32_t seq);
/**
* Simple check for whether we have any frags "in flight".
*
* Return "true" for SOURCEs if source_frags is not empty, indicating
* that there are frags in-flight via the RML.
*
* Return "true" for SINKs if sink_frags is not empty, indicating that
* there are pending frags for the fd that are either partially
* written or have not yet been written (because writing to the fd
* would have blocked).
*/
bool orte_iof_base_endpoint_have_pending_frags(
orte_iof_base_endpoint_t* endpoint);
/**
* Simple check for whether we have all the ACKs that we expect.
*
* Return "true" for SOURCEs if ep_seq == ep_ack.
*
* Return "true" for SINKs always; SINK endpoints don't receive ACKs.
*/
bool orte_iof_base_endpoint_have_pending_acks(
orte_iof_base_endpoint_t* endpoint);
END_C_DECLS
#endif

Просмотреть файл

@ -1,133 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include <stdlib.h>
#include <string.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <errno.h>
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_SYS_FCNTL_H
#include <sys/fcntl.h>
#endif
#ifdef HAVE_NETINET_IN_H
#include <netinet/in.h>
#endif
#include "orte/util/show_help.h"
#include "orte/mca/oob/base/base.h"
#include "orte/mca/iof/base/base.h"
#include "orte/mca/iof/base/iof_base_endpoint.h"
#include "orte/mca/iof/base/iof_base_fragment.h"
/**
 * Timer callback fired from the event loop: sets the int that cbdata
 * points at to 1 and signals iof_condition, both under iof_lock.
 * The fd and flags arguments are unused.
 */
static void orte_iof_base_timer_cb(int fd, short flags, void *cbdata)
{
    int *fired = (int *)cbdata;

    OPAL_THREAD_LOCK(&orte_iof_base.iof_lock);
    *fired = 1;
    opal_condition_signal(&orte_iof_base.iof_condition);
    OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
}
/*
 * Flush output streams and block until no endpoint has fragments
 * pending.
 *
 * Only one flush runs at a time: a static counter guards entry, and a
 * call that finds another flush in progress returns ORTE_SUCCESS
 * immediately without waiting.
 *
 * Returns ORTE_SUCCESS in all cases.
 */
int orte_iof_base_flush(void)
{
opal_list_item_t* item;
opal_event_t ev;
struct timeval tv = { 0, 0 };
int flushed = 0;
size_t pending;
/* re-entry guard shared by all callers */
static int32_t lock = 0;
opal_output_verbose(1, orte_iof_base.iof_output, "CALLING IOF BASE FLUSH!");
/* somebody else is already flushing: undo our increment and leave */
if(OPAL_THREAD_ADD32(&lock,1) > 1) {
OPAL_THREAD_ADD32(&lock,-1);
return ORTE_SUCCESS;
}
/* flush any pending output */
fflush(NULL);
/* force all file descriptors to be progressed at least once,
* wait on a timer callback to be called out of the event loop
*/
opal_output_verbose(1, orte_iof_base.iof_output,
"IOF BASE FLUSH: tweaking all endpoints once");
if(opal_event_progress_thread() == false) {
/* a zero-delay timer is queued; orte_iof_base_timer_cb sets
   'flushed' and signals the condition we wait on here */
OPAL_THREAD_LOCK(&orte_iof_base.iof_lock);
opal_evtimer_set(&ev, orte_iof_base_timer_cb, &flushed);
opal_event_add(&ev, &tv);
while(0 == flushed) {
opal_condition_wait(&orte_iof_base.iof_condition, &orte_iof_base.iof_lock);
}
} else {
/* run one non-blocking pass of the event loop ourselves, then take
   the lock so both branches leave with iof_lock held */
opal_event_loop(OPAL_EVLOOP_NONBLOCK);
OPAL_THREAD_LOCK(&orte_iof_base.iof_lock);
}
opal_output_verbose(1, orte_iof_base.iof_output,
"IOF BASE FLUSH: done tweaking all endpoints once");
/* advertise that a flusher is waiting (orte_iof_base_endpoint_ack
   checks iof_waiting before signalling the condition) */
orte_iof_base.iof_waiting++;
/* wait for all of the endpoints to reach an idle state */
do {
pending = 0;
/* Count how many endpoints have fragments pending to be
written */
for (item = opal_list_get_first(&orte_iof_base.iof_endpoints);
item != opal_list_get_end(&orte_iof_base.iof_endpoints);
item = opal_list_get_next(item)) {
orte_iof_base_endpoint_t* endpoint =
(orte_iof_base_endpoint_t*)item;
if (orte_iof_base_endpoint_have_pending_frags(endpoint)) {
++pending;
}
}
/* If there were any with pending writes, try to make some
progress */
if (pending > 0) {
if (!opal_event_progress_thread()) {
/* wait (releasing iof_lock) until someone signals the condition */
opal_condition_wait(&orte_iof_base.iof_condition,
&orte_iof_base.iof_lock);
} else {
/* drive the event loop ourselves; the lock is dropped around the
   call and re-taken afterwards */
OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
opal_event_loop(OPAL_EVLOOP_ONCE);
OPAL_THREAD_LOCK(&orte_iof_base.iof_lock);
}
}
} while (pending > 0);
opal_output_verbose(1, orte_iof_base.iof_output, "IOF BASE FLUSH: done waiting");
orte_iof_base.iof_waiting--;
OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
/* release the re-entry guard */
OPAL_THREAD_ADD32(&lock,-1);
return ORTE_SUCCESS;
}

Просмотреть файл

@ -1,120 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include <stdlib.h>
#include <string.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <errno.h>
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_SYS_FCNTL_H
#include <sys/fcntl.h>
#endif
#ifdef HAVE_NETINET_IN_H
#include <netinet/in.h>
#endif
#include "orte/util/show_help.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/rml/rml_types.h"
#include "orte/mca/iof/base/base.h"
#include "orte/mca/iof/base/iof_base_endpoint.h"
#include "orte/mca/iof/base/iof_base_fragment.h"
/**
 * Fragment constructor: clears owner and length and points the two
 * iovec entries at the fragment's own header (entry 0) and data
 * buffer (entry 1), each with the full size of that member.
 */
static void orte_iof_base_frag_construct(orte_iof_base_frag_t* frag)
{
    struct iovec* hdr_vec;
    struct iovec* data_vec;

    OMPI_DEBUG_ZERO(*frag);

    hdr_vec = &frag->frag_iov[0];
    data_vec = &frag->frag_iov[1];

    frag->frag_owner = NULL;
    frag->frag_len = 0;

    hdr_vec->iov_base = (IOVBASE_TYPE*)&frag->frag_hdr;
    hdr_vec->iov_len = sizeof(frag->frag_hdr);

    data_vec->iov_base = (IOVBASE_TYPE*)frag->frag_data;
    data_vec->iov_len = sizeof(frag->frag_data);
}
/**
 * Fragment destructor: puts the two iovec entries back to their
 * constructed state (header / data buffer, full sizes).  Nothing is
 * freed here.
 */
static void orte_iof_base_frag_destruct(orte_iof_base_frag_t* frag)
{
    struct iovec* hdr_vec = &frag->frag_iov[0];
    struct iovec* data_vec = &frag->frag_iov[1];

    hdr_vec->iov_base = (IOVBASE_TYPE*)&frag->frag_hdr;
    hdr_vec->iov_len = sizeof(frag->frag_hdr);

    data_vec->iov_base = (IOVBASE_TYPE*)frag->frag_data;
    data_vec->iov_len = sizeof(frag->frag_data);
}
OBJ_CLASS_INSTANCE(
orte_iof_base_frag_t,
opal_free_list_item_t,
orte_iof_base_frag_construct,
orte_iof_base_frag_destruct);
/*
 * Completion callback for the non-blocking ACK send: cbdata is the
 * fragment whose header was sent, and it is handed back to the
 * fragment free list here.  The remaining arguments are unused.
 */
static void orte_iof_base_frag_send_cb(
    int status,
    orte_process_name_t* peer,
    struct iovec* msg,
    int count,
    orte_rml_tag_t tag,
    void* cbdata)
{
    orte_iof_base_frag_t* sent = (orte_iof_base_frag_t*)cbdata;

    opal_output_verbose(1, orte_iof_base.iof_output, "iof_base_fragment: ACK send done");
    ORTE_IOF_BASE_FRAG_RETURN(sent);
}
/*
 * Acknowledge a fragment to the process it came from (frag_src).
 *
 * Nothing is sent for a fragment whose header records a zero
 * msg_len.  Otherwise the header type is set to CLOSE (do_close) or
 * ACK, the header is converted to network byte order in place, and
 * only the header iovec is sent with a non-blocking RML send; the
 * send-completion callback returns the fragment to the free list.
 *
 * NOTE(review): when nothing is sent, or send_nb fails, this routine
 * does not return the fragment itself -- confirm callers handle that.
 *
 * The file / line arguments are not used.
 *
 * @return ORTE_SUCCESS, or the status reported by send_nb.
 */
int _orte_iof_base_frag_ack(orte_iof_base_frag_t* frag, bool do_close,
                            const char* file, int line)
{
    int status;

    if (0 == frag->frag_hdr.hdr_msg.msg_len) {
        /* zero-length fragments are never acknowledged */
        return ORTE_SUCCESS;
    }

    if (do_close) {
        frag->frag_hdr.hdr_common.hdr_type = ORTE_IOF_BASE_HDR_CLOSE;
    } else {
        frag->frag_hdr.hdr_common.hdr_type = ORTE_IOF_BASE_HDR_ACK;
    }
    ORTE_IOF_BASE_HDR_MSG_HTON(frag->frag_hdr.hdr_msg);

    /* start non-blocking OOB call to forward header */
    opal_output_verbose(1, orte_iof_base.iof_output, "iof_base_fragment: sending ACK");
    status = orte_rml.send_nb(
        &frag->frag_src,
        frag->frag_iov,
        1,
        ORTE_RML_TAG_IOF_SVC,
        0,
        orte_iof_base_frag_send_cb,
        frag);
    if (ORTE_SUCCESS != status) {
        opal_output_verbose(1, 0, "orte_iof_base_frag_ack: orte_oob_send failed with status=%d\n", status);
    }
    return status;
}

Просмотреть файл

@ -1,85 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2007 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef _IOF_BASE_FRAGMENT_
#define _IOF_BASE_FRAGMENT_
#include "orte_config.h"
#include "opal/class/opal_list.h"
#include "opal/class/opal_free_list.h"
#include "opal/event/event.h"
#include "orte/mca/iof/iof.h"
#include "orte/mca/iof/base/base.h"
#include "orte/mca/iof/base/iof_base_header.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/**
 * Fragment used to hold message header/data.  Fragments are taken
 * from and returned to the orte_iof_base.iof_fragments free list via
 * ORTE_IOF_BASE_FRAG_ALLOC / ORTE_IOF_BASE_FRAG_RETURN.
 */
struct orte_iof_base_frag_t {
/** Parent: free-list item */
opal_free_list_item_t super;
/** Message header (union of all header types) */
orte_iof_base_header_t frag_hdr;
/** Process that ACKs for this fragment are sent to */
orte_process_name_t frag_src;
/** Payload buffer; holds at most ORTE_IOF_BASE_MSG_MAX bytes */
unsigned char frag_data[ORTE_IOF_BASE_MSG_MAX];
/** Current position inside frag_data */
unsigned char* frag_ptr;
/** Number of payload bytes still to be processed */
size_t frag_len;
/** iovec pair set up by the constructor: [0] header, [1] data */
struct iovec frag_iov[2];
/** Endpoint this fragment currently belongs to (NULL if none) */
struct orte_iof_base_endpoint_t* frag_owner;
};
typedef struct orte_iof_base_frag_t orte_iof_base_frag_t;
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_iof_base_frag_t);
/**
 * Free-list allocation of fragments.
 *
 * On success frag points at a fragment whose owner, data pointer and
 * length have been reset.  On failure frag is set to NULL, rc carries
 * the free-list status and a diagnostic is printed; the fragment
 * fields are only touched when the allocation succeeded (the earlier
 * version dereferenced frag unconditionally, so a failed allocation
 * crashed before the caller could test for NULL).
 */
#define ORTE_IOF_BASE_FRAG_ALLOC(frag,rc) do { \
    opal_free_list_item_t* _item; \
    OPAL_FREE_LIST_GET(&orte_iof_base.iof_fragments, _item, rc); \
    (frag) = (orte_iof_base_frag_t*)_item; \
    if (NULL == (frag)) { \
        opal_output(0, "ORTE_IOF_BASE_FRAG_ALLOC failed with status=%d\n", rc); \
    } else { \
        (frag)->frag_owner = NULL; \
        (frag)->frag_ptr = (frag)->frag_data; \
        (frag)->frag_len = 0; \
    } \
} while(0)
/*
 * Hand a fragment back to the free list.
 *
 * The argument is parenthesized so any pointer expression may be
 * passed, and the body is a single do { } while (0) statement with no
 * trailing semicolon, so the macro is safe in an unbraced if / else.
 * Invoke it like a function call, followed by a semicolon.
 */
#define ORTE_IOF_BASE_FRAG_RETURN(frag) \
    do { \
        OPAL_FREE_LIST_RETURN(&orte_iof_base.iof_fragments, \
                              &(frag)->super); \
    } while (0)
/**
* Send an acknowledgment to the peer that this fragment has been received.
*/
#define orte_iof_base_frag_ack(frag, do_close) _orte_iof_base_frag_ack((frag), (do_close), __FILE__,__LINE__)
int _orte_iof_base_frag_ack(orte_iof_base_frag_t*, bool do_close, const char*, int);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

Просмотреть файл

@ -1,166 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef _IOF_BASE_HEADER_
#define _IOF_BASE_HEADER_
#include "orte_config.h"
#include "orte/mca/iof/iof.h"
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_NETINET_IN_H
#include <netinet/in.h>
#endif
/*
 * Values carried in the hdr_type field of the common header.
 */
#define ORTE_IOF_BASE_HDR_MSG 0
#define ORTE_IOF_BASE_HDR_ACK 1
#define ORTE_IOF_BASE_HDR_PUB 2
#define ORTE_IOF_BASE_HDR_UNPUB 3
#define ORTE_IOF_BASE_HDR_SUB 4
#define ORTE_IOF_BASE_HDR_UNSUB 5
#define ORTE_IOF_BASE_HDR_CLOSE 6
/*
 * Maximum size of msg payload in bytes; also the size of a
 * fragment's frag_data buffer.
 */
#define ORTE_IOF_BASE_MSG_MAX 2048
/**
 * Fields common to all headers.
 */
struct orte_iof_base_common_header_t {
/** One of the ORTE_IOF_BASE_HDR_* type values */
uint8_t hdr_type;
uint8_t hdr_reserve;
/** The only multi-byte field; byte-swapped by the CMN macros below */
int16_t hdr_status;
};
typedef struct orte_iof_base_common_header_t orte_iof_base_common_header_t;
/* Convert the common header between network and host byte order.
   Each macro is a single assignment expression. */
#define ORTE_IOF_BASE_HDR_CMN_NTOH(h) \
(h).hdr_status = ntohs((h).hdr_status)
#define ORTE_IOF_BASE_HDR_CMN_HTON(h) \
(h).hdr_status = htons((h).hdr_status)
/**
 * Header for data.  The same layout is reused for ACK and CLOSE
 * headers (only hdr_common.hdr_type differs).
 */
struct orte_iof_base_msg_header_t {
orte_iof_base_common_header_t hdr_common;
/** Origin of the data; passed to registered callbacks */
orte_process_name_t msg_origin;
orte_process_name_t msg_proxy;
/** Tag of the stream the data belongs to */
int32_t msg_tag;
uint32_t msg_seq;
/** Payload length in bytes; 0 denotes "peer closed" */
uint32_t msg_len;
};
typedef struct orte_iof_base_msg_header_t orte_iof_base_msg_header_t;
/*
 * Convert every multi-byte field of a message header from network to
 * host byte order (NTOH) or from host to network byte order (HTON),
 * in place.
 *
 * Each macro is wrapped in do { } while (0) so it expands to exactly
 * one statement; the earlier bare statement list executed all but the
 * first conversion unconditionally when used under an unbraced "if".
 * Invoke like a function call, followed by a semicolon.
 */
#define ORTE_IOF_BASE_HDR_MSG_NTOH(h) \
    do { \
        ORTE_IOF_BASE_HDR_CMN_NTOH((h).hdr_common); \
        ORTE_PROCESS_NAME_NTOH((h).msg_origin); \
        ORTE_PROCESS_NAME_NTOH((h).msg_proxy); \
        (h).msg_tag = ntohl((h).msg_tag); \
        (h).msg_seq = ntohl((h).msg_seq); \
        (h).msg_len = ntohl((h).msg_len); \
    } while (0)
#define ORTE_IOF_BASE_HDR_MSG_HTON(h) \
    do { \
        ORTE_IOF_BASE_HDR_CMN_HTON((h).hdr_common); \
        ORTE_PROCESS_NAME_HTON((h).msg_origin); \
        ORTE_PROCESS_NAME_HTON((h).msg_proxy); \
        (h).msg_tag = htonl((h).msg_tag); \
        (h).msg_seq = htonl((h).msg_seq); \
        (h).msg_len = htonl((h).msg_len); \
    } while (0)
/**
 * Publish/Unpublish header.  Both process names and both 32-bit
 * fields are byte-swapped by the ORTE_IOF_BASE_HDR_PUB_* macros.
 */
struct orte_iof_base_pub_header_t {
orte_iof_base_common_header_t hdr_common;
orte_process_name_t pub_name;
orte_process_name_t pub_proxy;
int32_t pub_mask;
int32_t pub_tag;
};
typedef struct orte_iof_base_pub_header_t orte_iof_base_pub_header_t;
/*
 * Convert every multi-byte field of a publish/unpublish header from
 * network to host byte order (NTOH) or back (HTON), in place.
 *
 * Each macro is wrapped in do { } while (0) so it expands to exactly
 * one statement and is safe under an unbraced "if".  Invoke like a
 * function call, followed by a semicolon.
 */
#define ORTE_IOF_BASE_HDR_PUB_NTOH(h) \
    do { \
        ORTE_IOF_BASE_HDR_CMN_NTOH((h).hdr_common); \
        ORTE_PROCESS_NAME_NTOH((h).pub_proxy); \
        ORTE_PROCESS_NAME_NTOH((h).pub_name); \
        (h).pub_mask = ntohl((h).pub_mask); \
        (h).pub_tag = ntohl((h).pub_tag); \
    } while (0)
#define ORTE_IOF_BASE_HDR_PUB_HTON(h) \
    do { \
        ORTE_IOF_BASE_HDR_CMN_HTON((h).hdr_common); \
        ORTE_PROCESS_NAME_HTON((h).pub_name); \
        ORTE_PROCESS_NAME_HTON((h).pub_proxy); \
        (h).pub_mask = htonl((h).pub_mask); \
        (h).pub_tag = htonl((h).pub_tag); \
    } while (0)
/**
 * Subscription message.  The ORTE_IOF_BASE_HDR_SUB_* macros byte-swap
 * the two names and the two tags; origin_mask and target_mask are
 * not touched by them.
 */
struct orte_iof_base_sub_header_t {
orte_iof_base_common_header_t hdr_common;
orte_process_name_t origin_name;
orte_ns_cmp_bitmask_t origin_mask;
int32_t origin_tag;
orte_process_name_t target_name;
orte_ns_cmp_bitmask_t target_mask;
int32_t target_tag;
};
typedef struct orte_iof_base_sub_header_t orte_iof_base_sub_header_t;
/*
 * Convert the names and tags of a subscription header from network to
 * host byte order (NTOH) or back (HTON), in place.  The two mask
 * fields are left as they are.
 *
 * Each macro is wrapped in do { } while (0) so it expands to exactly
 * one statement and is safe under an unbraced "if".  Invoke like a
 * function call, followed by a semicolon.
 */
#define ORTE_IOF_BASE_HDR_SUB_NTOH(h) \
    do { \
        ORTE_IOF_BASE_HDR_CMN_NTOH((h).hdr_common); \
        ORTE_PROCESS_NAME_NTOH((h).origin_name); \
        (h).origin_tag = ntohl((h).origin_tag); \
        ORTE_PROCESS_NAME_NTOH((h).target_name); \
        (h).target_tag = ntohl((h).target_tag); \
    } while (0)
#define ORTE_IOF_BASE_HDR_SUB_HTON(h) \
    do { \
        ORTE_IOF_BASE_HDR_CMN_HTON((h).hdr_common); \
        ORTE_PROCESS_NAME_HTON((h).origin_name); \
        (h).origin_tag = htonl((h).origin_tag); \
        ORTE_PROCESS_NAME_HTON((h).target_name); \
        (h).target_tag = htonl((h).target_tag); \
    } while (0)
/**
 * Union of all header types.  Every member begins with an
 * orte_iof_base_common_header_t, so hdr_common can be read
 * regardless of which member was written.
 */
union orte_iof_base_header_t {
orte_iof_base_common_header_t hdr_common;
orte_iof_base_msg_header_t hdr_msg;
orte_iof_base_sub_header_t hdr_sub;
orte_iof_base_pub_header_t hdr_pub;
};
typedef union orte_iof_base_header_t orte_iof_base_header_t;
#endif

Просмотреть файл

@ -27,17 +27,11 @@
#include "opal/mca/base/mca_base_param.h"
#include "orte/util/show_help.h"
#include "orte/util/proc_info.h"
#include "orte/mca/iof/iof.h"
#include "orte/mca/iof/base/base.h"
#if !ORTE_DISABLE_FULL_SUPPORT
#include "orte/mca/iof/base/iof_base_header.h"
#include "orte/mca/iof/base/iof_base_fragment.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#endif
/*
* The following file was created by configure. It contains extern
* statements and the definition of an array of pointers to each
@ -46,6 +40,9 @@
#include "orte/mca/iof/base/static-components.h"
orte_iof_base_module_t orte_iof;
#if ORTE_DISABLE_FULL_SUPPORT
/* have to include a bogus function here so that
* the build system sees at least one function
@ -58,6 +55,54 @@ int orte_iof_base_open(void)
#else
/* class instances */
/* Sink constructor: constructs the embedded write event (wev). */
static void orte_iof_base_sink_construct(orte_iof_sink_t* ptr)
{
OBJ_CONSTRUCT(&ptr->wev, orte_iof_write_event_t);
}
/* Sink destructor: destructs the embedded write event (wev). */
static void orte_iof_base_sink_destruct(orte_iof_sink_t* ptr)
{
OBJ_DESTRUCT(&ptr->wev);
}
/* orte_iof_sink_t is a list item with the ctor/dtor above */
OBJ_CLASS_INSTANCE(orte_iof_sink_t,
opal_list_item_t,
orte_iof_base_sink_construct,
orte_iof_base_sink_destruct);
/* Read-event constructor: zero-fills the embedded event so the
   destructor never sees uninitialized memory. */
static void orte_iof_base_read_event_construct(orte_iof_read_event_t* rev)
{
memset(&rev->ev,0,sizeof(rev->ev));
}
/* Read-event destructor: removes the event from the event library
   (called unconditionally, whether or not it was ever added). */
static void orte_iof_base_read_event_destruct(orte_iof_read_event_t* rev)
{
opal_event_del(&rev->ev);
}
/* orte_iof_read_event_t is a list item with the ctor/dtor above */
OBJ_CLASS_INSTANCE(orte_iof_read_event_t,
opal_list_item_t,
orte_iof_base_read_event_construct,
orte_iof_base_read_event_destruct);
/*
 * Write-event constructor: no write pending, no fd yet, empty output
 * list.  The embedded event is zero-filled as well -- the destructor
 * passes it to opal_event_del() unconditionally, so it must never be
 * left holding uninitialized memory (this mirrors the read-event
 * constructor).
 */
static void orte_iof_base_write_event_construct(orte_iof_write_event_t* wev)
{
    wev->pending = false;
    wev->fd = -1;
    memset(&wev->ev, 0, sizeof(wev->ev));
    OBJ_CONSTRUCT(&wev->outputs, opal_list_t);
}
/*
 * Write-event destructor: removes the event from the event library
 * and destructs the output list.
 */
static void orte_iof_base_write_event_destruct(orte_iof_write_event_t* wev)
{
    opal_event_del(&wev->ev);
    OBJ_DESTRUCT(&wev->outputs);
}
/* orte_iof_write_event_t is a list item with the ctor/dtor above */
OBJ_CLASS_INSTANCE(orte_iof_write_event_t,
                   opal_list_item_t,
                   orte_iof_base_write_event_construct,
                   orte_iof_base_write_event_destruct);
OBJ_CLASS_INSTANCE(orte_iof_write_output_t,
opal_list_item_t,
NULL, NULL);
/*
* Global variables
*/
@ -71,44 +116,35 @@ orte_iof_base_t orte_iof_base;
*/
int orte_iof_base_open(void)
{
int id;
int int_value;
char *str_value;
/* Initialize globals */
OBJ_CONSTRUCT(&orte_iof_base.iof_components_opened, opal_list_t);
OBJ_CONSTRUCT(&orte_iof_base.iof_endpoints, opal_list_t);
OBJ_CONSTRUCT(&orte_iof_base.iof_lock, opal_mutex_t);
OBJ_CONSTRUCT(&orte_iof_base.iof_condition, opal_condition_t);
OBJ_CONSTRUCT(&orte_iof_base.iof_fragments, opal_free_list_t);
orte_iof_base.iof_waiting = 0;
orte_iof_base.iof_flush = false;
/* lookup common parameters */
id = mca_base_param_register_int("iof","base","window_size",NULL,ORTE_IOF_BASE_MSG_MAX << 1);
mca_base_param_lookup_int(id,&int_value);
orte_iof_base.iof_window_size = int_value;
/* someone might pass in an iof_service name, so do a little
* dance to setup the default
*/
orte_util_convert_process_name_to_string(&str_value, ORTE_PROC_MY_HNP);
id = mca_base_param_register_string("iof","base","service",NULL,str_value);
free(str_value);
mca_base_param_lookup_string(id,&str_value);
orte_util_convert_string_to_process_name(&orte_iof_base.iof_service, str_value);
free(str_value);
OBJ_CONSTRUCT(&orte_iof_base.iof_write_output_lock, opal_mutex_t);
/* daemons do not need to do this as they do not write out stdout/err */
if (!orte_process_info.daemon) {
/* setup the stdout event */
OBJ_CONSTRUCT(&orte_iof_base.iof_write_stdout, orte_iof_write_event_t);
orte_iof_base.iof_write_stdout.fd = 1;
/* create the write event, but don't add it until we need it */
opal_event_set(&orte_iof_base.iof_write_stdout.ev,
orte_iof_base.iof_write_stdout.fd,
OPAL_EV_WRITE|OPAL_EV_PERSIST,
orte_iof_base_write_handler,
&orte_iof_base.iof_write_stdout);
/* setup the stderr event */
OBJ_CONSTRUCT(&orte_iof_base.iof_write_stderr, orte_iof_write_event_t);
orte_iof_base.iof_write_stderr.fd = 2;
/* create the write event, but don't add it until we need it */
opal_event_set(&orte_iof_base.iof_write_stderr.ev,
orte_iof_base.iof_write_stderr.fd,
OPAL_EV_WRITE|OPAL_EV_PERSIST,
orte_iof_base_write_handler,
&orte_iof_base.iof_write_stderr);
}
orte_iof_base.iof_output = opal_output_open(NULL);
/* initialize free list */
opal_free_list_init( &orte_iof_base.iof_fragments,
sizeof(orte_iof_base_frag_t),
OBJ_CLASS(orte_iof_base_frag_t),
0, /* number to initially allocate */
-1, /* maximum elements to allocate */
16 ); /* number per allocation */
/* Open up all available components */
if (ORTE_SUCCESS !=
mca_base_components_open("iof", orte_iof_base.iof_output,

168
orte/mca/iof/base/iof_base_output.c Обычный файл
Просмотреть файл

@ -0,0 +1,168 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
* These symbols are in a file by themselves to provide nice linker
* semantics. Since linkers generally pull in symbols by object
* files, keeping these symbols as the only symbols in this file
* prevents utility programs such as "ompi_info" from having to import
* entire components just to query their version and parameters.
*/
#include "orte_config.h"
#include "orte/constants.h"
#include <errno.h>
#include <stdlib.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/iof/base/base.h"
/*
 * Queue a block of forwarded I/O on a write channel and make sure the
 * channel's write event is armed so the data gets flushed.
 *
 * name      process the data belongs to (used only for tagging/logging)
 * stream    ORTE_IOF_STDIN/STDOUT/STDERR/STDDIAG flag identifying the stream
 * data      bytes to be written
 * numbytes  number of bytes in data (must not be negative)
 * channel   write event whose output list receives the new fragment
 *
 * When orte_tag_output is set and the stream is not stdin, the data is
 * prefixed with "[jobid,vpid]<stream>" and the same tag is inserted after
 * every newline except one in the final byte.
 *
 * Returns the number of fragments now queued on the channel, or a
 * (negative) ORTE error code if the arguments are invalid or no output
 * object could be allocated.
 *
 * NOTE(review): the capacity of output->data is not visible from here.
 * Tagging grows the data by one tag per embedded newline - confirm the
 * buffer is sized for the worst case.
 */
int orte_iof_base_write_output(orte_process_name_t *name, orte_iof_tag_t stream,
                               unsigned char *data, int numbytes,
                               orte_iof_write_event_t *channel)
{
    char tag[ORTE_IOF_BASE_TAG_MAX];
    const char *suffix;
    orte_iof_write_output_t *output;
    int i, k, taglen, num_buffered;

    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                         "%s write:output setting up to write %d bytes to %s of %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                         (ORTE_IOF_STDIN & stream) ? "stdin" : ((ORTE_IOF_STDOUT & stream) ? "stdout" : ((ORTE_IOF_STDERR & stream) ? "stderr" : "stddiag")),
                         ORTE_NAME_PRINT(name)));

    /* a negative length would be turned into a huge size_t by memcpy */
    if (numbytes < 0) {
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }

    /* pick the tag suffix for this stream - done BEFORE allocating the
     * output object so that the error path cannot leak it
     */
    if (ORTE_IOF_STDIN & stream) {
        /* stdin is never tagged */
        suffix = NULL;
    } else if (ORTE_IOF_STDOUT & stream) {
        suffix = "<stdout>";
    } else if (ORTE_IOF_STDERR & stream) {
        suffix = "<stderr>";
    } else if (ORTE_IOF_STDDIAG & stream) {
        suffix = "<stddiag>";
    } else {
        /* error - this should never happen */
        ORTE_ERROR_LOG(ORTE_ERR_VALUE_OUT_OF_BOUNDS);
        return ORTE_ERR_VALUE_OUT_OF_BOUNDS;
    }

    /* setup output object */
    output = OBJ_NEW(orte_iof_write_output_t);
    if (NULL == output) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    /* see if data is to be tagged - an empty block has no last byte to
     * copy, so it takes the plain path below
     */
    if (orte_tag_output && NULL != suffix && 0 < numbytes) {
        snprintf(tag, ORTE_IOF_BASE_TAG_MAX, "[%s,%s]%s",
                 ORTE_LOCAL_JOBID_PRINT(name->jobid),
                 ORTE_VPID_PRINT(name->vpid), suffix);
        taglen = strlen(tag);
        /* start with the tag */
        memcpy(output->data, tag, taglen);
        k = taglen;
        /* copy all but the last byte, re-emitting the tag after every
         * newline so that each output line starts with it
         */
        for (i = 0; i < numbytes - 1; i++) {
            output->data[k++] = data[i];
            if ('\n' == data[i]) {
                memcpy(&output->data[k], tag, taglen);
                k += taglen;
            }
        }
        /* the last byte is copied verbatim: if it is a newline we do not
         * want a dangling tag after it
         */
        output->data[k++] = data[numbytes - 1];
        output->numbytes = k;
    } else {
        /* copy over the data to be written */
        memcpy(output->data, data, numbytes);
        output->numbytes = numbytes;
    }

    /* lock us up to protect global operations */
    OPAL_THREAD_LOCK(&orte_iof_base.iof_write_output_lock);

    /* add this data to the write list for this fd */
    opal_list_append(&channel->outputs, &output->super);

    /* record how big the buffer is */
    num_buffered = opal_list_get_size(&channel->outputs);

    /* is the write event issued? */
    if (!channel->pending) {
        /* issue it */
        OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                             "%s write:output adding write event",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        opal_event_add(&channel->ev, 0);
        channel->pending = true;
    }

    /* unlock and go */
    OPAL_THREAD_UNLOCK(&orte_iof_base.iof_write_output_lock);

    return num_buffered;
}
/*
 * Event callback that flushes the fragments queued on a write channel.
 *
 * fd      descriptor the event fired on (the channel's own fd is used)
 * event   event flags (unused)
 * cbdata  the orte_iof_write_event_t whose output list is to be drained
 *
 * Fragments are written in order. If the descriptor cannot take a whole
 * fragment, the unwritten remainder is put back at the head of the list
 * and the write event is left armed so we are called again. Once the
 * list is empty the write event is removed and the channel is marked as
 * no longer pending.
 */
void orte_iof_base_write_handler(int fd, short event, void *cbdata)
{
    orte_iof_write_event_t *wev = (orte_iof_write_event_t*)cbdata;
    opal_list_item_t *item;
    orte_iof_write_output_t *output;
    int num_written;
    int write_errno;

    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                         "%s write:handler writing data to %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         wev->fd));

    /* lock us up to protect global operations */
    OPAL_THREAD_LOCK(&orte_iof_base.iof_write_output_lock);

    while (NULL != (item = opal_list_remove_first(&wev->outputs))) {
        output = (orte_iof_write_output_t*)item;
        num_written = write(wev->fd, output->data, output->numbytes);

        if (num_written < 0) {
            write_errno = errno;
            if (EAGAIN == write_errno || EINTR == write_errno) {
                /* nothing was written - keep the fragment intact and
                 * leave the write event running so we get called again
                 */
                opal_list_prepend(&wev->outputs, item);
                OPAL_THREAD_UNLOCK(&orte_iof_base.iof_write_output_lock);
                return;
            }
            /* hard error - this fragment cannot be delivered, so drop
             * it rather than retrying forever
             */
            OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                                 "%s write:handler error %d writing to %d - dropping fragment",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 write_errno, wev->fd));
            OBJ_RELEASE(output);
            continue;
        }

        if (num_written < output->numbytes) {
            /* incomplete write - shift the unwritten tail to the front
             * and shrink the count so the retry sends only what is left
             */
            memmove(output->data, &output->data[num_written], output->numbytes - num_written);
            output->numbytes -= num_written;
            /* push this item back on the front of the list */
            opal_list_prepend(&wev->outputs, item);
            /* leave the write event running so it will call us again
             * when the fd is ready
             */
            OPAL_THREAD_UNLOCK(&orte_iof_base.iof_write_output_lock);
            return;
        }

        OBJ_RELEASE(output);
    }

    /* everything has been flushed - stop the write event */
    opal_event_del(&wev->ev);
    wev->pending = false;

    /* unlock and go */
    OPAL_THREAD_UNLOCK(&orte_iof_base.iof_write_output_lock);
}

Просмотреть файл

@ -19,14 +19,13 @@
#include "orte_config.h"
#include "orte/constants.h"
#include "orte/util/show_help.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "orte/util/show_help.h"
#include "orte/mca/iof/iof.h"
#include "orte/mca/iof/base/base.h"
#include "opal/mca/base/mca_base_component_repository.h"
orte_iof_base_module_t orte_iof;
/**
* Call the init function on all available components to find out if
@ -35,95 +34,32 @@ orte_iof_base_module_t orte_iof;
*/
int orte_iof_base_select(void)
{
opal_list_item_t *item;
mca_base_component_list_item_t *cli;
int selected_priority = -1;
orte_iof_base_component_t *selected_component = NULL;
orte_iof_base_module_t *selected_module = NULL;
bool selected_allow_user;
bool selected_have_hidden;
/* Traverse the list of opened modules; call their init functions. */
for(item = opal_list_get_first(&orte_iof_base.iof_components_opened);
item != opal_list_get_end(&orte_iof_base.iof_components_opened);
item = opal_list_get_next(item)) {
orte_iof_base_component_t* component;
cli = (mca_base_component_list_item_t *) item;
component = (orte_iof_base_component_t *) cli->cli_component;
opal_output_verbose(10, orte_iof_base.iof_output,
"orte_iof_base_select: initializing %s component %s",
component->iof_version.mca_type_name,
component->iof_version.mca_component_name);
if (NULL == component->iof_init) {
opal_output_verbose(10, orte_iof_base.iof_output,
"orte_iof_base_select: no init function; ignoring component");
continue;
} else {
bool allow_user;
bool have_hidden;
int priority;
orte_iof_base_module_t* module = component->iof_init(&priority, &allow_user, &have_hidden);
/* If the component didn't initialize, remove it from the opened
list and remove it from the component repository */
if (NULL == module) {
opal_output_verbose(10, orte_iof_base.iof_output,
"orte_iof_base_select: init returned failure");
continue;
}
/* This iof component architecture is borked. There is an init function without the
* corresponding finalize. The only available finalize (and looking to the SVC it
* seems true) is the one attached to the module. Therefore, in order to allow
* the iof to cleanly deselect we have to call the module finalize function when
* we know that the component will not get selected.
*/
if(priority > selected_priority) {
if( (NULL != selected_module) && (NULL != selected_module->iof_finalize) )
selected_module->iof_finalize();
selected_priority = priority;
selected_component = component;
selected_module = module;
selected_allow_user = allow_user;
selected_have_hidden = have_hidden;
} else {
if( NULL != module->iof_finalize )
module->iof_finalize();
}
}
int exit_status = ORTE_SUCCESS;
orte_iof_base_component_t *best_component = NULL;
orte_iof_base_module_t *best_module = NULL;
/*
* Select the best component
*/
if( OPAL_SUCCESS != mca_base_select("iof", orte_iof_base.iof_output,
&orte_iof_base.iof_components_opened,
(mca_base_module_t **) &best_module,
(mca_base_component_t **) &best_component) ) {
/* This will only happen if no component was selected, which
* is an error.
*
* NOTE: processes do not open/select the IOF - only daemons,
* the HNP, and tools do.
*/
exit_status = ORTE_ERR_NOT_FOUND;
goto cleanup;
}
/* Save the winner */
orte_iof = *best_module;
/* unload all components that were not selected */
item = opal_list_get_first(&orte_iof_base.iof_components_opened);
while(item != opal_list_get_end(&orte_iof_base.iof_components_opened)) {
opal_list_item_t* next = opal_list_get_next(item);
orte_iof_base_component_t* component;
cli = (mca_base_component_list_item_t *) item;
component = (orte_iof_base_component_t *) cli->cli_component;
if(component != selected_component) {
opal_output_verbose(10, orte_iof_base.iof_output,
"orte_iof_base_select: module %s unloaded",
component->iof_version.mca_component_name);
mca_base_component_repository_release((mca_base_component_t *) component);
opal_list_remove_item(&orte_iof_base.iof_components_opened, item);
OBJ_RELEASE(item);
}
item = next;
}
/* setup reference to selected module */
if (NULL != selected_module) {
orte_iof = *selected_module;
orte_iof_base.iof_flush = true;
return ORTE_SUCCESS;
}
/* Oops -- this shouldn't happen */
opal_output(orte_iof_base.iof_output, "iof:select: no components found!");
return ORTE_ERR_OUT_OF_RESOURCE;
cleanup:
return exit_status;
}

Просмотреть файл

@ -213,8 +213,7 @@ orte_iof_base_setup_parent(const orte_process_name_t* name,
/* connect stdin endpoint */
if (opts->connect_stdin) {
/* and connect the pty to stdin */
ret = orte_iof.iof_publish(name, ORTE_IOF_SINK,
ORTE_IOF_STDIN, opts->p_stdin[1]);
ret = orte_iof.pull(name, ORTE_IOF_STDIN, opts->p_stdin[1]);
if(ORTE_SUCCESS != ret) {
ORTE_ERROR_LOG(ret);
return ret;
@ -224,22 +223,19 @@ orte_iof_base_setup_parent(const orte_process_name_t* name,
}
/* connect read ends to IOF */
ret = orte_iof.iof_publish(name, ORTE_IOF_SOURCE,
ORTE_IOF_STDOUT, opts->p_stdout[0]);
ret = orte_iof.push(name, ORTE_IOF_STDOUT, opts->p_stdout[0]);
if(ORTE_SUCCESS != ret) {
ORTE_ERROR_LOG(ret);
return ret;
}
ret = orte_iof.iof_publish(name, ORTE_IOF_SOURCE,
ORTE_IOF_STDERR, opts->p_stderr[0]);
ret = orte_iof.push(name, ORTE_IOF_STDERR, opts->p_stderr[0]);
if(ORTE_SUCCESS != ret) {
ORTE_ERROR_LOG(ret);
return ret;
}
ret = orte_iof.iof_publish(name, ORTE_IOF_SOURCE,
ORTE_IOF_INTERNAL, opts->p_internal[0]);
ret = orte_iof.push(name, ORTE_IOF_STDDIAG, opts->p_internal[0]);
if(ORTE_SUCCESS != ret) {
ORTE_ERROR_LOG(ret);
return ret;

Просмотреть файл

@ -20,27 +20,28 @@
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if OMPI_BUILD_iof_proxy_DSO
if OMPI_BUILD_iof_hnp_DSO
component_noinst =
component_install = mca_iof_proxy.la
component_install = mca_iof_hnp.la
else
component_noinst = libmca_iof_proxy.la
component_noinst = libmca_iof_hnp.la
component_install =
endif
proxy_SOURCES = \
iof_proxy.c \
iof_proxy.h \
iof_proxy_component.c \
iof_proxy_svc.c \
iof_proxy_svc.h
hnp_SOURCES = \
iof_hnp.c \
iof_hnp.h \
iof_hnp_component.c \
iof_hnp_read.c \
iof_hnp_send.c \
iof_hnp_receive.c
mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_iof_proxy_la_SOURCES = $(proxy_SOURCES)
mca_iof_proxy_la_LDFLAGS = -module -avoid-version
mca_iof_hnp_la_SOURCES = $(hnp_SOURCES)
mca_iof_hnp_la_LDFLAGS = -module -avoid-version
noinst_LTLIBRARIES = $(component_noinst)
libmca_iof_proxy_la_SOURCES = $(proxy_SOURCES)
libmca_iof_proxy_la_LIBADD =
libmca_iof_proxy_la_LDFLAGS = -module -avoid-version
libmca_iof_hnp_la_SOURCES = $(hnp_SOURCES)
libmca_iof_hnp_la_LIBADD =
libmca_iof_hnp_la_LDFLAGS = -module -avoid-version

Просмотреть файл

298
orte/mca/iof/hnp/iof_hnp.c Обычный файл
Просмотреть файл

@ -0,0 +1,298 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include <errno.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#ifdef HAVE_STRING_H
#include <string.h>
#endif /* HAVE_STRING_H */
#include "orte/util/show_help.h"
#include "orte/mca/oob/base/base.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/iof/base/base.h"
#include "iof_hnp.h"
/* LOCAL FUNCTIONS */
static void stdin_write_handler(int fd, short event, void *cbdata);
/* API FUNCTIONS */
static int hnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag, int fd);
static int hnp_pull(const orte_process_name_t* src_name,
orte_iof_tag_t src_tag,
int fd);
static int hnp_close(const orte_process_name_t* peer,
orte_iof_tag_t source_tag);
static int hnp_ft_event(int state);
/* The API's in this module are solely used to support LOCAL
* procs - i.e., procs that are co-located to the HNP. Remote
* procs interact with the HNP's IOF via the HNP's receive function,
* which operates independently and is in the iof_hnp_receive.c file
*
* The entries are positional: they must stay in the order in which
* orte_iof_base_module_t declares its function pointers.
*/
orte_iof_base_module_t orte_iof_hnp_module = {
/* read from a local fd (child output, or our own stdin) */
hnp_push,
/* register a local proc's stdin fd as a destination */
hnp_pull,
/* close request from a local proc - currently does nothing */
hnp_close,
/* fault-tolerance event hook - currently does nothing */
hnp_ft_event
};
/* Setup to read local data. If the tag is other than STDIN,
* then this is output being pushed from one of my child processes
* and I'll write the data out myself. If the tag is STDIN,
* then I need to setup to read from my stdin, and send anything
* I get to the specified dst_name. The dst_name in this case tells
* us which procs are to get stdin - only two options are supported:
*
* (a) a specific name, usually vpid=0; or
*
* (b) all procs, specified by vpid=ORTE_VPID_WILDCARD
*
* The orte_plm_base_launch_apps function calls iof.push after
* the procs are launched and tells us how to distribute stdin. This
* ensures that the procs are started -before- we begin reading stdin
* and attempting to send it to remote procs
*
* dst_name  for STDIN: the proc(s) that are to receive our stdin;
*           otherwise the name handed to the read event for fd
* src_tag   ORTE_IOF_* flag identifying the stream
* fd        file descriptor to be read
*
* Returns ORTE_SUCCESS (also when dst_name->vpid is ORTE_VPID_INVALID,
* in which case nothing is done), or ORTE_ERR_BAD_PARAM if the job data
* for dst_name->jobid cannot be found.
*/
static int hnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag, int fd)
{
orte_job_t *jdata;
orte_proc_t **procs;
orte_iof_sink_t *sink;
/* don't do this if the dst vpid is invalid */
if (ORTE_VPID_INVALID == dst_name->vpid) {
return ORTE_SUCCESS;
}
/* only setup to read stdin once */
if ((src_tag & ORTE_IOF_STDIN) && NULL == mca_iof_hnp_component.stdinev) {
/* Since we are the HNP, we don't want to set nonblocking on our
* file descriptors. If we do so, we set the file descriptor to
* non-blocking for everyone that has that file descriptor, which
* includes everyone else in our shell pipeline chain. (See
* http://lists.freebsd.org/pipermail/freebsd-hackers/2005-January/009742.html).
* This causes things like "mpirun -np 1 big_app | cat" to lose
* output, because cat's stdout is then ALSO non-blocking and cat
* isn't built to deal with that case (same with almost all other
* unix text utils).
*/
if (isatty(fd)) {
/* We should avoid trying to read from stdin if we
* have a terminal, but are backgrounded. Catch the
* signals that are commonly used when we switch
* between being backgrounded and not. If the
* filedescriptor is not a tty, don't worry about it
* and always stay connected.
*/
/* NOTE(review): the signal event is only initialized here; nothing
* in this function adds it to the event loop. Confirm it is armed
* elsewhere, otherwise orte_iof_hnp_stdin_cb will never be called.
*/
opal_signal_set(&mca_iof_hnp_component.stdinsig,
SIGCONT, orte_iof_hnp_stdin_cb,
NULL);
/* setup a read event to read stdin, but don't activate it yet. The
* dst_name indicates who should receive the stdin. If that recipient
* doesn't do a corresponding pull, however, then the stdin will
* be dropped upon receipt at the local daemon
*/
ORTE_IOF_READ_EVENT(dst_name, fd, src_tag,
orte_iof_hnp_read_local_handler,
&mca_iof_hnp_component.read_events, false);
/* save it somewhere convenient */
/* assumes ORTE_IOF_READ_EVENT leaves the new event at the head of
* the list - TODO confirm against the macro definition
*/
mca_iof_hnp_component.stdinev =
(orte_iof_read_event_t*)opal_list_get_first(&mca_iof_hnp_component.read_events);
/* check to see if we want the stdin read event to be
* active - we will always at least define the event,
* but may delay its activation
*/
/* the first clause is always false here because the enclosing branch
* already requires the STDIN flag, so orte_iof_hnp_stdin_check decides
*/
if (!(src_tag & ORTE_IOF_STDIN) || orte_iof_hnp_stdin_check(fd)) {
mca_iof_hnp_component.stdinev->active = true;
opal_event_add(&(mca_iof_hnp_component.stdinev->ev), 0);
}
} else{
/* if we are not looking at a tty, just setup a read event
* and activate it
*/
ORTE_IOF_READ_EVENT(dst_name, fd, src_tag,
orte_iof_hnp_read_local_handler,
&mca_iof_hnp_component.read_events, true);
/* save it somewhere convenient */
mca_iof_hnp_component.stdinev =
(orte_iof_read_event_t*)opal_list_get_first(&mca_iof_hnp_component.read_events);
}
}
/* if we are pushing stdin, this is happening only during launch - setup
* a target for this destination if it is going somewhere other than me
*/
if (src_tag & ORTE_IOF_STDIN) {
/* is this wildcard? */
if (ORTE_VPID_WILDCARD == dst_name->vpid) {
/* define a sink with that info so it gets sent out */
/* the fd is -1 because this sink has no local descriptor */
ORTE_IOF_SINK_DEFINE(&sink, dst_name, -1, src_tag,
stdin_write_handler,
&mca_iof_hnp_component.sinks);
} else {
/* no - lookup the proc's daemon and set that into sink */
if (NULL == (jdata = orte_get_job_data_object(dst_name->jobid))) {
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
return ORTE_ERR_BAD_PARAM;
}
procs = (orte_proc_t**)jdata->procs->addr;
/* NOTE(review): dst_name->vpid is used as an array index and the
* proc/node/daemon pointers are dereferenced without any bounds or
* NULL checks - confirm callers guarantee a mapped, valid vpid.
*/
/* if it is me, then don't set this up - we'll get it on the pull */
if (ORTE_PROC_MY_NAME->vpid != procs[dst_name->vpid]->node->daemon->name.vpid) {
ORTE_IOF_SINK_DEFINE(&sink, dst_name, -1, src_tag,
stdin_write_handler,
&mca_iof_hnp_component.sinks);
/* record which daemon hosts the target proc */
sink->daemon.jobid = ORTE_PROC_MY_NAME->jobid;
sink->daemon.vpid = procs[dst_name->vpid]->node->daemon->name.vpid;
}
}
} else {
/* if we are not after stdin, then define a read event and activate it */
ORTE_IOF_READ_EVENT(dst_name, fd, src_tag,
orte_iof_hnp_read_local_handler,
&mca_iof_hnp_component.read_events, true);
}
return ORTE_SUCCESS;
}
/*
 * Record the stdin file descriptor of a process that is local to the
 * HNP so that data read from our own stdin can be written to it.
 *
 * dst_name  the local process that is to receive stdin
 * src_tag   must be exactly ORTE_IOF_STDIN
 * fd        write end of the process's stdin
 *
 * Returns ORTE_SUCCESS, or ORTE_ERR_NOT_SUPPORTED for any other tag.
 */
static int hnp_pull(const orte_process_name_t* dst_name,
                    orte_iof_tag_t src_tag,
                    int fd)
{
    orte_iof_sink_t *sink;

    if (ORTE_IOF_STDIN == src_tag) {
        OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                             "%s hnp:pull setting up %s to pass stdin",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(dst_name)));

        /* create the sink and put it on the component's list */
        ORTE_IOF_SINK_DEFINE(&sink, dst_name, fd, src_tag,
                             stdin_write_handler,
                             &mca_iof_hnp_component.sinks);

        /* the hosting "daemon" of a local proc is the HNP itself */
        sink->daemon.vpid = ORTE_PROC_MY_NAME->vpid;
        sink->daemon.jobid = ORTE_PROC_MY_NAME->jobid;

        return ORTE_SUCCESS;
    }

    /* this is a local call - nothing but stdin can be pulled */
    return ORTE_ERR_NOT_SUPPORTED;
}
/*
 * Request from one of our local procs to close the specified
 * stream(s). Nothing is torn down here at present: the arguments
 * are ignored and success is always reported.
 */
static int hnp_close(const orte_process_name_t* peer,
                     orte_iof_tag_t source_tag)
{
    int status = ORTE_SUCCESS;

    return status;
}
/*
 * Fault-tolerance event hook for the HNP IOF module.
 *
 * state  the checkpoint/restart state being signalled (ignored)
 *
 * Nothing needs to be done here for a checkpoint, so this always
 * returns ORTE_SUCCESS. Declared static to match its prototype.
 */
static int hnp_ft_event(int state)
{
    return ORTE_SUCCESS;
}
/*
 * Event callback that flushes queued stdin data to a local sink, and
 * re-enables reading of our own stdin once the backlog has drained.
 *
 * fd      descriptor the event fired on (the sink's own fd is used)
 * event   event flags (unused)
 * cbdata  the orte_iof_write_event_t of the sink being written
 *
 * Fragments are written in order. On a short write the unwritten
 * remainder goes back to the head of the list and the write event is
 * left armed. When the list empties the write event is removed.
 */
static void stdin_write_handler(int fd, short event, void *cbdata)
{
    orte_iof_write_event_t *wev = (orte_iof_write_event_t*)cbdata;
    opal_list_item_t *item;
    orte_iof_write_output_t *output;
    int num_written;
    int write_errno;

    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                         "%s hnp:stdin:write:handler writing data to %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         wev->fd));

    /* lock us up to protect global operations */
    OPAL_THREAD_LOCK(&mca_iof_hnp_component.lock);

    while (NULL != (item = opal_list_remove_first(&wev->outputs))) {
        output = (orte_iof_write_output_t*)item;
        num_written = write(wev->fd, output->data, output->numbytes);

        if (num_written < 0) {
            write_errno = errno;
            if (EAGAIN == write_errno || EINTR == write_errno) {
                /* nothing was written - keep the fragment intact and
                 * leave the write event running so we get called again
                 */
                opal_list_prepend(&wev->outputs, item);
                goto CHECK;
            }
            /* hard error (e.g. the fd was already closed) - this
             * fragment cannot be delivered, so drop it
             */
            OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                                 "%s hnp:stdin:write:handler error %d writing to %d - dropping fragment",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 write_errno, wev->fd));
            OBJ_RELEASE(output);
            continue;
        }

        if (num_written < output->numbytes) {
            /* incomplete write - shift the unwritten tail to the front
             * and shrink the count so the retry sends only what is left
             */
            memmove(output->data, &output->data[num_written], output->numbytes - num_written);
            output->numbytes -= num_written;
            /* push this item back on the front of the list */
            opal_list_prepend(&wev->outputs, item);
            /* leave the write event running so it will call us again
             * when the fd is ready.
             */
            goto CHECK;
        }

        OBJ_RELEASE(output);
    }

    /* everything has been flushed - stop the write event */
    opal_event_del(&wev->ev);
    wev->pending = false;

CHECK:
    if (NULL != mca_iof_hnp_component.stdinev &&
        !mca_iof_hnp_component.stdinev->active) {
        /* if we have turned off the read event, check to
         * see if the output list has shrunk enough to
         * turn it back on
         *
         * RHC: Note that when multiple procs want stdin, we
         * can get into a fight between a proc turning stdin
         * back "on" and other procs turning it "off". There
         * is no clear way to resolve this as different procs
         * may take input at different rates.
         */
        if (opal_list_get_size(&wev->outputs) < ORTE_IOF_MAX_INPUT_BUFFERS) {
            /* restart the read */
            mca_iof_hnp_component.stdinev->active = true;
            opal_event_add(&(mca_iof_hnp_component.stdinev->ev), 0);
        }
    }

    /* unlock and go */
    OPAL_THREAD_UNLOCK(&mca_iof_hnp_component.lock);
}

87
orte/mca/iof/hnp/iof_hnp.h Обычный файл
Просмотреть файл

@ -0,0 +1,87 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* The hnp IOF component is used in HNP processes only. It is the
* "hub" for all IOF activity, meaning that *all* IOF traffic is
* routed to the hnp component, and this component figures out where
* it is supposed to go from there. Specifically: there is *no*
* direct proxy-to-proxy IOF communication. If a proxy/orted wants to
* get a stream from another proxy/orted, the stream will go
* proxy/orted -> HNP -> proxy/orted.
*
* The hnp IOF component does two things: 1. forward fragments between
* file descriptors and streams, and 2. maintain forwarding tables to
* "route" incoming fragments to outgoing destinations (both file
* descriptors and other published streams).
*
*/
#ifndef ORTE_IOF_HNP_H
#define ORTE_IOF_HNP_H
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif /* HAVE_SYS_TYPES_H */
#ifdef HAVE_SYS_UIO_H
#include <sys/uio.h>
#endif /* HAVE_SYS_UIO_H */
#ifdef HAVE_NET_UIO_H
#include <net/uio.h>
#endif /* HAVE_NET_UIO_H */
#include "orte/mca/iof/iof.h"
#include "orte/mca/iof/base/base.h"
BEGIN_C_DECLS
/**
* IOF HNP Component
*
* Component-wide state of the HNP IOF component. There is a single
* instance, mca_iof_hnp_component.
*/
struct orte_iof_hnp_component_t {
/* base IOF component descriptor - kept as the first member */
orte_iof_base_component_t super;
/* list of orte_iof_sink_t: destinations for forwarded stdin */
opal_list_t sinks;
/* list of the read events created with ORTE_IOF_READ_EVENT */
opal_list_t read_events;
/* read event for our own stdin; NULL until stdin has been pushed */
orte_iof_read_event_t *stdinev;
/* signal event set up for SIGCONT when stdin is a tty */
opal_event_t stdinsig;
/* taken by the read/write handlers while they touch the state above */
opal_mutex_t lock;
};
typedef struct orte_iof_hnp_component_t orte_iof_hnp_component_t;
/* the single component instance and the module it provides */
ORTE_MODULE_DECLSPEC extern orte_iof_hnp_component_t mca_iof_hnp_component;
extern orte_iof_base_module_t orte_iof_hnp_module;
/* RML receive callback posted by the component for ORTE_RML_TAG_IOF_HNP */
void orte_iof_hnp_recv(int status, orte_process_name_t* sender,
opal_buffer_t* buffer, orte_rml_tag_t tag,
void* cbdata);
/* read-event callback for fds that are local to the HNP; cbdata is the
* orte_iof_read_event_t that fired
*/
void orte_iof_hnp_read_local_handler(int fd, short event, void *cbdata);
/* SIGCONT callback: re-evaluates whether stdin should currently be read */
void orte_iof_hnp_stdin_cb(int fd, short event, void *cbdata);
/* returns true if we should read stdin from fd, false otherwise */
bool orte_iof_hnp_stdin_check(int fd);
/* presumably forwards numbytes of data for the given stream to target via
* the daemon named by host - implementation not shown here, TODO confirm
*/
void orte_iof_hnp_send_data_to_endpoint(orte_process_name_t *host,
orte_process_name_t *target,
orte_iof_tag_t tag,
unsigned char *data, int numbytes);
END_C_DECLS
#endif

202
orte/mca/iof/hnp/iof_hnp_component.c Обычный файл
Просмотреть файл

@ -0,0 +1,202 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/event/event.h"
#include "orte/util/show_help.h"
#include "orte/util/proc_info.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/iof/base/base.h"
#include "iof_hnp.h"
/*
* Local functions
*/
static int orte_iof_hnp_open(void);
static int orte_iof_hnp_close(void);
static int orte_iof_hnp_query(mca_base_module_t **module, int *priority);
static void
orte_iof_hnp_exception_handler(const orte_process_name_t* peer, orte_rml_exception_t reason);
/*
* Local variables
*/
static bool initialized = false;
/*
* Public string showing the iof hnp component version number
*/
const char *mca_iof_hnp_component_version_string =
"Open MPI hnp iof MCA component version " ORTE_VERSION;
/*
* The component instance. Only the base component part is initialized
* here; the remaining members (sinks, read_events, stdinev, stdinsig,
* lock) have no initializer and therefore start out zeroed. The lock,
* the two lists and stdinev are set up in orte_iof_hnp_query.
*/
orte_iof_hnp_component_t mca_iof_hnp_component = {
{
/* First, the mca_base_component_t struct containing meta
information about the component itself */
{
ORTE_IOF_BASE_VERSION_2_0_0,
"hnp", /* MCA component name */
ORTE_MAJOR_VERSION, /* MCA component major version */
ORTE_MINOR_VERSION, /* MCA component minor version */
ORTE_RELEASE_VERSION, /* MCA component release version */
/* Component open, close, and query functions */
orte_iof_hnp_open,
orte_iof_hnp_close,
orte_iof_hnp_query
},
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
}
};
/**
 * Component open function.
 *
 * The HNP IOF component has nothing to prepare at open time; all of
 * its setup happens when it is queried. Always returns ORTE_SUCCESS.
 */
static int orte_iof_hnp_open(void)
{
    const int status = ORTE_SUCCESS;

    return status;
}
/**
 * Component close function.
 *
 * Releases everything orte_iof_hnp_query set up: stops the stdin read
 * event if it is active, releases all sinks and read events, cancels
 * the RML receive and destroys the lock. Does nothing if the component
 * was never successfully queried, or has already been closed.
 *
 * Always returns ORTE_SUCCESS.
 */
static int orte_iof_hnp_close(void)
{
    opal_list_item_t* item;

    if (!initialized) {
        return ORTE_SUCCESS;
    }

    OPAL_THREAD_LOCK(&mca_iof_hnp_component.lock);

    /* if the stdin event is active, delete it */
    if (NULL != mca_iof_hnp_component.stdinev && mca_iof_hnp_component.stdinev->active) {
        opal_event_del(&(mca_iof_hnp_component.stdinev->ev));
    }

    /* cleanout all registered sinks */
    while ((item = opal_list_remove_first(&mca_iof_hnp_component.sinks)) != NULL) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&mca_iof_hnp_component.sinks);

    /* cleanout all pending receive events */
    while ((item = opal_list_remove_first(&mca_iof_hnp_component.read_events)) != NULL) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&mca_iof_hnp_component.read_events);

    /* the stdin event was an element of the list we just emptied, so
     * the saved pointer must not be used any more
     */
    mca_iof_hnp_component.stdinev = NULL;

    orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_IOF_HNP);

    OPAL_THREAD_UNLOCK(&mca_iof_hnp_component.lock);
    OBJ_DESTRUCT(&mca_iof_hnp_component.lock);

    /* make a second close a no-op instead of a double destruct */
    initialized = false;

    return ORTE_SUCCESS;
}
/**
 * Component query function.
 *
 * Offers the HNP IOF module when - and only when - this process is the
 * HNP. On success the RML receive for forwarded I/O is posted, the
 * exception handler is registered and the component state is set up.
 *
 * module    out: the module to use, or NULL if not available
 * priority  out: 100 on success, -1 otherwise
 *
 * Returns ORTE_SUCCESS, ORTE_ERROR if this process is not the HNP, or
 * the error code reported by the RML.
 */
static int orte_iof_hnp_query(mca_base_module_t **module, int *priority)
{
    int ret;

    /* assume we cannot be used until proven otherwise */
    *priority = -1;
    *module = NULL;

    /* this module is for the HNP only */
    if (!orte_process_info.hnp) {
        return ORTE_ERROR;
    }

    /* post non-blocking recv to catch forwarded IO from the orteds */
    ret = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
                                  ORTE_RML_TAG_IOF_HNP,
                                  ORTE_RML_NON_PERSISTENT,
                                  orte_iof_hnp_recv,
                                  NULL);
    if (ORTE_SUCCESS != ret) {
        ORTE_ERROR_LOG(ret);
        return ret;
    }

    ret = orte_rml.add_exception_handler(orte_iof_hnp_exception_handler);
    if (ORTE_SUCCESS != ret) {
        ORTE_ERROR_LOG(ret);
        /* undo the receive we just posted */
        orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_IOF_HNP);
        return ret;
    }

    /* set up the component state */
    OBJ_CONSTRUCT(&mca_iof_hnp_component.lock, opal_mutex_t);
    OBJ_CONSTRUCT(&mca_iof_hnp_component.sinks, opal_list_t);
    OBJ_CONSTRUCT(&mca_iof_hnp_component.read_events, opal_list_t);
    mca_iof_hnp_component.stdinev = NULL;
    initialized = true;

    /* we must be selected */
    *module = (mca_base_module_t *) &orte_iof_hnp_module;
    *priority = 100;

    return ORTE_SUCCESS;
}
/**
 * Callback when peer is disconnected
 *
 * peer    the process whose RML connection raised the exception
 * reason  the kind of exception
 *
 * No per-peer cleanup is performed at present: the handler only emits
 * a verbose trace. The previously disabled body referred to the
 * endpoint and publish/subscribe interfaces and has been removed.
 */
static void
orte_iof_hnp_exception_handler(const orte_process_name_t* peer, orte_rml_exception_t reason)
{
    opal_output_verbose(1, orte_iof_base.iof_output, "done with exception handler\n");
}

235
orte/mca/iof/hnp/iof_hnp_read.c Обычный файл
Просмотреть файл

@ -0,0 +1,235 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include <errno.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#ifdef HAVE_STRING_H
#include <string.h>
#endif /* HAVE_STRING_H */
#include "orte/util/show_help.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/ess/ess.h"
#include "orte/mca/iof/iof.h"
#include "orte/mca/iof/base/base.h"
#include "iof_hnp.h"
/*
 * Decide whether stdin should be read from fd right now.
 *
 * Where tcgetpgrp() is available (and we are not on Windows), a
 * terminal whose foreground process group is not our own means we are
 * in the background, and the answer is false. In every other case the
 * answer is true.
 */
bool orte_iof_hnp_stdin_check(int fd)
{
    bool ok_to_read = true;

#if !defined(__WINDOWS__) && defined(HAVE_TCGETPGRP)
    if (isatty(fd)) {
        if (getpgrp() != tcgetpgrp(fd)) {
            /* a tty, but some other process group owns it */
            ok_to_read = false;
        }
    }
#endif /* !defined(__WINDOWS__) && defined(HAVE_TCGETPGRP) */

    return ok_to_read;
}
void orte_iof_hnp_stdin_cb(int fd, short event, void *cbdata)
{
bool should_process = orte_iof_hnp_stdin_check(0);
if (should_process) {
mca_iof_hnp_component.stdinev->active = true;
opal_event_add(&(mca_iof_hnp_component.stdinev->ev), 0);
} else {
opal_event_del(&(mca_iof_hnp_component.stdinev->ev));
mca_iof_hnp_component.stdinev->active = false;
}
}
/* this is the read handler for my own child procs. In this case,
 * the data is going nowhere - I just output it myself
 *
 * fd     - descriptor that became readable
 * event  - event flags (unused)
 * cbdata - the orte_iof_read_event_t describing the stream; its tag
 *          tells us whether this is our own stdin or output from a
 *          local proc, and its name identifies the proc
 *
 * Data read from stdin is delivered to every stdin sink: written down
 * the local pipe when the sink's daemon is this process, otherwise sent
 * to the sink's daemon. Output from a local proc is copied to any sink
 * that asked for it and then written to our own stdout/stderr.
 *
 * The component lock is held for the duration of the handler.
 */
void orte_iof_hnp_read_local_handler(int fd, short event, void *cbdata)
{
    orte_iof_read_event_t *rev = (orte_iof_read_event_t*)cbdata;
    unsigned char data[ORTE_IOF_BASE_MSG_MAX];
    int32_t numbytes;
    opal_list_item_t *item;

    OPAL_THREAD_LOCK(&mca_iof_hnp_component.lock);

    /* read up to the fragment size, holding back one byte so that the
     * NUL terminator appended below always fits inside the buffer
     */
#if !defined(__WINDOWS__)
    numbytes = read(fd, data, sizeof(data) - 1);
#else
    {
        DWORD readed;
        HANDLE handle = (HANDLE)_get_osfhandle(fd);
        ReadFile(handle, data, sizeof(data) - 1, &readed, NULL);
        numbytes = (int)readed;
    }
#endif  /* !defined(__WINDOWS__) */

    if (numbytes < 0) {
        /* either we have a connection error or it was a non-blocking read */

        /* non-blocking, retry */
        if (EAGAIN == errno || EINTR == errno) {
            OPAL_THREAD_UNLOCK(&mca_iof_hnp_component.lock);
            return;
        }

        OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                             "%s iof:hnp:read handler %s Error on connection:%d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(&rev->name), fd));

        /* hard error - stop watching this descriptor */
        opal_event_del(&rev->ev);
        goto CLEAN_RETURN;
    }

    /* is this read from our stdin? */
    if (ORTE_IOF_STDIN & rev->tag) {
        /* cycle through our list of sinks */
        for (item = opal_list_get_first(&mca_iof_hnp_component.sinks);
             item != opal_list_get_end(&mca_iof_hnp_component.sinks);
             item = opal_list_get_next(item)) {
            orte_iof_sink_t* sink = (orte_iof_sink_t*)item;

            /* only look at stdin sinks */
            if (!(ORTE_IOF_STDIN & sink->tag)) {
                continue;
            }

            /* if the daemon is me, then this is a local sink */
            if (ORTE_PROC_MY_NAME->jobid == sink->daemon.jobid &&
                ORTE_PROC_MY_NAME->vpid == sink->daemon.vpid) {
                OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                                     "%s read %d bytes from stdin - writing to %s",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                                     ORTE_NAME_PRINT(&rev->name)));
                /* if stdin was closed, we need to close it too so the proc
                 * knows it is done
                 */
                if (0 == numbytes) {
                    /* make sure the write event is off - it only needs
                     * removing if it is currently pending
                     */
                    if (sink->wev.pending) {
                        opal_event_del(&(sink->wev.ev));
                    }
                    close(sink->wev.fd);
                    sink->wev.fd =-1;
                } else if (sink->wev.fd < 0) {
                    /* the fd has already been closed or this doesn't refer to a local
                     * sink - skip this entry */
                    continue;
                } else {
                    /* send the bytes down the pipe */
                    if (ORTE_IOF_MAX_INPUT_BUFFERS < orte_iof_base_write_output(&rev->name, rev->tag, data, numbytes, &sink->wev)) {
                        /* getting too backed up - stop the read event for now if it is still active */
                        if (mca_iof_hnp_component.stdinev->active) {
                            opal_event_del(&(mca_iof_hnp_component.stdinev->ev));
                            mca_iof_hnp_component.stdinev->active = false;
                        }
                    }
                }
            } else {
                OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                                     "%s sending data to daemon %s",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     ORTE_NAME_PRINT(&sink->daemon)));

                /* send the data to the daemon so it can
                 * write it to the proc's fd - in this case,
                 * we pass sink->name to indicate who is to
                 * receive the data. If the connection closed,
                 * numbytes will be zero so zero bytes will be
                 * sent - this will tell the daemon to close
                 * the fd for stdin to that proc
                 */
                orte_iof_hnp_send_data_to_endpoint(&sink->daemon, &sink->name, ORTE_IOF_STDIN, data, numbytes);
            }
        }
        /* check if stdin was closed */
        if (0 == numbytes) {
            opal_event_del(&rev->ev);
        }
        /* nothing more to do */
        goto CLEAN_RETURN;
    }

    /* this must be output from one of my local procs - see
     * if anyone else has requested a copy of this info
     */
    for (item = opal_list_get_first(&mca_iof_hnp_component.sinks);
         item != opal_list_get_end(&mca_iof_hnp_component.sinks);
         item = opal_list_get_next(item)) {
        orte_iof_sink_t *sink = (orte_iof_sink_t*)item;

        if (sink->tag & rev->tag &&
            sink->name.jobid == rev->name.jobid &&
            (ORTE_VPID_WILDCARD == sink->name.vpid || sink->name.vpid == rev->name.vpid)) {
            /* need to send the data to the remote endpoint - if
             * the connection closed, numbytes will be zero, so
             * the remote endpoint will know to close its local fd.
             * In this case, we pass rev->name to indicate who the
             * data came from.
             */
            OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                                 "%s sending data to tool %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(&sink->daemon)));
            orte_iof_hnp_send_data_to_endpoint(&sink->daemon, &rev->name, rev->tag, data, numbytes);
            if (0 == numbytes) {
                opal_event_del(&rev->ev);
            }
        }
    }

    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                         "%s read %d bytes from %s of %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                         (ORTE_IOF_STDOUT & rev->tag) ? "stdout" : ((ORTE_IOF_STDERR & rev->tag) ? "stderr" : "stddiag"),
                         ORTE_NAME_PRINT(&rev->name)));

    /* if we read 0 bytes from the stdout/err/diag, there is
     * nothing to output - we do not close these file descriptors,
     * but we do terminate the event
     */
    if (0 == numbytes) {
        opal_event_del(&rev->ev);
        goto CLEAN_RETURN;
    }

    /* numbytes is at most sizeof(data) - 1 here, so this stays in bounds */
    data[numbytes] = '\0';

    /* stdout goes to our stdout; everything else goes to our stderr */
    if (ORTE_IOF_STDOUT & rev->tag) {
        orte_iof_base_write_output(&rev->name, rev->tag, data, numbytes, &orte_iof_base.iof_write_stdout);
    } else {
        orte_iof_base_write_output(&rev->name, rev->tag, data, numbytes, &orte_iof_base.iof_write_stderr);
    }

CLEAN_RETURN:
    OPAL_THREAD_UNLOCK(&mca_iof_hnp_component.lock);

    /* since the event is persistent, we do not need to re-add it */
    return;
}

208
orte/mca/iof/hnp/iof_hnp_receive.c Обычный файл
Просмотреть файл

@ -0,0 +1,208 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include <errno.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#ifdef HAVE_STRING_H
#include <string.h>
#endif /* HAVE_STRING_H */
#include "orte/util/show_help.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/iof/iof.h"
#include "orte/mca/iof/base/base.h"
#include "iof_hnp.h"
static void process_msg(int fd, short event, void *cbdata)
{
orte_message_event_t *mev = (orte_message_event_t*)cbdata;
orte_process_name_t origin;
unsigned char data[ORTE_IOF_BASE_MSG_MAX];
orte_iof_tag_t stream;
int32_t count, numbytes;
orte_iof_sink_t *sink;
opal_list_item_t *item, *next;
int rc;
/* unpack the stream first as this may be flow control info */
count = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(mev->buffer, &stream, &count, ORTE_IOF_TAG))) {
ORTE_ERROR_LOG(rc);
goto CLEAN_RETURN;
}
if (ORTE_IOF_XON & stream) {
/* re-start the stdin read event */
if (!mca_iof_hnp_component.stdinev->active) {
mca_iof_hnp_component.stdinev->active = true;
opal_event_add(&(mca_iof_hnp_component.stdinev->ev), 0);
}
goto CLEAN_RETURN;
} else if (ORTE_IOF_XOFF & stream) {
/* stop the stdin read event */
if (!mca_iof_hnp_component.stdinev->active) {
opal_event_del(&(mca_iof_hnp_component.stdinev->ev));
mca_iof_hnp_component.stdinev->active = false;
}
goto CLEAN_RETURN;
}
/* get name of the process whose io we are discussing */
count = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(mev->buffer, &origin, &count, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
goto CLEAN_RETURN;
}
/* check to see if a tool has requested something */
if (ORTE_IOF_PULL & stream) {
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
"%s received pull cmd from remote tool %s for proc %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&mev->sender),
ORTE_NAME_PRINT(&origin)));
/* a tool is requesting that we send it a copy of the specified stream(s)
* from the specified process(es), so create a sink for it
*/
ORTE_IOF_SINK_DEFINE(&sink, &origin, -1, stream,
NULL, &mca_iof_hnp_component.sinks);
/* specify the name of the tool that wants this data */
sink->daemon.jobid = mev->sender.jobid;
sink->daemon.vpid = mev->sender.vpid;
goto CLEAN_RETURN;
}
if (ORTE_IOF_CLOSE & stream) {
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
"%s received close cmd from remote tool %s for proc %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&mev->sender),
ORTE_NAME_PRINT(&origin)));
/* a tool is requesting that we no longer forward a copy of the
* specified stream(s) from the specified process(es) - remove the sink
*/
item = opal_list_get_first(&mca_iof_hnp_component.sinks);
while (item != opal_list_get_end(&mca_iof_hnp_component.sinks)) {
next = opal_list_get_next(item);
sink = (orte_iof_sink_t*)item;
/* if this sink is the designated one, then remove it from list */
if (stream & sink->tag &&
sink->name.jobid == origin.jobid &&
(ORTE_VPID_WILDCARD == sink->name.vpid ||
ORTE_VPID_WILDCARD == origin.vpid ||
sink->name.vpid == origin.vpid)) {
/* send an ack message to the requestor - this ensures that the RML has
* completed sending anything to that requestor before it exits
*/
orte_iof_hnp_send_data_to_endpoint(&sink->daemon, &origin, ORTE_IOF_CLOSE, NULL, 0);
opal_list_remove_item(&mca_iof_hnp_component.sinks, item);
OBJ_RELEASE(item);
}
item = next;
}
goto CLEAN_RETURN;
}
/* this must have come from a daemon forwarding output - unpack the data */
numbytes=ORTE_IOF_BASE_MSG_MAX;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(mev->buffer, data, &numbytes, OPAL_BYTE))) {
ORTE_ERROR_LOG(rc);
goto CLEAN_RETURN;
}
/* numbytes will contain the actual #bytes that were sent */
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
"%s unpacked %d bytes from remote proc %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
ORTE_NAME_PRINT(&origin)));
/* write the output locally */
if (ORTE_IOF_STDOUT & stream) {
orte_iof_base_write_output(&origin, stream, data, numbytes, &orte_iof_base.iof_write_stdout);
} else {
orte_iof_base_write_output(&origin, stream, data, numbytes, &orte_iof_base.iof_write_stderr);
}
/* cycle through the endpoints to see if someone else wants a copy */
for (item = opal_list_get_first(&mca_iof_hnp_component.sinks);
item != opal_list_get_end(&mca_iof_hnp_component.sinks);
item = opal_list_get_next(item)) {
orte_iof_sink_t* sink = (orte_iof_sink_t*)item;
if (stream & sink->tag &&
sink->name.jobid == origin.jobid &&
(ORTE_VPID_WILDCARD == sink->name.vpid ||
ORTE_VPID_WILDCARD == origin.vpid ||
sink->name.vpid == origin.vpid)) {
/* send the data to the tool */
orte_iof_hnp_send_data_to_endpoint(&sink->daemon, &origin, stream, data, numbytes);
}
}
CLEAN_RETURN:
/* release the message event */
OBJ_RELEASE(mev);
return;
}
/*
 * RML receive callback for messages arriving on the HNP's IOF tag.
 *
 * The message is not handled here. It is handed to process_msg via a
 * message event so that the work happens after we have left the
 * receive, and the non-persistent receive is then posted again.
 *
 * status and cbdata are not used.
 */
void orte_iof_hnp_recv(int status, orte_process_name_t* sender,
                       opal_buffer_t* buffer, orte_rml_tag_t tag,
                       void* cbdata)
{
    int rc;

    OPAL_OUTPUT_VERBOSE((5, orte_iof_base.iof_output,
                         "%s iof:hnp:receive got message from %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(sender)));

    /* Processing the message inside the recv would mean performing the
     * rest of the job from within this callback, so defer it: queue an
     * event that runs process_msg as soon as we return.
     *
     * The macro copies the buffer into the message event, which
     * process_msg releases when it is done. The incoming buffer itself
     * is NOT released here, although its payload IS transferred to the
     * message buffer for later processing.
     */
    ORTE_MESSAGE_EVENT(sender, buffer, tag, process_msg);

    /* the recv is non-persistent, so post it again for the next message */
    rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
                                 ORTE_RML_TAG_IOF_HNP,
                                 ORTE_RML_NON_PERSISTENT,
                                 orte_iof_hnp_recv,
                                 NULL);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    }
}

108
orte/mca/iof/hnp/iof_hnp_send.c Обычный файл
Просмотреть файл

@ -0,0 +1,108 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include <errno.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#ifdef HAVE_STRING_H
#include <string.h>
#endif /* HAVE_STRING_H */
#include "orte/util/show_help.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/ess/ess.h"
#include "orte/mca/grpcomm/grpcomm.h"
#include "orte/mca/iof/iof.h"
#include "orte/mca/iof/base/base.h"
#include "iof_hnp.h"
/*
 * Completion callback for the non-blocking RML send issued by
 * orte_iof_hnp_send_data_to_endpoint. Its only job is to give up
 * our reference to the buffer that was sent; status, peer, tag and
 * cbdata are ignored.
 */
static void send_cb(int status, orte_process_name_t *peer,
                    opal_buffer_t *buf, orte_rml_tag_t tag,
                    void *cbdata)
{
    /* the send is finished - the buffer is no longer needed */
    OBJ_RELEASE(buf);
    return;
}
/*
 * Pack an IOF message and deliver it to a daemon or tool.
 *
 * host     - where to deliver the message; if it is our own jobid with
 *            a wildcard vpid, the message is xcast instead of sent
 *            point-to-point
 * target   - process name packed into the message: the intended
 *            recipient (stdin going to a daemon) or the source of the
 *            data (anything else)
 * tag      - IOF stream tag, packed first
 * data     - payload, or NULL to send a message with no data section
 * numbytes - number of payload bytes; may be zero
 *
 * Errors are logged and the buffer is released; nothing is returned to
 * the caller.
 */
void orte_iof_hnp_send_data_to_endpoint(orte_process_name_t *host,
                                        orte_process_name_t *target,
                                        orte_iof_tag_t tag,
                                        unsigned char *data, int numbytes)
{
    opal_buffer_t *buf;
    int rc;

    buf = OBJ_NEW(opal_buffer_t);
    if (NULL == buf) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return;
    }

    /* pack the tag - we do this first so that flow control messages can
     * consist solely of the tag
     */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &tag, 1, ORTE_IOF_TAG))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(buf);
        return;
    }

    /* pack the name of the target - this is either the intended
     * recipient (if the tag is stdin and we are sending to a daemon),
     * or the source (if we are sending to anyone else)
     */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, target, 1, ORTE_NAME))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(buf);
        return;
    }

    /* if data is NULL, then we are done */
    if (NULL != data) {
        /* pack the data - if numbytes is zero, we will pack zero bytes */
        if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, data, numbytes, OPAL_BYTE))) {
            ORTE_ERROR_LOG(rc);
            OBJ_RELEASE(buf);
            return;
        }
    }

    /* if the target is wildcard, then this needs to go to everyone - xcast it */
    if (ORTE_PROC_MY_NAME->jobid == host->jobid &&
        ORTE_VPID_WILDCARD == host->vpid) {
        /* xcast this to everyone - the local daemons will know how to handle it */
        if (ORTE_SUCCESS != (rc = orte_grpcomm.xcast(ORTE_PROC_MY_NAME->jobid, buf, ORTE_RML_TAG_IOF_PROXY))) {
            ORTE_ERROR_LOG(rc);
        }
        /* the buffer is ours to release in this path, success or not */
        OBJ_RELEASE(buf);
        return;
    }

    /* send the buffer to the host - this is either a daemon or
     * a tool that requested IOF. On success send_cb releases the
     * buffer once the send completes; if the send could not even be
     * posted the callback will never run, so release it here
     */
    if (0 > (rc = orte_rml.send_buffer_nb(host, buf, ORTE_RML_TAG_IOF_PROXY,
                                          0, send_cb, NULL))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(buf);
    }
}

Просмотреть файл

Просмотреть файл

Просмотреть файл

Просмотреть файл

Просмотреть файл

Просмотреть файл

Просмотреть файл

@ -20,37 +20,59 @@
* @file
*
* I/O Forwarding Service
* The I/O forwarding service (IOF) is used to push file descriptor
* streams between ORTE processes. It is currently primarily used to
* push stdin, stdout, and stderr between ORTE processes, but can be
* used with any file descriptor stream.
* The I/O forwarding service (IOF) is used to connect stdin, stdout, and
* stderr file descriptor streams from MPI processes to the user.
*
* In practice, the IOF acts as a multiplexor between local file
* descriptors and the RML; the RML relays information from local file
* descriptors to remote file descriptors. Note that the IOF allows
* many-to-one mappings; SOURCE streams can be directed to multiple
* destinations and SINK streams can receive input from multiple
* sources.
* The design is fairly simple: when a proc is spawned, the IOF establishes
* connections between its stdin, stdout, and stderr to a
* corresponding IOF stream. In addition, the IOF designates a separate
* stream for passing OMPI/ORTE internal diagnostic/help output to mpirun.
* This is done specifically to separate such output from the user's
* stdout/err - basically, it allows us to present it to the user in
* a separate format for easier recognition. Data read from a source
* on any stream (e.g., printed to stdout by the proc) is relayed
* by the local daemon to the other end of the stream - i.e., stdin
* is relayed to the local proc, while stdout/err is relayed to mpirun.
* Thus, the eventual result is to connect ALL streams to/from
* the application process and mpirun.
*
* The design is fairly simple: streams are designated as either
* ORTE_IOF_SOURCEs or ORTE_IOF_SINKs. SOURCE streams provide content
* that is pushed elsewhere. SINK streams accept content that
* originated from elsewhere. In short, we read from SOURCEs and we
* write to SINKs.
* Note: By default, data read from stdin is forwarded -only- to rank=0.
* Stdin for all other procs is tied to "/dev/null".
*
* Streams are identified by ORTE process name (to include wildecards,
* External tools can "pull" copies of stdout/err and
* the diagnostic stream from mpirun for any process. In this case,
* mpirun will send a copy of the output to the "pulling" process. Note that external tools
* cannot "push" something into stdin unless the user specifically directed
* that stdin remain open, nor under any conditions "pull" a copy of the
* stdin being sent to rank=0.
*
* Tools can exploit either of two mechanisms for this purpose:
*
* (a) call orte_init themselves and utilize the ORTE tool comm
* library to access the IOF. This also provides access to
* other tool library functions - e.g., to order that a job
* be spawned; or
*
* (b) fork/exec the "orte-iof" tool and let it serve as the interface
* to mpirun. This lets the tool avoid calling orte_init, and means
* the tool will not have to compile against the ORTE/OMPI libraries.
* However, the orte-iof tool is limited solely to interfacing
* stdio and cannot be used for other functions included in
* the tool comm library
*
* Thus, mpirun acts as a "switchyard" for IO, taking input from stdin
* and passing it to rank=0 of the job, and taking stdout/err/diag from all
* ranks and passing it to its own stdout/err/diag plus any "pull"
* requestors.
*
* Streams are identified by ORTE process name (to include wildcards,
* such as "all processes in ORTE job X") and tag. There are
* currently 4 predefined tags, although any integer value is
* sufficient:
* currently only 4 allowed predefined tags:
*
* - ORTE_IOF_ANY (value -1): any stream will match
* - ORTE_IOF_STDIN (value 0): recommended for file descriptor 0, or
* wherever the standard input is currently tied.
* - ORTE_IOF_STDOUT (value 1): recommended for file descriptor 1, or
* wherever the standard output is currently tied.
* - ORTE_IOF_STDERR (value 2): recommended for file descriptor 2, or
* wherever the standard error is currently tied.
* - ORTE_IOF_INTERNAL (value 3): recommended for "internal" messages
* - ORTE_IOF_STDIN (value 0)
* - ORTE_IOF_STDOUT (value 1)
* - ORTE_IOF_STDERR (value 2)
* - ORTE_IOF_INTERNAL (value 3): for "internal" messages
* from the infrastructure, just to differentiate them from user job
* stdout/stderr
*
@ -63,82 +85,27 @@
* Details for the various components are given in their source code
* bases.
*
* The following basic actions are supported in IOF:
* Each IOF component must support the following API:
*
* publish: File descriptors are "published" as a stream as a
* mechanism to make them available to other processes. For example,
* if a stdout descriptor is available from process X, then process X
* needs to publish it (and make it a stream) in order to make that
* stdout stream available to any other process.
* --> today, this isn't necessarily true for the proxy because
* everything is atuomatically sent to the svc. But the proxy
* should be fixed someday to make this definition consistent.
* push: Tie a local file descriptor (*not* a stream!) to the stdin
* of the specified process. If the user has not specified that stdin
* of the specified process is to remain open, this will return an error.
*
* unpublish: The opposite of publish; when a stream is unpublished,
* the content from that file desciptor is no longer available to
* other processes.
* pull: Tie a local file descriptor (*not* a stream!) to a stream.
* Subsequent input that appears via the stream will
* automatically be sent to the target file descriptor until the
* stream is "closed" or an EOF is received on the local file descriptor.
* Valid source values include ORTE_IOF_STDOUT, ORTE_IOF_STDERR, and
* ORTE_IOF_INTERNAL
*
* push: Tie together a local file descriptor (*not* a stream!) that
* should be treated as a source to a stream that should be treated as
* a SINK. Subsequent input that appears on the file descriptor will
* automatically be pushed to the SINK stream. There is currently no
* way to stop a push; once it starts, it runs until an EOF is
* received on the file descriptor or the target stream is
* unpublished.
* close: Closes a stream, flushing any pending data down it and
* terminating any "push/pull" connections against it. Unclear yet
* if this needs to be blocking, or can be done non-blocking.
*
* pull: Tie together a local file descriptor (*not* a stream!) that
* should be treated as a sink to a stream that should be treated as a
* SOURCE. Subsequent input that appears via the stream will
* automatically be sent to the target file descriptor. There is
* currently no way to stop a pull; once it starts, it runs until an
* EOF is receives on the file descriptor or the source stream is
* unpublished.
* flush: Block until all pending data on all open streams has been
* written down local file descriptors and/or completed sending across
* the OOB to remote process targets.
*
* subscribe: Setup a callback function that is invoked whenever a
* fragment from a matching stream arrives. This can be used to
* post-process fragment information, such as prepending a prefix to
* stdout data before outputting it to the user's display in order to
* identify the source process.
*
* unsubscribe: Remove a callback that was previously setup via the
* subscribe action.
*
* flush: Block until all pending data has been written down local
* file descriptors and/or completed sending across the OOB to remote
* process targets.
*
* Two terms that are used in the IOF interface are "origin" and
* "target" indicating the process where data started and where it is
* going. These terms are used to distinguish IOF component
* implementation details because data does not necessarily only from
* the SOURCE process to the SINK process. In practice, data can flow
* from a SINK to a SOURCE (e.g., an ACK), or be routed through a
* proxy. So the "origin" and "target" processes are those where the
* data started and will terminate, respectively, regardless of the
* designation of the originating process (as the SOURCE, SINK, or
* proxy) and the destination process (as the SOURCE, SINK, or proxy).
*
* Additionally, the "proxy" is as it is described above: it may be
* the origin or target itself, or it may be an intermediary acting on
* behalf of the origin or target.
*
* Examples:
*
* 1. mpirun -np 1 hostname
* Assume that orteds and an HNP are used. An orted will be
* launched on the same node as "hostname". It will act as a proxy
* for the hostname process' stdin, stdout, and stderr. Data read
* by the orted from the hostname process stdout will be sent to
* mpirun. In this case, the hostname process is the origin,
* mpirun is the target, and the orted is the proxy.
*
* 2. mpirun -np 1 read_stdin < input_filename
* Assume that orteds and an HNP are used. As with #1, an orted
* will proxy the stdin, stdout, and stderr of the read_stdin
* process. When mpirun reads data on its stdin, it will forward
* it to the orted to write down the pipe to the read_stdin
* process. In this case, mpirun is both the origin process *and*
* proxy, and read_stdin is the target.
*/
#ifndef ORTE_IOF_H
@ -155,159 +122,41 @@
#include "orte/util/name_fns.h"
#include "iof_types.h"
BEGIN_C_DECLS
/* Predefined tag values */
enum {
ORTE_IOF_ANY = -1,
ORTE_IOF_STDIN = 0,
ORTE_IOF_STDOUT = 1,
ORTE_IOF_STDERR = 2,
ORTE_IOF_INTERNAL = 3
};
typedef int orte_iof_base_tag_t;
/* endpoint mode */
enum {
ORTE_IOF_SOURCE = 0,
ORTE_IOF_SINK
};
typedef int orte_iof_base_mode_t;
/**
* Publish a local file descriptor as an endpoint that is logically
* associated with the specified origin process name. The file
* descriptor may be local to this process (in which case the origin
* process name is this process' name), or it may be a pipe to another
* process (i.e., this process is acting as a proxy for another
* process -- typically the case for stdin, stdout, stderr).
*
* @param origin Origin process name associated with the endpoint (not
* the proxy process).
* @param mode Is the endpoint an input or output (SOURCE or SINK)
* @param tag The logical tag associated with this file descriptor.
* @param fd Local file descriptor
*
*/
typedef int (*orte_iof_base_publish_fn_t)(
const orte_process_name_t* origin,
orte_iof_base_mode_t mode,
orte_iof_base_tag_t tag,
int fd
);
/**
* Remove all endpoints matching the specified origin process name,
* mask and tag values.
*
* @param name Origin process name associated with the endpoint.
* @param mask A mask indicating the set of processes to unpublish.
* @param tag The endpoint tag.
*
*/
typedef int (*orte_iof_base_unpublish_fn_t)(
const orte_process_name_t* origin,
orte_ns_cmp_bitmask_t mask,
orte_iof_base_tag_t tag
);
/**
* Callback function for subscriptions (see orte_iof_base_subscribe_fn_t).
*/
typedef void (*orte_iof_base_callback_fn_t)(
orte_process_name_t* origin_name,
orte_iof_base_tag_t orign_tag,
void *cbdata,
const unsigned char* data,
size_t count
);
/**
* Subscribe to receive a callback on receipt of data from a specified
* set of origin peers.
*
* This function is a general purpose utility for getting data from a
* stream; the incoming fragment is delivered to the callback in a
* buffer. You can do whatever you want with the buffer when you get
* the callback (e.g., buffer it, call syslog, ...etc.).
*
* Note that the orte_iof_base_pull_fn_t is a customized common-case
* version of this function; it always takes incoming fragments from a
* stream and writes them down an fd.
*/
typedef int (*orte_iof_base_subscribe_fn_t)(
const orte_process_name_t* origin_name,
orte_ns_cmp_bitmask_t origin_mask,
orte_iof_base_tag_t origin_tag,
orte_iof_base_callback_fn_t cb,
void* cbdata
);
/**
* Delete a subscription created by orte_iof_base_subscribe_fn_t.
*/
typedef int (*orte_iof_base_unsubscribe_fn_t)(
const orte_process_name_t* origin_name,
orte_ns_cmp_bitmask_t src_mask,
orte_iof_base_tag_t src_tag
);
/**
* Explicitly push data from the specified input file descriptor to
* the indicated set of SINK peers.
* the stdin of the indicated peer(s). The provided peer name can
* include wildcard values.
*
* This function is a shortcut for publishing a SOURCE stream and
* tying that stream to an fd that is providing data to be sent across
* the stream (e.g., read from stdin and push it out to a stream).
* Any data that appears on the fd will automatically be read and sent
* across the stream.
*
* @param sink_name Name used to qualify set of target peers.
* @param sink_mask Mask that specified how name is interpreted.
* @param sink_tag Match a specific peer endpoint.
* @param fd Local file descriptor for input.
* @param peer Name of target peer(s)
* @param fd Local file descriptor for input.
*/
typedef int (*orte_iof_base_push_fn_t)(
const orte_process_name_t* sink_name,
orte_ns_cmp_bitmask_t sink_mask,
orte_iof_base_tag_t sink_tag,
int fd
);
typedef int (*orte_iof_base_push_fn_t)(const orte_process_name_t* peer,
orte_iof_tag_t src_tag, int fd);
/**
* Explicitly pull data from the specified set of SOURCE peers and
* dump to the indicated output file descriptor.
*
* This function is a shortcut for subscribing to a SOURCE stream and
* tying that stream to an fd that will consume the data received from
* the stream (i.e., get a fragment from a stream and write it down an
* fd). Any fragments that arrive on the stream will automatically be
* written down the fd.
* dump to the indicated output file descriptor. Any fragments that
* arrive on the stream will automatically be written down the fd.
*
* @param source_name Name used to qualify set of origin peers.
* @param source_mask Mask that specified how name is interpreted.
* @param source_tag Match a specific peer endpoint.
* @param fd Local file descriptor for output.
* @param peer Name used to qualify set of origin peers.
* @param source_tag Indicates the output streams to be forwarded
* @param fd Local file descriptor for output.
*/
typedef int (*orte_iof_base_pull_fn_t)(
const orte_process_name_t* source_name,
orte_ns_cmp_bitmask_t source_mask,
orte_iof_base_tag_t source_tag,
int fd
);
typedef int (*orte_iof_base_pull_fn_t)(const orte_process_name_t* peer,
orte_iof_tag_t source_tag,
int fd);
/**
* Flush all output and block until output is delivered.
* Close the specified iof stream(s) from the indicated peer(s)
*/
typedef int (*orte_iof_base_flush_fn_t)(void);
typedef int (*orte_iof_base_close_fn_t)(const orte_process_name_t* peer,
orte_iof_tag_t source_tag);
/**
* Shut down an IOF module
*/
typedef int (*orte_iof_base_finalize_fn_t)(void);
/**
* FT Event Notification
*/
@ -316,36 +165,20 @@ typedef int (*orte_iof_base_ft_event_fn_t)(int state);
/**
* IOF module.
*/
struct orte_iof_base_module_1_0_0_t {
orte_iof_base_publish_fn_t iof_publish;
orte_iof_base_unpublish_fn_t iof_unpublish;
orte_iof_base_subscribe_fn_t iof_subscribe;
orte_iof_base_unsubscribe_fn_t iof_unsubscribe;
orte_iof_base_push_fn_t iof_push;
orte_iof_base_pull_fn_t iof_pull;
orte_iof_base_flush_fn_t iof_flush;
orte_iof_base_finalize_fn_t iof_finalize;
struct orte_iof_base_module_2_0_0_t {
orte_iof_base_push_fn_t push;
orte_iof_base_pull_fn_t pull;
orte_iof_base_close_fn_t close;
orte_iof_base_ft_event_fn_t ft_event;
};
typedef struct orte_iof_base_module_1_0_0_t orte_iof_base_module_1_0_0_t;
typedef orte_iof_base_module_1_0_0_t orte_iof_base_module_t;
typedef struct orte_iof_base_module_2_0_0_t orte_iof_base_module_2_0_0_t;
typedef orte_iof_base_module_2_0_0_t orte_iof_base_module_t;
ORTE_DECLSPEC extern orte_iof_base_module_t orte_iof;
/**
* IOF component init function. Contains component version
* information and component open/close/init functions.
*/
typedef orte_iof_base_module_t* (*orte_iof_base_component_init_fn_t)(
int *priority,
bool *allow_user_threads,
bool *have_hidden_threads
);
struct orte_iof_base_component_2_0_0_t {
mca_base_component_t iof_version;
mca_base_component_data_t iof_data;
orte_iof_base_component_init_fn_t iof_init;
};
typedef struct orte_iof_base_component_2_0_0_t orte_iof_base_component_2_0_0_t;
typedef struct orte_iof_base_component_2_0_0_t orte_iof_base_component_t;

49
orte/mca/iof/iof_types.h Обычный файл
Просмотреть файл

@ -0,0 +1,49 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007-2008 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef ORTE_IOF_TYPES_H
#define ORTE_IOF_TYPES_H
#include "orte_config.h"
#include "orte/types.h"
BEGIN_C_DECLS
/* Predefined tag values
 *
 * An IOF tag is a single byte. Every value below occupies its own
 * bit, so one tag can carry several of them at once and callers test
 * for a value with a bitwise AND.
 */
typedef uint8_t orte_iof_tag_t;
/* underlying data type of an IOF tag */
#define ORTE_IOF_TAG_T OPAL_UINT8
/* the I/O streams */
#define ORTE_IOF_STDIN 0x01
#define ORTE_IOF_STDOUT 0x02
#define ORTE_IOF_STDERR 0x04
#define ORTE_IOF_STDDIAG 0x08
/* flow control flags - XON asks the reader of stdin to resume
 * reading, XOFF asks it to stop
 */
#define ORTE_IOF_XON 0x10
#define ORTE_IOF_XOFF 0x20
/* tool requests - PULL asks for a copy of the tagged stream(s),
 * CLOSE ends a previous PULL
 */
#define ORTE_IOF_PULL 0x40
#define ORTE_IOF_CLOSE 0x80
END_C_DECLS
#endif /* ORTE_IOF_TYPES_H */

Просмотреть файл

@ -1,37 +0,0 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_iof_null_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
AC_DEFUN([MCA_iof_null_CONFIG],[
# README README README README README README README README README
#
# The NULL iof component is here for debugging some things when
# using TM. It should not be used anywhere else, as you won't
# get I/O. So check for tm and follow the tm pls lead.
#
# README README README README README README README README README
OMPI_CHECK_TM([iof_null], [iof_null_good=1], [iof_null_good=0])
# don't need to set any flags - there's no TM-using code in this
# component
AS_IF([test "$iof_null_good" = "1"], [$1], [$2])
])dnl

Просмотреть файл

@ -1,147 +0,0 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef ORTE_IOF_NULL_H
#define ORTE_IOF_NULL_H
#include "orte/mca/iof/iof.h"
BEGIN_C_DECLS
/*
 * All entry points declared below are implemented in iof_null.c as
 * stubs: each one ignores its arguments and returns ORTE_SUCCESS.
 */

/**
 * Finalize the null module.
 */
int orte_iof_null_finalize(void);
/**
 * Publish a local file descriptor as an endpoint that is logically
 * associated with the specified process name (e.g. master side of a
 * pipe/pty connected to a child process)
 *
 * @param name  Process name the endpoint is associated with.
 * @param mode  Endpoint mode.
 * @param tag   Endpoint tag.
 * @param fd    Local file descriptor.
 *
 */
int orte_iof_null_publish(
const orte_process_name_t* name,
orte_iof_base_mode_t mode,
orte_iof_base_tag_t tag,
int fd
);
/**
 * Remove all registrations matching the specified process
 * name, mask and tag values.
 *
 * @param name  Process name to match.
 * @param mask  Mask that specifies how name is interpreted.
 * @param tag   Tag to match.
 *
 */
int orte_iof_null_unpublish(
const orte_process_name_t* name,
orte_ns_cmp_bitmask_t mask,
orte_iof_base_tag_t tag
);
/**
 * Explicitly push data from the specified file descriptor
 * to the indicated set of peers.
 *
 * @param dst_name Name used to qualify set of peers.
 * @param dst_mask Mask that specifies how name is interpreted.
 * @param dst_tag  Match a specific peer endpoint.
 * @param fd       Local file descriptor.
 */
int orte_iof_null_push(
const orte_process_name_t* dst_name,
orte_ns_cmp_bitmask_t dst_mask,
orte_iof_base_tag_t dst_tag,
int fd
);
/**
 * Explicitly pull data from the specified set of peers
 * and dump to the indicated file descriptor.
 *
 * @param src_name Name used to qualify set of peers.
 * @param src_mask Mask that specifies how name is interpreted.
 * @param src_tag  Match a specific peer endpoint.
 * @param fd       Local file descriptor.
 */
int orte_iof_null_pull(
const orte_process_name_t* src_name,
orte_ns_cmp_bitmask_t src_mask,
orte_iof_base_tag_t src_tag,
int fd
);
/**
 * Setup buffering for a specified set of endpoints.
 *
 * @param src_name    Name used to qualify set of peers.
 * @param src_mask    Mask that specifies how name is interpreted.
 * @param src_tag     Match a specific peer endpoint.
 * @param buffer_size Requested buffer size.
 */
int orte_iof_null_buffer(
const orte_process_name_t* src_name,
orte_ns_cmp_bitmask_t src_mask,
orte_iof_base_tag_t src_tag,
size_t buffer_size
);
/**
 * Subscribe to receive a callback on receipt of data
 * from a specified set of peers.
 *
 * @param src_name Name used to qualify set of peers.
 * @param src_mask Mask that specifies how name is interpreted.
 * @param src_tag  Match a specific peer endpoint.
 * @param cb       Callback function.
 * @param cbdata   Opaque pointer handed to the callback.
 */
int orte_iof_null_subscribe(
const orte_process_name_t* src_name,
orte_ns_cmp_bitmask_t src_mask,
orte_iof_base_tag_t src_tag,
orte_iof_base_callback_fn_t cb,
void* cbdata
);
/**
 * Counterpart of orte_iof_null_subscribe(); takes the same
 * name/mask/tag selection.
 */
int orte_iof_null_unsubscribe(
const orte_process_name_t* src_name,
orte_ns_cmp_bitmask_t src_mask,
orte_iof_base_tag_t src_tag
);
/**
 * Fault-tolerance event hook.
 *
 * @param state Fault-tolerance state being signalled.
 */
int orte_iof_null_ft_event( int state );
/**
 * IOF null Component
 */
struct orte_iof_null_component_t {
/* base IOF component this component extends */
orte_iof_base_component_t super;
/* storage for the "override" MCA parameter; the component only offers
 * itself for selection when this is non-zero */
int null_override;
/* NOTE(review): not referenced by the null component sources - confirm
 * whether it is still needed */
struct iovec null_iov[1];
};
typedef struct orte_iof_null_component_t orte_iof_null_component_t;
/* the single component instance, defined in the component source file */
ORTE_MODULE_DECLSPEC extern orte_iof_null_component_t mca_iof_null_component;
/* function table handed out when the component is selected */
extern orte_iof_base_module_t orte_iof_null_module;
END_C_DECLS
#endif

Просмотреть файл

@ -1,99 +0,0 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/util/proc_info.h"
#include "orte/util/show_help.h"
#include "opal/runtime/opal_progress.h"
#include "orte/mca/rml/rml.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/mca/iof/base/base.h"
#include "orte/mca/iof/base/iof_base_endpoint.h"
#include "iof_null.h"
/*
* Local functions
*/
static int orte_iof_null_open(void);
static orte_iof_base_module_t* orte_iof_null_init(
int* priority,
bool *allow_multi_user_threads,
bool *have_hidden_threads);
/*
 * The null IOF component instance: base component description followed
 * by the null-specific members.
 */
orte_iof_null_component_t mca_iof_null_component = {
{
/* First, the mca_base_component_t struct containing meta
information about the component itself */
{
ORTE_IOF_BASE_VERSION_2_0_0,
"null", /* MCA component name */
ORTE_MAJOR_VERSION, /* MCA component major version */
ORTE_MINOR_VERSION, /* MCA component minor version */
ORTE_RELEASE_VERSION, /* MCA component release version */
orte_iof_null_open, /* component open */
NULL
},
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
orte_iof_null_init
},
/* null_override: off until the MCA parameter registered in
 * orte_iof_null_open() says otherwise */
false,
/* null_iov has no explicit initializer and is therefore zero-filled */
/*{{NULL, 0}}*/
};
/*
 * Component open function.
 *
 * Registers the integer MCA parameter "override" for this component and
 * binds it to mca_iof_null_component.null_override, which
 * orte_iof_null_init() consults later. Always returns ORTE_SUCCESS; the
 * return value of the registration call is not examined.
 */
static int orte_iof_null_open(void)
{
/* NOTE(review): the literal 0 is presumably the parameter's default
 * value and the two "false" flags its internal/read-only attributes -
 * confirm against mca_base_param_reg_int() */
mca_base_param_reg_int(&mca_iof_null_component.super.iof_version,
"override",
"Whether to use the null IOF component or not",
false, false, 0,
&mca_iof_null_component.null_override);
return ORTE_SUCCESS;
}
/*
 * Component init function.
 *
 * The null component is a debugging aid for PBS/TM environments only.
 * It offers its module (priority 50) when both of the following hold:
 *   - the "override" MCA parameter is non-zero, and
 *   - PBS_ENVIRONMENT and PBS_JOBID are both present in the environment.
 * In every other case NULL is returned and the output arguments are left
 * untouched.
 */
static orte_iof_base_module_t*
orte_iof_null_init(int* priority, bool *allow_multi_user_threads,
                   bool *have_hidden_threads)
{
    /* the user must have explicitly asked for this component */
    if (0 == mca_iof_null_component.null_override) {
        return NULL;
    }

    /* ... and we must be running inside a PBS job */
    if (NULL == getenv("PBS_ENVIRONMENT")) {
        return NULL;
    }
    if (NULL == getenv("PBS_JOBID")) {
        return NULL;
    }

    *priority = 50;
    *allow_multi_user_threads = true;
    *have_hidden_threads = false;
    return &orte_iof_null_module;
}

Просмотреть файл

@ -1,178 +0,0 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include <errno.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#include <string.h>
#include "orte/util/show_help.h"
#include "orte/mca/iof/iof.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/rml/rml_types.h"
#include "orte/mca/iof/iof.h"
#include "orte/mca/iof/base/base.h"
#include "orte/mca/iof/base/iof_base_endpoint.h"
#include "iof_null.h"
/*
 * Function table of the null IOF module. Every slot except the seventh
 * points at a do-nothing stub defined in this file; the seventh uses the
 * base framework's orte_iof_base_flush. orte_iof_null_buffer() is defined
 * below but is not part of this table.
 */
orte_iof_base_module_t orte_iof_null_module = {
orte_iof_null_publish,
orte_iof_null_unpublish,
orte_iof_null_subscribe,
orte_iof_null_unsubscribe,
orte_iof_null_push,
orte_iof_null_pull,
orte_iof_base_flush,
orte_iof_null_finalize,
orte_iof_null_ft_event
};
/**
 * Finalize the null module: there is no state to tear down, so this
 * simply reports success.
 */
int orte_iof_null_finalize(void) {
return ORTE_SUCCESS;
}
/**
 * Publish a local file descriptor as an endpoint that is logically
 * associated with the specified process name (e.g. master side of a
 * pipe/pty connected to a child process)
 *
 * Null implementation: all arguments are ignored and nothing is
 * published.
 *
 * @param name  Process name the endpoint would be associated with.
 * @param mode  Endpoint mode.
 * @param tag   Endpoint tag.
 * @param fd    Local file descriptor.
 *
 * @return ORTE_SUCCESS, always.
 */
int orte_iof_null_publish(
const orte_process_name_t* name,
orte_iof_base_mode_t mode,
orte_iof_base_tag_t tag,
int fd)
{
return ORTE_SUCCESS;
}
/**
 * Remove all registrations matching the specified process
 * name, mask and tag values.
 *
 * Null implementation: all arguments are ignored.
 *
 * @param name  Process name to match.
 * @param mask  Mask that specifies how name is interpreted.
 * @param tag   Tag to match.
 *
 * @return ORTE_SUCCESS, always.
 */
int orte_iof_null_unpublish(
const orte_process_name_t* name,
orte_ns_cmp_bitmask_t mask,
orte_iof_base_tag_t tag)
{
return ORTE_SUCCESS;
}
/**
 * Explicitly push data from the specified file descriptor
 * to the indicated set of peers.
 *
 * Null implementation: all arguments are ignored and the descriptor is
 * never read.
 *
 * @param dst_name Name used to qualify set of peers.
 * @param dst_mask Mask that specifies how name is interpreted.
 * @param dst_tag  Match a specific peer endpoint.
 * @param fd       Local file descriptor.
 *
 * @return ORTE_SUCCESS, always.
 */
int orte_iof_null_push(
const orte_process_name_t* dst_name,
orte_ns_cmp_bitmask_t dst_mask,
orte_iof_base_tag_t dst_tag,
int fd)
{
return ORTE_SUCCESS;
}
/**
 * Explicitly pull data from the specified set of peers
 * and dump to the indicated file descriptor.
 *
 * Null implementation: all arguments are ignored and nothing is ever
 * written to the descriptor.
 *
 * @param src_name Name used to qualify set of peers.
 * @param src_mask Mask that specifies how name is interpreted.
 * @param src_tag  Match a specific peer endpoint.
 * @param fd       Local file descriptor.
 *
 * @return ORTE_SUCCESS, always.
 */
int orte_iof_null_pull(
const orte_process_name_t* src_name,
orte_ns_cmp_bitmask_t src_mask,
orte_iof_base_tag_t src_tag,
int fd)
{
return ORTE_SUCCESS;
}
/**
 * Setup buffering for a specified set of endpoints.
 *
 * Null implementation: all arguments are ignored and no buffer is set up.
 *
 * @param src_name    Name used to qualify set of peers.
 * @param src_mask    Mask that specifies how name is interpreted.
 * @param src_tag     Match a specific peer endpoint.
 * @param buffer_size Requested buffer size.
 *
 * @return ORTE_SUCCESS, always.
 */
int orte_iof_null_buffer(
const orte_process_name_t* src_name,
orte_ns_cmp_bitmask_t src_mask,
orte_iof_base_tag_t src_tag,
size_t buffer_size)
{
return ORTE_SUCCESS;
}
/**
 * Subscribe to receive a callback on receipt of data
 * from a specified set of peers.
 *
 * Null implementation: all arguments are ignored, so the callback is
 * never registered and never invoked from here.
 *
 * @param src_name Name used to qualify set of peers.
 * @param src_mask Mask that specifies how name is interpreted.
 * @param src_tag  Match a specific peer endpoint.
 * @param cb       Callback function.
 * @param cbdata   Opaque pointer for the callback.
 *
 * @return ORTE_SUCCESS, always.
 */
int orte_iof_null_subscribe(
const orte_process_name_t* src_name,
orte_ns_cmp_bitmask_t src_mask,
orte_iof_base_tag_t src_tag,
orte_iof_base_callback_fn_t cb,
void* cbdata)
{
return ORTE_SUCCESS;
}
/**
 * Counterpart of orte_iof_null_subscribe().
 *
 * Null implementation: all arguments are ignored.
 *
 * @return ORTE_SUCCESS, always.
 */
int orte_iof_null_unsubscribe(
const orte_process_name_t* src_name,
orte_ns_cmp_bitmask_t src_mask,
orte_iof_base_tag_t src_tag)
{
return ORTE_SUCCESS;
}
/**
 * Fault-tolerance event hook.
 *
 * @param state Fault-tolerance state being signalled (ignored).
 *
 * @return ORTE_SUCCESS, always.
 */
int orte_iof_null_ft_event( int state ) {
/*
 * Nothing to save or restore - the null component keeps no state.
 */
return ORTE_SUCCESS;
}

Просмотреть файл

@ -20,31 +20,27 @@
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if OMPI_BUILD_iof_svc_DSO
if OMPI_BUILD_iof_orted_DSO
component_noinst =
component_install = mca_iof_svc.la
component_install = mca_iof_orted.la
else
component_noinst = libmca_iof_svc.la
component_noinst = libmca_iof_orted.la
component_install =
endif
svc_SOURCES = \
iof_svc.c \
iof_svc.h \
iof_svc_component.c \
iof_svc_proxy.h \
iof_svc_proxy.c \
iof_svc_pub.h \
iof_svc_pub.c \
iof_svc_sub.h \
iof_svc_sub.c
orted_SOURCES = \
iof_orted.c \
iof_orted.h \
iof_orted_component.c \
iof_orted_read.c \
iof_orted_receive.c
mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_iof_svc_la_SOURCES = $(svc_SOURCES)
mca_iof_svc_la_LDFLAGS = -module -avoid-version
mca_iof_orted_la_SOURCES = $(orted_SOURCES)
mca_iof_orted_la_LDFLAGS = -module -avoid-version
noinst_LTLIBRARIES = $(component_noinst)
libmca_iof_svc_la_SOURCES = $(svc_SOURCES)
libmca_iof_svc_la_LIBADD =
libmca_iof_svc_la_LDFLAGS = -module -avoid-version
libmca_iof_orted_la_SOURCES = $(orted_SOURCES)
libmca_iof_orted_la_LIBADD =
libmca_iof_orted_la_LDFLAGS = -module -avoid-version

196
orte/mca/iof/orted/iof_orted.c Обычный файл
Просмотреть файл

@ -0,0 +1,196 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include <errno.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#ifdef HAVE_STRING_H
#include <string.h>
#endif /* HAVE_STRING_H */
#include "orte/util/show_help.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/iof/iof.h"
#include "orte/mca/iof/base/base.h"
#include "iof_orted.h"
/* LOCAL FUNCTIONS */
static void stdin_write_handler(int fd, short event, void *cbdata);
/* API FUNCTIONS */
static int orted_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag, int fd);
static int orted_pull(const orte_process_name_t* src_name,
orte_iof_tag_t src_tag,
int fd);
static int orted_close(const orte_process_name_t* peer,
orte_iof_tag_t source_tag);
static int orted_ft_event(int state);
/* The APIs in this module are solely used to support LOCAL
 * procs - i.e., procs that are co-located with the daemon. Output
 * from local procs is automatically sent to the HNP for output
 * and possible forwarding to other requestors. The HNP automatically
 * determines and wires up the stdin configuration, so we don't
 * have to do anything here.
 *
 * The table holds four entry points, in this order: push, pull, close
 * and fault-tolerance event.
 */
orte_iof_base_module_t orte_iof_orted_module = {
orted_push,
orted_pull,
orted_close,
orted_ft_event
};
/**
 * Push data from the specified file descriptor
 * to the HNP
 *
 * @param dst_name Process name recorded with the read event.
 * @param src_tag  Stream tag recorded with the read event.
 * @param fd       Local file descriptor to read from.
 *
 * @return ORTE_SUCCESS, always - the macro below reports no status back
 *         to this function.
 */
static int orted_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag, int fd)
{
/* setup to read from the specified file descriptor and
 * forward anything we get to the HNP. The event is tracked on the
 * component's read_events list and serviced by
 * orte_iof_orted_read_handler.
 * NOTE(review): the trailing "true" presumably asks the macro to
 * activate the event right away - confirm against ORTE_IOF_READ_EVENT.
 */
ORTE_IOF_READ_EVENT(dst_name, fd, src_tag,
orte_iof_orted_read_handler,
&mca_iof_orted_component.read_events, true);
return ORTE_SUCCESS;
}
/**
 * Register where stdin data for a local process must be delivered.
 *
 * A "pull" on a daemon means: whatever the HNP later sends us for the
 * stdin of the given process has to be written to the given file
 * descriptor. All that is needed here is a local sink describing that
 * destination; it is stored on the component's sink list and drained by
 * stdin_write_handler.
 *
 * @param dst_name Local process that will consume the data.
 * @param src_tag  Stream being pulled; only ORTE_IOF_STDIN is accepted.
 * @param fd       Descriptor connected to the process' stdin.
 *
 * @return ORTE_SUCCESS once the sink is defined, or
 *         ORTE_ERR_NOT_SUPPORTED for any other stream.
 */
static int orted_pull(const orte_process_name_t* dst_name,
                      orte_iof_tag_t src_tag,
                      int fd)
{
    orte_iof_sink_t *stdin_sink;

    /* local call: stdin is the only stream we know how to feed */
    if (ORTE_IOF_STDIN == src_tag) {
        ORTE_IOF_SINK_DEFINE(&stdin_sink, dst_name, fd, src_tag,
                             stdin_write_handler,
                             &mca_iof_orted_component.sinks);
        return ORTE_SUCCESS;
    }

    return ORTE_ERR_NOT_SUPPORTED;
}
/*
 * One of our local procs wants us to close the specified
 * stream(s), thus terminating any potential io to/from it.
 *
 * NOTE(review): the intent is for the orted to close the matching local
 * fd, but this implementation currently does nothing: both arguments
 * are ignored and ORTE_SUCCESS is returned unconditionally.
 */
static int orted_close(const orte_process_name_t* peer,
orte_iof_tag_t source_tag)
{
return ORTE_SUCCESS;
}
/*
 * FT event
 *
 * Fault-tolerance events are not handled by the orted IOF module: the
 * state argument is ignored and ORTE_ERR_NOT_IMPLEMENTED is returned.
 */
static int orted_ft_event(int state)
{
return ORTE_ERR_NOT_IMPLEMENTED;
}
/**
 * Write-event callback: drain the data queued for a local process' stdin.
 *
 * Queued fragments are written in order to wev->fd:
 *  - a fragment that is written completely is released;
 *  - a fragment that is only partially written has its unwritten tail
 *    moved to the front of its buffer and its byte count reduced, then
 *    goes back to the head of the queue with the event left armed so
 *    that we are called again when the fd is writable;
 *  - if write() fails with EAGAIN/EINTR the fragment is re-queued
 *    untouched and the event is left armed;
 *  - on any other write() error the fragment is dropped and the write
 *    event is disarmed, since retrying cannot succeed.
 * When the queue has been emptied the event is disarmed as well.
 *
 * Finally, if this daemon previously sent XOFF to the HNP and the queue
 * has dropped below ORTE_IOF_MAX_INPUT_BUFFERS, XON is sent so that the
 * HNP resumes reading stdin.
 *
 * @param fd     Descriptor that triggered the event (wev->fd is what is
 *               actually written to).
 * @param event  Event flags (unused).
 * @param cbdata The orte_iof_write_event_t being serviced.
 */
static void stdin_write_handler(int fd, short event, void *cbdata)
{
    orte_iof_write_event_t *wev = (orte_iof_write_event_t*)cbdata;
    opal_list_item_t *item;
    orte_iof_write_output_t *output;
    int num_written;

    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                         "%s orted:stdin:write:handler writing data to %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         wev->fd));

    /* lock us up to protect global operations */
    OPAL_THREAD_LOCK(&mca_iof_orted_component.lock);

    while (NULL != (item = opal_list_remove_first(&wev->outputs))) {
        output = (orte_iof_write_output_t*)item;
        num_written = write(wev->fd, output->data, output->numbytes);

        if (num_written < 0) {
            if (EAGAIN == errno || EINTR == errno) {
                /* nothing was consumed - put the fragment back and wait
                 * for the fd to become writable again
                 */
                opal_list_prepend(&wev->outputs, item);
                goto CHECK;
            }
            /* hard error: the fragment cannot be delivered. A negative
             * count must never reach the partial-write arithmetic below,
             * so drop the fragment and stop the write event
             */
            OBJ_RELEASE(output);
            goto ABORT;
        }

        if (num_written < output->numbytes) {
            /* incomplete write - keep only the unwritten tail so that
             * the retry neither repeats nor invents any output
             */
            memmove(output->data, &output->data[num_written],
                    output->numbytes - num_written);
            output->numbytes -= num_written;
            /* push this item back on the front of the list */
            opal_list_prepend(&wev->outputs, item);
            /* leave the write event running so it will call us again
             * when the fd is ready.
             */
            goto CHECK;
        }

        OBJ_RELEASE(output);
    }

ABORT:
    opal_event_del(&wev->ev);
    wev->pending = false;

CHECK:
    if (mca_iof_orted_component.xoff) {
        /* if we have told the HNP to stop reading stdin, see if
         * the proc has absorbed enough to justify restart
         *
         * RHC: Note that when multiple procs want stdin, we
         * can get into a fight between a proc turning stdin
         * back "on" and other procs turning it "off". There
         * is no clear way to resolve this as different procs
         * may take input at different rates.
         */
        if (opal_list_get_size(&wev->outputs) < ORTE_IOF_MAX_INPUT_BUFFERS) {
            /* restart the read */
            mca_iof_orted_component.xoff = false;
            orte_iof_orted_send_xonxoff(ORTE_IOF_XON);
        }
    }

    /* unlock and go */
    OPAL_THREAD_UNLOCK(&mca_iof_orted_component.lock);
}

81
orte/mca/iof/orted/iof_orted.h Обычный файл
Просмотреть файл

@ -0,0 +1,81 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* The orted IOF component is used in daemons. It is used
* to relay all IOF actions back to the "hnp" IOF component (i.e., the
* IOF component that runs in the HNP). The orted IOF component is
* loaded in an orted and then tied to the stdin, stdout,
* and stderr streams of created child processes via pipes. The orted
* IOF component in the orted then acts as the relay between the
* stdin/stdout/stderr pipes and the IOF component in the HNP.
* This design allows us to manipulate stdin/stdout/stderr from before
* main() in the child process.
*
* Much of the intelligence of this component is actually contained in
* iof_base_endpoint.c (reading and writing to local file descriptors,
* setting up events based on file descriptors, etc.).
*
* A non-blocking OOB receive is posted at the initialization of this
* component to receive all messages from the HNP (e.g., data
* fragments from streams, ACKs to fragments).
*
* Flow control is employed on a per-stream basis to ensure that
* SOURCEs don't overwhelm SINK resources (E.g., send an entire input
* file to an orted before the target process has read any of it).
*
*/
#ifndef ORTE_IOF_ORTED_H
#define ORTE_IOF_ORTED_H
#include "orte_config.h"
#include "opal/class/opal_list.h"
#include "orte/mca/iof/iof.h"
BEGIN_C_DECLS
/**
 * IOF ORTED Component
 *
 * Extends the base IOF component with the state the daemon needs to
 * relay I/O for its local processes.
 */
struct orte_iof_orted_component_t {
/* base IOF component this component extends */
orte_iof_base_component_t super;
/* sinks created by the module's pull entry point; incoming stdin
 * messages are matched against this list */
opal_list_t sinks;
/* read events created by the module's push entry point */
opal_list_t read_events;
/* taken by the read and write handlers and by component close */
opal_mutex_t lock;
/* true from the moment XOFF has been sent to the HNP until XON is sent */
bool xoff;
};
typedef struct orte_iof_orted_component_t orte_iof_orted_component_t;
/* the single component instance */
ORTE_MODULE_DECLSPEC extern orte_iof_orted_component_t mca_iof_orted_component;
/* function table handed out when the component is selected */
extern orte_iof_base_module_t orte_iof_orted_module;
/* RML receive callback for messages arriving from the HNP */
void orte_iof_orted_recv(int status, orte_process_name_t* sender,
opal_buffer_t* buffer, orte_rml_tag_t tag,
void* cbdata);
/* event callback that reads a local descriptor and forwards the data
 * to the HNP */
void orte_iof_orted_read_handler(int fd, short event, void *data);
/* send a flow-control message (ORTE_IOF_XON or ORTE_IOF_XOFF) to the HNP */
void orte_iof_orted_send_xonxoff(orte_iof_tag_t tag);
END_C_DECLS
#endif

147
orte/mca/iof/orted/iof_orted_component.c Обычный файл
Просмотреть файл

@ -0,0 +1,147 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/util/show_help.h"
#include "opal/runtime/opal_progress.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/mca/rml/rml.h"
#include "orte/util/proc_info.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/runtime/orte_globals.h"
#include "iof_orted.h"
/*
* Local functions
*/
static int orte_iof_orted_open(void);
static int orte_iof_orted_close(void);
static int orte_iof_orted_query(mca_base_module_t **module, int *priority);
/*
* Local variables
*/
static bool initialized = false;
/*
* Public string showing the iof orted component version number
*/
const char *mca_iof_orted_component_version_string =
"Open MPI orted iof MCA component version " ORTE_VERSION;
/*
 * The orted IOF component instance. Only the base component part is
 * initialized here; the lists, the lock and the xoff flag are set up in
 * orte_iof_orted_query().
 */
orte_iof_orted_component_t mca_iof_orted_component = {
{
{
ORTE_IOF_BASE_VERSION_2_0_0,
"orted", /* MCA component name */
ORTE_MAJOR_VERSION, /* MCA component major version */
ORTE_MINOR_VERSION, /* MCA component minor version */
ORTE_RELEASE_VERSION, /* MCA component release version */
/* Component open, close, and query functions */
orte_iof_orted_open,
orte_iof_orted_close,
orte_iof_orted_query
},
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
}
}
};
/**
 * Component open function.
 *
 * No parameters are registered and no state is created at open time;
 * all setup happens in orte_iof_orted_query().
 *
 * @return ORTE_SUCCESS, always.
 */
static int orte_iof_orted_open(void)
{
/* Nothing to do */
return ORTE_SUCCESS;
}
/**
 * Component close function.
 *
 * If the component was never selected (query did not complete) there is
 * nothing to undo and ORTE_SUCCESS is returned. Otherwise, under the
 * component lock, every sink and every read event is released, both
 * lists are destructed and the RML receive posted by query is
 * cancelled; the lock itself is destructed last.
 *
 * @return ORTE_SUCCESS when nothing had been initialized, otherwise the
 *         status of the receive cancellation.
 */
static int orte_iof_orted_close(void)
{
    opal_list_item_t *entry;
    int status;

    if (!initialized) {
        return ORTE_SUCCESS;
    }

    OPAL_THREAD_LOCK(&mca_iof_orted_component.lock);

    /* drop all stdin sinks */
    for (entry = opal_list_remove_first(&mca_iof_orted_component.sinks);
         NULL != entry;
         entry = opal_list_remove_first(&mca_iof_orted_component.sinks)) {
        OBJ_RELEASE(entry);
    }
    OBJ_DESTRUCT(&mca_iof_orted_component.sinks);

    /* drop all read events */
    for (entry = opal_list_remove_first(&mca_iof_orted_component.read_events);
         NULL != entry;
         entry = opal_list_remove_first(&mca_iof_orted_component.read_events)) {
        OBJ_RELEASE(entry);
    }
    OBJ_DESTRUCT(&mca_iof_orted_component.read_events);

    /* Cancel the RML receive */
    status = orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_IOF_PROXY);

    OPAL_THREAD_UNLOCK(&mca_iof_orted_component.lock);
    OBJ_DESTRUCT(&mca_iof_orted_component.lock);

    return status;
}
/**
 * Component query function.
 *
 * Only daemons may use this component. For a daemon, a non-blocking RML
 * receive is posted for messages from the HNP IOF component, the
 * component's lock and lists are constructed, flow control starts in
 * the "on" state (xoff == false) and the module is offered with
 * priority 100.
 *
 * @param module   Out: the orted IOF module on success, NULL otherwise.
 * @param priority Out: 100 on success, -1 otherwise.
 *
 * @return ORTE_SUCCESS when the module is offered, ORTE_ERROR when the
 *         caller is not a daemon, or the error reported while posting
 *         the receive.
 */
static int orte_iof_orted_query(mca_base_module_t **module, int *priority)
{
    int status;

    /* pessimistic defaults, overwritten only on success */
    *module = NULL;
    *priority = -1;

    /* if we are not a daemon, then don't use this module */
    if (!orte_process_info.daemon) {
        return ORTE_ERROR;
    }

    /* post a non-blocking RML receive to get messages
       from the HNP IOF component */
    status = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
                                     ORTE_RML_TAG_IOF_PROXY,
                                     ORTE_RML_NON_PERSISTENT,
                                     orte_iof_orted_recv,
                                     NULL);
    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
        return status;
    }

    /* setup the local global variables */
    OBJ_CONSTRUCT(&mca_iof_orted_component.lock, opal_mutex_t);
    OBJ_CONSTRUCT(&mca_iof_orted_component.sinks, opal_list_t);
    OBJ_CONSTRUCT(&mca_iof_orted_component.read_events, opal_list_t);
    mca_iof_orted_component.xoff = false;

    /* we must be selected */
    *priority = 100;
    *module = (mca_base_module_t *) &orte_iof_orted_module;
    initialized = true;

    return ORTE_SUCCESS;
}

149
orte/mca/iof/orted/iof_orted_read.c Обычный файл
Просмотреть файл

@ -0,0 +1,149 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include <errno.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#ifdef HAVE_STRING_H
#include <string.h>
#endif /* HAVE_STRING_H */
#include "orte/util/show_help.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/iof/iof.h"
#include "orte/mca/iof/base/base.h"
#include "iof_orted.h"
/*
 * Callback when non-blocking RML send completes.
 *
 * The buffer handed to the send belongs to us, so it is released here.
 * The status, peer, tag and cbdata arguments are not examined.
 */
static void send_cb(int status, orte_process_name_t *peer,
opal_buffer_t *buf, orte_rml_tag_t tag,
void *cbdata)
{
/* nothing to do here - just release buffer and return */
OBJ_RELEASE(buf);
}
/*
 * Read-event callback: read one fragment of output from a local
 * process and forward it to the HNP.
 *
 * At most ORTE_IOF_BASE_MSG_MAX bytes are read from fd. The fragment is
 * packed as (stream tag, name of the originating process, raw bytes) and
 * sent with a non-blocking RML send on ORTE_RML_TAG_IOF_HNP. The whole
 * handler runs under the component lock.
 *
 * Outcomes:
 *  - EAGAIN/EINTR: return and wait for the next event;
 *  - any other read error: the read event is removed;
 *  - end of file (0 bytes): fd is closed and the read event is removed;
 *  - pack failure: the read event is removed and the buffer released.
 *
 * @param fd     Descriptor that became readable.
 * @param event  Event flags (unused).
 * @param cbdata The orte_iof_read_event_t describing the stream.
 */
void orte_iof_orted_read_handler(int fd, short event, void *cbdata)
{
orte_iof_read_event_t *rev = (orte_iof_read_event_t*)cbdata;
unsigned char data[ORTE_IOF_BASE_MSG_MAX];
opal_buffer_t *buf=NULL;
int rc;
int32_t numbytes;
OPAL_THREAD_LOCK(&mca_iof_orted_component.lock);
/* read up to the fragment size */
#if !defined(__WINDOWS__)
numbytes = read(fd, data, sizeof(data));
#else
{
/* NOTE(review): the result of ReadFile() is not checked, so "readed"
 * may be used uninitialized when the call fails */
DWORD readed;
HANDLE handle = (HANDLE)_get_osfhandle(fd);
ReadFile(handle, data, sizeof(data), &readed, NULL);
numbytes = (int)readed;
}
#endif /* !defined(__WINDOWS__) */
if (numbytes < 0) {
/* either we have a connection error or it was a non-blocking read */
/* non-blocking, retry */
if (EAGAIN == errno || EINTR == errno) {
OPAL_THREAD_UNLOCK(&mca_iof_orted_component.lock);
return;
}
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
"%s iof:orted:read handler %s Error on connection:%d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&rev->name), fd));
/* NOTE(review): unlike the end-of-file case below, fd is left open on
 * this path - confirm that this is intended */
goto CLEAN_RETURN;
} else if (0 == numbytes) {
/* child process closed connection - close the fd */
close(fd);
goto CLEAN_RETURN;
}
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
"%s iof:orted:read handler %s %d bytes from fd %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&rev->name),
numbytes, fd));
/* prep the buffer */
buf = OBJ_NEW(opal_buffer_t);
/* pack the stream first - we do this so that flow control messages can
 * consist solely of the tag
 */
if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &rev->tag, 1, ORTE_IOF_TAG))) {
ORTE_ERROR_LOG(rc);
goto CLEAN_RETURN;
}
/* pack name of process that gave us this data */
if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &rev->name, 1, ORTE_NAME))) {
ORTE_ERROR_LOG(rc);
goto CLEAN_RETURN;
}
/* pack the data - only pack the #bytes we read! */
if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &data, numbytes, OPAL_BYTE))) {
ORTE_ERROR_LOG(rc);
goto CLEAN_RETURN;
}
/* start non-blocking RML call to forward received data */
OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
"%s iof:orted:read handler sending %d bytes to HNP",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes));
/* buf is released by send_cb once the send completes.
 * NOTE(review): the return value of the send is ignored; if the send
 * can fail without invoking the callback, buf is leaked - confirm */
orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_IOF_HNP,
0, send_cb, NULL);
OPAL_THREAD_UNLOCK(&mca_iof_orted_component.lock);
/* since the event is persistent, we do not need to re-add it */
return;
CLEAN_RETURN:
/* delete the event from the event library */
opal_event_del(&rev->ev);
/* buf is only non-NULL when we failed after allocating it */
if (NULL != buf) {
OBJ_RELEASE(buf);
}
OPAL_THREAD_UNLOCK(&mca_iof_orted_component.lock);
return;
}

215
orte/mca/iof/orted/iof_orted_receive.c Обычный файл
Просмотреть файл

@ -0,0 +1,215 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include <errno.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#ifdef HAVE_STRING_H
#include <string.h>
#endif /* HAVE_STRING_H */
#include "orte/util/show_help.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/iof/iof_types.h"
#include "orte/mca/iof/base/base.h"
#include "iof_orted.h"
/*
 * Callback when the non-blocking RML send of a flow-control message
 * completes. The buffer belongs to us, so it is released here; the
 * status, peer, tag and cbdata arguments are not examined.
 */
static void send_cb(int status, orte_process_name_t *peer,
opal_buffer_t *buf, orte_rml_tag_t tag,
void *cbdata)
{
/* nothing to do here - just release buffer and return */
OBJ_RELEASE(buf);
}
/*
 * Send a flow-control message to the HNP.
 *
 * The message consists of nothing but the tag, which is why data
 * messages also carry their tag first. If packing fails the error is
 * logged, the buffer is released and nothing is sent. On the normal
 * path the buffer is released by send_cb when the send completes.
 *
 * @param tag ORTE_IOF_XON to resume, ORTE_IOF_XOFF to pause; any value
 *            other than ORTE_IOF_XON is logged as "xoff".
 */
void orte_iof_orted_send_xonxoff(orte_iof_tag_t tag)
{
    opal_buffer_t *msg = OBJ_NEW(opal_buffer_t);
    int status;

    /* the tag is the entire payload */
    status = opal_dss.pack(msg, &tag, 1, ORTE_IOF_TAG);
    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
        OBJ_RELEASE(msg);
        return;
    }

    OPAL_OUTPUT_VERBOSE((0, orte_iof_base.iof_output,
                         "%s sending %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (ORTE_IOF_XON == tag) ? "xon" : "xoff"));

    /* hand the message to the HNP */
    orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, msg, ORTE_RML_TAG_IOF_HNP,
                            0, send_cb, NULL);
}
/*
 * Process one message received from the HNP.
 *
 * The message is expected to carry, in this order: the stream tag, the
 * name of the target process and the data bytes. Only ORTE_IOF_STDIN is
 * accepted here; any other tag is logged as ORTE_ERR_COMM_FAILURE and
 * the message is dropped.
 *
 * The data is delivered to every sink whose name matches the target
 * (same jobid, and either the same vpid or a wildcard vpid in the
 * target). Sinks whose fd has already been closed are skipped.
 *
 *  - A zero-length payload means stdin was closed at the source: the
 *    sink's write event is disarmed if it is currently armed, its fd is
 *    closed and marked as -1. The write event must not stay armed
 *    across the close, otherwise the write handler would later be
 *    invoked for a descriptor that no longer exists.
 *  - Otherwise the bytes are queued for the sink. If more than
 *    ORTE_IOF_MAX_INPUT_BUFFERS fragments are then waiting and XOFF has
 *    not been sent yet, XOFF is sent to the HNP.
 *
 * @param fd     Unused.
 * @param event  Unused.
 * @param cbdata The orte_message_event_t holding the message; released
 *               before returning on every path.
 */
static void process_msg(int fd, short event, void *cbdata)
{
    orte_message_event_t *mev = (orte_message_event_t*)cbdata;
    unsigned char data[ORTE_IOF_BASE_MSG_MAX];
    orte_iof_tag_t stream;
    int32_t count, numbytes;
    orte_process_name_t target;
    opal_list_item_t *item;
    int rc;

    /* see what stream generated this data */
    count = 1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(mev->buffer, &stream, &count, ORTE_IOF_TAG))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }

    /* if this isn't stdin, then we have an error */
    if (ORTE_IOF_STDIN != stream) {
        ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
        goto CLEAN_RETURN;
    }

    /* unpack the intended target */
    count = 1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(mev->buffer, &target, &count, ORTE_NAME))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }

    /* unpack the data */
    numbytes=ORTE_IOF_BASE_MSG_MAX;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(mev->buffer, data, &numbytes, OPAL_BYTE))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }
    /* numbytes will contain the actual #bytes that were sent */

    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                         "%s unpacked %d bytes for local proc %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                         ORTE_NAME_PRINT(&target)));

    /* cycle through our list of sinks */
    for (item = opal_list_get_first(&mca_iof_orted_component.sinks);
         item != opal_list_get_end(&mca_iof_orted_component.sinks);
         item = opal_list_get_next(item)) {
        orte_iof_sink_t* sink = (orte_iof_sink_t*)item;

        /* is this intended for this jobid? */
        if (target.jobid != sink->name.jobid) {
            continue;
        }
        /* is this intended for all vpids or this vpid? */
        if (ORTE_VPID_WILDCARD != target.vpid &&
            sink->name.vpid != target.vpid) {
            continue;
        }
        /* the fd has already been closed - ignore this input, and in
         * particular do not try to close it a second time
         */
        if (sink->wev.fd < 0) {
            continue;
        }

        if (0 == numbytes) {
            /* stdin was closed, so we need to close it too so the proc
             * knows it is done
             */
            OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                                 "%s closing stdin of local proc %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(&sink->name)));
            /* make sure the write event is off: "pending" is true while
             * the event is armed, so that is when it must be deleted
             */
            if (sink->wev.pending) {
                opal_event_del(&(sink->wev.ev));
                sink->wev.pending = false;
            }
            close(sink->wev.fd);
            sink->wev.fd = -1;
            continue;
        }

        OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                             "%s writing data to local proc %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(&sink->name)));
        /* send the bytes down the pipe */
        if (ORTE_IOF_MAX_INPUT_BUFFERS < orte_iof_base_write_output(&target, stream, data, numbytes, &sink->wev)) {
            /* getting too backed up - tell the HNP to hold off any more input if we
             * haven't already told it
             */
            if (!mca_iof_orted_component.xoff) {
                mca_iof_orted_component.xoff = true;
                orte_iof_orted_send_xonxoff(ORTE_IOF_XOFF);
            }
        }
    }

CLEAN_RETURN:
    /* release the message event */
    OBJ_RELEASE(mev);
    return;
}
/*
 * RML receive callback for messages arriving from the HNP.
 *
 * The message is not processed here: it is handed to process_msg via a
 * message event, and the (non-persistent) receive is posted again so
 * that the next message can be received. The status and cbdata
 * arguments are not examined.
 */
void orte_iof_orted_recv(int status, orte_process_name_t* sender,
opal_buffer_t* buffer, orte_rml_tag_t tag,
void* cbdata)
{
int rc;
OPAL_OUTPUT_VERBOSE((5, orte_iof_base.iof_output,
"%s iof:orted:receive got message from %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(sender)));
/* don't process this right away - we need to get out of the recv before
 * we process the message to avoid performing the rest of the job while
 * inside this receive! Instead, setup an event so that the message gets processed
 * as soon as we leave the recv.
 *
 * The macro creates a message event that process_msg releases when it is
 * done. The incoming buffer itself is NOT released here, although its
 * payload IS transferred to the message event for later processing.
 */
ORTE_MESSAGE_EVENT(sender, buffer, tag, process_msg);
/* reissue the recv */
if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
ORTE_RML_TAG_IOF_PROXY,
ORTE_RML_NON_PERSISTENT,
orte_iof_orted_recv,
NULL))) {
/* a failure is only logged; no further messages will arrive */
ORTE_ERROR_LOG(rc);
}
return;
}

Просмотреть файл

@ -1,373 +0,0 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include <errno.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#ifdef HAVE_STRING_H
#include <string.h>
#endif /* HAVE_STRING_H */
#include "orte/util/show_help.h"
#include "orte/mca/iof/iof.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/rml/rml_types.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/iof/iof.h"
#include "orte/mca/iof/base/base.h"
#include "orte/mca/iof/base/iof_base_endpoint.h"
#include "iof_proxy.h"
#include "iof_proxy_svc.h"
/*
 * Function table exported by the proxy IOF component.
 *
 * The initializer is positional, so the entries must stay in the member
 * order of orte_iof_base_module_t (declared outside this file -- check
 * that declaration before reordering or inserting entries).  Every entry
 * is implemented by the proxy code except orte_iof_base_flush, which is
 * not defined in this file.
 */
orte_iof_base_module_t orte_iof_proxy_module = {
orte_iof_proxy_publish,
orte_iof_proxy_unpublish,
orte_iof_proxy_subscribe,
orte_iof_proxy_unsubscribe,
orte_iof_proxy_push,
orte_iof_proxy_pull,
orte_iof_base_flush,
orte_iof_proxy_finalize,
orte_iof_proxy_ft_event
};
/*
 * Module finalize hook.  The proxy module holds no module-level state to
 * tear down here, so this always reports success.
 */
int orte_iof_proxy_finalize(void)
{
    return ORTE_SUCCESS;
}
/**
 * Create an endpoint for a local file descriptor and "publish" it
 * under the name of the origin process.  If the publish mode is a
 * SINK, a publication request is also sent to the svc component so
 * that incoming messages can be forwarded to the endpoint.
 *
 * SOURCEs do not need to create publication records because a) the
 * endpoint will automatically wake up the event engine and read off
 * the fd whenever there is data available, and b) this data is then
 * automatically sent to the iof svc component for possible
 * forwarding.
 *
 * @param origin  Process name the endpoint is published under.
 * @param mode    ORTE_IOF_SINK or a source mode.
 * @param tag     IOF tag identifying the stream.
 * @param fd      Local file descriptor backing the endpoint.
 *
 * @return ORTE_SUCCESS, or the error returned by endpoint creation or
 *         by the publish request.
 */
int orte_iof_proxy_publish(
    const orte_process_name_t* origin,
    orte_iof_base_mode_t mode,
    orte_iof_base_tag_t tag,
    int fd)
{
    int rc;

    if (orte_iof_base.iof_output >= 0) {
        /* Start from NULL: if the conversion fails without storing a
           string, we must neither print nor free an indeterminate
           pointer. */
        char* name_str = NULL;
        orte_util_convert_process_name_to_string(&name_str, origin);
        opal_output_verbose(1, orte_iof_base.iof_output,
                            "orte_iof_proxy_publish(%s,%d,%d,%d)\n",
                            (NULL != name_str) ? name_str : "(unknown)",
                            mode, tag, fd);
        free(name_str);   /* free(NULL) is a no-op */
    }

    rc = orte_iof_base_endpoint_create(
        origin,
        mode,
        tag,
        fd);
    if (ORTE_SUCCESS != rc) {
        return rc;
    }

    /* publish to server -- only SINKs need a publication record */
    if (ORTE_IOF_SINK == mode) {
        rc = orte_iof_proxy_svc_publish(origin, tag);
        if (rc != ORTE_SUCCESS) {
            return rc;
        }
    }
    return ORTE_SUCCESS;
}
/**
 * Withdraw every registration that matches the given origin process
 * name, mask and tag: first on the svc side, then the local endpoint.
 *
 * @return ORTE_SUCCESS when the local endpoint was deleted or was
 *         already gone; any other endpoint-deletion error otherwise.
 *         The result of the request sent to the svc component is not
 *         examined.
 */
int orte_iof_proxy_unpublish(
    const orte_process_name_t* origin,
    orte_ns_cmp_bitmask_t mask,
    orte_iof_base_tag_t tag)
{
    int status;

#if 0
    {
        int i = 0;
        opal_output_verbose(1, orte_iof_base.iof_output, "%s orted: ******** ABOUT TO IOF PROXY UNPUBLISH, %d", ORTE_NAME_PRINT(orte_process_info.my_name), getpid());
        fflush(stderr);
        while (0 == i) sleep(5);
    }
#endif

    /* tell the server to drop the publication */
    orte_iof_proxy_svc_unpublish(origin, mask, tag);

    /* Drop the local endpoint.  It may already have been deleted (for
       example, some entity noticed that the fd closed and called
       orte_iof_base_endpoint_delete on the corresponding endpoint).
       NOT_FOUND is therefore not an error: either way the endpoint no
       longer exists when we return. */
    status = orte_iof_base_endpoint_delete(origin, mask, tag);

    return (ORTE_SUCCESS == status || ORTE_ERR_NOT_FOUND == status)
               ? ORTE_SUCCESS
               : status;
}
/**
 * Push data read from a local file descriptor to the set of SINK peers
 * selected by sink_name / sink_mask / sink_tag.
 *
 * @return ORTE_SUCCESS, or the error from endpoint creation or from the
 *         subscription request.
 */
int orte_iof_proxy_push(
    const orte_process_name_t* sink_name,
    orte_ns_cmp_bitmask_t sink_mask,
    orte_iof_base_tag_t sink_tag,
    int fd)
{
    /* The local SOURCE endpoint must exist before the subscription goes
       to the server, in case a callback occurs *while* the subscription
       request is being sent. */
    int status = orte_iof_base_endpoint_create(ORTE_PROC_MY_NAME,
                                               ORTE_IOF_SOURCE,
                                               sink_tag,
                                               fd);
    if (ORTE_SUCCESS != status) {
        return status;
    }

    /* subscribe the destination to our stream on its behalf */
    return orte_iof_proxy_svc_subscribe(ORTE_PROC_MY_NAME,
                                        ORTE_NS_CMP_ALL,
                                        sink_tag,
                                        sink_name,
                                        sink_mask,
                                        sink_tag);
}
/**
 * Pull data from the set of SOURCE peers selected by source_name /
 * source_mask / source_tag and write it to a local file descriptor.
 *
 * Three steps, each logged and returned on failure: create a local SINK
 * endpoint, publish it, then subscribe it to the peers.
 */
int orte_iof_proxy_pull(
    const orte_process_name_t* source_name,
    orte_ns_cmp_bitmask_t source_mask,
    orte_iof_base_tag_t source_tag,
    int fd)
{
    int status;

    /* 1. local endpoint that will receive the data */
    status = orte_iof_base_endpoint_create(ORTE_PROC_MY_NAME,
                                           ORTE_IOF_SINK,
                                           source_tag,
                                           fd);
    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
        return status;
    }

    /* 2. make the endpoint known to the server */
    status = orte_iof_proxy_svc_publish(ORTE_PROC_MY_NAME, source_tag);
    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
        return status;
    }

    /* 3. subscribe to the peer(s) */
    status = orte_iof_proxy_svc_subscribe(source_name,
                                          source_mask,
                                          source_tag,
                                          ORTE_PROC_MY_NAME,
                                          ORTE_NS_CMP_ALL,
                                          source_tag);
    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
    }
    return status;
}
/*
 * Register a callback to be invoked on receipt of data from the set of
 * origin peers selected by origin_name / origin_mask / origin_tag.
 *
 * Returns the error from the local registration if that fails,
 * otherwise the result of the subscription request.
 */
int orte_iof_proxy_subscribe(
    const orte_process_name_t* origin_name,
    orte_ns_cmp_bitmask_t origin_mask,
    orte_iof_base_tag_t origin_tag,
    orte_iof_base_callback_fn_t cbfunc,
    void* cbdata)
{
    /* record the callback locally first */
    int status = orte_iof_base_callback_create(ORTE_PROC_MY_NAME, origin_tag,
                                               cbfunc, cbdata);
    if (ORTE_SUCCESS != status) {
        return status;
    }

    /* then ask the service to route matching data to us */
    return orte_iof_proxy_svc_subscribe(origin_name,
                                        origin_mask,
                                        origin_tag,
                                        ORTE_PROC_MY_NAME,
                                        ORTE_NS_CMP_ALL,
                                        origin_tag);
}
/*
 * Cancel a subscription: tell the service first, then drop the local
 * callback.  If the request to the service fails, the local callback is
 * left in place and that error is returned.
 */
int orte_iof_proxy_unsubscribe(
    const orte_process_name_t* origin_name,
    orte_ns_cmp_bitmask_t origin_mask,
    orte_iof_base_tag_t origin_tag)
{
    int status = orte_iof_proxy_svc_unsubscribe(origin_name,
                                                origin_mask,
                                                origin_tag,
                                                ORTE_PROC_MY_NAME,
                                                ORTE_NS_CMP_ALL,
                                                origin_tag);
    if (ORTE_SUCCESS != status) {
        return status;
    }

    /* service side is gone -- remove the local callback as well */
    return orte_iof_base_callback_delete(ORTE_PROC_MY_NAME, origin_tag);
}
/*
 * Fault-tolerance (checkpoint/restart) event handler.
 *
 *  - OPAL_CRS_CHECKPOINT: flush pending IOF data, then cancel the RML
 *    receive from the svc component.
 *  - OPAL_CRS_CONTINUE / OPAL_CRS_RESTART: post that receive again.
 *  - anything else (including OPAL_CRS_TERM): nothing to do.
 *
 * Returns ORTE_SUCCESS, or the error from the flush / receive call.
 */
int orte_iof_proxy_ft_event(int state)
{
    int status;

    if (OPAL_CRS_CHECKPOINT == state) {
        /* flush */
        status = orte_iof_base_flush();
        if (ORTE_SUCCESS != status) {
            return status;
        }
        /* stop receiving events */
        orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_IOF_SVC);
        return ORTE_SUCCESS;
    }

    if (OPAL_CRS_CONTINUE == state || OPAL_CRS_RESTART == state) {
        /* restart receiving events; both states are handled the same way */
        status = orte_rml.recv_nb(ORTE_NAME_WILDCARD,
                                  mca_iof_proxy_component.proxy_iov,
                                  1,
                                  ORTE_RML_TAG_IOF_SVC,
                                  ORTE_RML_ALLOC|ORTE_RML_PERSISTENT,
                                  orte_iof_proxy_svc_recv,
                                  NULL);
        return status;
    }

    /* OPAL_CRS_TERM and unknown states require no action */
    return ORTE_SUCCESS;
}

Просмотреть файл

@ -1,164 +0,0 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* The proxy IOF component is used in non-HNP processes. It is used
* to proxy all IOF actions back to the "svc" IOF component (i.e., the
* IOF component that runs in the HNP). The proxy IOF component is
* typically loaded in an orted and then tied to the stdin, stdout,
* and stderr streams of created child processes via pipes. The proxy
* IOF component in the orted then acts as the relay between
* stdin/stdout/stderr pipes and the svc IOF component in the HNP.
* This design allows us to manipulate stdin/stdout/stderr from before
* main() in the child process.
*
* Publish actions for SINKs are pushed back to the svc/HNP. Publish
* actions for SOURCEs are not pushed back to SINKs because all data
* fragments from SOURCEs are automatically sent back to the svc/HNP.
*
* All unpublish actions are pushed back to the svc/HNP (I'm not sure
* why -- perhaps this is a bug?).
*
* Push and pull actions are essentially implemented in terms of
* subscribe / unsubscribe.
*
* Subscribe / unsubscribe actions are fairly straightforward.
*
* Much of the intelligence of this component is actually contained in
* iof_base_endpoint.c (reading and writing to local file descriptors,
* setting up events based on file descriptors, etc.).
*
* A non-blocking OOB receive is posted at the initialization of this
* component to receive all messages from the svc/HNP (e.g., data
* fragments from streams, ACKs to fragments).
*
* Flow control is employed on a per-stream basis to ensure that
* SOURCEs don't overwhelm SINK resources (E.g., send an entire input
* file to an orted before the target process has read any of it).
*
* Important: this component is designed to work with the svc IOF
* component only. If we ever do a different IOF implementation
* scheme, it is likely that only some of this component will be
* useful for cannibalisation (if any at all).
*/
#ifndef ORTE_IOF_PROXY_H
#define ORTE_IOF_PROXY_H
#include "orte/mca/iof/iof.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/**
* Module publish.
*
* Creates a local endpoint for fd under the process name given by name.
* When mode is ORTE_IOF_SINK, a publish request is also sent to the svc
* component.
*
* @param name  Process name the endpoint is published under.
* @param mode  Endpoint mode (ORTE_IOF_SINK or a source mode).
* @param tag   IOF tag of the stream.
* @param fd    Local file descriptor.
* @return ORTE_SUCCESS, or the error from the failing step.
*/
int orte_iof_proxy_publish(
const orte_process_name_t* name,
orte_iof_base_mode_t mode,
orte_iof_base_tag_t tag,
int fd
);
/**
* Module unpublish.
*
* Sends an unpublish request to the svc component and deletes the
* matching local endpoint.  An endpoint that no longer exists is not
* treated as an error.
*
* @param name  Process name to match.
* @param mask  Which parts of name take part in the match.
* @param tag   IOF tag to match.
* @return ORTE_SUCCESS, or the endpoint-deletion error.
*/
int orte_iof_proxy_unpublish(
const orte_process_name_t* name,
orte_ns_cmp_bitmask_t mask,
orte_iof_base_tag_t tag
);
/**
* Module push.
*
* Creates a local SOURCE endpoint on fd and sends a subscription to the
* svc component on behalf of the destination peers.
*
* @param dst_name  Name used to select the destination peers.
* @param dst_mask  How dst_name is interpreted.
* @param dst_tag   IOF tag of the stream.
* @param fd        Local file descriptor to read from.
* @return ORTE_SUCCESS, or the error from the failing step.
*/
int orte_iof_proxy_push(
const orte_process_name_t* dst_name,
orte_ns_cmp_bitmask_t dst_mask,
orte_iof_base_tag_t dst_tag,
int fd
);
/**
* Module pull.
*
* Creates a local SINK endpoint on fd, publishes it, and subscribes it to
* the source peers.
*
* @param src_name  Name used to select the source peers.
* @param src_mask  How src_name is interpreted.
* @param src_tag   IOF tag of the stream.
* @param fd        Local file descriptor to write to.
* @return ORTE_SUCCESS, or the error from the failing step.
*/
int orte_iof_proxy_pull(
const orte_process_name_t* src_name,
orte_ns_cmp_bitmask_t src_mask,
orte_iof_base_tag_t src_tag,
int fd
);
/**
* Module subscribe.
*
* Registers cb / cbdata locally and sends a subscription for the selected
* source peers to the svc component.
*
* @param src_name  Name used to select the source peers.
* @param src_mask  How src_name is interpreted.
* @param src_tag   IOF tag of the stream.
* @param cb        Callback to register.
* @param cbdata    Opaque pointer registered together with cb.
* @return ORTE_SUCCESS, or the error from the failing step.
*/
int orte_iof_proxy_subscribe(
const orte_process_name_t* src_name,
orte_ns_cmp_bitmask_t src_mask,
orte_iof_base_tag_t src_tag,
orte_iof_base_callback_fn_t cb,
void* cbdata
);
/**
* Module unsubscribe.
*
* Sends an unsubscribe request to the svc component and, if that
* succeeds, removes the locally registered callback.
*
* @param src_name  Name used to select the source peers.
* @param src_mask  How src_name is interpreted.
* @param src_tag   IOF tag of the stream.
* @return ORTE_SUCCESS, or the error from the failing step.
*/
int orte_iof_proxy_unsubscribe(
const orte_process_name_t* src_name,
orte_ns_cmp_bitmask_t src_mask,
orte_iof_base_tag_t src_tag
);
/**
* Module finalize.  Nothing to release; always returns ORTE_SUCCESS.
*/
int orte_iof_proxy_finalize( void );
/**
* Module FT event.
*
* Reacts to a checkpoint/restart state change: on checkpoint the IOF is
* flushed and the receive from the svc component is cancelled; on
* continue or restart that receive is posted again.
*
* @param state  One of the OPAL_CRS_* state values.
* @return ORTE_SUCCESS, or the error from the flush / receive call.
*/
int orte_iof_proxy_ft_event(int state);
/**
* IOF proxy Component
*
* Extends the base IOF component with the single iovec that is handed to
* the RML receive for messages from the svc component.
*/
struct orte_iof_proxy_component_t {
/* base component; the leading member name suggests the usual
   "derived from base" layout -- NOTE(review): confirm against the MCA
   base definitions */
orte_iof_base_component_t super;
/* receive slot passed to orte_rml.recv_nb for ORTE_RML_TAG_IOF_SVC */
struct iovec proxy_iov[1];
};
typedef struct orte_iof_proxy_component_t orte_iof_proxy_component_t;
/* the one component instance and the module (function table) it hands out */
ORTE_MODULE_DECLSPEC extern orte_iof_proxy_component_t mca_iof_proxy_component;
extern orte_iof_base_module_t orte_iof_proxy_module;
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

Просмотреть файл

@ -1,131 +0,0 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/util/show_help.h"
#include "opal/runtime/opal_progress.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/mca/rml/rml.h"
#include "orte/util/proc_info.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/iof/base/base.h"
#include "orte/mca/iof/base/iof_base_endpoint.h"
#include "iof_proxy.h"
#include "iof_proxy_svc.h"
/*
* Local functions: the component's open / close / init entry points,
* referenced from the component structure below.
*/
static int orte_iof_proxy_open(void);
static int orte_iof_proxy_close(void);
static orte_iof_base_module_t* orte_iof_proxy_init(
int* priority,
bool *allow_multi_user_threads,
bool *have_hidden_threads);
/*
* Local variables
*/
/* set once init has posted the RML receive; close uses it to decide
   whether there is a receive to cancel */
static bool initialized = false;
/*
* The proxy component instance.  The initializer is positional and fills
* the embedded base component; proxy_iov is not listed here and is set up
* in orte_iof_proxy_init.
*/
orte_iof_proxy_component_t mca_iof_proxy_component = {
{
/* First, the mca_base_component_t struct containing meta
information about the component itself */
{
ORTE_IOF_BASE_VERSION_2_0_0,
"proxy", /* MCA component name */
ORTE_MAJOR_VERSION, /* MCA component major version */
ORTE_MINOR_VERSION, /* MCA component minor version */
ORTE_RELEASE_VERSION, /* MCA component release version */
orte_iof_proxy_open, /* component open */
orte_iof_proxy_close /* component close */
},
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
/* component init: selects the module and posts the RML receive */
orte_iof_proxy_init
}
};
/**
 * Component open.  No parameters to register and no resources to set
 * up, so this simply reports success.
 */
static int orte_iof_proxy_open(void)
{
    return ORTE_SUCCESS;   /* nothing to do */
}
/**
 * Component init.
 *
 * Declines selection (returns NULL) when running in the HNP.  Otherwise
 * reports priority 1 and the threading flags, posts the persistent RML
 * receive for messages from the svc IOF component, and returns the proxy
 * module.  Also returns NULL if the receive cannot be posted.
 */
static orte_iof_base_module_t*
orte_iof_proxy_init(int* priority, bool *allow_multi_user_threads, bool *have_hidden_threads)
{
    int status;

    if (true == orte_process_info.hnp) {
        return NULL;
    }

    *priority = 1;
    *allow_multi_user_threads = true;
    *have_hidden_threads = false;

    /* Start with an empty iovec and post a non-blocking, persistent RML
       receive to get messages from the svc IOF component. */
    mca_iof_proxy_component.proxy_iov[0].iov_base = NULL;
    mca_iof_proxy_component.proxy_iov[0].iov_len = 0;
    status = orte_rml.recv_nb(ORTE_NAME_WILDCARD,
                              mca_iof_proxy_component.proxy_iov,
                              1,
                              ORTE_RML_TAG_IOF_SVC,
                              ORTE_RML_ALLOC|ORTE_RML_PERSISTENT,
                              orte_iof_proxy_svc_recv,
                              NULL);
    if (status < 0) {
        opal_output(orte_iof_base.iof_output,
                    "orte_iof_proxy_init: unable to post non-blocking recv");
        return NULL;
    }

    /* remember that there is a receive for close() to cancel */
    initialized = true;
    return &orte_iof_proxy_module;
}
/**
 * Component close.  Cancels the RML receive posted by init, if init got
 * that far; otherwise there is nothing to undo.
 */
static int orte_iof_proxy_close(void)
{
    if (!initialized) {
        return ORTE_SUCCESS;
    }

    /* cancel the RML receive */
    return orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_IOF_SVC);
}

Просмотреть файл

@ -1,311 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/util/show_help.h"
#include "orte/mca/rml/rml.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/iof/base/base.h"
#include "orte/mca/iof/base/iof_base_header.h"
#include "orte/mca/iof/base/iof_base_endpoint.h"
#include "orte/mca/errmgr/errmgr.h"
#include "iof_proxy.h"
#include "iof_proxy_svc.h"
/*
* Local function prototypes.
*
* Both helpers are called from orte_iof_proxy_svc_recv, one per message
* type: _msg for forwarded data (header plus the payload that follows
* it), _ack for acknowledgements (header only).
*/
static void orte_iof_proxy_svc_msg(
const orte_process_name_t* origin,
orte_iof_base_msg_header_t* msg,
unsigned char* data);
static void orte_iof_proxy_svc_ack(
const orte_process_name_t* origin,
orte_iof_base_msg_header_t* msg);
/*
 * Send a "publish" request to the svc component.
 *
 * name is the process the endpoint is published under; this process is
 * recorded as the proxy, and the match mask is always ORTE_NS_CMP_ALL.
 *
 * Returns ORTE_SUCCESS, or the (negative) error from the RML send, which
 * is also logged.
 */
int orte_iof_proxy_svc_publish(
    const orte_process_name_t* name,
    int tag)
{
    orte_iof_base_header_t hdr;
    struct iovec iov;
    int rc;

    hdr.hdr_common.hdr_type = ORTE_IOF_BASE_HDR_PUB;
    /* Zero the reserved byte as well (as the unsubscribe request does),
       so that no uninitialized stack data is put on the wire. */
    hdr.hdr_common.hdr_reserve = (uint8_t)0;
    hdr.hdr_common.hdr_status = 0;
    hdr.hdr_pub.pub_name = *name;
    hdr.hdr_pub.pub_proxy = *ORTE_PROC_MY_NAME;
    hdr.hdr_pub.pub_mask = ORTE_NS_CMP_ALL;
    hdr.hdr_pub.pub_tag = tag;
    /* convert to network byte order before sending */
    ORTE_IOF_BASE_HDR_PUB_HTON(hdr.hdr_pub);

    iov.iov_base = (IOVBASE_TYPE*)&hdr;
    iov.iov_len = sizeof(hdr);

    rc = orte_rml.send(
        &orte_iof_base.iof_service,
        &iov,
        1,
        ORTE_RML_TAG_IOF_SVC,
        0);
    if (rc < 0) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    return ORTE_SUCCESS;
}
/*
 * Send an "unpublish" request to the svc component.
 *
 * name / mask / tag select the publication(s) to withdraw; this process
 * is recorded as the proxy.
 *
 * Returns ORTE_SUCCESS, or the (negative) error from the RML send, which
 * is also logged.
 */
int orte_iof_proxy_svc_unpublish(
    const orte_process_name_t* name,
    orte_ns_cmp_bitmask_t mask,
    int tag)
{
    orte_iof_base_header_t hdr;
    struct iovec iov;
    int rc;

    hdr.hdr_common.hdr_type = ORTE_IOF_BASE_HDR_UNPUB;
    /* Zero the reserved byte as well (as the unsubscribe request does),
       so that no uninitialized stack data is put on the wire. */
    hdr.hdr_common.hdr_reserve = (uint8_t)0;
    hdr.hdr_common.hdr_status = 0;
    hdr.hdr_pub.pub_name = *name;
    hdr.hdr_pub.pub_proxy = *ORTE_PROC_MY_NAME;
    hdr.hdr_pub.pub_mask = mask;
    hdr.hdr_pub.pub_tag = tag;
    /* convert to network byte order before sending */
    ORTE_IOF_BASE_HDR_PUB_HTON(hdr.hdr_pub);

    iov.iov_base = (IOVBASE_TYPE*)&hdr;
    iov.iov_len = sizeof(hdr);

    rc = orte_rml.send(
        &orte_iof_base.iof_service,
        &iov,
        1,
        ORTE_RML_TAG_IOF_SVC,
        0);
    if (rc < 0) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    return ORTE_SUCCESS;
}
/*
 * Send a "subscribe" request to the svc component.
 *
 * The origin triple (name, mask, tag) selects where the data comes from,
 * the target triple selects where it is to be delivered.
 *
 * Returns ORTE_SUCCESS, or the (negative) error from the RML send, which
 * is also logged.
 */
int orte_iof_proxy_svc_subscribe(
    const orte_process_name_t* origin_name,
    orte_ns_cmp_bitmask_t origin_mask,
    int origin_tag,
    const orte_process_name_t* target_name,
    orte_ns_cmp_bitmask_t target_mask,
    int target_tag
    )
{
    orte_iof_base_header_t hdr;
    struct iovec iov;
    int rc;

    hdr.hdr_common.hdr_type = ORTE_IOF_BASE_HDR_SUB;
    /* Zero the reserved byte as well (as the unsubscribe request does),
       so that no uninitialized stack data is put on the wire. */
    hdr.hdr_common.hdr_reserve = (uint8_t)0;
    hdr.hdr_common.hdr_status = 0;
    hdr.hdr_sub.origin_name = *origin_name;
    hdr.hdr_sub.origin_mask = origin_mask;
    hdr.hdr_sub.origin_tag = origin_tag;
    hdr.hdr_sub.target_name = *target_name;
    hdr.hdr_sub.target_mask = target_mask;
    hdr.hdr_sub.target_tag = target_tag;
    /* convert to network byte order before sending */
    ORTE_IOF_BASE_HDR_SUB_HTON(hdr.hdr_sub);

    iov.iov_base = (IOVBASE_TYPE*)&hdr;
    iov.iov_len = sizeof(hdr);

    rc = orte_rml.send(
        &orte_iof_base.iof_service,
        &iov,
        1,
        ORTE_RML_TAG_IOF_SVC,
        0);
    if (rc < 0) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    return ORTE_SUCCESS;
}
/*
 * Send an "unsubscribe" request to the svc component.
 *
 * Takes the same origin / target triples as the subscribe request and
 * withdraws the matching subscription.  Returns ORTE_SUCCESS, or the
 * (negative) error from the RML send after logging it.
 */
int orte_iof_proxy_svc_unsubscribe(
    const orte_process_name_t* origin_name,
    orte_ns_cmp_bitmask_t origin_mask,
    int origin_tag,
    const orte_process_name_t* target_name,
    orte_ns_cmp_bitmask_t target_mask,
    int target_tag
    )
{
    orte_iof_base_header_t request;
    struct iovec vec;
    int status;

    /* common part */
    request.hdr_common.hdr_type = ORTE_IOF_BASE_HDR_UNSUB;
    request.hdr_common.hdr_reserve = (uint8_t)0;
    request.hdr_common.hdr_status = (int16_t)0;

    /* which subscription to drop */
    request.hdr_sub.origin_name = *origin_name;
    request.hdr_sub.origin_mask = origin_mask;
    request.hdr_sub.origin_tag = origin_tag;
    request.hdr_sub.target_name = *target_name;
    request.hdr_sub.target_mask = target_mask;
    request.hdr_sub.target_tag = target_tag;
    ORTE_IOF_BASE_HDR_SUB_HTON(request.hdr_sub);

    vec.iov_base = (IOVBASE_TYPE*)&request;
    vec.iov_len = sizeof(request);

    status = orte_rml.send(&orte_iof_base.iof_service,
                           &vec,
                           1,
                           ORTE_RML_TAG_IOF_SVC,
                           0);
    if (status < 0) {
        ORTE_ERROR_LOG(status);
        return status;
    }
    return ORTE_SUCCESS;
}
/*
 * RML callback: a message from the svc component has arrived.
 *
 * The message buffer (msg->iov_base) is owned by this function: it is
 * freed after dispatch and the component's receive iovec is reset for
 * the next message.  A message with no buffer is reported and ignored.
 */
void orte_iof_proxy_svc_recv(
    int status,
    orte_process_name_t* origin,
    struct iovec* msg,
    int count,
    orte_rml_tag_t tag,
    void* cbdata)
{
    orte_iof_base_header_t* header;

    if (NULL == msg->iov_base) {
        opal_output(orte_iof_base.iof_output,
                    "orte_iof_proxy_svc_recv: invalid message\n");
        return;
    }
    header = (orte_iof_base_header_t*)msg->iov_base;

    /* Only two kinds of messages come from the svc component:

       - Messages: forwarded data intended to be consumed by endpoints
         in this process, either representing local fd's or pipes to
         proxied processes (e.g., orted's fronting ORTE processes).
       - ACKs: acknowledging data sent from this process to the svc
         component (which may have been forwarded on to other
         processes).

       Any other type is silently dropped. */
    if (ORTE_IOF_BASE_HDR_MSG == header->hdr_common.hdr_type) {
        ORTE_IOF_BASE_HDR_MSG_NTOH(header->hdr_msg);
        /* the payload starts right after the header */
        orte_iof_proxy_svc_msg(origin, &header->hdr_msg,
                               (unsigned char*)(header + 1));
    } else if (ORTE_IOF_BASE_HDR_ACK == header->hdr_common.hdr_type) {
        ORTE_IOF_BASE_HDR_MSG_NTOH(header->hdr_msg);
        orte_iof_proxy_svc_ack(origin, &header->hdr_msg);
    }

    free(header);

    /* reset the data in the RML receive */
    mca_iof_proxy_component.proxy_iov[0].iov_base = NULL;
    mca_iof_proxy_component.proxy_iov[0].iov_len = 0;
}
/*
 * Data forwarded by the svc component for a tag that this process must
 * have published earlier: hand it to the matching endpoint.
 */
static void orte_iof_proxy_svc_msg(
    const orte_process_name_t* origin,
    orte_iof_base_msg_header_t* msg,
    unsigned char* data)
{
    /* Find the endpoint for the tag carried in the message. */
    orte_iof_base_endpoint_t* ep =
        orte_iof_base_endpoint_match(ORTE_NAME_WILDCARD, ORTE_NS_CMP_NONE, msg->msg_tag);

    if (NULL == ep) {
        /* No endpoint: it has already been unpublished and this message
           was still en route to us at that time.  Discard it. */
        return;
    }

    orte_iof_base_endpoint_forward(ep, origin, msg, data);
    /* endpoint_match() RETAINed the endpoint, so RELEASE it here */
    OBJ_RELEASE(ep);
}
/*
 * An ACK from the svc component for a tag that this process must have
 * published earlier: pass it on to the matching endpoint.
 */
static void orte_iof_proxy_svc_ack(
    const orte_process_name_t* origin,
    orte_iof_base_msg_header_t* msg)
{
    /* Find the endpoint for the origin and tag carried in the ACK. */
    orte_iof_base_endpoint_t* ep =
        orte_iof_base_endpoint_match(&msg->msg_origin, ORTE_NS_CMP_ALL, msg->msg_tag);

    if (NULL == ep) {
        /* No endpoint: it has already been unpublished and this ACK was
           still en route to us at that time.  Discard it. */
        return;
    }

    /* acknowledge everything up to the end of the ACKed fragment */
    orte_iof_base_endpoint_ack(ep, msg->msg_seq + msg->msg_len);
    /* endpoint_match() RETAINed the endpoint, so RELEASE it here */
    OBJ_RELEASE(ep);
}

Просмотреть файл

@ -1,82 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights resereved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_IOF_PROXY_SVC_H
#define MCA_IOF_PROXY_SVC_H
#include "orte_config.h"
#include "orte/types.h"
#include "orte/mca/rml/rml_types.h"
#include "orte/util/name_fns.h"
#include "orte/mca/iof/iof.h"
BEGIN_C_DECLS
/*
* Send requests to the svc component.
*
* Each function fills in a request header, sends it to the IOF service
* with tag ORTE_RML_TAG_IOF_SVC and returns ORTE_SUCCESS, or the error
* from the send.
*/
/* Publish an endpoint for process `name` under `tag`. */
int orte_iof_proxy_svc_publish(
const orte_process_name_t* name,
int tag
);
/* Withdraw the publication(s) matching name / mask / tag. */
int orte_iof_proxy_svc_unpublish(
const orte_process_name_t* name,
orte_ns_cmp_bitmask_t mask,
int tag
);
/* Subscribe the destination (dst_*) to data from the source (src_*). */
int orte_iof_proxy_svc_subscribe(
const orte_process_name_t* src_name,
orte_ns_cmp_bitmask_t src_mask,
int src_tag,
const orte_process_name_t* dst_name,
orte_ns_cmp_bitmask_t dst_mask,
int dst_tag
);
/* Withdraw the subscription described by the src_* and dst_* triples. */
int orte_iof_proxy_svc_unsubscribe(
const orte_process_name_t* src_name,
orte_ns_cmp_bitmask_t src_mask,
int src_tag,
const orte_process_name_t* dst_name,
orte_ns_cmp_bitmask_t dst_mask,
int dst_tag
);
/**
* Receive callback for RML messages from the svc component.
*
* The signature is the one required for callbacks passed to
* orte_rml.recv_nb; msg points to the received iovec array and count is
* its length.
*/
void orte_iof_proxy_svc_recv(
int status,
orte_process_name_t* peer,
struct iovec* msg,
int count,
orte_rml_tag_t tag,
void* cbdata);
END_C_DECLS
#endif

Просмотреть файл

@ -1,283 +0,0 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include <errno.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#ifdef HAVE_STRING_H
#include <string.h>
#endif /* HAVE_STRING_H */
#include "orte/util/show_help.h"
#include "orte/mca/oob/base/base.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/iof/base/base.h"
#include "orte/mca/iof/base/iof_base_endpoint.h"
#include "iof_svc.h"
#include "iof_svc_pub.h"
#include "iof_svc_sub.h"
/*
 * Function table exported by the svc IOF component.
 *
 * The initializer is positional, so the entries must stay in the member
 * order of orte_iof_base_module_t (declared outside this file -- check
 * that declaration before reordering or inserting entries).
 * orte_iof_svc_finalize and orte_iof_svc_ft_event are not defined in this
 * file; orte_iof_base_flush is likewise defined elsewhere.
 */
orte_iof_base_module_t orte_iof_svc_module = {
orte_iof_svc_publish,
orte_iof_svc_unpublish,
orte_iof_svc_subscribe,
orte_iof_svc_unsubscribe,
orte_iof_svc_push,
orte_iof_svc_pull,
orte_iof_base_flush,
orte_iof_svc_finalize,
orte_iof_svc_ft_event
};
/*
 * Create an endpoint for a local file descriptor and "publish" it under
 * the name of the origin process.  For a SINK a publication entry is
 * created as well, so that incoming messages can be forwarded to it.
 *
 * SOURCEs need no publication record: a) the endpoint wakes up the event
 * engine and reads from the fd whenever data is available, and b) that
 * data is then automatically sent to the iof svc component for possible
 * forwarding.
 *
 * Returns the endpoint-creation error if that step fails; otherwise the
 * result of creating the publication (SINK) or ORTE_SUCCESS (SOURCE).
 */
int orte_iof_svc_publish(
    const orte_process_name_t* origin,
    orte_iof_base_mode_t mode,
    orte_iof_base_tag_t tag,
    int fd)
{
    /* local endpoint reflecting the registration */
    int status = orte_iof_base_endpoint_create(origin, mode, tag, fd);

    /* done on failure, and also for anything that is not a SINK */
    if (ORTE_SUCCESS != status || ORTE_IOF_SINK != mode) {
        return status;
    }

    /* publish the SINK endpoint */
    return orte_iof_svc_pub_create(origin,
                                   ORTE_PROC_MY_NAME,
                                   ORTE_NS_CMP_ALL,
                                   tag);
}
/*
 * Remove all registrations matching the specified origin process name,
 * mask and tag values.  (Here in the svc component, origin should
 * usually be just this process -- the svc component is unlikely to act
 * as an IOF proxy for any other processes like the orted does.)
 *
 * "Not found" is not an error in either step; any other failure is
 * returned to the caller.
 */
int orte_iof_svc_unpublish(
    const orte_process_name_t* origin,
    orte_ns_cmp_bitmask_t mask,
    orte_iof_base_tag_t tag)
{
    int status;

    /* Remove the publication.  Some other entity may already have
       deleted it (e.g., a message arrived saying to unpublish), in which
       case we get NOT_FOUND.  That is fine: all we want is that no
       corresponding publish remains. */
    status = orte_iof_svc_pub_delete(origin, ORTE_PROC_MY_NAME, mask, tag);
    if (ORTE_SUCCESS != status && ORTE_ERR_NOT_FOUND != status) {
        return status;
    }

    /* Remove the local endpoint.  It, too, may already be gone (e.g.,
       some entity noticed that the fd closed and called
       orte_iof_base_endpoint_delete on the corresponding endpoint), so
       NOT_FOUND is ignored here as well: the endpoint no longer exists
       when we return, which is the desired result. */
    status = orte_iof_base_endpoint_delete(origin, mask, tag);

    return (ORTE_SUCCESS == status || ORTE_ERR_NOT_FOUND == status)
               ? ORTE_SUCCESS
               : status;
}
/**
 * Push data read from a local file descriptor to the set of SINK peers
 * selected by sink_name / sink_mask / sink_tag.
 *
 * @return the subscription-creation error if that step fails, otherwise
 *         the result of creating the local endpoint.
 */
int orte_iof_svc_push(
    const orte_process_name_t* sink_name,
    orte_ns_cmp_bitmask_t sink_mask,
    orte_iof_base_tag_t sink_tag,
    int fd)
{
    /* Subscription first; it will be matched against a publish of a
       SINK from a remote process. */
    int status = orte_iof_svc_sub_create(ORTE_PROC_MY_NAME,
                                         ORTE_NS_CMP_ALL,
                                         sink_tag,
                                         sink_name,
                                         sink_mask,
                                         sink_tag);
    if (ORTE_SUCCESS != status) {
        return status;
    }

    /* Now the local endpoint reflecting the registration.  This enters
       the fd into the event engine, which wakes up when there is data to
       read.  The data is put in an IOF fragment and RML-sent to
       iof_svc_proxy_recv() (i.e., in this module!) for handling, meaning
       matching and forwarding to the publish(es) that was (were) matched
       to the subscription above.

       The order matters: the endpoint is created *after* the
       subscription so that it is not found and attached to the
       subscription.  Instead, data consumed by the event engine
       callbacks is RML-sent to iof_svc_proxy_recv(), as described
       above. */
    return orte_iof_base_endpoint_create(ORTE_PROC_MY_NAME,
                                         ORTE_IOF_SOURCE,
                                         sink_tag,
                                         fd);
}
/*
 * Pull data from the set of SOURCE peers selected by source_name /
 * source_mask / source_tag and write it to the given file descriptor.
 *
 * Returns the endpoint-creation error if that step fails, otherwise the
 * result of creating the subscription.
 */
int orte_iof_svc_pull(
    const orte_process_name_t* source_name,
    orte_ns_cmp_bitmask_t source_mask,
    orte_iof_base_tag_t source_tag,
    int fd)
{
    /* The local endpoint is created *before* the subscription, so that
       the subscription finds the endpoint and attaches it to itself. */
    int status = orte_iof_base_endpoint_create(ORTE_PROC_MY_NAME,
                                               ORTE_IOF_SINK,
                                               source_tag,
                                               fd);
    if (ORTE_SUCCESS != status) {
        return status;
    }

    /* create the subscription */
    return orte_iof_svc_sub_create(source_name,
                                   source_mask,
                                   source_tag,
                                   ORTE_PROC_MY_NAME,
                                   ORTE_NS_CMP_ALL,
                                   source_tag);
}
/*
 * Register a callback to be invoked on receipt of data from the set of
 * origin peers selected by origin_name / origin_mask / origin_tag.
 *
 * Returns the callback-registration error if that step fails, otherwise
 * the result of creating the local subscription.
 */
int orte_iof_svc_subscribe(
    const orte_process_name_t* origin_name,
    orte_ns_cmp_bitmask_t origin_mask,
    orte_iof_base_tag_t origin_tag,
    orte_iof_base_callback_fn_t cbfunc,
    void* cbdata)
{
    /* local registration reflecting the callback */
    int status = orte_iof_base_callback_create(ORTE_PROC_MY_NAME, origin_tag,
                                               cbfunc, cbdata);
    if (ORTE_SUCCESS != status) {
        return status;
    }

    /* local subscription that routes matching data to this process */
    return orte_iof_svc_sub_create(origin_name,
                                   origin_mask,
                                   origin_tag,
                                   ORTE_PROC_MY_NAME,
                                   ORTE_NS_CMP_ALL,
                                   origin_tag);
}
/*
 * Remove a subscription created by orte_iof_svc_subscribe: delete the
 * local subscription and then the callback registered with it.  If the
 * subscription cannot be deleted, the callback is left in place and that
 * error is returned.
 */
int orte_iof_svc_unsubscribe(
    const orte_process_name_t* origin_name,
    orte_ns_cmp_bitmask_t origin_mask,
    orte_iof_base_tag_t origin_tag)
{
    int status = orte_iof_svc_sub_delete(origin_name,
                                         origin_mask,
                                         origin_tag,
                                         ORTE_PROC_MY_NAME,
                                         ORTE_NS_CMP_ALL,
                                         origin_tag);
    if (ORTE_SUCCESS != status) {
        return status;
    }

    /* clean up any locally registered callback */
    return orte_iof_base_callback_delete(ORTE_PROC_MY_NAME, origin_tag);
}

Просмотреть файл

@ -1,176 +0,0 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* The svc IOF component is used in HNP processes only. It is the
* "hub" for all IOF activity, meaning that *all* IOF traffic is
* routed to the svc component, and this component figures out where
* it is supposed to go from there. Specifically: there is *no*
* direct proxy-to-proxy IOF communication. If a proxy/orted wants to
* get a stream from another proxy/orted, the stream will go
* proxy/orted -> svc/HNP -> proxy/orted.
*
* The svc IOF component does two things: 1. forward fragments between
* file descriptors and streams, and 2. maintain forwarding tables to
* "route" incoming fragments to outgoing destinations (both file
* descriptors and other published streams).
*
* The svc IOF component maintains tables of all publications and all
* subscriptions. Subscriptions can have a list of publications
* and/or endpoints to forward incoming fragments to.
*
*
*
* Important: this component is designed to work with the proxy IOF
* component only. If we ever do a different IOF implementation
* scheme, it is likely that only some of this component will be
* useful for cannibalisation (if any at all).
*/
#ifndef ORTE_IOF_SVC_H
#define ORTE_IOF_SVC_H
#include "orte/mca/iof/iof.h"
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif /* HAVE_SYS_TYPES_H */
#ifdef HAVE_SYS_UIO_H
#include <sys/uio.h>
#endif /* HAVE_SYS_UIO_H */
#ifdef HAVE_NET_UIO_H
#include <net/uio.h>
#endif /* HAVE_NET_UIO_H */
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/**
* Publish a local file descriptor as an endpoint that is logically
* associated with the specified process name (e.g. master side of a
* pipe/pty connected to a child process)
*
* @param name Process name the published endpoint is associated with.
* @param mode Endpoint mode (orte_iof_base_mode_t) -- presumably selects
*             source vs. sink behaviour; TODO confirm against the IOF base.
* @param tag  IOF tag identifying the stream being published.
* @param fd   Local file descriptor to publish.
*
* @return int status code; the implementation is not visible in this
*         header -- presumably ORTE_SUCCESS or an ORTE error code.
*/
int orte_iof_svc_publish(
const orte_process_name_t* name,
orte_iof_base_mode_t mode,
orte_iof_base_tag_t tag,
int fd
);
/**
* Remove all registrations matching the specified process
* name, mask and tag values.
*
* @param name Process name to match registrations against.
* @param mask Comparison bitmask (orte_ns_cmp_bitmask_t) applied when
*             matching the name.
* @param tag  IOF tag a registration must carry in order to match.
*
* @return int status code; the implementation is not visible in this
*         header -- presumably ORTE_SUCCESS or an ORTE error code.
*/
int orte_iof_svc_unpublish(
const orte_process_name_t* name,
orte_ns_cmp_bitmask_t mask,
orte_iof_base_tag_t tag
);
/**
* Explicitly push data from the specified file descriptor
* to the indicated set of peers.
*
* @param dst_name Name used to qualify the set of destination peers.
* @param dst_mask Mask that specifies how the name is interpreted.
* @param dst_tag  Match a specific peer endpoint.
* @param fd       Local file descriptor the data is taken from.
*/
int orte_iof_svc_push(
const orte_process_name_t* dst_name,
orte_ns_cmp_bitmask_t dst_mask,
orte_iof_base_tag_t dst_tag,
int fd
);
/**
* Explicitly pull data from the specified set of peers
* and dump to the indicated file descriptor.
*
* @param src_name Name used to qualify the set of source peers.
* @param src_mask Mask that specifies how the name is interpreted.
* @param src_tag  Match a specific peer endpoint.
* @param fd       Local file descriptor the pulled data is written to.
*/
int orte_iof_svc_pull(
const orte_process_name_t* src_name,
orte_ns_cmp_bitmask_t src_mask,
orte_iof_base_tag_t src_tag,
int fd
);
/**
* Subscribe to receive a callback on receipt of data
* from a specified set of peers.
*
* @param src_name Name used to qualify the set of source peers.
* @param src_mask Mask applied when matching src_name -- presumably with
*                 the same meaning as for push/pull; TODO confirm.
* @param src_tag  IOF tag of the stream being subscribed to.
* @param cb       Callback to invoke on receipt of data.
* @param cbdata   Opaque pointer supplied by the caller -- presumably
*                 handed back to cb; TODO confirm.
*/
int orte_iof_svc_subscribe(
const orte_process_name_t* src_name,
orte_ns_cmp_bitmask_t src_mask,
orte_iof_base_tag_t src_tag,
orte_iof_base_callback_fn_t cb,
void* cbdata
);
/**
* Counterpart of orte_iof_svc_subscribe(), taking the same name, mask
* and tag triple.
*
* NOTE(review): presumably removes the matching subscription(s); the
* implementation is not visible from this header -- confirm.
*/
int orte_iof_svc_unsubscribe(
const orte_process_name_t* src_name,
orte_ns_cmp_bitmask_t src_mask,
orte_iof_base_tag_t src_tag
);
/*
* Shut down the svc module: cancels the receive posted on
* ORTE_RML_TAG_IOF_SVC and removes the RML exception handler.
* Returns ORTE_SUCCESS.
*/
int orte_iof_svc_finalize(void);
/*
* Fault-tolerance event hook. The svc implementation ignores the
* state argument and returns ORTE_SUCCESS.
*/
int orte_iof_svc_ft_event(int state);
/**
* IOF svc Component
*
* Component-wide state of the svc IOF component. A single instance,
* mca_iof_svc_component, is defined in the component source file.
*/
struct orte_iof_svc_component_t {
/* base IOF component data (version info, open/close, init) */
orte_iof_base_component_t super;
/* list of current publications; emptied when the component closes */
opal_list_t svc_published;
/* list of current subscriptions; emptied when the component closes */
opal_list_t svc_subscribed;
/* mutex held while the two lists above are drained at close time */
opal_mutex_t svc_lock;
/* iovec handed to the persistent non-blocking RML receive at init */
struct iovec svc_iov[1];
};
typedef struct orte_iof_svc_component_t orte_iof_svc_component_t;
/* the one svc component instance */
ORTE_MODULE_DECLSPEC extern orte_iof_svc_component_t mca_iof_svc_component;
/* module returned by the component's init function when selected */
extern orte_iof_base_module_t orte_iof_svc_module;
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

Просмотреть файл

@ -1,199 +0,0 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/util/show_help.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/util/proc_info.h"
#include "orte/mca/rml/rml.h"
#include "orte/runtime/orte_globals.h"
#include "iof_svc.h"
#include "iof_svc_proxy.h"
#include "iof_svc_pub.h"
#include "iof_svc_sub.h"
/*
* Local functions
*/
static int orte_iof_svc_open(void);
static int orte_iof_svc_close(void);
static orte_iof_base_module_t* orte_iof_svc_init(
int* priority,
bool *allow_multi_user_threads,
bool *have_hidden_threads);
/*
* Local variables
*/
static bool initialized = false;
/*
* The svc component instance. Only the leading "super" member is
* initialized explicitly here; the publication/subscription lists,
* the lock and the iovec are set up later in orte_iof_svc_init().
* The initializer is positional, so the order of entries matters.
*/
orte_iof_svc_component_t mca_iof_svc_component = {
{
/* First, the mca_base_component_t struct containing meta
information about the component itself */
{
ORTE_IOF_BASE_VERSION_2_0_0,
"svc", /* MCA component name */
ORTE_MAJOR_VERSION, /* MCA component major version */
ORTE_MINOR_VERSION, /* MCA component minor version */
ORTE_RELEASE_VERSION, /* MCA component release version */
orte_iof_svc_open, /* component open */
orte_iof_svc_close /* component close */
},
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
/* component init: returns the svc module, or NULL when not the HNP */
orte_iof_svc_init
}
};
/**
 * Component open hook.
 *
 * The svc component has no work to do when it is opened, so this
 * simply reports success.
 *
 * @return ORTE_SUCCESS always.
 */
static int orte_iof_svc_open(void)
{
    return ORTE_SUCCESS;
}
/**
 * Component close hook.
 *
 * Does nothing unless orte_iof_svc_init() ran to completion. Otherwise,
 * while holding the component lock, every entry is removed from the
 * subscription list and then from the publication list and released;
 * afterwards the receive posted on ORTE_RML_TAG_IOF_SVC is cancelled.
 *
 * @return ORTE_SUCCESS always.
 */
static int orte_iof_svc_close(void)
{
    opal_list_item_t *entry;

    if (!initialized) {
        return ORTE_SUCCESS;
    }

    OPAL_THREAD_LOCK(&mca_iof_svc_component.svc_lock);

    /* drain the subscriptions first ... */
    for (;;) {
        entry = opal_list_remove_first(&mca_iof_svc_component.svc_subscribed);
        if (NULL == entry) {
            break;
        }
        OBJ_RELEASE(entry);
    }

    /* ... then the publications */
    for (;;) {
        entry = opal_list_remove_first(&mca_iof_svc_component.svc_published);
        if (NULL == entry) {
            break;
        }
        OBJ_RELEASE(entry);
    }

    OPAL_THREAD_UNLOCK(&mca_iof_svc_component.svc_lock);

    orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_IOF_SVC);

    return ORTE_SUCCESS;
}
/**
 * RML exception callback, run when a peer is disconnected.
 *
 * An exception on the RML connection to a peer is treated as a
 * recoverable event (it can also be seen during a normal RML shutdown),
 * so nothing is aborted. Instead, everything belonging to that peer is
 * torn down: its subscriptions, its publications, and any endpoint
 * that still matches it.
 *
 * @param peer    Name of the peer the exception was detected for.
 * @param reason  Exception reason reported by the RML (unused).
 */
static void
orte_iof_svc_exception_handler(const orte_process_name_t* peer, orte_rml_exception_t reason)
{
    orte_iof_base_endpoint_t *ep;

    opal_output_verbose(1, orte_iof_base.iof_output,
                        "iof svc exception handler! %s\n",
                        ORTE_NAME_PRINT((orte_process_name_t*)peer));

    /* forget everything this peer subscribed to or published */
    orte_iof_svc_sub_delete_all(peer);
    orte_iof_svc_pub_delete_all(peer);
    opal_output_verbose(1, orte_iof_base.iof_output, "deleted all pubs and subs\n");

    /* close and delete endpoints for this peer until none match */
    for (ep = orte_iof_base_endpoint_match(peer, ORTE_NS_CMP_ALL, ORTE_IOF_ANY);
         NULL != ep;
         ep = orte_iof_base_endpoint_match(peer, ORTE_NS_CMP_ALL, ORTE_IOF_ANY)) {
        orte_iof_base_endpoint_closed(ep);
        /* delete the endpoint that was just matched */
        orte_iof_base_endpoint_delete(peer, ORTE_NS_CMP_ALL, ORTE_IOF_ANY);
    }

    opal_output_verbose(1, orte_iof_base.iof_output, "done with exception handler\n");
}
/**
 * Module initialization.
 *
 * The svc component is only usable in the HNP; in any other process
 * NULL is returned so the component is not selected. In the HNP the
 * component state (subscription list, publication list, lock) is
 * constructed, a persistent non-blocking receive is posted on
 * ORTE_RML_TAG_IOF_SVC, and an RML exception handler is registered.
 *
 * @param priority                  [out] set to 1.
 * @param allow_multi_user_threads  [out] set to true.
 * @param have_hidden_threads       [out] set to false.
 *
 * @return pointer to the svc module, or NULL if this process is not
 *         the HNP or the receive could not be posted.
 */
static orte_iof_base_module_t*
orte_iof_svc_init(int* priority, bool *allow_multi_user_threads, bool *have_hidden_threads)
{
    int rc;

    if (false == orte_process_info.hnp) {
        return NULL;
    }

    *priority = 1;
    *allow_multi_user_threads = true;
    *have_hidden_threads = false;

    OBJ_CONSTRUCT(&mca_iof_svc_component.svc_subscribed, opal_list_t);
    OBJ_CONSTRUCT(&mca_iof_svc_component.svc_published, opal_list_t);
    OBJ_CONSTRUCT(&mca_iof_svc_component.svc_lock, opal_mutex_t);

    /* post non-blocking recv */
    mca_iof_svc_component.svc_iov[0].iov_base = NULL;
    mca_iof_svc_component.svc_iov[0].iov_len = 0;

    rc = orte_rml.recv_nb(
        ORTE_NAME_WILDCARD,
        mca_iof_svc_component.svc_iov,
        1,
        ORTE_RML_TAG_IOF_SVC,
        ORTE_RML_ALLOC|ORTE_RML_PERSISTENT,
        orte_iof_svc_proxy_recv,
        NULL
    );
    if(rc != ORTE_SUCCESS) {
        opal_output(orte_iof_base.iof_output,
                    "orte_iof_svc_init: unable to post non-blocking recv");
        return NULL;
    }

    /* The exception handler only cleans up after disconnected peers, so
     * a failure to register it is not fatal -- but it must not go
     * unnoticed either, as it was when rc was simply dropped. */
    rc = orte_rml.add_exception_handler(orte_iof_svc_exception_handler);
    if(rc != ORTE_SUCCESS) {
        opal_output(orte_iof_base.iof_output,
                    "orte_iof_svc_init: unable to register exception handler");
    }

    initialized = true;
    return &orte_iof_svc_module;
}
/**
 * Module finalize.
 *
 * Cancels the receive posted on ORTE_RML_TAG_IOF_SVC and then removes
 * the RML exception handler. The results of both calls are ignored.
 *
 * @return ORTE_SUCCESS always.
 */
int orte_iof_svc_finalize(void)
{
    /* stop listening for IOF svc messages */
    orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_IOF_SVC );

    /* and stop reacting to peer disconnects */
    orte_rml.del_exception_handler(orte_iof_svc_exception_handler);

    return ORTE_SUCCESS;
}
/**
 * Fault-tolerance event hook.
 *
 * The replica does not need to do anything for a checkpoint, so the
 * event is acknowledged without any action.
 *
 * @param state  Fault-tolerance state being signalled (unused).
 * @return ORTE_SUCCESS always.
 */
int orte_iof_svc_ft_event(int state)
{
    return ORTE_SUCCESS;
}

Просмотреть файл

@ -20,25 +20,26 @@
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if OMPI_BUILD_iof_null_DSO
if OMPI_BUILD_iof_tool_DSO
component_noinst =
component_install = mca_iof_null.la
component_install = mca_iof_tool.la
else
component_noinst = libmca_iof_null.la
component_noinst = libmca_iof_tool.la
component_install =
endif
null_SOURCES = \
iof_null.h \
iof_null_module.c \
iof_null_component.c
tool_SOURCES = \
iof_tool.c \
iof_tool.h \
iof_tool_component.c \
iof_tool_receive.c
mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_iof_null_la_SOURCES = $(null_SOURCES)
mca_iof_null_la_LDFLAGS = -module -avoid-version
mca_iof_tool_la_SOURCES = $(tool_SOURCES)
mca_iof_tool_la_LDFLAGS = -module -avoid-version
noinst_LTLIBRARIES = $(component_noinst)
libmca_iof_null_la_SOURCES = $(null_SOURCES)
libmca_iof_null_la_LIBADD =
libmca_iof_null_la_LDFLAGS = -module -avoid-version
libmca_iof_tool_la_SOURCES = $(tool_SOURCES)
libmca_iof_tool_la_LIBADD =
libmca_iof_tool_la_LDFLAGS = -module -avoid-version

Просмотреть файл

208
orte/mca/iof/tool/iof_tool.c Обычный файл
Просмотреть файл

@ -0,0 +1,208 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include <errno.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#ifdef HAVE_STRING_H
#include <string.h>
#endif /* HAVE_STRING_H */
#include "orte/util/show_help.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "orte/runtime/orte_wait.h"
#include "orte/mca/iof/iof.h"
#include "orte/mca/iof/base/base.h"
#include "iof_tool.h"
static int tool_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag, int fd);
static int tool_pull(const orte_process_name_t* src_name,
orte_iof_tag_t src_tag,
int fd);
static int tool_close(const orte_process_name_t* peer,
orte_iof_tag_t source_tag);
static int tool_ft_event(int state);
/*
* Function table exported by the tool IOF component. The initializer
* is positional: the push, pull, close and fault-tolerance event
* handlers, in that order.
*/
orte_iof_base_module_t orte_iof_tool_module = {
tool_push,
tool_pull,
tool_close,
tool_ft_event
};
/**
 * Push handler for tools -- deliberately unsupported.
 *
 * At this time tools are not allowed to push data into the stdin of a
 * job: it could be confusing which stdin is being read/used, and
 * interleaving of the data could not be resolved.
 *
 * @param dst_name  Intended SINK peer(s) (unused).
 * @param src_tag   Stream tag (unused).
 * @param fd        Local file descriptor (unused).
 *
 * @return ORTE_ERR_NOT_SUPPORTED always.
 */
static int tool_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag, int fd)
{
    return ORTE_ERR_NOT_SUPPORTED;
}
/**
 * Completion callback for the non-blocking RML sends issued by this
 * module.
 *
 * The only job left once a send has completed is to give up the
 * reference on the buffer that was sent.
 *
 * @param status  Completion status of the send (unused).
 * @param peer    Process the message was sent to (unused).
 * @param buf     Buffer that was sent; released here.
 * @param tag     RML tag the message was sent on (unused).
 * @param cbdata  Caller data given to the send (unused).
 */
static void send_cb(int status, orte_process_name_t *peer,
                    opal_buffer_t *buf, orte_rml_tag_t tag,
                    void *cbdata)
{
    OBJ_RELEASE(buf);
}
/**
 * Pull data from the specified set of SOURCE peers and
 * dump to the indicated file descriptor.
 *
 * A tool cannot read a process' output directly; instead it asks the
 * HNP of the source's job to forward the stream. The request consists
 * of the stream tag with ORTE_IOF_PULL set, followed by the name of
 * the source process. Note that the HNP will return an error if the
 * specified stream of any intended recipient is not open. By default
 * stdout/err/diag are all left open, but the user can direct that any
 * of them be closed, so the success of the request depends on how the
 * application was executed.
 *
 * @param src_name  Process whose output is wanted; its jobid selects
 *                  the HNP that receives the request.
 * @param src_tag   Stream(s) to pull.
 * @param fd        Local file descriptor (not used by this function).
 *
 * @return ORTE_SUCCESS once the request has been handed to the RML,
 *         otherwise the error code from packing or sending.
 */
static int tool_pull(const orte_process_name_t* src_name,
                     orte_iof_tag_t src_tag,
                     int fd)
{
    opal_buffer_t *buf;
    orte_iof_tag_t tag;
    orte_process_name_t hnp;
    int rc;

    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                         "%s pulling output for proc %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(src_name)));

    buf = OBJ_NEW(opal_buffer_t);

    /* setup the tag to pull from HNP */
    tag = src_tag | ORTE_IOF_PULL;

    /* pack the tag - we do this first so that flow control messages can
     * consist solely of the tag
     */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &tag, 1, ORTE_IOF_TAG))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(buf);
        return rc;
    }

    /* pack the name of the source */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, src_name, 1, ORTE_NAME))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(buf);
        return rc;
    }

    /* send the buffer to the correct HNP */
    ORTE_HNP_NAME_FROM_JOB(&hnp, src_name->jobid);
    if (0 > (rc = orte_rml.send_buffer_nb(&hnp, buf, ORTE_RML_TAG_IOF_HNP,
                                          0, send_cb, NULL))) {
        /* the send never started, so send_cb is not expected to run:
         * report the failure and release the buffer here instead of
         * leaking it and claiming success */
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(buf);
        return rc;
    }

    return ORTE_SUCCESS;
}
/**
 * Ask the HNP to stop forwarding a stream to this tool and wait until
 * the HNP has confirmed it.
 *
 * The request consists of the stream tag with ORTE_IOF_CLOSE set,
 * followed by the name of the source process. Before the request is
 * sent the component's "closed" flag is cleared; the receive side sets
 * it again when the CLOSE handshake arrives, which ends the wait at
 * the bottom of this function.
 *
 * @param src_name  Process whose output should no longer be forwarded;
 *                  its jobid selects the HNP that receives the request.
 * @param src_tag   Stream(s) to stop forwarding.
 *
 * @return ORTE_SUCCESS after the close has been confirmed, otherwise
 *         the error code from packing or sending the request.
 */
static int tool_close(const orte_process_name_t* src_name,
                      orte_iof_tag_t src_tag)
{
    opal_buffer_t *buf;
    orte_iof_tag_t tag;
    orte_process_name_t hnp;
    int rc;

    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                         "%s closing output for proc %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(src_name)));

    buf = OBJ_NEW(opal_buffer_t);

    /* setup the tag to stop the copy */
    tag = src_tag | ORTE_IOF_CLOSE;

    /* pack the tag - we do this first so that flow control messages can
     * consist solely of the tag
     */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &tag, 1, ORTE_IOF_TAG))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(buf);
        return rc;
    }

    /* pack the name of the source */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, src_name, 1, ORTE_NAME))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(buf);
        return rc;
    }

    /* flag that the close is incomplete */
    mca_iof_tool_component.closed = false;

    /* send the buffer to the correct HNP */
    ORTE_HNP_NAME_FROM_JOB(&hnp, src_name->jobid);
    if (0 > (rc = orte_rml.send_buffer_nb(&hnp, buf, ORTE_RML_TAG_IOF_HNP,
                                          0, send_cb, NULL))) {
        /* the request never left, so no confirmation will ever come
         * back: bail out rather than wait below for a handshake that
         * cannot arrive, and release the buffer send_cb will not see */
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(buf);
        return rc;
    }

    /* wait right here until the close is confirmed */
    ORTE_PROGRESSED_WAIT(mca_iof_tool_component.closed, 0, 1);

    return ORTE_SUCCESS;
}
/**
 * Fault-tolerance event hook for the tool module.
 *
 * Fault-tolerance events are not handled by tools.
 *
 * @param state  Fault-tolerance state being signalled (unused).
 * @return ORTE_ERR_NOT_IMPLEMENTED always.
 */
static int tool_ft_event(int state)
{
    return ORTE_ERR_NOT_IMPLEMENTED;
}

57
orte/mca/iof/tool/iof_tool.h Обычный файл
Просмотреть файл

@ -0,0 +1,57 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
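* The tool IOF component is used in tools. It interfaces with the
* HNP to request forwarding of stdout/err/diag from any combination
* of procs, and is intended to forward stdin from the tool to a
* specified proc, provided the user has allowed that functionality.
* Note that the current push implementation (tool_push() in
* iof_tool.c) returns ORTE_ERR_NOT_SUPPORTED, so stdin forwarding
* from tools is not yet available.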
*
* Flow control is employed on a per-stream basis to ensure that
* SOURCEs don't overwhelm SINK resources (E.g., send an entire input
* file to an orted before the target process has read any of it).
*
*/
#ifndef ORTE_IOF_TOOL_H
#define ORTE_IOF_TOOL_H
#include "orte/mca/iof/iof.h"
BEGIN_C_DECLS
/*
* Component-wide state of the tool IOF component.
*/
struct orte_iof_tool_component_t {
/* base IOF component data (version info, open/close/query) */
orte_iof_base_component_t super;
/* close handshake flag: cleared before a CLOSE request is sent to the
* HNP, set to true when the CLOSE handshake message is received */
bool closed;
/* mutex constructed when the component is selected; held around the
* RML receive cancel at component close and destructed afterwards */
opal_mutex_t lock;
};
typedef struct orte_iof_tool_component_t orte_iof_tool_component_t;
ORTE_MODULE_DECLSPEC extern orte_iof_tool_component_t mca_iof_tool_component;
extern orte_iof_base_module_t orte_iof_tool_module;
/*
* RML receive callback for messages arriving on ORTE_RML_TAG_IOF_PROXY.
* It schedules the message for processing outside of the receive and
* then re-posts the (non-persistent) receive. The sender, buffer and
* tag are passed on with the message; status and cbdata are not used
* by the implementation.
*/
void orte_iof_tool_recv(int status, orte_process_name_t* sender,
opal_buffer_t* buffer, orte_rml_tag_t tag,
void* cbdata);
END_C_DECLS
#endif

135
orte/mca/iof/tool/iof_tool_component.c Обычный файл
Просмотреть файл

@ -0,0 +1,135 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/util/show_help.h"
#include "opal/runtime/opal_progress.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/mca/rml/rml.h"
#include "orte/util/proc_info.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/runtime/orte_globals.h"
#include "iof_tool.h"
/*
* Local functions
*/
static int orte_iof_tool_open(void);
static int orte_iof_tool_close(void);
static int orte_iof_tool_query(mca_base_module_t **module, int *priority);
/*
* Local variables
*/
static bool initialized = false;
/*
* Public string showing the iof tool component version number
*/
const char *mca_iof_tool_component_version_string =
"Open MPI tool iof MCA component version " ORTE_VERSION;
/*
* The tool component instance. Only the leading "super" member is
* initialized explicitly here; the "closed" flag and the lock are set
* up in orte_iof_tool_query() when the component is selected. The
* initializer is positional, so the order of entries matters.
*/
orte_iof_tool_component_t mca_iof_tool_component = {
{
{
ORTE_IOF_BASE_VERSION_2_0_0,
"tool", /* MCA component name */
ORTE_MAJOR_VERSION, /* MCA component major version */
ORTE_MINOR_VERSION, /* MCA component minor version */
ORTE_RELEASE_VERSION, /* MCA component release version */
/* Component open, close, and query functions */
orte_iof_tool_open,
orte_iof_tool_close,
orte_iof_tool_query
},
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
}
}
};
/**
 * Component open hook.
 *
 * The tool component needs no setup at open time; all of its
 * initialization happens when it is queried.
 *
 * @return ORTE_SUCCESS always.
 */
static int orte_iof_tool_open(void)
{
    return ORTE_SUCCESS;
}
/**
 * Component close hook.
 *
 * If the component was selected (orte_iof_tool_query() succeeded), the
 * receive posted on ORTE_RML_TAG_IOF_PROXY is cancelled while holding
 * the component lock, and the lock is then destructed. The
 * "initialized" flag is cleared afterwards so that calling close a
 * second time does not lock and destruct an already-destructed mutex.
 *
 * @return the result of the receive cancel, or ORTE_SUCCESS when the
 *         component was never initialized.
 */
static int orte_iof_tool_close(void)
{
    int rc = ORTE_SUCCESS;

    if (initialized) {
        OPAL_THREAD_LOCK(&mca_iof_tool_component.lock);
        /* Cancel the RML receive */
        rc = orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_IOF_PROXY);
        OPAL_THREAD_UNLOCK(&mca_iof_tool_component.lock);
        OBJ_DESTRUCT(&mca_iof_tool_component.lock);
        /* the lock is gone - a repeated close must be a no-op */
        initialized = false;
    }

    return rc;
}
/**
 * Component query: decide whether the tool module should be used.
 *
 * The module is offered only to tool processes. For a tool, a
 * non-blocking RML receive is posted for messages from the HNP IOF
 * component, the component lock is constructed, the "closed" flag is
 * cleared, and the module is returned with priority 100 so that it is
 * the one selected.
 *
 * @param module    [out] the tool module on success, NULL otherwise.
 * @param priority  [out] 100 on success, -1 otherwise.
 *
 * @return ORTE_SUCCESS when the module is offered, ORTE_ERROR when this
 *         process is not a tool, or the error from posting the receive.
 */
static int orte_iof_tool_query(mca_base_module_t **module, int *priority)
{
    int status;

    /* start from "not selectable" and only upgrade at the very end */
    *module = NULL;
    *priority = -1;

    /* if we are not a tool, then don't use this module */
    if (!orte_process_info.tool) {
        return ORTE_ERROR;
    }

    /* listen for messages from the HNP IOF component */
    status = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
                                     ORTE_RML_TAG_IOF_PROXY,
                                     ORTE_RML_NON_PERSISTENT,
                                     orte_iof_tool_recv,
                                     NULL);
    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
        return status;
    }

    OBJ_CONSTRUCT(&mca_iof_tool_component.lock, opal_mutex_t);
    mca_iof_tool_component.closed = false;

    /* we must be selected */
    *priority = 100;
    *module = (mca_base_module_t *) &orte_iof_tool_module;
    initialized = true;

    return ORTE_SUCCESS;
}

137
orte/mca/iof/tool/iof_tool_receive.c Обычный файл
Просмотреть файл

@ -0,0 +1,137 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include <errno.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#ifdef HAVE_STRING_H
#include <string.h>
#endif /* HAVE_STRING_H */
#include "orte/util/show_help.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/iof/iof.h"
#include "orte/mca/iof/base/base.h"
#include "iof_tool.h"
/**
 * Event handler that processes one IOF message received from the HNP.
 *
 * The message starts with the stream tag. If the tag carries
 * ORTE_IOF_CLOSE the message is the tail end of a close handshake: the
 * component's "closed" flag is set and nothing else is unpacked.
 * Otherwise the name of the originating process and the data bytes
 * follow, and the data is written out locally - via the stdout write
 * event when the tag carries ORTE_IOF_STDOUT, via the stderr write
 * event for anything else.
 *
 * @param fd      Unused.
 * @param event   Unused.
 * @param cbdata  The orte_message_event_t holding the sender and the
 *                buffer; it is released before returning.
 */
static void process_msg(int fd, short event, void *cbdata)
{
    orte_message_event_t *msg = (orte_message_event_t*)cbdata;
    unsigned char payload[ORTE_IOF_BASE_MSG_MAX];
    orte_process_name_t origin;
    orte_iof_tag_t stream;
    int32_t cnt, nbytes;
    int rc;

    /* the stream tag comes first, as the message may carry nothing else */
    cnt = 1;
    rc = opal_dss.unpack(msg->buffer, &stream, &cnt, ORTE_IOF_TAG);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }

    /* a CLOSE tag just completes the handshake for a stream we closed */
    if (ORTE_IOF_CLOSE & stream) {
        OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                             "%s received CLOSE handshake from remote hnp %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(&msg->sender)));
        mca_iof_tool_component.closed = true;
        goto CLEAN_RETURN;
    }

    /* whose io is this? */
    cnt = 1;
    rc = opal_dss.unpack(msg->buffer, &origin, &cnt, ORTE_NAME);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }

    /* the data itself; nbytes goes in as the capacity and comes back as
     * the actual number of bytes that were sent */
    nbytes = ORTE_IOF_BASE_MSG_MAX;
    rc = opal_dss.unpack(msg->buffer, payload, &nbytes, OPAL_BYTE);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                         "%s unpacked %d bytes from remote proc %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), nbytes,
                         ORTE_NAME_PRINT(&origin)));

    /* write the output locally: everything that is not stdout goes to stderr */
    if (!(ORTE_IOF_STDOUT & stream)) {
        orte_iof_base_write_output(&origin, stream, payload, nbytes, &orte_iof_base.iof_write_stderr);
    } else {
        orte_iof_base_write_output(&origin, stream, payload, nbytes, &orte_iof_base.iof_write_stdout);
    }

CLEAN_RETURN:
    /* release the message event */
    OBJ_RELEASE(msg);
    return;
}
/**
 * RML receive callback for IOF messages sent to this tool.
 *
 * The message is not processed inside the receive. Instead an event is
 * set up so that process_msg() handles it as soon as the receive has
 * returned, and the (non-persistent) receive is posted again.
 *
 * @param status  Receive status (unused).
 * @param sender  Process that sent the message.
 * @param buffer  Received buffer; not released here.
 * @param tag     RML tag the message arrived on.
 * @param cbdata  Unused.
 */
void orte_iof_tool_recv(int status, orte_process_name_t* sender,
                        opal_buffer_t* buffer, orte_rml_tag_t tag,
                        void* cbdata)
{
    int ret;

    OPAL_OUTPUT_VERBOSE((5, orte_iof_base.iof_output,
                         "%s iof:tool:receive got message from %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(sender)));

    /* Defer the real work: we have to get out of the recv before the
     * message is processed, otherwise the rest of the job would run
     * while still inside this receive.
     *
     * The macro makes a copy of the buffer, which process_msg() releases
     * together with the message event. The incoming buffer itself is
     * NOT released here, although its payload IS transferred to the
     * message buffer for later processing.
     */
    ORTE_MESSAGE_EVENT(sender, buffer, tag, process_msg);

    /* reissue the recv */
    ret = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
                                  ORTE_RML_TAG_IOF_PROXY,
                                  ORTE_RML_NON_PERSISTENT,
                                  orte_iof_tool_recv,
                                  NULL);
    if (ORTE_SUCCESS != ret) {
        ORTE_ERROR_LOG(ret);
    }
}

Просмотреть файл

@ -171,7 +171,7 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *data,
return rc;
}
/* pack the control flags */
if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &orte_debugger_daemon->controls, 1, OPAL_UINT16))) {
if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &orte_debugger_daemon->controls, 1, ORTE_JOB_CONTROL))) {
ORTE_ERROR_LOG(rc);
return rc;
}
@ -197,7 +197,13 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *data,
}
/* pack the control flags for this job */
if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &jdata->controls, 1, OPAL_UINT16))) {
if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &jdata->controls, 1, ORTE_JOB_CONTROL))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the stdin target */
if (ORTE_SUCCESS != (rc = opal_dss.pack(data, &jdata->stdin_target, 1, ORTE_VPID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
@ -354,7 +360,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data,
goto REPORT_ERROR;
}
cnt=1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &(orte_odls_globals.debugger->controls), &cnt, OPAL_UINT16))) {
if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &(orte_odls_globals.debugger->controls), &cnt, ORTE_JOB_CONTROL))) {
ORTE_ERROR_LOG(rc);
goto REPORT_ERROR;
}
@ -410,7 +416,13 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data,
}
/* unpack the control flags for the job */
cnt=1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobdat->controls, &cnt, OPAL_UINT16))) {
if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobdat->controls, &cnt, ORTE_JOB_CONTROL))) {
ORTE_ERROR_LOG(rc);
goto REPORT_ERROR;
}
/* unpack the stdin target for the job */
cnt=1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobdat->stdin_target, &cnt, ORTE_VPID))) {
ORTE_ERROR_LOG(rc);
goto REPORT_ERROR;
}
@ -1118,7 +1130,7 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
}
}
rc = fork_local(app, child, app->env, ORTE_JOB_CONTROL_FORWARD_OUTPUT & jobdat->controls);
rc = fork_local(app, child, app->env, jobdat->controls, jobdat->stdin_target);
/* reacquire lock so we don't double unlock... */
OPAL_THREAD_LOCK(&orte_odls_globals.mutex);
if (ORTE_SUCCESS != rc) {
@ -1165,7 +1177,7 @@ CLEANUP:
(ORTE_JOB_CONTROL_FORWARD_OUTPUT & orte_odls_globals.debugger->controls) ? "output forwarded" : "no output"));
fork_local(orte_odls_globals.debugger->apps[0], NULL, NULL,
ORTE_JOB_CONTROL_FORWARD_OUTPUT & orte_odls_globals.debugger->controls);
orte_odls_globals.debugger->controls, ORTE_VPID_INVALID);
orte_odls_globals.debugger_launched = true;
}
@ -1663,39 +1675,12 @@ GOTCHILD:
goto MOVEON;
}
/* If this child was the (vpid==0), we hooked it up to orterun's
STDIN SOURCE earlier (do not change this without also changing
odsl_default_fork_local_proc()). So we have to tell the SOURCE
a) that we don't want any more data and b) that it should not
expect any more ACKs from this endpoint (so that the svc
component can still flush/shut down cleanly).
Note that the source may have already detected that this
process died as part of an OOB/RML exception, but that's ok --
its "exception" detection capabilities are not reliable, so we
*have* to do this unpublish here, even if it arrives after an
exception is detected and handled (in which case this unpublish
request will be ignored/discarded. */
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
"%s odls:wait_local_proc pid %ld corresponds to %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(long)pid,
ORTE_NAME_PRINT(child->name)));
if (0 == child->name->vpid) {
rc = orte_iof.iof_unpublish(child->name, ORTE_NS_CMP_ALL,
ORTE_IOF_STDIN);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
/* We can't really abort, so keep going... */
}
}
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
"%s odls:wait_local_proc orted sent IOF unpub message!",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
/* determine the state of this process */
if(WIFEXITED(status)) {
/* set the exit status appropriately */

Просмотреть файл

@ -102,6 +102,7 @@ static void orte_odls_job_constructor(orte_odls_job_t *ptr)
ptr->apps = NULL;
ptr->num_apps = 0;
ptr->controls = 0;
ptr->stdin_target = ORTE_VPID_INVALID;
ptr->total_slots_alloc = 0;
ptr->num_procs = 0;
ptr->num_local_procs = 0;

Просмотреть файл

@ -75,7 +75,8 @@ typedef struct orte_odls_job_t {
orte_jobid_t jobid; /* jobid for this data */
orte_app_context_t **apps; /* app_contexts for this job */
orte_std_cntr_t num_apps; /* number of app_contexts */
uint16_t controls; /* control flags for job */
orte_job_controls_t controls; /* control flags for job */
orte_vpid_t stdin_target; /* where stdin is to go */
orte_std_cntr_t total_slots_alloc;
orte_vpid_t num_procs;
int32_t num_local_procs;
@ -133,7 +134,8 @@ orte_odls_base_default_construct_child_list(opal_buffer_t *data,
typedef int (*orte_odls_base_fork_local_proc_fn_t)(orte_app_context_t *context,
orte_odls_child_t *child,
char **environ_copy,
bool forward_output);
orte_job_controls_t controls,
orte_vpid_t stdin_target);
ORTE_DECLSPEC int
orte_odls_base_default_launch_local(orte_jobid_t job,

Просмотреть файл

@ -168,7 +168,8 @@ int orte_odls_default_kill_local_procs(orte_jobid_t job, bool set_state)
static int odls_default_fork_local_proc(orte_app_context_t* context,
orte_odls_child_t *child,
char **environ_copy,
bool forward_output)
orte_job_controls_t controls,
orte_vpid_t stdin_target)
{
orte_iof_base_io_conf_t opts;
int rc;
@ -181,11 +182,9 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
default */
opts.usepty = OMPI_ENABLE_PTY_SUPPORT;
/* BWB - Fix post beta. Should setup stdin in orterun and make
part of the app_context. Do not change this without also
changing the reverse of this in
odls_default_wait_local_proc(). */
if (NULL != child && child->name->vpid == 0) {
/* do we want to setup stdin? */
if (NULL != child &&
(stdin_target == ORTE_VPID_WILDCARD || child->name->vpid == stdin_target)) {
opts.connect_stdin = true;
} else {
opts.connect_stdin = false;
@ -259,7 +258,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
}
} else if (!forward_output) {
} else if (!(ORTE_JOB_CONTROL_FORWARD_OUTPUT & controls)) {
/* tie stdin/out/err/internal to /dev/null */
int fdnull;
for (i=0; i < 3; i++) {
@ -320,7 +319,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
exit(1);
} else {
if (NULL != child && forward_output) {
if (NULL != child && (ORTE_JOB_CONTROL_FORWARD_OUTPUT & controls)) {
/* connect endpoints IOF */
rc = orte_iof_base_setup_parent(child->name, &opts);
if(ORTE_SUCCESS != rc) {

Просмотреть файл

@ -90,11 +90,11 @@ static int odls_process_kill_local_procs(orte_jobid_t job, bool set_state)
* Fork/exec the specified processes
*/
static int odls_process_fork_local_proc(
orte_app_context_t* context,
orte_odls_child_t *child,
char **environ_copy,
bool forward_output)
static int odls_process_fork_local_proc(orte_app_context_t* context,
orte_odls_child_t *child,
char **environ_copy,
orte_job_controls_t controls,
orte_vpid_t stdin_target)
{
pid_t pid;
orte_iof_base_io_conf_t opts;
@ -121,11 +121,8 @@ static int odls_process_fork_local_proc(
default */
opts.usepty = OMPI_ENABLE_PTY_SUPPORT;
/* BWB - Fix post beta. Should setup stdin in orterun and make
part of the app_context. Do not change this without also
changing the reverse of this in
odls_default_wait_local_proc(). */
if( 0 == child->name->vpid ) {
/* do we want to setup stdin? */
if (stdin_target == ORTE_VPID_WILDCARD || child->name->vpid == stdin_target) {
opts.connect_stdin = true;
} else {
opts.connect_stdin = false;

Просмотреть файл

@ -58,8 +58,7 @@ static int orte_plm_base_report_launched(orte_jobid_t job);
int orte_plm_base_setup_job(orte_job_t *jdata)
{
int rc, fd;
orte_process_name_t name = {ORTE_JOBID_INVALID, 0};
int rc;
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
"%s plm:base:setup_job for job %s",
@ -118,29 +117,15 @@ int orte_plm_base_setup_job(orte_job_t *jdata)
exit(0);
}
/*
* setup I/O forwarding
*/
name.jobid = jdata->jobid;
if (ORTE_SUCCESS != (rc = orte_iof.iof_pull(&name, ORTE_NS_CMP_JOBID, ORTE_IOF_STDOUT, 1))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if (ORTE_SUCCESS != (rc = orte_iof.iof_pull(&name, ORTE_NS_CMP_JOBID, ORTE_IOF_STDERR, 2))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* IOF cannot currently handle multiple pulls to the same fd. So
dup stderr to another fd. :-\ */
fd = dup(2);
if (fd >= 0 &&
ORTE_SUCCESS != (rc = orte_iof.iof_pull(&name, ORTE_NS_CMP_JOBID,
ORTE_IOF_INTERNAL, fd))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/*** RHC: USER REQUEST TO TIE-OFF STDXXX TO /DEV/NULL
*** WILL BE SENT IN LAUNCH MESSAGE AS PART OF CONTROLS FIELD.
*** SO IF USER WANTS NO IO BEING SENT AROUND, THE ORTEDS
*** WILL TIE IT OFF AND THE IOF WILL NEVER RECEIVE ANYTHING.
*** THE IOF AUTOMATICALLY KNOWS TO OUTPUT ANY STDXXX
*** DATA IT -DOES- RECEIVE TO THE APPROPRIATE FD, SO THERE
*** IS NOTHING WE NEED DO HERE TO SETUP IOF
***/
#if OPAL_ENABLE_FT == 1
/*
* Notify the Global SnapC component regarding new job
@ -156,6 +141,7 @@ int orte_plm_base_setup_job(orte_job_t *jdata)
int orte_plm_base_launch_apps(orte_jobid_t job)
{
orte_job_t *jdata;
orte_daemon_cmd_flag_t command;
opal_buffer_t *buffer;
int rc;
@ -167,6 +153,13 @@ int orte_plm_base_launch_apps(orte_jobid_t job)
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_JOBID_PRINT(job)));
/* find the job's data record */
if (NULL == (jdata = orte_get_job_data_object(job))) {
/* bad jobid */
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
return ORTE_ERR_BAD_PARAM;
}
/* setup the buffer */
buffer = OBJ_NEW(opal_buffer_t);
@ -206,8 +199,12 @@ int orte_plm_base_launch_apps(orte_jobid_t job)
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
"%s plm:base:launch wiring up iof",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
/* push stdin - the IOF will know what to do with the specified target */
name.jobid = job;
if (ORTE_SUCCESS != (rc = orte_iof.iof_push(&name, ORTE_NS_CMP_JOBID, ORTE_IOF_STDIN, 0))) {
name.vpid = jdata->stdin_target;
if (ORTE_SUCCESS != (rc = orte_iof.push(&name, ORTE_IOF_STDIN, 0))) {
ORTE_ERROR_LOG(rc);
return rc;
}

Просмотреть файл

@ -52,8 +52,8 @@ BEGIN_C_DECLS
#define ORTE_RML_TAG_INVALID 0
#define ORTE_RML_TAG_DAEMON 1
#define ORTE_RML_TAG_IOF_SVC 2
#define ORTE_RML_TAG_IOF_CLNT 3
#define ORTE_RML_TAG_IOF_HNP 2
#define ORTE_RML_TAG_IOF_PROXY 3
#define ORTE_RML_TAG_XCAST_BARRIER 4
#define ORTE_RML_TAG_PLM 5
#define ORTE_RML_TAG_PLM_PROXY 6

Просмотреть файл

@ -256,4 +256,14 @@ int orte_dt_compare_grpcomm_mode(orte_grpcomm_mode_t *value1, orte_grpcomm_mode_
return OPAL_EQUAL;
}
/*
 * ORTE_IOF_TAG
 *
 * Three-way comparison of two IOF tags by their numeric value.
 * The "type" argument is not consulted.
 */
int orte_dt_compare_iof_tag(orte_iof_tag_t *value1, orte_iof_tag_t *value2, opal_data_type_t type)
{
    const orte_iof_tag_t lhs = *value1;
    const orte_iof_tag_t rhs = *value2;

    if (lhs == rhs) {
        return OPAL_EQUAL;
    }

    return (lhs > rhs) ? OPAL_VALUE1_GREATER : OPAL_VALUE2_GREATER;
}
#endif

Просмотреть файл

@ -356,4 +356,21 @@ int orte_dt_copy_grpcomm_mode(orte_grpcomm_mode_t **dest, orte_grpcomm_mode_t *s
return ORTE_SUCCESS;
}
/*
 * ORTE_IOF_TAG
 *
 * Allocate a new tag with malloc and copy *src into it.  On success the
 * new storage is handed back through *dest; on allocation failure
 * *dest is left NULL and ORTE_ERR_OUT_OF_RESOURCE is logged and returned.
 * The "type" argument is not consulted.
 */
int orte_dt_copy_iof_tag(orte_iof_tag_t **dest, orte_iof_tag_t *src, opal_data_type_t type)
{
    orte_iof_tag_t *copy;

    copy = (orte_iof_tag_t*)malloc(sizeof(orte_iof_tag_t));
    *dest = copy;

    if (NULL != copy) {
        memcpy(copy, src, sizeof(orte_iof_tag_t));
        return ORTE_SUCCESS;
    }

    ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
    return ORTE_ERR_OUT_OF_RESOURCE;
}
#endif

Просмотреть файл

@ -181,7 +181,14 @@ int orte_dt_pack_job(opal_buffer_t *buffer, const void *src,
/* pack the control flags */
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer,
(void*)(&(jobs[i]->controls)), 1, OPAL_UINT16))) {
(void*)(&(jobs[i]->controls)), 1, ORTE_JOB_CONTROL))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the stdin target */
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer,
(void*)(&(jobs[i]->stdin_target)), 1, ORTE_VPID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
@ -835,4 +842,19 @@ int orte_dt_pack_grpcomm_mode(opal_buffer_t *buffer, const void *src, int32_t nu
return ret;
}
/*
 * ORTE_IOF_TAG
 *
 * Pack num_vals tags from src into buffer by delegating to the packer
 * for the underlying type (ORTE_IOF_TAG_T).  A failure is logged and
 * its code is returned to the caller.
 */
int orte_dt_pack_iof_tag(opal_buffer_t *buffer, const void *src, int32_t num_vals,
                         opal_data_type_t type)
{
    int ret = opal_dss_pack_buffer(buffer, src, num_vals, ORTE_IOF_TAG_T);

    if (ORTE_SUCCESS != ret) {
        ORTE_ERROR_LOG(ret);
    }

    return ret;
}
#endif

Просмотреть файл

@ -160,6 +160,10 @@ int orte_dt_std_print(char **output, char *prefix, void *src, opal_data_type_t t
break;
#endif
case ORTE_IOF_TAG:
orte_dt_quick_print(output, "ORTE_IOF_TAG", prefix, src, ORTE_IOF_TAG_T);
break;
default:
ORTE_ERROR_LOG(ORTE_ERR_UNKNOWN_DATA_TYPE);
return ORTE_ERR_UNKNOWN_DATA_TYPE;
@ -208,9 +212,9 @@ int orte_dt_print_job(char **output, char *prefix, orte_job_t *src, opal_data_ty
asprintf(&pfx2, "%s", prefix);
}
asprintf(&tmp, "\n%sData for job: %s\tNum apps: %ld\tControls: %0x\tState: %0x\tAbort: %s", pfx2,
asprintf(&tmp, "\n%sData for job: %s\tNum apps: %ld\tControls: %0x\tStdin target: %s\tState: %0x\tAbort: %s", pfx2,
ORTE_JOBID_PRINT(src->jobid),
(long)src->num_apps, src->controls,
(long)src->num_apps, src->controls, ORTE_VPID_PRINT(src->stdin_target),
src->state, src->abort ? "True" : "False");
asprintf(&pfx, "%s\t", pfx2);

Просмотреть файл

@ -81,6 +81,10 @@ int orte_dt_std_size(size_t *size, void *src, opal_data_type_t type)
break;
#endif
case ORTE_IOF_TAG:
*size = sizeof(orte_iof_tag_t);
break;
default:
ORTE_ERROR_LOG(ORTE_ERR_UNKNOWN_DATA_TYPE);
return ORTE_ERR_UNKNOWN_DATA_TYPE;

Просмотреть файл

@ -34,6 +34,7 @@
#include "orte/mca/plm/plm_types.h"
#include "orte/mca/rmaps/rmaps_types.h"
#include "orte/mca/rml/rml_types.h"
#include "orte/mca/iof/iof_types.h"
#include "orte/runtime/orte_globals.h"
@ -74,6 +75,7 @@ int orte_dt_compare_tags(orte_rml_tag_t *value1,
opal_data_type_t type);
int orte_dt_compare_daemon_cmd(orte_daemon_cmd_flag_t *value1, orte_daemon_cmd_flag_t *value2, opal_data_type_t type);
int orte_dt_compare_grpcomm_mode(orte_grpcomm_mode_t *value1, orte_grpcomm_mode_t *value2, opal_data_type_t type);
int orte_dt_compare_iof_tag(orte_iof_tag_t *value1, orte_iof_tag_t *value2, opal_data_type_t type);
#endif
/** Data type copy functions */
@ -96,6 +98,7 @@ int orte_dt_copy_tag(orte_rml_tag_t **dest,
opal_data_type_t type);
int orte_dt_copy_daemon_cmd(orte_daemon_cmd_flag_t **dest, orte_daemon_cmd_flag_t *src, opal_data_type_t type);
int orte_dt_copy_grpcomm_mode(orte_grpcomm_mode_t **dest, orte_grpcomm_mode_t *src, opal_data_type_t type);
int orte_dt_copy_iof_tag(orte_iof_tag_t **dest, orte_iof_tag_t *src, opal_data_type_t type);
#endif
/** Data type pack functions */
@ -136,6 +139,8 @@ int orte_dt_pack_daemon_cmd(opal_buffer_t *buffer, const void *src,
int32_t num_vals, opal_data_type_t type);
int orte_dt_pack_grpcomm_mode(opal_buffer_t *buffer, const void *src,
int32_t num_vals, opal_data_type_t type);
int orte_dt_pack_iof_tag(opal_buffer_t *buffer, const void *src, int32_t num_vals,
opal_data_type_t type);
#endif
/** Data type print functions */
@ -201,6 +206,8 @@ int orte_dt_unpack_daemon_cmd(opal_buffer_t *buffer, void *dest,
int32_t *num_vals, opal_data_type_t type);
int orte_dt_unpack_grpcomm_mode(opal_buffer_t *buffer, void *dest,
int32_t *num_vals, opal_data_type_t type);
int orte_dt_unpack_iof_tag(opal_buffer_t *buffer, void *dest, int32_t *num_vals,
opal_data_type_t type);
#endif
END_C_DECLS

Просмотреть файл

@ -197,11 +197,19 @@ int orte_dt_unpack_job(opal_buffer_t *buffer, void *dest,
/* unpack control flags */
n = 1;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer,
(&(jobs[i]->controls)), &n, OPAL_UINT16))) {
(&(jobs[i]->controls)), &n, ORTE_JOB_CONTROL))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack stdin target */
n = 1;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer,
(&(jobs[i]->stdin_target)), &n, ORTE_VPID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the total slots allocated to the job */
n = 1;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer,
@ -916,4 +924,18 @@ int orte_dt_unpack_grpcomm_mode(opal_buffer_t *buffer, void *dest, int32_t *num_
return ret;
}
/*
 * ORTE_IOF_TAG
 *
 * Unpack tags from buffer into dest by delegating to the unpacker for
 * the underlying type (ORTE_IOF_TAG_T).  The result of that call is
 * returned unchanged; no error is logged here.
 */
int orte_dt_unpack_iof_tag(opal_buffer_t *buffer, void *dest, int32_t *num_vals,
                           opal_data_type_t type)
{
    return opal_dss_unpack_buffer(buffer, dest, num_vals, ORTE_IOF_TAG_T);
}
#endif

Просмотреть файл

@ -52,6 +52,8 @@ bool orte_do_not_launch = false;
bool orted_spin_flag = false;
bool orte_static_ports = false;
bool orte_keep_fqdn_hostnames = false;
bool orte_tag_output;
bool orte_xml_output;
int orted_debug_failure;
int orted_debug_failure_delay;
bool orte_homogeneous_nodes = false;
@ -346,6 +348,20 @@ int orte_dt_init(void)
#endif /* !ORTE_DISABLE_FULL_SUPPORT */
tmp = ORTE_IOF_TAG;
if (ORTE_SUCCESS != (rc = opal_dss.register_type(orte_dt_pack_iof_tag,
orte_dt_unpack_iof_tag,
(opal_dss_copy_fn_t)orte_dt_copy_iof_tag,
(opal_dss_compare_fn_t)orte_dt_compare_iof_tag,
(opal_dss_size_fn_t)orte_dt_std_size,
(opal_dss_print_fn_t)orte_dt_std_print,
(opal_dss_release_fn_t)orte_dt_std_release,
OPAL_DSS_UNSTRUCTURED,
"ORTE_IOF_TAG", &tmp))) {
ORTE_ERROR_LOG(rc);
return rc;
}
return ORTE_SUCCESS;
}
@ -482,6 +498,7 @@ static void orte_job_construct(orte_job_t* job)
2);
job->num_apps = 0;
job->controls = ORTE_JOB_CONTROL_FORWARD_OUTPUT;
job->stdin_target = ORTE_VPID_INVALID;
job->total_slots_alloc = 0;
job->num_procs = 0;
job->procs = OBJ_NEW(opal_pointer_array_t);

Просмотреть файл

@ -226,12 +226,15 @@ typedef struct {
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_node_t);
/* define a set of flags to control the launch of a job */
#define ORTE_JOB_CONTROL_LOCAL_SPAWN (uint16_t) 0x0001
#define ORTE_JOB_CONTROL_NON_ORTE_JOB (uint16_t) 0x0002
#define ORTE_JOB_CONTROL_DEBUGGER_DAEMON (uint16_t) 0x0004
#define ORTE_JOB_CONTROL_FORWARD_OUTPUT (uint16_t) 0x0008
#define ORTE_JOB_CONTROL_DO_NOT_MONITOR (uint16_t) 0x0010
#define ORTE_JOB_CONTROL_FORWARD_COMM (uint16_t) 0x0020
typedef uint8_t orte_job_controls_t;
#define ORTE_JOB_CONTROL OPAL_UINT8
#define ORTE_JOB_CONTROL_LOCAL_SPAWN 0x01
#define ORTE_JOB_CONTROL_NON_ORTE_JOB 0x02
#define ORTE_JOB_CONTROL_DEBUGGER_DAEMON 0x04
#define ORTE_JOB_CONTROL_FORWARD_OUTPUT 0x08
#define ORTE_JOB_CONTROL_DO_NOT_MONITOR 0x10
#define ORTE_JOB_CONTROL_FORWARD_COMM 0x20
typedef struct {
/** Base object so this can be put on a list */
@ -245,7 +248,11 @@ typedef struct {
/* flags to control the launch of this job - see above
* for description of supported flags
*/
uint16_t controls;
orte_job_controls_t controls;
/* rank desiring stdin - for now, either one rank, all ranks
* (wildcard), or none (invalid)
*/
orte_vpid_t stdin_target;
/* total slots allocated to this job */
orte_std_cntr_t total_slots_alloc;
/* number of procs in this job */
@ -388,6 +395,9 @@ ORTE_DECLSPEC extern bool orted_spin_flag;
ORTE_DECLSPEC extern bool orte_static_ports;
ORTE_DECLSPEC extern int32_t orte_contiguous_nodes;
ORTE_DECLSPEC extern bool orte_keep_fqdn_hostnames;
ORTE_DECLSPEC extern bool orte_tag_output;
ORTE_DECLSPEC extern bool orte_xml_output;
ORTE_DECLSPEC extern int orte_debug_verbosity;
ORTE_DECLSPEC extern int orted_debug_failure;
ORTE_DECLSPEC extern int orted_debug_failure_delay;
ORTE_DECLSPEC extern bool orte_homogeneous_nodes;

Просмотреть файл

@ -171,6 +171,11 @@ int orte_register_params(void)
"Number of nodes after which contiguous nodename encoding will automatically be used [default: INT_MAX]",
false, false, INT32_MAX, &orte_contiguous_nodes);
mca_base_param_reg_int_name("orte", "tag_output",
"Tag all output with [job,rank] (default: false)",
false, false, (int) false, &value);
orte_tag_output = OPAL_INT_TO_BOOL(value);
mca_base_param_reg_int_name("orte", "xml_output",
"Display all output in XML format (default: false)",
false, false, (int) false, &value);

Просмотреть файл

@ -53,5 +53,6 @@ printf("%d completed MPI_Init\n", rank);
}
MPI_Finalize();
fprintf(stderr, "%d: exiting\n", pid);
return 0;
}

Просмотреть файл

@ -1,4 +1,4 @@
PROGS = no_op sigusr_trap spin orte_nodename orte_spawn orte_loop_spawn orte_loop_child orte_abort get_limits orte_ring spawn_child orte_tool orte_no_op binom oob_stress iof_stress
PROGS = no_op sigusr_trap spin orte_nodename orte_spawn orte_loop_spawn orte_loop_child orte_abort get_limits orte_ring spawn_child orte_tool orte_no_op binom oob_stress iof_stress iof_delay
all: $(PROGS)

68
orte/test/system/iof_delay.c Обычный файл
Просмотреть файл

@ -0,0 +1,68 @@
#include <stdio.h>
#include <signal.h>
#include <math.h>
#include "orte/util/proc_info.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/runtime/runtime.h"
#define MAX_COUNT 300
#define ORTE_IOF_BASE_MSG_MAX 2048
/*
 * IOF delay test.
 *
 * Writes a 10-byte line (nine copies of a letter plus a newline) every
 * three seconds, alternating between fd 2 and fd 1 and starting with
 * fd 2.  The number of lines is taken from argv[1]; when no argument is
 * given MAX_COUNT lines are written, and a negative argument means
 * "effectively forever" (INT_MAX-1 lines).
 */
int
main(int argc, char *argv[]){
    int count;
    int msgsize;
    unsigned char msg[ORTE_IOF_BASE_MSG_MAX];
    int i, j;
    unsigned char chr;

    /*
     * Init
     */
    orte_init(ORTE_NON_TOOL);

    if (argc >= 2) {
        count = atoi(argv[1]);
        if (count < 0) {
            count = INT_MAX-1;
        }
    } else {
        count = MAX_COUNT;
    }

    i = 1;
    for (j=1; j < count+1; j++) {
        /* cycle through the upper-case letters so consecutive lines differ */
        chr = (j % 26) + 65;
        memset(msg, chr, ORTE_IOF_BASE_MSG_MAX);
        msgsize = 10;
        msg[msgsize-1] = '\n';

        /* flip between fd 1 and fd 2 on every pass */
        if (i == 1) {
            i = 2;
        } else {
            i = 1;
        }
        write(i, msg, msgsize);

        sleep(3);
    }

    orte_finalize();

    return 0;
}

Просмотреть файл

@ -10,12 +10,14 @@
#include "orte/runtime/runtime.h"
#define MAX_COUNT 3
#define ORTE_IOF_BASE_MSG_MAX 2048
int
main(int argc, char *argv[]){
int count;
int msgsize;
uint8_t *msg;
unsigned char msg[ORTE_IOF_BASE_MSG_MAX];
int i, j, rc;
double maxpower;
unsigned char chr;
@ -42,26 +44,28 @@ main(int argc, char *argv[]){
readstdin = false;
}
if (0 == ORTE_PROC_MY_NAME->vpid && readstdin) {
while (0 != (msgsize = read(0, msg, ORTE_IOF_BASE_MSG_MAX))) {
if (msgsize > 0) {
msg[msgsize] = '\n';
write(1, msg, msgsize);
}
}
}
for (j=1; j < count+1; j++) {
#if 0
maxpower = (double)(j%7);
msgsize = (int)pow(10.0, maxpower);
msg = (uint8_t*)malloc(msgsize);
#endif
chr = (j % 26) + 65;
memset(msg, chr, msgsize);
memset(msg, chr, ORTE_IOF_BASE_MSG_MAX);
msgsize = 10;
msg[msgsize-1] = '\n';
if (0 == ORTE_PROC_MY_NAME->vpid) {
if (readstdin) {
msgsize = read(0, msg, msgsize);
}
write(1, msg, msgsize);
} else {
write(1, msg, msgsize);
}
write(1, msg, msgsize);
free(msg);
}
orte_finalize();

Просмотреть файл

@ -24,6 +24,7 @@
SUBDIRS += \
tools/orte-checkpoint \
tools/orte-clean \
tools/orte-iof \
tools/orte-ps \
tools/orte-restart \
tools/orted \
@ -33,6 +34,7 @@ SUBDIRS += \
DIST_SUBDIRS += \
tools/orte-checkpoint \
tools/orte-clean \
tools/orte-iof \
tools/orte-ps \
tools/orte-restart \
tools/orted \

48
orte/tools/orte-iof/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,48 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Pull in the shared man-page rules from the top of the source tree
include $(top_srcdir)/Makefile.man-page-rules
# Man pages for this tool; the matching .1in sources are what get distributed
man_pages = orte-iof.1
EXTRA_DIST = $(man_pages:.1=.1in)
# The tool is only declared when full ORTE support is enabled, and it is
# only installed (binary, man page, help file) when binaries are installed
if !ORTE_DISABLE_FULL_SUPPORT
if OMPI_INSTALL_BINARIES
bin_PROGRAMS = orte-iof
nodist_man_MANS = $(man_pages)
# Ensure that the man pages are rebuilt if the opal_config.h file
# changes; a "good enough" way to know if configure was run again (and
# therefore the release date or version may have changed)
$(nodist_man_MANS): $(top_builddir)/opal/include/opal_config.h
dist_pkgdata_DATA = help-orte-iof.txt
endif # OMPI_INSTALL_BINARIES
# Single-source program linked against the ORTE library
orte_iof_SOURCES = orte-iof.c
orte_iof_LDADD = $(top_builddir)/orte/libopen-rte.la
endif # !ORTE_DISABLE_FULL_SUPPORT
# Remove the generated man pages on distclean
distclean-local:
rm -f $(man_pages)

36
orte/tools/orte-iof/help-orte-iof.txt Обычный файл
Просмотреть файл

@ -0,0 +1,36 @@
# -*- text -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This is the US/English help file for Open MPI IOF tool
#
[usage]
orte-iof [OPTIONS]
Open MPI IO Forwarding Tool
%s
#
[pid-required]
The orte-iof tool requires the pid of the mpirun whose output
you would like to monitor. This is needed to ensure that the
tool contacts the correct mpirun when requesting access to the
output from the specified ranks.
Please use the --pid option to specify the mpirun's pid.
#

89
orte/tools/orte-iof/orte-iof.1in Обычный файл
Просмотреть файл

@ -0,0 +1,89 @@
.\"
.\" Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
.\" University Research and Technology
.\" Corporation. All rights reserved.
.\" Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
.\"
.\" Man page for OMPI's ompi-iof command
.\"
.\" .TH name section center-footer left-footer center-header
.TH OMPI-IOF 1 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
.\" **************************
.\" Name Section
.\" **************************
.SH NAME
.
ompi-iof, orte-iof \- Displays a copy of the output streams from a process.
.
.PP
.
\fBNOTE:\fP \fIompi-iof\fP and \fIorte-iof\fP are exact
synonyms for each other. Using either name will result in
identical behavior.
.
.\" **************************
.\" Synopsis Section
.\" **************************
.SH SYNOPSIS
.
.B ompi-iof
[ options ]
.
.\" **************************
.\" Options Section
.\" **************************
.SH Options
.
\fIompi-iof\fR will display a copy of the indicated stdout, stderr, and/or stddiag streams
from the designated process.
.
.TP 10
.B -pid | --pid
The pid of the mpirun executing the process whose streams are to be copied
.
.
.TP
.B -stdout | --stdout
Display a copy of the specified process' stdout. If no options are specified, this will
act as the default
.
.
.TP
.B -stderr | --stderr
Display a copy of the specified process' stderr.
.
.
.TP
.B -stddiag | --stddiag
Display a copy of the specified process' stddiag.
.
.
.TP
.B -rank | --rank
The rank of the process whose output is to be copied.
.
.
.TP
.B -tag-output | --tag-output
Tag each line of output with the process name and stream name.
.
.
.\" **************************
.\" Description Section
.\" **************************
.SH DESCRIPTION
.
.PP
\fIompi-iof\fR displays a copy of the indicated stdout, stderr, and/or stddiag streams
from the designated process. At this time, a ctrl-C must be used to terminate the program.
The program will terminate cleanly, telling the associated mpirun to close the requested
streams before exiting.
.
.
.\" **************************
.\" See Also Section
.\" **************************
.
.SH SEE ALSO
orterun(1)
.

315
orte/tools/orte-iof/orte-iof.c Обычный файл
Просмотреть файл

@ -0,0 +1,315 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
* ORTE IOF command
*
*/
#include "orte_config.h"
#include "orte/constants.h"
#include <stdio.h>
#include <errno.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif /* HAVE_STDLIB_H */
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif /* HAVE_SYS_STAT_H */
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif /* HAVE_SYS_TYPES_H */
#ifdef HAVE_SYS_WAIT_H
#include <sys/wait.h>
#endif /* HAVE_SYS_WAIT_H */
#ifdef HAVE_STRING_H
#include <string.h>
#endif /* HAVE_STRING_H */
#ifdef HAVE_DIRENT_H
#include <dirent.h>
#endif /* HAVE_DIRENT_H */
#include "opal/util/cmd_line.h"
#include "opal/util/argv.h"
#include "opal/util/opal_environ.h"
#include "opal/util/os_path.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/runtime/opal.h"
#if OPAL_ENABLE_FT == 1
#include "opal/runtime/opal_cr.h"
#endif
#include "opal/dss/dss.h"
#include "orte/runtime/runtime.h"
#include "orte/util/proc_info.h"
#include "opal/util/os_path.h"
#include "orte/util/session_dir.h"
#include "orte/util/hnp_contact.h"
#include "orte/util/name_fns.h"
#include "orte/util/show_help.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/iof/iof.h"
#if OPAL_ENABLE_FT == 1
#include "orte/mca/snapc/base/base.h"
#endif
#include "orte/runtime/orte_globals.h"
/*****************************************
* Global Vars for Command line Arguments
*****************************************/
/* Values filled in by the command line parser (see cmd_line_opts) plus
 * the HNP contact record selected in main().
 */
static struct {
/* -h/--help was given */
bool help;
/* value of --pid; main() initializes it to -1 and treats <0 as "not given" */
int hnppid;
/* raw argument of --rank.
 * NOTE(review): not read anywhere in this file - main() always targets vpid 0
 */
char *ranks;
/* --stdout was given */
bool stdout_req;
/* --stderr was given */
bool stderr_req;
/* --stddiag was given */
bool stddiag_req;
/* NOTE(review): no option in cmd_line_opts stores into this field and
 * nothing in this file reads it
 */
bool tag;
/* contact record of the HNP whose pid matches hnppid; set in main() */
orte_hnp_contact_t *target_hnp;
} my_globals;
/* Command line option table handed to opal_cmd_line_create() in main().
 * Every entry except "tag-output" stores its result directly into a
 * field of my_globals.  The "tag-output" entry has a NULL destination
 * and instead names "orte"/"tag"/"output" in its first three fields -
 * presumably the orte_tag_output MCA parameter; confirm against the
 * opal_cmd_line_init_t definition.  The table ends with an
 * OPAL_CMD_LINE_TYPE_NULL entry.
 */
opal_cmd_line_init_t cmd_line_opts[] = {
{ NULL, NULL, NULL,
'h', NULL, "help",
0,
&my_globals.help, OPAL_CMD_LINE_TYPE_BOOL,
"This help message" },
{ NULL, NULL, NULL,
'\0', "pid", "pid",
1,
&my_globals.hnppid, OPAL_CMD_LINE_TYPE_INT,
"The pid of the mpirun whose output you wish to see" },
{ NULL, NULL, NULL,
'\0', "stdout", "stdout",
0,
&my_globals.stdout_req, OPAL_CMD_LINE_TYPE_BOOL,
"Display stdout from specified process (default)" },
{ NULL, NULL, NULL,
'\0', "stderr", "stderr",
0,
&my_globals.stderr_req, OPAL_CMD_LINE_TYPE_BOOL,
"Display stderr from specified process" },
{ NULL, NULL, NULL,
'\0', "stddiag", "stddiag",
0,
&my_globals.stddiag_req, OPAL_CMD_LINE_TYPE_BOOL,
"Display stddiag from specified process" },
{ NULL, NULL, NULL,
'\0', "rank", "rank",
1,
&my_globals.ranks, OPAL_CMD_LINE_TYPE_STRING,
"Comma-separated list of ranks whose output is to be displayed" },
{ "orte", "tag", "output",
'\0', "tag-output", "tag-output",
0,
NULL, OPAL_CMD_LINE_TYPE_BOOL,
"Tag output with the stream and [job,rank] (default: no tags)" },
/* End of list */
{ NULL, NULL, NULL,
'\0', NULL, NULL,
0,
NULL, OPAL_CMD_LINE_TYPE_NULL,
NULL }
};
/*
* Local variables & functions
*/
static void abort_exit_callback(int fd, short flags, void *arg);
static struct opal_event term_handler;
static struct opal_event int_handler;
static opal_list_t hnp_list;
/**
 * orte-iof entry point.
 *
 * Parses the command line, locates the HNP (mpirun) whose pid was given
 * with --pid among the HNPs returned by orte_list_local_hnps(), asks
 * the IOF framework to pull the requested output streams to our fd 1,
 * and then sits in the event loop until a SIGTERM/SIGINT triggers
 * abort_exit_callback().
 *
 * Returns ORTE_ERROR on a usage error or when no HNP with the requested
 * pid is found, 1 if orte_init() fails, otherwise the last status code
 * recorded in ret.
 *
 * NOTE(review): the --rank argument is parsed but not used here; the
 * pull always targets vpid 0 of jobid (HNP jobid + 1) - presumably the
 * first job launched by that mpirun; confirm.
 */
int
main(int argc, char *argv[])
{
    int ret;
    opal_cmd_line_t cmd_line;
    opal_list_item_t* item = NULL;
    orte_process_name_t target_proc;
    orte_iof_tag_t stream;

    /***************
     * Initialize
     ***************/

    /*
     * Make sure to init util before parse_args
     * to ensure installdirs is setup properly
     * before calling mca_base_open();
     */
    if( ORTE_SUCCESS != (ret = opal_init_util()) ) {
        return ret;
    }

    /* initialize the globals */
    my_globals.help = false;
    my_globals.hnppid = -1;
    my_globals.ranks = NULL;
    my_globals.stdout_req = false;
    my_globals.stderr_req = false;
    my_globals.stddiag_req = false;
    my_globals.tag = false;
    my_globals.target_hnp = NULL;

    /* Parse the command line options */
    opal_cmd_line_create(&cmd_line, cmd_line_opts);
    mca_base_open();
    mca_base_cmd_line_setup(&cmd_line);
    ret = opal_cmd_line_parse(&cmd_line, true, argc, argv);

    /**
     * Now start parsing our specific arguments
     */
    if (OPAL_SUCCESS != ret || my_globals.help) {
        char *args = NULL;
        args = opal_cmd_line_get_usage_msg(&cmd_line);
        orte_show_help("help-orte-iof.txt", "usage", true, args);
        free(args);
        return ORTE_ERROR;
    }

    /*
     * Must specify the mpirun pid
     */
    if(my_globals.hnppid < 0) {
        orte_show_help("help-orte-iof.txt", "pid-required", true);
        return ORTE_ERROR;
    }

    /***************************
     * We need all of OPAL and the TOOL portion of ORTE
     ***************************/
    if (ORTE_SUCCESS != orte_init(ORTE_TOOL)) {
        orte_finalize();
        return 1;
    }

    /** setup callbacks for abort signals - from this point
     * forward, we need to abort in a manner that allows us
     * to cleanup
     */
    opal_signal_set(&term_handler, SIGTERM,
                    abort_exit_callback, &term_handler);
    opal_signal_add(&term_handler, NULL);
    opal_signal_set(&int_handler, SIGINT,
                    abort_exit_callback, &int_handler);
    opal_signal_add(&int_handler, NULL);

    /*
     * Get the list of available hnp's
     */
    OBJ_CONSTRUCT(&hnp_list, opal_list_t);
    if (ORTE_SUCCESS != (ret = orte_list_local_hnps(&hnp_list) ) ) {
        goto cleanup;
    }

    /*
     * For each hnp in the listing
     */
    while (NULL != (item = opal_list_remove_first(&hnp_list))) {
        orte_hnp_contact_t *hnp = (orte_hnp_contact_t*)item;
        if (my_globals.hnppid == hnp->pid) {
            /* this is the one we want */
            my_globals.target_hnp = hnp;
            break;
        }
        OBJ_RELEASE(hnp);
    }

    /* none of the listed HNPs has the requested pid - there is nothing
     * to connect to, and the code below would dereference a NULL pointer
     */
    if (NULL == my_globals.target_hnp) {
        fprintf(stderr, "orte-iof: could not find an mpirun with pid %d\n",
                my_globals.hnppid);
        ret = ORTE_ERROR;
        goto cleanup;
    }

    /* setup the stream */
    stream = 0;
    if (my_globals.stderr_req) {
        stream |= ORTE_IOF_STDERR;
    }
    if (my_globals.stddiag_req) {
        stream |= ORTE_IOF_STDDIAG;
    }
    if (my_globals.stdout_req) {
        stream |= ORTE_IOF_STDOUT;
    }
    if (0 == stream) {
        /* default to stdout */
        stream |= ORTE_IOF_STDOUT;
    }

    /* we have our target - pull the specified output streams and dump to our stdout */
    target_proc.jobid = my_globals.target_hnp->name.jobid + 1;
    target_proc.vpid = 0;
    if (ORTE_SUCCESS != (ret = orte_iof.pull(&target_proc, stream, 1))) {
        ORTE_ERROR_LOG(ret);
        goto cleanup;
    }

    /* just wait until the abort is fired */
    opal_event_dispatch();

    /***************
     * Cleanup
     ***************/
 cleanup:
    while (NULL != (item = opal_list_remove_first(&hnp_list))) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&hnp_list);
    orte_finalize();

    return ret;
}
static void abort_exit_callback(int fd, short ign, void *arg)
{
orte_process_name_t target_proc;
opal_list_item_t *item;
int ret;
/* Remove the TERM and INT signal handlers */
opal_signal_del(&term_handler);
opal_signal_del(&int_handler);
/* close the outstanding pull */
target_proc.jobid = my_globals.target_hnp->name.jobid + 1;
target_proc.vpid = 0;
if (ORTE_SUCCESS != (ret = orte_iof.close(&target_proc, ORTE_IOF_STDOUT))) {
ORTE_ERROR_LOG(ret);
}
while (NULL != (item = opal_list_remove_first(&hnp_list))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&hnp_list);
orte_finalize();
exit(1);
}

Просмотреть файл

@ -135,7 +135,10 @@ static opal_cmd_line_init_t cmd_line_init[] = {
{ NULL, NULL, NULL, 'q', NULL, "quiet", 0,
&orterun_globals.quiet, OPAL_CMD_LINE_TYPE_BOOL,
"Suppress helpful messages" },
{ NULL, NULL, NULL, '\0', "report-pid", "report-pid", 0,
&orterun_globals.report_pid, OPAL_CMD_LINE_TYPE_BOOL,
"Printout pid" },
/* hetero apps */
{ "orte", "hetero", "apps", '\0', NULL, "hetero", 0,
NULL, OPAL_CMD_LINE_TYPE_BOOL,
@ -146,6 +149,16 @@ static opal_cmd_line_init_t cmd_line_init[] = {
NULL, OPAL_CMD_LINE_TYPE_BOOL,
"Provide all output in XML format" },
/* tag output */
{ "orte", "tag", "output", '\0', "tag-output", "tag-output", 0,
NULL, OPAL_CMD_LINE_TYPE_BOOL,
"Tag all output with [job,rank]" },
/* select stdin option */
{ NULL, NULL, NULL, '\0', "stdin", "stdin", 1,
&orterun_globals.stdin_target, OPAL_CMD_LINE_TYPE_STRING,
"Specify procs to receive stdin [rank, all, none] (default: 0, indicating rank 0)" },
/* Specify the launch agent to be used */
{ "orte", "launch", "agent", '\0', "launch-agent", "launch-agent", 1,
NULL, OPAL_CMD_LINE_TYPE_STRING,
@ -397,9 +410,15 @@ int orterun(int argc, char *argv[])
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* always forward output from user apps */
jdata->controls = ORTE_JOB_CONTROL_FORWARD_OUTPUT;
/* check what user wants us to do with stdin */
if (0 == strcmp(orterun_globals.stdin_target, "all")) {
jdata->stdin_target = ORTE_VPID_WILDCARD;
} else if (0 == strcmp(orterun_globals.stdin_target, "none")) {
jdata->stdin_target = ORTE_VPID_INVALID;
} else {
jdata->stdin_target = strtoul(orterun_globals.stdin_target, NULL, 10);
}
/* Parse each app, adding it to the job object */
parse_locals(argc, argv);
@ -1098,6 +1117,7 @@ static int init_globals(void)
orterun_globals.ompi_server = NULL;
orterun_globals.wait_for_server = false;
orterun_globals.server_wait_timeout = 10;
orterun_globals.stdin_target = "0";
}
/* Reset the other fields every time */
@ -1106,6 +1126,7 @@ static int init_globals(void)
orterun_globals.version = false;
orterun_globals.verbose = false;
orterun_globals.quiet = false;
orterun_globals.report_pid = false;
orterun_globals.by_node = false;
orterun_globals.by_slot = false;
orterun_globals.debugger = false;
@ -1174,6 +1195,11 @@ static int parse_globals(int argc, char* argv[], opal_cmd_line_t *cmd_line)
exit(0);
}
/* check for request to report pid */
if (orterun_globals.report_pid) {
printf("%s pid: %d\n", orterun_basename, (int)getpid());
}
/* Do we want a user-level debugger? */
if (orterun_globals.debugger) {

Просмотреть файл

@ -42,6 +42,7 @@ struct orterun_globals_t {
bool version;
bool verbose;
bool quiet;
bool report_pid;
bool exit;
bool by_node;
bool by_slot;
@ -59,6 +60,7 @@ struct orterun_globals_t {
char *ompi_server;
bool wait_for_server;
int server_wait_timeout;
char *stdin_target;
};
/**

Просмотреть файл

@ -192,6 +192,86 @@ char* orte_util_print_jobids(const orte_jobid_t job)
return ptr->buffers[ptr->cntr-1];
}
/*
 * Render the job family (upper 16 bits) of a jobid as a decimal string.
 *
 * The invalid and wildcard jobids are rendered as "INVALID" and
 * "WILDCARD".  The string lives in the next slot of the ring of print
 * buffers obtained from get_print_name_buffer().
 *
 * Returns NULL if the thread-specific key cannot be created on first
 * use, and orte_print_args_null if no print buffer is available.
 */
char* orte_util_print_job_family(const orte_jobid_t job)
{
    orte_print_args_buffers_t *ptr;
    char *slot;
    int rc;

    /* one-time setup of the print_args key */
    if (!fns_init) {
        rc = opal_tsd_key_create(&print_args_tsd_key, buffer_cleanup);
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            return NULL;
        }
        fns_init = true;
    }

    ptr = get_print_name_buffer();
    if (NULL == ptr) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return orte_print_args_null;
    }

    /* wrap around to the start of the ring when we run off the end */
    if (ORTE_PRINT_NAME_ARG_NUM_BUFS == ptr->cntr) {
        ptr->cntr = 0;
    }

    /* claim the current slot and advance the ring */
    slot = ptr->buffers[ptr->cntr];
    ptr->cntr++;

    if (ORTE_JOBID_INVALID == job) {
        snprintf(slot, ORTE_PRINT_NAME_ARGS_MAX_SIZE, "INVALID");
        return slot;
    }
    if (ORTE_JOBID_WILDCARD == job) {
        snprintf(slot, ORTE_PRINT_NAME_ARGS_MAX_SIZE, "WILDCARD");
        return slot;
    }

    snprintf(slot, ORTE_PRINT_NAME_ARGS_MAX_SIZE, "%lu",
             ((unsigned long)job & 0xffff0000) >> 16);
    return slot;
}
/*
 * Render the local jobid (lower 16 bits) of a jobid as a decimal string.
 *
 * The invalid and wildcard jobids are rendered as "INVALID" and
 * "WILDCARD".  The string lives in the next slot of the ring of print
 * buffers obtained from get_print_name_buffer().
 *
 * Returns NULL if the thread-specific key cannot be created on first
 * use, and orte_print_args_null if no print buffer is available.
 */
char* orte_util_print_local_jobid(const orte_jobid_t job)
{
    orte_print_args_buffers_t *ptr;
    char *slot;
    int rc;

    /* one-time setup of the print_args key */
    if (!fns_init) {
        rc = opal_tsd_key_create(&print_args_tsd_key, buffer_cleanup);
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            return NULL;
        }
        fns_init = true;
    }

    ptr = get_print_name_buffer();
    if (NULL == ptr) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return orte_print_args_null;
    }

    /* wrap around to the start of the ring when we run off the end */
    if (ORTE_PRINT_NAME_ARG_NUM_BUFS == ptr->cntr) {
        ptr->cntr = 0;
    }

    /* claim the current slot and advance the ring */
    slot = ptr->buffers[ptr->cntr];
    ptr->cntr++;

    if (ORTE_JOBID_INVALID == job) {
        snprintf(slot, ORTE_PRINT_NAME_ARGS_MAX_SIZE, "INVALID");
        return slot;
    }
    if (ORTE_JOBID_WILDCARD == job) {
        snprintf(slot, ORTE_PRINT_NAME_ARGS_MAX_SIZE, "WILDCARD");
        return slot;
    }

    snprintf(slot, ORTE_PRINT_NAME_ARGS_MAX_SIZE, "%lu",
             (unsigned long)job & 0x0000ffff);
    return slot;
}
char* orte_util_print_vpids(const orte_vpid_t vpid)
{
orte_print_args_buffers_t *ptr;

Просмотреть файл

@ -53,6 +53,14 @@ ORTE_DECLSPEC char* orte_util_print_vpids(const orte_vpid_t vpid);
#define ORTE_VPID_PRINT(n) \
orte_util_print_vpids(n)
ORTE_DECLSPEC char* orte_util_print_job_family(const orte_jobid_t job);
#define ORTE_JOB_FAMILY_PRINT(n) \
orte_util_print_job_family(n)
ORTE_DECLSPEC char* orte_util_print_local_jobid(const orte_jobid_t job);
#define ORTE_LOCAL_JOBID_PRINT(n) \
orte_util_print_local_jobid(n)
/* a macro for identifying the job family - i.e., for
* extracting the mpirun-specific id field of the jobid
@ -60,6 +68,12 @@ ORTE_DECLSPEC char* orte_util_print_vpids(const orte_vpid_t vpid);
#define ORTE_JOB_FAMILY(n) \
(((n) >> 16) & 0x0000ffff)
/* a macro for discovering the HNP name of a proc given its jobid:
 * the HNP's jobid keeps only the upper 16 bits of the proc's jobid
 * (the lower 16 bits are cleared), and its vpid is 0.
 *
 * "n" must be a pointer to a name with jobid and vpid fields. The
 * expansion deliberately has no trailing semicolon so that
 * ORTE_HNP_NAME_FROM_JOB(n, job); behaves as a single statement,
 * including inside an unbraced if/else.
 */
#define ORTE_HNP_NAME_FROM_JOB(n, job) \
do { \
(n)->jobid = (job) & 0xffff0000; \
(n)->vpid = 0; \
} while(0)
/* a macro for extracting the local jobid from the jobid - i.e.,
* the non-mpirun-specific id field of the jobid