1
1

Add some contributed examples of how to start and configure the spread library. Do a little more cleanup on the spread module, and ensure that it isn't selected if spread isn't running.

This commit was SVN r23101.
Этот коммит содержится в:
Ralph Castain 2010-05-04 23:44:00 +00:00
родитель 359d7e122e
Коммит 99f223210d
5 изменённых файлов: 305 добавлений и 15 удалений

20
contrib/spread/spread-init.txt Обычный файл
Просмотреть файл

@ -0,0 +1,20 @@
#!/bin/bash
#
# Minimal init-style wrapper for the Spread daemon.
#
# Usage: spread-init { start | stop }
#   start - create /var/run/spread, change into it, and launch spread
#           in the background
#   stop  - kill all running spread processes (killall errors are discarded)
#
# Exits 0 on success, 1 on bad usage or if the runtime directory cannot
# be created or entered.
case "$1" in
'start')
    # Bail out if the runtime directory is unusable; otherwise spread would
    # be started from whatever directory the caller happened to be in.
    mkdir -p /var/run/spread || exit 1
    cd /var/run/spread || exit 1
    spread &
    ;;
'stop')
    # Best effort: no error is reported when no spread process is running.
    killall spread 2>/dev/null
    ;;
*)
    echo "Usage: $0 { start | stop }"
    exit 1
    ;;
esac
exit 0

244
contrib/spread/spread.conf Обычный файл
Просмотреть файл

@ -0,0 +1,244 @@
# Blank lines are permitted in this file.
# spread.conf sample file
#
# questions to spread@spread.org
#
#MINIMAL REQUIRED FILE
#
# Spread should work fine on one machine with just the uncommented
# lines below. The rest of the file documents all the options and
# more complex network setups.
#
# This configures a segment of four spread daemons (rmcast-1 through rmcast-4)
# listening on port 4803, using the broadcast address 172.16.174.255.
Spread_Segment 172.16.174.255:4803 {
rmcast-1 172.16.174.129
rmcast-2 172.16.174.130
rmcast-3 172.16.174.131
rmcast-4 172.16.174.132
}
# Spread options
#---------------------------------------------------------------------------
#---------------------------------------------------------------------------
#Set what internal Spread events are logged to the screen or file
# (see EventLogFile).
# Default setting is to enable PRINT and EXIT events only.
#The PRINT and EXIT types should always be enabled. The names of others are:
# EXIT PRINT DEBUG DATA_LINK NETWORK PROTOCOL SESSION
# CONFIGURATION MEMBERSHIP FLOW_CONTROL STATUS EVENTS
# GROUPS MEMORY SKIPLIST ALL NONE
# ALL and NONE are special and represent either enabling every type
# or enabling none of them.
# You can also use a "!" sign to negate a type,
# so { ALL !DATA_LINK } means log all events except data_link ones.
#DebugFlags = { PRINT EXIT }
DebugFlags = { ALL }
# Set priority level of events to output to log file or screen
# The possible levels are:
# pDEBUG INFO WARNING ERROR CRITICAL FATAL
# Once selected all events tagged with that priority or higher will
# be output. FATAL events are always output and cause the daemon to
# shut down. Some Events are tagged with a priority of PRINT which
# causes them to print out no matter what priority level is set.
#
# The default level used if nothing is set is INFO.
#EventPriority = INFO
#Set whether to log to a file as opposed to stdout/stderr and what
# file to log to.
# Default is to log to stdout.
#
#If option is not set then logging is to stdout.
#If option is set then logging is to the filename specified.
# The filename can include a %h or %H escape that will be replaced at runtime
# by the hostname of the machine upon which the daemon is running.
# For example "EventLogFile = spreadlog_%h.log" with 2 machines
# running Spread (machine1.mydomain.com and machine2.mydomain.com) will
# cause the daemons to log to "spreadlog_machine1.mydomain.com.log" and
# "spreadlog_machine2.mydomain.com.log" respectively.
#EventLogFile = testlog.out
EventLogFile = spread_%h.log
#Set whether to add a timestamp in front of all logged events or not.
# Default is no timestamps. Default format is "[%a %d %b %Y %H:%M:%S]".
#If option is commented out then no timestamp is added.
#If option is enabled then a timestamp is added with the default format
#If option is enabled and set equal to a string, then that string is used
# as the format string for the timestamp. The string must be a valid time
# format string as used by the strftime() function.
#EventTimeStamp
# or
#EventTimeStamp = "[%a %d %b %Y %H:%M:%S]"
EventTimeStamp = "[%a %d %b %Y %H:%M:%S]"
#Set whether to add a precise (microsecond) resolution timestamp to all logged
# events or not. This option requires that EventTimeStamp is also enabled.
# If the option is commented out then the microsecond timestamp is not added
# If the option is uncommented then a microsecond time will print in addition
# to the H:M:S resolution timestamp provided by EventTimeStamp.
#EventPreciseTimeStamp
# Set to initialize daemon sequence numbers to a 'large' number for testing
# this is purely a debugging capability and should never be enabled on
# production systems (note one side effect of enabling this is that
# your system will experience an extra daemon membership every few messages
# so you REALLY do not want this turned on)
# If you want to change the initial value the sequence number is set to
# you need to edit the #define INITIAL_SEQUENCE_NEAR_WRAP at the top
# of configuration.h
#DebugInitialSequence
#Set whether to allow dangerous monitor commands
# like "partition, flow_control, or kill"
# Default setting is FALSE.
#If option is set to false then only "safe" monitor commands are allowed
# (such as requesting a status update).
#If option is set to true then all monitor commands are enabled.
# THIS IS A SECURITY RISK IF YOUR NETWORK IS NOT PROTECTED!
#DangerousMonitor = false
DangerousMonitor = true
#Set handling of SO_REUSEADDR socket option for the daemon's TCP
# listener. This is useful for facilitating quick daemon restarts (OSes
# often hold onto the interface/port combination for a short period of time
# after daemon shut down).
#
# AUTO - Active when bound to specific interfaces (default).
# ON - Always active, regardless of interface.
# SECURITY RISK FOR ANY OS WHICH ALLOWS DOUBLE BINDS BY DIFFERENT USERS
# OFF - Always off.
#SocketPortReuse = AUTO
#Set what the maximum per-session queue should be for messages before disconnecting
# a session. Spread will buffer up to that number of messages that are destined to the
# session, but that cannot be delivered currently because the session is not reading fast enough.
# The compiled in default is usually 1000 if you haven't changed it in the spread_params.h file.
#MaxSessionMessages = 5000
MaxSessionMessages = 5000
#Sets the runtime directory used when the Spread daemon is run as root
# as the directory to chroot to. Defaults to the value of the
# compile-time preprocessor define SP_RUNTIME_DIR, which is generally
# "/var/run/spread".
#RuntimeDir = /var/run/spread
#Sets the unix user that the Spread daemon runs as (when launched as
# the "root" user). Not effective on a Windows system. Defaults to
# the user and group "spread".
#DaemonUser = spread
#DaemonGroup = spread
#Set the list of authentication methods that the daemon will allow
# and those which are required in all cases.
# All of the methods listed in "RequiredAuthMethods" will be checked,
# regardless of what methods the client chooses.
# Of the methods listed in "AllowedAuthMethods" the client is
# permitted to choose one or more, and all the ones the client chooses
# will also be checked.
#
# To support older clients, if NULL is enabled, then older clients can
# connect without any authentication. Any methods which do not require
# any interaction with the client (such as IP) can also be enabled
# for older clients. If you enable methods that require interaction,
# then essentially all older clients will be locked out.
#
#The current choices are:
# NULL for default, allow anyone authentication
# IP for IP based checks using the spread.access_ip file
#RequiredAuthMethods = " "
#AllowedAuthMethods = "NULL"
#Set the current access control policy.
# This is only needed if you want to establish a customized policy.
# The default policy is to allow any actions by authenticated clients.
#AccessControlPolicy = "PERMIT"
# network description line.
# Spread_Segment <multicast address for subnet> <port> {
# port is optional, if not specified the default 4803 port is used.
#Spread_Segment 127.0.0.255:4803 {
# either a name or IP address. If both are given, then the name is taken
# as-is, and the IP address is used for that name.
# localhost 127.0.0.1
#}
# repeat for next sub-network
#Spread_Segment x.2.2.255 {
# other1 128.2.2.10
# 128.2.2.11
# other3.my.com
#}
# Spread will feel free to use broadcast messages within a sub-network.
# if you do not want this to happen, you should specify your machines on
# different logical sub-networks.
# IP-Multicast addresses can also be used as the multicast address for
# the logical sub-network as in this example. If IP-multicast is supported
# by the operating system, then the messages will only be received
# by those machines who are in the group and not by all others in the same
# sub-network as happens with broadcast addresses
#Spread_Segment 225.0.1.1:3333 {
# mcast1 1.2.3.4
# mcast2 1.2.3.6
#}
# Multi-homed host setup
#
# If you run Spread on hosts with multiple interfaces you may want to
# control which interfaces Spread uses for client connections and for
# the daemon-to-daemon (and monitor control) messages. This can be done
# by adding an extra stanza to each configured machine.
#
#Sample:
#
#Spread_Segment 225.0.1.1 {
# multihomed1 1.2.3.4 {
# D 192.168.0.4
# C 1.2.3.4 }
# multihomed2 1.2.3.5 {
# D 192.168.0.5
# C 1.2.3.5
# C 127.0.0.1 }
# multihomed3 1.2.3.6 {
# 192.168.0.6
# 1.2.3.6 }
#}
# This configuration sets up three multihomed machines into a Spread segment.
# The first host has a 'main' IP address of 1.2.3.4 and listens for client
# connections only on that interface. All daemon-to-daemon UDP multicasts and
# the tokens and any monitor messages must use the 192.168.0.4 interface.
# The second host multihomed2 has a similar setup, except it also listens for
# client connections on the localhost interface as well as the 1.2.3.5 interface.
# If you make any use of the extra interface stanza ( a { } block ) then you must
# explicitly configure ALL interfaces you want as Spread removes all defaults when
# you use the explicit notation.
# The third multihomed3 host uses a shorthand form of omitting the D or C option and
# just listening for all types of traffic and events on both the 192.168.0 and 1.2.3
# networks. If no letter is listed before the interface address then ALL types of
# events are handled on that interface.

Просмотреть файл

@ -57,6 +57,7 @@ typedef int32_t orte_rmcast_tag_t;
#define ORTE_RMCAST_TAG_IOF 8
#define ORTE_RMCAST_TAG_DATA 9
#define ORTE_RMCAST_TAG_CMD_ACK 10
#define ORTE_RMCAST_TAG_HEARTBEAT 11
/* starting value for dynamically assignable tags */
#define ORTE_RMCAST_TAG_DYNAMIC 100

Просмотреть файл

@ -53,7 +53,6 @@ static opal_list_t channels;
static bool init_completed = false;
static orte_rmcast_channel_t next_channel;
static opal_pointer_array_t msg_log;
static char groups[256][MAX_GROUP_NAME];
static char private_group[MAX_GROUP_NAME];
static mailbox Mbox;
@ -1383,6 +1382,11 @@ cleanup:
}
/*
 * Return a pointer to the first character of entry number indx in the
 * groups table, where each entry is MAX_GROUP_NAME characters wide.
 * No bounds checking is done on indx.
 */
static inline char * get_group_name(char groups[][MAX_GROUP_NAME], int indx)
{
    char *name = &groups[indx][0];

    return name;
}
static void recv_handler(int sd, short flags, void* cbdata)
{
uint8_t *data;
@ -1390,20 +1394,24 @@ static void recv_handler(int sd, short flags, void* cbdata)
rmcast_base_channel_t *chan = (rmcast_base_channel_t*)cbdata;
service srvc;
char sender[MAX_GROUP_NAME];
static void * groups;
static int size_groups;
int num_groups, size_data;
int16 mess_type;
int endian_mismatch;
if (!groups) {
size_groups = 1;
groups = malloc(size_groups*MAX_GROUP_NAME);
}
/* Read all available spread messages. */
while (SP_poll(sd) > 0) {
size_data = mca_rmcast_spread_component.max_msg_size;
data = (uint8_t*)malloc(size_data * sizeof(uint8_t));
srvc = 0;
do {
sz = SP_receive(sd, &srvc, sender, 256, &num_groups, groups, &mess_type, &endian_mismatch, size_data, (char *)data);
sz = SP_receive(sd, &srvc, sender, size_groups, &num_groups, groups, &mess_type, &endian_mismatch, size_data, (char *)data);
if (sz < 0) {
char error_string[1024];
@ -1411,13 +1419,18 @@ static void recv_handler(int sd, short flags, void* cbdata)
/* this shouldn't happen - report the errno */
opal_output(0, "%s Error on multicast recv spread event: %s(%d:%d:%d)",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), error_string, sz, num_groups, endian_mismatch);
switch (sz) {
case GROUPS_TOO_SHORT:
/*
* Just error out
* Number of groups required is "-num_groups" so we
* free the old groups array and malloc a new one of
* the right size (-num_groups)*MAX_GROUP_NAME.
*/
ORTE_ERROR_LOG(ORTE_ERR_TEMP_OUT_OF_RESOURCE);
exit(-1);
size_groups = -num_groups;
free(groups);
groups = malloc(size_groups*MAX_GROUP_NAME);
break;
case BUFFER_TOO_SHORT:
/*
@ -1443,16 +1456,17 @@ static void recv_handler(int sd, short flags, void* cbdata)
} while (sz < 0);
OPAL_OUTPUT_VERBOSE((2, orte_rmcast_base.rmcast_output,
"%s rmcast:spread recvd %d bytes from channel %d(%s)",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(int)sz, num_groups, groups[num_groups]));
(int)sz, num_groups, get_group_name(groups,0)));
if (Is_regular_mess(srvc)) {
int i;
for (i=0;i<num_groups;i++) {
chan = get_chan_from_name(groups[i]);
chan = get_chan_from_name(get_group_name(groups,i));
if (chan) {
OPAL_OUTPUT_VERBOSE((2, orte_rmcast_base.rmcast_output,
"%s rmcast:spread recvd %d bytes from channel %d(%s)",
@ -1470,7 +1484,7 @@ static void recv_handler(int sd, short flags, void* cbdata)
OPAL_OUTPUT_VERBOSE((2, orte_rmcast_base.rmcast_output,
"%s rmcast:spread recvd %d bytes from unknown channel named (%s)",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(int)sz, groups[i]));
(int)sz, get_group_name(groups,i)));
free(data);
}
}

Просмотреть файл

@ -16,6 +16,8 @@
#include "orte_config.h"
#include "orte/constants.h"
#include <sp.h>
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
@ -77,7 +79,16 @@ orte_rmcast_spread_component_open(void)
int orte_rmcast_spread_component_query(mca_base_module_t **module, int *priority)
{
/* if we built, then we probably want to be selected */
int major, minor, patch;
if (0 != SP_version(&major, &minor, &patch)) {
/* spread is not running, so we cannot be selected */
*priority = 0;
*module = NULL;
return ORTE_ERROR;
}
/* otherwise, we want to be selected */
*priority = 1000;
*module = (mca_base_module_t*)&orte_rmcast_spread_module;
return ORTE_SUCCESS;