Add some contributed examples of how to start and configure the spread library. Do a little more cleanup on the spread module, and ensure that it isn't selected if spread isn't running.
This commit was SVN r23101.
Этот коммит содержится в:
родитель
359d7e122e
Коммит
99f223210d
20
contrib/spread/spread-init.txt
Обычный файл
20
contrib/spread/spread-init.txt
Обычный файл
@ -0,0 +1,20 @@
|
||||
#!/bin/bash
# Minimal start/stop wrapper for the spread daemon.
# Usage: <script> { start | stop }
# Exits 0 after start/stop, 1 (with a usage message) for any other argument.
|
||||
|
||||
case "$1" in
|
||||
'start')
# Create the runtime directory, switch into it, and launch the daemon
# in the background from there.
|
||||
mkdir -p /var/run/spread
|
||||
cd /var/run/spread
|
||||
|
||||
spread&
|
||||
;;
|
||||
|
||||
'stop')
# Kill every process named "spread"; killall's stderr is discarded.
|
||||
killall spread 2>/dev/null
|
||||
;;
|
||||
|
||||
*)
# Unknown or missing argument: print usage and exit with status 1.
|
||||
echo "Usage: $0 { start | stop }"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
exit 0
|
244
contrib/spread/spread.conf
Обычный файл
244
contrib/spread/spread.conf
Обычный файл
@ -0,0 +1,244 @@
|
||||
# Blank lines are permitted in this file.
|
||||
# spread.conf sample file
|
||||
#
|
||||
# questions to spread@spread.org
|
||||
#
|
||||
|
||||
#MINIMAL REQUIRED FILE
|
||||
#
|
||||
# Spread should work fine on one machine with just the uncommented
|
||||
# lines below. The rest of the file documents all the options and
|
||||
# more complex network setups.
|
||||
#
|
||||
# This configures four spread daemons (rmcast-1 through rmcast-4) on the 172.16.174.255:4803 segment.
|
||||
|
||||
Spread_Segment 172.16.174.255:4803 {
|
||||
|
||||
rmcast-1 172.16.174.129
|
||||
rmcast-2 172.16.174.130
|
||||
rmcast-3 172.16.174.131
|
||||
rmcast-4 172.16.174.132
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
# Spread options
|
||||
#---------------------------------------------------------------------------
|
||||
#---------------------------------------------------------------------------
|
||||
#Set what internal Spread events are logged to the screen or file
|
||||
# (see EventLogFile).
|
||||
# Default setting is to enable PRINT and EXIT events only.
|
||||
#The PRINT and EXIT types should always be enabled. The names of others are:
|
||||
# EXIT PRINT DEBUG DATA_LINK NETWORK PROTOCOL SESSION
|
||||
# CONFIGURATION MEMBERSHIP FLOW_CONTROL STATUS EVENTS
|
||||
# GROUPS MEMORY SKIPLIST ALL NONE
|
||||
# ALL and NONE are special and represent either enabling every type
|
||||
# or enabling none of them.
|
||||
# You can also use a "!" sign to negate a type,
|
||||
# so { ALL !DATA_LINK } means log all events except data_link ones.
|
||||
|
||||
#DebugFlags = { PRINT EXIT }
|
||||
DebugFlags = { ALL }
|
||||
|
||||
# Set priority level of events to output to log file or screen
|
||||
# The possible levels are:
|
||||
# pDEBUG INFO WARNING ERROR CRITICAL FATAL
|
||||
# Once selected all events tagged with that priority or higher will
|
||||
# be output. FATAL events are always output and cause the daemon to
|
||||
# shut down. Some Events are tagged with a priority of PRINT which
|
||||
# causes them to print out no matter what priority level is set.
|
||||
#
|
||||
# The default level used if nothing is set is INFO.
|
||||
|
||||
#EventPriority = INFO
|
||||
|
||||
#Set whether to log to a file as opposed to stdout/stderr and what
|
||||
# file to log to.
|
||||
# Default is to log to stdout.
|
||||
#
|
||||
#If option is not set then logging is to stdout.
|
||||
#If option is set then logging is to the filename specified.
|
||||
# The filename can include a %h or %H escape that will be replaced at runtime
|
||||
# by the hostname of the machine upon which the daemon is running.
|
||||
# For example "EventLogFile = spreadlog_%h.log" with 2 machines
|
||||
# running Spread (machine1.mydomain.com and machine2.mydomain.com) will
|
||||
# cause the daemons to log to "spreadlog_machine1.mydomain.com.log" and
|
||||
# "spreadlog_machine2.mydomain.com.log" respectively.
|
||||
|
||||
#EventLogFile = testlog.out
|
||||
EventLogFile = spread_%h.log
|
||||
|
||||
#Set whether to add a timestamp in front of all logged events or not.
|
||||
# Default is no timestamps. Default format is "[%a %d %b %Y %H:%M:%S]".
|
||||
#If option is commented out then no timestamp is added.
|
||||
#If option is enabled then a timestamp is added with the default format
|
||||
#If option is enabled and set equal to a string, then that string is used
|
||||
# as the format string for the timestamp. The string must be a valid time
|
||||
# format string as used by the strftime() function.
|
||||
|
||||
#EventTimeStamp
|
||||
# or
|
||||
#EventTimeStamp = "[%a %d %b %Y %H:%M:%S]"
|
||||
EventTimeStamp = "[%a %d %b %Y %H:%M:%S]"
|
||||
|
||||
#Set whether to add a precise (microsecond) resolution timestamp to all logged
|
||||
# events or not. This option requires that EventTimeStamp is also enabled.
|
||||
# If the option is commented out then the microsecond timestamp is not added
|
||||
# If the option is uncommented then a microsecond time will print in addition
|
||||
# to the H:M:S resolution timestamp provided by EventTimeStamp.
|
||||
|
||||
#EventPreciseTimeStamp
|
||||
|
||||
# Set to initialize daemon sequence numbers to a 'large' number for testing
|
||||
# this is purely a debugging capability and should never be enabled on
|
||||
# production systems (note one side effect of enabling this is that
|
||||
# your system will experience an extra daemon membership every few messages
|
||||
# so you REALLY do not want this turned on)
|
||||
# If you want to change the initial value the sequence number is set to
|
||||
# you need to edit the #define INITIAL_SEQUENCE_NEAR_WRAP at the top
|
||||
# of configuration.h
|
||||
|
||||
#DebugInitialSequence
|
||||
|
||||
#Set whether to allow dangerous monitor commands
|
||||
# like "partition, flow_control, or kill"
|
||||
# Default setting is FALSE.
|
||||
#If option is set to false then only "safe" monitor commands are allowed
|
||||
# (such as requesting a status update).
|
||||
#If option is set to true then all monitor commands are enabled.
|
||||
# THIS IS A SECURITY RISK IF YOUR NETWORK IS NOT PROTECTED!
|
||||
|
||||
#DangerousMonitor = false
|
||||
DangerousMonitor = true
|
||||
|
||||
#Set handling of SO_REUSEADDR socket option for the daemon's TCP
|
||||
# listener. This is useful for facilitating quick daemon restarts (OSes
|
||||
# often hold onto the interface/port combination for a short period of time
|
||||
# after daemon shut down).
|
||||
#
|
||||
# AUTO - Active when bound to specific interfaces (default).
|
||||
# ON - Always active, regardless of interface.
|
||||
# SECURITY RISK FOR ANY OS WHICH ALLOWS DOUBLE BINDS BY DIFFERENT USERS
|
||||
# OFF - Always off.
|
||||
|
||||
#SocketPortReuse = AUTO
|
||||
|
||||
#Set what the maximum per-session queue should be for messages before disconnecting
|
||||
# a session. Spread will buffer up to that number of messages that are destined to the
|
||||
# session, but that can not be delivered currently because the session is not reading fast enough.
|
||||
# The compiled-in default is usually 1000 if you haven't changed it in the spread_params.h file.
|
||||
|
||||
#MaxSessionMessages = 5000
|
||||
MaxSessionMessages = 5000
|
||||
|
||||
#Sets the runtime directory used when the Spread daemon is run as root
|
||||
# as the directory to chroot to. Defaults to the value of the
|
||||
# compile-time preprocessor define SP_RUNTIME_DIR, which is generally
|
||||
# "/var/run/spread".
|
||||
|
||||
#RuntimeDir = /var/run/spread
|
||||
|
||||
#Sets the unix user that the Spread daemon runs as (when launched as
|
||||
# the "root" user). Not effective on a Windows system. Defaults to
|
||||
# the user and group "spread".
|
||||
|
||||
#DaemonUser = spread
|
||||
#DaemonGroup = spread
|
||||
|
||||
|
||||
#Set the list of authentication methods that the daemon will allow
|
||||
# and those which are required in all cases.
|
||||
# All of the methods listed in "RequiredAuthMethods" will be checked,
|
||||
# regardless of which methods the client chooses.
|
||||
# Of the methods listed in "AllowedAuthMethods" the client is
|
||||
# permitted to choose one or more, and all the ones the client chooses
|
||||
# will also be checked.
|
||||
#
|
||||
# To support older clients, if NULL is enabled, then older clients can
|
||||
# connect without any authentication. Any methods which do not require
|
||||
# any interaction with the client (such as IP) can also be enabled
|
||||
# for older clients. If you enable methods that require interaction,
|
||||
# then essentially all older clients will be locked out.
|
||||
#
|
||||
#The current choices are:
|
||||
# NULL for default, allows anyone to connect without authentication
|
||||
# IP for IP based checks using the spread.access_ip file
|
||||
|
||||
#RequiredAuthMethods = " "
|
||||
#AllowedAuthMethods = "NULL"
|
||||
|
||||
#Set the current access control policy.
|
||||
# This is only needed if you want to establish a customized policy.
|
||||
# The default policy is to allow any actions by authenticated clients.
|
||||
#AccessControlPolicy = "PERMIT"
|
||||
|
||||
|
||||
# network description line.
|
||||
# Spread_Segment <multicast address for subnet>:<port> {
|
||||
# port is optional, if not specified the default 4803 port is used.
|
||||
|
||||
#Spread_Segment 127.0.0.255:4803 {
|
||||
|
||||
# either a name or IP address. If both are given, then the name is taken
|
||||
# as-is, and the IP address is used for that name.
|
||||
|
||||
# localhost 127.0.0.1
|
||||
#}
|
||||
# repeat for next sub-network
|
||||
|
||||
#Spread_Segment x.2.2.255 {
|
||||
|
||||
# other1 128.2.2.10
|
||||
# 128.2.2.11
|
||||
# other3.my.com
|
||||
#}
|
||||
# Spread will feel free to use broadcast messages within a sub-network.
|
||||
# if you do not want this to happen, you should specify your machines on
|
||||
# different logical sub-networks.
|
||||
|
||||
# IP-Multicast addresses can also be used as the multicast address for
|
||||
# the logical sub-network as in this example. If IP-multicast is supported
|
||||
# by the operating system, then the messages will only be received
|
||||
# by those machines who are in the group and not by all others in the same
|
||||
# sub-network as happens with broadcast addresses
|
||||
|
||||
#Spread_Segment 225.0.1.1:3333 {
|
||||
# mcast1 1.2.3.4
|
||||
# mcast2 1.2.3.6
|
||||
#}
|
||||
|
||||
# Multi-homed host setup
|
||||
#
|
||||
# If you run Spread on hosts with multiple interfaces you may want to
|
||||
# control which interfaces Spread uses for client connections and for
|
||||
# the daemon-to-daemon (and monitor control) messages. This can be done
|
||||
# by adding an extra stanza to each configured machine.
|
||||
#
|
||||
#Sample:
|
||||
#
|
||||
#Spread_Segment 225.0.1.1 {
|
||||
# multihomed1 1.2.3.4 {
|
||||
# D 192.168.0.4
|
||||
# C 1.2.3.4 }
|
||||
# multihomed2 1.2.3.5 {
|
||||
# D 192.168.0.5
|
||||
# C 1.2.3.5
|
||||
# C 127.0.0.1 }
|
||||
# multihomed3 1.2.3.6 {
|
||||
# 192.168.0.6
|
||||
# 1.2.3.6 }
|
||||
#}
|
||||
# This configuration sets up three multihomed machines into a Spread segment.
|
||||
# The first host has a 'main' IP address of 1.2.3.4 and listens for client
|
||||
# connections only on that interface. All daemon-to-daemon UDP multicasts and
|
||||
# the tokens and any monitor messages must use the 192.168.0.4 interface.
|
||||
# The second host multihomed2 has a similar setup, except it also listens for
|
||||
# client connections on the localhost interface as well as the 1.2.3.5 interface.
|
||||
# If you make any use of the extra interface stanza ( a { } block ) then you must
|
||||
# explicitly configure ALL interfaces you want as Spread removes all defaults when
|
||||
# you use the explicit notation.
|
||||
# The third multihomed3 host uses a shorthand form of omitting the D or C option and
|
||||
# just listening for all types of traffic and events on both the 192.168.0 and 1.2.3
|
||||
# networks. If no letter is listed before the interface address then ALL types of
|
||||
# events are handled on that interface.
|
@ -57,6 +57,7 @@ typedef int32_t orte_rmcast_tag_t;
|
||||
#define ORTE_RMCAST_TAG_IOF 8
|
||||
#define ORTE_RMCAST_TAG_DATA 9
|
||||
#define ORTE_RMCAST_TAG_CMD_ACK 10
|
||||
#define ORTE_RMCAST_TAG_HEARTBEAT 11
|
||||
|
||||
/* starting value for dynamically assignable tags */
|
||||
#define ORTE_RMCAST_TAG_DYNAMIC 100
|
||||
|
@ -53,7 +53,6 @@ static opal_list_t channels;
|
||||
static bool init_completed = false;
|
||||
static orte_rmcast_channel_t next_channel;
|
||||
static opal_pointer_array_t msg_log;
|
||||
static char groups[256][MAX_GROUP_NAME];
|
||||
|
||||
static char private_group[MAX_GROUP_NAME];
|
||||
static mailbox Mbox;
|
||||
@ -1383,6 +1382,11 @@ cleanup:
|
||||
}
|
||||
|
||||
|
||||
/*
 * Return a pointer to entry number indx of a groups array whose rows are
 * MAX_GROUP_NAME chars wide.
 *
 * groups  array of fixed-width group-name rows
 * indx    row to return; no bounds checking is performed here
 */
static inline char * get_group_name(char groups[][MAX_GROUP_NAME], int indx)
|
||||
{
|
||||
return groups[indx];
|
||||
}
|
||||
|
||||
static void recv_handler(int sd, short flags, void* cbdata)
|
||||
{
|
||||
uint8_t *data;
|
||||
@ -1390,20 +1394,24 @@ static void recv_handler(int sd, short flags, void* cbdata)
|
||||
rmcast_base_channel_t *chan = (rmcast_base_channel_t*)cbdata;
|
||||
service srvc;
|
||||
char sender[MAX_GROUP_NAME];
|
||||
static void * groups;
|
||||
static int size_groups;
|
||||
int num_groups, size_data;
|
||||
int16 mess_type;
|
||||
int endian_mismatch;
|
||||
|
||||
if (!groups) {
|
||||
size_groups = 1;
|
||||
groups = malloc(size_groups*MAX_GROUP_NAME);
|
||||
}
|
||||
/* Read all available spread messages. */
|
||||
while (SP_poll(sd) > 0) {
|
||||
|
||||
size_data = mca_rmcast_spread_component.max_msg_size;
|
||||
data = (uint8_t*)malloc(size_data * sizeof(uint8_t));
|
||||
|
||||
srvc = 0;
|
||||
do {
|
||||
sz = SP_receive(sd, &srvc, sender, 256, &num_groups, groups, &mess_type, &endian_mismatch, size_data, (char *)data);
|
||||
|
||||
sz = SP_receive(sd, &srvc, sender, size_groups, &num_groups, groups, &mess_type, &endian_mismatch, size_data, (char *)data);
|
||||
if (sz < 0) {
|
||||
char error_string[1024];
|
||||
|
||||
@ -1411,13 +1419,18 @@ static void recv_handler(int sd, short flags, void* cbdata)
|
||||
/* this shouldn't happen - report the errno */
|
||||
opal_output(0, "%s Error on multicast recv spread event: %s(%d:%d:%d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), error_string, sz, num_groups, endian_mismatch);
|
||||
|
||||
switch (sz) {
|
||||
|
||||
case GROUPS_TOO_SHORT:
|
||||
/*
|
||||
* Just error out
|
||||
* Number of groups required is "-num_groups" so we
|
||||
* free the old groups array and malloc a new one of
|
||||
* the right size (-num_groups)*MAX_GROUP_NAME.
|
||||
*/
|
||||
ORTE_ERROR_LOG(ORTE_ERR_TEMP_OUT_OF_RESOURCE);
|
||||
exit(-1);
|
||||
size_groups = -num_groups;
|
||||
free(groups);
|
||||
groups = malloc(size_groups*MAX_GROUP_NAME);
|
||||
break;
|
||||
case BUFFER_TOO_SHORT:
|
||||
/*
|
||||
@ -1443,16 +1456,17 @@ static void recv_handler(int sd, short flags, void* cbdata)
|
||||
|
||||
} while (sz < 0);
|
||||
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_rmcast_base.rmcast_output,
|
||||
"%s rmcast:spread recvd %d bytes from channel %d(%s)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(int)sz, num_groups, groups[num_groups]));
|
||||
(int)sz, num_groups, get_group_name(groups,0)));
|
||||
|
||||
if (Is_regular_mess(srvc)) {
|
||||
int i;
|
||||
|
||||
for (i=0;i<num_groups;i++) {
|
||||
chan = get_chan_from_name(groups[i]);
|
||||
chan = get_chan_from_name(get_group_name(groups,i));
|
||||
if (chan) {
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_rmcast_base.rmcast_output,
|
||||
"%s rmcast:spread recvd %d bytes from channel %d(%s)",
|
||||
@ -1470,7 +1484,7 @@ static void recv_handler(int sd, short flags, void* cbdata)
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_rmcast_base.rmcast_output,
|
||||
"%s rmcast:spread recvd %d bytes from unknown channel named (%s)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(int)sz, groups[i]));
|
||||
(int)sz, get_group_name(groups,i)));
|
||||
free(data);
|
||||
}
|
||||
}
|
||||
|
@ -16,6 +16,8 @@
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
|
||||
#include <sp.h>
|
||||
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
|
||||
@ -77,7 +79,16 @@ orte_rmcast_spread_component_open(void)
|
||||
|
||||
int orte_rmcast_spread_component_query(mca_base_module_t **module, int *priority)
|
||||
{
|
||||
/* if we built, then we probably want to be selected */
|
||||
int major, minor, patch;
|
||||
|
||||
if (0 != SP_version(&major, &minor, &patch)) {
|
||||
/* spread is not running, so we cannot be selected */
|
||||
*priority = 0;
|
||||
*module = NULL;
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
/* otherwise, we want to be selected */
|
||||
*priority = 1000;
|
||||
*module = (mca_base_module_t*)&orte_rmcast_spread_module;
|
||||
return ORTE_SUCCESS;
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user