Add some contributed examples of how to start and configure the spread library. Do a little more cleanup on the spread module, and ensure that it isn't selected if spread isn't running.

This commit was SVN r23101.
2010-05-04 23:44:00 +00:00 · 2010-05-04 23:44:00 +00:00 · 99f223210d
--- a/contrib/spread/spread-init.txt
+++ b/contrib/spread/spread-init.txt
@ -0,0 +1,20 @@
+#!/bin/bash
+
+case "$1" in
+'start')
+	mkdir -p /var/run/spread
+	cd /var/run/spread
+
+	spread&
+	;;
+
+'stop')
+	killall spread 2>/dev/null
+	;;
+
+*)
+	echo "Usage: $0 { start | stop }"
+	exit 1
+	;;
+esac
+exit 0
--- a/contrib/spread/spread.conf
+++ b/contrib/spread/spread.conf
@ -0,0 +1,244 @@
+# Blank lines are permitted in this file.
+# spread.conf sample file
+# 
+# questions to spread@spread.org
+#
+
+#MINIMAL REQUIRED FILE
+#
+# Spread should work fine on one machine with just the uncommented 
+# lines below. The rest of the file documents all the options and
+# more complex network setups.
+#
+# This configures four spread daemons (rmcast-1 through rmcast-4) on port 4803 of the 172.16.174.x subnet.
+
+Spread_Segment  172.16.174.255:4803 {
+
+	rmcast-1	172.16.174.129
+	rmcast-2	172.16.174.130
+	rmcast-3	172.16.174.131
+	rmcast-4	172.16.174.132
+}
+
+
+
+
+# Spread options
+#---------------------------------------------------------------------------
+#---------------------------------------------------------------------------
+#Set what internal Spread events are logged to the screen or file 
+# (see EventLogFile).
+# Default setting is to enable PRINT and EXIT events only. 
+#The PRINT and EXIT types should always be enabled. The names of others are:
+#    	EXIT PRINT DEBUG DATA_LINK NETWORK PROTOCOL SESSION 
+#	CONFIGURATION MEMBERSHIP FLOW_CONTROL STATUS EVENTS 
+#	GROUPS MEMORY SKIPLIST ALL NONE	
+#    ALL and NONE are special and represent either enabling every type 
+#                                           or enabling none of them.
+#    You can also use a "!" sign to negate a type, 
+#        so { ALL !DATA_LINK } means log all events except data_link ones.
+
+#DebugFlags = { PRINT EXIT }
+DebugFlags = { ALL }
+
+# Set priority level of events to output to log file or screen
+# The possible levels are: 
+#	pDEBUG INFO WARNING ERROR CRITICAL FATAL
+# Once selected all events tagged with that priority or higher will
+# be output. FATAL events are always output and cause the daemon to 
+# shut down. Some Events are tagged with a priority of PRINT which
+# causes them to print out no matter what priority level is set. 
+#
+# The default level used if nothing is set is INFO.
+	
+#EventPriority =  INFO
+
+#Set whether to log to a file as opposed to stdout/stderr and what 
+# file to log to.
+# Default is to log to stdout.
+#
+#If option is not set then logging is to stdout.
+#If option is set then logging is to the filename specified.
+# The filename can include a %h or %H escape that will be replaced at runtime
+# by the hostname of the machine upon which the daemon is running.
+# For example "EventLogFile = spreadlog_%h.log" with 2 machines 
+# running Spread (machine1.mydomain.com and machine2.mydomain.com) will
+# cause the daemons to log to "spreadlog_machine1.mydomain.com.log" and
+# "spreadlog_machine2.mydomain.com.log" respectively.
+
+#EventLogFile = testlog.out
+EventLogFile = spread_%h.log
+
+#Set whether to add a timestamp in front of all logged events or not.
+# Default is no timestamps. Default format is "[%a %d %b %Y %H:%M:%S]".
+#If option is commented out then no timestamp is added.
+#If option is enabled then a timestamp is added with the default format
+#If option is enabled and set equal to a string, then that string is used
+#   as the format string for the timestamp. The string must be a valid time
+#   format string as used by the strftime() function.
+
+#EventTimeStamp
+# or
+#EventTimeStamp = "[%a %d %b %Y %H:%M:%S]"
+EventTimeStamp = "[%a %d %b %Y %H:%M:%S]"
+
+#Set whether to add a precise (microsecond) resolution timestamp to all logged
+# events or not. This option requires that EventTimeStamp is also enabled. 
+# If the option is commented out then the microsecond timestamp is not added
+# If the option is uncommented then a microsecond time will print in addition
+#  to the H:M:S resolution timestamp provided by EventTimeStamp. 
+
+#EventPreciseTimeStamp
+
+# Set to initialize daemon sequence numbers to a 'large' number for testing
+# this is purely a debugging capability and should never be enabled on
+# production systems (note one side effect of enabling this is that 
+# your system will experience an extra daemon membership every few messages
+# so you REALLY do not want this turned on)
+# If you want to change the initial value the sequence number is set to
+# you need to edit the #define INITIAL_SEQUENCE_NEAR_WRAP at the top
+# of configuration.h
+
+#DebugInitialSequence
+
+#Set whether to allow dangerous monitor commands 
+# like "partition, flow_control, or kill"
+# Default setting is FALSE.
+#If option is set to false then only "safe" monitor commands are allowed 
+#    (such as requesting a status update).
+#If option is set to true then all monitor commands are enabled. 
+#   THIS IS A SECURITY RISK IF YOUR NETWORK IS NOT PROTECTED!
+
+#DangerousMonitor = false
+DangerousMonitor = true
+
+#Set handling of SO_REUSEADDR socket option for the daemon's TCP
+# listener.  This is useful for facilitating quick daemon restarts (OSes
+# often hold onto the interface/port combination for a short period of time
+# after daemon shut down).
+#
+# AUTO - Active when bound to specific interfaces (default).
+# ON   - Always active, regardless of interface.
+#        SECURITY RISK FOR ANY OS WHICH ALLOWS DOUBLE BINDS BY DIFFERENT USERS
+# OFF  - Always off.
+
+#SocketPortReuse = AUTO
+
+#Set what the maximum per-session queue should be for messages before disconnecting
+# a session. Spread will buffer up to that number of messages that are destined to the
+# session, but that cannot be delivered currently because the session is not reading fast enough.
+# The compiled-in default is usually 1000 if you haven't changed it in the spread_params.h file.
+
+#MaxSessionMessages = 5000
+MaxSessionMessages = 5000
+
+#Sets the runtime directory used when the Spread daemon is run as root
+# as the directory to chroot to.  Defaults to the value of the
+# compile-time preprocessor define SP_RUNTIME_DIR, which is generally
+# "/var/run/spread".
+
+#RuntimeDir = /var/run/spread
+
+#Sets the unix user that the Spread daemon runs as (when launched as
+# the "root" user).  Not effective on a Windows system.  Defaults to
+# the user and group "spread".
+
+#DaemonUser = spread
+#DaemonGroup = spread
+
+
+#Set the list of authentication methods that the daemon will allow
+# and those which are required in all cases.
+# All of the methods listed in "RequiredAuthMethods" will be checked,
+# regardless of what methods the client chooses.
+# Of the methods listed in "AllowedAuthMethods" the client is
+# permitted to choose one or more, and all the ones the client chooses
+# will also be checked.
+#
+# To support older clients, if NULL is enabled, then older clients can
+# connect without any authentication. Any methods which do not require
+# any interaction with the client (such as IP) can also be enabled
+# for older clients. If you enable methods that require interaction,
+# then essentially all older clients will be locked out.
+#
+#The current choices are:
+#	NULL for default, allow anyone authentication
+#	IP for IP based checks using the spread.access_ip file
+
+#RequiredAuthMethods = "   "
+#AllowedAuthMethods = "NULL"
+
+#Set the current access control policy.
+# This is only needed if you want to establish a customized policy.
+# The default policy is to allow any actions by authenticated clients.
+#AccessControlPolicy = "PERMIT"
+
+
+# network description line.
+# Spread_Segment <multicast address for subnet>:<port> {
+# port is optional, if not specified the default 4803 port is used.
+
+#Spread_Segment  127.0.0.255:4803 {
+
+# either a name or IP address.  If both are given, then the name is taken 
+# as-is, and the IP address is used for that name.
+
+#	localhost		127.0.0.1
+#}
+# repeat for next sub-network
+
+#Spread_Segment x.2.2.255 {
+
+#	other1			128.2.2.10
+#				128.2.2.11
+#	other3.my.com
+#}
+# Spread will feel free to use broadcast messages within a sub-network.
+# if you do not want this to happen, you should specify your machines on
+# different logical sub-networks.
+
+# IP-Multicast addresses can also be used as the multicast address for
+# the logical sub-network as in this example. If IP-multicast is supported
+# by the operating system, then the messages will only be received
+# by those machines who are in the group and not by all others in the same
+# sub-network as happens with broadcast addresses
+
+#Spread_Segment 225.0.1.1:3333 {
+#	mcast1			1.2.3.4
+#	mcast2			1.2.3.6
+#}
+
+# Multi-homed host setup
+#
+# If you run Spread on hosts with multiple interfaces you may want to 
+# control which interfaces Spread uses for client connections and for
+# the daemon-to-daemon (and monitor control) messages. This can be done
+# by adding an extra stanza to each configured machine. 
+#
+#Sample:
+#
+#Spread_Segment 225.0.1.1 {
+# 	multihomed1		1.2.3.4 {
+#		D 192.168.0.4
+#		C 1.2.3.4 }
+#	multihomed2		1.2.3.5 {
+#		D 192.168.0.5
+#		C 1.2.3.5
+#		C 127.0.0.1 }
+#	multihomed3		1.2.3.6 {
+#		192.168.0.6
+#		1.2.3.6 }
+#}
+# This configuration sets up three multihomed machines into a Spread segment.
+# The first host has a 'main' IP address of 1.2.3.4 and listens for client
+# connections only on that interface. All daemon-to-daemon UDP multicasts and
+# the tokens and any monitor messages must use the 192.168.0.4 interface.
+# The second host multihomed2 has a similar setup, except it also listens for
+# client connections on the localhost interface as well as the 1.2.3.5 interface.
+# If you make any use of the extra interface stanza ( a { } block ) then you must
+# explicitly configure ALL interfaces you want as Spread removes all defaults when
+# you use the explicit notation.
+# The third multihomed3 host uses a shorthand form of omitting the D or C option and
+# just listening for all types of traffic and events on both the 192.168.0 and 1.2.3 
+# networks. If no letter is listed before the interface address then ALL types of 
+# events are handled on that interface.
--- a/orte/mca/rmcast/rmcast_types.h
+++ b/orte/mca/rmcast/rmcast_types.h
@ -57,6 +57,7 @@ typedef int32_t orte_rmcast_tag_t;
 #define ORTE_RMCAST_TAG_IOF          8
 #define ORTE_RMCAST_TAG_DATA         9
 #define ORTE_RMCAST_TAG_CMD_ACK     10
+#define ORTE_RMCAST_TAG_HEARTBEAT   11

 /* starting value for dynamically assignable tags */
 #define ORTE_RMCAST_TAG_DYNAMIC     100
--- a/orte/mca/rmcast/spread/rmcast_spread.c
+++ b/orte/mca/rmcast/spread/rmcast_spread.c
@ -53,7 +53,6 @@ static opal_list_t channels;
 static bool init_completed = false;
 static orte_rmcast_channel_t next_channel;
 static opal_pointer_array_t msg_log;
-static char groups[256][MAX_GROUP_NAME];

 static  char    private_group[MAX_GROUP_NAME];
 static  mailbox Mbox;
@ -1383,6 +1382,11 @@ cleanup:
 }


+static inline char * get_group_name(char groups[][MAX_GROUP_NAME], int indx)
+{
+    return groups[indx];
+}
+
 static void recv_handler(int sd, short flags, void* cbdata)
 {
    uint8_t *data;
@ -1390,20 +1394,24 @@ static void recv_handler(int sd, short flags, void* cbdata)
    rmcast_base_channel_t *chan = (rmcast_base_channel_t*)cbdata;
    service srvc;
    char sender[MAX_GROUP_NAME];
+    static void * groups;
+    static int size_groups;
    int   num_groups, size_data;
    int16 mess_type;
    int endian_mismatch;
    
+    if (!groups) {
+        size_groups = 1;
+        groups = malloc(size_groups*MAX_GROUP_NAME);
+    }
    /* Read all available spread messages. */
    while (SP_poll(sd) > 0) {
-        
        size_data = mca_rmcast_spread_component.max_msg_size;
        data = (uint8_t*)malloc(size_data * sizeof(uint8_t));
        
        srvc = 0;
        do {
-            sz = SP_receive(sd, &srvc, sender, 256, &num_groups, groups, &mess_type, &endian_mismatch, size_data, (char *)data);
-            
+            sz = SP_receive(sd, &srvc, sender, size_groups, &num_groups, groups, &mess_type, &endian_mismatch, size_data, (char *)data);
            if (sz < 0) {
                char error_string[1024];
                
@ -1411,13 +1419,18 @@ static void recv_handler(int sd, short flags, void* cbdata)
                /* this shouldn't happen - report the errno */
                opal_output(0, "%s Error on multicast recv spread event: %s(%d:%d:%d)",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), error_string, sz, num_groups, endian_mismatch);
+                
                switch (sz) {
+                        
                    case GROUPS_TOO_SHORT:
                        /*
-                         * Just error out
+                         * Number of groups required is "-num_groups" so we
+                         * free the old groups array and malloc a new one of
+                         * the right size (-num_groups)*MAX_GROUP_NAME.
                         */
-                        ORTE_ERROR_LOG(ORTE_ERR_TEMP_OUT_OF_RESOURCE);
-                        exit(-1);
+                        size_groups = -num_groups;
+                        free(groups);
+                        groups = malloc(size_groups*MAX_GROUP_NAME);
                        break;
                    case BUFFER_TOO_SHORT:
                        /*
@ -1443,16 +1456,17 @@ static void recv_handler(int sd, short flags, void* cbdata)
            
        } while (sz < 0);
        
+        
        OPAL_OUTPUT_VERBOSE((2, orte_rmcast_base.rmcast_output,
                             "%s rmcast:spread recvd %d bytes from channel %d(%s)",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
-                             (int)sz, num_groups, groups[num_groups]));
+                             (int)sz, num_groups, get_group_name(groups,0)));
        
        if (Is_regular_mess(srvc)) {
            int i;
            
            for (i=0;i<num_groups;i++) {
-                chan = get_chan_from_name(groups[i]);
+                chan = get_chan_from_name(get_group_name(groups,i));
                if (chan) {
                    OPAL_OUTPUT_VERBOSE((2, orte_rmcast_base.rmcast_output,
                                         "%s rmcast:spread recvd %d bytes from channel %d(%s)",
@ -1470,7 +1484,7 @@ static void recv_handler(int sd, short flags, void* cbdata)
                    OPAL_OUTPUT_VERBOSE((2, orte_rmcast_base.rmcast_output,
                                         "%s rmcast:spread recvd %d bytes from unknown channel named (%s)",
                                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
-                                         (int)sz, groups[i]));
+                                         (int)sz, get_group_name(groups,i)));
                    free(data);
                }
            }
--- a/orte/mca/rmcast/spread/rmcast_spread_component.c
+++ b/orte/mca/rmcast/spread/rmcast_spread_component.c
@ -16,6 +16,8 @@
 #include "orte_config.h"
 #include "orte/constants.h"

+#include <sp.h>
+
 #include "opal/mca/base/base.h"
 #include "opal/mca/base/mca_base_param.h"

@ -77,7 +79,16 @@ orte_rmcast_spread_component_open(void)

 int orte_rmcast_spread_component_query(mca_base_module_t **module, int *priority)
 {
-    /* if we built, then we probably want to be selected */
+    int major, minor, patch;
+    
+    if (0 != SP_version(&major, &minor, &patch)) {
+        /* spread is not running, so we cannot be selected */
+        *priority = 0;
+        *module = NULL;
+        return ORTE_ERROR;
+    }
+    
+    /* otherwise, we want to be selected */
    *priority = 1000;
    *module = (mca_base_module_t*)&orte_rmcast_spread_module;
    return ORTE_SUCCESS;