1
1

Add a new TCP module to the reliable multicast framework. This module uses ORTE's grpcomm.xcast functionality to "fake" multicasts for environments where regular multicast isn't reliable.

Modify the startup logic to allow for this use-case.

This commit was SVN r22310.
Этот коммит содержится в:
Ralph Castain 2009-12-15 01:18:27 +00:00
родитель 4f68dfb03c
Коммит 9acec283af
12 изменённых файлов: 1365 добавлений и 76 удалений

Просмотреть файл

@ -66,20 +66,6 @@ int orte_ess_base_app_setup(void)
/* Setup the communication infrastructure */
/* start with multicast */
#if ORTE_ENABLE_MULTICAST
if (ORTE_SUCCESS != (ret = orte_rmcast_base_open())) {
ORTE_ERROR_LOG(ret);
error = "orte_rmcast_base_open";
goto error;
}
if (ORTE_SUCCESS != (ret = orte_rmcast_base_select())) {
ORTE_ERROR_LOG(ret);
error = "orte_rmcast_base_select";
goto error;
}
#endif
/* Runtime Messaging Layer */
if (ORTE_SUCCESS != (ret = orte_rml_base_open())) {
ORTE_ERROR_LOG(ret);
@ -117,6 +103,20 @@ int orte_ess_base_app_setup(void)
goto error;
}
/* multicast */
#if ORTE_ENABLE_MULTICAST
if (ORTE_SUCCESS != (ret = orte_rmcast_base_open())) {
ORTE_ERROR_LOG(ret);
error = "orte_rmcast_base_open";
goto error;
}
if (ORTE_SUCCESS != (ret = orte_rmcast_base_select())) {
ORTE_ERROR_LOG(ret);
error = "orte_rmcast_base_select";
goto error;
}
#endif
/* non-daemon/HNP apps can only have the default proxy PLM
* module open - provide a chance for it to initialize
*/
@ -235,16 +235,16 @@ int orte_ess_base_app_finalize(void)
orte_wait_finalize();
/* now can close the rml and its friendly group comm */
orte_grpcomm_base_close();
orte_routed_base_close();
orte_rml_base_close();
/* close the multicast */
#if ORTE_ENABLE_MULTICAST
orte_rmcast_base_close();
#endif
/* now can close the rml and its friendly group comm */
orte_grpcomm_base_close();
orte_routed_base_close();
orte_rml_base_close();
orte_session_dir_finalize(ORTE_PROC_MY_NAME);
return ORTE_SUCCESS;

Просмотреть файл

@ -161,20 +161,6 @@ int orte_ess_base_orted_setup(char **hosts)
/* Setup the communication infrastructure */
/* start with multicast */
#if ORTE_ENABLE_MULTICAST
if (ORTE_SUCCESS != (ret = orte_rmcast_base_open())) {
ORTE_ERROR_LOG(ret);
error = "orte_rmcast_base_open";
goto error;
}
if (ORTE_SUCCESS != (ret = orte_rmcast_base_select())) {
ORTE_ERROR_LOG(ret);
error = "orte_rmcast_base_select";
goto error;
}
#endif
/* Runtime Messaging Layer - this opens/selects the OOB as well */
if (ORTE_SUCCESS != (ret = orte_rml_base_open())) {
ORTE_ERROR_LOG(ret);
@ -212,6 +198,20 @@ int orte_ess_base_orted_setup(char **hosts)
goto error;
}
/* multicast */
#if ORTE_ENABLE_MULTICAST
if (ORTE_SUCCESS != (ret = orte_rmcast_base_open())) {
ORTE_ERROR_LOG(ret);
error = "orte_rmcast_base_open";
goto error;
}
if (ORTE_SUCCESS != (ret = orte_rmcast_base_select())) {
ORTE_ERROR_LOG(ret);
error = "orte_rmcast_base_select";
goto error;
}
#endif
/* Open/select the odls */
if (ORTE_SUCCESS != (ret = orte_odls_base_open())) {
ORTE_ERROR_LOG(ret);

Просмотреть файл

@ -43,6 +43,9 @@
#include "orte/util/proc_info.h"
#include "orte/util/session_dir.h"
#include "orte/util/show_help.h"
#if ORTE_ENABLE_MULTICAST
#include "orte/mca/rmcast/base/base.h"
#endif
#include "orte/runtime/orte_cr.h"
#include "orte/runtime/orte_globals.h"
@ -89,6 +92,20 @@ int orte_ess_base_tool_setup(void)
goto error;
}
/* multicast */
#if ORTE_ENABLE_MULTICAST
if (ORTE_SUCCESS != (ret = orte_rmcast_base_open())) {
ORTE_ERROR_LOG(ret);
error = "orte_rmcast_base_open";
goto error;
}
if (ORTE_SUCCESS != (ret = orte_rmcast_base_select())) {
ORTE_ERROR_LOG(ret);
error = "orte_rmcast_base_select";
goto error;
}
#endif
/* since I am a tool, then all I really want to do is communicate.
* So setup communications and be done - finding the HNP
* to which I want to communicate and setting up a route for

Просмотреть файл

@ -91,26 +91,29 @@ static int rte_init(void)
goto error;
}
/* open the reliable multicast framework, just in
* case we need it to query the HNP for a name
*/
if (ORTE_SUCCESS != (ret = orte_rmcast_base_open())) {
ORTE_ERROR_LOG(ret);
error = "orte_rmcast_base_open";
goto error;
}
if (ORTE_SUCCESS != (ret = orte_rmcast_base_select())) {
ORTE_ERROR_LOG(ret);
error = "orte_rmcast_base_select";
goto error;
}
if (ORTE_PROC_IS_DAEMON) {
/* get a name for ourselves */
if (ORTE_SUCCESS != (ret = cm_set_name())) {
error = "set_name";
goto error;
/* if we do not know the HNP, then we have to
* use the multicast system to find it
*/
if (NULL == orte_process_info.my_hnp_uri) {
/* open the reliable multicast framework */
if (ORTE_SUCCESS != (ret = orte_rmcast_base_open())) {
ORTE_ERROR_LOG(ret);
error = "orte_rmcast_base_open";
goto error;
}
if (ORTE_SUCCESS != (ret = orte_rmcast_base_select())) {
ORTE_ERROR_LOG(ret);
error = "orte_rmcast_base_select";
goto error;
}
/* get a name for ourselves */
if (ORTE_SUCCESS != (ret = cm_set_name())) {
error = "set_name";
goto error;
}
}
/* get the list of nodes used for this job */
@ -148,10 +151,27 @@ static int rte_init(void)
*/
orte_plm_base_close();
/* checkin with the HNP */
if (ORTE_SUCCESS != (ret = cm_set_name())) {
error = "set_name";
goto error;
/* if we do not know the HNP, then we have to use
* the multicast system to find it
*/
if (NULL == orte_process_info.my_hnp_uri) {
/* open the reliable multicast framework */
if (ORTE_SUCCESS != (ret = orte_rmcast_base_open())) {
ORTE_ERROR_LOG(ret);
error = "orte_rmcast_base_open";
goto error;
}
if (ORTE_SUCCESS != (ret = orte_rmcast_base_select())) {
ORTE_ERROR_LOG(ret);
error = "orte_rmcast_base_select";
goto error;
}
/* checkin with the HNP */
if (ORTE_SUCCESS != (ret = cm_set_name())) {
error = "set_name";
goto error;
}
}
/* do the rest of the standard tool init */

Просмотреть файл

@ -206,20 +206,6 @@ static int rte_init(void)
/* Setup the communication infrastructure */
/* start with multicast */
#if ORTE_ENABLE_MULTICAST
if (ORTE_SUCCESS != (ret = orte_rmcast_base_open())) {
ORTE_ERROR_LOG(ret);
error = "orte_rmcast_base_open";
goto error;
}
if (ORTE_SUCCESS != (ret = orte_rmcast_base_select())) {
ORTE_ERROR_LOG(ret);
error = "orte_rmcast_base_select";
goto error;
}
#endif
/*
* Runtime Messaging Layer
*/
@ -260,6 +246,20 @@ static int rte_init(void)
goto error;
}
/* multicast */
#if ORTE_ENABLE_MULTICAST
if (ORTE_SUCCESS != (ret = orte_rmcast_base_open())) {
ORTE_ERROR_LOG(ret);
error = "orte_rmcast_base_open";
goto error;
}
if (ORTE_SUCCESS != (ret = orte_rmcast_base_select())) {
ORTE_ERROR_LOG(ret);
error = "orte_rmcast_base_select";
goto error;
}
#endif
/* Now provide a chance for the PLM
* to perform any module-specific init functions. This
* needs to occur AFTER the communications are setup
@ -638,16 +638,16 @@ static int rte_finalize(void)
orte_plm_base_close();
orte_errmgr_base_close();
/* now can close the rml and its friendly group comm */
orte_grpcomm_base_close();
orte_routed_base_close();
orte_rml_base_close();
/* close the multicast */
#if ORTE_ENABLE_MULTICAST
orte_rmcast_base_close();
#endif
/* now can close the rml and its friendly group comm */
orte_grpcomm_base_close();
orte_routed_base_close();
orte_rml_base_close();
/* if we were doing timing studies, close the timing file */
if (orte_timing) {
if (stdout != orte_timing_output &&

Просмотреть файл

@ -65,6 +65,7 @@ typedef uint8_t orte_rmcast_flag_t;
/* rmcast sequence number type: an unsigned 32-bit counter */
typedef uint32_t orte_rmcast_seq_t;
/* Largest valid sequence number. The expansion is parenthesized so the
 * macro keeps its value when used inside a larger expression
 * (e.g. ORTE_RMCAST_SEQ_MAX % n or 2 * ORTE_RMCAST_SEQ_MAX).
 */
#define ORTE_RMCAST_SEQ_MAX (UINT32_MAX-1)
/* Sentinel one past ORTE_RMCAST_SEQ_MAX, marking "no valid sequence number" */
#define ORTE_RMCAST_SEQ_INVALID UINT32_MAX
/* Data-type constant paired with orte_rmcast_seq_t
 * NOTE(review): presumably used when packing/unpacking sequence numbers - confirm
 */
#define ORTE_RMCAST_SEQ_T OPAL_UINT32
END_C_DECLS

35
orte/mca/rmcast/tcp/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,35 @@
#
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
# Exactly one of component_install / component_noinst is non-empty,
# depending on the OMPI_BUILD_rmcast_tcp_DSO conditional.
if OMPI_BUILD_rmcast_tcp_DSO
component_noinst =
component_install = mca_rmcast_tcp.la
else
component_noinst = libmca_rmcast_tcp.la
component_install =
endif
# Source files shared by both the DSO and the static variant of the component
rmcast_tcp_SOURCES = \
rmcast_tcp.c \
rmcast_tcp.h \
rmcast_tcp_component.c
# DSO build: the library listed in component_install is installed into $(pkglibdir)
mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_rmcast_tcp_la_SOURCES = $(rmcast_tcp_SOURCES)
mca_rmcast_tcp_la_LDFLAGS = -module -avoid-version
# Static build: the library listed in component_noinst is built but not installed
noinst_LTLIBRARIES = $(component_noinst)
libmca_rmcast_tcp_la_SOURCES = $(rmcast_tcp_SOURCES)
libmca_rmcast_tcp_la_LIBADD =
libmca_rmcast_tcp_la_LDFLAGS = -module -avoid-version

13
orte/mca/rmcast/tcp/configure.params Обычный файл
Просмотреть файл

@ -0,0 +1,13 @@
# -*- shell-script -*-
#
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Specific to this module
PARAM_CONFIG_FILES="Makefile"

1079
orte/mca/rmcast/tcp/rmcast_tcp.c Обычный файл

Разница между файлами не показана из-за своего большого размера Загрузить разницу

32
orte/mca/rmcast/tcp/rmcast_tcp.h Обычный файл
Просмотреть файл

@ -0,0 +1,32 @@
/*
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* Declarations for the "tcp" rmcast component: the component
* descriptor and the module object that the component hands out
* from its query function.
*/
#ifndef ORTE_RMCAST_TCP_H
#define ORTE_RMCAST_TCP_H
#include "orte_config.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "orte/mca/rmcast/rmcast.h"
BEGIN_C_DECLS
/* Component descriptor; defined in rmcast_tcp_component.c */
ORTE_MODULE_DECLSPEC extern orte_rmcast_base_component_t mca_rmcast_tcp_component;
/* Module object returned by the component's query function
 * NOTE(review): presumably defined in rmcast_tcp.c - confirm
 */
extern orte_rmcast_module_t orte_rmcast_tcp_module;
END_C_DECLS
#endif

90
orte/mca/rmcast/tcp/rmcast_tcp_component.c Обычный файл
Просмотреть файл

@ -0,0 +1,90 @@
/*
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "opal/mca/base/base.h"
#include "orte/util/proc_info.h"
#include "orte/runtime/orte_globals.h"
#include "rmcast_tcp.h"
/*
* Local functions: the component's open, close and query entry points,
* referenced from the mca_rmcast_tcp_component descriptor in this file
*/
static int orte_rmcast_tcp_open(void);
static int orte_rmcast_tcp_close(void);
static int orte_rmcast_tcp_query(mca_base_module_t **module, int *priority);
/*
* Local variables
*
* initialized: set to true by orte_rmcast_tcp_query() when it offers the
* module; it is not read anywhere else in this file
*/
static bool initialized = false;
/*
* Public string showing the rmcast tcp component version number
*/
const char *mca_rmcast_tcp_component_version_string =
"Open MPI tcp rmcast MCA component version " ORTE_VERSION;
/*
* Component descriptor for the "tcp" rmcast component. It names the
* component, stamps it with the ORTE version numbers, and registers the
* open/close/query functions defined in this file.
*/
orte_rmcast_base_component_t mca_rmcast_tcp_component = {
/* First member: framework version macro, component name/version, entry points
 * NOTE(review): presumably the common MCA base component header - confirm
 * against the orte_rmcast_base_component_t definition
 */
{
ORTE_RMCAST_BASE_VERSION_1_0_0,
"tcp", /* MCA component name */
ORTE_MAJOR_VERSION, /* MCA component major version */
ORTE_MINOR_VERSION, /* MCA component minor version */
ORTE_RELEASE_VERSION, /* MCA component release version */
/* Component open, close, and query functions */
orte_rmcast_tcp_open,
orte_rmcast_tcp_close,
orte_rmcast_tcp_query
},
/* Second member: component metadata */
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
}
};
/**
 * Component open hook.
 *
 * Nothing needs to be set up when the component is opened, so this
 * simply reports success.
 *
 * @return ORTE_SUCCESS always
 */
static int orte_rmcast_tcp_open(void)
{
    const int status = ORTE_SUCCESS;

    return status;
}
/**
 * Component close hook.
 *
 * The component holds nothing that must be released on close, so this
 * simply reports success.
 *
 * @return ORTE_SUCCESS always
 */
static int orte_rmcast_tcp_close(void)
{
    const int status = ORTE_SUCCESS;

    return status;
}
/**
 * Component query: decide whether the tcp module can be offered in
 * this process.
 *
 * The module is offered when this process is the HNP, or when an HNP
 * URI is known (orte_process_info.my_hnp_uri is non-NULL). In every
 * other case the component declines.
 *
 * @param module   [out] &orte_rmcast_tcp_module when offered, NULL otherwise
 * @param priority [out] always set to 0
 *
 * @return ORTE_SUCCESS when the module is offered, ORTE_ERROR when it
 *         cannot operate
 */
static int orte_rmcast_tcp_query(mca_base_module_t **module, int *priority)
{
    /* the priority is 0 on both paths - this module is only selected by choice */
    *priority = 0;

    if ((ORTE_PROC_IS_HNP) || NULL != orte_process_info.my_hnp_uri) {
        /* either we are the HNP or we know how to reach it */
        *module = (mca_base_module_t *) &orte_rmcast_tcp_module;
        initialized = true;
        return ORTE_SUCCESS;
    }

    /* not the HNP and no HNP contact info: cannot operate */
    *module = NULL;
    return ORTE_ERROR;
}

Просмотреть файл

@ -171,6 +171,8 @@ ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_msg_packet_t);
/* bootstrap */
#define ORTE_RML_TAG_BOOTSTRAP 35
/* TCP "fake" multicast */
#define ORTE_RML_TAG_MULTICAST 36
#define ORTE_RML_TAG_MAX 100