Rename the routed modules to more accurately reflect what they do and the role they will play in soon-to-come updates.
Add two new APIs to the routed framework - stub them out so that collaborators can work on them in various components without conflicts. Remove the "finalize" call from the select function, which could cause problems because the deselected component had not yet had its initialize function called. This commit was SVN r18369.
Этот коммит содержится в:
родитель
f5311903ee
Коммит
b8bb990acf
@ -93,13 +93,6 @@ orte_routed_base_select(void)
|
||||
|
||||
if (priority > selected_priority) {
|
||||
/* Otherwise this is a normal module and subject to normal selection */
|
||||
if (NULL != selected_module && NULL != selected_module->finalize) {
|
||||
opal_output_verbose(10, orte_routed_base_output,
|
||||
"orte_routed_base_select: component %s deselected - finalizing",
|
||||
selected_component->routed_version.mca_component_name);
|
||||
selected_module->finalize();
|
||||
}
|
||||
|
||||
selected_priority = priority;
|
||||
selected_component = component;
|
||||
selected_module = module;
|
||||
@ -126,7 +119,7 @@ orte_routed_base_select(void)
|
||||
|
||||
/* initialize the selected component */
|
||||
opal_output_verbose(10, orte_routed_base_output,
|
||||
"orte_routed_base_select: initializing selectedl component %s",
|
||||
"orte_routed_base_select: initializing selected component %s",
|
||||
selected_component->routed_version.mca_component_name);
|
||||
if (ORTE_SUCCESS != orte_routed.initialize()) {
|
||||
return ORTE_ERROR;
|
||||
|
@ -9,28 +9,28 @@
|
||||
#
|
||||
|
||||
sources = \
|
||||
routed_tree.h \
|
||||
routed_tree.c \
|
||||
routed_tree_component.c
|
||||
routed_direct.h \
|
||||
routed_direct_component.c \
|
||||
routed_direct.c
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
# (for static builds).
|
||||
|
||||
if OMPI_BUILD_routed_tree_DSO
|
||||
if OMPI_BUILD_routed_direct_DSO
|
||||
component_noinst =
|
||||
component_install = mca_routed_tree.la
|
||||
component_install = mca_routed_direct.la
|
||||
else
|
||||
component_noinst = libmca_routed_tree.la
|
||||
component_noinst = libmca_routed_direct.la
|
||||
component_install =
|
||||
endif
|
||||
|
||||
mcacomponentdir = $(pkglibdir)
|
||||
mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_routed_tree_la_SOURCES = $(sources)
|
||||
mca_routed_tree_la_LDFLAGS = -module -avoid-version
|
||||
mca_routed_direct_la_SOURCES = $(sources)
|
||||
mca_routed_direct_la_LDFLAGS = -module -avoid-version
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_routed_tree_la_SOURCES = $(sources)
|
||||
libmca_routed_tree_la_LDFLAGS = -module -avoid-version
|
||||
libmca_routed_direct_la_SOURCES = $(sources)
|
||||
libmca_routed_direct_la_LDFLAGS = -module -avoid-version
|
||||
|
@ -30,7 +30,7 @@
|
||||
#include "orte/mca/rml/base/rml_contact.h"
|
||||
|
||||
#include "orte/mca/routed/base/base.h"
|
||||
#include "routed_unity.h"
|
||||
#include "routed_direct.h"
|
||||
|
||||
static opal_condition_t cond;
|
||||
static opal_mutex_t lock;
|
||||
@ -43,24 +43,30 @@ static int update_route(orte_process_name_t *target,
|
||||
static orte_process_name_t get_route(orte_process_name_t *target);
|
||||
static int init_routes(orte_jobid_t job, opal_buffer_t *ndat);
|
||||
static int route_lost(const orte_process_name_t *route);
|
||||
static bool route_is_defined(const orte_process_name_t *target);
|
||||
static int update_routing_tree(void);
|
||||
static orte_vpid_t get_routing_tree(opal_list_t *children);
|
||||
static int get_wireup_info(orte_jobid_t job, opal_buffer_t *buf);
|
||||
|
||||
#if OPAL_ENABLE_FT == 1
|
||||
static int unity_ft_event(int state);
|
||||
static int direct_ft_event(int state);
|
||||
#endif
|
||||
|
||||
static orte_process_name_t *lifeline=NULL;
|
||||
|
||||
orte_routed_module_t orte_routed_unity_module = {
|
||||
orte_routed_module_t orte_routed_direct_module = {
|
||||
init,
|
||||
finalize,
|
||||
update_route,
|
||||
get_route,
|
||||
init_routes,
|
||||
route_lost,
|
||||
route_is_defined,
|
||||
update_routing_tree,
|
||||
get_routing_tree,
|
||||
get_wireup_info,
|
||||
#if OPAL_ENABLE_FT == 1
|
||||
unity_ft_event
|
||||
direct_ft_event
|
||||
#else
|
||||
NULL
|
||||
#endif
|
||||
@ -143,7 +149,7 @@ static int update_route(orte_process_name_t *target,
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_unity_update: diff job family routing %s --> %s",
|
||||
"%s routed_direct_update: diff job family routing %s --> %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(target),
|
||||
ORTE_NAME_PRINT(route)));
|
||||
@ -165,7 +171,7 @@ static int update_route(orte_process_name_t *target,
|
||||
direct:
|
||||
/* if it came from our own job family or was direct, there is nothing to do */
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_unity_update: %s --> %s",
|
||||
"%s routed_direct_update: %s --> %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(target),
|
||||
ORTE_NAME_PRINT(route)));
|
||||
@ -202,7 +208,7 @@ static orte_process_name_t get_route(orte_process_name_t *target)
|
||||
|
||||
found:
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_routed_base_output,
|
||||
"%s routed_unity_get(%s) --> %s",
|
||||
"%s routed_direct_get(%s) --> %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(target),
|
||||
ORTE_NAME_PRINT(ret)));
|
||||
@ -231,7 +237,7 @@ static int process_callback(orte_jobid_t job, opal_buffer_t *buffer)
|
||||
while (ORTE_SUCCESS == (rc = opal_dss.unpack(buffer, &rml_uri, &cnt, OPAL_STRING))) {
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output,
|
||||
"%s routed_unity:callback got uri %s",
|
||||
"%s routed_direct:callback got uri %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(NULL == rml_uri) ? "NULL" : rml_uri));
|
||||
|
||||
@ -270,7 +276,7 @@ static int process_callback(orte_jobid_t job, opal_buffer_t *buffer)
|
||||
if (jdata->num_reported == jdata->num_procs) {
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_unity:callback trigger fired on job %s",
|
||||
"%s routed_direct:callback trigger fired on job %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(jdata->jobid)));
|
||||
|
||||
/* update the job state */
|
||||
@ -300,7 +306,7 @@ static int process_callback(orte_jobid_t job, opal_buffer_t *buffer)
|
||||
|
||||
static int init_routes(orte_jobid_t job, opal_buffer_t *ndata)
|
||||
{
|
||||
/* the unity module just sends direct to everyone, so it requires
|
||||
/* the direct module just sends direct to everyone, so it requires
|
||||
* that the RML get loaded with contact info from all of our peers.
|
||||
* We also look for and provide contact info for our local daemon
|
||||
* so we can use it if needed
|
||||
@ -316,12 +322,12 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndata)
|
||||
int rc;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_unity: init routes for daemon job %s\n\thnp_uri %s",
|
||||
"%s routed_direct: init routes for daemon job %s\n\thnp_uri %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(job),
|
||||
(NULL == orte_process_info.my_hnp_uri) ? "NULL" : orte_process_info.my_hnp_uri));
|
||||
if (NULL == ndata) {
|
||||
/* indicates this is being called during orte_init.
|
||||
* since the daemons in the unity component don't route messages,
|
||||
* since the daemons in the direct component don't route messages,
|
||||
* there is nothing for them to do - daemons will send their
|
||||
* contact info as part of the message confirming they are ready
|
||||
* to go. Just get the HNP's name for possible later use
|
||||
@ -343,7 +349,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndata)
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
/* we don't have to update the route as the unity component is
|
||||
/* we don't have to update the route as the direct component is
|
||||
* always "direct"
|
||||
*/
|
||||
|
||||
@ -372,7 +378,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndata)
|
||||
* shouldn't get called during daemon startup. This situation
|
||||
* would occur, though, when we are doing orte_init within the HNP
|
||||
* itself, but we store our data during orte_init anyway
|
||||
* However, for the unity component, I do have to make myself
|
||||
* However, for the direct component, I do have to make myself
|
||||
* available for processing incoming rml contact info messages
|
||||
* from the procs - so setup that receive here
|
||||
*/
|
||||
@ -413,7 +419,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndata)
|
||||
orte_rml_cmd_flag_t command;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_unity: init routes w/non-NULL data",
|
||||
"%s routed_direct: init routes w/non-NULL data",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* extract the RML command from the buffer and discard it - this
|
||||
@ -442,7 +448,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndata)
|
||||
opal_buffer_t buf;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_unity: init routes for proc job %s\n\thnp_uri %s\n\tdaemon uri %s",
|
||||
"%s routed_direct: init routes for proc job %s\n\thnp_uri %s\n\tdaemon uri %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(job),
|
||||
(NULL == orte_process_info.my_hnp_uri) ? "NULL" : orte_process_info.my_hnp_uri,
|
||||
(NULL == orte_process_info.my_daemon_uri) ? "NULL" : orte_process_info.my_daemon_uri));
|
||||
@ -470,14 +476,14 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndata)
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* we don't have to update the route as the unity component is
|
||||
/* we don't have to update the route as the direct component is
|
||||
* always "direct"
|
||||
*/
|
||||
}
|
||||
|
||||
/* setup the hnp - this must always be provided, so
|
||||
* error if it isn't there as we won't know how to complete
|
||||
* the wireup for the unity component
|
||||
* the wireup for the direct component
|
||||
*/
|
||||
if (NULL == orte_process_info.my_hnp_uri) {
|
||||
/* fatal error */
|
||||
@ -486,7 +492,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndata)
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output,
|
||||
"%s routed_unity_init: set hnp contact info and name",
|
||||
"%s routed_direct_init: set hnp contact info and name",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* set the contact info into the hash table */
|
||||
@ -507,12 +513,12 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndata)
|
||||
*/
|
||||
lifeline = ORTE_PROC_MY_HNP;
|
||||
|
||||
/* we don't have to update the route as the unity component is
|
||||
/* we don't have to update the route as the direct component is
|
||||
* always "direct"
|
||||
*/
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output,
|
||||
"%s routed_unity_init: register sync",
|
||||
"%s routed_direct_init: register sync",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* register myself to require that I finalize before exiting
|
||||
@ -525,7 +531,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndata)
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output,
|
||||
"%s routed_unity_init: wait to recv contact info for peers",
|
||||
"%s routed_direct_init: wait to recv contact info for peers",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* now setup a blocking receive and wait right here until we get
|
||||
@ -540,7 +546,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndata)
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output,
|
||||
"%s routed_unity_init: peer contact info recvd",
|
||||
"%s routed_direct_init: peer contact info recvd",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* process it */
|
||||
@ -566,7 +572,7 @@ static int route_lost(const orte_process_name_t *route)
|
||||
if (!orte_finalizing &&
|
||||
NULL != lifeline &&
|
||||
OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, route, lifeline)) {
|
||||
opal_output(0, "%s routed:unity: Connection to lifeline %s lost",
|
||||
opal_output(0, "%s routed:direct: Connection to lifeline %s lost",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(lifeline));
|
||||
return ORTE_ERR_FATAL;
|
||||
@ -577,6 +583,25 @@ static int route_lost(const orte_process_name_t *route)
|
||||
}
|
||||
|
||||
|
||||
/******* stub functions - to be implemented ******/
|
||||
static bool route_is_defined(const orte_process_name_t *target)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static int update_routing_tree(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static orte_vpid_t get_routing_tree(opal_list_t *children)
|
||||
{
|
||||
return ORTE_VPID_INVALID;
|
||||
}
|
||||
|
||||
/*************************************/
|
||||
|
||||
|
||||
static int get_wireup_info(orte_jobid_t job, opal_buffer_t *buf)
|
||||
{
|
||||
int rc;
|
||||
@ -591,7 +616,7 @@ static int get_wireup_info(orte_jobid_t job, opal_buffer_t *buf)
|
||||
}
|
||||
|
||||
#if OPAL_ENABLE_FT == 1
|
||||
static int unity_ft_event(int state)
|
||||
static int direct_ft_event(int state)
|
||||
{
|
||||
int ret, exit_status = ORTE_SUCCESS;
|
||||
|
@ -8,8 +8,8 @@
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef MCA_ROUTED_TREE_ROUTED_TREE_H
|
||||
#define MCA_ROUTED_TREE_ROUTED_TREE_H
|
||||
#ifndef MCA_ROUTED_DIRECT_H
|
||||
#define MCA_ROUTED_DIRECT_H
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/types.h"
|
||||
@ -18,9 +18,11 @@
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
ORTE_MODULE_DECLSPEC extern orte_routed_component_t mca_routed_tree_component;
|
||||
|
||||
extern orte_routed_module_t orte_routed_tree_module;
|
||||
ORTE_MODULE_DECLSPEC extern orte_routed_component_t mca_routed_direct_component;
|
||||
|
||||
extern orte_routed_module_t orte_routed_direct_module;
|
||||
|
||||
|
||||
END_C_DECLS
|
||||
|
@ -12,15 +12,15 @@
|
||||
#include "orte/constants.h"
|
||||
|
||||
|
||||
#include "routed_unity.h"
|
||||
#include "routed_direct.h"
|
||||
|
||||
static orte_routed_module_t* routed_unity_init(int* priority);
|
||||
static orte_routed_module_t* routed_direct_init(int* priority);
|
||||
|
||||
|
||||
/**
|
||||
* component definition
|
||||
*/
|
||||
orte_routed_component_t mca_routed_unity_component = {
|
||||
orte_routed_component_t mca_routed_direct_component = {
|
||||
/* First, the mca_base_component_t struct containing meta
|
||||
information about the component itself */
|
||||
|
||||
@ -30,7 +30,7 @@ orte_routed_component_t mca_routed_unity_component = {
|
||||
|
||||
ORTE_ROUTED_BASE_VERSION_1_0_0,
|
||||
|
||||
"unity", /* MCA component name */
|
||||
"direct", /* MCA component name */
|
||||
ORTE_MAJOR_VERSION, /* MCA component major version */
|
||||
ORTE_MINOR_VERSION, /* MCA component minor version */
|
||||
ORTE_RELEASE_VERSION, /* MCA component release version */
|
||||
@ -43,13 +43,13 @@ orte_routed_component_t mca_routed_unity_component = {
|
||||
/* This component can be checkpointed */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
},
|
||||
routed_unity_init
|
||||
routed_direct_init
|
||||
};
|
||||
|
||||
static orte_routed_module_t*
|
||||
routed_unity_init(int* priority)
|
||||
routed_direct_init(int* priority)
|
||||
{
|
||||
*priority = 10;
|
||||
|
||||
return &orte_routed_unity_module;
|
||||
return &orte_routed_direct_module;
|
||||
}
|
@ -9,28 +9,28 @@
|
||||
#
|
||||
|
||||
sources = \
|
||||
routed_unity.h \
|
||||
routed_unity_component.c \
|
||||
routed_unity.c
|
||||
routed_linear.h \
|
||||
routed_linear.c \
|
||||
routed_linear_component.c
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
# (for static builds).
|
||||
|
||||
if OMPI_BUILD_routed_unity_DSO
|
||||
if OMPI_BUILD_routed_linear_DSO
|
||||
component_noinst =
|
||||
component_install = mca_routed_unity.la
|
||||
component_install = mca_routed_linear.la
|
||||
else
|
||||
component_noinst = libmca_routed_unity.la
|
||||
component_noinst = libmca_routed_linear.la
|
||||
component_install =
|
||||
endif
|
||||
|
||||
mcacomponentdir = $(pkglibdir)
|
||||
mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_routed_unity_la_SOURCES = $(sources)
|
||||
mca_routed_unity_la_LDFLAGS = -module -avoid-version
|
||||
mca_routed_linear_la_SOURCES = $(sources)
|
||||
mca_routed_linear_la_LDFLAGS = -module -avoid-version
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_routed_unity_la_SOURCES = $(sources)
|
||||
libmca_routed_unity_la_LDFLAGS = -module -avoid-version
|
||||
libmca_routed_linear_la_SOURCES = $(sources)
|
||||
libmca_routed_linear_la_LDFLAGS = -module -avoid-version
|
||||
|
@ -28,7 +28,7 @@
|
||||
#include "orte/mca/rml/base/rml_contact.h"
|
||||
|
||||
#include "orte/mca/routed/base/base.h"
|
||||
#include "routed_tree.h"
|
||||
#include "routed_linear.h"
|
||||
|
||||
static int init(void);
|
||||
static int finalize(void);
|
||||
@ -37,24 +37,30 @@ static int update_route(orte_process_name_t *target,
|
||||
static orte_process_name_t get_route(orte_process_name_t *target);
|
||||
static int init_routes(orte_jobid_t job, opal_buffer_t *ndat);
|
||||
static int route_lost(const orte_process_name_t *route);
|
||||
static bool route_is_defined(const orte_process_name_t *target);
|
||||
static int update_routing_tree(void);
|
||||
static orte_vpid_t get_routing_tree(opal_list_t *children);
|
||||
static int get_wireup_info(orte_jobid_t job, opal_buffer_t *buf);
|
||||
|
||||
#if OPAL_ENABLE_FT == 1
|
||||
static int tree_ft_event(int state);
|
||||
static int linear_ft_event(int state);
|
||||
#endif
|
||||
|
||||
static orte_process_name_t *lifeline=NULL;
|
||||
|
||||
orte_routed_module_t orte_routed_tree_module = {
|
||||
orte_routed_module_t orte_routed_linear_module = {
|
||||
init,
|
||||
finalize,
|
||||
update_route,
|
||||
get_route,
|
||||
init_routes,
|
||||
route_lost,
|
||||
route_is_defined,
|
||||
update_routing_tree,
|
||||
get_routing_tree,
|
||||
get_wireup_info,
|
||||
#if OPAL_ENABLE_FT == 1
|
||||
tree_ft_event
|
||||
linear_ft_event
|
||||
#else
|
||||
NULL
|
||||
#endif
|
||||
@ -148,7 +154,7 @@ static int update_route(orte_process_name_t *target,
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_tree_update: %s --> %s",
|
||||
"%s routed_linear_update: %s --> %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(target),
|
||||
ORTE_NAME_PRINT(route)));
|
||||
@ -238,7 +244,7 @@ static orte_process_name_t get_route(orte_process_name_t *target)
|
||||
found:
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output,
|
||||
"%s routed_tree_get(%s) --> %s",
|
||||
"%s routed_linear_get(%s) --> %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(target),
|
||||
ORTE_NAME_PRINT(ret)));
|
||||
@ -267,7 +273,7 @@ static int process_callback(orte_jobid_t job, opal_buffer_t *buffer)
|
||||
while (ORTE_SUCCESS == (rc = opal_dss.unpack(buffer, &rml_uri, &cnt, OPAL_STRING))) {
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output,
|
||||
"%s routed_tree:callback got uri %s",
|
||||
"%s routed_linear:callback got uri %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(NULL == rml_uri) ? "NULL" : rml_uri));
|
||||
|
||||
@ -313,7 +319,7 @@ static int process_callback(orte_jobid_t job, opal_buffer_t *buffer)
|
||||
|
||||
static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
|
||||
{
|
||||
/* the tree module routes all proc communications through
|
||||
/* the linear module routes all proc communications through
|
||||
* the local daemon. Daemons must identify which of their
|
||||
* daemon-peers is "hosting" the specified recipient and
|
||||
* route the message to that daemon. Daemon contact info
|
||||
@ -336,7 +342,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
|
||||
if (orte_process_info.daemon) {
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_tree: init routes for daemon job %s\n\thnp_uri %s",
|
||||
"%s routed_linear: init routes for daemon job %s\n\thnp_uri %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(job),
|
||||
(NULL == orte_process_info.my_hnp_uri) ? "NULL" : orte_process_info.my_hnp_uri));
|
||||
@ -394,7 +400,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output,
|
||||
"%s routed_tree: completed init routes",
|
||||
"%s routed_linear: completed init routes",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
@ -404,7 +410,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
|
||||
if (orte_process_info.hnp) {
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_tree: init routes for HNP job %s",
|
||||
"%s routed_linear: init routes for HNP job %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(job)));
|
||||
|
||||
@ -454,7 +460,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
|
||||
int rc;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_tree: init routes w/non-NULL data",
|
||||
"%s routed_linear: init routes w/non-NULL data",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* send the buffer to the proper tag on the daemon */
|
||||
@ -474,7 +480,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
|
||||
*/
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
|
||||
"%s routed_tree: init routes for proc job %s\n\thnp_uri %s\n\tdaemon uri %s",
|
||||
"%s routed_linear: init routes for proc job %s\n\thnp_uri %s\n\tdaemon uri %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(job),
|
||||
(NULL == orte_process_info.my_hnp_uri) ? "NULL" : orte_process_info.my_hnp_uri,
|
||||
(NULL == orte_process_info.my_daemon_uri) ? "NULL" : orte_process_info.my_daemon_uri));
|
||||
@ -485,7 +491,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
|
||||
*/
|
||||
opal_output(0, "%s ERROR: Failed to identify the local daemon's URI",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
opal_output(0, "%s ERROR: This is a fatal condition when the tree router",
|
||||
opal_output(0, "%s ERROR: This is a fatal condition when the linear router",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
opal_output(0, "%s ERROR: has been selected - either select the unity router",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
@ -561,7 +567,7 @@ static int route_lost(const orte_process_name_t *route)
|
||||
if (!orte_finalizing &&
|
||||
NULL != lifeline &&
|
||||
OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, route, lifeline)) {
|
||||
opal_output(0, "%s routed:tree: Connection to lifeline %s lost",
|
||||
opal_output(0, "%s routed:linear: Connection to lifeline %s lost",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(lifeline));
|
||||
return ORTE_ERR_FATAL;
|
||||
@ -572,6 +578,26 @@ static int route_lost(const orte_process_name_t *route)
|
||||
}
|
||||
|
||||
|
||||
|
||||
/******* stub functions - to be implemented ******/
|
||||
static bool route_is_defined(const orte_process_name_t *target)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static int update_routing_tree(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static orte_vpid_t get_routing_tree(opal_list_t *children)
|
||||
{
|
||||
return ORTE_VPID_INVALID;
|
||||
}
|
||||
|
||||
/*************************************/
|
||||
|
||||
|
||||
static int get_wireup_info(orte_jobid_t job, opal_buffer_t *buf)
|
||||
{
|
||||
int rc;
|
||||
@ -593,7 +619,7 @@ static int get_wireup_info(orte_jobid_t job, opal_buffer_t *buf)
|
||||
}
|
||||
|
||||
#if OPAL_ENABLE_FT == 1
|
||||
static int tree_ft_event(int state)
|
||||
static int linear_ft_event(int state)
|
||||
{
|
||||
int ret, exit_status = ORTE_SUCCESS;
|
||||
|
@ -8,8 +8,8 @@
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef MCA_ROUTED_UNITY_ROUTED_UNITY_H
|
||||
#define MCA_ROUTED_UNITY_ROUTED_UNITY_H
|
||||
#ifndef MCA_ROUTED_LINEAR_H
|
||||
#define MCA_ROUTED_LINEAR_H
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/types.h"
|
||||
@ -18,11 +18,9 @@
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
ORTE_MODULE_DECLSPEC extern orte_routed_component_t mca_routed_linear_component;
|
||||
|
||||
ORTE_MODULE_DECLSPEC extern orte_routed_component_t mca_routed_unity_component;
|
||||
|
||||
extern orte_routed_module_t orte_routed_unity_module;
|
||||
|
||||
extern orte_routed_module_t orte_routed_linear_module;
|
||||
|
||||
END_C_DECLS
|
||||
|
@ -21,14 +21,14 @@
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
|
||||
#include "orte/mca/routed/base/base.h"
|
||||
#include "routed_tree.h"
|
||||
#include "routed_linear.h"
|
||||
|
||||
static orte_routed_module_t* routed_tree_init(int* priority);
|
||||
static orte_routed_module_t* routed_linear_init(int* priority);
|
||||
|
||||
/**
|
||||
* component definition
|
||||
*/
|
||||
orte_routed_component_t mca_routed_tree_component = {
|
||||
orte_routed_component_t mca_routed_linear_component = {
|
||||
/* First, the mca_base_component_t struct containing meta
|
||||
information about the component itself */
|
||||
|
||||
@ -38,7 +38,7 @@ orte_routed_component_t mca_routed_tree_component = {
|
||||
|
||||
ORTE_ROUTED_BASE_VERSION_1_0_0,
|
||||
|
||||
"tree", /* MCA component name */
|
||||
"linear", /* MCA component name */
|
||||
ORTE_MAJOR_VERSION, /* MCA component major version */
|
||||
ORTE_MINOR_VERSION, /* MCA component minor version */
|
||||
ORTE_RELEASE_VERSION, /* MCA component release version */
|
||||
@ -51,14 +51,14 @@ orte_routed_component_t mca_routed_tree_component = {
|
||||
/* This component can be checkpointed */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
},
|
||||
routed_tree_init
|
||||
routed_linear_init
|
||||
};
|
||||
|
||||
static orte_routed_module_t*
|
||||
routed_tree_init(int* priority)
|
||||
routed_linear_init(int* priority)
|
||||
{
|
||||
*priority = 70;
|
||||
|
||||
return &orte_routed_tree_module;
|
||||
return &orte_routed_linear_module;
|
||||
}
|
||||
|
@ -171,6 +171,20 @@ typedef int (*orte_routed_module_init_routes_fn_t)(orte_jobid_t job, opal_buffer
|
||||
*/
|
||||
typedef int (*orte_routed_module_route_lost_fn_t)(const orte_process_name_t *route);
|
||||
|
||||
/*
|
||||
* Is this route defined?
|
||||
*
|
||||
* Check to see if a route to the specified target has been defined. The
|
||||
* function returns "true" if it has, and "false" if no route to the
|
||||
* target was previously defined.
|
||||
*
|
||||
* This is needed because routed modules will return their "wildcard"
|
||||
* route if we request a route to a target that they don't know about.
|
||||
* In some cases, though, we truly -do- need to know if a route was
|
||||
* specifically defined.
|
||||
*/
|
||||
typedef bool (*orte_routed_module_route_is_defined_fn_t)(const orte_process_name_t *target);
|
||||
|
||||
/**
|
||||
* Get wireup data for the specified job
|
||||
*
|
||||
@ -181,6 +195,25 @@ typedef int (*orte_routed_module_route_lost_fn_t)(const orte_process_name_t *rou
|
||||
typedef int (*orte_routed_module_get_wireup_info_fn_t)(orte_jobid_t job,
|
||||
opal_buffer_t *buf);
|
||||
|
||||
/*
|
||||
* Update the module's routing tree for this process
|
||||
*
|
||||
* Called only by a daemon and the HNP, this function creates a list
|
||||
* of "leaves" for this process and identifies the vpid of the parent
|
||||
* sitting above this process in the tree.
|
||||
*
|
||||
* @retval ORTE_SUCCESS The operation completed successfully
|
||||
* @retval ORTE_ERROR_xxx The specified error occurred
|
||||
*/
|
||||
typedef int (*orte_routed_module_update_routing_tree_fn_t)(void);
|
||||
|
||||
/*
|
||||
* Get the routing tree for this process
|
||||
*
|
||||
* Fills the provided list with the direct children of this process
|
||||
* in the routing tree, and returns the vpid of the parent
|
||||
*/
|
||||
typedef orte_vpid_t (*orte_routed_module_get_routing_tree_fn_t)(opal_list_t *children);
|
||||
|
||||
/**
|
||||
* Handle fault tolerance updates
|
||||
@ -211,6 +244,10 @@ struct orte_routed_module_t {
|
||||
orte_routed_module_get_route_fn_t get_route;
|
||||
orte_routed_module_init_routes_fn_t init_routes;
|
||||
orte_routed_module_route_lost_fn_t route_lost;
|
||||
orte_routed_module_route_is_defined_fn_t route_is_defined;
|
||||
/* fns for daemons */
|
||||
orte_routed_module_update_routing_tree_fn_t update_routing_tree;
|
||||
orte_routed_module_get_routing_tree_fn_t get_routing_tree;
|
||||
orte_routed_module_get_wireup_info_fn_t get_wireup_info;
|
||||
/* FT Notification */
|
||||
orte_routed_module_ft_event_fn_t ft_event;
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user