1
1

Rename the routed modules to more accurately reflect what they do and the role they will play in soon-to-come updates.

Add two new APIs to the routed framework - stub them out so that collaborators can work on them in various components without conflicts.

Remove a "finalize" call from the select function; it could cause problems because the deselected component had not yet had its initialize called.

This commit was SVN r18369.
Этот коммит содержится в:
Ralph Castain 2008-05-05 02:59:09 +00:00
родитель f5311903ee
Коммит b8bb990acf
12 изменённых файлов: 174 добавлений и 93 удалений

Просмотреть файл

@ -93,13 +93,6 @@ orte_routed_base_select(void)
if (priority > selected_priority) { if (priority > selected_priority) {
/* Otherwise this is a normal module and subject to normal selection */ /* Otherwise this is a normal module and subject to normal selection */
if (NULL != selected_module && NULL != selected_module->finalize) {
opal_output_verbose(10, orte_routed_base_output,
"orte_routed_base_select: component %s deselected - finalizing",
selected_component->routed_version.mca_component_name);
selected_module->finalize();
}
selected_priority = priority; selected_priority = priority;
selected_component = component; selected_component = component;
selected_module = module; selected_module = module;
@ -126,7 +119,7 @@ orte_routed_base_select(void)
/* initialize the selected component */ /* initialize the selected component */
opal_output_verbose(10, orte_routed_base_output, opal_output_verbose(10, orte_routed_base_output,
"orte_routed_base_select: initializing selectedl component %s", "orte_routed_base_select: initializing selected component %s",
selected_component->routed_version.mca_component_name); selected_component->routed_version.mca_component_name);
if (ORTE_SUCCESS != orte_routed.initialize()) { if (ORTE_SUCCESS != orte_routed.initialize()) {
return ORTE_ERROR; return ORTE_ERROR;

Просмотреть файл

@ -9,28 +9,28 @@
# #
sources = \ sources = \
routed_tree.h \ routed_direct.h \
routed_tree.c \ routed_direct_component.c \
routed_tree_component.c routed_direct.c
# Make the output library in this directory, and name it either # Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la # mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds). # (for static builds).
if OMPI_BUILD_routed_tree_DSO if OMPI_BUILD_routed_direct_DSO
component_noinst = component_noinst =
component_install = mca_routed_tree.la component_install = mca_routed_direct.la
else else
component_noinst = libmca_routed_tree.la component_noinst = libmca_routed_direct.la
component_install = component_install =
endif endif
mcacomponentdir = $(pkglibdir) mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component_install) mcacomponent_LTLIBRARIES = $(component_install)
mca_routed_tree_la_SOURCES = $(sources) mca_routed_direct_la_SOURCES = $(sources)
mca_routed_tree_la_LDFLAGS = -module -avoid-version mca_routed_direct_la_LDFLAGS = -module -avoid-version
noinst_LTLIBRARIES = $(component_noinst) noinst_LTLIBRARIES = $(component_noinst)
libmca_routed_tree_la_SOURCES = $(sources) libmca_routed_direct_la_SOURCES = $(sources)
libmca_routed_tree_la_LDFLAGS = -module -avoid-version libmca_routed_direct_la_LDFLAGS = -module -avoid-version

Просмотреть файл

@ -30,7 +30,7 @@
#include "orte/mca/rml/base/rml_contact.h" #include "orte/mca/rml/base/rml_contact.h"
#include "orte/mca/routed/base/base.h" #include "orte/mca/routed/base/base.h"
#include "routed_unity.h" #include "routed_direct.h"
static opal_condition_t cond; static opal_condition_t cond;
static opal_mutex_t lock; static opal_mutex_t lock;
@ -43,24 +43,30 @@ static int update_route(orte_process_name_t *target,
static orte_process_name_t get_route(orte_process_name_t *target); static orte_process_name_t get_route(orte_process_name_t *target);
static int init_routes(orte_jobid_t job, opal_buffer_t *ndat); static int init_routes(orte_jobid_t job, opal_buffer_t *ndat);
static int route_lost(const orte_process_name_t *route); static int route_lost(const orte_process_name_t *route);
static bool route_is_defined(const orte_process_name_t *target);
static int update_routing_tree(void);
static orte_vpid_t get_routing_tree(opal_list_t *children);
static int get_wireup_info(orte_jobid_t job, opal_buffer_t *buf); static int get_wireup_info(orte_jobid_t job, opal_buffer_t *buf);
#if OPAL_ENABLE_FT == 1 #if OPAL_ENABLE_FT == 1
static int unity_ft_event(int state); static int direct_ft_event(int state);
#endif #endif
static orte_process_name_t *lifeline=NULL; static orte_process_name_t *lifeline=NULL;
orte_routed_module_t orte_routed_unity_module = { orte_routed_module_t orte_routed_direct_module = {
init, init,
finalize, finalize,
update_route, update_route,
get_route, get_route,
init_routes, init_routes,
route_lost, route_lost,
route_is_defined,
update_routing_tree,
get_routing_tree,
get_wireup_info, get_wireup_info,
#if OPAL_ENABLE_FT == 1 #if OPAL_ENABLE_FT == 1
unity_ft_event direct_ft_event
#else #else
NULL NULL
#endif #endif
@ -143,7 +149,7 @@ static int update_route(orte_process_name_t *target,
} }
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output, OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
"%s routed_unity_update: diff job family routing %s --> %s", "%s routed_direct_update: diff job family routing %s --> %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(target), ORTE_NAME_PRINT(target),
ORTE_NAME_PRINT(route))); ORTE_NAME_PRINT(route)));
@ -165,7 +171,7 @@ static int update_route(orte_process_name_t *target,
direct: direct:
/* if it came from our own job family or was direct, there is nothing to do */ /* if it came from our own job family or was direct, there is nothing to do */
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output, OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
"%s routed_unity_update: %s --> %s", "%s routed_direct_update: %s --> %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(target), ORTE_NAME_PRINT(target),
ORTE_NAME_PRINT(route))); ORTE_NAME_PRINT(route)));
@ -202,7 +208,7 @@ static orte_process_name_t get_route(orte_process_name_t *target)
found: found:
OPAL_OUTPUT_VERBOSE((5, orte_routed_base_output, OPAL_OUTPUT_VERBOSE((5, orte_routed_base_output,
"%s routed_unity_get(%s) --> %s", "%s routed_direct_get(%s) --> %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(target), ORTE_NAME_PRINT(target),
ORTE_NAME_PRINT(ret))); ORTE_NAME_PRINT(ret)));
@ -231,7 +237,7 @@ static int process_callback(orte_jobid_t job, opal_buffer_t *buffer)
while (ORTE_SUCCESS == (rc = opal_dss.unpack(buffer, &rml_uri, &cnt, OPAL_STRING))) { while (ORTE_SUCCESS == (rc = opal_dss.unpack(buffer, &rml_uri, &cnt, OPAL_STRING))) {
OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output, OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output,
"%s routed_unity:callback got uri %s", "%s routed_direct:callback got uri %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(NULL == rml_uri) ? "NULL" : rml_uri)); (NULL == rml_uri) ? "NULL" : rml_uri));
@ -270,7 +276,7 @@ static int process_callback(orte_jobid_t job, opal_buffer_t *buffer)
if (jdata->num_reported == jdata->num_procs) { if (jdata->num_reported == jdata->num_procs) {
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output, OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
"%s routed_unity:callback trigger fired on job %s", "%s routed_direct:callback trigger fired on job %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(jdata->jobid))); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(jdata->jobid)));
/* update the job state */ /* update the job state */
@ -300,7 +306,7 @@ static int process_callback(orte_jobid_t job, opal_buffer_t *buffer)
static int init_routes(orte_jobid_t job, opal_buffer_t *ndata) static int init_routes(orte_jobid_t job, opal_buffer_t *ndata)
{ {
/* the unity module just sends direct to everyone, so it requires /* the direct module just sends direct to everyone, so it requires
* that the RML get loaded with contact info from all of our peers. * that the RML get loaded with contact info from all of our peers.
* We also look for and provide contact info for our local daemon * We also look for and provide contact info for our local daemon
* so we can use it if needed * so we can use it if needed
@ -316,12 +322,12 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndata)
int rc; int rc;
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output, OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
"%s routed_unity: init routes for daemon job %s\n\thnp_uri %s", "%s routed_direct: init routes for daemon job %s\n\thnp_uri %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(job), ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(job),
(NULL == orte_process_info.my_hnp_uri) ? "NULL" : orte_process_info.my_hnp_uri)); (NULL == orte_process_info.my_hnp_uri) ? "NULL" : orte_process_info.my_hnp_uri));
if (NULL == ndata) { if (NULL == ndata) {
/* indicates this is being called during orte_init. /* indicates this is being called during orte_init.
* since the daemons in the unity component don't route messages, * since the daemons in the direct component don't route messages,
* there is nothing for them to do - daemons will send their * there is nothing for them to do - daemons will send their
* contact info as part of the message confirming they are ready * contact info as part of the message confirming they are ready
* to go. Just get the HNP's name for possible later use * to go. Just get the HNP's name for possible later use
@ -343,7 +349,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndata)
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* we don't have to update the route as the unity component is /* we don't have to update the route as the direct component is
* always "direct" * always "direct"
*/ */
@ -372,7 +378,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndata)
* shouldn't get called during daemon startup. This situation * shouldn't get called during daemon startup. This situation
* would occur, though, when we are doing orte_init within the HNP * would occur, though, when we are doing orte_init within the HNP
* itself, but we store our data during orte_init anyway * itself, but we store our data during orte_init anyway
* However, for the unity component, I do have to make myself * However, for the direct component, I do have to make myself
* available for processing incoming rml contact info messages * available for processing incoming rml contact info messages
* from the procs - so setup that receive here * from the procs - so setup that receive here
*/ */
@ -413,7 +419,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndata)
orte_rml_cmd_flag_t command; orte_rml_cmd_flag_t command;
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output, OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
"%s routed_unity: init routes w/non-NULL data", "%s routed_direct: init routes w/non-NULL data",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
/* extract the RML command from the buffer and discard it - this /* extract the RML command from the buffer and discard it - this
@ -442,7 +448,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndata)
opal_buffer_t buf; opal_buffer_t buf;
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output, OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
"%s routed_unity: init routes for proc job %s\n\thnp_uri %s\n\tdaemon uri %s", "%s routed_direct: init routes for proc job %s\n\thnp_uri %s\n\tdaemon uri %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(job), ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(job),
(NULL == orte_process_info.my_hnp_uri) ? "NULL" : orte_process_info.my_hnp_uri, (NULL == orte_process_info.my_hnp_uri) ? "NULL" : orte_process_info.my_hnp_uri,
(NULL == orte_process_info.my_daemon_uri) ? "NULL" : orte_process_info.my_daemon_uri)); (NULL == orte_process_info.my_daemon_uri) ? "NULL" : orte_process_info.my_daemon_uri));
@ -470,14 +476,14 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndata)
return rc; return rc;
} }
/* we don't have to update the route as the unity component is /* we don't have to update the route as the direct component is
* always "direct" * always "direct"
*/ */
} }
/* setup the hnp - this must always be provided, so /* setup the hnp - this must always be provided, so
* error if it isn't there as we won't know how to complete * error if it isn't there as we won't know how to complete
* the wireup for the unity component * the wireup for the direct component
*/ */
if (NULL == orte_process_info.my_hnp_uri) { if (NULL == orte_process_info.my_hnp_uri) {
/* fatal error */ /* fatal error */
@ -486,7 +492,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndata)
} }
OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output, OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output,
"%s routed_unity_init: set hnp contact info and name", "%s routed_direct_init: set hnp contact info and name",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
/* set the contact info into the hash table */ /* set the contact info into the hash table */
@ -507,12 +513,12 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndata)
*/ */
lifeline = ORTE_PROC_MY_HNP; lifeline = ORTE_PROC_MY_HNP;
/* we don't have to update the route as the unity component is /* we don't have to update the route as the direct component is
* always "direct" * always "direct"
*/ */
OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output, OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output,
"%s routed_unity_init: register sync", "%s routed_direct_init: register sync",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
/* register myself to require that I finalize before exiting /* register myself to require that I finalize before exiting
@ -525,7 +531,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndata)
} }
OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output, OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output,
"%s routed_unity_init: wait to recv contact info for peers", "%s routed_direct_init: wait to recv contact info for peers",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
/* now setup a blocking receive and wait right here until we get /* now setup a blocking receive and wait right here until we get
@ -540,7 +546,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndata)
} }
OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output, OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output,
"%s routed_unity_init: peer contact info recvd", "%s routed_direct_init: peer contact info recvd",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
/* process it */ /* process it */
@ -566,7 +572,7 @@ static int route_lost(const orte_process_name_t *route)
if (!orte_finalizing && if (!orte_finalizing &&
NULL != lifeline && NULL != lifeline &&
OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, route, lifeline)) { OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, route, lifeline)) {
opal_output(0, "%s routed:unity: Connection to lifeline %s lost", opal_output(0, "%s routed:direct: Connection to lifeline %s lost",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(lifeline)); ORTE_NAME_PRINT(lifeline));
return ORTE_ERR_FATAL; return ORTE_ERR_FATAL;
@ -577,6 +583,25 @@ static int route_lost(const orte_process_name_t *route)
} }
/******* stub functions - to be implemented ******/
/*
 * Placeholder implementation (to be filled in later): no lookup is
 * performed, and every target is reported as having a defined route.
 */
static bool route_is_defined(const orte_process_name_t *target)
{
    (void)target;   /* not consulted by the stub */

    return true;
}
/*
 * Placeholder implementation (to be filled in later): does no work and
 * always reports success.
 */
static int update_routing_tree(void)
{
    int rc = ORTE_SUCCESS;

    return rc;
}
/*
 * Placeholder implementation (to be filled in later): the children list
 * is left unmodified and ORTE_VPID_INVALID is returned.
 */
static orte_vpid_t get_routing_tree(opal_list_t *children)
{
    (void)children;   /* left untouched by the stub */

    return ORTE_VPID_INVALID;
}
/*************************************/
static int get_wireup_info(orte_jobid_t job, opal_buffer_t *buf) static int get_wireup_info(orte_jobid_t job, opal_buffer_t *buf)
{ {
int rc; int rc;
@ -591,7 +616,7 @@ static int get_wireup_info(orte_jobid_t job, opal_buffer_t *buf)
} }
#if OPAL_ENABLE_FT == 1 #if OPAL_ENABLE_FT == 1
static int unity_ft_event(int state) static int direct_ft_event(int state)
{ {
int ret, exit_status = ORTE_SUCCESS; int ret, exit_status = ORTE_SUCCESS;

Просмотреть файл

@ -8,8 +8,8 @@
* $HEADER$ * $HEADER$
*/ */
#ifndef MCA_ROUTED_TREE_ROUTED_TREE_H #ifndef MCA_ROUTED_DIRECT_H
#define MCA_ROUTED_TREE_ROUTED_TREE_H #define MCA_ROUTED_DIRECT_H
#include "orte_config.h" #include "orte_config.h"
#include "orte/types.h" #include "orte/types.h"
@ -18,9 +18,11 @@
BEGIN_C_DECLS BEGIN_C_DECLS
ORTE_MODULE_DECLSPEC extern orte_routed_component_t mca_routed_tree_component;
extern orte_routed_module_t orte_routed_tree_module; ORTE_MODULE_DECLSPEC extern orte_routed_component_t mca_routed_direct_component;
extern orte_routed_module_t orte_routed_direct_module;
END_C_DECLS END_C_DECLS

Просмотреть файл

@ -12,15 +12,15 @@
#include "orte/constants.h" #include "orte/constants.h"
#include "routed_unity.h" #include "routed_direct.h"
static orte_routed_module_t* routed_unity_init(int* priority); static orte_routed_module_t* routed_direct_init(int* priority);
/** /**
* component definition * component definition
*/ */
orte_routed_component_t mca_routed_unity_component = { orte_routed_component_t mca_routed_direct_component = {
/* First, the mca_base_component_t struct containing meta /* First, the mca_base_component_t struct containing meta
information about the component itself */ information about the component itself */
@ -30,7 +30,7 @@ orte_routed_component_t mca_routed_unity_component = {
ORTE_ROUTED_BASE_VERSION_1_0_0, ORTE_ROUTED_BASE_VERSION_1_0_0,
"unity", /* MCA component name */ "direct", /* MCA component name */
ORTE_MAJOR_VERSION, /* MCA component major version */ ORTE_MAJOR_VERSION, /* MCA component major version */
ORTE_MINOR_VERSION, /* MCA component minor version */ ORTE_MINOR_VERSION, /* MCA component minor version */
ORTE_RELEASE_VERSION, /* MCA component release version */ ORTE_RELEASE_VERSION, /* MCA component release version */
@ -43,13 +43,13 @@ orte_routed_component_t mca_routed_unity_component = {
/* This component can be checkpointed */ /* This component can be checkpointed */
MCA_BASE_METADATA_PARAM_CHECKPOINT MCA_BASE_METADATA_PARAM_CHECKPOINT
}, },
routed_unity_init routed_direct_init
}; };
static orte_routed_module_t* static orte_routed_module_t*
routed_unity_init(int* priority) routed_direct_init(int* priority)
{ {
*priority = 10; *priority = 10;
return &orte_routed_unity_module; return &orte_routed_direct_module;
} }

Просмотреть файл

@ -9,28 +9,28 @@
# #
sources = \ sources = \
routed_unity.h \ routed_linear.h \
routed_unity_component.c \ routed_linear.c \
routed_unity.c routed_linear_component.c
# Make the output library in this directory, and name it either # Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la # mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds). # (for static builds).
if OMPI_BUILD_routed_unity_DSO if OMPI_BUILD_routed_linear_DSO
component_noinst = component_noinst =
component_install = mca_routed_unity.la component_install = mca_routed_linear.la
else else
component_noinst = libmca_routed_unity.la component_noinst = libmca_routed_linear.la
component_install = component_install =
endif endif
mcacomponentdir = $(pkglibdir) mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component_install) mcacomponent_LTLIBRARIES = $(component_install)
mca_routed_unity_la_SOURCES = $(sources) mca_routed_linear_la_SOURCES = $(sources)
mca_routed_unity_la_LDFLAGS = -module -avoid-version mca_routed_linear_la_LDFLAGS = -module -avoid-version
noinst_LTLIBRARIES = $(component_noinst) noinst_LTLIBRARIES = $(component_noinst)
libmca_routed_unity_la_SOURCES = $(sources) libmca_routed_linear_la_SOURCES = $(sources)
libmca_routed_unity_la_LDFLAGS = -module -avoid-version libmca_routed_linear_la_LDFLAGS = -module -avoid-version

Просмотреть файл

@ -28,7 +28,7 @@
#include "orte/mca/rml/base/rml_contact.h" #include "orte/mca/rml/base/rml_contact.h"
#include "orte/mca/routed/base/base.h" #include "orte/mca/routed/base/base.h"
#include "routed_tree.h" #include "routed_linear.h"
static int init(void); static int init(void);
static int finalize(void); static int finalize(void);
@ -37,24 +37,30 @@ static int update_route(orte_process_name_t *target,
static orte_process_name_t get_route(orte_process_name_t *target); static orte_process_name_t get_route(orte_process_name_t *target);
static int init_routes(orte_jobid_t job, opal_buffer_t *ndat); static int init_routes(orte_jobid_t job, opal_buffer_t *ndat);
static int route_lost(const orte_process_name_t *route); static int route_lost(const orte_process_name_t *route);
static bool route_is_defined(const orte_process_name_t *target);
static int update_routing_tree(void);
static orte_vpid_t get_routing_tree(opal_list_t *children);
static int get_wireup_info(orte_jobid_t job, opal_buffer_t *buf); static int get_wireup_info(orte_jobid_t job, opal_buffer_t *buf);
#if OPAL_ENABLE_FT == 1 #if OPAL_ENABLE_FT == 1
static int tree_ft_event(int state); static int linear_ft_event(int state);
#endif #endif
static orte_process_name_t *lifeline=NULL; static orte_process_name_t *lifeline=NULL;
orte_routed_module_t orte_routed_tree_module = { orte_routed_module_t orte_routed_linear_module = {
init, init,
finalize, finalize,
update_route, update_route,
get_route, get_route,
init_routes, init_routes,
route_lost, route_lost,
route_is_defined,
update_routing_tree,
get_routing_tree,
get_wireup_info, get_wireup_info,
#if OPAL_ENABLE_FT == 1 #if OPAL_ENABLE_FT == 1
tree_ft_event linear_ft_event
#else #else
NULL NULL
#endif #endif
@ -148,7 +154,7 @@ static int update_route(orte_process_name_t *target,
} }
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output, OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
"%s routed_tree_update: %s --> %s", "%s routed_linear_update: %s --> %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(target), ORTE_NAME_PRINT(target),
ORTE_NAME_PRINT(route))); ORTE_NAME_PRINT(route)));
@ -238,7 +244,7 @@ static orte_process_name_t get_route(orte_process_name_t *target)
found: found:
OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output, OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output,
"%s routed_tree_get(%s) --> %s", "%s routed_linear_get(%s) --> %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(target), ORTE_NAME_PRINT(target),
ORTE_NAME_PRINT(ret))); ORTE_NAME_PRINT(ret)));
@ -267,7 +273,7 @@ static int process_callback(orte_jobid_t job, opal_buffer_t *buffer)
while (ORTE_SUCCESS == (rc = opal_dss.unpack(buffer, &rml_uri, &cnt, OPAL_STRING))) { while (ORTE_SUCCESS == (rc = opal_dss.unpack(buffer, &rml_uri, &cnt, OPAL_STRING))) {
OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output, OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output,
"%s routed_tree:callback got uri %s", "%s routed_linear:callback got uri %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(NULL == rml_uri) ? "NULL" : rml_uri)); (NULL == rml_uri) ? "NULL" : rml_uri));
@ -313,7 +319,7 @@ static int process_callback(orte_jobid_t job, opal_buffer_t *buffer)
static int init_routes(orte_jobid_t job, opal_buffer_t *ndat) static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
{ {
/* the tree module routes all proc communications through /* the linear module routes all proc communications through
* the local daemon. Daemons must identify which of their * the local daemon. Daemons must identify which of their
* daemon-peers is "hosting" the specified recipient and * daemon-peers is "hosting" the specified recipient and
* route the message to that daemon. Daemon contact info * route the message to that daemon. Daemon contact info
@ -336,7 +342,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
if (orte_process_info.daemon) { if (orte_process_info.daemon) {
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output, OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
"%s routed_tree: init routes for daemon job %s\n\thnp_uri %s", "%s routed_linear: init routes for daemon job %s\n\thnp_uri %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_JOBID_PRINT(job), ORTE_JOBID_PRINT(job),
(NULL == orte_process_info.my_hnp_uri) ? "NULL" : orte_process_info.my_hnp_uri)); (NULL == orte_process_info.my_hnp_uri) ? "NULL" : orte_process_info.my_hnp_uri));
@ -394,7 +400,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
} }
OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output, OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output,
"%s routed_tree: completed init routes", "%s routed_linear: completed init routes",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
return ORTE_SUCCESS; return ORTE_SUCCESS;
@ -404,7 +410,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
if (orte_process_info.hnp) { if (orte_process_info.hnp) {
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output, OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
"%s routed_tree: init routes for HNP job %s", "%s routed_linear: init routes for HNP job %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_JOBID_PRINT(job))); ORTE_JOBID_PRINT(job)));
@ -454,7 +460,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
int rc; int rc;
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output, OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
"%s routed_tree: init routes w/non-NULL data", "%s routed_linear: init routes w/non-NULL data",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
/* send the buffer to the proper tag on the daemon */ /* send the buffer to the proper tag on the daemon */
@ -474,7 +480,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
*/ */
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output, OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
"%s routed_tree: init routes for proc job %s\n\thnp_uri %s\n\tdaemon uri %s", "%s routed_linear: init routes for proc job %s\n\thnp_uri %s\n\tdaemon uri %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(job), ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(job),
(NULL == orte_process_info.my_hnp_uri) ? "NULL" : orte_process_info.my_hnp_uri, (NULL == orte_process_info.my_hnp_uri) ? "NULL" : orte_process_info.my_hnp_uri,
(NULL == orte_process_info.my_daemon_uri) ? "NULL" : orte_process_info.my_daemon_uri)); (NULL == orte_process_info.my_daemon_uri) ? "NULL" : orte_process_info.my_daemon_uri));
@ -485,7 +491,7 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
*/ */
opal_output(0, "%s ERROR: Failed to identify the local daemon's URI", opal_output(0, "%s ERROR: Failed to identify the local daemon's URI",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
opal_output(0, "%s ERROR: This is a fatal condition when the tree router", opal_output(0, "%s ERROR: This is a fatal condition when the linear router",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
opal_output(0, "%s ERROR: has been selected - either select the unity router", opal_output(0, "%s ERROR: has been selected - either select the unity router",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
@ -561,7 +567,7 @@ static int route_lost(const orte_process_name_t *route)
if (!orte_finalizing && if (!orte_finalizing &&
NULL != lifeline && NULL != lifeline &&
OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, route, lifeline)) { OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, route, lifeline)) {
opal_output(0, "%s routed:tree: Connection to lifeline %s lost", opal_output(0, "%s routed:linear: Connection to lifeline %s lost",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(lifeline)); ORTE_NAME_PRINT(lifeline));
return ORTE_ERR_FATAL; return ORTE_ERR_FATAL;
@ -572,6 +578,26 @@ static int route_lost(const orte_process_name_t *route)
} }
/******* stub functions - to be implemented ******/
/*
 * Placeholder implementation (to be filled in later): no lookup is
 * performed, and every target is reported as having a defined route.
 */
static bool route_is_defined(const orte_process_name_t *target)
{
    (void)target;   /* not consulted by the stub */

    return true;
}
/*
 * Placeholder implementation (to be filled in later): does no work and
 * always reports success.
 */
static int update_routing_tree(void)
{
    int rc = ORTE_SUCCESS;

    return rc;
}
/*
 * Placeholder implementation (to be filled in later): the children list
 * is left unmodified and ORTE_VPID_INVALID is returned.
 */
static orte_vpid_t get_routing_tree(opal_list_t *children)
{
    (void)children;   /* left untouched by the stub */

    return ORTE_VPID_INVALID;
}
/*************************************/
static int get_wireup_info(orte_jobid_t job, opal_buffer_t *buf) static int get_wireup_info(orte_jobid_t job, opal_buffer_t *buf)
{ {
int rc; int rc;
@ -593,7 +619,7 @@ static int get_wireup_info(orte_jobid_t job, opal_buffer_t *buf)
} }
#if OPAL_ENABLE_FT == 1 #if OPAL_ENABLE_FT == 1
static int tree_ft_event(int state) static int linear_ft_event(int state)
{ {
int ret, exit_status = ORTE_SUCCESS; int ret, exit_status = ORTE_SUCCESS;

Просмотреть файл

@ -8,8 +8,8 @@
* $HEADER$ * $HEADER$
*/ */
#ifndef MCA_ROUTED_UNITY_ROUTED_UNITY_H #ifndef MCA_ROUTED_LINEAR_H
#define MCA_ROUTED_UNITY_ROUTED_UNITY_H #define MCA_ROUTED_LINEAR_H
#include "orte_config.h" #include "orte_config.h"
#include "orte/types.h" #include "orte/types.h"
@ -18,11 +18,9 @@
BEGIN_C_DECLS BEGIN_C_DECLS
ORTE_MODULE_DECLSPEC extern orte_routed_component_t mca_routed_linear_component;
ORTE_MODULE_DECLSPEC extern orte_routed_component_t mca_routed_unity_component; extern orte_routed_module_t orte_routed_linear_module;
extern orte_routed_module_t orte_routed_unity_module;
END_C_DECLS END_C_DECLS

Просмотреть файл

@ -21,14 +21,14 @@
#include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_globals.h"
#include "orte/mca/routed/base/base.h" #include "orte/mca/routed/base/base.h"
#include "routed_tree.h" #include "routed_linear.h"
static orte_routed_module_t* routed_tree_init(int* priority); static orte_routed_module_t* routed_linear_init(int* priority);
/** /**
* component definition * component definition
*/ */
orte_routed_component_t mca_routed_tree_component = { orte_routed_component_t mca_routed_linear_component = {
/* First, the mca_base_component_t struct containing meta /* First, the mca_base_component_t struct containing meta
information about the component itself */ information about the component itself */
@ -38,7 +38,7 @@ orte_routed_component_t mca_routed_tree_component = {
ORTE_ROUTED_BASE_VERSION_1_0_0, ORTE_ROUTED_BASE_VERSION_1_0_0,
"tree", /* MCA component name */ "linear", /* MCA component name */
ORTE_MAJOR_VERSION, /* MCA component major version */ ORTE_MAJOR_VERSION, /* MCA component major version */
ORTE_MINOR_VERSION, /* MCA component minor version */ ORTE_MINOR_VERSION, /* MCA component minor version */
ORTE_RELEASE_VERSION, /* MCA component release version */ ORTE_RELEASE_VERSION, /* MCA component release version */
@ -51,14 +51,14 @@ orte_routed_component_t mca_routed_tree_component = {
/* This component can be checkpointed */ /* This component can be checkpointed */
MCA_BASE_METADATA_PARAM_CHECKPOINT MCA_BASE_METADATA_PARAM_CHECKPOINT
}, },
routed_tree_init routed_linear_init
}; };
static orte_routed_module_t* static orte_routed_module_t*
routed_tree_init(int* priority) routed_linear_init(int* priority)
{ {
*priority = 70; *priority = 70;
return &orte_routed_tree_module; return &orte_routed_linear_module;
} }

Просмотреть файл

@ -171,6 +171,20 @@ typedef int (*orte_routed_module_init_routes_fn_t)(orte_jobid_t job, opal_buffer
*/ */
typedef int (*orte_routed_module_route_lost_fn_t)(const orte_process_name_t *route); typedef int (*orte_routed_module_route_lost_fn_t)(const orte_process_name_t *route);
/**
 * Is this route defined?
 *
 * Check to see if a route to the specified target has been defined. The
 * function returns "true" if it has, and "false" if no route to the
 * target was previously defined.
 *
 * This is needed because routed modules will return their "wildcard"
 * route if we request a route to a target that they don't know about.
 * In some cases, though, we truly -do- need to know if a route was
 * specifically defined.
 *
 * @param target  Name of the process whose route is being queried
 *
 * @retval true   A route to the target was previously defined
 * @retval false  No route to the target was previously defined
 */
typedef bool (*orte_routed_module_route_is_defined_fn_t)(const orte_process_name_t *target);
/** /**
* Get wireup data for the specified job * Get wireup data for the specified job
* *
@ -181,6 +195,25 @@ typedef int (*orte_routed_module_route_lost_fn_t)(const orte_process_name_t *rou
typedef int (*orte_routed_module_get_wireup_info_fn_t)(orte_jobid_t job, typedef int (*orte_routed_module_get_wireup_info_fn_t)(orte_jobid_t job,
opal_buffer_t *buf); opal_buffer_t *buf);
/**
 * Update the module's routing tree for this process
 *
 * Called only by a daemon and the HNP, this function creates a list
 * of "leaves" for this process and identifies the vpid of the parent
 * sitting above this process in the tree.
 *
 * @retval ORTE_SUCCESS The operation completed successfully
 * @retval ORTE_ERROR_xxx The specified error occurred
 */
typedef int (*orte_routed_module_update_routing_tree_fn_t)(void);
/**
 * Get the routing tree for this process
 *
 * Fills the provided list with the direct children of this process
 * in the routing tree, and returns the vpid of the parent.
 *
 * @param children  List to be filled with the direct children of this
 *                  process in the routing tree
 *
 * @return The vpid of this process's parent in the routing tree
 */
typedef orte_vpid_t (*orte_routed_module_get_routing_tree_fn_t)(opal_list_t *children);
/** /**
* Handle fault tolerance updates * Handle fault tolerance updates
@ -211,6 +244,10 @@ struct orte_routed_module_t {
orte_routed_module_get_route_fn_t get_route; orte_routed_module_get_route_fn_t get_route;
orte_routed_module_init_routes_fn_t init_routes; orte_routed_module_init_routes_fn_t init_routes;
orte_routed_module_route_lost_fn_t route_lost; orte_routed_module_route_lost_fn_t route_lost;
orte_routed_module_route_is_defined_fn_t route_is_defined;
/* fns for daemons */
orte_routed_module_update_routing_tree_fn_t update_routing_tree;
orte_routed_module_get_routing_tree_fn_t get_routing_tree;
orte_routed_module_get_wireup_info_fn_t get_wireup_info; orte_routed_module_get_wireup_info_fn_t get_wireup_info;
/* FT Notification */ /* FT Notification */
orte_routed_module_ft_event_fn_t ft_event; orte_routed_module_ft_event_fn_t ft_event;