1
1

Fix the radix routed component.

This commit was SVN r25175.
Этот коммит содержится в:
Ralph Castain 2011-09-22 09:32:53 +00:00
родитель 82c93611e6
Коммит 8347385630
2 изменённых файлов: 93 добавлений и 17 удалений

Просмотреть файл

@ -273,7 +273,7 @@ static int update_route(orte_process_name_t *target,
ORTE_NAME_PRINT(route)));
jfam->route.jobid = route->jobid;
jfam->route.vpid = route->vpid;
ORTE_EPOCH_SET(jfam->route.epoch,route->epoch);
ORTE_EPOCH_SET(jfam->route.epoch,orte_ess.proc_get_epoch(&jfam->route));
return ORTE_SUCCESS;
}
}
@ -287,7 +287,7 @@ static int update_route(orte_process_name_t *target,
jfam->job_family = jfamily;
jfam->route.jobid = route->jobid;
jfam->route.vpid = route->vpid;
ORTE_EPOCH_SET(jfam->route.epoch,route->epoch);
ORTE_EPOCH_SET(jfam->route.epoch,orte_ess.proc_get_epoch(&jfam->route));
opal_pointer_array_add(&orte_routed_jobfams, jfam);
return ORTE_SUCCESS;
}
@ -309,13 +309,28 @@ static orte_process_name_t get_route(orte_process_name_t *target)
orte_routed_jobfam_t *jfam;
uint16_t jfamily;
/* initialize */
daemon.jobid = ORTE_PROC_MY_DAEMON->jobid;
daemon.vpid = ORTE_PROC_MY_DAEMON->vpid;
ORTE_EPOCH_SET(daemon.epoch,ORTE_PROC_MY_DAEMON->epoch);
#if ORTE_ENABLE_EPOCH
if (target->jobid == ORTE_JOBID_INVALID ||
target->vpid == ORTE_VPID_INVALID ||
0 == ORTE_EPOCH_CMP(target->epoch,ORTE_EPOCH_INVALID)) {
target->epoch == ORTE_EPOCH_INVALID) {
#else
if (target->jobid == ORTE_JOBID_INVALID ||
target->vpid == ORTE_VPID_INVALID) {
#endif
ret = ORTE_NAME_INVALID;
goto found;
}
if (0 > ORTE_EPOCH_CMP(target->epoch, orte_ess.proc_get_epoch(target))) {
ret = ORTE_NAME_INVALID;
goto found;
}
/* if it is me, then the route is just direct */
if (OPAL_EQUAL == opal_dss.compare(ORTE_PROC_MY_NAME, target, ORTE_NAME)) {
ret = target;
@ -328,6 +343,20 @@ static orte_process_name_t get_route(orte_process_name_t *target)
goto found;
}
/* if I am a tool, the route is direct if target is in
* my own job family, and to the target's HNP if not
*/
if (ORTE_PROC_IS_TOOL) {
if (ORTE_JOB_FAMILY(target->jobid) == ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
ret = target;
goto found;
} else {
ORTE_HNP_NAME_FROM_JOB(&daemon, target->jobid);
ret = &daemon;
goto found;
}
}
/****** HNP AND DAEMONS ONLY ******/
/* if the job family is zero, then this is going to a local slave,
@ -734,6 +763,34 @@ static int route_lost(const orte_process_name_t *route)
{
opal_list_item_t *item;
orte_routed_tree_t *child;
orte_routed_jobfam_t *jfam;
uint16_t jfamily;
int i;
OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output,
"%s route to %s lost",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(route)));
/* if the route is to a different job family and we are the HNP, look it up */
if ((ORTE_JOB_FAMILY(route->jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) &&
ORTE_PROC_IS_HNP) {
jfamily = ORTE_JOB_FAMILY(route->jobid);
for (i=0; i < orte_routed_jobfams.size; i++) {
if (NULL == (jfam = (orte_routed_jobfam_t*)opal_pointer_array_get_item(&orte_routed_jobfams, i))) {
continue;
}
if (jfam->job_family == jfamily) {
OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output,
"%s routed_radix: route to %s lost",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_JOB_FAMILY_PRINT(route->jobid)));
opal_pointer_array_set_item(&orte_routed_jobfams, i, NULL);
OBJ_RELEASE(jfam);
break;
}
}
}
/* if we lose the connection to the lifeline and we are NOT already,
* in finalize, tell the OOB to abort.
@ -772,6 +829,34 @@ static int route_lost(const orte_process_name_t *route)
static bool route_is_defined(const orte_process_name_t *target)
{
int i;
orte_routed_jobfam_t *jfam;
uint16_t jfamily;
/* if the route is to a different job family and we are the HNP, look it up */
if (ORTE_JOB_FAMILY(target->jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
if (ORTE_PROC_IS_HNP) {
jfamily = ORTE_JOB_FAMILY(target->jobid);
for (i=0; i < orte_routed_jobfams.size; i++) {
if (NULL == (jfam = (orte_routed_jobfam_t*)opal_pointer_array_get_item(&orte_routed_jobfams, i))) {
continue;
}
if (jfam->job_family == jfamily) {
OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output,
"%s routed_radix: route to %s is defined",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_JOB_FAMILY_PRINT(target->jobid)));
return true;
}
}
return false;
}
/* if we are not the HNP, then the answer is always true as
* we send it via the HNP
*/
return true;
}
/* find out what daemon hosts this proc */
if (ORTE_VPID_INVALID == orte_ess.proc_get_daemon((orte_process_name_t*)target)) {
return false;

Просмотреть файл

@ -50,21 +50,12 @@ orte_routed_radix_component_t mca_routed_radix_component = {
static int orte_routed_radix_component_query(mca_base_module_t **module, int *priority)
{
int tmp;
mca_base_component_t *c = &mca_routed_radix_component.super.base_version;
mca_base_param_reg_int(c, NULL,
"Radix to be used for routed radix tree",
false, false, -1, &tmp);
if (0 < tmp) {
mca_routed_radix_component.radix = tmp;
*priority = 150;
*module = (mca_base_module_t *) &orte_routed_radix_module;
return ORTE_SUCCESS;
}
/* if radix not provided, then we can't run */
*priority = 0;
*module = NULL;
return ORTE_ERROR;
false, false, 32, &mca_routed_radix_component.radix);
*priority = 65;
*module = (mca_base_module_t *) &orte_routed_radix_module;
return ORTE_SUCCESS;
}