1
1

Update the routed framework to:

1. add a new API delete_route(orte_process_name_t*) to delete the specified proc from the routing table

2. modify update_route so that it actually updates pre-existing routes instead of only adding routing info to the end of the hash table

This fixes ticket #1403

This commit was SVN r18970.
Этот коммит содержится в:
Ralph Castain 2008-07-21 21:37:09 +00:00
родитель 4180667adb
Коммит a4f0fa6e3a
4 изменённых файлов: 428 добавлений и 37 удалений

Просмотреть файл

@ -34,6 +34,7 @@
static int init(void);
static int finalize(void);
static int delete_route(orte_process_name_t *proc);
static int update_route(orte_process_name_t *target,
orte_process_name_t *route);
static orte_process_name_t get_route(orte_process_name_t *target);
@ -52,6 +53,7 @@ static int binomial_ft_event(int state);
orte_routed_module_t orte_routed_binomial_module = {
init,
finalize,
delete_route,
update_route,
get_route,
init_routes,
@ -162,6 +164,116 @@ static int finalize(void)
return ORTE_SUCCESS;
}
/*
 * Delete the route to the specified process from the binomial module's
 * routing tables.
 *
 * Names from a different job family are looked up by job family in
 * vpid_wildcard_list. Names from our own job family are looked up either
 * in vpid_wildcard_list (wildcard vpid, keyed by jobid) or in peer_list
 * (exact name, keyed by the hashed process name).
 *
 * The table entry is removed BEFORE the stored route is freed, so a failed
 * removal never leaves a dangling pointer behind in the table.
 *
 * Returns ORTE_SUCCESS when the entry was removed or there was nothing to
 * remove, ORTE_ERR_BAD_PARAM for a NULL or invalid name, the hash-table
 * error code if the removal failed, and ORTE_ERR_NOT_SUPPORTED when the
 * name matches none of the handled cases.
 */
static int delete_route(orte_process_name_t *proc)
{
    int rc;
    orte_process_name_t *route_copy = NULL;

    if (NULL == proc) {
        return ORTE_ERR_BAD_PARAM;
    }

    if (proc->jobid == ORTE_JOBID_INVALID ||
        proc->vpid == ORTE_VPID_INVALID) {
        return ORTE_ERR_BAD_PARAM;
    }

    /* if I am an application process, I don't have any routes
     * so there is nothing for me to do
     */
    if (!orte_process_info.hnp && !orte_process_info.daemon &&
        !orte_process_info.tool) {
        return ORTE_SUCCESS;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
                         "%s routed_binomial_delete_route for %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(proc)));

    /* if this is from a different job family, then I need to
     * look it up appropriately
     */
    if (ORTE_JOB_FAMILY(proc->jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
        /* if I am a daemon, then I will automatically route
         * anything to this job family via my HNP - so I have nothing
         * in my routing table and thus have nothing to do
         * here, just return
         */
        if (orte_process_info.daemon) {
            return ORTE_SUCCESS;
        }

        /* see if this proc is present - it will have a wildcard vpid,
         * so we have to look for it with that condition
         */
        rc = opal_hash_table_get_value_uint32(&vpid_wildcard_list,
                                              ORTE_JOB_FAMILY(proc->jobid),
                                              (void**)&route_copy);
        if (ORTE_SUCCESS != rc || NULL == route_copy) {
            /* not present - nothing to do */
            return ORTE_SUCCESS;
        }

        /* proc is present - drop the table entry first, then release the data */
        rc = opal_hash_table_remove_value_uint32(&vpid_wildcard_list,
                                                 ORTE_JOB_FAMILY(proc->jobid));
        if (ORTE_SUCCESS != rc) {
            /* the table still references the route, so it must not be freed */
            ORTE_ERROR_LOG(rc);
            return rc;
        }
        free(route_copy);
        return ORTE_SUCCESS;
    }

    /* THIS CAME FROM OUR OWN JOB FAMILY... */

    /* treat vpid wildcards separately so they are looked up on the correct list */
    if (proc->jobid != ORTE_JOBID_WILDCARD &&
        proc->vpid == ORTE_VPID_WILDCARD) {
        /* see if this target is already present - it will have a wildcard vpid,
         * so we have to look for it on that list
         */
        rc = opal_hash_table_get_value_uint32(&vpid_wildcard_list,
                                              proc->jobid,
                                              (void**)&route_copy);
        if (ORTE_SUCCESS != rc || NULL == route_copy) {
            /* not already present - nothing to do */
            return ORTE_SUCCESS;
        }

        /* proc is present - drop the table entry first, then release the data */
        rc = opal_hash_table_remove_value_uint32(&vpid_wildcard_list, proc->jobid);
        if (ORTE_SUCCESS != rc) {
            /* the table still references the route, so it must not be freed */
            ORTE_ERROR_LOG(rc);
            return rc;
        }
        free(route_copy);
        return ORTE_SUCCESS;
    }

    /* check for an exact match */
    if (proc->jobid != ORTE_JOBID_WILDCARD &&
        proc->vpid != ORTE_VPID_WILDCARD) {
        /* see if this route already exists in our table */
        rc = opal_hash_table_get_value_uint64(&peer_list,
                                              orte_util_hash_name(proc),
                                              (void**)&route_copy);
        if (ORTE_SUCCESS != rc || NULL == route_copy) {
            /* not already present - nothing to do */
            return ORTE_SUCCESS;
        }

        /* proc is present - drop the table entry first, then release the data */
        rc = opal_hash_table_remove_value_uint64(&peer_list, orte_util_hash_name(proc));
        if (ORTE_SUCCESS != rc) {
            /* the table still references the route, so it must not be freed */
            ORTE_ERROR_LOG(rc);
            return rc;
        }
        free(route_copy);
        return ORTE_SUCCESS;
    }

    /* this must be a process that doesn't match any of the
     * prior conditions - sorry!
     */
    return ORTE_ERR_NOT_SUPPORTED;
}
static int update_route(orte_process_name_t *target,
orte_process_name_t *route)
{
@ -214,8 +326,16 @@ static int update_route(orte_process_name_t *target,
ORTE_JOB_FAMILY(target->jobid),
(void**)&route_copy);
if (ORTE_SUCCESS == rc && NULL != route_copy) {
/* target already present - no need for duplicate entry */
return ORTE_SUCCESS;
/* target already present - update the route info
* in case it has changed
*/
*route_copy = *route;
rc = opal_hash_table_set_value_uint32(&vpid_wildcard_list,
ORTE_JOB_FAMILY(target->jobid), route_copy);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
}
return rc;
}
/* not there, so add the route FOR THE JOB FAMILY*/
@ -231,11 +351,63 @@ static int update_route(orte_process_name_t *target,
/* THIS CAME FROM OUR OWN JOB FAMILY... */
route_copy = malloc(sizeof(orte_process_name_t));
*route_copy = *route;
/* exact match */
/* treat vpid wildcards separately so they go onto the correct list */
if (target->jobid != ORTE_JOBID_WILDCARD &&
target->vpid == ORTE_VPID_WILDCARD) {
/* see if this target is already present - it will have a wildcard vpid,
* so we have to look for it on that list
*/
rc = opal_hash_table_get_value_uint32(&vpid_wildcard_list,
target->jobid,
(void**)&route_copy);
if (ORTE_SUCCESS == rc && NULL != route_copy) {
/* target already present - update the route info
* in case it has changed
*/
*route_copy = *route;
rc = opal_hash_table_set_value_uint32(&vpid_wildcard_list,
target->jobid, route_copy);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
}
return rc;
}
/* not already present, so let's add it */
route_copy = malloc(sizeof(orte_process_name_t));
*route_copy = *route;
rc = opal_hash_table_set_value_uint32(&vpid_wildcard_list,
target->jobid, route_copy);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
}
return rc;
}
/* check for an exact match */
if (target->jobid != ORTE_JOBID_WILDCARD &&
target->vpid != ORTE_VPID_WILDCARD) {
/* see if this route already exists in our table */
rc = opal_hash_table_get_value_uint64(&peer_list,
orte_util_hash_name(target),
(void**)&route_copy);
if (ORTE_SUCCESS == rc && NULL != route_copy) {
/* target already present - update the route info
* in case it has changed
*/
*route_copy = *route;
rc = opal_hash_table_set_value_uint64(&peer_list,
orte_util_hash_name(target), route_copy);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
}
return rc;
}
/* not present - add it to the table */
route_copy = malloc(sizeof(orte_process_name_t));
*route_copy = *route;
rc = opal_hash_table_set_value_uint64(&peer_list,
orte_util_hash_name(target), route_copy);
if (ORTE_SUCCESS != rc) {
@ -244,18 +416,9 @@ static int update_route(orte_process_name_t *target,
return rc;
}
/* vpid wildcard */
if (target->jobid != ORTE_JOBID_WILDCARD &&
target->vpid == ORTE_VPID_WILDCARD) {
rc = opal_hash_table_set_value_uint32(&vpid_wildcard_list,
target->jobid, route_copy);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
}
return rc;
}
free(route_copy);
/* this must be a process that doesn't match any of the
* prior conditions - sorry!
*/
return ORTE_ERR_NOT_SUPPORTED;
}

Просмотреть файл

@ -45,6 +45,7 @@ static orte_process_name_t *lifeline=NULL;
/* API functions */
static int init(void);
static int finalize(void);
static int delete_route(orte_process_name_t *proc);
static int update_route(orte_process_name_t *target,
orte_process_name_t *route);
static orte_process_name_t get_route(orte_process_name_t *target);
@ -63,6 +64,7 @@ static int direct_ft_event(int state);
orte_routed_module_t orte_routed_direct_module = {
init,
finalize,
delete_route,
update_route,
get_route,
init_routes,
@ -141,6 +143,51 @@ static int finalize(void)
}
/*
 * Delete the route to the specified process from the direct module's
 * routing table.
 *
 * Only names from a different job family can have an entry, and only when
 * this process is the HNP or a tool; such entries live in peer_list keyed
 * by job family. Every other case is a no-op.
 *
 * The table entry is removed BEFORE the stored route is freed, so a failed
 * removal never leaves a dangling pointer behind in the table.
 *
 * Returns ORTE_SUCCESS when the entry was removed or there was nothing to
 * remove, ORTE_ERR_BAD_PARAM for a NULL or invalid name, and the hash-table
 * error code if the removal failed.
 */
static int delete_route(orte_process_name_t *proc)
{
    orte_process_name_t *route_copy = NULL;
    int rc;

    if (NULL == proc) {
        return ORTE_ERR_BAD_PARAM;
    }

    if (proc->jobid == ORTE_JOBID_INVALID ||
        proc->vpid == ORTE_VPID_INVALID) {
        return ORTE_ERR_BAD_PARAM;
    }

    /* if this isn't from a different job family, then there is
     * nothing for us to do as all routes are direct - nothing
     * is in the routing table
     */
    if (ORTE_JOB_FAMILY(proc->jobid) == ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
        return ORTE_SUCCESS;
    }

    /* if I am -not- the HNP or a tool, then I will automatically route
     * anything to this job family via my HNP - so nothing to do
     * here since nothing is in my routing table
     */
    if (!orte_process_info.hnp && !orte_process_info.tool) {
        return ORTE_SUCCESS;
    }

    /* must need to look it up */
    rc = opal_hash_table_get_value_uint32(&peer_list,
                                          ORTE_JOB_FAMILY(proc->jobid),
                                          (void**)&route_copy);
    if (ORTE_SUCCESS != rc || NULL == route_copy) {
        /* wasn't here - nothing to do */
        return ORTE_SUCCESS;
    }

    /* proc is present - drop the table entry first, then release the data */
    rc = opal_hash_table_remove_value_uint32(&peer_list,
                                             ORTE_JOB_FAMILY(proc->jobid));
    if (ORTE_SUCCESS != rc) {
        /* the table still references the route, so it must not be freed */
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    free(route_copy);
    return ORTE_SUCCESS;
}
static int update_route(orte_process_name_t *target,
orte_process_name_t *route)
{
@ -176,8 +223,16 @@ static int update_route(orte_process_name_t *target,
ORTE_JOB_FAMILY(target->jobid),
(void**)&route_copy);
if (ORTE_SUCCESS == rc && NULL != route_copy) {
/* target already present - no need for duplicate entry */
return ORTE_SUCCESS;
/* target already present - update the route info
* in case it has changed
*/
*route_copy = *route;
rc = opal_hash_table_set_value_uint32(&peer_list,
ORTE_JOB_FAMILY(target->jobid), route_copy);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
}
return rc;
}
/* not there, so add the route FOR THE JOB FAMILY*/

Просмотреть файл

@ -33,6 +33,7 @@
static int init(void);
static int finalize(void);
static int delete_route(orte_process_name_t *proc);
static int update_route(orte_process_name_t *target,
orte_process_name_t *route);
static orte_process_name_t get_route(orte_process_name_t *target);
@ -51,6 +52,7 @@ static int linear_ft_event(int state);
orte_routed_module_t orte_routed_linear_module = {
init,
finalize,
delete_route,
update_route,
get_route,
init_routes,
@ -146,6 +148,116 @@ static int finalize(void)
return ORTE_SUCCESS;
}
/*
 * Delete the route to the specified process from the linear module's
 * routing tables.
 *
 * Names from a different job family are looked up by job family in
 * vpid_wildcard_list. Names from our own job family are looked up either
 * in vpid_wildcard_list (wildcard vpid, keyed by jobid) or in peer_list
 * (exact name, keyed by the hashed process name).
 *
 * The table entry is removed BEFORE the stored route is freed, so a failed
 * removal never leaves a dangling pointer behind in the table.
 *
 * Returns ORTE_SUCCESS when the entry was removed or there was nothing to
 * remove, ORTE_ERR_BAD_PARAM for a NULL or invalid name, the hash-table
 * error code if the removal failed, and ORTE_ERR_NOT_SUPPORTED when the
 * name matches none of the handled cases.
 */
static int delete_route(orte_process_name_t *proc)
{
    int rc;
    orte_process_name_t *route_copy = NULL;

    if (NULL == proc) {
        return ORTE_ERR_BAD_PARAM;
    }

    if (proc->jobid == ORTE_JOBID_INVALID ||
        proc->vpid == ORTE_VPID_INVALID) {
        return ORTE_ERR_BAD_PARAM;
    }

    /* if I am an application process, I don't have any routes
     * so there is nothing for me to do
     */
    if (!orte_process_info.hnp && !orte_process_info.daemon &&
        !orte_process_info.tool) {
        return ORTE_SUCCESS;
    }

    /* the message names this (linear) module so verbose output can be
     * attributed to the right routed component
     */
    OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
                         "%s routed_linear_delete_route for %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(proc)));

    /* if this is from a different job family, then I need to
     * look it up appropriately
     */
    if (ORTE_JOB_FAMILY(proc->jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
        /* if I am a daemon, then I will automatically route
         * anything to this job family via my HNP - so I have nothing
         * in my routing table and thus have nothing to do
         * here, just return
         */
        if (orte_process_info.daemon) {
            return ORTE_SUCCESS;
        }

        /* see if this proc is present - it will have a wildcard vpid,
         * so we have to look for it with that condition
         */
        rc = opal_hash_table_get_value_uint32(&vpid_wildcard_list,
                                              ORTE_JOB_FAMILY(proc->jobid),
                                              (void**)&route_copy);
        if (ORTE_SUCCESS != rc || NULL == route_copy) {
            /* not present - nothing to do */
            return ORTE_SUCCESS;
        }

        /* proc is present - drop the table entry first, then release the data */
        rc = opal_hash_table_remove_value_uint32(&vpid_wildcard_list,
                                                 ORTE_JOB_FAMILY(proc->jobid));
        if (ORTE_SUCCESS != rc) {
            /* the table still references the route, so it must not be freed */
            ORTE_ERROR_LOG(rc);
            return rc;
        }
        free(route_copy);
        return ORTE_SUCCESS;
    }

    /* THIS CAME FROM OUR OWN JOB FAMILY... */

    /* treat vpid wildcards separately so they are looked up on the correct list */
    if (proc->jobid != ORTE_JOBID_WILDCARD &&
        proc->vpid == ORTE_VPID_WILDCARD) {
        /* see if this target is already present - it will have a wildcard vpid,
         * so we have to look for it on that list
         */
        rc = opal_hash_table_get_value_uint32(&vpid_wildcard_list,
                                              proc->jobid,
                                              (void**)&route_copy);
        if (ORTE_SUCCESS != rc || NULL == route_copy) {
            /* not already present - nothing to do */
            return ORTE_SUCCESS;
        }

        /* proc is present - drop the table entry first, then release the data */
        rc = opal_hash_table_remove_value_uint32(&vpid_wildcard_list, proc->jobid);
        if (ORTE_SUCCESS != rc) {
            /* the table still references the route, so it must not be freed */
            ORTE_ERROR_LOG(rc);
            return rc;
        }
        free(route_copy);
        return ORTE_SUCCESS;
    }

    /* check for an exact match */
    if (proc->jobid != ORTE_JOBID_WILDCARD &&
        proc->vpid != ORTE_VPID_WILDCARD) {
        /* see if this route already exists in our table */
        rc = opal_hash_table_get_value_uint64(&peer_list,
                                              orte_util_hash_name(proc),
                                              (void**)&route_copy);
        if (ORTE_SUCCESS != rc || NULL == route_copy) {
            /* not already present - nothing to do */
            return ORTE_SUCCESS;
        }

        /* proc is present - drop the table entry first, then release the data */
        rc = opal_hash_table_remove_value_uint64(&peer_list, orte_util_hash_name(proc));
        if (ORTE_SUCCESS != rc) {
            /* the table still references the route, so it must not be freed */
            ORTE_ERROR_LOG(rc);
            return rc;
        }
        free(route_copy);
        return ORTE_SUCCESS;
    }

    /* this must be a process that doesn't match any of the
     * prior conditions - sorry!
     */
    return ORTE_ERR_NOT_SUPPORTED;
}
static int update_route(orte_process_name_t *target,
orte_process_name_t *route)
{
@ -198,8 +310,16 @@ static int update_route(orte_process_name_t *target,
ORTE_JOB_FAMILY(target->jobid),
(void**)&route_copy);
if (ORTE_SUCCESS == rc && NULL != route_copy) {
/* target already present - no need for duplicate entry */
return ORTE_SUCCESS;
/* target already present - update the route info
* in case it has changed
*/
*route_copy = *route;
rc = opal_hash_table_set_value_uint32(&vpid_wildcard_list,
ORTE_JOB_FAMILY(target->jobid), route_copy);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
}
return rc;
}
/* not there, so add the route FOR THE JOB FAMILY*/
@ -215,22 +335,31 @@ static int update_route(orte_process_name_t *target,
/* THIS CAME FROM OUR OWN JOB FAMILY... */
route_copy = malloc(sizeof(orte_process_name_t));
*route_copy = *route;
/* exact match */
if (target->jobid != ORTE_JOBID_WILDCARD &&
target->vpid != ORTE_VPID_WILDCARD) {
rc = opal_hash_table_set_value_uint64(&peer_list,
orte_util_hash_name(target), route_copy);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
}
return rc;
}
/* vpid wildcard */
/* treat vpid wildcards separately so they go onto the correct list */
if (target->jobid != ORTE_JOBID_WILDCARD &&
target->vpid == ORTE_VPID_WILDCARD) {
/* see if this target is already present - it will have a wildcard vpid,
* so we have to look for it on that list
*/
rc = opal_hash_table_get_value_uint32(&vpid_wildcard_list,
target->jobid,
(void**)&route_copy);
if (ORTE_SUCCESS == rc && NULL != route_copy) {
/* target already present - update the route info
* in case it has changed
*/
*route_copy = *route;
rc = opal_hash_table_set_value_uint32(&vpid_wildcard_list,
target->jobid, route_copy);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
}
return rc;
}
/* not already present, so let's add it */
route_copy = malloc(sizeof(orte_process_name_t));
*route_copy = *route;
rc = opal_hash_table_set_value_uint32(&vpid_wildcard_list,
target->jobid, route_copy);
if (ORTE_SUCCESS != rc) {
@ -238,8 +367,42 @@ static int update_route(orte_process_name_t *target,
}
return rc;
}
free(route_copy);
/* check for an exact match */
if (target->jobid != ORTE_JOBID_WILDCARD &&
target->vpid != ORTE_VPID_WILDCARD) {
/* see if this route already exists in our table */
rc = opal_hash_table_get_value_uint64(&peer_list,
orte_util_hash_name(target),
(void**)&route_copy);
if (ORTE_SUCCESS == rc && NULL != route_copy) {
/* target already present - update the route info
* in case it has changed
*/
*route_copy = *route;
rc = opal_hash_table_set_value_uint64(&peer_list,
orte_util_hash_name(target), route_copy);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
}
return rc;
}
/* not present - add it to the table */
route_copy = malloc(sizeof(orte_process_name_t));
*route_copy = *route;
rc = opal_hash_table_set_value_uint64(&peer_list,
orte_util_hash_name(target), route_copy);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
}
return rc;
}
/* this must be a process that doesn't match any of the
* prior conditions - sorry!
*/
return ORTE_ERR_NOT_SUPPORTED;
}

Просмотреть файл

@ -94,6 +94,15 @@ typedef int (*orte_routed_module_init_fn_t)(void);
typedef int (*orte_routed_module_finalize_fn_t)(void);
/**
 * Delete route
 *
 * Delete the route to the specified proc from the routing table. Note
 * that wildcards are supported to remove routes from, for example, all
 * procs in a given job.
 *
 * @param proc  Name of the process whose route is to be removed.
 *
 * @return An ORTE status code. The module implementations accompanying
 *         this declaration return ORTE_SUCCESS both when the route was
 *         removed and when no matching route existed, ORTE_ERR_BAD_PARAM
 *         when the jobid or vpid of the name is invalid, and (binomial
 *         and linear modules) ORTE_ERR_NOT_SUPPORTED when the name matches
 *         none of the cases they handle.
 */
typedef int (*orte_routed_module_delete_route_fn_t)(orte_process_name_t *proc);
/**
* Update route table with new information
*
@ -229,6 +238,7 @@ struct orte_routed_module_t {
orte_routed_module_init_fn_t initialize;
orte_routed_module_finalize_fn_t finalize;
/* API functions */
orte_routed_module_delete_route_fn_t delete_route;
orte_routed_module_update_route_fn_t update_route;
orte_routed_module_get_route_fn_t get_route;
orte_routed_module_init_routes_fn_t init_routes;