1
1

Make debruijn the default routed component. Update the radix component to "short-circuit" the tree when the job size permits.

This commit was SVN r26580.
Этот коммит содержится в:
Ralph Castain 2012-06-08 00:35:36 +00:00
родитель ffcca0185a
Коммит 05122a2f93
5 изменённых файлов: 40 добавлений и 7 удалений

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2007 Los Alamos National Security, LLC.
* Copyright (c) 2007-2012 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2004-2008 The Trustees of Indiana University.
* All rights reserved.
@ -48,7 +48,10 @@ orte_routed_component_t mca_routed_binomial_component = {
/*
 * Component query for the binomial routed module.
 *
 * module   - out: set to the binomial routed module
 * priority - out: set to 0
 *
 * Returns ORTE_SUCCESS.
 */
static int orte_routed_binomial_component_query(mca_base_module_t **module, int *priority)
{
    /* make this selected ONLY if the user directs as this module scales
     * poorly compared to our other options
     */
    *priority = 0;
    *module = (mca_base_module_t *) &orte_routed_binomial_module;
    return ORTE_SUCCESS;
}

Просмотреть файл

@ -49,7 +49,11 @@ orte_routed_component_t mca_routed_debruijn_component = {
/*
 * Component query for the debruijn routed module.
 *
 * module   - out: set to the debruijn routed module
 * priority - out: set to 70
 *
 * Returns ORTE_SUCCESS.
 */
static int orte_routed_debruijn_component_query(mca_base_module_t **module, int *priority)
{
    /* Debruijn shall be our default, especially for large systems. For smaller
     * systems, we will allow other options that have even fewer hops to
     * support wireup
     */
    *priority = 70;
    *module = (mca_base_module_t *) &orte_routed_debruijn_module;
    return ORTE_SUCCESS;
}

Просмотреть файл

@ -4,7 +4,7 @@
* Copyright (c) 2004-2011 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2011 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
@ -375,6 +375,9 @@ static orte_process_name_t get_route(orte_process_name_t *target)
/* THIS CAME FROM OUR OWN JOB FAMILY... */
/* if this is going to the HNP, then send it direct if we don't know
* how to get there - otherwise, send it via the tree
*/
if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_HNP, target)) {
if (!hnp_direct || orte_static_ports) {
OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output,
@ -404,6 +407,10 @@ static orte_process_name_t get_route(orte_process_name_t *target)
if (ORTE_PROC_MY_NAME->vpid == daemon.vpid) {
ret = target;
goto found;
} else if (orte_process_info.num_procs < mca_routed_radix_component.max_connections) {
/* if the job is small enough, send direct to the target's daemon */
ret = &daemon;
goto found;
} else {
/* search routing tree for next step to that daemon */
for (item = opal_list_get_first(&my_children);

Просмотреть файл

@ -20,6 +20,7 @@ BEGIN_C_DECLS
/* Radix routed component: the base routed component plus the tunables
 * that the component's query function fills in.
 */
typedef struct {
/* base routed component; must stay first so the component can be
 * initialized and referenced through its "super" member
 */
orte_routed_component_t super;
/* radix to be used for the routed radix tree */
int radix;
/* max number of connections a daemon may make before routing
 * messages across the tree
 */
orte_vpid_t max_connections;
} orte_routed_radix_component_t;
/* the single instance of the radix routed component */
ORTE_MODULE_DECLSPEC extern orte_routed_radix_component_t mca_routed_radix_component;

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2007 Los Alamos National Security, LLC.
* Copyright (c) 2007-2012 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2004-2008 The Trustees of Indiana University.
* All rights reserved.
@ -15,7 +15,7 @@
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/util/sys_limits.h"
#include "orte/mca/routed/base/base.h"
#include "routed_radix.h"
@ -51,11 +51,29 @@ orte_routed_radix_component_t mca_routed_radix_component = {
/*
 * Component query for the radix routed module.
 *
 * Registers the component's integer MCA parameters (the tree radix and
 * "max_connections"), derives the value stored in
 * mca_routed_radix_component.max_connections, and reports the module and
 * its selection priority.
 *
 * module   - out: set to the radix routed module
 * priority - out: set to 30
 *
 * Returns ORTE_SUCCESS.
 */
static int orte_routed_radix_component_query(mca_base_module_t **module, int *priority)
{
    mca_base_component_t *c = &mca_routed_radix_component.super.base_version;
    int tmp;

    mca_base_param_reg_int(c, NULL,
                           "Radix to be used for routed radix tree",
                           false, false, 32, &mca_routed_radix_component.radix);

    mca_base_param_reg_int(c, "max_connections",
                           "Max number of connections a daemon may make before routing messages across tree",
                           false, false, -1, &tmp);

    if (0 < tmp) {
        /* a positive parameter value is used as given */
        mca_routed_radix_component.max_connections = tmp;
    } else if (0 < opal_sys_limits.num_files) {
        /* we really should compute the max connections as the total limit on file
         * descriptors minus the radix minus the fd's needed for our local
         * children. However, we don't have all that info until later, so just
         * take a reasonable approximation here
         */
        if (opal_sys_limits.num_files > mca_routed_radix_component.radix) {
            mca_routed_radix_component.max_connections =
                opal_sys_limits.num_files - mca_routed_radix_component.radix;
        } else {
            /* the radix already meets or exceeds the file limit, so there is
             * no headroom left. Store 0 rather than a non-positive difference:
             * max_connections is an orte_vpid_t, and if that type is unsigned
             * (NOTE(review): confirm) a negative value would wrap to a very
             * large limit
             */
            mca_routed_radix_component.max_connections = 0;
        }
    } else {
        /* default to radix size for lack of anything better */
        mca_routed_radix_component.max_connections = mca_routed_radix_component.radix;
    }

    *priority = 30;
    *module = (mca_base_module_t *) &orte_routed_radix_module;
    return ORTE_SUCCESS;
}