Make debruijn the default routed component. Update the radix component to "short-circuit" the tree when the job size permits.
This commit was SVN r26580.
Этот коммит содержится в:
родитель
ffcca0185a
Коммит
05122a2f93
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2007 Los Alamos National Security, LLC.
|
||||
* Copyright (c) 2007-2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2004-2008 The Trustees of Indiana University.
|
||||
* All rights reserved.
|
||||
@ -48,7 +48,10 @@ orte_routed_component_t mca_routed_binomial_component = {
|
||||
|
||||
/*
 * Component query for the binomial routed component.
 *
 * Writes the selection priority to *priority, points *module at
 * orte_routed_binomial_module, and returns ORTE_SUCCESS.
 *
 * NOTE(review): this text is a rendered diff, so both sides of the change
 * are present. Going by the hunk header (-48,7 +48,10), "*priority = 70;"
 * looks like the removed line, while the explanatory comment and
 * "*priority = 0;" look like the added lines. Read literally, the later
 * assignment (0) is the value left in *priority. The lone "|" and "||||"
 * lines appear to be table-separator residue from the page, not C source --
 * confirm against the repository before compiling.
 */
static int orte_routed_binomial_component_query(mca_base_module_t **module, int *priority)
|
||||
{
|
||||
*priority = 70;
|
||||
/* make this selected ONLY if the user directs as this module scales
|
||||
* poorly compared to our other options
|
||||
*/
|
||||
*priority = 0;
|
||||
*module = (mca_base_module_t *) &orte_routed_binomial_module;
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
@ -49,7 +49,11 @@ orte_routed_component_t mca_routed_debruijn_component = {
|
||||
|
||||
/*
 * Component query for the debruijn routed component.
 *
 * Writes the selection priority to *priority, points *module at
 * orte_routed_debruijn_module, and returns ORTE_SUCCESS.
 *
 * NOTE(review): this text is a rendered diff, so both sides of the change
 * are present. Going by the hunk header (-49,7 +49,11), "*priority = 37;"
 * looks like the removed line, while the explanatory comment and
 * "*priority = 70;" look like the added lines. Read literally, the later
 * assignment (70) is the value left in *priority. The lone "|" and "||||"
 * lines appear to be table-separator residue from the page, not C source --
 * confirm against the repository before compiling.
 */
static int orte_routed_debruijn_component_query(mca_base_module_t **module, int *priority)
|
||||
{
|
||||
*priority = 37; /* 37! */
|
||||
/* Debruijn shall be our default, especially for large systems. For smaller
|
||||
* systems, we will allow other options that have even fewer hops to
|
||||
* support wireup
|
||||
*/
|
||||
*priority = 70;
|
||||
*module = (mca_base_module_t *) &orte_routed_debruijn_module;
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
@ -4,7 +4,7 @@
|
||||
* Copyright (c) 2004-2011 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -375,6 +375,9 @@ static orte_process_name_t get_route(orte_process_name_t *target)
|
||||
|
||||
/* THIS CAME FROM OUR OWN JOB FAMILY... */
|
||||
|
||||
/* if this is going to the HNP, then send it direct if we don't know
|
||||
* how to get there - otherwise, send it via the tree
|
||||
*/
|
||||
if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_HNP, target)) {
|
||||
if (!hnp_direct || orte_static_ports) {
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output,
|
||||
@ -404,6 +407,10 @@ static orte_process_name_t get_route(orte_process_name_t *target)
|
||||
if (ORTE_PROC_MY_NAME->vpid == daemon.vpid) {
|
||||
ret = target;
|
||||
goto found;
|
||||
} else if (orte_process_info.num_procs < mca_routed_radix_component.max_connections) {
|
||||
/* if the job is small enough, send direct to the target's daemon */
|
||||
ret = &daemon;
|
||||
goto found;
|
||||
} else {
|
||||
/* search routing tree for next step to that daemon */
|
||||
for (item = opal_list_get_first(&my_children);
|
||||
|
@ -20,6 +20,7 @@ BEGIN_C_DECLS
|
||||
/*
 * Component descriptor for the radix routed component: the generic routed
 * component structure followed by radix-specific settings.
 *
 * NOTE(review): this text is a rendered diff. The hunk header (-20,6 +20,7)
 * indicates one added line, presumably "max_connections" -- confirm against
 * the repository. The lone "|" and "||||" lines appear to be table-separator
 * residue from the page, not C source.
 */
typedef struct {
|
||||
orte_routed_component_t super; /* generic routed component; must stay first for the &...super accesses */
|
||||
int radix; /* radix to be used for the routed radix tree */
|
||||
orte_vpid_t max_connections; /* max connections a daemon may make before routing messages across the tree */
|
||||
} orte_routed_radix_component_t;
|
||||
ORTE_MODULE_DECLSPEC extern orte_routed_radix_component_t mca_routed_radix_component;
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2007 Los Alamos National Security, LLC.
|
||||
* Copyright (c) 2007-2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2004-2008 The Trustees of Indiana University.
|
||||
* All rights reserved.
|
||||
@ -15,7 +15,7 @@
|
||||
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
|
||||
#include "opal/util/sys_limits.h"
|
||||
|
||||
#include "orte/mca/routed/base/base.h"
|
||||
#include "routed_radix.h"
|
||||
@ -51,11 +51,29 @@ orte_routed_radix_component_t mca_routed_radix_component = {
|
||||
/*
 * Component query for the radix routed component.
 *
 * Registers two integer parameters on the component, derives
 * mca_routed_radix_component.max_connections, writes the selection priority
 * to *priority, points *module at orte_routed_radix_module, and returns
 * ORTE_SUCCESS.
 *
 * max_connections is chosen as follows:
 *   - the registered "max_connections" value (read into tmp) when it is
 *     greater than zero;
 *   - otherwise opal_sys_limits.num_files minus the radix, when num_files
 *     is greater than zero;
 *   - otherwise the radix itself.
 *
 * The literals 32 and -1 passed to mca_base_param_reg_int are presumably the
 * parameter defaults -- confirm against the mca_base_param API.
 *
 * NOTE(review): "num_files - radix" is not range-checked. If the radix can
 * exceed num_files, the result would be negative before being stored in an
 * orte_vpid_t -- confirm whether that needs guarding.
 *
 * NOTE(review): this text is a rendered diff, so both sides of the change
 * are present. Going by the hunk header (-51,11 +51,29), "*priority = 65;"
 * looks like the removed line and "*priority = 30;" the added one. Read
 * literally, the later assignment (30) is the value left in *priority. The
 * lone "|" and "||||" lines appear to be table-separator residue from the
 * page, not C source -- confirm against the repository before compiling.
 */
static int orte_routed_radix_component_query(mca_base_module_t **module, int *priority)
|
||||
{
|
||||
mca_base_component_t *c = &mca_routed_radix_component.super.base_version;
|
||||
int tmp;
|
||||
|
||||
mca_base_param_reg_int(c, NULL,
|
||||
"Radix to be used for routed radix tree",
|
||||
false, false, 32, &mca_routed_radix_component.radix);
|
||||
*priority = 65;
|
||||
mca_base_param_reg_int(c, "max_connections",
|
||||
"Max number of connections a daemon may make before routing messages across tree",
|
||||
false, false, -1, &tmp);
|
||||
if (0 < tmp) {
|
||||
mca_routed_radix_component.max_connections = tmp;
|
||||
} else if (0 < opal_sys_limits.num_files) {
|
||||
/* we really should compute the max connections as the total limit on file
|
||||
* descriptors minus the radix minus the fd's needed for our local
|
||||
* children. However, we don't have all that info until later, so just
|
||||
* take a reasonable approximation here
|
||||
*/
|
||||
mca_routed_radix_component.max_connections = opal_sys_limits.num_files - mca_routed_radix_component.radix;
|
||||
} else {
|
||||
/* default to radix size for lack of anything better */
|
||||
mca_routed_radix_component.max_connections = mca_routed_radix_component.radix;
|
||||
}
|
||||
|
||||
*priority = 30;
|
||||
*module = (mca_base_module_t *) &orte_routed_radix_module;
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user