18b2dca51c
There is a binomial algorithm in the code (i.e., the HNP would send to a subset of the orteds, which then relay it on according to the typical log-2 algorithm), but that has a bug in it, so the code won't let you select it even if you try (and the mca param doesn't show, so you'd *really* have to try). This also involved a slight change to the oob.xcast API, so that change was propagated as required. Note: this has *only* been tested on rsh, SLURM, and Bproc environments (now that it has been transferred to the OMPI trunk, I'll need to re-test it [only rsh has been re-tested so far]). It should work fine on any environment that uses the ORTE daemons - anywhere else, you are on your own... :-) Also, correct a mistake where the orte_debug_flag was declared as an int, but the mca param was set as a bool. Move the storage for that flag to the orte/runtime/params.c and orte/runtime/params.h files appropriately. This commit was SVN r14475.
84 строки
2.9 KiB
C
84 строки
2.9 KiB
C
/*
|
|
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
* University Research and Technology
|
|
* Corporation. All rights reserved.
|
|
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
* of Tennessee Research Foundation. All rights
|
|
* reserved.
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
* University of Stuttgart. All rights reserved.
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
* All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
#include "orte_config.h"
|
|
|
|
#include "orte/orte_constants.h"
|
|
|
|
#ifdef HAVE_SYS_TIME_H
|
|
#include <sys/time.h>
|
|
#endif
|
|
|
|
#include "opal/mca/base/mca_base_param.h"
|
|
|
|
#include "orte/runtime/runtime.h"
|
|
#include "orte/runtime/params.h"
|
|
|
|
/*
 * Globals used by the RTE.
 */

/* Set to true once orte_init has completed; starts out false. */
bool orte_initialized = false;

/* Top-level ORTE debug switch; assigned in orte_register_params(). */
bool orte_debug_flag;

/*
 * Time to wait before giving up on aborting an ORTE operation;
 * assigned in orte_register_params().
 */
struct timeval orte_abort_timeout;
|
|
|
|
int orte_register_params(bool infrastructure)
|
|
{
|
|
int value;
|
|
|
|
mca_base_param_reg_int_name("orte", "debug",
|
|
"Top-level ORTE debug switch",
|
|
false, false, (int)false, &value);
|
|
orte_debug_flag = OPAL_INT_TO_BOOL(value);
|
|
|
|
mca_base_param_reg_int_name("orte_debug", "daemons_file",
|
|
"Whether want stdout/stderr of daemons to go to a file or not",
|
|
false, false, (int)false, NULL);
|
|
|
|
mca_base_param_reg_int_name("orte", "no_daemonize",
|
|
"Whether to properly daemonize the ORTE daemons or not",
|
|
false, false, (int)false, NULL);
|
|
|
|
mca_base_param_reg_int_name("orte_debug", "daemons",
|
|
"Whether to debug the ORTE daemons or not",
|
|
false, false, (int)false, NULL);
|
|
|
|
mca_base_param_reg_int_name("orte", "infrastructure",
|
|
"Whether we are ORTE infrastructure or an ORTE application",
|
|
true, true, (int)infrastructure, NULL);
|
|
|
|
/* User-level debugger info string */
|
|
|
|
mca_base_param_reg_string_name("orte", "base_user_debugger",
|
|
"Sequence of user-level debuggers to search for in orterun",
|
|
false, false, "totalview @mpirun@ -a @mpirun_args@ : fxp @mpirun@ -a @mpirun_args@", NULL);
|
|
|
|
|
|
mca_base_param_reg_int_name("orte", "abort_timeout",
|
|
"Time to wait [in seconds] before giving up on aborting an ORTE operation",
|
|
false, false, 10, &value);
|
|
orte_abort_timeout.tv_sec = value;
|
|
orte_abort_timeout.tv_usec = 0;
|
|
|
|
/* All done */
|
|
return ORTE_SUCCESS;
|
|
}
|
|
|