1
1
openmpi/orte/mca/ess/tm/ess_tm_component.c
Ralph Castain f139cfd28a Fully enable the use of static ports to minimize connections on mpirun. When static ports are provided, daemons will automatically use routes defined by the selected routed module to call back to mpirun during startup, thus eliminating the dedicated daemon-to-mpirun connection. Therefore, the total number of connections on mpirun will equal the fanout of the routed module (instead of #nodes in job).
Add a new tm ess module that exploits this capability.

Update the various plm modules to enable it - just a minor change reflecting an added param to a plm base function.

Additional fixes included:

1. remove an erroneous cleanup of session directories in the tool finalize procedure - tools don't create session directories to begin with!

2. fix a duplicate free when attempting to execute a non-existent app

3. clean up a typo in the comm utilities

4. fix comm_spawn - was perturbed by the changes in pack/unpack of orte_job_t to properly support orte-ps

Been tested on slurm and tm machines, using all tests in orte/test/mpi. May run into issue with command line length on large jobs due to inclusion of node info to support static ports - will fix this next with addition of regexp generator to compress that info.

This commit was SVN r21248.
2009-05-16 04:15:55 +00:00

98 строки
2.6 KiB
C

/*
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
* These symbols are in a file by themselves to provide nice linker
* semantics. Since linkers generally pull in symbols by object
* files, keeping these symbols as the only symbols in this file
* prevents utility programs such as "ompi_info" from having to import
* entire components just to query their version and parameters.
*/
#include "orte_config.h"
#include "orte/constants.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/util/proc_info.h"
#include "orte/mca/ess/ess.h"
#include "orte/mca/ess/tm/ess_tm.h"
/*
 * The tm ess module instance. It is only declared here; its definition
 * is not in this part of the file. The component query function hands
 * out a pointer to it when this component is selected.
 */
extern orte_ess_base_module_t orte_ess_tm_module;
/*
 * Instantiate the public struct with all of our public information
 * and pointers to our public functions in it.
 *
 * The initializer is positional: the first brace group carries the
 * framework version macro, the component name and version numbers,
 * and the open/close/query entry points; the second brace group
 * carries the component metadata.
 * NOTE(review): the field layout comes from orte_ess_base_component_t,
 * which is declared outside this file -- confirm ordering there before
 * adding or moving entries.
 */
orte_ess_base_component_t mca_ess_tm_component = {
{
/* ESS framework version macro (presumably identifies the v2.0.0
 * component interface -- confirm against ess.h) */
ORTE_ESS_BASE_VERSION_2_0_0,
/* Component name and version */
"tm",
ORTE_MAJOR_VERSION,
ORTE_MINOR_VERSION,
ORTE_RELEASE_VERSION,
/* Component open and close functions, followed by the query
 * function that decides whether this component is selected */
orte_ess_tm_component_open,
orte_ess_tm_component_close,
orte_ess_tm_component_query
},
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
}
};
/*
 * Component open hook.
 *
 * There is nothing to set up when this component is opened, so the
 * function unconditionally reports success.
 *
 * Returns ORTE_SUCCESS in all cases.
 */
int orte_ess_tm_component_open(void)
{
    return ORTE_SUCCESS;
}
/*
 * Component query hook: decide whether the tm ess module should be used.
 *
 * module   - out: set to the tm module on selection, NULL otherwise
 * priority - out: set to 30 on selection, -1 otherwise
 *
 * Returns ORTE_SUCCESS when the module is offered and ORTE_ERROR when
 * this component takes itself out of the running.
 */
int
orte_ess_tm_component_query(mca_base_module_t **module, int *priority)
{
    /* Being launched by mpirun in a tm world requires two things:
     * we are inside a TM job (PBS_JOBID is present in the
     * environment) and we were given a path back to the HNP.
     * If either one is missing, decline.
     */
    if (NULL == getenv("PBS_JOBID") ||
        NULL == orte_process_info.my_hnp_uri) {
        *priority = -1;
        *module = NULL;
        return ORTE_ERROR;
    }

    /* Both conditions hold, so offer the tm module */
    *priority = 30;
    *module = (mca_base_module_t *)&orte_ess_tm_module;
    return ORTE_SUCCESS;
}
/*
 * Component close hook.
 *
 * The open hook acquires nothing, so there is nothing to release
 * here; the function unconditionally reports success.
 *
 * Returns ORTE_SUCCESS in all cases.
 */
int orte_ess_tm_component_close(void)
{
    return ORTE_SUCCESS;
}