f139cfd28a
Add a new tm ess module that exploits this capability. Update the various plm modules to enable it - just a minor change reflecting an added param to a plm base function. Additional fixes included: 1. remove an erroneous cleanup of session directories in the tool finalize procedure - tools don't create session directories to begin with! 2. fix a duplicate free when attempting to execute a non-existent app 3. clean up a typo in the comm utilities 4. fix comm_spawn - was perturbed by the changes in pack/unpack of orte_job_t to properly support orte-ps. Has been tested on slurm and tm machines, using all tests in orte/test/mpi. May run into issues with command line length on large jobs due to inclusion of node info to support static ports - will fix this next with addition of regexp generator to compress that info. This commit was SVN r21248.
98 строки
2.6 KiB
C
98 строки
2.6 KiB
C
/*
|
|
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
|
|
* University Research and Technology
|
|
* Corporation. All rights reserved.
|
|
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
* of Tennessee Research Foundation. All rights
|
|
* reserved.
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
* University of Stuttgart. All rights reserved.
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
* All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*
|
|
* These symbols are in a file by themselves to provide nice linker
|
|
* semantics. Since linkers generally pull in symbols by object
|
|
* files, keeping these symbols as the only symbols in this file
|
|
* prevents utility programs such as "ompi_info" from having to import
|
|
* entire components just to query their version and parameters.
|
|
*/
|
|
|
|
#include "orte_config.h"
|
|
#include "orte/constants.h"
|
|
|
|
#include "opal/mca/base/mca_base_param.h"
|
|
|
|
#include "orte/util/proc_info.h"
|
|
|
|
#include "orte/mca/ess/ess.h"
|
|
#include "orte/mca/ess/tm/ess_tm.h"
|
|
|
|
/*
 * The tm ESS module object. It is only declared here (no definition is
 * visible in this file); orte_ess_tm_component_query() returns its
 * address when the component is selectable.
 */
extern orte_ess_base_module_t orte_ess_tm_module;
|
|
|
|
/*
|
|
* Instantiate the public struct with all of our public information
|
|
* and pointers to our public functions in it
|
|
*/
|
|
orte_ess_base_component_t mca_ess_tm_component = {
|
|
{
|
|
ORTE_ESS_BASE_VERSION_2_0_0,
|
|
|
|
/* Component name and version */
|
|
"tm",
|
|
ORTE_MAJOR_VERSION,
|
|
ORTE_MINOR_VERSION,
|
|
ORTE_RELEASE_VERSION,
|
|
|
|
/* Component open and close functions */
|
|
orte_ess_tm_component_open,
|
|
orte_ess_tm_component_close,
|
|
orte_ess_tm_component_query
|
|
},
|
|
{
|
|
/* The component is checkpoint ready */
|
|
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
|
}
|
|
};
|
|
|
|
|
|
int
|
|
orte_ess_tm_component_open(void)
|
|
{
|
|
return ORTE_SUCCESS;
|
|
}
|
|
|
|
|
|
/*
 * Query function of the tm ESS component.
 *
 * The component offers its module only when both of the following hold:
 *   - the PBS_JOBID environment variable is set (we are inside a TM job), and
 *   - orte_process_info.my_hnp_uri is non-NULL (we were given a path
 *     back to the HNP).
 * Together these mean we were launched by mpirun in a tm world.
 *
 * module   - out: set to &orte_ess_tm_module on success, NULL otherwise
 * priority - out: set to 30 on success, -1 otherwise
 *
 * Returns ORTE_SUCCESS when the module is offered, ORTE_ERROR when not.
 */
int orte_ess_tm_component_query(mca_base_module_t **module, int *priority)
{
    const char *pbs_jobid = getenv("PBS_JOBID");

    /* Missing either prerequisite: sadly, we cannot be used */
    if (NULL == pbs_jobid || NULL == orte_process_info.my_hnp_uri) {
        *priority = -1;
        *module = NULL;
        return ORTE_ERROR;
    }

    /* TM job with a known HNP: hand back our module */
    *priority = 30;
    *module = (mca_base_module_t *)&orte_ess_tm_module;
    return ORTE_SUCCESS;
}
|
|
|
|
|
|
int
|
|
orte_ess_tm_component_close(void)
|
|
{
|
|
return ORTE_SUCCESS;
|
|
}
|
|
|