1
1
openmpi/orte/mca/ess/slurm/ess_slurm_module.c
Ralph Castain 9613b3176c Effectively revert the orte_output system and return to direct use of opal_output at all levels. Retain the orte_show_help subsystem to allow aggregation of show_help messages at the HNP.
After much work by Jeff and myself, and quite a lot of discussion, it has become clear that we simply cannot resolve the infinite loops caused by RML-involved subsystems calling orte_output. The original rationale for the change to orte_output has also been reduced by shifting the output of XML-formatted vs human readable messages to an alternative approach.

I have globally replaced the orte_output/ORTE_OUTPUT calls in the code base, as well as the corresponding .h file name. I have test compiled and run this on the various environments within my reach, so hopefully this will prove minimally disruptive.

This commit was SVN r18619.
2008-06-09 14:53:58 +00:00

348 строки
10 KiB
C

/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
*/
#include "orte_config.h"
#include "orte/constants.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#ifdef HAVE_STRING_H
#include <string.h>
#endif /* HAVE_STRING_H */
#include <ctype.h>
#include "opal/util/opal_environ.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/util/argv.h"
#include "opal/class/opal_pointer_array.h"
#include "orte/util/proc_info.h"
#include "orte/util/show_help.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/ess/ess.h"
#include "orte/mca/ess/base/base.h"
#include "orte/mca/ess/slurm/ess_slurm.h"
static char *get_slurm_nodename(int nodeid);
static int slurm_set_name(void);
static int rte_init(char flags);
static int rte_finalize(void);
static bool proc_is_local(orte_process_name_t *proc);
static char* proc_get_hostname(orte_process_name_t *proc);
static uint32_t proc_get_arch(orte_process_name_t *proc);
static uint8_t proc_get_local_rank(orte_process_name_t *proc);
static uint8_t proc_get_node_rank(orte_process_name_t *proc);
orte_ess_base_module_t orte_ess_slurm_module = {
rte_init,
rte_finalize,
orte_ess_base_app_abort,
proc_is_local,
proc_get_hostname,
proc_get_arch,
proc_get_local_rank,
proc_get_node_rank,
NULL /* ft_event */
};
static opal_pointer_array_t nidmap;
static orte_pmap_t *pmap;
static orte_vpid_t nprocs;
static int rte_init(char flags)
{
int ret;
char *error = NULL;
/* Start by getting a unique name */
slurm_set_name();
/* if I am a daemon, complete my setup using the
* default procedure
*/
if (orte_process_info.daemon) {
if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) {
ORTE_ERROR_LOG(ret);
error = "orte_ess_base_orted_setup";
goto error;
}
} else if (orte_process_info.tool) {
/* otherwise, if I am a tool proc, use that procedure */
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) {
ORTE_ERROR_LOG(ret);
error = "orte_ess_base_tool_setup";
goto error;
}
} else {
/* otherwise, I must be an application process - use
* the default procedure to finish my setup
*/
if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup())) {
ORTE_ERROR_LOG(ret);
error = "orte_ess_base_app_setup";
goto error;
}
/* setup the nidmap arrays */
OBJ_CONSTRUCT(&nidmap, opal_pointer_array_t);
opal_pointer_array_init(&nidmap, 8, INT32_MAX, 8);
/* if one was provided, build my nidmap */
if (ORTE_SUCCESS != (ret = orte_ess_base_build_nidmap(orte_process_info.sync_buf,
&nidmap, &pmap, &nprocs))) {
ORTE_ERROR_LOG(ret);
error = "orte_ess_base_build_nidmap";
goto error;
}
}
return ORTE_SUCCESS;
error:
orte_show_help("help-orte-runtime.txt",
"orte_init:startup:internal-failure",
true, error, ORTE_ERROR_NAME(ret), ret);
return ret;
}
static int rte_finalize(void)
{
int ret;
orte_nid_t **nids;
int32_t i;
/* if I am a daemon, finalize using the default procedure */
if (orte_process_info.daemon) {
if (ORTE_SUCCESS != (ret = orte_ess_base_orted_finalize())) {
ORTE_ERROR_LOG(ret);
}
} else if (orte_process_info.tool) {
/* otherwise, if I am a tool proc, use that procedure */
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_finalize())) {
ORTE_ERROR_LOG(ret);
}
} else {
/* otherwise, I must be an application process - deconstruct
* my nidmap arrays
*/
nids = (orte_nid_t**)nidmap.addr;
for (i=0; i < nidmap.size; i++) {
if (NULL == nids[i]) {
break;
}
if (NULL != nids[i]->name) {
free(nids[i]->name);
}
}
OBJ_DESTRUCT(&nidmap);
free(pmap);
/* use the default procedure to finish */
if (ORTE_SUCCESS != (ret = orte_ess_base_app_finalize())) {
ORTE_ERROR_LOG(ret);
}
}
return ret;
}
static bool proc_is_local(orte_process_name_t *proc)
{
if (pmap[proc->vpid].node == (int32_t)ORTE_PROC_MY_DAEMON->vpid) {
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
"%s ess:slurm: proc %s is LOCAL",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc)));
return true;
}
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
"%s ess:slurm: proc %s is REMOTE",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc)));
return false;
}
static char* proc_get_hostname(orte_process_name_t *proc)
{
int32_t node;
orte_nid_t **nids;
node = pmap[proc->vpid].node;
nids = (orte_nid_t**)nidmap.addr;
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
"%s ess:slurm: proc %s is on host %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc),
nids[node]->name));
return nids[node]->name;
}
static uint32_t proc_get_arch(orte_process_name_t *proc)
{
int32_t node;
orte_nid_t **nids;
node = pmap[proc->vpid].node;
nids = (orte_nid_t**)nidmap.addr;
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
"%s ess:slurm: proc %s has arch %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc),
nids[node]->arch));
return nids[node]->arch;
}
static uint8_t proc_get_local_rank(orte_process_name_t *proc)
{
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
"%s ess:slurm: proc %s has local rank %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc),
(int)pmap[proc->vpid].local_rank));
return pmap[proc->vpid].local_rank;
}
static uint8_t proc_get_node_rank(orte_process_name_t *proc)
{
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
"%s ess:slurm: proc %s has node rank %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc),
(int)pmap[proc->vpid].node_rank));
return pmap[proc->vpid].node_rank;
}
static int slurm_set_name(void)
{
int slurm_nodeid;
int rc;
int id;
orte_jobid_t jobid;
orte_vpid_t vpid;
char* jobid_string;
char* vpid_string;
OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output,
"ess:slurm setting name"));
id = mca_base_param_register_string("orte", "ess", "jobid", NULL, NULL);
mca_base_param_lookup_string(id, &jobid_string);
if (NULL == jobid_string) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_jobid(&jobid, jobid_string))) {
ORTE_ERROR_LOG(rc);
return(rc);
}
id = mca_base_param_register_string("orte", "ess", "vpid", NULL, NULL);
mca_base_param_lookup_string(id, &vpid_string);
if (NULL == vpid_string) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_vpid(&vpid, vpid_string))) {
ORTE_ERROR_LOG(rc);
return(rc);
}
ORTE_PROC_MY_NAME->jobid = jobid;
/* fix up the vpid and make it the "real" vpid */
slurm_nodeid = atoi(getenv("SLURM_NODEID"));
ORTE_PROC_MY_NAME->vpid = vpid + slurm_nodeid;
OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output,
"ess:slurm set name to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
/* fix up the system info nodename to match exactly what slurm returned */
if (NULL != orte_process_info.nodename) {
free(orte_process_info.nodename);
}
orte_process_info.nodename = get_slurm_nodename(slurm_nodeid);
OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output,
"ess:slurm set nodename to %s",
orte_process_info.nodename));
/* get the non-name common environmental variables */
if (ORTE_SUCCESS != (rc = orte_ess_env_get())) {
ORTE_ERROR_LOG(rc);
return rc;
}
return ORTE_SUCCESS;
}
static char *
get_slurm_nodename(int nodeid)
{
char **names = NULL;
char *slurm_nodelist;
char *ret;
slurm_nodelist = getenv("OMPI_MCA_orte_slurm_nodelist");
if (NULL == slurm_nodelist) {
return NULL;
}
/* split the node list into an argv array */
names = opal_argv_split(slurm_nodelist, ',');
if (NULL == names) { /* got an error */
return NULL;
}
/* check to see if there are enough entries */
if (nodeid > opal_argv_count(names)) {
return NULL;
}
ret = strdup(names[nodeid]);
opal_argv_free(names);
/* All done */
return ret;
}