1
1
openmpi/ompi/mca/btl/base/btl_base_error.h
Ralph Castain 45e695928f As per the email discussion, revise the sparse handling of hostnames so that we avoid potential infinite loops while allowing large-scale users to improve their startup time:
* add a new MCA param orte_hostname_cutoff to specify the number of nodes at which we stop including hostnames. This defaults to INT_MAX => always include hostnames. If a value is given, then we will include hostnames for any allocation smaller than the given limit.

* remove ompi_proc_get_hostname. Replace all occurrences with a direct link to ompi_proc_t's proc_hostname, protected by appropriate "if NULL"

* modify the OMPI-ORTE integration component so that any call to modex_recv automatically loads the ompi_proc_t->proc_hostname field as well as returning the requested info. Thus, any process whose modex info you retrieve will automatically receive the hostname. Note that on-demand retrieval is still enabled - i.e., if we are running under direct launch with PMI, the hostname will be fetched upon first call to modex_recv, and then the ompi_proc_t->proc_hostname field will be loaded

* removed a stale MCA param "mpi_keep_peer_hostnames" that was no longer used anywhere in the code base

* added an envar lookup in ess/pmi for the number of nodes in the allocation. Sadly, PMI itself doesn't provide that info, so we have to get it a different way. Currently, we support PBS-based systems and SLURM - for any other, rank0 will emit a warning and we assume max number of daemons so we will always retain hostnames

This commit was SVN r29052.
2013-08-20 18:59:36 +00:00

100 строки
4.0 KiB
C

/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2011 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2012 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2013 Intel, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BTL_BASE_ERROR_H
#define MCA_BTL_BASE_ERROR_H
#include "ompi_config.h"
#include <errno.h>
#include <stdio.h>
#include "ompi/mca/rte/rte.h"
OMPI_DECLSPEC extern int mca_btl_base_verbose;
OMPI_DECLSPEC extern int mca_btl_base_err(const char*, ...) __opal_attribute_format__(__printf__, 1, 2);
OMPI_DECLSPEC extern int mca_btl_base_out(const char*, ...) __opal_attribute_format__(__printf__, 1, 2);
#define BTL_OUTPUT(args) \
do { \
mca_btl_base_out("[%s]%s[%s:%d:%s] ", \
ompi_process_info.nodename, \
OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), \
__FILE__, __LINE__, __func__); \
mca_btl_base_out args; \
mca_btl_base_out("\n"); \
} while(0);
#define BTL_ERROR(args) \
do { \
mca_btl_base_err("[%s]%s[%s:%d:%s] ", \
ompi_process_info.nodename, \
OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), \
__FILE__, __LINE__, __func__); \
mca_btl_base_err args; \
mca_btl_base_err("\n"); \
} while(0);
#define BTL_PEER_ERROR(proc, args) \
do { \
mca_btl_base_err("%s[%s:%d:%s] from %s ", \
OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), \
__FILE__, __LINE__, __func__, \
ompi_process_info.nodename); \
if(proc) { \
mca_btl_base_err("to: %s ", (NULL == proc->proc_hostname) ? \
"unknown" : proc->proc_hostname); \
} \
mca_btl_base_err args; \
mca_btl_base_err("\n"); \
} while(0);
#if OPAL_ENABLE_DEBUG
#define BTL_VERBOSE(args) \
do { \
if(mca_btl_base_verbose > 0) { \
mca_btl_base_err("[%s]%s[%s:%d:%s] ", \
ompi_process_info.nodename, \
OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), \
__FILE__, __LINE__, __func__); \
mca_btl_base_err args; \
mca_btl_base_err("\n"); \
} \
} while(0);
#else
#define BTL_VERBOSE(args)
#endif
#endif
BEGIN_C_DECLS
OMPI_DECLSPEC extern void mca_btl_base_error_no_nics(const char* transport,
const char* nic_name);
END_C_DECLS