1
1
openmpi/ompi/runtime/mpiruntime.h

231 строка
9.2 KiB
C
Исходник Обычный вид История

/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
2015-06-24 06:59:57 +03:00
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
2015-06-24 06:59:57 +03:00
* reserved.
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2009 University of Houston. All rights reserved.
When we abort during MPI_Init, we currently emit a totally incorrect error message stating that we were unable to aggregate error messages and cannot guarantee all other processes were killed. This simply isn't true IF the rte has been initialized. So track that the rte has reached that point, and only emit the new message if it is accurate. Note that we still generate a TON of output for a minor error: Ralphs-iMac:examples rhc$ mpirun -n 3 -mca btl sm ./hello_c -------------------------------------------------------------------------- At least one pair of MPI processes are unable to reach each other for MPI communications. This means that no Open MPI device has indicated that it can be used to communicate between these processes. This is an error; Open MPI requires that all MPI processes be able to reach each other. This error can sometimes be the result of forgetting to specify the "self" BTL. Process 1 ([[50239,1],2]) is on host: Ralphs-iMac Process 2 ([[50239,1],2]) is on host: Ralphs-iMac BTLs attempted: sm Your MPI job is now going to abort; sorry. -------------------------------------------------------------------------- *** An error occurred in MPI_Init *** on a NULL communicator *** MPI_ERRORS_ARE_FATAL (processes in this communicator will now abort, *** and potentially your MPI job) *** An error occurred in MPI_Init *** on a NULL communicator *** MPI_ERRORS_ARE_FATAL (processes in this communicator will now abort, *** and potentially your MPI job) *** An error occurred in MPI_Init *** on a NULL communicator *** MPI_ERRORS_ARE_FATAL (processes in this communicator will now abort, *** and potentially your MPI job) -------------------------------------------------------------------------- MPI_INIT has failed because at least one MPI process is unreachable from another. This *usually* means that an underlying communication plugin -- such as a BTL or an MTL -- has either not loaded or not allowed itself to be used. Your MPI job will now abort. You may wish to try to narrow down the problem; * Check the output of ompi_info to see which BTL/MTL plugins are available. * Run your application with MPI_THREAD_SINGLE. * Set the MCA parameter btl_base_verbose to 100 (or mtl_base_verbose, if using MTL-based communications) to see exactly which communication plugins were considered and/or discarded. -------------------------------------------------------------------------- ------------------------------------------------------- Primary job terminated normally, but 1 process returned a non-zero exit code.. Per user-direction, the job has been aborted. ------------------------------------------------------- -------------------------------------------------------------------------- mpirun detected that one or more processes exited with non-zero status, thus causing the job to be terminated. The first process to do so was: Process name: [[50239,1],2] Exit code: 1 -------------------------------------------------------------------------- [Ralphs-iMac.local:23227] 2 more processes have sent help message help-mca-bml-r2.txt / unreachable proc [Ralphs-iMac.local:23227] Set MCA parameter "orte_base_help_aggregate" to 0 to see all help / error messages [Ralphs-iMac.local:23227] 2 more processes have sent help message help-mpi-runtime / mpi_init:startup:pml-add-procs-fail Ralphs-iMac:examples rhc$ Hopefully, we can agree on a way to reduce this verbage! This commit was SVN r31686. The following SVN revision numbers were found above: r2 --> open-mpi/ompi@58fdc188553052bc2e893ba28fb28fddbe78435a
2014-05-08 19:48:16 +04:00
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
2015-06-24 06:59:57 +03:00
*
* Additional copyrights may follow
2015-06-24 06:59:57 +03:00
*
* $HEADER$
*/
/**
* @file
*
* Interface into the MPI portion of the Open MPI Run Time Environment
*/
#ifndef OMPI_MPI_MPIRUNTIME_H
#define OMPI_MPI_MPIRUNTIME_H
#include "ompi_config.h"
#include "opal/class/opal_list.h"
#include "opal/class/opal_hash_table.h"
#include "opal/threads/mutex.h"
BEGIN_C_DECLS
/** forward type declaration */
struct ompi_communicator_t;
/** forward type declaration */
struct opal_thread_t;
== Highlights == 1. New mpifort wrapper compiler: you can utilize mpif.h, use mpi, and use mpi_f08 through this one wrapper compiler 1. mpif77 and mpif90 still exist, but are sym links to mpifort and may be removed in a future release 1. The mpi module has been re-implemented and is significantly "mo' bettah" 1. The mpi_f08 module offers many, many improvements over mpif.h and the mpi module This stuff is coming from a VERY long-lived mercurial branch (3 years!); it'll almost certainly take a few SVN commits and a bunch of testing before I get it correctly committed to the SVN trunk. == More details == Craig Rasmussen and I have been working with the MPI-3 Fortran WG and Fortran J3 committees for a long, long time to make a prototype MPI-3 Fortran bindings implementation. We think we're at a stable enough state to bring this stuff back to the trunk, with the goal of including it in OMPI v1.7. Special thanks go out to everyone who has been incredibly patient and helpful to us in this journey: * Rolf Rabenseifner/HLRS (mastermind/genius behind the entire MPI-3 Fortran effort) * The Fortran J3 committee * Tobias Burnus/gfortran * Tony !Goetz/Absoft * Terry !Donte/Oracle * ...and probably others whom I'm forgetting :-( There's still opportunities for optimization in the mpi_f08 implementation, but by and large, it is as far along as it can be until Fortran compilers start implementing the new F08 dimension(..) syntax. Note that gfortran is currently unsupported for the mpi_f08 module and the new mpi module. gfortran users will a) fall back to the same mpi module implementation that is in OMPI v1.5.x, and b) not get the new mpi_f08 module. The gfortran maintainers are actively working hard to add the necessary features to support both the new mpi_f08 module and the new mpi module implementations. This will take some time. As mentioned above, ompi/mpi/f77 and ompi/mpi/f90 no longer exist. All the fortran bindings implementations have been collated under ompi/mpi/fortran; each implementation has its own subdirectory: {{{ ompi/mpi/fortran/ base/ - glue code mpif-h/ - what used to be ompi/mpi/f77 use-mpi-tkr/ - what used to be ompi/mpi/f90 use-mpi-ignore-tkr/ - new mpi module implementation use-mpi-f08/ - new mpi_f08 module implementation }}} There's also a prototype 6-function-MPI implementation under use-mpi-f08-desc that emulates the new F08 dimension(..) syntax that isn't fully available in Fortran compilers yet. We did that to prove it to ourselves that it could be done once the compilers fully support it. This directory/implementation will likely eventually replace the use-mpi-f08 version. Other things that were done: * ompi_info grew a few new output fields to describe what level of Fortran support is included * Existing Fortran examples in examples/ were renamed; new mpi_f08 examples were added * The old Fortran MPI libraries were renamed: * libmpi_f77 -> libmpi_mpifh * libmpi_f90 -> libmpi_usempi * The configury for Fortran was consolidated and significantly slimmed down. Note that the F77 env variable is now IGNORED for configure; you should only use FC. Example: {{{ shell$ ./configure CC=icc CXX=icpc FC=ifort ... }}} All of this work was done in a Mercurial branch off the SVN trunk, and hosted at Bitbucket. This branch has got to be one of OMPI's longest-running branches. Its first commit was Tue Apr 07 23:01:46 2009 -0400 -- it's over 3 years old! :-) We think we've pulled in all relevant changes from the OMPI trunk (e.g., Fortran implementations of the new MPI-3 MPROBE stuff for mpif.h, use mpi, and use mpi_f08, and the recent Fujitsu Fortran patches). I anticipate some instability when we bring this stuff into the trunk, simply because it touches a LOT of code in the MPI layer in the OMPI code base. We'll try our best to make it as pain-free as possible, but please bear with us when it is committed. This commit was SVN r26283.
2012-04-18 19:57:29 +04:00
/** forward type declaration */
struct ompi_predefined_datatype_t;
/* Global variables and symbols for the MPI layer */
/** Mutex to protect all the _init and _finalize variables */
OMPI_DECLSPEC extern opal_mutex_t ompi_mpi_bootstrap_mutex;
/** Did MPI start to initialize? */
OMPI_DECLSPEC extern volatile bool ompi_mpi_init_started;
When we abort during MPI_Init, we currently emit a totally incorrect error message stating that we were unable to aggregate error messages and cannot guarantee all other processes were killed. This simply isn't true IF the rte has been initialized. So track that the rte has reached that point, and only emit the new message if it is accurate. Note that we still generate a TON of output for a minor error: Ralphs-iMac:examples rhc$ mpirun -n 3 -mca btl sm ./hello_c -------------------------------------------------------------------------- At least one pair of MPI processes are unable to reach each other for MPI communications. This means that no Open MPI device has indicated that it can be used to communicate between these processes. This is an error; Open MPI requires that all MPI processes be able to reach each other. This error can sometimes be the result of forgetting to specify the "self" BTL. Process 1 ([[50239,1],2]) is on host: Ralphs-iMac Process 2 ([[50239,1],2]) is on host: Ralphs-iMac BTLs attempted: sm Your MPI job is now going to abort; sorry. -------------------------------------------------------------------------- *** An error occurred in MPI_Init *** on a NULL communicator *** MPI_ERRORS_ARE_FATAL (processes in this communicator will now abort, *** and potentially your MPI job) *** An error occurred in MPI_Init *** on a NULL communicator *** MPI_ERRORS_ARE_FATAL (processes in this communicator will now abort, *** and potentially your MPI job) *** An error occurred in MPI_Init *** on a NULL communicator *** MPI_ERRORS_ARE_FATAL (processes in this communicator will now abort, *** and potentially your MPI job) -------------------------------------------------------------------------- MPI_INIT has failed because at least one MPI process is unreachable from another. This *usually* means that an underlying communication plugin -- such as a BTL or an MTL -- has either not loaded or not allowed itself to be used. Your MPI job will now abort. You may wish to try to narrow down the problem; * Check the output of ompi_info to see which BTL/MTL plugins are available. * Run your application with MPI_THREAD_SINGLE. * Set the MCA parameter btl_base_verbose to 100 (or mtl_base_verbose, if using MTL-based communications) to see exactly which communication plugins were considered and/or discarded. -------------------------------------------------------------------------- ------------------------------------------------------- Primary job terminated normally, but 1 process returned a non-zero exit code.. Per user-direction, the job has been aborted. ------------------------------------------------------- -------------------------------------------------------------------------- mpirun detected that one or more processes exited with non-zero status, thus causing the job to be terminated. The first process to do so was: Process name: [[50239,1],2] Exit code: 1 -------------------------------------------------------------------------- [Ralphs-iMac.local:23227] 2 more processes have sent help message help-mca-bml-r2.txt / unreachable proc [Ralphs-iMac.local:23227] Set MCA parameter "orte_base_help_aggregate" to 0 to see all help / error messages [Ralphs-iMac.local:23227] 2 more processes have sent help message help-mpi-runtime / mpi_init:startup:pml-add-procs-fail Ralphs-iMac:examples rhc$ Hopefully, we can agree on a way to reduce this verbage! This commit was SVN r31686. The following SVN revision numbers were found above: r2 --> open-mpi/ompi@58fdc188553052bc2e893ba28fb28fddbe78435a
2014-05-08 19:48:16 +04:00
/** Has the RTE been initialized? */
OMPI_DECLSPEC extern volatile bool ompi_rte_initialized;
/** Is MPI fully initialized? */
OMPI_DECLSPEC extern volatile bool ompi_mpi_initialized;
/** Did MPI start to finalize? */
OMPI_DECLSPEC extern volatile bool ompi_mpi_finalize_started;
/** Has MPI been fully finalized? */
OMPI_DECLSPEC extern volatile bool ompi_mpi_finalized;
/** Do we have multiple threads? */
OMPI_DECLSPEC extern bool ompi_mpi_thread_multiple;
/** Thread level requested to \c MPI_Init_thread() */
OMPI_DECLSPEC extern int ompi_mpi_thread_requested;
/** Thread level provided by Open MPI */
OMPI_DECLSPEC extern int ompi_mpi_thread_provided;
/** Identifier of the main thread */
OMPI_DECLSPEC extern struct opal_thread_t *ompi_mpi_main_thread;
== Highlights == 1. New mpifort wrapper compiler: you can utilize mpif.h, use mpi, and use mpi_f08 through this one wrapper compiler 1. mpif77 and mpif90 still exist, but are sym links to mpifort and may be removed in a future release 1. The mpi module has been re-implemented and is significantly "mo' bettah" 1. The mpi_f08 module offers many, many improvements over mpif.h and the mpi module This stuff is coming from a VERY long-lived mercurial branch (3 years!); it'll almost certainly take a few SVN commits and a bunch of testing before I get it correctly committed to the SVN trunk. == More details == Craig Rasmussen and I have been working with the MPI-3 Fortran WG and Fortran J3 committees for a long, long time to make a prototype MPI-3 Fortran bindings implementation. We think we're at a stable enough state to bring this stuff back to the trunk, with the goal of including it in OMPI v1.7. Special thanks go out to everyone who has been incredibly patient and helpful to us in this journey: * Rolf Rabenseifner/HLRS (mastermind/genius behind the entire MPI-3 Fortran effort) * The Fortran J3 committee * Tobias Burnus/gfortran * Tony !Goetz/Absoft * Terry !Donte/Oracle * ...and probably others whom I'm forgetting :-( There's still opportunities for optimization in the mpi_f08 implementation, but by and large, it is as far along as it can be until Fortran compilers start implementing the new F08 dimension(..) syntax. Note that gfortran is currently unsupported for the mpi_f08 module and the new mpi module. gfortran users will a) fall back to the same mpi module implementation that is in OMPI v1.5.x, and b) not get the new mpi_f08 module. The gfortran maintainers are actively working hard to add the necessary features to support both the new mpi_f08 module and the new mpi module implementations. This will take some time. As mentioned above, ompi/mpi/f77 and ompi/mpi/f90 no longer exist. All the fortran bindings implementations have been collated under ompi/mpi/fortran; each implementation has its own subdirectory: {{{ ompi/mpi/fortran/ base/ - glue code mpif-h/ - what used to be ompi/mpi/f77 use-mpi-tkr/ - what used to be ompi/mpi/f90 use-mpi-ignore-tkr/ - new mpi module implementation use-mpi-f08/ - new mpi_f08 module implementation }}} There's also a prototype 6-function-MPI implementation under use-mpi-f08-desc that emulates the new F08 dimension(..) syntax that isn't fully available in Fortran compilers yet. We did that to prove it to ourselves that it could be done once the compilers fully support it. This directory/implementation will likely eventually replace the use-mpi-f08 version. Other things that were done: * ompi_info grew a few new output fields to describe what level of Fortran support is included * Existing Fortran examples in examples/ were renamed; new mpi_f08 examples were added * The old Fortran MPI libraries were renamed: * libmpi_f77 -> libmpi_mpifh * libmpi_f90 -> libmpi_usempi * The configury for Fortran was consolidated and significantly slimmed down. Note that the F77 env variable is now IGNORED for configure; you should only use FC. Example: {{{ shell$ ./configure CC=icc CXX=icpc FC=ifort ... }}} All of this work was done in a Mercurial branch off the SVN trunk, and hosted at Bitbucket. This branch has got to be one of OMPI's longest-running branches. Its first commit was Tue Apr 07 23:01:46 2009 -0400 -- it's over 3 years old! :-) We think we've pulled in all relevant changes from the OMPI trunk (e.g., Fortran implementations of the new MPI-3 MPROBE stuff for mpif.h, use mpi, and use mpi_f08, and the recent Fujitsu Fortran patches). I anticipate some instability when we bring this stuff into the trunk, simply because it touches a LOT of code in the MPI layer in the OMPI code base. We'll try our best to make it as pain-free as possible, but please bear with us when it is committed. This commit was SVN r26283.
2012-04-18 19:57:29 +04:00
/*
* These variables are for the MPI F03 bindings (F03 must bind Fortran
* varaiables to symbols; it cannot bind Fortran variables to the
* address of a C variable).
*/
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t *ompi_mpi_character_addr;
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t *ompi_mpi_logical_addr;
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t *ompi_mpi_logical1_addr;
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t *ompi_mpi_logical2_addr;
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t *ompi_mpi_logical4_addr;
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t *ompi_mpi_logical8_addr;
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t *ompi_mpi_integer_addr;
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t *ompi_mpi_integer1_addr;
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t *ompi_mpi_integer2_addr;
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t *ompi_mpi_integer4_addr;
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t *ompi_mpi_integer8_addr;
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t *ompi_mpi_integer16_addr;
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t *ompi_mpi_real_addr;
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t *ompi_mpi_real4_addr;
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t *ompi_mpi_real8_addr;
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t *ompi_mpi_real16_addr;
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t *ompi_mpi_dblprec_addr;
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t *ompi_mpi_cplex_addr;
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t *ompi_mpi_complex8_addr;
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t *ompi_mpi_complex16_addr;
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t *ompi_mpi_complex32_addr;
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t *ompi_mpi_dblcplex_addr;
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t *ompi_mpi_2real_addr;
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t *ompi_mpi_2dblprec_addr;
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t *ompi_mpi_2integer_addr;
OMPI_DECLSPEC extern struct ompi_status_public_t *ompi_mpi_status_ignore_addr;
OMPI_DECLSPEC extern struct ompi_status_public_t *ompi_mpi_statuses_ignore_addr;
/** Bitflags to be used for the modex exchange for the various thread
* levels. Required to support heterogeneous environments */
#define OMPI_THREADLEVEL_SINGLE_BF 0x00000001
#define OMPI_THREADLEVEL_FUNNELED_BF 0x00000002
#define OMPI_THREADLEVEL_SERIALIZED_BF 0x00000004
#define OMPI_THREADLEVEL_MULTIPLE_BF 0x00000008
#define OMPI_THREADLEVEL_SET_BITFLAG(threadlevelin,threadlevelout) { \
if ( MPI_THREAD_SINGLE == threadlevelin ) { \
threadlevelout |= OMPI_THREADLEVEL_SINGLE_BF; \
} else if ( MPI_THREAD_FUNNELED == threadlevelin ) { \
threadlevelout |= OMPI_THREADLEVEL_FUNNELED_BF; \
} else if ( MPI_THREAD_SERIALIZED == threadlevelin ) { \
threadlevelout |= OMPI_THREADLEVEL_SERIALIZED_BF; \
} else if ( MPI_THREAD_MULTIPLE == threadlevelin ) { \
threadlevelout |= OMPI_THREADLEVEL_MULTIPLE_BF; \
}}
#define OMPI_THREADLEVEL_IS_MULTIPLE(threadlevel) (threadlevel & OMPI_THREADLEVEL_MULTIPLE_BF)
/** In ompi_mpi_init: a list of all memory associated with calling
MPI_REGISTER_DATAREP so that we can free it during
MPI_FINALIZE. */
OMPI_DECLSPEC extern opal_list_t ompi_registered_datareps;
/** In ompi_mpi_init: the lists of Fortran 90 mathing datatypes.
* We need these lists and hashtables in order to satisfy the new
2015-06-24 06:59:57 +03:00
* requirements introduced in MPI 2-1 Sect. 10.2.5,
* MPI_TYPE_CREATE_F90_xxxx, page 295, line 47.
*/
extern opal_hash_table_t ompi_mpi_f90_integer_hashtable;
extern opal_hash_table_t ompi_mpi_f90_real_hashtable;
extern opal_hash_table_t ompi_mpi_f90_complex_hashtable;
/** version string of ompi */
OMPI_DECLSPEC extern const char ompi_version_string[];
/**
* Determine the thread level
*
* @param requested Thread support that is requested (IN)
* @param provided Thread support that is provided (OUT)
*/
void ompi_mpi_thread_level(int requested, int *provided);
/**
* Initialize the Open MPI MPI environment
*
* @param argc argc, typically from main() (IN)
* @param argv argv, typically from main() (IN)
* @param requested Thread support that is requested (IN)
* @param provided Thread support that is provided (OUT)
*
* @returns MPI_SUCCESS if successful
* @returns Error code if unsuccessful
*
* Intialize all support code needed for MPI applications. This
* function should only be called by MPI applications (including
* singletons). If this function is called, ompi_init() and
* ompi_rte_init() should *not* be called.
*
* It is permissable to pass in (0, NULL) for (argc, argv).
*/
int ompi_mpi_init(int argc, char **argv, int requested, int *provided);
/**
* Finalize the Open MPI MPI environment
*
* @returns MPI_SUCCESS if successful
* @returns Error code if unsuccessful
*
* Should be called after all MPI functionality is complete (usually
* during MPI_FINALIZE).
*/
int ompi_mpi_finalize(void);
/**
* Abort the processes of comm
*/
OMPI_DECLSPEC int ompi_mpi_abort(struct ompi_communicator_t* comm,
int errcode);
/**
* Do a preconnect of MPI connections (i.e., force connections to
* be made if they will be made).
*/
int ompi_init_preconnect_mpi(void);
/**
* Called to disable MPI dynamic process support. It should be called
* by transports and/or environments where MPI dynamic process
* functionality cannot be supported, and provide a string indicating
* why the functionality is disabled (because it will be shown in a
* user help message). For example, "<TRANSPORT> does not support MPI
* dynamic process functionality."
*
* This first-order functionality is fairly coarse-grained and simple:
* it presents a friendly show-help message to tell users why their
* MPI dynamic process functionality failed (vs. a potentially-cryptic
* network or hardware failure message).
*
* Someone may choose to implement a more fine-grained approach in the
* future.
*/
void ompi_mpi_dynamics_disable(const char *msg);
/**
* Called by the MPI dynamic process functions (e.g., MPI_Comm_spawn)
* to see if MPI dynamic process support is enabled. If it's not,
* this function will opal_show_help() a message and return false.
*/
bool ompi_mpi_dynamics_is_enabled(const char *function);
/**
* Clean up memory / resources by the MPI dynamics process
* functionality checker
*/
void ompi_mpi_dynamics_finalize(void);
END_C_DECLS
#endif /* OMPI_MPI_MPIRUNTIME_H */