2004-01-15 09:08:25 +03:00
|
|
|
/*
|
2005-11-05 22:57:48 +03:00
|
|
|
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
|
|
* University Research and Technology
|
|
|
|
* Corporation. All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
|
|
* of Tennessee Research Foundation. All rights
|
|
|
|
* reserved.
|
2004-11-28 23:09:25 +03:00
|
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
|
|
* University of Stuttgart. All rights reserved.
|
2005-03-24 15:43:37 +03:00
|
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
|
|
* All rights reserved.
|
2004-11-22 04:38:40 +03:00
|
|
|
* $COPYRIGHT$
|
|
|
|
*
|
|
|
|
* Additional copyrights may follow
|
|
|
|
*
|
2004-01-15 09:08:25 +03:00
|
|
|
* $HEADER$
|
|
|
|
*/
|
|
|
|
|
2004-06-07 19:33:53 +04:00
|
|
|
#include "ompi_config.h"
|
2004-01-15 09:08:25 +03:00
|
|
|
|
|
|
|
#include "mpi.h"
|
2005-08-16 20:17:52 +04:00
|
|
|
#include "opal/mca/base/base.h"
|
|
|
|
#include "opal/mca/paffinity/base/base.h"
|
2005-08-26 14:56:39 +04:00
|
|
|
#include "opal/mca/maffinity/base/base.h"
|
2005-08-16 20:17:52 +04:00
|
|
|
#include "opal/runtime/opal_progress.h"
|
|
|
|
#include "opal/threads/threads.h"
|
2005-07-04 06:38:44 +04:00
|
|
|
#include "opal/util/show_help.h"
|
|
|
|
#include "opal/util/stacktrace.h"
|
2005-07-03 16:07:29 +04:00
|
|
|
#include "opal/runtime/opal.h"
|
|
|
|
#include "opal/event/event.h"
|
2004-01-15 09:08:25 +03:00
|
|
|
|
2005-08-27 01:03:41 +04:00
|
|
|
#include "orte/util/sys_info.h"
|
2005-08-16 20:17:52 +04:00
|
|
|
#include "orte/util/proc_info.h"
|
|
|
|
#include "orte/util/session_dir.h"
|
|
|
|
#include "orte/runtime/runtime.h"
|
|
|
|
#include "orte/mca/oob/oob.h"
|
|
|
|
#include "orte/mca/oob/base/base.h"
|
|
|
|
#include "orte/mca/ns/ns.h"
|
2005-08-16 21:18:56 +04:00
|
|
|
#include "orte/mca/ns/base/base.h"
|
2005-08-16 20:17:52 +04:00
|
|
|
#include "orte/mca/gpr/gpr.h"
|
|
|
|
#include "orte/mca/rml/rml.h"
|
|
|
|
#include "orte/mca/schema/schema.h"
|
|
|
|
#include "orte/mca/soh/soh.h"
|
|
|
|
#include "orte/mca/soh/base/base.h"
|
|
|
|
#include "orte/mca/errmgr/errmgr.h"
|
|
|
|
|
2006-02-12 04:33:29 +03:00
|
|
|
#include "ompi/constants.h"
|
2005-11-22 18:24:39 +03:00
|
|
|
#include "ompi/mpi/f77/constants.h"
|
2005-08-16 20:17:52 +04:00
|
|
|
#include "ompi/runtime/mpiruntime.h"
|
|
|
|
#include "ompi/runtime/params.h"
|
|
|
|
#include "ompi/communicator/communicator.h"
|
|
|
|
#include "ompi/group/group.h"
|
|
|
|
#include "ompi/info/info.h"
|
|
|
|
#include "ompi/errhandler/errcode.h"
|
|
|
|
#include "ompi/errhandler/errclass.h"
|
|
|
|
#include "ompi/request/request.h"
|
|
|
|
#include "ompi/op/op.h"
|
|
|
|
#include "ompi/file/file.h"
|
|
|
|
#include "ompi/attribute/attribute.h"
|
|
|
|
#include "ompi/mca/allocator/base/base.h"
|
|
|
|
#include "ompi/mca/allocator/allocator.h"
|
2005-09-13 02:28:23 +04:00
|
|
|
#include "ompi/mca/rcache/base/base.h"
|
|
|
|
#include "ompi/mca/rcache/rcache.h"
|
2005-08-16 20:17:52 +04:00
|
|
|
#include "ompi/mca/mpool/base/base.h"
|
|
|
|
#include "ompi/mca/mpool/mpool.h"
|
|
|
|
#include "ompi/mca/pml/pml.h"
|
|
|
|
#include "ompi/mca/pml/base/pml_base_module_exchange.h"
|
|
|
|
#include "ompi/mca/pml/base/base.h"
|
2006-01-28 18:38:37 +03:00
|
|
|
#include "ompi/mca/osc/base/base.h"
|
2005-08-16 20:17:52 +04:00
|
|
|
#include "ompi/mca/coll/coll.h"
|
|
|
|
#include "ompi/mca/coll/base/base.h"
|
|
|
|
#include "ompi/mca/io/io.h"
|
|
|
|
#include "ompi/mca/io/base/base.h"
|
2005-09-01 00:35:15 +04:00
|
|
|
#include "ompi/debuggers/debuggers.h"
|
2005-09-13 00:25:01 +04:00
|
|
|
#include "ompi/proc/proc.h"
|
2005-08-16 20:17:52 +04:00
|
|
|
|
2004-02-05 04:52:56 +03:00
|
|
|
/*
|
|
|
|
* Global variables and symbols for the MPI layer
|
|
|
|
*/
|
|
|
|
|
2004-06-07 19:33:53 +04:00
|
|
|
bool ompi_mpi_initialized = false;
|
|
|
|
bool ompi_mpi_finalized = false;
|
2004-08-12 20:56:24 +04:00
|
|
|
|
2004-06-07 19:33:53 +04:00
|
|
|
bool ompi_mpi_thread_multiple = false;
|
|
|
|
int ompi_mpi_thread_requested = MPI_THREAD_SINGLE;
|
|
|
|
int ompi_mpi_thread_provided = MPI_THREAD_SINGLE;
|
2004-02-05 04:52:56 +03:00
|
|
|
|
2005-07-04 02:45:48 +04:00
|
|
|
opal_thread_t *ompi_mpi_main_thread = NULL;
|
2004-11-15 23:03:14 +03:00
|
|
|
|
2005-08-26 14:56:39 +04:00
|
|
|
bool ompi_mpi_maffinity_setup = false;
|
|
|
|
|
2005-11-22 18:24:39 +03:00
|
|
|
/*
|
|
|
|
* These variables are here, rather than under ompi/mpi/c/foo.c
|
|
|
|
* because it is not sufficient to have a .c file that only contains
|
|
|
|
* variables -- you must have a function that is invoked from
|
|
|
|
* elsewhere in the code to guarantee that all linkers will pull in
|
|
|
|
* the .o file from the library. Hence, although these are MPI
|
|
|
|
* constants, we might as well just define them here (i.e., in a file
|
|
|
|
* that already has a function that is guaranteed to be linked in,
|
|
|
|
* rather than make a new .c file with the constants and a
|
|
|
|
* corresponding dummy function that is invoked from this function).
|
|
|
|
*
|
|
|
|
* NOTE: See the big comment in ompi/mpi/f77/constants.h about why we
|
|
|
|
* have four symbols for each of the common blocks (e.g., the Fortran
|
|
|
|
* equivalent(s) of MPI_STATUS_IGNORE). Here, we can only have *one*
|
|
|
|
* value (not four). So the only thing we can do is make it equal to
|
|
|
|
* the fortran compiler convention that was selected at configure
|
|
|
|
* time. Note that this is also true for the value of .TRUE. from the
|
|
|
|
* Fortran compiler, so even though Open MPI supports all four Fortran
|
|
|
|
* symbol conventions, it can only support one convention for the two
|
|
|
|
* C constants (MPI_F_STATUS[ES]_IGNORE) and only support one
|
|
|
|
* compiler for the value of .TRUE. Ugh!!
|
|
|
|
*
|
|
|
|
* Note that the casts here are ok -- we're *only* comparing pointer
|
|
|
|
* values (i.e., they'll never be de-referenced). The global symbols
|
|
|
|
* are actually of type (ompi_fortran_common_t) (for alignment
|
|
|
|
* issues), but MPI says that MPI_F_STATUS[ES]_IGNORE must be of type
|
|
|
|
* (MPI_Fint*). Hence, we have to cast to make compilers not
|
|
|
|
* complain.
|
|
|
|
*/
|
2005-11-23 00:52:14 +03:00
|
|
|
#if OMPI_WANT_F77_BINDINGS
|
|
|
|
# if OMPI_F77_CAPS
|
2005-11-22 18:24:39 +03:00
|
|
|
MPI_Fint *MPI_F_STATUS_IGNORE = (MPI_Fint*) &MPI_FORTRAN_STATUS_IGNORE;
|
|
|
|
MPI_Fint *MPI_F_STATUSES_IGNORE = (MPI_Fint*) &MPI_FORTRAN_STATUSES_IGNORE;
|
2005-11-23 00:52:14 +03:00
|
|
|
# elif OMPI_F77_PLAIN
|
2005-11-22 18:24:39 +03:00
|
|
|
MPI_Fint *MPI_F_STATUS_IGNORE = (MPI_Fint*) &mpi_fortran_status_ignore;
|
|
|
|
MPI_Fint *MPI_F_STATUSES_IGNORE = (MPI_Fint*) &mpi_fortran_statuses_ignore;
|
2005-11-23 00:52:14 +03:00
|
|
|
# elif OMPI_F77_SINGLE_UNDERSCORE
|
2005-11-22 18:24:39 +03:00
|
|
|
MPI_Fint *MPI_F_STATUS_IGNORE = (MPI_Fint*) &mpi_fortran_status_ignore_;
|
|
|
|
MPI_Fint *MPI_F_STATUSES_IGNORE = (MPI_Fint*) &mpi_fortran_statuses_ignore_;
|
2005-11-23 00:52:14 +03:00
|
|
|
# elif OMPI_F77_DOUBLE_UNDERSCORE
|
2005-11-22 18:24:39 +03:00
|
|
|
MPI_Fint *MPI_F_STATUS_IGNORE = (MPI_Fint*) &mpi_fortran_status_ignore__;
|
|
|
|
MPI_Fint *MPI_F_STATUSES_IGNORE = (MPI_Fint*) &mpi_fortran_statuses_ignore__;
|
2005-11-23 00:52:14 +03:00
|
|
|
# else
|
|
|
|
# error Unrecognized Fortran 77 name mangling scheme
|
|
|
|
# endif
|
2005-11-22 18:24:39 +03:00
|
|
|
#else
|
2005-11-23 00:52:14 +03:00
|
|
|
MPI_Fint *MPI_F_STATUS_IGNORE = NULL;
|
|
|
|
MPI_Fint *MPI_F_STATUSES_IGNORE = NULL;
|
|
|
|
#endif /* OMPI_WANT_F77_BINDINGS */
|
2005-11-22 18:24:39 +03:00
|
|
|
|
2005-08-26 14:56:39 +04:00
|
|
|
|
2004-06-07 19:33:53 +04:00
|
|
|
int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
2004-01-15 09:08:25 +03:00
|
|
|
{
|
2005-05-24 02:06:50 +04:00
|
|
|
int ret;
|
2004-06-07 19:33:53 +04:00
|
|
|
ompi_proc_t** procs;
|
2004-03-03 19:44:41 +03:00
|
|
|
size_t nprocs;
|
2004-11-17 05:30:07 +03:00
|
|
|
char *error = NULL;
|
2005-03-25 06:06:06 +03:00
|
|
|
bool compound_cmd = false;
|
2005-08-30 21:34:23 +04:00
|
|
|
|
2005-03-25 06:06:06 +03:00
|
|
|
/* Join the run-time environment - do the things that don't hit
|
|
|
|
the registry */
|
|
|
|
|
2005-05-23 18:50:52 +04:00
|
|
|
if (ORTE_SUCCESS != (ret = opal_init())) {
|
|
|
|
error = "ompi_mpi_init: opal_init failed";
|
|
|
|
goto error;
|
|
|
|
}
|
2005-08-16 20:17:52 +04:00
|
|
|
|
2005-08-27 00:13:35 +04:00
|
|
|
/* Setup ORTE stage 1, note that we are not infrastructre */
|
2005-05-23 18:50:52 +04:00
|
|
|
|
2005-08-27 00:13:35 +04:00
|
|
|
if (ORTE_SUCCESS != (ret = orte_init_stage1(false))) {
|
2005-03-23 20:50:12 +03:00
|
|
|
error = "ompi_mpi_init: orte_init_stage1 failed";
|
2005-08-16 20:17:52 +04:00
|
|
|
goto error;
|
Not as bad as this all may look. Tim and I made a significant change to the way we handle the startup of the oob, the seed, etc. We have made it backwards-compatible so that mpirun2 and singleton operations remain working. We had to adjust the name server and gpr as well, plus the process_info structure.
This also includes a checkpoint update to openmpi.c and ompid.c. I have re-enabled the ompid compile.
This latter raises an important point. The trunk compiles the programs like ompid just fine under Linux. It also does just fine for OSX under the dynamic libraries. However, we are seeing errors when compiling under OSX for the static case - the linker seems to have trouble resolving some variable names, even though linker diagnostics show the variables as being defined. Thus, a warning to Mac users that you may have to locally turn things off if you are trying to do static compiles. We ask, however, that you don't commit those changes that turn things off for everyone else - instead, let's try to figure out why the static compile is having a problem, and let everyone else continue to work.
Thanks
Ralph
This commit was SVN r2534.
2004-09-08 07:59:06 +04:00
|
|
|
}
|
|
|
|
|
2005-03-27 17:05:23 +04:00
|
|
|
/* If we are not the seed nor a singleton, AND we have not set the
|
|
|
|
orte_debug flag, then start recording the compound command that
|
|
|
|
starts us up. if we are the seed or a singleton, then don't do
|
|
|
|
this - the registry is local, so we'll just drive it
|
|
|
|
directly */
|
|
|
|
|
2005-03-23 20:50:12 +03:00
|
|
|
if (orte_process_info.seed ||
|
|
|
|
NULL == orte_process_info.ns_replica ||
|
|
|
|
orte_debug_flag) {
|
|
|
|
compound_cmd = false;
|
|
|
|
} else {
|
|
|
|
if (ORTE_SUCCESS != (ret = orte_gpr.begin_compound_cmd())) {
|
|
|
|
ORTE_ERROR_LOG(ret);
|
|
|
|
error = "ompi_mpi_init: orte_gpr.begin_compound_cmd failed";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
compound_cmd = true;
|
|
|
|
}
|
2004-09-23 18:35:02 +04:00
|
|
|
|
2005-03-23 20:50:12 +03:00
|
|
|
/* Now do the things that hit the registry */
|
2005-03-27 17:05:23 +04:00
|
|
|
|
2005-03-23 20:50:12 +03:00
|
|
|
if (ORTE_SUCCESS != (ret = orte_init_stage2())) {
|
|
|
|
ORTE_ERROR_LOG(ret);
|
|
|
|
error = "ompi_mpi_init: orte_init_stage2 failed";
|
|
|
|
goto error;
|
|
|
|
}
|
2005-03-27 17:05:23 +04:00
|
|
|
|
2004-08-14 05:56:05 +04:00
|
|
|
/* Once we've joined the RTE, see if any MCA parameters were
|
|
|
|
passed to the MPI level */
|
|
|
|
|
|
|
|
if (OMPI_SUCCESS != (ret = ompi_mpi_register_params())) {
|
2004-09-05 20:05:37 +04:00
|
|
|
error = "mca_mpi_register_params() failed";
|
|
|
|
goto error;
|
2004-08-14 05:56:05 +04:00
|
|
|
}
|
|
|
|
|
2005-08-16 20:17:52 +04:00
|
|
|
/* Setup process affinity */
|
|
|
|
|
|
|
|
if (ompi_mpi_paffinity_alone) {
|
|
|
|
int param, value;
|
2005-08-16 21:18:56 +04:00
|
|
|
bool set = false;
|
2005-08-16 20:17:52 +04:00
|
|
|
param = mca_base_param_find("mpi", NULL, "paffinity_processor");
|
|
|
|
if (param >= 0) {
|
2005-08-16 21:18:56 +04:00
|
|
|
if (OMPI_SUCCESS == mca_base_param_lookup_int(param, &value)) {
|
|
|
|
if (value >= 0) {
|
|
|
|
if (OPAL_SUCCESS == opal_paffinity_base_set(value)) {
|
|
|
|
set = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!set) {
|
|
|
|
char *vpid;
|
|
|
|
orte_ns_base_get_vpid_string(&vpid, orte_process_info.my_name);
|
|
|
|
opal_show_help("help-mpi-runtime",
|
|
|
|
"mpi_init:startup:paffinity-unavailable",
|
|
|
|
true, vpid);
|
|
|
|
free(vpid);
|
2005-08-16 20:17:52 +04:00
|
|
|
}
|
2005-08-26 14:56:39 +04:00
|
|
|
|
|
|
|
/* If we were able to set processor affinity, try setting
|
|
|
|
up memory affinity */
|
|
|
|
|
|
|
|
else {
|
|
|
|
if (OPAL_SUCCESS == opal_maffinity_base_open() &&
|
|
|
|
OPAL_SUCCESS == opal_maffinity_base_select()) {
|
|
|
|
ompi_mpi_maffinity_setup = true;
|
|
|
|
}
|
|
|
|
}
|
2005-08-16 20:17:52 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* initialize datatypes. This step should be done early as it will
|
|
|
|
* create the local convertor and local arch used in the proc
|
|
|
|
* init.
|
2005-07-29 04:15:26 +04:00
|
|
|
*/
|
|
|
|
if (OMPI_SUCCESS != (ret = ompi_ddt_init())) {
|
|
|
|
error = "ompi_ddt_init() failed";
|
|
|
|
goto error;
|
|
|
|
}
|
2005-03-27 17:05:23 +04:00
|
|
|
|
2005-07-29 04:15:26 +04:00
|
|
|
/* Initialize OMPI procs */
|
2004-06-07 19:33:53 +04:00
|
|
|
if (OMPI_SUCCESS != (ret = ompi_proc_init())) {
|
2004-09-05 20:05:37 +04:00
|
|
|
error = "mca_proc_init() failed";
|
|
|
|
goto error;
|
2004-03-03 19:44:41 +03:00
|
|
|
}
|
|
|
|
|
2005-04-14 22:55:53 +04:00
|
|
|
/* initialize the progress engine for MPI functionality */
|
2005-07-04 01:57:43 +04:00
|
|
|
if (OMPI_SUCCESS != opal_progress_mpi_init()) {
|
|
|
|
error = "opal_progress_mpi_init() failed";
|
2005-04-14 22:55:53 +04:00
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
2005-10-25 22:33:48 +04:00
|
|
|
|
|
|
|
/* initialize ops. This has to be done *after* ddt_init, but
|
|
|
|
befor mca_coll_base_open, since come collective modules
|
|
|
|
(e.g. the hierarchical) need them in the query function
|
|
|
|
*/
|
|
|
|
if (OMPI_SUCCESS != (ret = ompi_op_init())) {
|
|
|
|
error = "ompi_op_init() failed";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2005-03-27 17:05:23 +04:00
|
|
|
/* Open up MPI-related MCA components */
|
2004-08-14 05:56:05 +04:00
|
|
|
|
2004-06-17 20:23:34 +04:00
|
|
|
if (OMPI_SUCCESS != (ret = mca_allocator_base_open())) {
|
2004-09-05 20:05:37 +04:00
|
|
|
error = "mca_allocator_base_open() failed";
|
|
|
|
goto error;
|
2004-06-17 20:23:34 +04:00
|
|
|
}
|
2005-09-13 02:28:23 +04:00
|
|
|
if (OMPI_SUCCESS != (ret = mca_rcache_base_open())) {
|
|
|
|
error = "mca_rcache_base_open() failed";
|
|
|
|
goto error;
|
|
|
|
}
|
2004-06-17 20:23:34 +04:00
|
|
|
if (OMPI_SUCCESS != (ret = mca_mpool_base_open())) {
|
2004-09-05 20:05:37 +04:00
|
|
|
error = "mca_mpool_base_open() failed";
|
|
|
|
goto error;
|
2004-06-17 20:23:34 +04:00
|
|
|
}
|
2004-06-07 19:33:53 +04:00
|
|
|
if (OMPI_SUCCESS != (ret = mca_pml_base_open())) {
|
2004-09-05 20:05:37 +04:00
|
|
|
error = "mca_pml_base_open() failed";
|
|
|
|
goto error;
|
2004-02-13 16:56:55 +03:00
|
|
|
}
|
2004-06-07 19:33:53 +04:00
|
|
|
if (OMPI_SUCCESS != (ret = mca_coll_base_open())) {
|
2004-09-05 20:05:37 +04:00
|
|
|
error = "mca_coll_base_open() failed";
|
|
|
|
goto error;
|
2004-02-13 16:56:55 +03:00
|
|
|
}
|
2004-01-30 06:59:39 +03:00
|
|
|
|
2006-01-28 18:38:37 +03:00
|
|
|
if (OMPI_SUCCESS != (ret = ompi_osc_base_open())) {
|
|
|
|
error = "ompi_osc_base_open() failed";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
2005-03-27 17:05:23 +04:00
|
|
|
/* In order to reduce the common case for MPI apps (where they
|
|
|
|
don't use MPI-2 IO or MPI-1 topology functions), the io and
|
|
|
|
topo frameworks are initialized lazily, at the first use of
|
|
|
|
relevant functions (e.g., MPI_FILE_*, MPI_CART_*, MPI_GRAPH_*),
|
|
|
|
so they are not opened here. */
|
|
|
|
|
|
|
|
/* Initialize module exchange */
|
|
|
|
|
2005-08-05 22:03:30 +04:00
|
|
|
if (OMPI_SUCCESS != (ret = mca_pml_base_modex_init())) {
|
|
|
|
error = "mca_pml_base_modex_init() failed";
|
2004-10-15 00:50:06 +04:00
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
2005-03-27 17:05:23 +04:00
|
|
|
/* Select which MPI components to use */
|
2004-01-30 06:59:39 +03:00
|
|
|
|
2004-06-07 19:33:53 +04:00
|
|
|
if (OMPI_SUCCESS !=
|
2005-03-27 17:05:23 +04:00
|
|
|
(ret = mca_mpool_base_init(OMPI_ENABLE_PROGRESS_THREADS,
|
|
|
|
OMPI_ENABLE_MPI_THREADS))) {
|
|
|
|
error = "mca_mpool_base_init() failed";
|
2004-09-05 20:05:37 +04:00
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
2005-03-27 17:05:23 +04:00
|
|
|
if (OMPI_SUCCESS !=
|
|
|
|
(ret = mca_pml_base_select(OMPI_ENABLE_PROGRESS_THREADS,
|
|
|
|
OMPI_ENABLE_MPI_THREADS))) {
|
|
|
|
error = "mca_pml_base_select() failed";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (OMPI_SUCCESS !=
|
|
|
|
(ret = mca_coll_base_find_available(OMPI_ENABLE_PROGRESS_THREADS,
|
|
|
|
OMPI_ENABLE_MPI_THREADS))) {
|
|
|
|
error = "mca_coll_base_find_available() failed";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
2006-01-28 18:38:37 +03:00
|
|
|
if (OMPI_SUCCESS !=
|
|
|
|
(ret = ompi_osc_base_find_available(OMPI_ENABLE_PROGRESS_THREADS,
|
|
|
|
OMPI_ENABLE_MPI_THREADS))) {
|
|
|
|
error = "ompi_osc_base_find_available() failed";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
2005-03-27 17:05:23 +04:00
|
|
|
/* io and topo components are not selected here -- see comment
|
|
|
|
above about the io and topo frameworks being loaded lazily */
|
|
|
|
|
|
|
|
/* Initialize each MPI handle subsystem */
|
2004-10-08 21:12:36 +04:00
|
|
|
/* initialize requests */
|
|
|
|
if (OMPI_SUCCESS != (ret = ompi_request_init())) {
|
|
|
|
error = "ompi_request_init() failed";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
2004-09-05 20:05:37 +04:00
|
|
|
/* initialize info */
|
|
|
|
if (OMPI_SUCCESS != (ret = ompi_info_init())) {
|
|
|
|
error = "ompi_info_init() failed";
|
|
|
|
goto error;
|
|
|
|
}
|
2004-10-08 21:12:36 +04:00
|
|
|
|
2004-09-05 20:05:37 +04:00
|
|
|
/* initialize error handlers */
|
|
|
|
if (OMPI_SUCCESS != (ret = ompi_errhandler_init())) {
|
|
|
|
error = "ompi_errhandler_init() failed";
|
|
|
|
goto error;
|
2004-02-13 16:56:55 +03:00
|
|
|
}
|
2004-01-30 06:59:39 +03:00
|
|
|
|
2004-09-05 20:05:37 +04:00
|
|
|
/* initialize error codes */
|
|
|
|
if (OMPI_SUCCESS != (ret = ompi_mpi_errcode_init())) {
|
|
|
|
error = "ompi_mpi_errcode_init() failed";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* initialize error classes */
|
|
|
|
if (OMPI_SUCCESS != (ret = ompi_errclass_init())) {
|
|
|
|
error = "ompi_errclass_init() failed";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* initialize internal error codes */
|
|
|
|
if (OMPI_SUCCESS != (ret = ompi_errcode_intern_init())) {
|
|
|
|
error = "ompi_errcode_intern_init() failed";
|
|
|
|
goto error;
|
|
|
|
}
|
2004-05-08 03:23:03 +04:00
|
|
|
|
2004-09-05 20:05:37 +04:00
|
|
|
/* initialize groups */
|
|
|
|
if (OMPI_SUCCESS != (ret = ompi_group_init())) {
|
|
|
|
error = "ompi_group_init() failed";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* initialize communicators */
|
|
|
|
if (OMPI_SUCCESS != (ret = ompi_comm_init())) {
|
|
|
|
error = "ompi_comm_init() failed";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* initialize file handles */
|
|
|
|
if (OMPI_SUCCESS != (ret = ompi_file_init())) {
|
|
|
|
error = "ompi_file_init() failed";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
2006-01-28 18:38:37 +03:00
|
|
|
/* initialize windows */
|
|
|
|
if (OMPI_SUCCESS != (ret = ompi_win_init())) {
|
|
|
|
error = "ompi_win_init() failed";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
2004-09-16 04:00:09 +04:00
|
|
|
/* initialize attribute meta-data structure for comm/win/dtype */
|
|
|
|
if (OMPI_SUCCESS != (ret = ompi_attr_init())) {
|
|
|
|
error = "ompi_attr_init() failed";
|
|
|
|
goto error;
|
|
|
|
}
|
2004-09-05 20:05:37 +04:00
|
|
|
/* do module exchange */
|
2005-08-05 22:03:30 +04:00
|
|
|
if (OMPI_SUCCESS != (ret = mca_pml_base_modex_exchange())) {
|
|
|
|
error = "mca_pml_base_modex_exchange() failed";
|
2004-09-05 20:05:37 +04:00
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
2005-05-06 21:00:06 +04:00
|
|
|
/* store our process info on registry */
|
|
|
|
if (ORTE_SUCCESS != (ret = orte_schema.store_my_info())) {
|
|
|
|
ORTE_ERROR_LOG(ret);
|
|
|
|
error = "could not store my info on registry";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
2005-03-27 17:05:23 +04:00
|
|
|
/* Let system know we are at STG1 Barrier */
|
2005-03-14 23:57:21 +03:00
|
|
|
if (ORTE_SUCCESS != (ret = orte_soh.set_proc_soh(orte_process_info.my_name,
|
|
|
|
ORTE_PROC_STATE_AT_STG1, 0))) {
|
|
|
|
ORTE_ERROR_LOG(ret);
|
|
|
|
error = "set process state failed";
|
2004-11-20 22:12:43 +03:00
|
|
|
goto error;
|
2005-03-14 23:57:21 +03:00
|
|
|
}
|
2004-11-20 22:12:43 +03:00
|
|
|
|
2005-03-27 17:05:23 +04:00
|
|
|
/* if the compound command is operative, execute it */
|
|
|
|
|
2005-03-23 20:50:12 +03:00
|
|
|
if (compound_cmd) {
|
|
|
|
if (OMPI_SUCCESS != (ret = orte_gpr.exec_compound_cmd())) {
|
|
|
|
ORTE_ERROR_LOG(ret);
|
2005-08-16 20:17:52 +04:00
|
|
|
error = "ompi_rte_init: orte_gpr.exec_compound_cmd failed";
|
|
|
|
goto error;
|
2005-03-23 20:50:12 +03:00
|
|
|
}
|
2005-03-14 23:57:21 +03:00
|
|
|
}
|
2005-03-23 20:50:12 +03:00
|
|
|
|
2005-03-29 02:37:54 +04:00
|
|
|
/* FIRST BARRIER - WAIT FOR MSG FROM RMGR_PROC_STAGE_GATE_MGR TO ARRIVE */
|
2005-09-01 05:07:30 +04:00
|
|
|
if (ORTE_SUCCESS != (ret = orte_rml.xcast(NULL, NULL, 0, NULL,
|
|
|
|
orte_gpr.deliver_notify_msg, NULL))) {
|
2005-03-14 23:57:21 +03:00
|
|
|
ORTE_ERROR_LOG(ret);
|
2005-08-16 20:17:52 +04:00
|
|
|
error = "ompi_mpi_init: failed to see all procs register\n";
|
|
|
|
goto error;
|
2004-11-20 22:12:43 +03:00
|
|
|
}
|
|
|
|
|
2005-07-12 09:40:56 +04:00
|
|
|
/* start PTL's */
|
|
|
|
ret = MCA_PML_CALL(enable(true));
|
|
|
|
if( OMPI_SUCCESS != ret ) {
|
|
|
|
error = "PML control failed";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
2004-09-05 20:05:37 +04:00
|
|
|
/* add all ompi_proc_t's to PML */
|
|
|
|
if (NULL == (procs = ompi_proc_world(&nprocs))) {
|
|
|
|
error = "ompi_proc_world() failed";
|
|
|
|
goto error;
|
|
|
|
}
|
2005-07-12 09:40:56 +04:00
|
|
|
ret = MCA_PML_CALL(add_procs(procs, nprocs));
|
|
|
|
free(procs);
|
|
|
|
if( OMPI_SUCCESS != ret ) {
|
2004-09-05 20:05:37 +04:00
|
|
|
error = "PML add procs failed";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
2005-05-24 02:06:50 +04:00
|
|
|
MCA_PML_CALL(add_comm(&ompi_mpi_comm_world));
|
|
|
|
MCA_PML_CALL(add_comm(&ompi_mpi_comm_self));
|
|
|
|
|
2005-03-27 17:05:23 +04:00
|
|
|
/* Figure out the final MPI thread levels. If we were not
|
|
|
|
compiled for support for MPI threads, then don't allow
|
|
|
|
MPI_THREAD_MULTIPLE. */
|
2004-02-05 04:52:56 +03:00
|
|
|
|
2004-06-07 19:33:53 +04:00
|
|
|
ompi_mpi_thread_requested = requested;
|
2005-04-14 22:55:53 +04:00
|
|
|
if (OMPI_HAVE_THREAD_SUPPORT == 0) {
|
2005-03-27 17:05:23 +04:00
|
|
|
ompi_mpi_thread_provided = *provided = MPI_THREAD_SINGLE;
|
|
|
|
ompi_mpi_main_thread = NULL;
|
|
|
|
} else if (OMPI_ENABLE_MPI_THREADS == 1) {
|
|
|
|
ompi_mpi_thread_provided = *provided = requested;
|
2005-07-04 02:45:48 +04:00
|
|
|
ompi_mpi_main_thread = opal_thread_get_self();
|
2005-03-27 17:05:23 +04:00
|
|
|
} else {
|
|
|
|
if (MPI_THREAD_MULTIPLE == requested) {
|
|
|
|
ompi_mpi_thread_provided = *provided = MPI_THREAD_SERIALIZED;
|
|
|
|
} else {
|
|
|
|
ompi_mpi_thread_provided = *provided = requested;
|
|
|
|
}
|
2005-07-04 02:45:48 +04:00
|
|
|
ompi_mpi_main_thread = opal_thread_get_self();
|
2005-03-27 17:05:23 +04:00
|
|
|
}
|
|
|
|
|
2004-06-29 04:02:25 +04:00
|
|
|
ompi_mpi_thread_multiple = (ompi_mpi_thread_provided ==
|
|
|
|
MPI_THREAD_MULTIPLE);
|
2005-03-27 17:05:23 +04:00
|
|
|
if (OMPI_ENABLE_PROGRESS_THREADS == 1 ||
|
|
|
|
OMPI_ENABLE_MPI_THREADS == 1) {
|
2005-07-04 02:45:48 +04:00
|
|
|
opal_set_using_threads(true);
|
2005-03-27 17:05:23 +04:00
|
|
|
}
|
2004-02-05 04:52:56 +03:00
|
|
|
|
2004-05-08 03:23:03 +04:00
|
|
|
/* Init coll for the comms */
|
|
|
|
|
2004-09-05 20:05:37 +04:00
|
|
|
if (OMPI_SUCCESS !=
|
|
|
|
(ret = mca_coll_base_comm_select(MPI_COMM_WORLD, NULL))) {
|
|
|
|
error = "mca_coll_base_comm_select(MPI_COMM_WORLD) failed";
|
|
|
|
goto error;
|
2004-07-13 16:35:43 +04:00
|
|
|
}
|
2004-05-08 03:23:03 +04:00
|
|
|
|
2005-11-12 06:47:17 +03:00
|
|
|
if (OMPI_SUCCESS !=
|
|
|
|
(ret = mca_coll_base_comm_select(MPI_COMM_SELF, NULL))) {
|
|
|
|
error = "mca_coll_base_comm_select(MPI_COMM_SELF) failed";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
2005-04-09 23:34:32 +04:00
|
|
|
#if OMPI_ENABLE_PROGRESS_THREADS && 0
|
|
|
|
/* BWB - XXX - FIXME - is this actually correct? */
|
2005-01-13 18:30:49 +03:00
|
|
|
/* setup I/O forwarding */
|
2005-04-09 23:34:32 +04:00
|
|
|
if (orte_process_info.seed == false) {
|
2005-01-18 20:32:54 +03:00
|
|
|
if (OMPI_SUCCESS != (ret = ompi_mpi_init_io())) {
|
2005-08-16 20:17:52 +04:00
|
|
|
error = "ompi_rte_init_io failed";
|
|
|
|
goto error;
|
2005-01-18 20:32:54 +03:00
|
|
|
}
|
2005-01-13 18:30:49 +03:00
|
|
|
}
|
2004-11-18 02:37:49 +03:00
|
|
|
#endif
|
2004-05-08 03:23:03 +04:00
|
|
|
|
2005-07-09 01:01:37 +04:00
|
|
|
/*
|
|
|
|
* Dump all MCA parameters if requested
|
|
|
|
*/
|
|
|
|
if (ompi_mpi_show_mca_params) {
|
|
|
|
ompi_show_all_mca_params(ompi_mpi_comm_world.c_my_rank,
|
|
|
|
nprocs,
|
|
|
|
orte_system_info.nodename);
|
|
|
|
}
|
|
|
|
|
2005-03-27 17:05:23 +04:00
|
|
|
/* Let system know we are at STG2 Barrier */
|
|
|
|
|
2005-03-14 23:57:21 +03:00
|
|
|
if (ORTE_SUCCESS != (ret = orte_soh.set_proc_soh(orte_process_info.my_name,
|
|
|
|
ORTE_PROC_STATE_AT_STG2, 0))) {
|
|
|
|
ORTE_ERROR_LOG(ret);
|
|
|
|
error = "set process state failed";
|
|
|
|
goto error;
|
2005-02-21 21:56:30 +03:00
|
|
|
}
|
|
|
|
|
2005-03-14 23:57:21 +03:00
|
|
|
/* BWB - is this still needed? */
|
2005-02-16 20:42:07 +03:00
|
|
|
#if OMPI_ENABLE_PROGRESS_THREADS == 0
|
2005-07-04 03:09:55 +04:00
|
|
|
opal_progress_events(OPAL_EVLOOP_NONBLOCK);
|
2005-01-13 18:30:49 +03:00
|
|
|
#endif
|
|
|
|
|
2005-03-27 17:05:23 +04:00
|
|
|
/* Second barrier -- wait for message from
|
|
|
|
RMGR_PROC_STAGE_GATE_MGR to arrive */
|
|
|
|
|
2005-09-01 05:07:30 +04:00
|
|
|
if (ORTE_SUCCESS != (ret = orte_rml.xcast(NULL, NULL, 0, NULL,
|
|
|
|
orte_gpr.deliver_notify_msg, NULL))) {
|
2005-03-14 23:57:21 +03:00
|
|
|
ORTE_ERROR_LOG(ret);
|
|
|
|
error = "ompi_mpi_init: failed to see all procs register\n";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
2004-09-29 16:41:55 +04:00
|
|
|
/* new very last step: check whether we have been spawned or not.
|
|
|
|
We introduce that at the very end, since we need collectives,
|
2005-03-27 17:05:23 +04:00
|
|
|
datatypes, ptls etc. up and running here.... */
|
|
|
|
|
2004-09-29 16:41:55 +04:00
|
|
|
if (OMPI_SUCCESS != (ret = ompi_comm_dyn_init())) {
|
|
|
|
error = "ompi_comm_dyn_init() failed";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
2004-09-05 20:05:37 +04:00
|
|
|
error:
|
|
|
|
if (ret != OMPI_SUCCESS) {
|
2005-10-18 00:47:44 +04:00
|
|
|
const char *err_msg = opal_strerror(ret);
|
2005-07-04 06:38:44 +04:00
|
|
|
opal_show_help("help-mpi-runtime",
|
2004-09-05 20:05:37 +04:00
|
|
|
"mpi_init:startup:internal-failure", true,
|
2005-10-18 00:47:44 +04:00
|
|
|
"MPI_INIT", "MPI_INIT", error, err_msg, ret);
|
2004-09-05 20:05:37 +04:00
|
|
|
return ret;
|
2004-06-29 04:02:25 +04:00
|
|
|
}
|
2004-05-08 03:23:03 +04:00
|
|
|
|
2005-03-30 05:40:26 +04:00
|
|
|
/* put the event library in "high performance MPI mode" */
|
2005-07-04 01:57:43 +04:00
|
|
|
if (OMPI_SUCCESS != opal_progress_mpi_enable()) {
|
|
|
|
error = "opal_progress_mpi_enable() failed";
|
2005-03-30 05:40:26 +04:00
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
2005-03-27 17:05:23 +04:00
|
|
|
/* All done. Wasn't that simple? */
|
2004-02-05 04:52:56 +03:00
|
|
|
|
2004-06-07 19:33:53 +04:00
|
|
|
ompi_mpi_initialized = true;
|
2004-11-20 22:12:43 +03:00
|
|
|
|
2005-03-14 23:57:21 +03:00
|
|
|
if (orte_debug_flag) {
|
2005-08-16 20:17:52 +04:00
|
|
|
opal_output(0, "[%lu,%lu,%lu] ompi_mpi_init completed",
|
|
|
|
ORTE_NAME_ARGS(orte_process_info.my_name));
|
2004-11-20 22:12:43 +03:00
|
|
|
}
|
|
|
|
|
2005-09-20 19:22:15 +04:00
|
|
|
/* Do we need to wait for a TotalView-like debugger? */
|
|
|
|
ompi_wait_for_totalview();
|
|
|
|
|
2004-02-13 16:56:55 +03:00
|
|
|
return MPI_SUCCESS;
|
2004-01-15 09:08:25 +03:00
|
|
|
}
|