2004-01-15 09:08:25 +03:00
|
|
|
/*
|
2004-11-22 04:38:40 +03:00
|
|
|
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
|
|
|
* All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
|
|
|
* All rights reserved.
|
2004-11-28 23:09:25 +03:00
|
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
|
|
* University of Stuttgart. All rights reserved.
|
2005-03-24 15:43:37 +03:00
|
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
|
|
* All rights reserved.
|
2004-11-22 04:38:40 +03:00
|
|
|
* $COPYRIGHT$
|
|
|
|
*
|
|
|
|
* Additional copyrights may follow
|
|
|
|
*
|
2004-01-15 09:08:25 +03:00
|
|
|
* $HEADER$
|
|
|
|
*/
|
|
|
|
|
2004-06-07 19:33:53 +04:00
|
|
|
#include "ompi_config.h"
|
2004-01-15 09:08:25 +03:00
|
|
|
|
|
|
|
#include "mpi.h"
|
2005-08-16 20:17:52 +04:00
|
|
|
#include "opal/mca/base/base.h"
|
|
|
|
#include "opal/mca/paffinity/base/base.h"
|
2005-08-26 14:56:39 +04:00
|
|
|
#include "opal/mca/maffinity/base/base.h"
|
2005-08-16 20:17:52 +04:00
|
|
|
#include "opal/runtime/opal_progress.h"
|
|
|
|
#include "opal/threads/threads.h"
|
2005-07-04 06:38:44 +04:00
|
|
|
#include "opal/util/show_help.h"
|
|
|
|
#include "opal/util/stacktrace.h"
|
2005-07-03 16:07:29 +04:00
|
|
|
#include "opal/runtime/opal.h"
|
|
|
|
#include "opal/event/event.h"
|
2004-01-15 09:08:25 +03:00
|
|
|
|
2005-08-27 01:03:41 +04:00
|
|
|
#include "orte/util/sys_info.h"
|
2005-08-16 20:17:52 +04:00
|
|
|
#include "orte/util/proc_info.h"
|
|
|
|
#include "orte/util/session_dir.h"
|
|
|
|
#include "orte/runtime/runtime.h"
|
|
|
|
#include "orte/mca/oob/oob.h"
|
|
|
|
#include "orte/mca/oob/base/base.h"
|
|
|
|
#include "orte/mca/ns/ns.h"
|
2005-08-16 21:18:56 +04:00
|
|
|
#include "orte/mca/ns/base/base.h"
|
2005-08-16 20:17:52 +04:00
|
|
|
#include "orte/mca/gpr/gpr.h"
|
|
|
|
#include "orte/mca/rml/rml.h"
|
|
|
|
#include "orte/mca/schema/schema.h"
|
|
|
|
#include "orte/mca/soh/soh.h"
|
|
|
|
#include "orte/mca/soh/base/base.h"
|
|
|
|
#include "orte/mca/errmgr/errmgr.h"
|
|
|
|
|
|
|
|
#include "ompi/include/constants.h"
|
|
|
|
#include "ompi/runtime/mpiruntime.h"
|
|
|
|
#include "ompi/runtime/params.h"
|
|
|
|
#include "ompi/communicator/communicator.h"
|
|
|
|
#include "ompi/group/group.h"
|
|
|
|
#include "ompi/info/info.h"
|
|
|
|
#include "ompi/errhandler/errcode.h"
|
|
|
|
#include "ompi/errhandler/errclass.h"
|
|
|
|
#include "ompi/request/request.h"
|
|
|
|
#include "ompi/op/op.h"
|
|
|
|
#include "ompi/file/file.h"
|
|
|
|
#include "ompi/attribute/attribute.h"
|
|
|
|
#include "ompi/mca/allocator/base/base.h"
|
|
|
|
#include "ompi/mca/allocator/allocator.h"
|
|
|
|
#include "ompi/mca/mpool/base/base.h"
|
|
|
|
#include "ompi/mca/mpool/mpool.h"
|
|
|
|
#include "ompi/mca/pml/pml.h"
|
|
|
|
#include "ompi/mca/pml/base/pml_base_module_exchange.h"
|
|
|
|
#include "ompi/mca/pml/base/base.h"
|
|
|
|
#include "ompi/mca/coll/coll.h"
|
|
|
|
#include "ompi/mca/coll/base/base.h"
|
|
|
|
#include "ompi/mca/io/io.h"
|
|
|
|
#include "ompi/mca/io/base/base.h"
|
|
|
|
|
2004-02-05 04:52:56 +03:00
|
|
|
/*
|
|
|
|
* Global variables and symbols for the MPI layer
|
|
|
|
*/
|
|
|
|
|
2004-06-07 19:33:53 +04:00
|
|
|
bool ompi_mpi_initialized = false;
|
|
|
|
bool ompi_mpi_finalized = false;
|
2004-08-12 20:56:24 +04:00
|
|
|
|
2004-06-07 19:33:53 +04:00
|
|
|
bool ompi_mpi_thread_multiple = false;
|
|
|
|
int ompi_mpi_thread_requested = MPI_THREAD_SINGLE;
|
|
|
|
int ompi_mpi_thread_provided = MPI_THREAD_SINGLE;
|
2004-02-05 04:52:56 +03:00
|
|
|
|
2005-07-04 02:45:48 +04:00
|
|
|
opal_thread_t *ompi_mpi_main_thread = NULL;
|
2004-11-15 23:03:14 +03:00
|
|
|
|
2005-08-26 14:56:39 +04:00
|
|
|
bool ompi_mpi_maffinity_setup = false;
|
|
|
|
|
2005-08-30 21:34:23 +04:00
|
|
|
/*
 * Variables for TotalView-like debuggers
 */

/* Not read or written in this file beyond its definition; presumably
   set externally by an attaching debugger -- confirm against the MPIR
   interface description. */
int MPIR_being_debugged = 0;

/* ompi_mpi_init() spins while this is 0 when the
   "mpi_wait_for_totalview" parameter is set; something outside this
   file (presumably the debugger) must make it non-zero. */
volatile int MPIR_debug_gate = 0;

/* Not referenced elsewhere in this file. */
volatile int MPIR_debug_state = 0;

/* One entry describing a single process for the debugger. */
struct MPIR_PROCDESC {
    char *host_name;        /* something that can be passed to inet_addr */
    char *executable_name;  /* name of binary */
    int pid;                /* process pid */
};

/* Process table handle; NULL here and never assigned in this file. */
struct MPIR_PROCDESC *MPIR_proctable = NULL;
|
2005-08-30 21:34:23 +04:00
|
|
|
|
2005-08-26 14:56:39 +04:00
|
|
|
|
2004-06-07 19:33:53 +04:00
|
|
|
int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
2004-01-15 09:08:25 +03:00
|
|
|
{
|
2005-05-24 02:06:50 +04:00
|
|
|
int ret;
|
2004-06-07 19:33:53 +04:00
|
|
|
ompi_proc_t** procs;
|
2004-03-03 19:44:41 +03:00
|
|
|
size_t nprocs;
|
2004-11-17 05:30:07 +03:00
|
|
|
char *error = NULL;
|
2005-03-25 06:06:06 +03:00
|
|
|
bool compound_cmd = false;
|
2005-08-30 21:34:23 +04:00
|
|
|
int wait_for_totalview;
|
|
|
|
|
2005-03-25 06:06:06 +03:00
|
|
|
/* Join the run-time environment - do the things that don't hit
|
|
|
|
the registry */
|
|
|
|
|
2005-05-23 18:50:52 +04:00
|
|
|
if (ORTE_SUCCESS != (ret = opal_init())) {
|
|
|
|
error = "ompi_mpi_init: opal_init failed";
|
|
|
|
goto error;
|
|
|
|
}
|
2005-08-16 20:17:52 +04:00
|
|
|
|
2005-08-27 00:13:35 +04:00
|
|
|
/* Setup ORTE stage 1, note that we are not infrastructre */
|
2005-05-23 18:50:52 +04:00
|
|
|
|
2005-08-27 00:13:35 +04:00
|
|
|
if (ORTE_SUCCESS != (ret = orte_init_stage1(false))) {
|
2005-03-23 20:50:12 +03:00
|
|
|
error = "ompi_mpi_init: orte_init_stage1 failed";
|
2005-08-16 20:17:52 +04:00
|
|
|
goto error;
|
Not as bad as this all may look. Tim and I made a significant change to the way we handle the startup of the oob, the seed, etc. We have made it backwards-compatible so that mpirun2 and singleton operations remain working. We had to adjust the name server and gpr as well, plus the process_info structure.
This also includes a checkpoint update to openmpi.c and ompid.c. I have re-enabled the ompid compile.
This latter raises an important point. The trunk compiles the programs like ompid just fine under Linux. It also does just fine for OSX under the dynamic libraries. However, we are seeing errors when compiling under OSX for the static case - the linker seems to have trouble resolving some variable names, even though linker diagnostics show the variables as being defined. Thus, a warning to Mac users that you may have to locally turn things off if you are trying to do static compiles. We ask, however, that you don't commit those changes that turn things off for everyone else - instead, let's try to figure out why the static compile is having a problem, and let everyone else continue to work.
Thanks
Ralph
This commit was SVN r2534.
2004-09-08 07:59:06 +04:00
|
|
|
}
|
|
|
|
|
2005-03-27 17:05:23 +04:00
|
|
|
/* If we are not the seed nor a singleton, AND we have not set the
|
|
|
|
orte_debug flag, then start recording the compound command that
|
|
|
|
starts us up. if we are the seed or a singleton, then don't do
|
|
|
|
this - the registry is local, so we'll just drive it
|
|
|
|
directly */
|
|
|
|
|
2005-03-23 20:50:12 +03:00
|
|
|
if (orte_process_info.seed ||
|
|
|
|
NULL == orte_process_info.ns_replica ||
|
|
|
|
orte_debug_flag) {
|
|
|
|
compound_cmd = false;
|
|
|
|
} else {
|
|
|
|
if (ORTE_SUCCESS != (ret = orte_gpr.begin_compound_cmd())) {
|
|
|
|
ORTE_ERROR_LOG(ret);
|
|
|
|
error = "ompi_mpi_init: orte_gpr.begin_compound_cmd failed";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
compound_cmd = true;
|
|
|
|
}
|
2004-09-23 18:35:02 +04:00
|
|
|
|
2005-03-23 20:50:12 +03:00
|
|
|
/* Now do the things that hit the registry */
|
2005-03-27 17:05:23 +04:00
|
|
|
|
2005-03-23 20:50:12 +03:00
|
|
|
if (ORTE_SUCCESS != (ret = orte_init_stage2())) {
|
|
|
|
ORTE_ERROR_LOG(ret);
|
|
|
|
error = "ompi_mpi_init: orte_init_stage2 failed";
|
|
|
|
goto error;
|
|
|
|
}
|
2005-03-27 17:05:23 +04:00
|
|
|
|
2004-08-14 05:56:05 +04:00
|
|
|
/* Once we've joined the RTE, see if any MCA parameters were
|
|
|
|
passed to the MPI level */
|
|
|
|
|
|
|
|
if (OMPI_SUCCESS != (ret = ompi_mpi_register_params())) {
|
2004-09-05 20:05:37 +04:00
|
|
|
error = "mca_mpi_register_params() failed";
|
|
|
|
goto error;
|
2004-08-14 05:56:05 +04:00
|
|
|
}
|
|
|
|
|
2005-08-30 21:34:23 +04:00
|
|
|
/* Do we need to wait for a TotalView-like debugger? */
|
|
|
|
|
|
|
|
mca_base_param_reg_int_name("orte", "mpi_wait_for_totalview",
|
|
|
|
"Whether the MPI application should wait for a debugger or not",
|
|
|
|
false, false, (int)false, &wait_for_totalview);
|
|
|
|
if (wait_for_totalview) {
|
|
|
|
while (MPIR_debug_gate == 0) {
|
|
|
|
#if defined(WIN32)
|
|
|
|
sleep(100); /* milliseconds */
|
|
|
|
#else
|
|
|
|
usleep(100000); /* microseconds */
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-08-16 20:17:52 +04:00
|
|
|
/* Setup process affinity */
|
|
|
|
|
|
|
|
if (ompi_mpi_paffinity_alone) {
|
|
|
|
int param, value;
|
2005-08-16 21:18:56 +04:00
|
|
|
bool set = false;
|
2005-08-16 20:17:52 +04:00
|
|
|
param = mca_base_param_find("mpi", NULL, "paffinity_processor");
|
|
|
|
if (param >= 0) {
|
2005-08-16 21:18:56 +04:00
|
|
|
if (OMPI_SUCCESS == mca_base_param_lookup_int(param, &value)) {
|
|
|
|
if (value >= 0) {
|
|
|
|
if (OPAL_SUCCESS == opal_paffinity_base_set(value)) {
|
|
|
|
set = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!set) {
|
|
|
|
char *vpid;
|
|
|
|
orte_ns_base_get_vpid_string(&vpid, orte_process_info.my_name);
|
|
|
|
opal_show_help("help-mpi-runtime",
|
|
|
|
"mpi_init:startup:paffinity-unavailable",
|
|
|
|
true, vpid);
|
|
|
|
free(vpid);
|
2005-08-16 20:17:52 +04:00
|
|
|
}
|
2005-08-26 14:56:39 +04:00
|
|
|
|
|
|
|
/* If we were able to set processor affinity, try setting
|
|
|
|
up memory affinity */
|
|
|
|
|
|
|
|
else {
|
|
|
|
if (OPAL_SUCCESS == opal_maffinity_base_open() &&
|
|
|
|
OPAL_SUCCESS == opal_maffinity_base_select()) {
|
|
|
|
ompi_mpi_maffinity_setup = true;
|
|
|
|
}
|
|
|
|
}
|
2005-08-16 20:17:52 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-02-10 22:08:35 +03:00
|
|
|
#ifndef WIN32
|
2005-07-04 06:38:44 +04:00
|
|
|
if (OMPI_SUCCESS != (ret = opal_util_register_stackhandlers ())) {
|
2005-08-16 20:17:52 +04:00
|
|
|
error = "util_register_stackhandlers() failed";
|
|
|
|
goto error;
|
Add a Stacktrace feature, which figures where/what signal has happened
after MPI-startup.
For this a new mpirun-parameter "mpi_signal" is added, one may specify a
comma-separated list of signals to grab, e.g. mpirun --mca mpi_signal 8,11
will check for SIGFPE and SIGSEGV.
It only finds the first fault (SA_ONESHOT), as after the return the same
fault will occur again.
As printout, the data provided by siginfo_t is printed to STDOUT (yes,
it calls printf ,-]).
Additionally, with glibc, it uses backtrace and backtrace_symbols to
print the calling stack up to the function in which the signal was raised:
(Rank:0) Going to write to RD_ONLY mmaped shared mem
Signal:11 info.si_errno:0(Success) si_code:2(SEGV_ACCERR)
Failing at addr:0x4020c000
[0] func:/home/rusraink/ompi-gcc/lib/libmpi.so.0 [0x40121afe]
[1] func:./t0 [0x42029180]
[2] func:./t0(__libc_start_main+0x95) [0x42017589]
[3] func:./t0(__libc_start_main+0x49) [0x8048691]
This commit was SVN r4170.
2005-01-26 22:11:46 +03:00
|
|
|
}
|
2005-02-10 22:08:35 +03:00
|
|
|
#endif
|
Add a Stacktrace feature, which figures where/what signal has happened
after MPI-startup.
For this a new mpirun-parameter "mpi_signal" is added, one may specify a
comma-separated list of signals to grab, e.g. mpirun --mca mpi_signal 8,11
will check for SIGFPE and SIGSEGV.
It only finds the first fault (SA_ONESHOT), as after the return the same
fault will occur again.
As printout, the data provided by siginfo_t is printed to STDOUT (yes,
it calls printf ,-]).
Additionally, with glibc, it uses backtrace and backtrace_symbols to
print the calling stack up to the function in which the signal was raised:
(Rank:0) Going to write to RD_ONLY mmaped shared mem
Signal:11 info.si_errno:0(Success) si_code:2(SEGV_ACCERR)
Failing at addr:0x4020c000
[0] func:/home/rusraink/ompi-gcc/lib/libmpi.so.0 [0x40121afe]
[1] func:./t0 [0x42029180]
[2] func:./t0(__libc_start_main+0x95) [0x42017589]
[3] func:./t0(__libc_start_main+0x49) [0x8048691]
This commit was SVN r4170.
2005-01-26 22:11:46 +03:00
|
|
|
|
2005-08-16 20:17:52 +04:00
|
|
|
/* initialize datatypes. This step should be done early as it will
|
|
|
|
* create the local convertor and local arch used in the proc
|
|
|
|
* init.
|
2005-07-29 04:15:26 +04:00
|
|
|
*/
|
|
|
|
if (OMPI_SUCCESS != (ret = ompi_ddt_init())) {
|
|
|
|
error = "ompi_ddt_init() failed";
|
|
|
|
goto error;
|
|
|
|
}
|
2005-03-27 17:05:23 +04:00
|
|
|
|
2005-07-29 04:15:26 +04:00
|
|
|
/* Initialize OMPI procs */
|
2004-06-07 19:33:53 +04:00
|
|
|
if (OMPI_SUCCESS != (ret = ompi_proc_init())) {
|
2004-09-05 20:05:37 +04:00
|
|
|
error = "mca_proc_init() failed";
|
|
|
|
goto error;
|
2004-03-03 19:44:41 +03:00
|
|
|
}
|
|
|
|
|
2005-04-14 22:55:53 +04:00
|
|
|
/* initialize the progress engine for MPI functionality */
|
2005-07-04 01:57:43 +04:00
|
|
|
if (OMPI_SUCCESS != opal_progress_mpi_init()) {
|
|
|
|
error = "opal_progress_mpi_init() failed";
|
2005-04-14 22:55:53 +04:00
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
2005-03-27 17:05:23 +04:00
|
|
|
/* Open up MPI-related MCA components */
|
2004-08-14 05:56:05 +04:00
|
|
|
|
2004-06-17 20:23:34 +04:00
|
|
|
if (OMPI_SUCCESS != (ret = mca_allocator_base_open())) {
|
2004-09-05 20:05:37 +04:00
|
|
|
error = "mca_allocator_base_open() failed";
|
|
|
|
goto error;
|
2004-06-17 20:23:34 +04:00
|
|
|
}
|
|
|
|
if (OMPI_SUCCESS != (ret = mca_mpool_base_open())) {
|
2004-09-05 20:05:37 +04:00
|
|
|
error = "mca_mpool_base_open() failed";
|
|
|
|
goto error;
|
2004-06-17 20:23:34 +04:00
|
|
|
}
|
2004-06-07 19:33:53 +04:00
|
|
|
if (OMPI_SUCCESS != (ret = mca_pml_base_open())) {
|
2004-09-05 20:05:37 +04:00
|
|
|
error = "mca_pml_base_open() failed";
|
|
|
|
goto error;
|
2004-02-13 16:56:55 +03:00
|
|
|
}
|
2004-06-07 19:33:53 +04:00
|
|
|
if (OMPI_SUCCESS != (ret = mca_coll_base_open())) {
|
2004-09-05 20:05:37 +04:00
|
|
|
error = "mca_coll_base_open() failed";
|
|
|
|
goto error;
|
2004-02-13 16:56:55 +03:00
|
|
|
}
|
2004-01-30 06:59:39 +03:00
|
|
|
|
2005-03-27 17:05:23 +04:00
|
|
|
/* In order to reduce the common case for MPI apps (where they
|
|
|
|
don't use MPI-2 IO or MPI-1 topology functions), the io and
|
|
|
|
topo frameworks are initialized lazily, at the first use of
|
|
|
|
relevant functions (e.g., MPI_FILE_*, MPI_CART_*, MPI_GRAPH_*),
|
|
|
|
so they are not opened here. */
|
|
|
|
|
|
|
|
/* Initialize module exchange */
|
|
|
|
|
2005-08-05 22:03:30 +04:00
|
|
|
if (OMPI_SUCCESS != (ret = mca_pml_base_modex_init())) {
|
|
|
|
error = "mca_pml_base_modex_init() failed";
|
2004-10-15 00:50:06 +04:00
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
2005-03-27 17:05:23 +04:00
|
|
|
/* Select which MPI components to use */
|
2004-01-30 06:59:39 +03:00
|
|
|
|
2004-06-07 19:33:53 +04:00
|
|
|
if (OMPI_SUCCESS !=
|
2005-03-27 17:05:23 +04:00
|
|
|
(ret = mca_mpool_base_init(OMPI_ENABLE_PROGRESS_THREADS,
|
|
|
|
OMPI_ENABLE_MPI_THREADS))) {
|
|
|
|
error = "mca_mpool_base_init() failed";
|
2004-09-05 20:05:37 +04:00
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
2005-03-27 17:05:23 +04:00
|
|
|
if (OMPI_SUCCESS !=
|
|
|
|
(ret = mca_pml_base_select(OMPI_ENABLE_PROGRESS_THREADS,
|
|
|
|
OMPI_ENABLE_MPI_THREADS))) {
|
|
|
|
error = "mca_pml_base_select() failed";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (OMPI_SUCCESS !=
|
|
|
|
(ret = mca_coll_base_find_available(OMPI_ENABLE_PROGRESS_THREADS,
|
|
|
|
OMPI_ENABLE_MPI_THREADS))) {
|
|
|
|
error = "mca_coll_base_find_available() failed";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* io and topo components are not selected here -- see comment
|
|
|
|
above about the io and topo frameworks being loaded lazily */
|
|
|
|
|
|
|
|
/* Initialize each MPI handle subsystem */
|
2004-10-08 21:12:36 +04:00
|
|
|
/* initialize requests */
|
|
|
|
if (OMPI_SUCCESS != (ret = ompi_request_init())) {
|
|
|
|
error = "ompi_request_init() failed";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
2004-09-05 20:05:37 +04:00
|
|
|
/* initialize info */
|
|
|
|
if (OMPI_SUCCESS != (ret = ompi_info_init())) {
|
|
|
|
error = "ompi_info_init() failed";
|
|
|
|
goto error;
|
|
|
|
}
|
2004-10-08 21:12:36 +04:00
|
|
|
|
2004-09-05 20:05:37 +04:00
|
|
|
/* initialize error handlers */
|
|
|
|
if (OMPI_SUCCESS != (ret = ompi_errhandler_init())) {
|
|
|
|
error = "ompi_errhandler_init() failed";
|
|
|
|
goto error;
|
2004-02-13 16:56:55 +03:00
|
|
|
}
|
2004-01-30 06:59:39 +03:00
|
|
|
|
2004-09-05 20:05:37 +04:00
|
|
|
/* initialize error codes */
|
|
|
|
if (OMPI_SUCCESS != (ret = ompi_mpi_errcode_init())) {
|
|
|
|
error = "ompi_mpi_errcode_init() failed";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* initialize error classes */
|
|
|
|
if (OMPI_SUCCESS != (ret = ompi_errclass_init())) {
|
|
|
|
error = "ompi_errclass_init() failed";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* initialize internal error codes */
|
|
|
|
if (OMPI_SUCCESS != (ret = ompi_errcode_intern_init())) {
|
|
|
|
error = "ompi_errcode_intern_init() failed";
|
|
|
|
goto error;
|
|
|
|
}
|
2004-05-08 03:23:03 +04:00
|
|
|
|
2004-09-05 20:05:37 +04:00
|
|
|
/* initialize groups */
|
|
|
|
if (OMPI_SUCCESS != (ret = ompi_group_init())) {
|
|
|
|
error = "ompi_group_init() failed";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* initialize communicators */
|
|
|
|
if (OMPI_SUCCESS != (ret = ompi_comm_init())) {
|
|
|
|
error = "ompi_comm_init() failed";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* initialize ops */
|
|
|
|
if (OMPI_SUCCESS != (ret = ompi_op_init())) {
|
|
|
|
error = "ompi_op_init() failed";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* initialize file handles */
|
|
|
|
if (OMPI_SUCCESS != (ret = ompi_file_init())) {
|
|
|
|
error = "ompi_file_init() failed";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
2004-09-16 04:00:09 +04:00
|
|
|
/* initialize attribute meta-data structure for comm/win/dtype */
|
|
|
|
if (OMPI_SUCCESS != (ret = ompi_attr_init())) {
|
|
|
|
error = "ompi_attr_init() failed";
|
|
|
|
goto error;
|
|
|
|
}
|
2004-09-05 20:05:37 +04:00
|
|
|
/* do module exchange */
|
2005-08-05 22:03:30 +04:00
|
|
|
if (OMPI_SUCCESS != (ret = mca_pml_base_modex_exchange())) {
|
|
|
|
error = "mca_pml_base_modex_exchange() failed";
|
2004-09-05 20:05:37 +04:00
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
2005-05-06 21:00:06 +04:00
|
|
|
/* store our process info on registry */
|
|
|
|
if (ORTE_SUCCESS != (ret = orte_schema.store_my_info())) {
|
|
|
|
ORTE_ERROR_LOG(ret);
|
|
|
|
error = "could not store my info on registry";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
2005-03-27 17:05:23 +04:00
|
|
|
/* Let system know we are at STG1 Barrier */
|
2005-03-14 23:57:21 +03:00
|
|
|
if (ORTE_SUCCESS != (ret = orte_soh.set_proc_soh(orte_process_info.my_name,
|
|
|
|
ORTE_PROC_STATE_AT_STG1, 0))) {
|
|
|
|
ORTE_ERROR_LOG(ret);
|
|
|
|
error = "set process state failed";
|
2004-11-20 22:12:43 +03:00
|
|
|
goto error;
|
2005-03-14 23:57:21 +03:00
|
|
|
}
|
2004-11-20 22:12:43 +03:00
|
|
|
|
2005-03-27 17:05:23 +04:00
|
|
|
/* if the compound command is operative, execute it */
|
|
|
|
|
2005-03-23 20:50:12 +03:00
|
|
|
if (compound_cmd) {
|
|
|
|
if (OMPI_SUCCESS != (ret = orte_gpr.exec_compound_cmd())) {
|
|
|
|
ORTE_ERROR_LOG(ret);
|
2005-08-16 20:17:52 +04:00
|
|
|
error = "ompi_rte_init: orte_gpr.exec_compound_cmd failed";
|
|
|
|
goto error;
|
2005-03-23 20:50:12 +03:00
|
|
|
}
|
2005-03-14 23:57:21 +03:00
|
|
|
}
|
2005-03-23 20:50:12 +03:00
|
|
|
|
2005-03-29 02:37:54 +04:00
|
|
|
/* FIRST BARRIER - WAIT FOR MSG FROM RMGR_PROC_STAGE_GATE_MGR TO ARRIVE */
|
2005-03-14 23:57:21 +03:00
|
|
|
if (ORTE_SUCCESS != (ret = orte_rml.xcast(NULL, NULL, 0, NULL, NULL))) {
|
|
|
|
ORTE_ERROR_LOG(ret);
|
2005-08-16 20:17:52 +04:00
|
|
|
error = "ompi_mpi_init: failed to see all procs register\n";
|
|
|
|
goto error;
|
2004-11-20 22:12:43 +03:00
|
|
|
}
|
|
|
|
|
2005-07-12 09:40:56 +04:00
|
|
|
/* start PTL's */
|
|
|
|
ret = MCA_PML_CALL(enable(true));
|
|
|
|
if( OMPI_SUCCESS != ret ) {
|
|
|
|
error = "PML control failed";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
2004-09-05 20:05:37 +04:00
|
|
|
/* add all ompi_proc_t's to PML */
|
|
|
|
if (NULL == (procs = ompi_proc_world(&nprocs))) {
|
|
|
|
error = "ompi_proc_world() failed";
|
|
|
|
goto error;
|
|
|
|
}
|
2005-07-12 09:40:56 +04:00
|
|
|
ret = MCA_PML_CALL(add_procs(procs, nprocs));
|
|
|
|
free(procs);
|
|
|
|
if( OMPI_SUCCESS != ret ) {
|
2004-09-05 20:05:37 +04:00
|
|
|
error = "PML add procs failed";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
2005-05-24 02:06:50 +04:00
|
|
|
MCA_PML_CALL(add_comm(&ompi_mpi_comm_world));
|
|
|
|
MCA_PML_CALL(add_comm(&ompi_mpi_comm_self));
|
|
|
|
|
2005-03-27 17:05:23 +04:00
|
|
|
/* Figure out the final MPI thread levels. If we were not
|
|
|
|
compiled for support for MPI threads, then don't allow
|
|
|
|
MPI_THREAD_MULTIPLE. */
|
2004-02-05 04:52:56 +03:00
|
|
|
|
2004-06-07 19:33:53 +04:00
|
|
|
ompi_mpi_thread_requested = requested;
|
2005-04-14 22:55:53 +04:00
|
|
|
if (OMPI_HAVE_THREAD_SUPPORT == 0) {
|
2005-03-27 17:05:23 +04:00
|
|
|
ompi_mpi_thread_provided = *provided = MPI_THREAD_SINGLE;
|
|
|
|
ompi_mpi_main_thread = NULL;
|
|
|
|
} else if (OMPI_ENABLE_MPI_THREADS == 1) {
|
|
|
|
ompi_mpi_thread_provided = *provided = requested;
|
2005-07-04 02:45:48 +04:00
|
|
|
ompi_mpi_main_thread = opal_thread_get_self();
|
2005-03-27 17:05:23 +04:00
|
|
|
} else {
|
|
|
|
if (MPI_THREAD_MULTIPLE == requested) {
|
|
|
|
ompi_mpi_thread_provided = *provided = MPI_THREAD_SERIALIZED;
|
|
|
|
} else {
|
|
|
|
ompi_mpi_thread_provided = *provided = requested;
|
|
|
|
}
|
2005-07-04 02:45:48 +04:00
|
|
|
ompi_mpi_main_thread = opal_thread_get_self();
|
2005-03-27 17:05:23 +04:00
|
|
|
}
|
|
|
|
|
2004-06-29 04:02:25 +04:00
|
|
|
ompi_mpi_thread_multiple = (ompi_mpi_thread_provided ==
|
|
|
|
MPI_THREAD_MULTIPLE);
|
2005-03-27 17:05:23 +04:00
|
|
|
if (OMPI_ENABLE_PROGRESS_THREADS == 1 ||
|
|
|
|
OMPI_ENABLE_MPI_THREADS == 1) {
|
2005-07-04 02:45:48 +04:00
|
|
|
opal_set_using_threads(true);
|
2005-03-27 17:05:23 +04:00
|
|
|
}
|
2004-02-05 04:52:56 +03:00
|
|
|
|
2004-05-08 03:23:03 +04:00
|
|
|
/* Init coll for the comms */
|
|
|
|
|
2004-09-05 20:05:37 +04:00
|
|
|
if (OMPI_SUCCESS !=
|
|
|
|
(ret = mca_coll_base_comm_select(MPI_COMM_SELF, NULL))) {
|
|
|
|
error = "mca_coll_base_comm_select(MPI_COMM_SELF) failed";
|
|
|
|
goto error;
|
2004-07-13 16:35:43 +04:00
|
|
|
}
|
2004-05-08 03:23:03 +04:00
|
|
|
|
2004-09-05 20:05:37 +04:00
|
|
|
if (OMPI_SUCCESS !=
|
|
|
|
(ret = mca_coll_base_comm_select(MPI_COMM_WORLD, NULL))) {
|
|
|
|
error = "mca_coll_base_comm_select(MPI_COMM_WORLD) failed";
|
|
|
|
goto error;
|
2004-07-13 16:35:43 +04:00
|
|
|
}
|
2004-05-08 03:23:03 +04:00
|
|
|
|
2005-04-09 23:34:32 +04:00
|
|
|
#if OMPI_ENABLE_PROGRESS_THREADS && 0
|
|
|
|
/* BWB - XXX - FIXME - is this actually correct? */
|
2005-01-13 18:30:49 +03:00
|
|
|
/* setup I/O forwarding */
|
2005-04-09 23:34:32 +04:00
|
|
|
if (orte_process_info.seed == false) {
|
2005-01-18 20:32:54 +03:00
|
|
|
if (OMPI_SUCCESS != (ret = ompi_mpi_init_io())) {
|
2005-08-16 20:17:52 +04:00
|
|
|
error = "ompi_rte_init_io failed";
|
|
|
|
goto error;
|
2005-01-18 20:32:54 +03:00
|
|
|
}
|
2005-01-13 18:30:49 +03:00
|
|
|
}
|
2004-11-18 02:37:49 +03:00
|
|
|
#endif
|
2004-05-08 03:23:03 +04:00
|
|
|
|
2005-07-09 01:01:37 +04:00
|
|
|
/*
|
|
|
|
* Dump all MCA parameters if requested
|
|
|
|
*/
|
|
|
|
if (ompi_mpi_show_mca_params) {
|
|
|
|
ompi_show_all_mca_params(ompi_mpi_comm_world.c_my_rank,
|
|
|
|
nprocs,
|
|
|
|
orte_system_info.nodename);
|
|
|
|
}
|
|
|
|
|
2005-03-27 17:05:23 +04:00
|
|
|
/* Let system know we are at STG2 Barrier */
|
|
|
|
|
2005-03-14 23:57:21 +03:00
|
|
|
if (ORTE_SUCCESS != (ret = orte_soh.set_proc_soh(orte_process_info.my_name,
|
|
|
|
ORTE_PROC_STATE_AT_STG2, 0))) {
|
|
|
|
ORTE_ERROR_LOG(ret);
|
|
|
|
error = "set process state failed";
|
|
|
|
goto error;
|
2005-02-21 21:56:30 +03:00
|
|
|
}
|
|
|
|
|
2005-03-14 23:57:21 +03:00
|
|
|
/* BWB - is this still needed? */
|
2005-02-16 20:42:07 +03:00
|
|
|
#if OMPI_ENABLE_PROGRESS_THREADS == 0
|
2005-07-04 03:09:55 +04:00
|
|
|
opal_progress_events(OPAL_EVLOOP_NONBLOCK);
|
2005-01-13 18:30:49 +03:00
|
|
|
#endif
|
|
|
|
|
2005-03-27 17:05:23 +04:00
|
|
|
/* Second barrier -- wait for message from
|
|
|
|
RMGR_PROC_STAGE_GATE_MGR to arrive */
|
|
|
|
|
2005-03-14 23:57:21 +03:00
|
|
|
if (ORTE_SUCCESS != (ret = orte_rml.xcast(NULL, NULL, 0, NULL, NULL))) {
|
|
|
|
ORTE_ERROR_LOG(ret);
|
|
|
|
error = "ompi_mpi_init: failed to see all procs register\n";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
2004-09-29 16:41:55 +04:00
|
|
|
/* new very last step: check whether we have been spawned or not.
|
|
|
|
We introduce that at the very end, since we need collectives,
|
2005-03-27 17:05:23 +04:00
|
|
|
datatypes, ptls etc. up and running here.... */
|
|
|
|
|
2004-09-29 16:41:55 +04:00
|
|
|
if (OMPI_SUCCESS != (ret = ompi_comm_dyn_init())) {
|
|
|
|
error = "ompi_comm_dyn_init() failed";
|
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
2004-09-05 20:05:37 +04:00
|
|
|
error:
|
|
|
|
if (ret != OMPI_SUCCESS) {
|
2005-07-04 06:38:44 +04:00
|
|
|
opal_show_help("help-mpi-runtime",
|
2004-09-05 20:05:37 +04:00
|
|
|
"mpi_init:startup:internal-failure", true,
|
|
|
|
"MPI_INIT", "MPI_INIT", error, ret);
|
|
|
|
return ret;
|
2004-06-29 04:02:25 +04:00
|
|
|
}
|
2004-05-08 03:23:03 +04:00
|
|
|
|
2005-03-30 05:40:26 +04:00
|
|
|
/* put the event library in "high performance MPI mode" */
|
2005-07-04 01:57:43 +04:00
|
|
|
if (OMPI_SUCCESS != opal_progress_mpi_enable()) {
|
|
|
|
error = "opal_progress_mpi_enable() failed";
|
2005-03-30 05:40:26 +04:00
|
|
|
goto error;
|
|
|
|
}
|
|
|
|
|
2005-03-27 17:05:23 +04:00
|
|
|
/* All done. Wasn't that simple? */
|
2004-02-05 04:52:56 +03:00
|
|
|
|
2004-06-07 19:33:53 +04:00
|
|
|
ompi_mpi_initialized = true;
|
2004-11-20 22:12:43 +03:00
|
|
|
|
2005-03-14 23:57:21 +03:00
|
|
|
if (orte_debug_flag) {
|
2005-08-16 20:17:52 +04:00
|
|
|
opal_output(0, "[%lu,%lu,%lu] ompi_mpi_init completed",
|
|
|
|
ORTE_NAME_ARGS(orte_process_info.my_name));
|
2004-11-20 22:12:43 +03:00
|
|
|
}
|
|
|
|
|
2004-02-13 16:56:55 +03:00
|
|
|
return MPI_SUCCESS;
|
2004-01-15 09:08:25 +03:00
|
|
|
}
|