openmpi/ompi/runtime/ompi_mpi_init.c
George Bosilca 6aa956241f Solve the issues when several PMLs are available. The main problem here comes from the fact that a PML
is a lot more difficult than a PTL, and it can adapt its behavior to the level of threading required
by the user. In this case the behavior is the priority of the PML. Therefore this information is never
available before the init function (of the PML) is called. So I try to keep nearly the same structure
as it was before, with one change. When a PML gets initialized it does not necessarily mean it has been
selected, so it does not mean it has to create all its internal structures (and select the PTL and
all this stuff). That can all be done later, when the PML learns that it definitively got selected
(when the enable function is called with the argument set to true). Thus, in the case of a PML close,
one has to check whether the PML had been selected before trying to clean up its internals.

I had to change the MPI_Init function to allow the PML to be enabled before we start adding procs to it.

This commit was SVN r6434.
2005-07-12 05:40:56 +00:00
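
A minimal sketch of the contract described above, using hypothetical names (the my_pml_* symbols are illustrative, not actual OMPI identifiers):

/* Sketch only: a PML may be initialized without ever being selected, so
 * it defers building its internal structures until enable(true), and its
 * close must check whether that ever happened. */
static bool my_pml_selected = false;

static int my_pml_enable(bool enable)
{
    if (enable) {
        my_pml_selected = true;
        /* only now is it safe to create internal structures and
           select the PTLs */
    }
    return OMPI_SUCCESS;
}

static int my_pml_close(void)
{
    if (my_pml_selected) {
        /* tear down whatever enable(true) created */
    }
    return OMPI_SUCCESS;
}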


/*
 * Copyright (c) 2004-2005 The Trustees of Indiana University.
 *                         All rights reserved.
 * Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
 *                         All rights reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */
#include "ompi_config.h"
#include "include/constants.h"
#include "ompi/runtime/mpiruntime.h"
#include "ompi/runtime/params.h"
#include "orte/runtime/runtime.h"
#include "opal/runtime/opal_progress.h"
#include "util/sys_info.h"
#include "util/proc_info.h"
#include "util/session_dir.h"
#include "mpi.h"
#include "communicator/communicator.h"
#include "group/group.h"
#include "info/info.h"
#include "opal/util/show_help.h"
#include "opal/util/stacktrace.h"
#include "errhandler/errcode.h"
#include "errhandler/errclass.h"
#include "request/request.h"
#include "op/op.h"
#include "file/file.h"
#include "attribute/attribute.h"
#include "opal/threads/thread.h"
#include "mca/base/base.h"
#include "mca/allocator/base/base.h"
#include "mca/allocator/allocator.h"
#include "mca/mpool/base/base.h"
#include "mca/mpool/mpool.h"
#include "mca/pml/pml.h"
#include "mca/pml/base/pml_base_module_exchange.h"
#include "mca/pml/base/base.h"
#include "mca/coll/coll.h"
#include "mca/coll/base/base.h"
#include "mca/io/io.h"
#include "mca/io/base/base.h"
#include "mca/oob/oob.h"
#include "mca/oob/base/base.h"
#include "mca/ns/ns.h"
#include "mca/gpr/gpr.h"
#include "mca/rml/rml.h"
#include "mca/schema/schema.h"
#include "mca/soh/soh.h"
#include "mca/soh/base/base.h"
#include "mca/errmgr/errmgr.h"
#include "opal/runtime/opal.h"
#include "opal/event/event.h"

/*
 * Global variables and symbols for the MPI layer
 */
bool ompi_mpi_initialized = false;
bool ompi_mpi_finalized = false;
bool ompi_mpi_thread_multiple = false;
int ompi_mpi_thread_requested = MPI_THREAD_SINGLE;
int ompi_mpi_thread_provided = MPI_THREAD_SINGLE;
opal_thread_t *ompi_mpi_main_thread = NULL;
int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
{
    int ret;
    ompi_proc_t** procs;
    size_t nprocs;
    char *error = NULL;
    bool compound_cmd = false;

    /* Join the run-time environment - do the things that don't hit
       the registry */
    if (ORTE_SUCCESS != (ret = opal_init())) {
        error = "ompi_mpi_init: opal_init failed";
        goto error;
    }

    if (ORTE_SUCCESS != (ret = orte_init_stage1())) {
        error = "ompi_mpi_init: orte_init_stage1 failed";
        goto error;
    }

    /* If we are not the seed nor a singleton, AND we have not set the
       orte_debug flag, then start recording the compound command that
       starts us up.  If we are the seed or a singleton, then don't do
       this - the registry is local, so we'll just drive it
       directly */
    if (orte_process_info.seed ||
        NULL == orte_process_info.ns_replica ||
        orte_debug_flag) {
        compound_cmd = false;
    } else {
        if (ORTE_SUCCESS != (ret = orte_gpr.begin_compound_cmd())) {
            ORTE_ERROR_LOG(ret);
            error = "ompi_mpi_init: orte_gpr.begin_compound_cmd failed";
            goto error;
        }
        compound_cmd = true;
    }

    /* Now do the things that hit the registry */
    if (ORTE_SUCCESS != (ret = orte_init_stage2())) {
        ORTE_ERROR_LOG(ret);
        error = "ompi_mpi_init: orte_init_stage2 failed";
        goto error;
    }

    /* Once we've joined the RTE, see if any MCA parameters were
       passed to the MPI level */
    if (OMPI_SUCCESS != (ret = ompi_mpi_register_params())) {
        error = "ompi_mpi_register_params() failed";
        goto error;
    }

#ifndef WIN32
    if (OMPI_SUCCESS != (ret = opal_util_register_stackhandlers())) {
        error = "opal_util_register_stackhandlers() failed";
        goto error;
    }
#endif

    /* Initialize OMPI procs */
    if (OMPI_SUCCESS != (ret = ompi_proc_init())) {
        error = "ompi_proc_init() failed";
        goto error;
    }

    /* initialize the progress engine for MPI functionality */
    if (OMPI_SUCCESS != (ret = opal_progress_mpi_init())) {
        error = "opal_progress_mpi_init() failed";
        goto error;
    }
    /* Open up MPI-related MCA components */
    if (OMPI_SUCCESS != (ret = mca_allocator_base_open())) {
        error = "mca_allocator_base_open() failed";
        goto error;
    }
    if (OMPI_SUCCESS != (ret = mca_mpool_base_open())) {
        error = "mca_mpool_base_open() failed";
        goto error;
    }
    if (OMPI_SUCCESS != (ret = mca_pml_base_open())) {
        error = "mca_pml_base_open() failed";
        goto error;
    }
    if (OMPI_SUCCESS != (ret = mca_coll_base_open())) {
        error = "mca_coll_base_open() failed";
        goto error;
    }

    /* In order to reduce the common case for MPI apps (where they
       don't use MPI-2 IO or MPI-1 topology functions), the io and
       topo frameworks are initialized lazily, at the first use of
       relevant functions (e.g., MPI_FILE_*, MPI_CART_*, MPI_GRAPH_*),
       so they are not opened here. */

    /* Initialize module exchange */
    if (OMPI_SUCCESS != (ret = mca_base_modex_init())) {
        error = "mca_base_modex_init() failed";
        goto error;
    }

    /* Select which MPI components to use */
    if (OMPI_SUCCESS !=
        (ret = mca_mpool_base_init(OMPI_ENABLE_PROGRESS_THREADS,
                                   OMPI_ENABLE_MPI_THREADS))) {
        error = "mca_mpool_base_init() failed";
        goto error;
    }

    if (OMPI_SUCCESS !=
        (ret = mca_pml_base_select(OMPI_ENABLE_PROGRESS_THREADS,
                                   OMPI_ENABLE_MPI_THREADS))) {
        error = "mca_pml_base_select() failed";
        goto error;
    }
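
    /* Note: as the commit message above explains, a PML can adapt its
       behavior (and therefore its priority) to the requested threading
       level, so its priority is only known after its init function has
       run.  Selection merely picks a PML here; the definitive go-ahead
       comes later, via enable(true). */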
    if (OMPI_SUCCESS !=
        (ret = mca_coll_base_find_available(OMPI_ENABLE_PROGRESS_THREADS,
                                            OMPI_ENABLE_MPI_THREADS))) {
        error = "mca_coll_base_find_available() failed";
        goto error;
    }

    /* io and topo components are not selected here -- see comment
       above about the io and topo frameworks being loaded lazily */

    /* Initialize each MPI handle subsystem */

    /* initialize requests */
    if (OMPI_SUCCESS != (ret = ompi_request_init())) {
        error = "ompi_request_init() failed";
        goto error;
    }

    /* initialize info */
    if (OMPI_SUCCESS != (ret = ompi_info_init())) {
        error = "ompi_info_init() failed";
        goto error;
    }

    /* initialize error handlers */
    if (OMPI_SUCCESS != (ret = ompi_errhandler_init())) {
        error = "ompi_errhandler_init() failed";
        goto error;
    }

    /* initialize error codes */
    if (OMPI_SUCCESS != (ret = ompi_mpi_errcode_init())) {
        error = "ompi_mpi_errcode_init() failed";
        goto error;
    }

    /* initialize error classes */
    if (OMPI_SUCCESS != (ret = ompi_errclass_init())) {
        error = "ompi_errclass_init() failed";
        goto error;
    }

    /* initialize internal error codes */
    if (OMPI_SUCCESS != (ret = ompi_errcode_intern_init())) {
        error = "ompi_errcode_intern_init() failed";
        goto error;
    }

    /* initialize groups */
    if (OMPI_SUCCESS != (ret = ompi_group_init())) {
        error = "ompi_group_init() failed";
        goto error;
    }

    /* initialize communicators */
    if (OMPI_SUCCESS != (ret = ompi_comm_init())) {
        error = "ompi_comm_init() failed";
        goto error;
    }

    /* initialize datatypes */
    if (OMPI_SUCCESS != (ret = ompi_ddt_init())) {
        error = "ompi_ddt_init() failed";
        goto error;
    }

    /* initialize ops */
    if (OMPI_SUCCESS != (ret = ompi_op_init())) {
        error = "ompi_op_init() failed";
        goto error;
    }

    /* initialize file handles */
    if (OMPI_SUCCESS != (ret = ompi_file_init())) {
        error = "ompi_file_init() failed";
        goto error;
    }

    /* initialize attribute meta-data structure for comm/win/dtype */
    if (OMPI_SUCCESS != (ret = ompi_attr_init())) {
        error = "ompi_attr_init() failed";
        goto error;
    }

    /* do module exchange */
    if (OMPI_SUCCESS != (ret = mca_base_modex_exchange())) {
        error = "mca_base_modex_exchange() failed";
        goto error;
    }

    /* store our process info on registry */
    if (ORTE_SUCCESS != (ret = orte_schema.store_my_info())) {
        ORTE_ERROR_LOG(ret);
        error = "could not store my info on registry";
        goto error;
    }

    /* Let system know we are at STG1 Barrier */
    if (ORTE_SUCCESS != (ret = orte_soh.set_proc_soh(orte_process_info.my_name,
                                                     ORTE_PROC_STATE_AT_STG1,
                                                     0))) {
        ORTE_ERROR_LOG(ret);
        error = "set process state failed";
        goto error;
    }

    /* if the compound command is operative, execute it */
    if (compound_cmd) {
        if (OMPI_SUCCESS != (ret = orte_gpr.exec_compound_cmd())) {
            ORTE_ERROR_LOG(ret);
            error = "ompi_mpi_init: orte_gpr.exec_compound_cmd failed";
            goto error;
        }
    }

    /* FIRST BARRIER - WAIT FOR MSG FROM RMGR_PROC_STAGE_GATE_MGR TO ARRIVE */
    if (ORTE_SUCCESS != (ret = orte_rml.xcast(NULL, NULL, 0, NULL, NULL))) {
        ORTE_ERROR_LOG(ret);
        error = "ompi_mpi_init: failed to see all procs register";
        goto error;
    }

    /* Enable the selected PML.  This is the definitive "you have been
       selected" signal (see the commit message above): only now does the
       PML build its internal structures and select the PTLs. */
    ret = MCA_PML_CALL(enable(true));
    if (OMPI_SUCCESS != ret) {
        error = "PML control failed";
        goto error;
    }
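
    /* Per the commit message above, the PML must be enabled before any
       procs are added to it; MPI_Init was rearranged so that enable(true)
       precedes the add_procs call below. */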
    /* add all ompi_proc_t's to PML */
    if (NULL == (procs = ompi_proc_world(&nprocs))) {
        error = "ompi_proc_world() failed";
        goto error;
    }
    ret = MCA_PML_CALL(add_procs(procs, nprocs));
    free(procs);
    if (OMPI_SUCCESS != ret) {
        error = "PML add procs failed";
        goto error;
    }
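
    /* Hand the two predefined communicators to the PML so it can set up
       whatever per-communicator state it keeps before any traffic flows. */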
    MCA_PML_CALL(add_comm(&ompi_mpi_comm_world));
    MCA_PML_CALL(add_comm(&ompi_mpi_comm_self));

    /* Figure out the final MPI thread levels.  If we were not compiled
       with support for MPI threads, then don't allow MPI_THREAD_MULTIPLE
       (the best we can offer in that case is MPI_THREAD_SERIALIZED). */
    ompi_mpi_thread_requested = requested;
    if (OMPI_HAVE_THREAD_SUPPORT == 0) {
        ompi_mpi_thread_provided = *provided = MPI_THREAD_SINGLE;
        ompi_mpi_main_thread = NULL;
    } else if (OMPI_ENABLE_MPI_THREADS == 1) {
        ompi_mpi_thread_provided = *provided = requested;
        ompi_mpi_main_thread = opal_thread_get_self();
    } else {
        if (MPI_THREAD_MULTIPLE == requested) {
            ompi_mpi_thread_provided = *provided = MPI_THREAD_SERIALIZED;
        } else {
            ompi_mpi_thread_provided = *provided = requested;
        }
        ompi_mpi_main_thread = opal_thread_get_self();
    }

    ompi_mpi_thread_multiple = (ompi_mpi_thread_provided ==
                                MPI_THREAD_MULTIPLE);
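
    /* Summary of the negotiation above:
     *   no thread support compiled in         -> MPI_THREAD_SINGLE
     *   full MPI thread support compiled in   -> level requested
     *   library threads only (no MPI threads) -> level requested,
     *                                            capped at SERIALIZED */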
    if (OMPI_ENABLE_PROGRESS_THREADS == 1 ||
        OMPI_ENABLE_MPI_THREADS == 1) {
        opal_set_using_threads(true);
    }

    /* Init coll for the comms */
    if (OMPI_SUCCESS !=
        (ret = mca_coll_base_comm_select(MPI_COMM_SELF, NULL))) {
        error = "mca_coll_base_comm_select(MPI_COMM_SELF) failed";
        goto error;
    }

    if (OMPI_SUCCESS !=
        (ret = mca_coll_base_comm_select(MPI_COMM_WORLD, NULL))) {
        error = "mca_coll_base_comm_select(MPI_COMM_WORLD) failed";
        goto error;
    }

#if OMPI_ENABLE_PROGRESS_THREADS && 0
    /* BWB - XXX - FIXME - is this actually correct? */
    /* setup I/O forwarding */
    if (orte_process_info.seed == false) {
        if (OMPI_SUCCESS != (ret = ompi_mpi_init_io())) {
            error = "ompi_mpi_init_io() failed";
            goto error;
        }
    }
#endif

    /*
     * Dump all MCA parameters if requested
     */
    if (ompi_mpi_show_mca_params) {
        ompi_show_all_mca_params(ompi_mpi_comm_world.c_my_rank,
                                 nprocs,
                                 orte_system_info.nodename);
    }

    /* Let system know we are at STG2 Barrier */
    if (ORTE_SUCCESS != (ret = orte_soh.set_proc_soh(orte_process_info.my_name,
                                                     ORTE_PROC_STATE_AT_STG2,
                                                     0))) {
        ORTE_ERROR_LOG(ret);
        error = "set process state failed";
        goto error;
    }

    /* BWB - is this still needed? */
#if OMPI_ENABLE_PROGRESS_THREADS == 0
    opal_progress_events(OPAL_EVLOOP_NONBLOCK);
#endif

    /* Second barrier -- wait for message from
       RMGR_PROC_STAGE_GATE_MGR to arrive */
    if (ORTE_SUCCESS != (ret = orte_rml.xcast(NULL, NULL, 0, NULL, NULL))) {
        ORTE_ERROR_LOG(ret);
        error = "ompi_mpi_init: failed to see all procs register";
        goto error;
    }

    /* New very last step: check whether we have been spawned or not.
       We introduce that at the very end, since we need collectives,
       datatypes, ptls etc. up and running here.... */
    if (OMPI_SUCCESS != (ret = ompi_comm_dyn_init())) {
        error = "ompi_comm_dyn_init() failed";
        goto error;
    }

 error:
    if (ret != OMPI_SUCCESS) {
        opal_show_help("help-mpi-runtime",
                       "mpi_init:startup:internal-failure", true,
                       "MPI_INIT", "MPI_INIT", error, ret);
        return ret;
    }

    /* put the event library in "high performance MPI mode" */
    if (OMPI_SUCCESS != (ret = opal_progress_mpi_enable())) {
        error = "opal_progress_mpi_enable() failed";
        goto error;
    }

    /* All done.  Wasn't that simple? */
    ompi_mpi_initialized = true;

    if (orte_debug_flag) {
        opal_output(0, "[%lu,%lu,%lu] ompi_mpi_init completed",
                    ORTE_NAME_ARGS(orte_process_info.my_name));
    }

    return MPI_SUCCESS;
}