1
1
openmpi/config/opal_configure_options.m4

483 строки
16 KiB
Plaintext
Исходник Обычный вид История

dnl -*- shell-script -*-
dnl
dnl Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
dnl University Research and Technology
dnl Corporation. All rights reserved.
dnl Copyright (c) 2004-2005 The University of Tennessee and The University
dnl of Tennessee Research Foundation. All rights
dnl reserved.
dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
dnl University of Stuttgart. All rights reserved.
dnl Copyright (c) 2004-2005 The Regents of the University of California.
dnl All rights reserved.
dnl Copyright (c) 2006-2010 Cisco Systems, Inc. All rights reserved.
dnl Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
dnl Copyright (c) 2009 IBM Corporation. All rights reserved.
dnl Copyright (c) 2009 Los Alamos National Security, LLC. All rights
dnl reserved.
dnl Copyright (c) 2009-2011 Oak Ridge National Labs. All rights reserved.
dnl Copyright (c) 2011-2013 NVIDIA Corporation. All rights reserved.
dnl Copyright (c) 2013 Intel, Inc. All rights reserved
dnl
dnl $COPYRIGHT$
dnl
dnl Additional copyrights may follow
dnl
dnl $HEADER$
dnl
AC_DEFUN([OPAL_CONFIGURE_OPTIONS],[
opal_show_subtitle "OPAL Configuration options"
#
# Is this a developer copy?
#
if test -d .svn -o -d .hg -o -d .git; then
OPAL_DEVEL=1
else
OPAL_DEVEL=0
fi
#
# Code coverage options
#
AC_MSG_CHECKING([if want to run code coverage])
AC_ARG_ENABLE(coverage,
AC_HELP_STRING([--enable-coverage],
[enable code coverage files to be generated]))
if test "$enable_coverage" = "yes"; then
if test "$enable_shared" = "yes"; then
AC_MSG_WARN([Code coverage can run only with static libraries. Please
run configure with --enable-static --disable-shared if
you want code coverage. Also ensure that you execute
make clean too ensure removal of all leftover shared
mpi libraries])
AC_MSG_ERROR([Cannot continue processing])
fi
AC_MSG_RESULT([yes])
WANT_COVERAGE=1
else
AC_MSG_RESULT([no])
WANT_COVERAGE=0
fi
#
# Branch Probabilities options
#
AC_MSG_CHECKING([if want to compile with branch probabilities])
AC_ARG_ENABLE(branch-probabilities,
AC_HELP_STRING([--enable-branch-probabilities],
[enable profile arcs and branch probability optimization]))
if test "$enable_branch_probabilities" = "yes"; then
AC_MSG_RESULT([yes])
WANT_BRANCH_PROBABILITIES=1
else
AC_MSG_RESULT([no])
WANT_BRANCH_PROBABILITIES=0
fi
#
# Memory debugging
#
AC_MSG_CHECKING([if want to debug memory usage])
AC_ARG_ENABLE(mem-debug,
AC_HELP_STRING([--enable-mem-debug],
[enable memory debugging (debugging only) (default: disabled)]))
if test "$enable_mem_debug" = "yes"; then
AC_MSG_RESULT([yes])
WANT_MEM_DEBUG=1
else
AC_MSG_RESULT([no])
WANT_MEM_DEBUG=0
fi
#################### Early development override ####################
if test "$WANT_MEM_DEBUG" = "0" -a -z "$enable_mem_debug" -a "$OPAL_DEVEL" = 1; then
WANT_MEM_DEBUG=1
echo "--> developer override: enable mem profiling by default"
fi
#################### Early development override ####################
AC_DEFINE_UNQUOTED(OPAL_ENABLE_MEM_DEBUG, $WANT_MEM_DEBUG,
[Whether we want the memory profiling or not])
#
# Memory profiling
#
AC_MSG_CHECKING([if want to profile memory usage])
AC_ARG_ENABLE(mem-profile,
AC_HELP_STRING([--enable-mem-profile],
[enable memory profiling (debugging only) (default: disabled)]))
if test "$enable_mem_profile" = "yes"; then
AC_MSG_RESULT([yes])
WANT_MEM_PROFILE=1
else
AC_MSG_RESULT([no])
WANT_MEM_PROFILE=0
fi
#################### Early development override ####################
if test "$WANT_MEM_PROFILE" = "0" -a -z "$enable_mem_profile" -a "$OPAL_DEVEL" = 1; then
WANT_MEM_PROFILE=1
echo "--> developer override: enable mem profiling by default"
fi
#################### Early development override ####################
AC_DEFINE_UNQUOTED(OPAL_ENABLE_MEM_PROFILE, $WANT_MEM_PROFILE,
[Whether we want the memory profiling or not])
#
# Developer picky compiler options
#
AC_MSG_CHECKING([if want developer-level compiler pickyness])
AC_ARG_ENABLE(picky,
AC_HELP_STRING([--enable-picky],
[enable developer-level compiler pickyness when building Open MPI (default: disabled)]))
if test "$enable_picky" = "yes"; then
AC_MSG_RESULT([yes])
WANT_PICKY_COMPILER=1
else
AC_MSG_RESULT([no])
WANT_PICKY_COMPILER=0
fi
#################### Early development override ####################
if test "$WANT_PICKY_COMPILER" = "0" -a -z "$enable_picky" -a "$OPAL_DEVEL" = 1; then
WANT_PICKY_COMPILER=1
echo "--> developer override: enable picky compiler by default"
fi
#################### Early development override ####################
#
# Developer debugging
#
AC_MSG_CHECKING([if want developer-level debugging code])
AC_ARG_ENABLE(debug,
AC_HELP_STRING([--enable-debug],
[enable developer-level debugging code (not for general MPI users!) (default: disabled)]))
if test "$enable_debug" = "yes"; then
AC_MSG_RESULT([yes])
WANT_DEBUG=1
else
AC_MSG_RESULT([no])
WANT_DEBUG=0
fi
#################### Early development override ####################
if test "$WANT_DEBUG" = "0" -a -z "$enable_debug" -a "$OPAL_DEVEL" = 1; then
WANT_DEBUG=1
echo "--> developer override: enable debugging code by default"
fi
#################### Early development override ####################
if test "$WANT_DEBUG" = "0"; then
CFLAGS="-DNDEBUG $CFLAGS"
CXXFLAGS="-DNDEBUG $CXXFLAGS"
fi
AC_DEFINE_UNQUOTED(OPAL_ENABLE_DEBUG, $WANT_DEBUG,
[Whether we want developer-level debugging code or not])
AC_ARG_ENABLE(debug-symbols,
AC_HELP_STRING([--disable-debug-symbols],
[Disable adding compiler flags to enable debugging symbols if --enable-debug is specified. For non-debugging builds, this flag has no effect.]))
#
# Do we want to install all of OPAL/ORTE and OMPI's header files?
#
AC_MSG_CHECKING([if want to install project-internal header files])
AC_ARG_WITH(devel-headers,
AC_HELP_STRING([--with-devel-headers],
[normal MPI users/applications do not need this (mpi.h and mpif.h are ALWAYS installed). Developer headers are only necessary for MCA module authors (default: disabled).]))
if test "$with_devel_headers" = "yes"; then
AC_MSG_RESULT([yes])
WANT_INSTALL_HEADERS=1
else
AC_MSG_RESULT([no])
WANT_INSTALL_HEADERS=0
fi
AM_CONDITIONAL(WANT_INSTALL_HEADERS, test "$WANT_INSTALL_HEADERS" = 1)
#
# Do we want the pretty-print stack trace feature?
#
AC_MSG_CHECKING([if want pretty-print stacktrace])
AC_ARG_ENABLE([pretty-print-stacktrace],
[AC_HELP_STRING([--enable-pretty-print-stacktrace],
[Pretty print stacktrace on process signal (default: enabled)])])
if test "$enable_pretty_print_stacktrace" = "no" ; then
AC_MSG_RESULT([no])
WANT_PRETTY_PRINT_STACKTRACE=0
else
AC_MSG_RESULT([yes])
WANT_PRETTY_PRINT_STACKTRACE=1
fi
AC_DEFINE_UNQUOTED([OPAL_WANT_PRETTY_PRINT_STACKTRACE],
[$WANT_PRETTY_PRINT_STACKTRACE],
[if want pretty-print stack trace feature])
#
# Do we want PTY support?
#
AC_MSG_CHECKING([if want pty support])
AC_ARG_ENABLE(pty-support,
AC_HELP_STRING([--enable-pty-support],
[Enable/disable PTY support for STDIO forwarding. (default: enabled)]))
if test "$enable_pty_support" = "no" ; then
AC_MSG_RESULT([no])
OPAL_ENABLE_PTY_SUPPORT=0
else
AC_MSG_RESULT([yes])
OPAL_ENABLE_PTY_SUPPORT=1
fi
AC_DEFINE_UNQUOTED([OPAL_ENABLE_PTY_SUPPORT], [$OPAL_ENABLE_PTY_SUPPORT],
[Whether user wants PTY support or not])
#
# Do we want to disable weak symbols for some reason?
#
AC_MSG_CHECKING([if want weak symbol support])
AC_ARG_ENABLE(weak-symbols,
AC_HELP_STRING([--enable-weak-symbols],
[use weak symbols, if available (default: enabled)]))
if test "$enable_weak_symbols" != "no"; then
AC_MSG_RESULT([yes])
WANT_WEAK_SYMBOLS=1
else
AC_MSG_RESULT([no])
WANT_WEAK_SYMBOLS=0
fi
#
# Do we want to allow DLOPEN?
#
AC_MSG_CHECKING([if want dlopen support])
AC_ARG_ENABLE([dlopen],
[AC_HELP_STRING([--enable-dlopen],
[Whether build should attempt to use dlopen (or
similar) to dynamically load components.
Disabling dlopen implies --disable-mca-dso.
(default: enabled)])])
if test "$enable_dlopen" = "no" ; then
enable_mca_dso="no"
enable_mca_static="yes"
OPAL_ENABLE_DLOPEN_SUPPORT=0
AC_MSG_RESULT([no])
else
OPAL_ENABLE_DLOPEN_SUPPORT=1
AC_MSG_RESULT([yes])
fi
#
# Heterogeneous support
#
AC_MSG_CHECKING([if want heterogeneous support])
AC_ARG_ENABLE([heterogeneous],
[AC_HELP_STRING([--enable-heterogeneous],
[Enable features required for heterogeneous
platform support (default: disabled)])])
if test "$enable_heterogeneous" = "yes" ; then
AC_MSG_RESULT([yes])
opal_want_heterogeneous=1
else
AC_MSG_RESULT([no])
opal_want_heterogeneous=0
fi
AC_DEFINE_UNQUOTED([OPAL_ENABLE_HETEROGENEOUS_SUPPORT],
[$opal_want_heterogeneous],
[Enable features required for heterogeneous support])
#
# Cross-compile data
#
AC_ARG_WITH([cross],
[AC_HELP_STRING([--with-cross=FILE],
[Specify configure values that can not be determined in a cross-compilation environment. See the Open MPI FAQ.])])
if test "$with_cross" = "yes" ; then
AC_MSG_ERROR([--with-cross argument must include FILE option])
elif test "$with_cross" = "no" ; then
AC_MSG_ERROR([--without-cross is not a valid argument])
elif test "$with_cross" != "" ; then
if test ! -r $with_cross ; then
AC_MSG_ERROR([could not find cross-compile data file $with_cross])
fi
# eval into environment
OPAL_LOG_MSG([Loading cross-compile file $with_cross, with contents below])
OPAL_LOG_FILE([$with_cross])
. "$with_cross"
fi
#
# Do we want to install binaries?
#
AC_ARG_ENABLE([binaries],
[AC_HELP_STRING([--enable-binaries],
[Build and install binaries required for Open MPI, such as the wrapper compilers. Useful for multi-lib installations. (default: enabled)])])
AM_CONDITIONAL([OPAL_INSTALL_BINARIES], [test "$enable_binaries" != "no"])
AC_ARG_ENABLE([script-wrapper-compilers],
[AC_HELP_STRING([--enable-script-wrapper-compilers],
[Use less featured script-based wrapper compilers instead of the standard C-based wrapper compilers. This option ignores the --disable-binaries option and is mainly useful in cross-compile environments])])
if test "$enable_script_wrapper_compilers" = "yes"; then
WANT_SCRIPT_WRAPPER_COMPILERS=1
else
WANT_SCRIPT_WRAPPER_COMPILERS=0
fi
AM_CONDITIONAL([OPAL_WANT_SCRIPT_WRAPPER_COMPILERS], [test "$enable_script_wrapper_compilers" = "yes"])
#
# Support per-user config files?
#
AC_ARG_ENABLE([per-user-config-files],
[AC_HELP_STRING([--enable-per-user-config-files],
[Disable per-user configuration files, to save disk accesses during job start-up. This is likely desirable for large jobs. Note that this can also be acheived by environment variables at run-time. (default: enabled)])])
if test "$enable_per_user_config_files" = "no" ; then
result=0
else
result=1
fi
AC_DEFINE_UNQUOTED([OPAL_WANT_HOME_CONFIG_FILES], [$result],
[Enable per-user config files])
#
# Do we want to enable IPv6 support?
#
AC_MSG_CHECKING([if want IPv6 support])
AC_ARG_ENABLE([ipv6],
[AC_HELP_STRING([--enable-ipv6],
[Enable IPv6 support, but only if the underlying system supports it (default: disabled)])])
if test "$enable_ipv6" = "yes"; then
AC_MSG_RESULT([yes])
opal_want_ipv6=1
else
AC_MSG_RESULT([no])
opal_want_ipv6=0
fi
AC_DEFINE_UNQUOTED([OPAL_ENABLE_IPV6], [$opal_want_ipv6],
[Enable IPv6 support, but only if the underlying system supports it])
#
# Package/brand string
#
AC_MSG_CHECKING([if want package/brand string])
AC_ARG_WITH([package-string],
[AC_HELP_STRING([--with-package-string=STRING],
[Use a branding string throughout Open MPI])])
if test "$with_package_string" = "" -o "$with_package_string" = "no"; then
with_package_string="Open MPI $OPAL_CONFIGURE_USER@$OPAL_CONFIGURE_HOST Distribution"
fi
AC_DEFINE_UNQUOTED([OPAL_PACKAGE_STRING], ["$with_package_string"],
[package/branding string for Open MPI])
AC_MSG_RESULT([$with_package_string])
#
# Ident string
#
AC_MSG_CHECKING([if want ident string])
AC_ARG_WITH([ident-string],
[AC_HELP_STRING([--with-ident-string=STRING],
[Embed an ident string into Open MPI object files])])
if test "$with_ident_string" = "" -o "$with_ident_string" = "no"; then
with_ident_string="%VERSION%"
fi
# This is complicated, because $OPAL_VERSION may have spaces in it.
# So put the whole sed expr in single quotes -- i.e., directly
# substitute %VERSION% for (not expanded) $OPAL_VERSION.
with_ident_string="`echo $with_ident_string | sed -e 's/%VERSION%/$OPAL_VERSION/'`"
# Now eval an echo of that so that the "$OPAL_VERSION" token is
# replaced with its value. Enclose the whole thing in "" so that it
# ends up as 1 token.
with_ident_string="`eval echo $with_ident_string`"
AC_DEFINE_UNQUOTED([OPAL_IDENT_STRING], ["$with_ident_string"],
[ident string for Open MPI])
AC_MSG_RESULT([$with_ident_string])
#
# Use alternative checksum algorithm
#
AC_MSG_CHECKING([if want to use an alternative checksum algo for messages])
AC_ARG_WITH([dst-checksum],
[AC_HELP_STRING([--with-dst-checksum],
[Use an alternative checksum algorithm for messages])])
if test "$with_dst_checksum" = "yes"; then
AC_MSG_RESULT([yes])
- Split the datatype engine into two parts: an MPI specific part in OMPI and a language agnostic part in OPAL. The convertor is completely moved into OPAL. This offers several benefits as described in RFC http://www.open-mpi.org/community/lists/devel/2009/07/6387.php namely: - Fewer basic types (int* and float* types, boolean and wchar - Fixing naming scheme to ompi-nomenclature. - Usability outside of the ompi-layer. - Due to the fixed nature of simple opal types, their information is completely known at compile time and therefore constified - With fewer datatypes (22), the actual sizes of bit-field types may be reduced from 64 to 32 bits, allowing reorganizing the opal_datatype structure, eliminating holes and keeping data required in convertor (upon send/recv) in one cacheline... This has implications to the convertor-datastructure and other parts of the code. - Several performance tests have been run, the netpipe latency does not change with this patch on Linux/x86-64 on the smoky cluster. - Extensive tests have been done to verify correctness (no new regressions) using: 1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and ompi-ddt: a. running both trunk and ompi-ddt resulted in no differences (except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run correctly). b. with --enable-memchecker and running under valgrind (one buglet when run with static found in test-suite, commited) 2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt: all passed (except for the dynamic/ tests failed!! as trunk/MTT) 3. compilation and usage of HDF5 tests on Jaguar using PGI and PathScale compilers. 4. compilation and usage on Scicortex. - Please note, that for the heterogeneous case, (-m32 compiled binaries/ompi), neither ompi-trunk, nor ompi-ddt branch would successfully launch. This commit was SVN r21641.
2009-07-13 08:56:31 +04:00
CFLAGS="-DOPAL_CSUM_DST $CFLAGS"
else
AC_MSG_RESULT([no])
fi
#
# User level (mpi.h.in) visible maximum lengths of strings.
# These may be required in lower-level libraries to set up matching
# data-structures (e.g. OPAL_MAX_OBJECT_NAME).
#
# Default values (as of OMPI-1.3), and some sane minimum and maximum values
#
# No lower and upper bound required or enforced
OPAL_WITH_OPTION_MIN_MAX_VALUE(processor_name, 256, 16, 1024)
# Min length according to information passed in ompi/errhandler/errcode.c
OPAL_WITH_OPTION_MIN_MAX_VALUE(error_string, 256, 64, 1024)
# Min length according to MPI-2.1, p. 236 (information passed in ompi/communicator/comm.c: min only 48)
OPAL_WITH_OPTION_MIN_MAX_VALUE(object_name, 64, 64, 256)
# Min and Max length according to MPI-2.1, p. 287 is 32; longest key in ROMIO however 33
OPAL_WITH_OPTION_MIN_MAX_VALUE(info_key, 36, 34, 255)
# No lower and upper bound required or enforced!
OPAL_WITH_OPTION_MIN_MAX_VALUE(info_val, 256, 32, 1024)
# Min length according to _POSIX_HOST_NAME_MAX=255 (4*HOST_NAME_MAX)
OPAL_WITH_OPTION_MIN_MAX_VALUE(port_name, 1024, 255, 2048)
# Min length accroding to MPI-2.1, p. 418
OPAL_WITH_OPTION_MIN_MAX_VALUE(datarep_string, 128, 64, 256)
# How to build libltdl
AC_ARG_WITH([libltdl],
[AC_HELP_STRING([--with-libltdl(=DIR)],
[Where to find libltdl (this option is ignored if --disable-dlopen is used). DIR can take one of three values: "internal", "external", or a valid directory name. "internal" (or no DIR value) forces Open MPI to use its internal copy of libltdl. "external" forces Open MPI to use an external installation of libltdl. Supplying a valid directory name also forces Open MPI to use an external installation of libltdl, and adds DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries.])])
A number of C/R enhancements per RFC below: http://www.open-mpi.org/community/lists/devel/2010/07/8240.php Documentation: http://osl.iu.edu/research/ft/ Major Changes: -------------- * Added C/R-enabled Debugging support. Enabled with the --enable-crdebug flag. See the following website for more information: http://osl.iu.edu/research/ft/crdebug/ * Added Stable Storage (SStore) framework for checkpoint storage * 'central' component does a direct to central storage save * 'stage' component stages checkpoints to central storage while the application continues execution. * 'stage' supports offline compression of checkpoints before moving (sstore_stage_compress) * 'stage' supports local caching of checkpoints to improve automatic recovery (sstore_stage_caching) * Added Compression (compress) framework to support * Add two new ErrMgr recovery policies * {{{crmig}}} C/R Process Migration * {{{autor}}} C/R Automatic Recovery * Added the {{{ompi-migrate}}} command line tool to support the {{{crmig}}} ErrMgr component * Added CR MPI Ext functions (enable them with {{{--enable-mpi-ext=cr}}} configure option) * {{{OMPI_CR_Checkpoint}}} (Fixes trac:2342) * {{{OMPI_CR_Restart}}} * {{{OMPI_CR_Migrate}}} (may need some more work for mapping rules) * {{{OMPI_CR_INC_register_callback}}} (Fixes trac:2192) * {{{OMPI_CR_Quiesce_start}}} * {{{OMPI_CR_Quiesce_checkpoint}}} * {{{OMPI_CR_Quiesce_end}}} * {{{OMPI_CR_self_register_checkpoint_callback}}} * {{{OMPI_CR_self_register_restart_callback}}} * {{{OMPI_CR_self_register_continue_callback}}} * The ErrMgr predicted_fault() interface has been changed to take an opal_list_t of ErrMgr defined types. This will allow us to better support a wider range of fault prediction services in the future. * Add a progress meter to: * FileM rsh (filem_rsh_process_meter) * SnapC full (snapc_full_progress_meter) * SStore stage (sstore_stage_progress_meter) * Added 2 new command line options to ompi-restart * --showme : Display the full command line that would have been exec'ed. * --mpirun_opts : Command line options to pass directly to mpirun. (Fixes trac:2413) * Deprecated some MCA params: * crs_base_snapshot_dir deprecated, use sstore_stage_local_snapshot_dir * snapc_base_global_snapshot_dir deprecated, use sstore_base_global_snapshot_dir * snapc_base_global_shared deprecated, use sstore_stage_global_is_shared * snapc_base_store_in_place deprecated, replaced with different components of SStore * snapc_base_global_snapshot_ref deprecated, use sstore_base_global_snapshot_ref * snapc_base_establish_global_snapshot_dir deprecated, never well supported * snapc_full_skip_filem deprecated, use sstore_stage_skip_filem Minor Changes: -------------- * Fixes trac:1924 : {{{ompi-restart}}} now recognizes path prefixed checkpoint handles and does the right thing. * Fixes trac:2097 : {{{ompi-info}}} should now report all available CRS components * Fixes trac:2161 : Manual checkpoint movement. A user can 'mv' a checkpoint directory from the original location to another and still restart from it. * Fixes trac:2208 : Honor various TMPDIR varaibles instead of forcing {{{/tmp}}} * Move {{{ompi_cr_continue_like_restart}}} to {{{orte_cr_continue_like_restart}}} to be more flexible in where this should be set. * opal_crs_base_metadata_write* functions have been moved to SStore to support a wider range of metadata handling functionality. * Cleanup the CRS framework and components to work with the SStore framework. * Cleanup the SnapC framework and components to work with the SStore framework (cleans up these code paths considerably). * Add 'quiesce' hook to CRCP for a future enhancement. * We now require a BLCR version that supports {{{cr_request_file()}}} or {{{cr_request_checkpoint()}}} in order to make the code more maintainable. Note that {{{cr_request_file}}} has been deprecated since 0.7.0, so we prefer to use {{{cr_request_checkpoint()}}}. * Add optional application level INC callbacks (registered through the CR MPI Ext interface). * Increase the {{{opal_cr_thread_sleep_wait}}} parameter to 1000 microseconds to make the C/R thread less aggressive. * {{{opal-restart}}} now looks for cache directories before falling back on stable storage when asked. * {{{opal-restart}}} also support local decompression before restarting * {{{orte-checkpoint}}} now uses the SStore framework to work with the metadata * {{{orte-restart}}} now uses the SStore framework to work with the metadata * Remove the {{{orte-restart}}} preload option. This was removed since the user only needs to select the 'stage' component in order to support this functionality. * Since the '-am' parameter is saved in the metadata, {{{ompi-restart}}} no longer hard codes {{{-am ft-enable-cr}}}. * Fix {{{hnp}}} ErrMgr so that if a previous component in the stack has 'fixed' the problem, then it should be skipped. * Make sure to decrement the number of 'num_local_procs' in the orted when one goes away. * odls now checks the SStore framework to see if it needs to load any checkpoint files before launching (to support 'stage'). This separates the SStore logic from the --preload-[binary|files] options. * Add unique IDs to the named pipes established between the orted and the app in SnapC. This is to better support migration and automatic recovery activities. * Improve the checks for 'already checkpointing' error path. * A a recovery output timer, to show how long it takes to restart a job * Do a better job of cleaning up the old session directory on restart. * Add a local module to the autor and crmig ErrMgr components. These small modules prevent the 'orted' component from attempting a local recovery (Which does not work for MPI apps at the moment) * Add a fix for bounding the checkpointable region between MPI_Init and MPI_Finalize. This commit was SVN r23587. The following Trac tickets were found above: Ticket 1924 --> https://svn.open-mpi.org/trac/ompi/ticket/1924 Ticket 2097 --> https://svn.open-mpi.org/trac/ompi/ticket/2097 Ticket 2161 --> https://svn.open-mpi.org/trac/ompi/ticket/2161 Ticket 2192 --> https://svn.open-mpi.org/trac/ompi/ticket/2192 Ticket 2208 --> https://svn.open-mpi.org/trac/ompi/ticket/2208 Ticket 2342 --> https://svn.open-mpi.org/trac/ompi/ticket/2342 Ticket 2413 --> https://svn.open-mpi.org/trac/ompi/ticket/2413
2010-08-11 00:51:11 +04:00
AC_DEFINE_UNQUOTED([OPAL_ENABLE_CRDEBUG], [0],
A number of C/R enhancements per RFC below: http://www.open-mpi.org/community/lists/devel/2010/07/8240.php Documentation: http://osl.iu.edu/research/ft/ Major Changes: -------------- * Added C/R-enabled Debugging support. Enabled with the --enable-crdebug flag. See the following website for more information: http://osl.iu.edu/research/ft/crdebug/ * Added Stable Storage (SStore) framework for checkpoint storage * 'central' component does a direct to central storage save * 'stage' component stages checkpoints to central storage while the application continues execution. * 'stage' supports offline compression of checkpoints before moving (sstore_stage_compress) * 'stage' supports local caching of checkpoints to improve automatic recovery (sstore_stage_caching) * Added Compression (compress) framework to support * Add two new ErrMgr recovery policies * {{{crmig}}} C/R Process Migration * {{{autor}}} C/R Automatic Recovery * Added the {{{ompi-migrate}}} command line tool to support the {{{crmig}}} ErrMgr component * Added CR MPI Ext functions (enable them with {{{--enable-mpi-ext=cr}}} configure option) * {{{OMPI_CR_Checkpoint}}} (Fixes trac:2342) * {{{OMPI_CR_Restart}}} * {{{OMPI_CR_Migrate}}} (may need some more work for mapping rules) * {{{OMPI_CR_INC_register_callback}}} (Fixes trac:2192) * {{{OMPI_CR_Quiesce_start}}} * {{{OMPI_CR_Quiesce_checkpoint}}} * {{{OMPI_CR_Quiesce_end}}} * {{{OMPI_CR_self_register_checkpoint_callback}}} * {{{OMPI_CR_self_register_restart_callback}}} * {{{OMPI_CR_self_register_continue_callback}}} * The ErrMgr predicted_fault() interface has been changed to take an opal_list_t of ErrMgr defined types. This will allow us to better support a wider range of fault prediction services in the future. * Add a progress meter to: * FileM rsh (filem_rsh_process_meter) * SnapC full (snapc_full_progress_meter) * SStore stage (sstore_stage_progress_meter) * Added 2 new command line options to ompi-restart * --showme : Display the full command line that would have been exec'ed. * --mpirun_opts : Command line options to pass directly to mpirun. (Fixes trac:2413) * Deprecated some MCA params: * crs_base_snapshot_dir deprecated, use sstore_stage_local_snapshot_dir * snapc_base_global_snapshot_dir deprecated, use sstore_base_global_snapshot_dir * snapc_base_global_shared deprecated, use sstore_stage_global_is_shared * snapc_base_store_in_place deprecated, replaced with different components of SStore * snapc_base_global_snapshot_ref deprecated, use sstore_base_global_snapshot_ref * snapc_base_establish_global_snapshot_dir deprecated, never well supported * snapc_full_skip_filem deprecated, use sstore_stage_skip_filem Minor Changes: -------------- * Fixes trac:1924 : {{{ompi-restart}}} now recognizes path prefixed checkpoint handles and does the right thing. * Fixes trac:2097 : {{{ompi-info}}} should now report all available CRS components * Fixes trac:2161 : Manual checkpoint movement. A user can 'mv' a checkpoint directory from the original location to another and still restart from it. * Fixes trac:2208 : Honor various TMPDIR varaibles instead of forcing {{{/tmp}}} * Move {{{ompi_cr_continue_like_restart}}} to {{{orte_cr_continue_like_restart}}} to be more flexible in where this should be set. * opal_crs_base_metadata_write* functions have been moved to SStore to support a wider range of metadata handling functionality. * Cleanup the CRS framework and components to work with the SStore framework. * Cleanup the SnapC framework and components to work with the SStore framework (cleans up these code paths considerably). * Add 'quiesce' hook to CRCP for a future enhancement. * We now require a BLCR version that supports {{{cr_request_file()}}} or {{{cr_request_checkpoint()}}} in order to make the code more maintainable. Note that {{{cr_request_file}}} has been deprecated since 0.7.0, so we prefer to use {{{cr_request_checkpoint()}}}. * Add optional application level INC callbacks (registered through the CR MPI Ext interface). * Increase the {{{opal_cr_thread_sleep_wait}}} parameter to 1000 microseconds to make the C/R thread less aggressive. * {{{opal-restart}}} now looks for cache directories before falling back on stable storage when asked. * {{{opal-restart}}} also support local decompression before restarting * {{{orte-checkpoint}}} now uses the SStore framework to work with the metadata * {{{orte-restart}}} now uses the SStore framework to work with the metadata * Remove the {{{orte-restart}}} preload option. This was removed since the user only needs to select the 'stage' component in order to support this functionality. * Since the '-am' parameter is saved in the metadata, {{{ompi-restart}}} no longer hard codes {{{-am ft-enable-cr}}}. * Fix {{{hnp}}} ErrMgr so that if a previous component in the stack has 'fixed' the problem, then it should be skipped. * Make sure to decrement the number of 'num_local_procs' in the orted when one goes away. * odls now checks the SStore framework to see if it needs to load any checkpoint files before launching (to support 'stage'). This separates the SStore logic from the --preload-[binary|files] options. * Add unique IDs to the named pipes established between the orted and the app in SnapC. This is to better support migration and automatic recovery activities. * Improve the checks for 'already checkpointing' error path. * A a recovery output timer, to show how long it takes to restart a job * Do a better job of cleaning up the old session directory on restart. * Add a local module to the autor and crmig ErrMgr components. These small modules prevent the 'orted' component from attempting a local recovery (Which does not work for MPI apps at the moment) * Add a fix for bounding the checkpointable region between MPI_Init and MPI_Finalize. This commit was SVN r23587. The following Trac tickets were found above: Ticket 1924 --> https://svn.open-mpi.org/trac/ompi/ticket/1924 Ticket 2097 --> https://svn.open-mpi.org/trac/ompi/ticket/2097 Ticket 2161 --> https://svn.open-mpi.org/trac/ompi/ticket/2161 Ticket 2192 --> https://svn.open-mpi.org/trac/ompi/ticket/2192 Ticket 2208 --> https://svn.open-mpi.org/trac/ompi/ticket/2208 Ticket 2342 --> https://svn.open-mpi.org/trac/ompi/ticket/2342 Ticket 2413 --> https://svn.open-mpi.org/trac/ompi/ticket/2413
2010-08-11 00:51:11 +04:00
[Whether we want checkpoint/restart enabled debugging functionality or not])
# some systems don't want/like getpwuid
AC_MSG_CHECKING([if want getpwuid support])
AC_ARG_ENABLE([getpwuid],
[AC_HELP_STRING([--disable-getpwuid],
[Disable getpwuid support (default: enabled)])])
if test "$enable_getpwuid" = "no"; then
AC_MSG_RESULT([no])
opal_want_getpwuid=0
else
AC_MSG_RESULT([yes])
opal_want_getpwuid=1
fi
AC_DEFINE_UNQUOTED([OPAL_ENABLE_GETPWUID], [$opal_want_getpwuid],
[Disable getpwuid support (default: enabled)])
])dnl