Per the RFC schedule, this commit adds Mellanox OpenSHMEM to the trunk. It does not yet run on OSX or with CM PML for an MTL other than MXM. Mellanox is aware of these issues and is in the process of resolving them. This should be added to \ncmr=v1.7.4:subject=Move OSHMEM to 1.7.4:reviewer=rhc
This commit was SVN r29153.
Этот коммит содержится в:
родитель
46ed907003
Коммит
b3f88c4a1d
5
NEWS
5
NEWS
@ -87,6 +87,11 @@ Trunk (not on release branches yet)
|
||||
- Add support for the MPI tool information interface (MPI_T).
|
||||
- Update ompi_info to support limiting output by opal info level.
|
||||
|
||||
- Wrapper compilers now add rpath support by default to generated
|
||||
executables on systems that support it. This behavior can be
|
||||
disabled via --disable-wrapper-rpath. See note in README about ABI
|
||||
issues when using rpath in MPI applications.
|
||||
|
||||
|
||||
1.7.2
|
||||
-----
|
||||
|
3
VERSION
3
VERSION
@ -1,6 +1,8 @@
|
||||
# Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
# Copyright (c) 2008-2011 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
|
||||
# Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
# All rights reserved.
|
||||
|
||||
# This is the VERSION file for Open MPI, describing the precise
|
||||
# version of Open MPI in this distribution. The various components of
|
||||
@ -96,6 +98,7 @@ libmpi_usempi_ignore_tkr_so_version=0:0:0
|
||||
libopen_rte_so_version=0:0:0
|
||||
libopen_pal_so_version=0:0:0
|
||||
libmpi_java_so_version=0:0:0
|
||||
libshmem_so_version=0:0:0
|
||||
|
||||
# "Common" components install standalone libraries that are run-time
|
||||
# linked by one or more components. So they need to be versioned as
|
||||
|
12
autogen.pl
12
autogen.pl
@ -2,7 +2,8 @@
|
||||
#
|
||||
# Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||
#
|
||||
# Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
@ -41,6 +42,7 @@ my @subdirs;
|
||||
# Command line parameters
|
||||
my $no_ompi_arg = 0;
|
||||
my $no_orte_arg = 0;
|
||||
my $no_oshmem_arg = 0;
|
||||
my $quiet_arg = 0;
|
||||
my $debug_arg = 0;
|
||||
my $help_arg = 0;
|
||||
@ -985,6 +987,7 @@ sub patch_autotools_output {
|
||||
|
||||
my $ok = Getopt::Long::GetOptions("no-ompi" => \$no_ompi_arg,
|
||||
"no-orte" => \$no_orte_arg,
|
||||
"no-oshmem" => \$no_oshmem_arg,
|
||||
"quiet|q" => \$quiet_arg,
|
||||
"debug|d" => \$debug_arg,
|
||||
"help|h" => \$help_arg,
|
||||
@ -999,6 +1002,7 @@ if (!$ok || $help_arg) {
|
||||
print "Options:
|
||||
--no-ompi | -no-ompi Do not build the Open MPI layer
|
||||
--no-orte | -no-orte Do not build the ORTE layer
|
||||
--no-oshmem | -no-oshmem Do not build the OSHMEM layer
|
||||
--quiet | -q Do not display normal verbose output
|
||||
--debug | -d Output lots of debug information
|
||||
--help | -h This help list
|
||||
@ -1027,6 +1031,10 @@ if (! -e "orte") {
|
||||
$no_orte_arg = 1;
|
||||
debug "No orte subdirectory found - will not build ORTE\n";
|
||||
}
|
||||
if (! -e "oshmem") {
|
||||
$no_oshmem_arg = 1;
|
||||
debug "No oshmem subdirectory found - will not build OSHMEM\n";
|
||||
}
|
||||
|
||||
if ($no_ompi_arg == 1 && $no_orte_arg == 0) {
|
||||
$project_name_long = "Open MPI Run Time Environment";
|
||||
@ -1193,6 +1201,8 @@ push(@{$projects}, { name => "orte", dir => "orte", need_base => 1 })
|
||||
if (!$no_orte_arg);
|
||||
push(@{$projects}, { name => "ompi", dir => "ompi", need_base => 1 })
|
||||
if (!$no_ompi_arg);
|
||||
push(@{$projects}, { name => "oshmem", dir => "oshmem", need_base => 1 })
|
||||
if (!$no_ompi_arg && !$no_orte_arg && !$no_oshmem_arg);
|
||||
|
||||
$m4 .= "dnl Separate m4 define for each project\n";
|
||||
foreach my $p (@$projects) {
|
||||
|
@ -12,6 +12,8 @@
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
|
||||
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
@ -215,6 +217,7 @@ AC_DEFUN([OPAL_CHECK_ATTRIBUTES], [
|
||||
opal_cv___attribute__visibility=0
|
||||
opal_cv___attribute__warn_unused_result=0
|
||||
opal_cv___attribute__weak_alias=0
|
||||
opal_cv___attribute__destructor=0
|
||||
else
|
||||
AC_MSG_RESULT([yes])
|
||||
|
||||
@ -533,6 +536,13 @@ AC_DEFUN([OPAL_CHECK_ATTRIBUTES], [
|
||||
[],
|
||||
[])
|
||||
|
||||
_OPAL_CHECK_SPECIFIC_ATTRIBUTE([destructor],
|
||||
[
|
||||
void foo(void) __attribute__ ((__destructor__));
|
||||
void foo(void) { return ; }
|
||||
],
|
||||
[],
|
||||
[])
|
||||
fi
|
||||
|
||||
# Now that all the values are set, define them
|
||||
@ -581,4 +591,6 @@ AC_DEFUN([OPAL_CHECK_ATTRIBUTES], [
|
||||
[Whether your compiler has __attribute__ warn unused result or not])
|
||||
AC_DEFINE_UNQUOTED(OPAL_HAVE_ATTRIBUTE_WEAK_ALIAS, [$opal_cv___attribute__weak_alias],
|
||||
[Whether your compiler has __attribute__ weak alias or not])
|
||||
AC_DEFINE_UNQUOTED(OPAL_HAVE_ATTRIBUTE_DESTRUCTOR, [$opal_cv___attribute__destructor],
|
||||
[Whether your compiler has __attribute__ destructor or not])
|
||||
])
|
||||
|
26
config/oshmem_config_files.m4
Обычный файл
26
config/oshmem_config_files.m4
Обычный файл
@ -0,0 +1,26 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# OSHMEM_CONFIG_FILES
# -------------------
# Register every file under oshmem/ that config.status has to generate:
# the Makefiles of the library, headers and C/Fortran bindings, the
# profiling layer, and the wrapper compilers together with their
# wrapper-data text files.
AC_DEFUN([OSHMEM_CONFIG_FILES],[
    AC_CONFIG_FILES([
        oshmem/Makefile
        oshmem/include/Makefile
        oshmem/shmem/c/Makefile
        oshmem/shmem/f77/Makefile
        oshmem/shmem/c/profile/Makefile
        oshmem/tools/wrappers/Makefile
        oshmem/tools/wrappers/shmemcc-wrapper-data.txt
        oshmem/tools/wrappers/shmemf77-wrapper-data.txt
        oshmem/tools/wrappers/shmemf90-wrapper-data.txt
    ])
])
|
164
config/oshmem_configure_options.m4
Обычный файл
164
config/oshmem_configure_options.m4
Обычный файл
@ -0,0 +1,164 @@
|
||||
dnl -*- shell-script -*-
|
||||
dnl
|
||||
dnl Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
dnl All rights reserved.
|
||||
dnl
|
||||
dnl $COPYRIGHT$
|
||||
dnl
|
||||
dnl Additional copyrights may follow
|
||||
dnl
|
||||
dnl $HEADER$
|
||||
dnl
|
||||
|
||||
|
||||
|
||||
# OSHMEM_CONFIGURE_OPTIONS
# ------------------------
# Process the configure command line options that are specific to OSHMEM:
#
#   --enable-oshmem-compat       sets the OSHMEM_SPEC_COMPAT define (1/0)
#   --with-shmem-param-check     sets the OSHMEM_PARAM_CHECK define (1/0)
#   --enable-oshmem-profile      sets the OSHMEM_PROFILING conditional
#
# It also marks OSHMEM_LIBSHMEM_EXTRA_LIBS and
# OSHMEM_LIBSHMEM_EXTRA_LDFLAGS for substitution.
AC_DEFUN([OSHMEM_CONFIGURE_OPTIONS],[
ompi_show_subtitle "OSHMEM Configuration options"

AC_SUBST(OSHMEM_LIBSHMEM_EXTRA_LIBS)
AC_SUBST(OSHMEM_LIBSHMEM_EXTRA_LDFLAGS)

#
# Enable compatibility mode
#
AC_MSG_CHECKING([if want SGI/Quadrics compatibility mode])
AC_ARG_ENABLE(oshmem-compat,
    AC_HELP_STRING([--enable-oshmem-compat],
                   [enable compatibility mode (default: enabled)]))
# Anything other than an explicit "no" keeps compatibility mode on.
if test "$enable_oshmem_compat" != "no"; then
    AC_MSG_RESULT([yes])
    OSHMEM_SPEC_COMPAT=1
else
    AC_MSG_RESULT([no])
    OSHMEM_SPEC_COMPAT=0
fi
AC_DEFINE_UNQUOTED([OSHMEM_SPEC_COMPAT], [$OSHMEM_SPEC_COMPAT],
                   [Whether user wants OSHMEM in compatibility mode or not])

#
# Do we want to disable OSHMEM parameter checking at run-time?
#
AC_MSG_CHECKING([if want SHMEM API parameter checking])
AC_ARG_WITH(shmem-param-check,
    AC_HELP_STRING([--with-shmem-param-check(=VALUE)],
                   [behavior of SHMEM function parameter checking. Valid values are: always, never. If --with-shmem-param-check is specified with no VALUE argument, it is equivalent to a VALUE of "always"; --without-shmem-param-check is equivalent to "never" (default: always).]))
# Checking is on unless the user explicitly turns it off; an unrecognized
# value keeps this default and only produces a warning.
shmem_param_check=1
if test "$with_shmem_param_check" = "no" -o \
    "$with_shmem_param_check" = "never"; then
    shmem_param_check=0
    AC_MSG_RESULT([never])
elif test "$with_shmem_param_check" = "yes" -o \
    "$with_shmem_param_check" = "always" -o \
    -z "$with_shmem_param_check"; then
    shmem_param_check=1
    AC_MSG_RESULT([always])
else
    AC_MSG_RESULT([unknown])
    AC_MSG_WARN([*** Unrecognized --with-shmem-param-check value])
    AC_MSG_WARN([*** See "configure --help" output])
    AC_MSG_WARN([*** Defaulting to "always"])
fi
AC_DEFINE_UNQUOTED(OSHMEM_PARAM_CHECK, $shmem_param_check,
    [Whether we want to check SHMEM parameters always or never])

#
# OSHMEM profiling support
#
AC_MSG_CHECKING([if want pshmem_])
AC_ARG_ENABLE(oshmem-profile,
    AC_HELP_STRING([--enable-oshmem-profile],
                   [enable OSHMEM profiling (default: enabled)]))
# NOTE(review): the shell variable name below is misspelled ("progiling").
# It is kept unchanged in case macros outside this file read it.
if test "$enable_oshmem_profile" != "no"; then
    AC_MSG_RESULT([yes])
    oshmem_progiling_support=1
else
    AC_MSG_RESULT([no])
    oshmem_progiling_support=0
fi
AM_CONDITIONAL(OSHMEM_PROFILING, test "$oshmem_progiling_support" = 1)
#AC_DEFINE_UNQUOTED([OSHMEM_PROFILING], [$oshmem_progiling_support],
#                   [Whether user wants OSHMEM profiling])

])
|
||||
|
||||
|
||||
# OSHMEM_SETUP_CFLAGS
# -------------------
# Adjust the compiler flags used for OSHMEM and probe the OpenFabrics
# verbs installation for the declarations needed by MEMHEAP:
#
#   - strips -pedantic, -Wundef and -Wno-long-double from CFLAGS
#   - sets and substitutes OSHMEM_CFLAGS for the GNU and Intel compilers
#   - substitutes the openib_* build flags
#   - when OpenFabrics support is found, extends the libshmem link flags
#     and defines MPAGE_ENABLE
AC_DEFUN([OSHMEM_SETUP_CFLAGS],[

    OMPI_C_COMPILER_VENDOR([oshmem_c_vendor])

    #
    # OSHMEM force warnings as errors
    #
    # The SHMEM libraries are not fully ISO C99 compliant, so -pedantic
    # and -Wundef raise a bunch of warnings; they are stripped from the
    # flags used for this component.
    AC_MSG_WARN([Removed -pedantic and -Wundef from CFLAGS for OSHMEM])

    # Strip off problematic arguments, one flag per pass
    oshmem_CFLAGS="$CFLAGS"
    for oshmem_strip_flag in -pedantic -Wundef -Wno-long-double; do
        oshmem_CFLAGS=`echo $oshmem_CFLAGS | sed "s/$oshmem_strip_flag//g"`
    done
    CFLAGS="$oshmem_CFLAGS"

    # Only the GNU and Intel compilers get -Werror; for any other vendor
    # OSHMEM_CFLAGS is left untouched.
    AS_CASE([$oshmem_c_vendor],
        [gnu],   [OSHMEM_CFLAGS=" -Werror"],
        [intel], [OSHMEM_CFLAGS=" -Werror "])

    AC_SUBST([OSHMEM_CFLAGS])

    OMPI_CHECK_OPENFABRICS([openib],
                           [openib_happy="yes"],
                           [openib_happy="no"])

    # substitute in the things needed to build MEMHEAP BASE
    AC_SUBST([openib_CPPFLAGS])
    AC_SUBST([openib_CFLAGS])
    AC_SUBST([openib_LDFLAGS])
    AC_SUBST([openib_LIBS])

    # If we have the openib stuff available, find out what we've got
    AS_IF([test "$openib_happy" = "yes"],
          [
            OSHMEM_LIBSHMEM_EXTRA_LIBS="$OSHMEM_LIBSHMEM_EXTRA_LIBS $openib_LIBS"
            OSHMEM_LIBSHMEM_EXTRA_LDFLAGS="$OSHMEM_LIBSHMEM_EXTRA_LDFLAGS $openib_LDFLAGS"

            # ibv_reg_shared_mr was added in MOFED 1.8
            oshmem_have_mpage=0

            # Probe with the verbs flags in effect, then put the
            # original flags back.
            openib_save_LIBS="$LIBS"
            openib_save_LDFLAGS="$LDFLAGS"
            openib_save_CPPFLAGS="$CPPFLAGS"

            LIBS="$LIBS $openib_LIBS"
            LDFLAGS="$LDFLAGS $openib_LDFLAGS"
            CPPFLAGS="$CPPFLAGS $openib_CPPFLAGS"

            AC_CHECK_DECLS([IBV_ACCESS_ALLOCATE_MR,IBV_ACCESS_SHARED_MR_USER_READ],
                           [oshmem_have_mpage=2], [],
                           [#include <infiniband/verbs.h>])

            LIBS="$openib_save_LIBS"
            LDFLAGS="$openib_save_LDFLAGS"
            CPPFLAGS="$openib_save_CPPFLAGS"

            AC_DEFINE_UNQUOTED(MPAGE_ENABLE, $oshmem_have_mpage,
                [Whether we can use M-PAGE supported since MOFED 1.8])
          ])
])dnl
|
24
configure.ac
24
configure.ac
@ -17,6 +17,8 @@
|
||||
# Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
|
||||
# Copyright (c) 2011-2013 NVIDIA Corporation. All rights reserved.
|
||||
# Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved.
|
||||
# Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
@ -99,6 +101,8 @@ AC_SUBST([CONFIGURE_DEPENDENCIES], ['$(top_srcdir)/VERSION'])
|
||||
# Set up project specific AM_CONDITIONALs
|
||||
AM_CONDITIONAL([PROJECT_OMPI], m4_ifdef([project_ompi], [true], [false]))
|
||||
AM_CONDITIONAL([PROJECT_ORTE], m4_ifdef([project_orte], [true], [false]))
|
||||
AM_CONDITIONAL([PROJECT_OSHMEM], m4_ifdef([project_oshmem], [true], [false]))
|
||||
|
||||
|
||||
ompi_show_subtitle "Checking versions"
|
||||
|
||||
@ -113,6 +117,11 @@ m4_ifdef([project_orte],
|
||||
[$srcdir/VERSION],
|
||||
[orte/include/orte/version.h])])
|
||||
|
||||
m4_ifdef([project_oshmem],
|
||||
[OPAL_SAVE_VERSION([OSHMEM], [Open SHMEM],
|
||||
[$srcdir/VERSION],
|
||||
[oshmem/include/oshmem/version.h])])
|
||||
|
||||
OPAL_SAVE_VERSION([OPAL], [Open Portable Access Layer], [$srcdir/VERSION],
|
||||
[opal/include/opal/version.h])
|
||||
|
||||
@ -137,6 +146,8 @@ m4_ifdef([project_ompi],
|
||||
AC_SUBST(libmca_common_portals_so_version)])
|
||||
m4_ifdef([project_orte],
|
||||
[AC_SUBST(libopen_rte_so_version)])
|
||||
m4_ifdef([project_oshmem],
|
||||
[AC_SUBST(libshmem_so_version)])
|
||||
AC_SUBST(libmca_opal_common_hwloc_so_version)
|
||||
AC_SUBST(libmca_opal_common_pmi_so_version)
|
||||
AC_SUBST(libopen_pal_so_version)
|
||||
@ -161,6 +172,8 @@ m4_ifdef([project_orte],
|
||||
[AC_CONFIG_HEADERS([orte/include/orte_config.h])])
|
||||
m4_ifdef([project_ompi],
|
||||
[AC_CONFIG_HEADERS([ompi/include/ompi_config.h ompi/include/mpi.h])])
|
||||
m4_ifdef([project_oshmem],
|
||||
[AC_CONFIG_HEADER([oshmem/include/oshmem_config.h oshmem/include/shmem.h oshmem/include/shmem_portable_platform.h])])
|
||||
|
||||
# override/fixup the version numbers set by AC_INIT, since on
|
||||
# developer builds, there's no good way to know what the version is
|
||||
@ -241,6 +254,7 @@ OPAL_CONFIGURE_OPTIONS
|
||||
OPAL_CHECK_CUDA
|
||||
m4_ifdef([project_orte], [ORTE_CONFIGURE_OPTIONS])
|
||||
m4_ifdef([project_ompi], [OMPI_CONFIGURE_OPTIONS])
|
||||
m4_ifdef([project_oshmem], [OSHMEM_CONFIGURE_OPTIONS])
|
||||
|
||||
if test "$enable_binaries" = "no" -a "$enable_dist" = "yes"; then
|
||||
AC_MSG_WARN([--disable-binaries is incompatible with --enable dist])
|
||||
@ -860,6 +874,12 @@ AC_DEFINE_UNQUOTED(OMPI_MPI_OFFSET_TYPE, $MPI_OFFSET_TYPE, [Type of MPI_Offset -
|
||||
AC_DEFINE_UNQUOTED(OMPI_MPI_OFFSET_SIZE, $MPI_OFFSET_SIZE, [Size of the MPI_Offset])
|
||||
AC_DEFINE_UNQUOTED(OMPI_OFFSET_DATATYPE, $MPI_OFFSET_DATATYPE, [MPI datatype corresponding to MPI_Offset])
|
||||
|
||||
AC_DEFINE_UNQUOTED(OSHMEM_SHMEM_OFFSET_TYPE, $MPI_OFFSET_TYPE, [Type of SHMEM_Offset -- has to be defined here and typedef'ed later because shmem.h does not get AC SUBST's])
|
||||
AC_DEFINE_UNQUOTED(OSHMEM_SHMEM_OFFSET_SIZE, $MPI_OFFSET_SIZE, [Size of the SHMEM_Offset])
|
||||
AC_DEFINE_UNQUOTED(OSHMEM_OFFSET_DATATYPE, $MPI_OFFSET_DATATYPE, [SHMEM datatype corresponding to SHMEM_Offset])
|
||||
|
||||
AC_DEFINE_UNQUOTED(OPAL_SIZEOF_LONG, $ac_cv_sizeof_long, "Size of 'long' type")
|
||||
|
||||
if test $MPI_COUNT_SIZE -eq 8 ; then
|
||||
MPI_COUNT_MAX="0x7fffffffffffffffll"
|
||||
elif test $MPI_COUNT_SIZE -eq 4 ; then
|
||||
@ -1205,7 +1225,7 @@ if test "$OMPI_TOP_BUILDDIR" != "$OMPI_TOP_SRCDIR"; then
|
||||
# rather than have successive assignments to these shell
|
||||
# variables, lest the $(foo) names try to get evaluated here.
|
||||
# Yuck!
|
||||
CPPFLAGS='-I$(top_srcdir) -I$(top_builddir) -I$(top_srcdir)/opal/include m4_ifdef([project_orte], [-I$(top_srcdir)/orte/include]) m4_ifdef([project_ompi], [-I$(top_srcdir)/ompi/include])'" $CPPFLAGS"
|
||||
CPPFLAGS='-I$(top_srcdir) -I$(top_builddir) -I$(top_srcdir)/opal/include m4_ifdef([project_orte], [-I$(top_srcdir)/orte/include]) m4_ifdef([project_ompi], [-I$(top_srcdir)/ompi/include]) m4_ifdef([project_oshmem], [-I$(top_srcdir)/oshmem/include])'" $CPPFLAGS"
|
||||
# C++ is only relevant if we're building OMPI
|
||||
m4_ifdef([project_ompi], [CXXCPPFLAGS='-I$(top_srcdir) -I$(top_builddir) -I$(top_srcdir)/opal/include -I$(top_srcdir)/orte/include -I$(top_srcdir)/ompi/include'" $CXXCPPFLAGS"])
|
||||
else
|
||||
@ -1219,6 +1239,7 @@ fi
|
||||
|
||||
m4_ifdef([project_orte], [ORTE_SETUP_DEBUGGER_FLAGS],
|
||||
[m4_ifdef([project_ompi], [ORTE_SETUP_DEBUGGER_FLAGS])])
|
||||
m4_ifdef([project_oshmem], [OSHMEM_SETUP_CFLAGS])
|
||||
|
||||
#
|
||||
# Delayed the substitution of CFLAGS and CXXFLAGS until now because
|
||||
@ -1325,5 +1346,6 @@ AC_CONFIG_FILES([
|
||||
OPAL_CONFIG_FILES
|
||||
m4_ifdef([project_orte], [ORTE_CONFIG_FILES])
|
||||
m4_ifdef([project_ompi], [OMPI_CONFIG_FILES])
|
||||
m4_ifdef([project_oshmem], [OSHMEM_CONFIG_FILES])
|
||||
|
||||
AC_OUTPUT
|
||||
|
61
contrib/dist/linux/openmpi.spec
поставляемый
61
contrib/dist/linux/openmpi.spec
поставляемый
@ -10,6 +10,8 @@
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
@ -97,7 +99,7 @@
|
||||
# Should we use the default "check_files" RPM step (i.e., check for
|
||||
# unpackaged files)? It is discouraged to disable this, but some
|
||||
# installers need it (e.g., older versions of OFED, because they
|
||||
# installed lots of other stuff in the BUILD_ROOT before Open MPI).
|
||||
# installed lots of other stuff in the BUILD_ROOT before Open MPI/SHMEM).
|
||||
# type: bool (0/1)
|
||||
%{!?use_check_files: %define use_check_files 1}
|
||||
|
||||
@ -122,7 +124,7 @@
|
||||
# type: bool (0/1)
|
||||
%{!?disable_auto_requires: %define disable_auto_requires 0}
|
||||
|
||||
# On some platforms, Open MPI just flat-out doesn't work with
|
||||
# On some platforms, Open MPI/SHMEM just flat-out doesn't work with
|
||||
# -D_FORTIFY_SOURCE (e.g., some users have reported that there are
|
||||
# problems on ia64 platforms).  In this case, just turn it off
|
||||
# (meaning: this specfile will strip out that flag from the
|
||||
@ -194,7 +196,7 @@
|
||||
#
|
||||
#############################################################################
|
||||
|
||||
Summary: A powerful implementation of MPI
|
||||
Summary: A powerful implementation of MPI/SHMEM
|
||||
Name: %{?_name:%{_name}}%{!?_name:openmpi}
|
||||
Version: $VERSION
|
||||
Release: 1
|
||||
@ -222,8 +224,14 @@ Open MPI is a project combining technologies and resources from
|
||||
several other projects (FT-MPI, LA-MPI, LAM/MPI, and PACX-MPI) in
|
||||
order to build the best MPI library available.
|
||||
|
||||
The project includes an implementation of the SHMEM parallel
|
||||
programming library in the Partitioned Global Address Space.
|
||||
This library provides fast inter-processor communication for large
|
||||
messages using data passing and one-sided communication techniques.
|
||||
The SHMEM API is based on the OpenSHMEM standard from http://www.openshmem.org/
|
||||
|
||||
This RPM contains all the tools necessary to compile, link, and run
|
||||
Open MPI jobs.
|
||||
Open MPI/SHMEM jobs.
|
||||
|
||||
%if !%{build_all_in_one_rpm}
|
||||
|
||||
@ -234,7 +242,7 @@ Open MPI jobs.
|
||||
#############################################################################
|
||||
|
||||
%package runtime
|
||||
Summary: Tools and plugin modules for running Open MPI jobs
|
||||
Summary: Tools and plugin modules for running Open MPI/SHMEM jobs
|
||||
Group: Development/Libraries
|
||||
Provides: mpi
|
||||
%if %{disable_auto_requires}
|
||||
@ -249,9 +257,15 @@ Open MPI is a project combining technologies and resources from several other
|
||||
projects (FT-MPI, LA-MPI, LAM/MPI, and PACX-MPI) in order to build the best
|
||||
MPI library available.
|
||||
|
||||
The project includes an implementation of the SHMEM parallel
|
||||
programming library in the Partitioned Global Address Space.
|
||||
This library provides fast inter-processor communication for large
|
||||
messages using data passing and one-sided communication techniques.
|
||||
The SHMEM API is based on the OpenSHMEM standard from http://www.openshmem.org/
|
||||
|
||||
This subpackage provides general tools (mpirun, mpiexec, etc.) and the
|
||||
Module Component Architecture (MCA) base and plugins necessary for
|
||||
running Open MPI jobs.
|
||||
running Open MPI/SHMEM jobs.
|
||||
|
||||
%endif
|
||||
|
||||
@ -262,7 +276,7 @@ running Open MPI jobs.
|
||||
#############################################################################
|
||||
|
||||
%package devel
|
||||
Summary: Development tools and header files for Open MPI
|
||||
Summary: Development tools and header files for Open MPI/SHMEM
|
||||
Group: Development/Libraries
|
||||
%if %{disable_auto_requires}
|
||||
AutoReq: no
|
||||
@ -274,8 +288,14 @@ Open MPI is a project combining technologies and resources from
|
||||
several other projects (FT-MPI, LA-MPI, LAM/MPI, and PACX-MPI) in
|
||||
order to build the best MPI library available.
|
||||
|
||||
This subpackage provides the development files for Open MPI, such as
|
||||
wrapper compilers and header files for MPI development.
|
||||
The project includes an implementation of the SHMEM parallel
|
||||
programming library in the Partitioned Global Address Space.
|
||||
This library provides fast inter-processor communication for large
|
||||
messages using data passing and one-sided communication techniques.
|
||||
The SHMEM API is based on the OpenSHMEM standard from http://www.openshmem.org/
|
||||
|
||||
This subpackage provides the development files for Open MPI/SHMEM, such as
|
||||
wrapper compilers and header files for MPI/SHMEM development.
|
||||
|
||||
#############################################################################
|
||||
#
|
||||
@ -284,7 +304,7 @@ wrapper compilers and header files for MPI development.
|
||||
#############################################################################
|
||||
|
||||
%package docs
|
||||
Summary: Documentation for Open MPI
|
||||
Summary: Documentation for Open MPI/SHMEM
|
||||
Group: Development/Documentation
|
||||
%if %{disable_auto_requires}
|
||||
AutoReq: no
|
||||
@ -296,7 +316,13 @@ Open MPI is a project combining technologies and resources from several other
|
||||
projects (FT-MPI, LA-MPI, LAM/MPI, and PACX-MPI) in order to build the best
|
||||
MPI library available.
|
||||
|
||||
This subpackage provides the documentation for Open MPI.
|
||||
The project includes an implementation of the SHMEM parallel
|
||||
programming library in the Partitioned Global Address Space.
|
||||
This library provides fast inter-processor communication for large
|
||||
messages using data passing and one-sided communication techniques.
|
||||
The SHMEM API is based on the OpenSHMEM standard from http://www.openshmem.org/
|
||||
|
||||
This subpackage provides the documentation for Open MPI/SHMEM.
|
||||
|
||||
#############################################################################
|
||||
#
|
||||
@ -423,14 +449,14 @@ cat <<EOF >$RPM_BUILD_ROOT/%{modulefile_path}/%{modulefile_subdir}/%{modulefile_
|
||||
#%Module
|
||||
|
||||
# NOTE: This is an automatically-generated file! (generated by the
|
||||
# Open MPI RPM). Any changes made here will be lost a) if the RPM is
|
||||
# Open MPI/SHMEM RPM). Any changes made here will be lost a) if the RPM is
|
||||
# uninstalled, or b) if the RPM is upgraded or uninstalled.
|
||||
|
||||
proc ModulesHelp { } {
|
||||
puts stderr "This module adds Open MPI v%{version} to various paths"
|
||||
puts stderr "This module adds Open MPI/SHMEM v%{version} to various paths"
|
||||
}
|
||||
|
||||
module-whatis "Sets up Open MPI v%{version} in your enviornment"
|
||||
module-whatis "Sets up Open MPI/SHMEM v%{version} in your enviornment"
|
||||
|
||||
prepend-path PATH "%{_prefix}/bin/"
|
||||
prepend-path LD_LIBRARY_PATH %{_libdir}
|
||||
@ -445,7 +471,7 @@ EOF
|
||||
%{__mkdir_p} $RPM_BUILD_ROOT/%{shell_scripts_path}
|
||||
cat <<EOF > $RPM_BUILD_ROOT/%{shell_scripts_path}/%{shell_scripts_basename}.sh
|
||||
# NOTE: This is an automatically-generated file! (generated by the
|
||||
# Open MPI RPM). Any changes made here will be lost if the RPM is
|
||||
# Open MPI/SHMEM RPM). Any changes made here will be lost if the RPM is
|
||||
# uninstalled or upgraded.
|
||||
|
||||
# PATH
|
||||
@ -472,7 +498,7 @@ export MPI_ROOT
|
||||
EOF
|
||||
cat <<EOF > $RPM_BUILD_ROOT/%{shell_scripts_path}/%{shell_scripts_basename}.csh
|
||||
# NOTE: This is an automatically-generated file! (generated by the
|
||||
# Open MPI RPM). Any changes made here will be lost if the RPM is
|
||||
# Open MPI/SHMEM RPM). Any changes made here will be lost if the RPM is
|
||||
# uninstalled or upgraded.
|
||||
|
||||
# path
|
||||
@ -718,6 +744,9 @@ test "x$RPM_BUILD_ROOT" != "x" && rm -rf $RPM_BUILD_ROOT
|
||||
#
|
||||
#############################################################################
|
||||
%changelog
|
||||
* Mon Jun 24 2013 Igor Ivanov <Igor.Ivanov@itseez.com>
|
||||
- Add Open SHMEM parallel programming library as part of Open MPI
|
||||
|
||||
* Tue Dec 11 2012 Jeff Squyres <jsquyres@cisco.com>
|
||||
- Re-release 1.6.0-1.6.3 SRPMs (with new SRPM Release numbers) with
|
||||
patch for VampirTrace's configure script to make it install the
|
||||
|
@ -28,7 +28,7 @@ CXX = mpic++
|
||||
CCC = mpic++
|
||||
FC = mpifort
|
||||
JAVAC = mpijavac
|
||||
|
||||
SHMEM = shmemcc
|
||||
# Using -g is not necessary, but it is helpful for example programs,
|
||||
# especially if users want to examine them with debuggers. Note that
|
||||
# gmake requires the CXXFLAGS macro, while other versions of make
|
||||
@ -47,6 +47,7 @@ EXAMPLES = \
|
||||
hello_mpifh \
|
||||
hello_usempi \
|
||||
hello_usempif08 \
|
||||
hello_shmem \
|
||||
Hello.class \
|
||||
ring_c \
|
||||
ring_cxx \
|
||||
@ -75,6 +76,9 @@ all: hello_c ring_c connectivity_c
|
||||
@ if ompi_info --parsable | grep bindings:java:yes >/dev/null; then \
|
||||
$(MAKE) Hello.class Ring.class; \
|
||||
fi
|
||||
@ if ompi_info --parsable | grep mca:shmem >/dev/null; then \
|
||||
$(MAKE) hello_shmem; \
|
||||
fi
|
||||
|
||||
# The usual "clean" target
|
||||
|
||||
@ -102,3 +106,8 @@ Hello.class: Hello.java
|
||||
$(JAVAC) Hello.java
|
||||
Ring.class: Ring.java
|
||||
$(JAVAC) Ring.java
|
||||
|
||||
hello_shmem: hello_shmem.c
|
||||
$(SHMEM) $(CFLAGS) $^ -o $@
|
||||
|
||||
|
||||
|
@ -29,6 +29,7 @@ different MPI interfaces:
|
||||
Fortran use mpi: hello_usempi.f90
|
||||
Fortran use mpi_f08: hello_usempif08.f90
|
||||
Java: Hello.java
|
||||
OSHMEM: hello_shmem.c
|
||||
|
||||
- Send a trivial message around in a ring
|
||||
C: ring_c.c
|
||||
|
128
examples/hello_shmem.c
Обычный файл
128
examples/hello_shmem.c
Обычный файл
@ -0,0 +1,128 @@
|
||||
#include "shmem.h"
|
||||
#include "stdio.h"
|
||||
|
||||
#define N 100
|
||||
static int target[N];
|
||||
|
||||
static int source[N];
|
||||
|
||||
#define STATIC_CHECK 1
|
||||
#define DYNAMIC_CHECK 1
|
||||
#define ATOMIC 1
|
||||
#define PEER 1
|
||||
|
||||
int main()
|
||||
{
|
||||
int *source_d,*target_d;
|
||||
int i;
|
||||
|
||||
start_pes(0);
|
||||
|
||||
source_d = shmalloc(sizeof(*source_d)*N);
|
||||
target_d = shmalloc(sizeof(*target_d)*N);
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
source_d[i] = source[i] = 1;
|
||||
target[i] = target_d[i] = 9;
|
||||
}
|
||||
|
||||
int peer = PEER;
|
||||
if (_my_pe() == 0)
|
||||
{
|
||||
#if STATIC_CHECK
|
||||
int c, f;
|
||||
int a = c, b = f;
|
||||
#if ATOMIC
|
||||
for (i = 0; i < N; i++)
|
||||
target[i] = shmem_int_g(source + i, peer);
|
||||
#else
|
||||
shmem_int_get(target, source, N, PEER);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if DYNAMIC_CHECK
|
||||
#if ATOMIC
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
target_d[i] = shmem_int_g(source_d + i, peer);
|
||||
}
|
||||
#else
|
||||
shmem_int_get(target_d, source_d, N, PEER);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
}
|
||||
if(_my_pe() == 0)
|
||||
{
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
#if DYNAMIC_CHECK
|
||||
if(target_d[i] != 1)
|
||||
{
|
||||
printf("Get dynamic error %d, target + i = %p, target[0] = %d, target[1] = %d\n",i, target_d + i,target_d[0], target_d[1]);
|
||||
fflush(stdout);
|
||||
return 1;
|
||||
#endif
|
||||
#if STATIC_CHECK
|
||||
if (target[i] != 1)
|
||||
{
|
||||
printf("Get static error %d, target + i = %p, target[i] = %d\n",i, target + i,target[i]);
|
||||
fflush(stdout);
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*put check*/
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
source_d[i] = source[i] = 1;
|
||||
target[i] = target_d[i] = -9;
|
||||
}
|
||||
|
||||
shmem_barrier_all();
|
||||
|
||||
if (_my_pe() == 0)
|
||||
{
|
||||
#if STATIC_CHECK
|
||||
shmem_int_put(target, source, N, PEER);
|
||||
#endif
|
||||
#if DYNAMIC_CHECK
|
||||
shmem_int_put(target_d, source_d, N, PEER);
|
||||
#endif
|
||||
}
|
||||
|
||||
shmem_barrier_all();
|
||||
|
||||
if(_my_pe() == PEER)
|
||||
{
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
#if DYNAMIC_CHECK
|
||||
if(target_d[i] != 1)
|
||||
{
|
||||
printf("Put dynamic error\n");
|
||||
fflush(stdout);
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
#if STATIC_CHECK
|
||||
if (target[i] != 1)
|
||||
{
|
||||
printf("Put static error\n");
|
||||
fflush(stdout);
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
printf("All test passed\n");fflush(stdout);
|
||||
shmem_finalize();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
/*
|
||||
* Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2009 Mellanox Technologies. All rights reserved.
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -12,7 +13,7 @@
|
||||
#ifndef BTL_OPENIB_CONNECT_BASE_H
|
||||
#define BTL_OPENIB_CONNECT_BASE_H
|
||||
|
||||
#include "connect/connect.h"
|
||||
#include "ompi/mca/btl/openib/connect/connect.h"
|
||||
|
||||
#ifdef OMPI_HAVE_RDMAOE
|
||||
#define BTL_OPENIB_CONNECT_BASE_CHECK_IF_NOT_IB(btl) \
|
||||
|
@ -12,7 +12,7 @@
|
||||
* Copyright (c) 2006-2013 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2006-2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2008-2011 Mellanox Technologies. All rights reserved.
|
||||
* Copyright (c) 2008-2013 Mellanox Technologies. All rights reserved.
|
||||
* Copyright (c) 2009-2011 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved
|
||||
*
|
||||
@ -375,6 +375,40 @@ static int qp_connect_all(mca_btl_openib_endpoint_t *endpoint)
|
||||
}
|
||||
|
||||
|
||||
/*
 * Fill permuted_qps[0 .. nqps-1] with a random permutation of the
 * indices 0 .. nqps-1.
 *
 * The array is first set to the identity and then shuffled in place by
 * swapping each position i with a position chosen by random() from
 * i .. nqps-1.  Afterwards the result is verified: if any index is
 * missing or duplicated the permutation is logged through BTL_VERBOSE
 * and the process is aborted.
 *
 * permuted_qps  output array with room for at least nqps entries
 * nqps          number of entries to permute; nothing is done if <= 0
 */
static void permute_array(int *permuted_qps, int nqps)
{
    int i;
    int j;
    int idx;
    int tmp;

    /* A variable-length array must not have a non-positive size. */
    if (nqps <= 0) {
        return;
    }

    {
        /* control[v] counts how often value v occurs in the result. */
        int control[nqps];

        for (i = 0; i < nqps; i++) {
            permuted_qps[i] = i;
            control[i] = 0;
        }

        for (i = 0; i < nqps - 1; i++) {
            idx = i + random() % (nqps - i);
            tmp = permuted_qps[i];
            permuted_qps[i] = permuted_qps[idx];
            permuted_qps[idx] = tmp;
        }

        /* verify that permutation is ok: */
        for (i = 0; i < nqps; i++) {
            control[permuted_qps[i]]++;
        }
        for (i = 0; i < nqps; i++) {
            if (control[i] != 1) {
                BTL_VERBOSE(("bad permutation detected: "));
                /* Separate index so the outer loop counter is not clobbered. */
                for (j = 0; j < nqps; j++) {
                    BTL_VERBOSE(("%d ", permuted_qps[j]));
                }
                BTL_VERBOSE(("\n"));
                abort();
            }
        }
    }
}
|
||||
|
||||
|
||||
/*
|
||||
* Create the local side of all the qp's. The remote sides will be
|
||||
* connected later.
|
||||
@ -384,6 +418,12 @@ static int qp_create_all(mca_btl_base_endpoint_t* endpoint)
|
||||
int qp, rc, pp_qp_num = 0;
|
||||
int32_t rd_rsv_total = 0;
|
||||
|
||||
int rand_qpns[mca_btl_openib_component.num_qps];
|
||||
int i;
|
||||
|
||||
permute_array(rand_qpns, mca_btl_openib_component.num_qps);
|
||||
|
||||
|
||||
for (qp = 0; qp < mca_btl_openib_component.num_qps; ++qp)
|
||||
if(BTL_OPENIB_QP_TYPE_PP(qp)) {
|
||||
rd_rsv_total +=
|
||||
@ -396,11 +436,12 @@ static int qp_create_all(mca_btl_base_endpoint_t* endpoint)
|
||||
if(0 == pp_qp_num && true == endpoint->use_eager_rdma)
|
||||
pp_qp_num = 1;
|
||||
|
||||
for (qp = 0; qp < mca_btl_openib_component.num_qps; ++qp) {
|
||||
for (i = 0; i < mca_btl_openib_component.num_qps; ++i) {
|
||||
struct ibv_srq *srq = NULL;
|
||||
uint32_t max_recv_wr, max_send_wr;
|
||||
int32_t rd_rsv, rd_num_credits;
|
||||
|
||||
qp = rand_qpns[i];
|
||||
/* QP used for SW flow control needs some additional resources */
|
||||
if(qp == mca_btl_openib_component.credits_qp) {
|
||||
rd_rsv = rd_rsv_total;
|
||||
@ -467,7 +508,7 @@ static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp,
|
||||
memset(&attr, 0, sizeof(attr));
|
||||
|
||||
init_attr.qp_type = IBV_QPT_RC;
|
||||
init_attr.send_cq = openib_btl->device->ib_cq[BTL_OPENIB_LP_CQ];
|
||||
init_attr.send_cq = openib_btl->device->ib_cq[BTL_OPENIB_RDMA_QP(qp) ? BTL_OPENIB_HP_CQ: BTL_OPENIB_LP_CQ];
|
||||
init_attr.recv_cq = openib_btl->device->ib_cq[qp_cq_prio(qp)];
|
||||
init_attr.srq = srq;
|
||||
init_attr.cap.max_inline_data = req_inline =
|
||||
|
@ -1,5 +1,7 @@
|
||||
/*
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved.
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -210,6 +212,9 @@ BEGIN_C_DECLS
|
||||
|
||||
#define OMPI_RML_PCONNECT_TAG OMPI_RML_TAG_BASE+13
|
||||
|
||||
/* open shmem oob communication */
|
||||
#define OMPI_RML_TAG_SHMEM OMPI_RML_TAG_BASE+14
|
||||
|
||||
#define OMPI_RML_TAG_DYNAMIC OMPI_RML_TAG_BASE+200
|
||||
|
||||
/*
|
||||
|
@ -11,6 +11,8 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -220,6 +222,12 @@
|
||||
# define __opal_attribute_weak_alias__(a)
|
||||
#endif
|
||||
|
||||
#if OPAL_HAVE_ATTRIBUTE_DESTRUCTOR
|
||||
# define __opal_attribute_destructor__ __attribute__((__destructor__))
|
||||
#else
|
||||
# define __opal_attribute_destructor__
|
||||
#endif
|
||||
|
||||
# if OPAL_C_HAVE_VISIBILITY
|
||||
# define OPAL_DECLSPEC __opal_attribute_visibility__("default")
|
||||
# define OPAL_MODULE_DECLSPEC __opal_attribute_visibility__("default")
|
||||
|
123
oshmem/Makefile.am
Обычный файл
123
oshmem/Makefile.am
Обычный файл
@ -0,0 +1,123 @@
|
||||
#
|
||||
# Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
AM_CFLAGS = $(OSHMEM_CFLAGS)
|
||||
|
||||
# Do we have profiling?
|
||||
if OSHMEM_PROFILING
|
||||
c_pshmem_lib = shmem/c/profile/libshmem_c_pshmem.la
|
||||
else
|
||||
c_pshmem_lib =
|
||||
endif
|
||||
|
||||
f77_shmem_lib = shmem/f77/libshmem_f77.la
|
||||
|
||||
# Note that the ordering of "." in SUBDIRS is important: the C++, F77,
|
||||
# and F90 bindings are all in standalone .la files that depend on
|
||||
# libshmem.la. So we must fully build libshmem.la first.
|
||||
|
||||
# NOTE: A handful of files in mpi/f77/base must be included in
|
||||
# libshmem.la. But we wanted to keep all the Fortran sources together
|
||||
# in the same tree, so we moved those sources to a separate
|
||||
# subdirectory with its own Makefile.include that is included in this
|
||||
# Makefile.am (NOTE: it did *not* work to put all the files -- base
|
||||
#
|
||||
# and non-base -- into mpi/f77 and have both a regular Makefile.am for
|
||||
# building the f77 bindings library and a separate Makefile.include
|
||||
# that was included in this top-level Makefile.am; problems occurred
|
||||
# with "make distclean" and files in the ompi/mpi/f77/.deps directory
|
||||
# -- it's not clear whether this is an AM bug or whether this behavior
|
||||
# is simply not supported). This ompi/mpi/f77/base/Makefile.include
|
||||
# file makes a convenience LT library that is then sucked into
|
||||
# libshmem.la (the ompi/mpi/f77/base sources must be compiled with
|
||||
# special CPPFLAGS; we can't just add the raw sources to
|
||||
# libshmem_la_SOURCES, unfortunately).
|
||||
|
||||
# The end result is that libshmem.la -- including a few sources
|
||||
# from mpi/f77/base -- is fully built before the C++, F77, and F90
|
||||
# bindings are built. Therefore, the C++, F77 and F90 bindings
|
||||
# libraries can all link against libshmem.la.
|
||||
|
||||
SUBDIRS = \
|
||||
include \
|
||||
shmem/c \
|
||||
shmem/f77 \
|
||||
$(EXT_oshmem_FRAMEWORKS_SUBDIRS) \
|
||||
$(EXT_oshmem_FRAMEWORK_COMPONENT_STATIC_SUBDIRS) \
|
||||
$(MCA_oshmem_FRAMEWORKS_SUBDIRS) \
|
||||
$(MCA_oshmem_FRAMEWORK_COMPONENT_STATIC_SUBDIRS) \
|
||||
. \
|
||||
$(MCA_oshmem_FRAMEWORK_COMPONENT_DSO_SUBDIRS)
|
||||
|
||||
DIST_SUBDIRS = \
|
||||
include \
|
||||
shmem/c \
|
||||
shmem/f77 \
|
||||
$(EXT_oshmem_FRAMEWORKS_SUBDIRS) \
|
||||
$(EXT_oshmem_FRAMEWORK_COMPONENT_ALL_SUBDIRS) \
|
||||
$(MCA_oshmem_FRAMEWORKS_SUBDIRS) \
|
||||
$(MCA_oshmem_FRAMEWORK_COMPONENT_ALL_SUBDIRS)
|
||||
|
||||
# Build the main SHMEM library
|
||||
lib_LTLIBRARIES = libshmem.la
|
||||
libshmem_la_SOURCES =
|
||||
libshmem_la_LIBADD = \
|
||||
shmem/c/libshmem_c.la \
|
||||
$(c_pshmem_lib) \
|
||||
$(f77_shmem_lib) \
|
||||
$(MCA_oshmem_FRAMEWORK_LIBS) \
|
||||
$(top_ompi_builddir)/ompi/libmpi.la
|
||||
libshmem_la_DEPENDENCIES = $(libshmem_la_LIBADD)
|
||||
libshmem_la_LDFLAGS = \
|
||||
-version-info $(libshmem_so_version) \
|
||||
$(OSHMEM_LIBSHMEM_EXTRA_LDFLAGS)
|
||||
|
||||
# included subdirectory Makefile.am's and appended-to variables
|
||||
headers =
|
||||
noinst_LTLIBRARIES =
|
||||
include_HEADERS =
|
||||
nobase_oshmem_HEADERS =
|
||||
dist_pkgdata_DATA =
|
||||
libshmem_la_SOURCES += $(headers)
|
||||
nodist_man_MANS =
|
||||
|
||||
# Conditionally install the header files
|
||||
|
||||
if WANT_INSTALL_HEADERS
|
||||
oshmemdir = $(includedir)/oshmem/oshmem
|
||||
nobase_oshmem_HEADERS += $(headers)
|
||||
else
|
||||
oshmemdir = $(includedir)
|
||||
endif
|
||||
|
||||
include op/Makefile.am
|
||||
include proc/Makefile.am
|
||||
include request/Makefile.am
|
||||
include runtime/Makefile.am
|
||||
include shmem/Makefile.am
|
||||
include tools/Makefile.am
|
||||
|
||||
# Ensure that the man page directory exists before we try to make man
|
||||
# page files (because oshmem/shmem/man/man3 has no config.status-generated
|
||||
# Makefile)
|
||||
dir_stamp = $(top_builddir)/$(subdir)/shmem/man/man3/.dir-stamp
|
||||
|
||||
# Also ensure that the man pages are rebuilt if the opal_config.h file
|
||||
# changes (e.g., configure was run again, meaning that the release
|
||||
# date or version may have changed)
|
||||
$(nodist_man_MANS): $(dir_stamp) $(top_builddir)/opal/include/opal_config.h
|
||||
|
||||
$(dir_stamp):
|
||||
$(mkdir_p) `dirname $@`
|
||||
touch "$@"
|
||||
|
||||
# Remove the generated man pages
|
||||
distclean-local:
|
||||
rm -f $(nodist_man_MANS) $(dir_stamp)
|
51
oshmem/include/Makefile.am
Обычный файл
51
oshmem/include/Makefile.am
Обычный файл
@ -0,0 +1,51 @@
|
||||
# Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# mpif-common.h is not generated, but mpif.h and mpif-config.h are.
|
||||
# See big comments in these files for an explanation.
|
||||
|
||||
# note - headers and nodist_headers will go in ${includedir}/oshmem,
|
||||
# not ${includedir}/
|
||||
headers =
|
||||
nodist_headers = \
|
||||
oshmem_config.h
|
||||
|
||||
# Install these in $(includedir)
|
||||
include_HEADERS =
|
||||
|
||||
# Install these in $(includedir)/mpp
|
||||
mppincludedir = $(includedir)/mpp
|
||||
mppinclude_HEADERS = mpp/shmem.h \
|
||||
mpp/shmem.fh
|
||||
|
||||
# Always install these in $(pkgincludedir)
|
||||
pkginclude_HEADERS =
|
||||
|
||||
include_HEADERS += shmem.fh
|
||||
|
||||
# These files are always installed in $(includedir), but shouldn't be
|
||||
# shipped since they are generated by configure from their .in
|
||||
# counterparts (which AM automatically ships).
|
||||
nodist_include_HEADERS = \
|
||||
shmem.h \
|
||||
shmem_portable_platform.h
|
||||
|
||||
if WANT_INSTALL_HEADERS
|
||||
oshmemdir = $(includedir)/oshmem
|
||||
nobase_dist_oshmem_HEADERS = $(headers)
|
||||
nobase_nodist_oshmem_HEADERS = $(nodist_headers)
|
||||
else
|
||||
oshmemdir = $(includedir)
|
||||
nobase_dist_noinst_HEADERS = $(headers)
|
||||
nobase_nodist_noinst_HEADERS = $(nodist_headers)
|
||||
endif
|
||||
|
||||
distclean-local:
|
||||
|
||||
include oshmem/Makefile.am
|
457
oshmem/include/mpif-common.h
Обычный файл
457
oshmem/include/mpif-common.h
Обычный файл
@ -0,0 +1,457 @@
|
||||
!
|
||||
! Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
! All rights reserved.
|
||||
! $COPYRIGHT$
|
||||
!
|
||||
! Additional copyrights may follow
|
||||
!
|
||||
! $HEADER$
|
||||
!
|
||||
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
! WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
|
||||
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
!
|
||||
! Do ***not*** copy this file to the directory where your Fortran
|
||||
! application is compiled unless it is absolutely necessary! Most
|
||||
! modern Fortran compilers now support the -I command line flag, which
|
||||
! tells the compiler where to find .h files (specifically, this one). For
|
||||
! example:
|
||||
!
|
||||
! shell$ mpif77 foo.f -o foo -I$OMPI_HOME/include
|
||||
!
|
||||
! will probably do the trick (assuming that you have set OMPI_HOME
|
||||
! properly).
|
||||
!
|
||||
! That being said, OMPI's "mpif77" wrapper compiler should
|
||||
! automatically include the -I option for you. The following command
|
||||
! should be equivalent to the command listed above:
|
||||
!
|
||||
! shell$ mpif77 foo.f -o foo
|
||||
!
|
||||
! You should not copy this file to your local directory because it is
|
||||
! possible that this file will be changed between versions of Open MPI.
|
||||
! Indeed, this mpif.h is incompatible with the mpif.h of other
|
||||
! implementations of MPI. Using this mpif.h with other implementations
|
||||
! of MPI, or with other versions of Open MPI will result in undefined
|
||||
! behavior (to include incorrect results, segmentation faults,
|
||||
! unexplainable "hanging" in your application, etc.). Always use the
|
||||
! -I command line option instead (or let mpif77 do it for you).
|
||||
!
|
||||
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
! WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
|
||||
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
!
|
||||
! This file contains the bulk of the Open MPI Fortran interface. It
|
||||
! is included as a back-end file to both mpif.h (i.e., the
|
||||
! standardized MPI Fortran header file) and mpi.f90 (the MPI-2
|
||||
! Fortran module source file, found in ompi/mpi/f90).
|
||||
!
|
||||
! This file is marginally different than mpif.h. mpif.h includes
|
||||
! some "external" statements that are not suitable for use with the
|
||||
! MPI-2 F90 module, and therefore cannot be included in the mpi.f90
|
||||
! source file. Hence, this file is essentially everything that
|
||||
! needs to be in the standardized mpif.h *except* the "external"
|
||||
! statements, and is therefore suitable to be included in mpi.f90.
|
||||
!
|
||||
|
||||
! First, however, include some output from configure.
|
||||
!
|
||||
include 'mpif-config.h'
|
||||
|
||||
!
|
||||
! MPI version
|
||||
!
|
||||
integer MPI_VERSION, MPI_SUBVERSION
|
||||
|
||||
parameter (MPI_VERSION=2)
|
||||
parameter (MPI_SUBVERSION=1)
|
||||
!
|
||||
! Miscellaneous constants
|
||||
!
|
||||
integer MPI_ANY_SOURCE, MPI_ANY_TAG
|
||||
integer MPI_PROC_NULL
|
||||
integer MPI_ROOT
|
||||
integer MPI_UNDEFINED
|
||||
integer MPI_CART, MPI_GRAPH, MPI_KEYVAL_INVALID
|
||||
integer MPI_SOURCE, MPI_TAG, MPI_ERROR
|
||||
integer MPI_TAG_UB, MPI_HOST, MPI_IO, MPI_WTIME_IS_GLOBAL
|
||||
integer MPI_APPNUM, MPI_LASTUSEDCODE, MPI_UNIVERSE_SIZE
|
||||
integer IMPI_CLIENT_SIZE, IMPI_CLIENT_COLOR
|
||||
integer IMPI_HOST_SIZE, IMPI_HOST_COLOR
|
||||
integer MPI_BSEND_OVERHEAD
|
||||
integer MPI_ORDER_C, MPI_ORDER_FORTRAN
|
||||
integer MPI_DISTRIBUTE_BLOCK, MPI_DISTRIBUTE_CYCLIC
|
||||
integer MPI_DISTRIBUTE_NONE, MPI_DISTRIBUTE_DFLT_DARG
|
||||
integer MPI_TYPECLASS_INTEGER, MPI_TYPECLASS_REAL
|
||||
integer MPI_TYPECLASS_COMPLEX
|
||||
integer MPI_MODE_NOCHECK, MPI_MODE_NOPRECEDE, MPI_MODE_NOPUT
|
||||
integer MPI_MODE_NOSTORE, MPI_MODE_NOSUCCEED
|
||||
integer MPI_LOCK_EXCLUSIVE, MPI_LOCK_SHARED
|
||||
integer MPI_WIN_BASE, MPI_WIN_SIZE, MPI_WIN_DISP_UNIT
|
||||
|
||||
parameter (MPI_ANY_SOURCE=-1)
|
||||
parameter (MPI_ANY_TAG=-1)
|
||||
parameter (MPI_PROC_NULL=-2)
|
||||
parameter (MPI_ROOT=-4)
|
||||
parameter (MPI_UNDEFINED=-32766)
|
||||
parameter (MPI_CART=1)
|
||||
parameter (MPI_GRAPH=2)
|
||||
parameter (MPI_KEYVAL_INVALID=-1)
|
||||
parameter (MPI_SOURCE=1)
|
||||
parameter (MPI_TAG=2)
|
||||
parameter (MPI_ERROR=3)
|
||||
parameter (MPI_TAG_UB=0)
|
||||
parameter (MPI_HOST=1)
|
||||
parameter (MPI_IO=2)
|
||||
parameter (MPI_WTIME_IS_GLOBAL=3)
|
||||
parameter (MPI_APPNUM=4)
|
||||
parameter (MPI_LASTUSEDCODE=5)
|
||||
parameter (MPI_UNIVERSE_SIZE=6)
|
||||
parameter (MPI_WIN_BASE=7)
|
||||
parameter (MPI_WIN_SIZE=8)
|
||||
parameter (MPI_WIN_DISP_UNIT=9)
|
||||
parameter (IMPI_CLIENT_SIZE=10)
|
||||
parameter (IMPI_CLIENT_COLOR=11)
|
||||
parameter (IMPI_HOST_SIZE=12)
|
||||
parameter (IMPI_HOST_COLOR=13)
|
||||
|
||||
parameter (MPI_BSEND_OVERHEAD=128)
|
||||
parameter (MPI_ORDER_C=0)
|
||||
parameter (MPI_ORDER_FORTRAN=1)
|
||||
parameter (MPI_DISTRIBUTE_BLOCK=0)
|
||||
parameter (MPI_DISTRIBUTE_CYCLIC=1)
|
||||
parameter (MPI_DISTRIBUTE_NONE=2)
|
||||
parameter (MPI_DISTRIBUTE_DFLT_DARG=-1)
|
||||
parameter (MPI_TYPECLASS_INTEGER=1)
|
||||
parameter (MPI_TYPECLASS_REAL=2)
|
||||
parameter (MPI_TYPECLASS_COMPLEX=3)
|
||||
parameter (MPI_MODE_NOCHECK=1)
|
||||
parameter (MPI_MODE_NOPRECEDE=2)
|
||||
parameter (MPI_MODE_NOPUT=4)
|
||||
parameter (MPI_MODE_NOSTORE=8)
|
||||
parameter (MPI_MODE_NOSUCCEED=16)
|
||||
parameter (MPI_LOCK_EXCLUSIVE=1)
|
||||
parameter (MPI_LOCK_SHARED=2)
|
||||
|
||||
!
|
||||
! MPI sentinel values
|
||||
!
|
||||
! Several of these types were chosen with care to match specific
|
||||
! overloaded functions in the F90 bindings. They should also match
|
||||
! the types of their corresponding C variables. Do not arbitrarily
|
||||
! change their types without also updating the F90 bindings and
|
||||
! their corresponding types in ompi/mpi/f77/constants.h and
|
||||
! ompi/mpi/runtime/ompi_init.c!
|
||||
!
|
||||
! MPI_BOTTOM is only used where choice buffers can be used (meaning
|
||||
! that we already have overloaded F90 bindings for all available
|
||||
! types), so any type is fine.
|
||||
integer MPI_BOTTOM
|
||||
! MPI_IN_PLACE has the same rationale as MPI_BOTTOM.
|
||||
integer MPI_IN_PLACE
|
||||
! Making MPI_ARGV_NULL be the same type as the parameter that is
|
||||
! expected in the F90 binding for MPI_COMM_SPAWN means that we
|
||||
! don't need another binding for MPI_COMM_SPAWN.
|
||||
character MPI_ARGV_NULL(1)
|
||||
! The array_of_argv parameter in the F90 bindings for
|
||||
! MPI_COMM_SPAWN_MULTIPLE takes a variable number of dimensions
|
||||
! (specified by the "count" parameter), so it's not possible to have
|
||||
! a single variable match all possible values. Hence, make it an
|
||||
! entirely different type (one that would never likely be used by a
|
||||
! correct program, e.g., double) and have a separate F90 binding for
|
||||
! matching just this type.
|
||||
double precision MPI_ARGVS_NULL
|
||||
! MPI_ERRCODES_IGNORE has similar rationale to MPI_ARGV_NULL. The
|
||||
! F77 functions are all smart enough to check that the errcodes
|
||||
! parameter is not ERRCODES_IGNORE before assigning values into it
|
||||
! (hence, the fact that this is an array of only 1 element does not
|
||||
! matter -- we'll never overrun it because we never assign values
|
||||
! into it).
|
||||
integer MPI_ERRCODES_IGNORE(1)
|
||||
! MPI_STATUS_IGNORE has similar rationale to MPI_ERRCODES_IGNORE.
|
||||
integer MPI_STATUS_IGNORE(MPI_STATUS_SIZE)
|
||||
! MPI_STATUSES_IGNORE has similar rationale to MPI_ARGVS_NULL.
|
||||
double precision MPI_STATUSES_IGNORE
|
||||
|
||||
common/mpi_fortran_bottom/MPI_BOTTOM
|
||||
common/mpi_fortran_in_place/MPI_IN_PLACE
|
||||
common/mpi_fortran_argv_null/MPI_ARGV_NULL
|
||||
common/mpi_fortran_argvs_null/MPI_ARGVS_NULL
|
||||
common/mpi_fortran_errcodes_ignore/MPI_ERRCODES_IGNORE
|
||||
common/mpi_fortran_status_ignore/MPI_STATUS_IGNORE
|
||||
common/mpi_fortran_statuses_ignore/MPI_STATUSES_IGNORE
|
||||
!
|
||||
! NULL "handles" (indices)
|
||||
!
|
||||
integer MPI_GROUP_NULL, MPI_COMM_NULL, MPI_DATATYPE_NULL
|
||||
integer MPI_REQUEST_NULL, MPI_OP_NULL, MPI_ERRHANDLER_NULL
|
||||
integer MPI_INFO_NULL, MPI_WIN_NULL
|
||||
|
||||
parameter (MPI_GROUP_NULL=0)
|
||||
parameter (MPI_COMM_NULL=2)
|
||||
parameter (MPI_DATATYPE_NULL=0)
|
||||
parameter (MPI_REQUEST_NULL=0)
|
||||
parameter (MPI_OP_NULL=0)
|
||||
parameter (MPI_ERRHANDLER_NULL=0)
|
||||
parameter (MPI_INFO_NULL=0)
|
||||
parameter (MPI_WIN_NULL=0)
|
||||
!
|
||||
! MPI_Init_thread constants
|
||||
!
|
||||
integer MPI_THREAD_SINGLE, MPI_THREAD_FUNNELED
|
||||
integer MPI_THREAD_SERIALIZED, MPI_THREAD_MULTIPLE
|
||||
|
||||
parameter (MPI_THREAD_SINGLE=0)
|
||||
parameter (MPI_THREAD_FUNNELED=1)
|
||||
parameter (MPI_THREAD_SERIALIZED=2)
|
||||
parameter (MPI_THREAD_MULTIPLE=3)
|
||||
!
|
||||
! error classes
|
||||
!
|
||||
integer SHMEM_SUCCESS
|
||||
integer SHMEM_ERR_BUFFER
|
||||
integer SHMEM_ERR_COUNT
|
||||
integer SHMEM_ERR_TYPE
|
||||
integer SHMEM_ERR_TAG
|
||||
integer SHMEM_ERR_COMM
|
||||
integer SHMEM_ERR_RANK
|
||||
integer SHMEM_ERR_REQUEST
|
||||
integer SHMEM_ERR_ROOT
|
||||
integer SHMEM_ERR_GROUP
|
||||
integer SHMEM_ERR_OP
|
||||
integer SHMEM_ERR_TOPOLOGY
|
||||
integer SHMEM_ERR_DIMS
|
||||
integer SHMEM_ERR_ARG
|
||||
integer SHMEM_ERR_UNKNOWN
|
||||
integer SHMEM_ERR_TRUNCATE
|
||||
integer SHMEM_ERR_OTHER
|
||||
integer SHMEM_ERR_INTERN
|
||||
integer SHMEM_ERR_IN_STATUS
|
||||
integer SHMEM_ERR_PENDING
|
||||
integer SHMEM_ERR_ACCESS
|
||||
integer SHMEM_ERR_AMODE
|
||||
integer SHMEM_ERR_ASSERT
|
||||
integer SHMEM_ERR_BAD_FILE
|
||||
integer SHMEM_ERR_BASE
|
||||
integer SHMEM_ERR_CONVERSION
|
||||
integer SHMEM_ERR_DISP
|
||||
integer SHMEM_ERR_DUP_DATAREP
|
||||
integer SHMEM_ERR_FILE_EXISTS
|
||||
integer SHMEM_ERR_FILE_IN_USE
|
||||
integer SHMEM_ERR_FILE
|
||||
integer SHMEM_ERR_INFO_KEY
|
||||
integer SHMEM_ERR_INFO_NOKEY
|
||||
integer SHMEM_ERR_INFO_VALUE
|
||||
integer SHMEM_ERR_INFO
|
||||
integer SHMEM_ERR_IO
|
||||
integer SHMEM_ERR_KEYVAL
|
||||
integer SHMEM_ERR_LOCKTYPE
|
||||
integer SHMEM_ERR_NAME
|
||||
integer SHMEM_ERR_NO_MEM
|
||||
integer SHMEM_ERR_NOT_SAME
|
||||
integer SHMEM_ERR_NO_SPACE
|
||||
integer SHMEM_ERR_NO_SUCH_FILE
|
||||
integer SHMEM_ERR_PORT
|
||||
integer SHMEM_ERR_QUOTA
|
||||
integer SHMEM_ERR_READ_ONLY
|
||||
integer SHMEM_ERR_RMA_CONFLICT
|
||||
integer SHMEM_ERR_RMA_SYNC
|
||||
integer SHMEM_ERR_SERVICE
|
||||
integer SHMEM_ERR_SIZE
|
||||
integer SHMEM_ERR_SPAWN
|
||||
integer SHMEM_ERR_UNSUPPORTED_DATAREP
|
||||
integer SHMEM_ERR_UNSUPPORTED_OPERATION
|
||||
integer SHMEM_ERR_WIN
|
||||
|
||||
integer SHMEM_ERR_SYSRESOURCE
|
||||
integer SHMEM_ERR_LASTCODE
|
||||
|
||||
parameter( SHMEM_SUCCESS = 0)
|
||||
parameter( SHMEM_ERR_BUFFER = 1)
|
||||
parameter( SHMEM_ERR_COUNT = 2)
|
||||
parameter( SHMEM_ERR_TYPE = 3)
|
||||
parameter( SHMEM_ERR_TAG = 4)
|
||||
parameter( SHMEM_ERR_COMM = 5)
|
||||
parameter( SHMEM_ERR_RANK = 6)
|
||||
parameter( SHMEM_ERR_REQUEST = 7)
|
||||
parameter( SHMEM_ERR_ROOT = 8)
|
||||
parameter( SHMEM_ERR_GROUP = 9)
|
||||
parameter( SHMEM_ERR_OP = 10)
|
||||
parameter( SHMEM_ERR_TOPOLOGY = 11)
|
||||
parameter( SHMEM_ERR_DIMS = 12)
|
||||
parameter( SHMEM_ERR_ARG = 13)
|
||||
parameter( SHMEM_ERR_UNKNOWN = 14)
|
||||
parameter( SHMEM_ERR_TRUNCATE = 15)
|
||||
parameter( SHMEM_ERR_OTHER = 16)
|
||||
parameter( SHMEM_ERR_INTERN = 17)
|
||||
parameter( SHMEM_ERR_IN_STATUS = 18)
|
||||
parameter( SHMEM_ERR_PENDING = 19)
|
||||
parameter( SHMEM_ERR_ACCESS = 20)
|
||||
parameter( SHMEM_ERR_AMODE = 21)
|
||||
parameter( SHMEM_ERR_ASSERT = 22)
|
||||
parameter( SHMEM_ERR_BAD_FILE = 23)
|
||||
parameter( SHMEM_ERR_BASE = 24)
|
||||
parameter( SHMEM_ERR_CONVERSION = 25)
|
||||
parameter( SHMEM_ERR_DISP = 26)
|
||||
parameter( SHMEM_ERR_DUP_DATAREP = 27)
|
||||
parameter( SHMEM_ERR_FILE_EXISTS = 28)
|
||||
parameter( SHMEM_ERR_FILE_IN_USE = 29)
|
||||
parameter( SHMEM_ERR_FILE = 30)
|
||||
parameter( SHMEM_ERR_INFO_KEY = 31)
|
||||
parameter( SHMEM_ERR_INFO_NOKEY = 32)
|
||||
parameter( SHMEM_ERR_INFO_VALUE = 33)
|
||||
parameter( SHMEM_ERR_INFO = 34)
|
||||
parameter( SHMEM_ERR_IO = 35)
|
||||
parameter( SHMEM_ERR_KEYVAL = 36)
|
||||
parameter( SHMEM_ERR_LOCKTYPE = 37)
|
||||
parameter( SHMEM_ERR_NAME = 38)
|
||||
parameter( SHMEM_ERR_NO_MEM = 39)
|
||||
parameter( SHMEM_ERR_NOT_SAME = 40)
|
||||
parameter( SHMEM_ERR_NO_SPACE = 41)
|
||||
parameter( SHMEM_ERR_NO_SUCH_FILE = 42)
|
||||
parameter( SHMEM_ERR_PORT = 43)
|
||||
parameter( SHMEM_ERR_QUOTA = 44)
|
||||
parameter( SHMEM_ERR_READ_ONLY = 45)
|
||||
parameter( SHMEM_ERR_RMA_CONFLICT = 46)
|
||||
parameter( SHMEM_ERR_RMA_SYNC = 47)
|
||||
parameter( SHMEM_ERR_SERVICE = 48)
|
||||
parameter( SHMEM_ERR_SIZE = 49)
|
||||
parameter( SHMEM_ERR_SPAWN = 50)
|
||||
parameter( SHMEM_ERR_UNSUPPORTED_DATAREP = 51)
|
||||
parameter( SHMEM_ERR_UNSUPPORTED_OPERATION= 52)
|
||||
parameter( SHMEM_ERR_WIN = 53)
|
||||
|
||||
parameter( SHMEM_ERR_SYSRESOURCE = -2)
|
||||
parameter( SHMEM_ERR_LASTCODE = 54)
|
||||
|
||||
!
|
||||
! comparison results
|
||||
!
|
||||
integer MPI_IDENT, MPI_CONGRUENT, MPI_SIMILAR, MPI_UNEQUAL
|
||||
|
||||
parameter (MPI_IDENT=0)
|
||||
parameter (MPI_CONGRUENT=1)
|
||||
parameter (MPI_SIMILAR=2)
|
||||
parameter (MPI_UNEQUAL=3)
|
||||
!
|
||||
! datatype combiners
|
||||
!
|
||||
integer MPI_COMBINER_NAMED
|
||||
integer MPI_COMBINER_DUP
|
||||
integer MPI_COMBINER_CONTIGUOUS
|
||||
integer MPI_COMBINER_VECTOR
|
||||
integer MPI_COMBINER_HVECTOR_INTEGER
|
||||
integer MPI_COMBINER_HVECTOR
|
||||
integer MPI_COMBINER_INDEXED
|
||||
integer MPI_COMBINER_HINDEXED_INTEGER
|
||||
integer MPI_COMBINER_HINDEXED
|
||||
integer MPI_COMBINER_INDEXED_BLOCK
|
||||
integer MPI_COMBINER_STRUCT_INTEGER
|
||||
integer MPI_COMBINER_STRUCT
|
||||
integer MPI_COMBINER_SUBARRAY
|
||||
integer MPI_COMBINER_DARRAY
|
||||
integer MPI_COMBINER_F90_REAL
|
||||
integer MPI_COMBINER_F90_COMPLEX
|
||||
integer MPI_COMBINER_F90_INTEGER
|
||||
integer MPI_COMBINER_RESIZED
|
||||
|
||||
parameter (MPI_COMBINER_NAMED=0)
|
||||
parameter (MPI_COMBINER_DUP=1)
|
||||
parameter (MPI_COMBINER_CONTIGUOUS=2)
|
||||
parameter (MPI_COMBINER_VECTOR=3)
|
||||
parameter (MPI_COMBINER_HVECTOR_INTEGER=4)
|
||||
parameter (MPI_COMBINER_HVECTOR=5)
|
||||
parameter (MPI_COMBINER_INDEXED=6)
|
||||
parameter (MPI_COMBINER_HINDEXED_INTEGER=7)
|
||||
parameter (MPI_COMBINER_HINDEXED=8)
|
||||
parameter (MPI_COMBINER_INDEXED_BLOCK=9)
|
||||
parameter (MPI_COMBINER_STRUCT_INTEGER=10)
|
||||
parameter (MPI_COMBINER_STRUCT=11)
|
||||
parameter (MPI_COMBINER_SUBARRAY=12)
|
||||
parameter (MPI_COMBINER_DARRAY=13)
|
||||
parameter (MPI_COMBINER_F90_REAL=14)
|
||||
parameter (MPI_COMBINER_F90_COMPLEX=15)
|
||||
parameter (MPI_COMBINER_F90_INTEGER=16)
|
||||
parameter (MPI_COMBINER_RESIZED=17)
|
||||
!
|
||||
! lookup table indices
|
||||
!
|
||||
integer MPI_COMM_WORLD, MPI_COMM_SELF
|
||||
integer MPI_GROUP_EMPTY
|
||||
integer MPI_ERRORS_ARE_FATAL, MPI_ERRORS_RETURN
|
||||
|
||||
parameter (MPI_COMM_WORLD=0)
|
||||
parameter (MPI_COMM_SELF=1)
|
||||
parameter (MPI_GROUP_EMPTY=1)
|
||||
parameter (MPI_ERRORS_ARE_FATAL=1)
|
||||
parameter (MPI_ERRORS_RETURN=2)
|
||||
|
||||
integer MPI_BYTE, MPI_PACKED, MPI_UB, MPI_LB
|
||||
integer MPI_CHARACTER, MPI_LOGICAL
|
||||
integer MPI_INTEGER, MPI_INTEGER1, MPI_INTEGER2, MPI_INTEGER4
|
||||
integer MPI_INTEGER8, MPI_INTEGER16
|
||||
integer MPI_REAL, MPI_REAL2, MPI_REAL4, MPI_REAL8, MPI_REAL16
|
||||
integer MPI_DOUBLE_PRECISION
|
||||
integer MPI_COMPLEX, MPI_COMPLEX8, MPI_COMPLEX16, MPI_COMPLEX32
|
||||
integer MPI_DOUBLE_COMPLEX
|
||||
integer MPI_2REAL, MPI_2DOUBLE_PRECISION, MPI_2INTEGER
|
||||
integer MPI_2COMPLEX, MPI_2DOUBLE_COMPLEX
|
||||
! Note that MPI_LOGICALx are not defined by the MPI spec, but there are
|
||||
! other MPI implementations that have them, so it's good for us to have
|
||||
! as well.
|
||||
integer MPI_LOGICAL1, MPI_LOGICAL2, MPI_LOGICAL4, MPI_LOGICAL8
|
||||
|
||||
!
|
||||
! Do NOT change the order of these parameters
|
||||
!
|
||||
parameter (MPI_BYTE=1)
|
||||
parameter (MPI_PACKED=2)
|
||||
parameter (MPI_UB=3)
|
||||
parameter (MPI_LB=4)
|
||||
parameter (MPI_CHARACTER=5)
|
||||
parameter (MPI_LOGICAL=6)
|
||||
parameter (MPI_INTEGER=7)
|
||||
parameter (MPI_INTEGER1=8)
|
||||
parameter (MPI_INTEGER2=9)
|
||||
parameter (MPI_INTEGER4=10)
|
||||
parameter (MPI_INTEGER8=11)
|
||||
parameter (MPI_INTEGER16=12)
|
||||
parameter (MPI_REAL=13)
|
||||
parameter (MPI_REAL4=14)
|
||||
parameter (MPI_REAL8=15)
|
||||
parameter (MPI_REAL16=16)
|
||||
parameter (MPI_DOUBLE_PRECISION=17)
|
||||
parameter (MPI_COMPLEX=18)
|
||||
parameter (MPI_COMPLEX8=19)
|
||||
parameter (MPI_COMPLEX16=20)
|
||||
parameter (MPI_COMPLEX32=21)
|
||||
parameter (MPI_DOUBLE_COMPLEX=22)
|
||||
parameter (MPI_2REAL=23)
|
||||
parameter (MPI_2DOUBLE_PRECISION=24)
|
||||
parameter (MPI_2INTEGER=25)
|
||||
parameter (MPI_2COMPLEX=26)
|
||||
parameter (MPI_2DOUBLE_COMPLEX=27)
|
||||
parameter (MPI_REAL2=28)
|
||||
parameter (MPI_LOGICAL1=29)
|
||||
parameter (MPI_LOGICAL2=30)
|
||||
parameter (MPI_LOGICAL4=31)
|
||||
parameter (MPI_LOGICAL8=32)
|
||||
|
||||
integer MPI_MAX, MPI_MIN, MPI_SUM, MPI_PROD, MPI_LAND
|
||||
integer MPI_BAND, MPI_LOR, MPI_BOR, MPI_LXOR, MPI_BXOR
|
||||
integer MPI_MAXLOC, MPI_MINLOC, MPI_REPLACE
|
||||
|
||||
parameter (MPI_MAX=1)
|
||||
parameter (MPI_MIN=2)
|
||||
parameter (MPI_SUM=3)
|
||||
parameter (MPI_PROD=4)
|
||||
parameter (MPI_LAND=5)
|
||||
parameter (MPI_BAND=6)
|
||||
parameter (MPI_LOR=7)
|
||||
parameter (MPI_BOR=8)
|
||||
parameter (MPI_LXOR=9)
|
||||
parameter (MPI_BXOR=10)
|
||||
parameter (MPI_MAXLOC=11)
|
||||
parameter (MPI_MINLOC=12)
|
||||
parameter (MPI_REPLACE=13)
|
99
oshmem/include/mpif-config.h.in
Обычный файл
99
oshmem/include/mpif-config.h.in
Обычный файл
@ -0,0 +1,99 @@
|
||||
! -*- fortran -*-
|
||||
!
|
||||
! Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
! All rights reserved.
|
||||
! $COPYRIGHT$
|
||||
!
|
||||
! Additional copyrights may follow
|
||||
!
|
||||
! $HEADER$
|
||||
!
|
||||
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
! WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
|
||||
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
!
|
||||
! Do ***not*** copy this file to the directory where your Fortran
|
||||
! application is compiled unless it is absolutely necessary! Most
|
||||
! modern Fortran compilers now support the -I command line flag, which
|
||||
! tells the compiler where to find .h files (specifically, this one). For
|
||||
! example:
|
||||
!
|
||||
! shell$ mpif77 foo.f -o foo -I$OMPI_HOME/include
|
||||
!
|
||||
! will probably do the trick (assuming that you have set OMPI_HOME
|
||||
! properly).
|
||||
!
|
||||
! That being said, OMPI's "mpif77" wrapper compiler should
|
||||
! automatically include the -I option for you. The following command
|
||||
! should be equivalent to the command listed above:
|
||||
!
|
||||
! shell$ mpif77 foo.f -o foo
|
||||
!
|
||||
! You should not copy this file to your local directory because it is
|
||||
! possible that this file will be changed between versions of Open MPI.
|
||||
! Indeed, this mpif.h is incompatible with the mpif.h of other
|
||||
! implementations of MPI. Using this mpif.h with other implementations
|
||||
! of MPI, or with other versions of Open MPI will result in undefined
|
||||
! behavior (to include incorrect results, segmentation faults,
|
||||
! unexplainable "hanging" in your application, etc.). Always use the
|
||||
! -I command line option instead (or let mpif77 do it for you).
|
||||
!
|
||||
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
! WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
|
||||
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
!
|
||||
! This file is included as a back-end file to both mpif.h (i.e., the
|
||||
! standardized MPI Fortran header file) and a bunch of the MPI
|
||||
! Fortran 90 subroutine implementations found in ompi/mpi/f90.
|
||||
!
|
||||
! This file contains the output from configure that is relevant for
|
||||
! Fortran applications (both 77 and 90) and a few values that are
|
||||
! necessary to compile the F90 module (e.g., MPI_STATUS_SIZE).
|
||||
!
|
||||
|
||||
! Include the MPI I/O stuff, if needed
|
||||
@OMPI_MPIF_MPI_IO_INCLUDE@
|
||||
|
||||
!
|
||||
! OMPI version
|
||||
! This file is generated from configure; do not edit it manually.
|
||||
!
|
||||
integer OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION
|
||||
integer OMPI_RELEASE_VERSION
|
||||
character*32 OMPI_GREEK_VERSION
|
||||
character*32 OMPI_SVN_VERSION
|
||||
parameter (OMPI_MAJOR_VERSION=@OMPI_MAJOR_VERSION@)
|
||||
parameter (OMPI_MINOR_VERSION=@OMPI_MINOR_VERSION@)
|
||||
parameter (OMPI_RELEASE_VERSION=@OMPI_RELEASE_VERSION@)
|
||||
parameter (OMPI_GREEK_VERSION="@OMPI_GREEK_VERSION@")
|
||||
parameter (OMPI_SVN_VERSION="@OMPI_SVN_R@")
|
||||
!
|
||||
! Kind parameters
|
||||
!
|
||||
integer MPI_OFFSET_KIND, MPI_ADDRESS_KIND, MPI_INTEGER_KIND
|
||||
parameter (MPI_INTEGER_KIND=@OMPI_MPI_INTEGER_KIND@)
|
||||
parameter (MPI_ADDRESS_KIND=@OMPI_MPI_ADDRESS_KIND@)
|
||||
parameter (MPI_OFFSET_KIND=@OMPI_MPI_OFFSET_KIND@)
|
||||
!
|
||||
! Miscellaneous constants
|
||||
!
|
||||
integer MPI_STATUS_SIZE
|
||||
parameter (MPI_STATUS_SIZE=5)
|
||||
!
|
||||
! Configurable length constants
|
||||
!
|
||||
integer MPI_MAX_PROCESSOR_NAME
|
||||
integer MPI_MAX_ERROR_STRING
|
||||
integer MPI_MAX_OBJECT_NAME
|
||||
integer MPI_MAX_INFO_KEY
|
||||
integer MPI_MAX_INFO_VAL
|
||||
integer MPI_MAX_PORT_NAME
|
||||
integer MPI_MAX_DATAREP_STRING
|
||||
parameter (MPI_MAX_PROCESSOR_NAME=@OPAL_MAX_PROCESSOR_NAME@-1)
|
||||
parameter (MPI_MAX_ERROR_STRING=@OPAL_MAX_ERROR_STRING@-1)
|
||||
parameter (MPI_MAX_OBJECT_NAME=@OPAL_MAX_OBJECT_NAME@-1)
|
||||
parameter (MPI_MAX_INFO_KEY=@OPAL_MAX_INFO_KEY@-1)
|
||||
parameter (MPI_MAX_INFO_VAL=@OPAL_MAX_INFO_VAL@-1)
|
||||
parameter (MPI_MAX_PORT_NAME=@OPAL_MAX_PORT_NAME@-1)
|
||||
parameter (MPI_MAX_DATAREP_STRING=@OPAL_MAX_DATAREP_STRING@-1)
|
74
oshmem/include/mpif-mpi-io.h
Обычный файл
74
oshmem/include/mpif-mpi-io.h
Обычный файл
@ -0,0 +1,74 @@
|
||||
! Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
! All rights reserved.
|
||||
! $COPYRIGHT$
|
||||
!
|
||||
! Additional copyrights may follow
|
||||
!
|
||||
! $HEADER$
|
||||
!
|
||||
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
! WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
|
||||
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
!
|
||||
! Do ***not*** copy this file to the directory where your Fortran
|
||||
! application is compiled unless it is absolutely necessary! Most
|
||||
! modern Fortran compilers now support the -I command line flag, which
|
||||
! tells the compiler where to find .h files (specifically, this one). For
|
||||
! example:
|
||||
!
|
||||
! shell$ mpif77 foo.f -o foo -I$OMPI_HOME/include
|
||||
!
|
||||
! will probably do the trick (assuming that you have set OMPI_HOME
|
||||
! properly).
|
||||
!
|
||||
! That being said, OMPI's "mpif77" wrapper compiler should
|
||||
! automatically include the -I option for you. The following command
|
||||
! should be equivalent to the command listed above:
|
||||
!
|
||||
! shell$ mpif77 foo.f -o foo
|
||||
!
|
||||
! You should not copy this file to your local directory because it is
|
||||
! possible that this file will be changed between versions of Open MPI.
|
||||
! Indeed, this mpif.h is incompatible with the mpif.h of other
|
||||
! implementations of MPI. Using this mpif.h with other implementations
|
||||
! of MPI, or with other versions of Open MPI will result in undefined
|
||||
! behavior (to include incorrect results, segmentation faults,
|
||||
! unexplainable "hanging" in your application, etc.). Always use the
|
||||
! -I command line option instead (or let mpif77 do it for you).
|
||||
!
|
||||
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
! WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
|
||||
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
!
|
||||
! This file is included as a back-end file to both mpif.h (i.e., the
|
||||
! standardized MPI Fortran header file) and a bunch of the MPI
|
||||
! Fortran 90 subroutine implementations found in ompi/mpi/f90.
|
||||
!
|
||||
! This file contains the output from configure that is relevant for
|
||||
! Fortran applications (both 77 and 90) and a few values that are
|
||||
! necessary to compile the F90 module (e.g., MPI_STATUS_SIZE).
|
||||
!
|
||||
|
||||
integer MPI_FILE_NULL
|
||||
integer MPI_SEEK_SET, MPI_SEEK_CUR, MPI_SEEK_END
|
||||
integer MPI_MODE_CREATE
|
||||
integer MPI_MODE_RDONLY, MPI_MODE_WRONLY, MPI_MODE_RDWR
|
||||
integer MPI_MODE_DELETE_ON_CLOSE, MPI_MODE_UNIQUE_OPEN
|
||||
integer MPI_MODE_EXCL, MPI_MODE_APPEND, MPI_MODE_SEQUENTIAL
|
||||
integer MPI_DISPLACEMENT_CURRENT
|
||||
|
||||
parameter (MPI_FILE_NULL=0)
|
||||
parameter (MPI_SEEK_SET=600)
|
||||
parameter (MPI_SEEK_CUR=602)
|
||||
parameter (MPI_SEEK_END=604)
|
||||
parameter (MPI_MODE_CREATE=1)
|
||||
parameter (MPI_MODE_RDONLY=2)
|
||||
parameter (MPI_MODE_WRONLY=4)
|
||||
parameter (MPI_MODE_RDWR=8)
|
||||
parameter (MPI_MODE_DELETE_ON_CLOSE=16)
|
||||
parameter (MPI_MODE_UNIQUE_OPEN=32)
|
||||
parameter (MPI_MODE_EXCL=64)
|
||||
parameter (MPI_MODE_APPEND=128)
|
||||
parameter (MPI_MODE_SEQUENTIAL=256)
|
||||
parameter (MPI_DISPLACEMENT_CURRENT=-54278278)
|
76
oshmem/include/mpif.h.in
Обычный файл
76
oshmem/include/mpif.h.in
Обычный файл
@ -0,0 +1,76 @@
|
||||
! -*- fortran -*-
|
||||
!
|
||||
! Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
! All rights reserved.
|
||||
! $COPYRIGHT$
|
||||
!
|
||||
! Additional copyrights may follow
|
||||
!
|
||||
! $HEADER$
|
||||
!
|
||||
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
! WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
|
||||
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
!
|
||||
! Do ***not*** copy this file to the directory where your Fortran
|
||||
! application is compiled unless it is absolutely necessary! Most
|
||||
! modern Fortran compilers now support the -I command line flag, which
|
||||
! tells the compiler where to find .h files (specifically, this one). For
|
||||
! example:
|
||||
!
|
||||
! shell$ mpif77 foo.f -o foo -I$OMPI_HOME/include
|
||||
!
|
||||
! will probably do the trick (assuming that you have set OMPI_HOME
|
||||
! properly).
|
||||
!
|
||||
! That being said, OMPI's "mpif77" wrapper compiler should
|
||||
! automatically include the -I option for you. The following command
|
||||
! should be equivalent to the command listed above:
|
||||
!
|
||||
! shell$ mpif77 foo.f -o foo
|
||||
!
|
||||
! You should not copy this file to your local directory because it is
|
||||
! possible that this file will be changed between versions of Open MPI.
|
||||
! Indeed, this mpif.h is incompatible with the mpif.h of other
|
||||
! implementations of MPI. Using this mpif.h with other implementations
|
||||
! of MPI, or with other versions of Open MPI will result in undefined
|
||||
! behavior (to include incorrect results, segmentation faults,
|
||||
! unexplainable "hanging" in your application, etc.). Always use the
|
||||
! -I command line option instead (or let mpif77 do it for you).
|
||||
!
|
||||
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
! WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
|
||||
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
!
|
||||
! Include the back-end file that has the bulk of the MPI Fortran
|
||||
! interface.
|
||||
!
|
||||
|
||||
include 'mpif-common.h'
|
||||
|
||||
!
|
||||
! These "external" statements are specific to the MPI F77 interface
|
||||
! (and are toxic to the MPI F90 interface), and are therefore in the
|
||||
! MPI F77-specific header file (i.e., this one).
|
||||
!
|
||||
external MPI_NULL_COPY_FN, MPI_NULL_DELETE_FN
|
||||
external MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN
|
||||
external MPI_TYPE_NULL_COPY_FN, MPI_TYPE_NULL_DELETE_FN
|
||||
external MPI_DUP_FN, MPI_COMM_DUP_FN, MPI_TYPE_DUP_FN
|
||||
external MPI_WIN_NULL_COPY_FN
|
||||
external MPI_WIN_NULL_DELETE_FN
|
||||
external MPI_WIN_DUP_FN
|
||||
! Note that MPI_CONVERSION_FN_NULL is a "constant" (it is only ever
|
||||
! checked for comparison; it is never invoked), but it is passed as
|
||||
! a function pointer (to MPI_REGISTER_DATAREP) and therefore must be
|
||||
! the same size/type. It is therefore external'ed here, and not
|
||||
! defined with an integer value in mpif-common.h.
|
||||
external MPI_CONVERSION_FN_NULL
|
||||
|
||||
!
|
||||
! double precision functions
|
||||
!
|
||||
external MPI_WTIME, MPI_WTICK @MPIF_H_PMPI_W_FUNCS@
|
||||
double precision MPI_WTIME, MPI_WTICK @MPIF_H_PMPI_W_FUNCS@
|
||||
|
11
oshmem/include/mpp/shmem.fh
Обычный файл
11
oshmem/include/mpp/shmem.fh
Обычный файл
@ -0,0 +1,11 @@
|
||||
!
|
||||
! Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
! All rights reserved.
|
||||
! $COPYRIGHT$
|
||||
!
|
||||
! Additional copyrights may follow
|
||||
!
|
||||
! $HEADER$
|
||||
!
|
||||
|
||||
! Forwarding header: pulls in the declarations from a file named
! 'shmem.fh' found on the include search path.
! NOTE(review): this wrapper is itself named shmem.fh (mpp/shmem.fh);
! confirm that the compiler's include search resolves the name below
! to the parent directory's shmem.fh and not back to this file.
include 'shmem.fh'
|
16
oshmem/include/mpp/shmem.h
Обычный файл
16
oshmem/include/mpp/shmem.h
Обычный файл
@ -0,0 +1,16 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef __MPP_SHMEM_H__
|
||||
#define __MPP_SHMEM_H__
|
||||
|
||||
#include <shmem.h>
|
||||
|
||||
#endif
|
15
oshmem/include/oshmem/Makefile.am
Обычный файл
15
oshmem/include/oshmem/Makefile.am
Обычный файл
@ -0,0 +1,15 @@
|
||||
# Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
|
||||
|
||||
headers += \
|
||||
oshmem/constants.h \
|
||||
oshmem/types.h
|
||||
|
||||
nodist_headers += \
|
||||
oshmem/version.h
|
127
oshmem/include/oshmem/constants.h
Обычный файл
127
oshmem/include/oshmem/constants.h
Обычный файл
@ -0,0 +1,127 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef OSHMEM_CONSTANTS_H
|
||||
#define OSHMEM_CONSTANTS_H
|
||||
|
||||
#include "orte/constants.h"
|
||||
#if defined(OSHMEM_PROFILING) && (OSHMEM_PROFILING == 1)
|
||||
#include "oshmem/shmem/c/profile/defines.h"
|
||||
#endif
|
||||
#include "oshmem/include/shmem.h"
|
||||
|
||||
|
||||
#define OSHMEM_ERR_BASE ORTE_ERR_MAX
|
||||
|
||||
/* error codes */
|
||||
enum {
|
||||
/* Error codes inherited from ORTE/OPAL. Still enum values so
|
||||
that we might get nice debugger help */
|
||||
OSHMEM_SUCCESS = ORTE_SUCCESS,
|
||||
|
||||
OSHMEM_ERROR = ORTE_ERROR,
|
||||
OSHMEM_ERR_OUT_OF_RESOURCE = ORTE_ERR_OUT_OF_RESOURCE,
|
||||
OSHMEM_ERR_TEMP_OUT_OF_RESOURCE = ORTE_ERR_TEMP_OUT_OF_RESOURCE,
|
||||
OSHMEM_ERR_RESOURCE_BUSY = ORTE_ERR_RESOURCE_BUSY,
|
||||
OSHMEM_ERR_BAD_PARAM = ORTE_ERR_BAD_PARAM,
|
||||
OSHMEM_ERR_FATAL = ORTE_ERR_FATAL,
|
||||
OSHMEM_ERR_NOT_IMPLEMENTED = ORTE_ERR_NOT_IMPLEMENTED,
|
||||
OSHMEM_ERR_NOT_SUPPORTED = ORTE_ERR_NOT_SUPPORTED,
|
||||
OSHMEM_ERR_INTERUPTED = ORTE_ERR_INTERUPTED,
|
||||
OSHMEM_ERR_WOULD_BLOCK = ORTE_ERR_WOULD_BLOCK,
|
||||
OSHMEM_ERR_IN_ERRNO = ORTE_ERR_IN_ERRNO,
|
||||
OSHMEM_ERR_UNREACH = ORTE_ERR_UNREACH,
|
||||
OSHMEM_ERR_NOT_FOUND = ORTE_ERR_NOT_FOUND,
|
||||
OSHMEM_EXISTS = ORTE_EXISTS, /* indicates that the specified object already exists */
|
||||
OSHMEM_ERR_TIMEOUT = ORTE_ERR_TIMEOUT,
|
||||
OSHMEM_ERR_NOT_AVAILABLE = ORTE_ERR_NOT_AVAILABLE,
|
||||
OSHMEM_ERR_PERM = ORTE_ERR_PERM,
|
||||
OSHMEM_ERR_VALUE_OUT_OF_BOUNDS = ORTE_ERR_VALUE_OUT_OF_BOUNDS,
|
||||
OSHMEM_ERR_FILE_READ_FAILURE = ORTE_ERR_FILE_READ_FAILURE,
|
||||
OSHMEM_ERR_FILE_WRITE_FAILURE = ORTE_ERR_FILE_WRITE_FAILURE,
|
||||
OSHMEM_ERR_FILE_OPEN_FAILURE = ORTE_ERR_FILE_OPEN_FAILURE,
|
||||
|
||||
OSHMEM_ERR_RECV_LESS_THAN_POSTED = ORTE_ERR_RECV_LESS_THAN_POSTED,
|
||||
OSHMEM_ERR_RECV_MORE_THAN_POSTED = ORTE_ERR_RECV_MORE_THAN_POSTED,
|
||||
OSHMEM_ERR_NO_MATCH_YET = ORTE_ERR_NO_MATCH_YET,
|
||||
OSHMEM_ERR_BUFFER = ORTE_ERR_BUFFER,
|
||||
OSHMEM_ERR_REQUEST = ORTE_ERR_REQUEST,
|
||||
OSHMEM_ERR_NO_CONNECTION_ALLOWED = ORTE_ERR_NO_CONNECTION_ALLOWED,
|
||||
OSHMEM_ERR_CONNECTION_REFUSED = ORTE_ERR_CONNECTION_REFUSED ,
|
||||
OSHMEM_ERR_CONNECTION_FAILED = ORTE_ERR_CONNECTION_FAILED,
|
||||
OSHMEM_PACK_MISMATCH = ORTE_ERR_PACK_MISMATCH,
|
||||
OSHMEM_ERR_PACK_FAILURE = ORTE_ERR_PACK_FAILURE,
|
||||
OSHMEM_ERR_UNPACK_FAILURE = ORTE_ERR_UNPACK_FAILURE,
|
||||
OSHMEM_ERR_COMM_FAILURE = ORTE_ERR_COMM_FAILURE,
|
||||
OSHMEM_UNPACK_INADEQUATE_SPACE = ORTE_ERR_UNPACK_INADEQUATE_SPACE,
|
||||
OSHMEM_UNPACK_READ_PAST_END_OF_BUFFER = ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER,
|
||||
OSHMEM_ERR_TYPE_MISMATCH = ORTE_ERR_TYPE_MISMATCH,
|
||||
OSHMEM_ERR_COMPARE_FAILURE = ORTE_ERR_COMPARE_FAILURE,
|
||||
OSHMEM_ERR_COPY_FAILURE = ORTE_ERR_COPY_FAILURE,
|
||||
OSHMEM_ERR_UNKNOWN_DATA_TYPE = ORTE_ERR_UNKNOWN_DATA_TYPE,
|
||||
OSHMEM_ERR_DATA_TYPE_REDEF = ORTE_ERR_DATA_TYPE_REDEF,
|
||||
OSHMEM_ERR_DATA_OVERWRITE_ATTEMPT = ORTE_ERR_DATA_OVERWRITE_ATTEMPT
|
||||
};
|
||||
|
||||
#define OSHMEM_ERR_MAX (OSHMEM_ERR_BASE - 1)
|
||||
|
||||
|
||||
/* C datatypes */
|
||||
/*
|
||||
* SHMEM datatype constants
|
||||
* Do not change the order of these without also modifying mpif.h.in.
|
||||
*/
|
||||
enum {
|
||||
SHMEM_NULL = 0,
|
||||
SHMEM_CHAR,
|
||||
SHMEM_UCHAR,
|
||||
SHMEM_SHORT,
|
||||
SHMEM_USHORT,
|
||||
SHMEM_INT,
|
||||
SHMEM_UINT,
|
||||
SHMEM_LONG,
|
||||
SHMEM_ULONG,
|
||||
SHMEM_LLONG,
|
||||
SHMEM_ULLONG,
|
||||
SHMEM_FLOAT,
|
||||
SHMEM_DOUBLE,
|
||||
SHMEM_LDOUBLE,
|
||||
|
||||
SHMEM_FINT,
|
||||
SHMEM_FINT4,
|
||||
SHMEM_FINT8
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* Miscellaneous constants
|
||||
*/
|
||||
/* Wildcard source rank. The negative value is parenthesized so the macro
 * always expands to a single, self-contained expression. */
#define SHMEM_ANY_SOURCE (-1) /* match any source rank */
|
||||
/* Rank value denoting the null process. The negative value is parenthesized
 * so the macro always expands to a single, self-contained expression. */
#define SHMEM_PROC_NULL (-2) /* rank of null process */
|
||||
/* Sentinel for an undefined value. The negative value is parenthesized so
 * the macro always expands to a single, self-contained expression. */
#define SHMEM_UNDEFINED (-32766) /* undefined stuff */
|
||||
|
||||
|
||||
#ifndef UNREFERENCED_PARAMETER
|
||||
/* Evaluate P and discard the result, to silence unused-parameter warnings.
 * P is parenthesized so that an expression argument is cast to void as a
 * whole; without the parentheses only its first operand would be cast. */
#define UNREFERENCED_PARAMETER(P) ((void)(P))
|
||||
#endif
|
||||
|
||||
#define OSHMEM_PREDEFINED_GLOBAL(type, global) ((type) ((void *) &(global)))
|
||||
|
||||
#if OMPI_WANT_MEMCHECKER
|
||||
#define MEMCHECKER(x) do { \
|
||||
x; \
|
||||
} while(0)
|
||||
#else
|
||||
#define MEMCHECKER(x)
|
||||
#endif /* OMPI_WANT_MEMCHECKER */
|
||||
|
||||
|
||||
#endif /* OSHMEM_CONSTANTS_H */
|
||||
|
23
oshmem/include/oshmem/types.h
Обычный файл
23
oshmem/include/oshmem/types.h
Обычный файл
@ -0,0 +1,23 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
#ifndef SHMEM_TYPES_H
|
||||
#define SHMEM_TYPES_H
|
||||
|
||||
|
||||
/*
|
||||
* Predefine some internal types so we don't need all the include
|
||||
* dependencies.
|
||||
*/
|
||||
|
||||
struct oshmem_proc_t;
|
||||
struct oshmem_group_t;
|
||||
struct oshmem_op_t;
|
||||
|
||||
#endif
|
30
oshmem/include/oshmem/version.h.in
Обычный файл
30
oshmem/include/oshmem/version.h.in
Обычный файл
@ -0,0 +1,30 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
* This file should be included by any file that needs full
|
||||
* version information for the OSHMEM project
|
||||
*/
|
||||
|
||||
#ifndef OSHMEM_VERSIONS_H
|
||||
#define OSHMEM_VERSIONS_H
|
||||
|
||||
#define OSHMEM_MAJOR_VERSION @OSHMEM_MAJOR_VERSION@
|
||||
#define OSHMEM_MINOR_VERSION @OSHMEM_MINOR_VERSION@
|
||||
#define OSHMEM_RELEASE_VERSION @OSHMEM_RELEASE_VERSION@
|
||||
#define OSHMEM_GREEK_VERSION "@OSHMEM_GREEK_VERSION@"
|
||||
#define OSHMEM_WANT_REPO_REV @OSHMEM_WANT_REPO_REV@
|
||||
#define OSHMEM_REPO_REV "@OSHMEM_REPO_REV@"
|
||||
#ifdef OSHMEM_VERSION
|
||||
/* If we included version.h, we want the real version, not the
|
||||
stripped (no-r number) version */
|
||||
#undef OSHMEM_VERSION
|
||||
#endif
|
||||
#define OSHMEM_VERSION "@OSHMEM_VERSION@"
|
||||
|
||||
#endif
|
125
oshmem/include/oshmem_config.h.in
Обычный файл
125
oshmem/include/oshmem_config.h.in
Обычный файл
@ -0,0 +1,125 @@
|
||||
/* -*- c -*-
|
||||
*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
* Function: - OS, CPU and compiler dependent configuration
|
||||
*/
|
||||
|
||||
#ifndef OSHMEM_CONFIG_H
|
||||
#define OSHMEM_CONFIG_H
|
||||
|
||||
#include "opal_config.h"
|
||||
|
||||
#define OSHMEM_IDENT_STRING OPAL_IDENT_STRING
|
||||
|
||||
/***********************************************************************
|
||||
*
|
||||
* OMPI-specific Fortran code that should be in ompi_config.h, but not
|
||||
* in the other projects.
|
||||
*
|
||||
**********************************************************************/
|
||||
|
||||
/* MPI_Fint is the same as ompi_fortran_integer_t */
|
||||
#define MPI_Fint ompi_fortran_integer_t
|
||||
|
||||
#if OMPI_HAVE_FORTRAN_REAL && OMPI_HAVE_FORTRAN_COMPLEX
|
||||
/* * C type for Fortran COMPLEX */
|
||||
/*typedef struct {
|
||||
ompi_fortran_real_t real;
|
||||
ompi_fortran_real_t imag;
|
||||
} ompi_fortran_complex_t;*/
|
||||
#endif
|
||||
|
||||
#if OMPI_HAVE_FORTRAN_REAL4 && OMPI_HAVE_FORTRAN_COMPLEX8
|
||||
/* * C type for Fortran COMPLEX*8 */
|
||||
/*typedef struct {
|
||||
ompi_fortran_real4_t real;
|
||||
ompi_fortran_real4_t imag;
|
||||
} ompi_fortran_complex8_t;*/
|
||||
#endif
|
||||
|
||||
#if OMPI_HAVE_FORTRAN_REAL8 && OMPI_HAVE_FORTRAN_COMPLEX16
|
||||
/* * C type for Fortran COMPLEX*16 */
|
||||
/*typedef struct {
|
||||
ompi_fortran_real8_t real;
|
||||
ompi_fortran_real8_t imag;
|
||||
} ompi_fortran_complex16_t;*/
|
||||
#endif
|
||||
|
||||
#if OMPI_HAVE_FORTRAN_REAL16 && OMPI_HAVE_FORTRAN_COMPLEX32
|
||||
/* * C type for Fortran COMPLEX*32 */
|
||||
/*typedef struct {
|
||||
ompi_fortran_real16_t real;
|
||||
ompi_fortran_real16_t imag;
|
||||
} ompi_fortran_complex32_t;*/
|
||||
#endif
|
||||
|
||||
#if OMPI_HAVE_FORTRAN_DOUBLE_PRECISION
|
||||
/* * C type for Fortran DOUBLE COMPLEX */
|
||||
/*typedef struct {
|
||||
ompi_fortran_double_precision_t real;
|
||||
ompi_fortran_double_precision_t imag;
|
||||
} ompi_fortran_double_complex_t;*/
|
||||
#endif
|
||||
|
||||
#if OPAL_HAVE_ATTRIBUTE_DESTRUCTOR
|
||||
# define __opal_attribute_destructor__ __attribute__((__destructor__))
|
||||
#else
|
||||
# define __opal_attribute_destructor__
|
||||
#endif
|
||||
|
||||
#if defined(__WINDOWS__)
|
||||
|
||||
# if defined(_USRDLL) /* building shared libraries (.DLL) */
|
||||
# if defined(OSHMEM_EXPORTS)
|
||||
# define OSHMEM_DECLSPEC __declspec(dllexport)
|
||||
# define OSHMEM_MODULE_DECLSPEC
|
||||
# else
|
||||
# define OSHMEM_DECLSPEC __declspec(dllimport)
|
||||
# if defined(OSHMEM_MODULE_EXPORTS)
|
||||
# define OSHMEM_MODULE_DECLSPEC __declspec(dllexport)
|
||||
# else
|
||||
# define OSHMEM_MODULE_DECLSPEC __declspec(dllimport)
|
||||
# endif /* defined(OSHMEM_MODULE_EXPORTS) */
|
||||
# endif /* defined(OSHMEM_EXPORTS) */
|
||||
# else /* building static library */
|
||||
# if defined(OSHMEM_IMPORTS)
|
||||
# define OSHMEM_DECLSPEC __declspec(dllimport)
|
||||
# else
|
||||
# define OSHMEM_DECLSPEC
|
||||
# endif /* defined(OSHMEM_IMPORTS) */
|
||||
# define OSHMEM_MODULE_DECLSPEC
|
||||
# endif /* defined(_USRDLL) */
|
||||
|
||||
#else
|
||||
|
||||
# if OPAL_C_HAVE_VISIBILITY
|
||||
# ifndef OSHMEM_DECLSPEC
|
||||
# define OSHMEM_DECLSPEC __opal_attribute_visibility__("default")
|
||||
# endif
|
||||
# ifndef OSHMEM_MODULE_DECLSPEC
|
||||
# define OSHMEM_MODULE_DECLSPEC __opal_attribute_visibility__("default")
|
||||
# endif
|
||||
# ifndef OSHMEM_DESTRUCTOR
|
||||
# define OSHMEM_DESTRUCTOR __opal_attribute_destructor__
|
||||
# endif
|
||||
# else
|
||||
# ifndef OSHMEM_DECLSPEC
|
||||
# define OSHMEM_DECLSPEC
|
||||
# endif
|
||||
# ifndef OSHMEM_MODULE_DECLSPEC
|
||||
# define OSHMEM_MODULE_DECLSPEC
|
||||
# endif
|
||||
# ifndef OSHMEM_DESTRUCTOR
|
||||
# define OSHMEM_DESTRUCTOR
|
||||
# endif
|
||||
# endif
|
||||
#endif /* defined(__WINDOWS__) */
|
||||
|
||||
#endif
|
55
oshmem/include/shmem.fh
Обычный файл
55
oshmem/include/shmem.fh
Обычный файл
@ -0,0 +1,55 @@
|
||||
! Emacs: -*- mode: fortran; -*-
|
||||
!
|
||||
! Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
! All rights reserved.
|
||||
! $COPYRIGHT$
|
||||
!
|
||||
! Additional copyrights may follow
|
||||
!
|
||||
! $HEADER$
|
||||
!
|
||||
|
||||
!
|
||||
! TODO: exact values should be found during configuration
|
||||
!
|
||||
|
||||
integer SHMEM_BARRIER_SYNC_SIZE
|
||||
parameter ( SHMEM_BARRIER_SYNC_SIZE = 4 )
|
||||
|
||||
integer SHMEM_BCAST_SYNC_SIZE
|
||||
parameter ( SHMEM_BCAST_SYNC_SIZE = 8 )
|
||||
|
||||
|
||||
integer SHMEM_COLLECT_SYNC_SIZE
|
||||
parameter ( SHMEM_COLLECT_SYNC_SIZE = 8 )
|
||||
|
||||
integer SHMEM_REDUCE_SYNC_SIZE
|
||||
parameter ( SHMEM_REDUCE_SYNC_SIZE = 8 )
|
||||
|
||||
integer SHMEM_SYNC_VALUE
|
||||
parameter ( SHMEM_SYNC_VALUE = -1 )
|
||||
|
||||
integer SHMEM_REDUCE_MIN_WRKDATA_SIZE
|
||||
parameter ( SHMEM_REDUCE_MIN_WRKDATA_SIZE = 8 )
|
||||
|
||||
!
|
||||
! waits
|
||||
!
|
||||
integer SHMEM_CMP_EQ
|
||||
parameter ( SHMEM_CMP_EQ = 0 )
|
||||
integer SHMEM_CMP_NE
|
||||
parameter ( SHMEM_CMP_NE = 1 )
|
||||
integer SHMEM_CMP_GT
|
||||
parameter ( SHMEM_CMP_GT = 2 )
|
||||
integer SHMEM_CMP_LE
|
||||
parameter ( SHMEM_CMP_LE = 3 )
|
||||
integer SHMEM_CMP_LT
|
||||
parameter ( SHMEM_CMP_LT = 4 )
|
||||
integer SHMEM_CMP_GE
|
||||
parameter ( SHMEM_CMP_GE = 5 )
|
||||
|
||||
|
||||
logical shmem_pe_accessible
|
||||
logical shmem_addr_accessible
|
||||
|
||||
integer*8 shmem_ptr
|
390
oshmem/include/shmem.h.in
Обычный файл
390
oshmem/include/shmem.h.in
Обычный файл
@ -0,0 +1,390 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef OSHMEM_SHMEM_H
|
||||
#define OSHMEM_SHMEM_H
|
||||
|
||||
|
||||
#include <stddef.h> /* include for ptrdiff_t */
|
||||
|
||||
#if defined(WIN32) || defined(_WIN32)
|
||||
# define OSHMEM_COMPLEX_TYPE(type)
|
||||
#else
|
||||
# if defined(c_plusplus) || defined(__cplusplus)
|
||||
# include <complex>
|
||||
# define OSHMEM_COMPLEX_TYPE(type) std::complex<type>
|
||||
# else
|
||||
# include <complex.h>
|
||||
# define OSHMEM_COMPLEX_TYPE(type) type complex
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* SHMEM version
|
||||
*/
|
||||
#define SHMEM_VERSION 1
|
||||
#define SHMEM_SUBVERSION 5
|
||||
|
||||
|
||||
#ifndef OSHMEM_DECLSPEC
|
||||
# if defined(WIN32) || defined(_WIN32)
|
||||
# if defined(OSHMEM_IMPORTS)
|
||||
# define OSHMEM_DECLSPEC __declspec(dllimport)
|
||||
# else
|
||||
# define OSHMEM_DECLSPEC
|
||||
# endif /* defined(OSHMEM_IMPORTS) */
|
||||
# else
|
||||
# if defined(OPAL_C_HAVE_VISIBILITY) && (OPAL_C_HAVE_VISIBILITY == 1)
|
||||
# define OSHMEM_DECLSPEC __attribute__((visibility("default")))
|
||||
# else
|
||||
# define OSHMEM_DECLSPEC
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef OSHMEM_DESTRUCTOR
|
||||
# if defined(OPAL_C_HAVE_VISIBILITY) && (OPAL_C_HAVE_VISIBILITY == 1)
|
||||
# define OSHMEM_DESTRUCTOR __attribute__((__destructor__))
|
||||
# else
|
||||
# define OSHMEM_DESTRUCTOR
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* OpenSHMEM API (www.openshmem.org)
|
||||
*/
|
||||
|
||||
/*
|
||||
* Environment variables
|
||||
*/
|
||||
|
||||
/* size of symmetric heap in bytes.
|
||||
* Can be qualified with the letter 'K', 'M', 'G' or 'T'
|
||||
*/
|
||||
#define SHMEM_HEAP_SIZE "SHMEM_SYMMETRIC_HEAP_SIZE"
|
||||
|
||||
/* The following environment variables are Mellanox extensions */
|
||||
|
||||
/*
|
||||
* Type of allocator used by symmetric heap
|
||||
*/
|
||||
#define SHMEM_HEAP_TYPE "SHMEM_SYMMETRIC_HEAP_ALLOCATOR"
|
||||
|
||||
/*
|
||||
* Constants and definitions
|
||||
*/
|
||||
enum shmem_wait_ops {
|
||||
SHMEM_CMP_EQ,
|
||||
SHMEM_CMP_NE,
|
||||
SHMEM_CMP_GT,
|
||||
SHMEM_CMP_LE,
|
||||
SHMEM_CMP_LT,
|
||||
SHMEM_CMP_GE
|
||||
};
|
||||
|
||||
#define _SHMEM_BARRIER_SYNC_SIZE (1)
|
||||
#define _SHMEM_BCAST_SYNC_SIZE (1 + _SHMEM_BARRIER_SYNC_SIZE)
|
||||
#define _SHMEM_COLLECT_SYNC_SIZE (1 + _SHMEM_BCAST_SYNC_SIZE)
|
||||
#define _SHMEM_REDUCE_SYNC_SIZE (1 + _SHMEM_BCAST_SYNC_SIZE)
|
||||
#define _SHMEM_REDUCE_MIN_WRKDATA_SIZE (1)
|
||||
#define _SHMEM_SYNC_VALUE (-1)
|
||||
|
||||
#define SHMEM_BARRIER_SYNC_SIZE _SHMEM_BARRIER_SYNC_SIZE
|
||||
#define SHMEM_BCAST_SYNC_SIZE _SHMEM_BCAST_SYNC_SIZE
|
||||
#define SHMEM_COLLECT_SYNC_SIZE _SHMEM_COLLECT_SYNC_SIZE
|
||||
#define SHMEM_REDUCE_SYNC_SIZE _SHMEM_REDUCE_SYNC_SIZE
|
||||
#define SHMEM_REDUCE_MIN_WRKDATA_SIZE _SHMEM_REDUCE_MIN_WRKDATA_SIZE
|
||||
#define SHMEM_SYNC_VALUE _SHMEM_SYNC_VALUE
|
||||
|
||||
|
||||
/*
|
||||
* Initialization routines
|
||||
*/
|
||||
OSHMEM_DECLSPEC void start_pes(int npes);
|
||||
|
||||
|
||||
/*
|
||||
* Query routines
|
||||
*/
|
||||
OSHMEM_DECLSPEC int _num_pes(void);
|
||||
OSHMEM_DECLSPEC int _my_pe(void);
|
||||
|
||||
|
||||
/*
|
||||
* Accessibility routines
|
||||
*/
|
||||
OSHMEM_DECLSPEC int shmem_pe_accessible(int pe);
|
||||
OSHMEM_DECLSPEC int shmem_addr_accessible(void *addr, int pe);
|
||||
|
||||
/*
|
||||
* Symmetric heap routines
|
||||
*/
|
||||
OSHMEM_DECLSPEC void* shmalloc(size_t size);
|
||||
OSHMEM_DECLSPEC void* shmemalign(size_t align, size_t size);
|
||||
OSHMEM_DECLSPEC void* shrealloc(void *ptr, size_t size);
|
||||
OSHMEM_DECLSPEC void shfree(void* ptr);
|
||||
|
||||
/*
|
||||
* Remote pointer operations
|
||||
*/
|
||||
OSHMEM_DECLSPEC void *shmem_ptr(void *ptr, int pe);
|
||||
|
||||
/*
|
||||
* Elemental put routines
|
||||
*/
|
||||
OSHMEM_DECLSPEC void shmem_short_p(short* addr, short value, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_int_p(int* addr, int value, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_long_p(long* addr, long value, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_float_p(float* addr, float value, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_double_p(double* addr, double value, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_longlong_p(long long* addr, long long value, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_longdouble_p(long double* addr, long double value, int pe);
|
||||
|
||||
/*
|
||||
* Block data put routines
|
||||
*/
|
||||
OSHMEM_DECLSPEC void shmem_char_put(char *target, const char *source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_short_put(short *target, const short *source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_int_put(int* target, const int* source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_long_put(long *target, const long *source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_float_put(float *target, const float *source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_double_put(double *target, const double *source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_longlong_put(long long *target, const long long *source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_longdouble_put(long double *target, const long double *source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_put32(void *target, const void *source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_put64(void *target, const void *source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_put128(void *target, const void *source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_putmem(void *target, const void *source, size_t len, int pe);
|
||||
|
||||
/*
|
||||
* Strided put routines
|
||||
*/
|
||||
OSHMEM_DECLSPEC void shmem_int_iput(int* target, const int* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_short_iput(short* target, const short* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_float_iput(float* target, const float* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_double_iput(double* target, const double* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_longlong_iput(long long* target, const long long* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_longdouble_iput(long double* target, const long double* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_long_iput(long* target, const long* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_iput32(void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_iput64(void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_iput128(void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe);
|
||||
|
||||
/*
|
||||
* Elemental get routines
|
||||
*/
|
||||
OSHMEM_DECLSPEC short shmem_short_g(short* addr, int pe);
|
||||
OSHMEM_DECLSPEC int shmem_int_g(int* addr, int pe);
|
||||
OSHMEM_DECLSPEC long shmem_long_g(long* addr, int pe);
|
||||
OSHMEM_DECLSPEC float shmem_float_g(float* addr, int pe);
|
||||
OSHMEM_DECLSPEC double shmem_double_g(double* addr, int pe);
|
||||
OSHMEM_DECLSPEC long long shmem_longlong_g(long long* addr, int pe);
|
||||
OSHMEM_DECLSPEC long double shmem_longdouble_g(long double* addr, int pe);
|
||||
|
||||
/*
|
||||
* Block data get routines
|
||||
*/
|
||||
OSHMEM_DECLSPEC void shmem_char_get(char *target, const char *source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_short_get(short *target, const short *source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_int_get(int *target, const int *source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_long_get(long *target, const long *source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_float_get(float *target, const float *source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_double_get(double *target, const double *source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_longlong_get(long long *target, const long long *source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_longdouble_get(long double *target, const long double *source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_get32(void *target, const void *source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_get64(void *target, const void *source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_get128(void *target, const void *source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_getmem(void *target, const void *source, size_t len, int pe);
|
||||
|
||||
/*
|
||||
* Strided get routines
|
||||
*/
|
||||
OSHMEM_DECLSPEC void shmem_int_iget(int* target, const int* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_short_iget(short* target, const short* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_float_iget(float* target, const float* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_double_iget(double* target, const double* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_longlong_iget(long long* target, const long long* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_longdouble_iget(long double* target, const long double* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_long_iget(long* target, const long* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_iget32(void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_iget64(void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_iget128(void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe);
|
||||
|
||||
/*
|
||||
* Atomic operations
|
||||
*/
|
||||
/* Atomic swap */
|
||||
OSHMEM_DECLSPEC long shmem_swap(long *target, long value, int pe);
|
||||
OSHMEM_DECLSPEC double shmem_double_swap(double *target, double value, int pe);
|
||||
OSHMEM_DECLSPEC float shmem_float_swap(float *target, float value, int pe);
|
||||
OSHMEM_DECLSPEC int shmem_int_swap(int *target, int value, int pe);
|
||||
OSHMEM_DECLSPEC long shmem_long_swap(long *target, long value, int pe);
|
||||
OSHMEM_DECLSPEC long long shmem_longlong_swap(long long*target, long long value, int pe);
|
||||
|
||||
/* Atomic conditional swap */
|
||||
OSHMEM_DECLSPEC int shmem_int_cswap(int *target, int cond, int value, int pe);
|
||||
OSHMEM_DECLSPEC long shmem_long_cswap(long *target, long cond, long value, int pe);
|
||||
OSHMEM_DECLSPEC long long shmem_longlong_cswap(long long *target, long long cond, long long value, int pe);
|
||||
|
||||
/* Atomic Fetch&Add */
|
||||
OSHMEM_DECLSPEC int shmem_int_fadd(int *target, int value, int pe);
|
||||
OSHMEM_DECLSPEC long shmem_long_fadd(long *target, long value, int pe);
|
||||
OSHMEM_DECLSPEC long long shmem_longlong_fadd(long long *target, long long value, int pe);
|
||||
|
||||
/* Atomic Fetch&Inc */
|
||||
OSHMEM_DECLSPEC int shmem_int_finc(int *target, int pe);
|
||||
OSHMEM_DECLSPEC long shmem_long_finc(long *target, int pe);
|
||||
OSHMEM_DECLSPEC long long shmem_longlong_finc(long long *target, int pe);
|
||||
|
||||
/* Atomic Add */
|
||||
OSHMEM_DECLSPEC void shmem_int_add(int *target, int value, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_long_add(long *target, long value, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_longlong_add(long long *target, long long value, int pe);
|
||||
|
||||
/* Atomic Inc */
|
||||
OSHMEM_DECLSPEC void shmem_int_inc(int *target, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_long_inc(long *target, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_longlong_inc(long long *target, int pe);
|
||||
|
||||
/*
|
||||
* Lock functions
|
||||
*/
|
||||
OSHMEM_DECLSPEC void shmem_set_lock(long *lock);
|
||||
OSHMEM_DECLSPEC void shmem_clear_lock(long *lock);
|
||||
OSHMEM_DECLSPEC int shmem_test_lock(long *lock);
|
||||
|
||||
/*
|
||||
* P2P sync routines
|
||||
*/
|
||||
OSHMEM_DECLSPEC void shmem_short_wait(short *addr, short value);
|
||||
OSHMEM_DECLSPEC void shmem_int_wait(int *addr, int value);
|
||||
OSHMEM_DECLSPEC void shmem_long_wait(long *addr, long value);
|
||||
OSHMEM_DECLSPEC void shmem_longlong_wait(long long *addr, long long value);
|
||||
OSHMEM_DECLSPEC void shmem_wait(long *addr, long value);
|
||||
|
||||
OSHMEM_DECLSPEC void shmem_short_wait_until(short *addr, int cmp, short value);
|
||||
OSHMEM_DECLSPEC void shmem_int_wait_until(int *addr, int cmp, int value);
|
||||
OSHMEM_DECLSPEC void shmem_long_wait_until(long *addr, int cmp, long value);
|
||||
OSHMEM_DECLSPEC void shmem_longlong_wait_until(long long *addr, int cmp, long long value);
|
||||
OSHMEM_DECLSPEC void shmem_wait_until(long *addr, int cmp, long value);
|
||||
|
||||
/*
|
||||
* Barrier sync routines
|
||||
*/
|
||||
OSHMEM_DECLSPEC void shmem_barrier(int PE_start, int logPE_stride, int PE_size, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_barrier_all(void);
|
||||
OSHMEM_DECLSPEC void shmem_fence(void);
|
||||
OSHMEM_DECLSPEC void shmem_quiet(void);
|
||||
|
||||
/*
|
||||
* Collective routines
|
||||
*/
|
||||
OSHMEM_DECLSPEC void shmem_broadcast32(void *target, const void *source, size_t nlong, int PE_root, int PE_start, int logPE_stride, int PE_size, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_broadcast64(void *target, const void *source, size_t nlong, int PE_root, int PE_start, int logPE_stride, int PE_size, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_broadcast(void *target, const void *source, size_t nlong, int PE_root, int PE_start, int logPE_stride, int PE_size, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_collect32(void *target, const void *source, size_t nlong, int PE_start, int logPE_stride, int PE_size, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_collect64(void *target, const void *source, size_t nlong, int PE_start, int logPE_stride, int PE_size, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_fcollect32(void *target, const void *source, size_t nlong, int PE_start, int logPE_stride, int PE_size, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_fcollect64(void *target, const void *source, size_t nlong, int PE_start, int logPE_stride, int PE_size, long *pSync);
|
||||
|
||||
/*
|
||||
* Reduction routines
|
||||
*/
|
||||
OSHMEM_DECLSPEC void shmem_short_and_to_all(short *target, short *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_int_and_to_all(int *target, int *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int *pWrk, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_long_and_to_all(long *target, long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long *pWrk, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_longlong_and_to_all(long long *target, long long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long long *pWrk, long *pSync);
|
||||
|
||||
OSHMEM_DECLSPEC void shmem_short_or_to_all(short *target, short *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_int_or_to_all(int *target, int *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int *pWrk, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_long_or_to_all(long *target, long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long *pWrk, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_longlong_or_to_all(long long *target, long long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long long *pWrk, long *pSync);
|
||||
|
||||
OSHMEM_DECLSPEC void shmem_short_xor_to_all(short *target, short *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_int_xor_to_all(int *target, int *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int *pWrk, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_long_xor_to_all(long *target, long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long *pWrk, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_longlong_xor_to_all(long long *target, long long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long long *pWrk, long *pSync);
|
||||
|
||||
OSHMEM_DECLSPEC void shmem_short_max_to_all(short *target, short *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_int_max_to_all(int *target, int *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int *pWrk, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_long_max_to_all(long *target, long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long *pWrk, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_longlong_max_to_all(long long *target, long long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long long *pWrk, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_float_max_to_all(float *target, float *source, int nreduce, int PE_start, int logPE_stride, int PE_size, float *pWrk, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_double_max_to_all(double *target, double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, double *pWrk, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_longdouble_max_to_all(long double *target, long double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long double *pWrk, long *pSync);
|
||||
|
||||
OSHMEM_DECLSPEC void shmem_short_min_to_all(short *target, short *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_int_min_to_all(int *target, int *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int *pWrk, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_long_min_to_all(long *target, long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long *pWrk, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_longlong_min_to_all(long long *target, long long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long long *pWrk, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_float_min_to_all(float *target, float *source, int nreduce, int PE_start, int logPE_stride, int PE_size, float *pWrk, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_double_min_to_all(double *target, double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, double *pWrk, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_longdouble_min_to_all(long double *target, long double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long double *pWrk, long *pSync);
|
||||
|
||||
OSHMEM_DECLSPEC void shmem_short_sum_to_all(short *target, short *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_int_sum_to_all(int *target, int *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int *pWrk, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_long_sum_to_all(long *target, long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long *pWrk, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_longlong_sum_to_all(long long *target, long long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long long *pWrk, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_float_sum_to_all(float *target, float *source, int nreduce, int PE_start, int logPE_stride, int PE_size, float *pWrk, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_double_sum_to_all(double *target, double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, double *pWrk, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_longdouble_sum_to_all(long double *target, long double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long double *pWrk, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_complexf_sum_to_all(OSHMEM_COMPLEX_TYPE(float) *target, OSHMEM_COMPLEX_TYPE(float) *source, int nreduce, int PE_start, int logPE_stride, int PE_size, OSHMEM_COMPLEX_TYPE(float) *pWrk, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_complexd_sum_to_all(OSHMEM_COMPLEX_TYPE(double) *target, OSHMEM_COMPLEX_TYPE(double) *source, int nreduce, int PE_start, int logPE_stride, int PE_size, OSHMEM_COMPLEX_TYPE(double) *pWrk, long *pSync);
|
||||
|
||||
OSHMEM_DECLSPEC void shmem_short_prod_to_all(short *target, short *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_int_prod_to_all(int *target, int *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int *pWrk, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_long_prod_to_all(long *target, long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long *pWrk, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_longlong_prod_to_all(long long *target, long long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long long *pWrk, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_float_prod_to_all(float *target, float *source, int nreduce, int PE_start, int logPE_stride, int PE_size, float *pWrk, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_double_prod_to_all(double *target, double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, double *pWrk, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_longdouble_prod_to_all(long double *target, long double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long double *pWrk, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_complexf_prod_to_all(OSHMEM_COMPLEX_TYPE(float) *target, OSHMEM_COMPLEX_TYPE(float) *source, int nreduce, int PE_start, int logPE_stride, int PE_size, OSHMEM_COMPLEX_TYPE(float) *pWrk, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_complexd_prod_to_all(OSHMEM_COMPLEX_TYPE(double) *target, OSHMEM_COMPLEX_TYPE(double) *source, int nreduce, int PE_start, int logPE_stride, int PE_size, OSHMEM_COMPLEX_TYPE(double) *pWrk, long *pSync);
|
||||
|
||||
/*
|
||||
* Platform specific cache management routines
|
||||
*/
|
||||
OSHMEM_DECLSPEC void shmem_udcflush(void);
|
||||
OSHMEM_DECLSPEC void shmem_udcflush_line(void* target);
|
||||
OSHMEM_DECLSPEC void shmem_set_cache_inv(void);
|
||||
OSHMEM_DECLSPEC void shmem_set_cache_line_inv(void* target);
|
||||
OSHMEM_DECLSPEC void shmem_clear_cache_inv(void);
|
||||
OSHMEM_DECLSPEC void shmem_clear_cache_line_inv(void* target);
|
||||
|
||||
/*
|
||||
* Legacy API
|
||||
*/
|
||||
OSHMEM_DECLSPEC int num_pes(void);
|
||||
OSHMEM_DECLSPEC int my_pe(void);
|
||||
|
||||
/* Old init/finalize functions - not in the OpenSHMEM spec but still supported */
|
||||
OSHMEM_DECLSPEC void shmem_init(void);
|
||||
OSHMEM_DECLSPEC int shmem_finalize(void) OSHMEM_DESTRUCTOR;
|
||||
OSHMEM_DECLSPEC int shmem_n_pes(void);
|
||||
OSHMEM_DECLSPEC int shmem_my_pe(void);
|
||||
|
||||
OSHMEM_DECLSPEC void shmem_put(void *target, const void *source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_get(void *target, const void *source, size_t len, int pe);
|
||||
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* OSHMEM_SHMEM_H */
|
401
oshmem/include/shmem_portable_platform.h.in
Обычный файл
401
oshmem/include/shmem_portable_platform.h.in
Обычный файл
@ -0,0 +1,401 @@
|
||||
/*
|
||||
* Header file with preprocessor magic to figure out which compiler the user has invoked.
|
||||
*
|
||||
* This code is adapted from the file other/portable_platform.h of GASnet-1.12.0:
|
||||
* - Ripping out the required parts.
|
||||
* - Get rid of brackets as they mess up autoconf
|
||||
* - Delete version tests for older PGI versions (#include "omp.h" not acceptable)
|
||||
* - Indent ('#' should be in column 0)
|
||||
*
|
||||
* External packages (vt, romio) depend on top_build_dir/ompi/include, therefore
|
||||
* although this is not changed in the configure process, this has to be set as
|
||||
* a .in file...
|
||||
* ---------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef MPI_PORTABLE_PLATFORM_H
|
||||
#define MPI_PORTABLE_PLATFORM_H
|
||||
|
||||
/* All files in this directory and all sub-directories (except where otherwise noted)
|
||||
* are subject to the following licensing terms:
|
||||
*
|
||||
* ---------------------------------------------------------------------------
|
||||
* "Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* " All rights reserved.
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software and its
|
||||
* documentation for any purpose, without fee, and without written agreement is
|
||||
* hereby granted, provided that the above copyright notice and the following
|
||||
* two paragraphs appear in all copies of this software.
|
||||
*
|
||||
* IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
|
||||
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
|
||||
* OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
|
||||
* CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
|
||||
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
|
||||
* AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
|
||||
* ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO
|
||||
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS."
|
||||
* ---------------------------------------------------------------------------
|
||||
*
|
||||
* Please see the license.txt files within the gm-conduit, lapi-conduit and
|
||||
* vapi-conduit directories for the licensing terms governing those
|
||||
* contributed components.
|
||||
*
|
||||
* The authors/contributors of GASNet include:
|
||||
*
|
||||
* Dan Bonachea <bonachea@cs.berkeley.edu>:
|
||||
* General infrastructure & documentation
|
||||
* mpi-conduit
|
||||
* elan-conduit
|
||||
* smp-conduit
|
||||
* udp-conduit
|
||||
* extended-ref
|
||||
* template-conduit
|
||||
* Christian Bell <csbell@cs.berkeley.edu>: gm-conduit, shmem-conduit
|
||||
* Mike Welcome <mlwelcome@lbl.gov>: lapi-conduit, portals-conduit
|
||||
* Paul H. Hargrove <phhargrove@lbl.gov>: vapi-conduit, ibv-conduit
|
||||
* Rajesh Nishtala <rajeshn@cs.berkeley.edu>: collectives, dcmf-conduit
|
||||
* Parry Husbands (PJRHusbands@lbl.gov): lapi-conduit
|
||||
*
|
||||
* For more information about GASNet, visit our home page at:
|
||||
* http://gasnet.cs.berkeley.edu/
|
||||
* Or send email to:
|
||||
* <upc@lbl.gov>
|
||||
*
|
||||
* Source code contributions (fixes, patches, extensions etc.) should be
|
||||
* sent to <upc@lbl.gov> to be reviewed for acceptance into the primary
|
||||
* distribution. Contributions are most likely to be accepted if they
|
||||
* are provided as public domain, or under a BSD-style license such as
|
||||
* the one above.
|
||||
*
|
||||
*/
|
||||
#ifndef _STRINGIFY
|
||||
#define _STRINGIFY_HELPER(x) #x
|
||||
#define _STRINGIFY(x) _STRINGIFY_HELPER(x)
|
||||
#endif
|
||||
|
||||
#if defined(__INTEL_COMPILER)
|
||||
# define PLATFORM_COMPILER_FAMILYNAME INTEL
|
||||
# define PLATFORM_COMPILER_FAMILYID 2
|
||||
# ifdef __cplusplus
|
||||
# define PLATFORM_COMPILER_INTEL_CXX 1
|
||||
# else
|
||||
# define PLATFORM_COMPILER_INTEL_C 1
|
||||
# endif
|
||||
# define _PLATFORM_COMPILER_INTEL_MIN_BUILDDATE 19700000 /* year 1970: predates most intel products :) */
|
||||
# ifdef __INTEL_COMPILER_BUILD_DATE
|
||||
# define _PLATFORM_INTEL_COMPILER_BUILD_DATE __INTEL_COMPILER_BUILD_DATE
|
||||
# else
|
||||
# define _PLATFORM_INTEL_COMPILER_BUILD_DATE _PLATFORM_COMPILER_INTEL_MIN_BUILDDATE
|
||||
# endif
|
||||
/* patch number is a decimal build date: YYYYMMDD */
|
||||
# define PLATFORM_COMPILER_VERSION_INT(maj,min,pat) \
|
||||
(((((maj) * 10) | (min)) << 20) | \
|
||||
((pat) < _PLATFORM_COMPILER_INTEL_MIN_BUILDDATE ? \
|
||||
_PLATFORM_COMPILER_INTEL_MIN_BUILDDATE : ((pat)-_PLATFORM_COMPILER_INTEL_MIN_BUILDDATE)))
|
||||
# define PLATFORM_COMPILER_VERSION \
|
||||
PLATFORM_COMPILER_VERSION_INT(__INTEL_COMPILER/10, __INTEL_COMPILER/100, _PLATFORM_INTEL_COMPILER_BUILD_DATE)
|
||||
/*
 * Human-readable Intel compiler version: "<__INTEL_COMPILER>.<build date>",
 * built by adjacent string-literal concatenation.
 * The blanks around "." are required: C++11 lexes a string literal that is
 * directly followed by an identifier ("."_STRINGIFY) as a user-defined
 * literal, which breaks any C++11 translation unit expanding this macro.
 */
# define PLATFORM_COMPILER_VERSION_STR \
|
||||
_STRINGIFY(__INTEL_COMPILER) "." _STRINGIFY(_PLATFORM_INTEL_COMPILER_BUILD_DATE)
|
||||
|
||||
#elif defined(__PATHSCALE__)
|
||||
# define PLATFORM_COMPILER_PATHSCALE 1
|
||||
# define PLATFORM_COMPILER_FAMILYNAME PATHSCALE
|
||||
# define PLATFORM_COMPILER_FAMILYID 3
|
||||
# ifdef __cplusplus
|
||||
# define PLATFORM_COMPILER_PATHSCALE_CXX 1
|
||||
# else
|
||||
# define PLATFORM_COMPILER_PATHSCALE_C 1
|
||||
# endif
|
||||
# define PLATFORM_COMPILER_VERSION \
|
||||
PLATFORM_COMPILER_VERSION_INT(__PATHCC__,__PATHCC_MINOR__,__PATHCC_PATCHLEVEL__)
|
||||
# define PLATFORM_COMPILER_VERSION_STR __PATHSCALE__
|
||||
|
||||
#elif defined(__PGI)
|
||||
# define PLATFORM_COMPILER_PGI 1
|
||||
# define PLATFORM_COMPILER_FAMILYNAME PGI
|
||||
# define PLATFORM_COMPILER_FAMILYID 4
|
||||
# ifdef __cplusplus
|
||||
# define PLATFORM_COMPILER_PGI_CXX 1
|
||||
# else
|
||||
# define PLATFORM_COMPILER_PGI_C 1
|
||||
# endif
|
||||
# if __PGIC__ == 99
|
||||
/* bug 2230: PGI versioning was broken for some platforms in 7.0
|
||||
no way to know exact version, but provide something slightly more accurate */
|
||||
# define PLATFORM_COMPILER_VERSION 0x070000
|
||||
# define PLATFORM_COMPILER_VERSION_STR "7.?-?"
|
||||
# elif defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__)
|
||||
# define PLATFORM_COMPILER_VERSION \
|
||||
PLATFORM_COMPILER_VERSION_INT(__PGIC__,__PGIC_MINOR__,__PGIC_PATCHLEVEL__)
|
||||
/*
 * Human-readable PGI compiler version: "<major>.<minor>-<patchlevel>",
 * built by adjacent string-literal concatenation.
 * The blanks around "." and "-" are required: C++11 lexes a string literal
 * that is directly followed by an identifier ("."_STRINGIFY) as a
 * user-defined literal, which breaks any C++11 translation unit expanding
 * this macro.
 */
# define PLATFORM_COMPILER_VERSION_STR \
|
||||
_STRINGIFY(__PGIC__) "." _STRINGIFY(__PGIC_MINOR__) "-" _STRINGIFY(__PGIC_PATCHLEVEL__)
|
||||
# else
|
||||
/* PGI before 6.1-4 lacks any version ID preprocessor macros - so use this filthy hack */
|
||||
/* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
|
||||
* We cannot do these within mpi.h.in, as we should not include ompi.h
|
||||
* Hopefully, compilers with integrated preprocessors will not analyse code within the #if 0-block
|
||||
* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
|
||||
*/
|
||||
#if 0
|
||||
# ifdef PLATFORM_PGI_IS_ANCIENT
|
||||
/* Include below might fail for ancient versions lacking this header, but testing shows it
|
||||
works back to at least 5.1-3 (Nov 2003), and based on docs probably back to 3.2 (Sep 2000) */
|
||||
# define PLATFORM_COMPILER_VERSION 0
|
||||
# elif defined(__x86_64__) /* bug 1753 - 64-bit omp.h upgrade happenned in <6.0-8,6.1-1) */
|
||||
# include "omp.h"
|
||||
# if defined(_PGOMP_H)
|
||||
/* 6.1.1 or newer */
|
||||
# define PLATFORM_COMPILER_VERSION 0x060101
|
||||
# define PLATFORM_COMPILER_VERSION_STR ">=6.1-1"
|
||||
# else
|
||||
/* 6.0.8 or older */
|
||||
# define PLATFORM_COMPILER_VERSION 0
|
||||
# define PLATFORM_COMPILER_VERSION_STR "<=6.0-8"
|
||||
# endif
|
||||
# else /* 32-bit omp.h upgrade happenned in <5.2-4,6.0-8 */
|
||||
# include "omp.h"
|
||||
# if defined(_PGOMP_H)
|
||||
/* 6.0-8 or newer */
|
||||
# define PLATFORM_COMPILER_VERSION 0x060008
|
||||
# define PLATFORM_COMPILER_VERSION_STR ">=6.0-8"
|
||||
# else
|
||||
/* 5.2-4 or older */
|
||||
# define PLATFORM_COMPILER_VERSION 0
|
||||
# define PLATFORM_COMPILER_VERSION_STR "<=5.2-4"
|
||||
# endif
|
||||
# endif
|
||||
#endif /* 0 */
|
||||
/* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */
|
||||
# endif
|
||||
|
||||
#elif defined(__xlC__)
|
||||
# define PLATFORM_COMPILER_XLC 1
|
||||
# define PLATFORM_COMPILER_FAMILYNAME XLC
|
||||
# define PLATFORM_COMPILER_FAMILYID 5
|
||||
# ifdef __cplusplus
|
||||
# define PLATFORM_COMPILER_XLC_CXX 1
|
||||
# else
|
||||
# define PLATFORM_COMPILER_XLC_C 1
|
||||
# endif
|
||||
# define PLATFORM_COMPILER_VERSION __xlC__
|
||||
# define PLATFORM_COMPILER_VERSION_INT(maj,min,pat) \
|
||||
( ((maj) << 8) | ((min) << 4) | (pat) )
|
||||
|
||||
#elif defined(__DECC) || defined(__DECCXX)
|
||||
# define PLATFORM_COMPILER_COMPAQ 1
|
||||
# define PLATFORM_COMPILER_FAMILYNAME COMPAQ
|
||||
# define PLATFORM_COMPILER_FAMILYID 6
|
||||
# ifdef __cplusplus
|
||||
# define PLATFORM_COMPILER_COMPAQ_CXX 1
|
||||
# else
|
||||
# define PLATFORM_COMPILER_COMPAQ_C 1
|
||||
# endif
|
||||
# if defined(__DECC_VER)
|
||||
# define PLATFORM_COMPILER_VERSION __DECC_VER
|
||||
# elif defined(__DECCXX_VER)
|
||||
# define PLATFORM_COMPILER_VERSION __DECCXX_VER
|
||||
# endif
|
||||
|
||||
# define PLATFORM_COMPILER_VERSION_INT(maj,min,pat) \
|
||||
( ((maj) * 10000000) + ((min) * 100000) + (90000) + (pat) )
|
||||
/* 90000 = official ver, 80000 = customer special ver, 60000 = field test ver */
|
||||
|
||||
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
|
||||
# define PLATFORM_COMPILER_SUN 1
|
||||
# define PLATFORM_COMPILER_FAMILYNAME SUN
|
||||
# define PLATFORM_COMPILER_FAMILYID 7
|
||||
# ifdef __cplusplus
|
||||
# define PLATFORM_COMPILER_SUN_CXX 1
|
||||
# else
|
||||
# define PLATFORM_COMPILER_SUN_C 1
|
||||
# endif
|
||||
# if defined(__SUNPRO_C) && __SUNPRO_C > 0
|
||||
# define PLATFORM_COMPILER_VERSION __SUNPRO_C
|
||||
# elif defined(__SUNPRO_CC) && __SUNPRO_CC > 0
|
||||
# define PLATFORM_COMPILER_VERSION __SUNPRO_CC
|
||||
# endif
|
||||
# define PLATFORM_COMPILER_VERSION_INT(maj,min,pat) \
|
||||
( ((maj) << 8) | ((min) << 4) | (pat) )
|
||||
|
||||
#elif defined(__HP_cc) || defined(__HP_aCC)
|
||||
# define PLATFORM_COMPILER_HP 1
|
||||
# define PLATFORM_COMPILER_FAMILYNAME HP
|
||||
# define PLATFORM_COMPILER_FAMILYID 8
|
||||
# ifdef __cplusplus
|
||||
# define PLATFORM_COMPILER_HP_CXX 1
|
||||
# else
|
||||
# define PLATFORM_COMPILER_HP_C 1
|
||||
# endif
|
||||
# if defined(__HP_cc) && __HP_cc > 0
|
||||
# define PLATFORM_COMPILER_VERSION __HP_cc
|
||||
# elif defined(__HP_aCC) && __HP_aCC > 0
|
||||
# define PLATFORM_COMPILER_VERSION __HP_aCC
|
||||
# endif
|
||||
# define PLATFORM_COMPILER_VERSION_INT(maj,min,pat) \
|
||||
( ((maj) << 16) | ((min) << 8) | (pat) )
|
||||
|
||||
#elif defined(_SGI_COMPILER_VERSION) || \
|
||||
(defined(_COMPILER_VERSION) && defined(__sgi) && !defined(__GNUC__)) /* 7.3.0 and earlier lack _SGI_COMPILER_VERSION */
|
||||
# define PLATFORM_COMPILER_SGI 1
|
||||
# define PLATFORM_COMPILER_FAMILYNAME SGI
|
||||
# define PLATFORM_COMPILER_FAMILYID 9
|
||||
# ifdef __cplusplus
|
||||
# define PLATFORM_COMPILER_SGI_CXX 1
|
||||
# else
|
||||
# define PLATFORM_COMPILER_SGI_C 1
|
||||
# endif
|
||||
# if defined(_SGI_COMPILER_VERSION) && _SGI_COMPILER_VERSION > 0
|
||||
# define PLATFORM_COMPILER_VERSION _SGI_COMPILER_VERSION
|
||||
# elif defined(_COMPILER_VERSION) && _COMPILER_VERSION > 0
|
||||
# define PLATFORM_COMPILER_VERSION _COMPILER_VERSION
|
||||
# endif
|
||||
# define PLATFORM_COMPILER_VERSION_INT(maj,min,pat) \
|
||||
( ((maj) << 8) | ((min) << 4) | (pat) )
|
||||
|
||||
#elif defined(_CRAYC)
|
||||
# define PLATFORM_COMPILER_CRAY 1
|
||||
# define PLATFORM_COMPILER_FAMILYNAME CRAY
|
||||
# define PLATFORM_COMPILER_FAMILYID 10
|
||||
# ifdef __cplusplus
|
||||
# define PLATFORM_COMPILER_CRAY_CXX 1
|
||||
# else
|
||||
# define PLATFORM_COMPILER_CRAY_C 1
|
||||
# endif
|
||||
# if defined(_RELEASE) && defined(_RELEASE_MINOR) /* X1 */
|
||||
# define PLATFORM_COMPILER_VERSION \
|
||||
PLATFORM_COMPILER_VERSION_INT(_RELEASE,_RELEASE_MINOR,0)
|
||||
# elif defined(_RELEASE) /* T3E */
|
||||
# define PLATFORM_COMPILER_VERSION \
|
||||
PLATFORM_COMPILER_VERSION_INT(_RELEASE,0,0)
|
||||
# endif
|
||||
# ifdef _RELEASE_STRING /* X1 */
|
||||
# define PLATFORM_COMPILER_VERSION_STR _RELEASE_STRING
|
||||
# endif
|
||||
|
||||
#elif defined(__KCC)
|
||||
# define PLATFORM_COMPILER_KAI 1
|
||||
# define PLATFORM_COMPILER_FAMILYNAME KAI
|
||||
# define PLATFORM_COMPILER_FAMILYID 11
|
||||
# ifdef __cplusplus
|
||||
# define PLATFORM_COMPILER_KAI_CXX 1
|
||||
# else
|
||||
# define PLATFORM_COMPILER_KAI_C 1
|
||||
# endif
|
||||
|
||||
#elif defined(__MTA__)
|
||||
# define PLATFORM_COMPILER_MTA 1
|
||||
# define PLATFORM_COMPILER_FAMILYNAME MTA
|
||||
# define PLATFORM_COMPILER_FAMILYID 12
|
||||
# ifdef __cplusplus
|
||||
# define PLATFORM_COMPILER_MTA_CXX 1
|
||||
# else
|
||||
# define PLATFORM_COMPILER_MTA_C 1
|
||||
# endif
|
||||
|
||||
#elif defined(_SX)
|
||||
# define PLATFORM_COMPILER_NECSX 1
|
||||
# define PLATFORM_COMPILER_FAMILYNAME NECSX
|
||||
# define PLATFORM_COMPILER_FAMILYID 13
|
||||
# ifdef __cplusplus
|
||||
# define PLATFORM_COMPILER_NECSX_CXX 1
|
||||
# else
|
||||
# define PLATFORM_COMPILER_NECSX_C 1
|
||||
# endif
|
||||
|
||||
#elif defined(_MSC_VER)
|
||||
# define PLATFORM_COMPILER_MICROSOFT 1
|
||||
# define PLATFORM_COMPILER_FAMILYNAME MICROSOFT
|
||||
# define PLATFORM_COMPILER_FAMILYID 14
|
||||
# ifdef __cplusplus
|
||||
# define PLATFORM_COMPILER_MICROSOFT_CXX 1
|
||||
# else
|
||||
# define PLATFORM_COMPILER_MICROSOFT_C 1
|
||||
# endif
|
||||
# define PLATFORM_COMPILER_VERSION _MSC_VER
|
||||
|
||||
#elif defined(__TINYC__)
|
||||
# define PLATFORM_COMPILER_TINY 1
|
||||
# define PLATFORM_COMPILER_FAMILYNAME TINY
|
||||
# define PLATFORM_COMPILER_FAMILYID 15
|
||||
# ifdef __cplusplus
|
||||
# define PLATFORM_COMPILER_TINY_CXX 1
|
||||
# else
|
||||
# define PLATFORM_COMPILER_TINY_C 1
|
||||
# endif
|
||||
|
||||
#elif defined(__LCC__)
|
||||
# define PLATFORM_COMPILER_LCC 1
|
||||
# define PLATFORM_COMPILER_FAMILYNAME LCC
|
||||
# define PLATFORM_COMPILER_FAMILYID 16
|
||||
# ifdef __cplusplus
|
||||
# define PLATFORM_COMPILER_LCC_CXX 1
|
||||
# else
|
||||
# define PLATFORM_COMPILER_LCC_C 1
|
||||
# endif
|
||||
|
||||
#else /* unknown compiler */
|
||||
# define PLATFORM_COMPILER_UNKNOWN 1
|
||||
#endif
|
||||
|
||||
/* this stanza comes last, because many vendor compilers lie and claim
|
||||
to be GNU C for compatibility reasons and/or because they share a frontend */
|
||||
#if defined(__GNUC__)
|
||||
# undef PLATFORM_COMPILER_UNKNOWN
|
||||
# ifndef PLATFORM_COMPILER_FAMILYID
|
||||
# define PLATFORM_COMPILER_GNU 1
|
||||
# define PLATFORM_COMPILER_FAMILYNAME GNU
|
||||
# define PLATFORM_COMPILER_FAMILYID 1
|
||||
# ifdef __cplusplus
|
||||
# define PLATFORM_COMPILER_GNU_CXX 1
|
||||
# else
|
||||
# define PLATFORM_COMPILER_GNU_C 1
|
||||
# endif
|
||||
# if defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__)
|
||||
# define PLATFORM_COMPILER_VERSION \
|
||||
PLATFORM_COMPILER_VERSION_INT(__GNUC__,__GNUC_MINOR__,__GNUC_PATCHLEVEL__)
|
||||
# elif defined(__GNUC_MINOR__) /* older versions of egcs lack __GNUC_PATCHLEVEL__ */
|
||||
# define PLATFORM_COMPILER_VERSION \
|
||||
PLATFORM_COMPILER_VERSION_INT(__GNUC__,__GNUC_MINOR__,0)
|
||||
# else
|
||||
# define PLATFORM_COMPILER_VERSION \
|
||||
PLATFORM_COMPILER_VERSION_INT(__GNUC__,0,0)
|
||||
# endif
|
||||
# define PLATFORM_COMPILER_VERSION_STR __PLATFORM_COMPILER_GNU_VERSION_STR
|
||||
# else
|
||||
# define _PLATFORM_COMPILER_GNU_VERSION_STR __PLATFORM_COMPILER_GNU_VERSION_STR
|
||||
# endif
|
||||
/* gather any advertised GNU version number info, even for non-gcc compilers */
|
||||
# if defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__)
|
||||
# define __PLATFORM_COMPILER_GNU_VERSION_STR \
|
||||
_STRINGIFY(__GNUC__)"."_STRINGIFY(__GNUC_MINOR__)"."_STRINGIFY(__GNUC_PATCHLEVEL__)
|
||||
# elif defined(__GNUC_MINOR__)
|
||||
# define __PLATFORM_COMPILER_GNU_VERSION_STR \
|
||||
_STRINGIFY(__GNUC__)"."_STRINGIFY(__GNUC_MINOR__)".?"
|
||||
# else
|
||||
# define __PLATFORM_COMPILER_GNU_VERSION_STR \
|
||||
_STRINGIFY(__GNUC__)".?.?"
|
||||
# endif
|
||||
#elif defined(PLATFORM_COMPILER_UNKNOWN) /* unknown compiler */
|
||||
# define PLATFORM_COMPILER_FAMILYNAME UNKNOWN
|
||||
# define PLATFORM_COMPILER_FAMILYID 0
|
||||
#endif
|
||||
|
||||
/* Default Values */
|
||||
#ifndef PLATFORM_COMPILER_VERSION
|
||||
# define PLATFORM_COMPILER_VERSION 0 /* don't know */
|
||||
#endif
|
||||
|
||||
#ifndef PLATFORM_COMPILER_VERSION_STR
|
||||
# define PLATFORM_COMPILER_VERSION_STR _STRINGIFY(PLATFORM_COMPILER_VERSION)
|
||||
#endif
|
||||
|
||||
#ifndef PLATFORM_COMPILER_VERSION_INT
|
||||
# define PLATFORM_COMPILER_VERSION_INT(maj,min,pat) \
|
||||
(((maj) << 16) | ((min) << 8) | (pat))
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* MPI_PORTABLE_PLATFORM_H */
|
35
oshmem/mca/atomic/Makefile.am
Обычный файл
35
oshmem/mca/atomic/Makefile.am
Обычный файл
@ -0,0 +1,35 @@
|
||||
#
|
||||
# Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# main library setup
|
||||
noinst_LTLIBRARIES = libmca_atomic.la
|
||||
libmca_atomic_la_SOURCES =
|
||||
|
||||
# header setup
|
||||
nobase_oshmem_HEADERS =
|
||||
nobase_nodist_oshmem_HEADERS =
|
||||
|
||||
# local files
|
||||
headers = atomic.h
|
||||
libmca_atomic_la_SOURCES += $(headers) $(nodist_headers)
|
||||
|
||||
# Conditionally install the header files
|
||||
if WANT_INSTALL_HEADERS
|
||||
nobase_oshmem_HEADERS += $(headers)
|
||||
nobase_nodist_oshmem_HEADERS += $(nodist_headers)
|
||||
oshmemdir = $(includedir)/oshmem/oshmem/mca/atomic
|
||||
else
|
||||
oshmemdir = $(includedir)
|
||||
endif
|
||||
|
||||
include base/Makefile.am
|
||||
|
||||
distclean-local:
|
||||
rm -f base/static-components.h
|
121
oshmem/mca/atomic/atomic.h
Обычный файл
121
oshmem/mca/atomic/atomic.h
Обычный файл
@ -0,0 +1,121 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file
|
||||
*
|
||||
* Atomic Operations Interface
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef OSHMEM_MCA_ATOMIC_H
|
||||
#define OSHMEM_MCA_ATOMIC_H
|
||||
|
||||
#include "oshmem_config.h"
|
||||
#include "oshmem/types.h"
|
||||
#include "oshmem/constants.h"
|
||||
|
||||
#include "opal/util/output.h"
|
||||
#include "mpi.h"
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "oshmem/mca/atomic/base/base.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/* ******************************************************************** */
|
||||
|
||||
struct oshmem_op_t;
|
||||
|
||||
/* ******************************************************************** */
|
||||
|
||||
typedef int (*mca_atomic_base_component_init_fn_t)(bool enable_progress_threads,
|
||||
bool enable_threads);
|
||||
|
||||
typedef int (*mca_atomic_base_component_finalize_fn_t)(void);
|
||||
|
||||
typedef struct mca_atomic_base_module_1_0_0_t* (*mca_atomic_base_component_query_fn_t)(int *priority);
|
||||
|
||||
/* ******************************************************************** */
|
||||
|
||||
/**
|
||||
* Atomic component interface
|
||||
*
|
||||
* Component interface for the atomic framework. A public
|
||||
* instance of this structure, called
|
||||
* mca_atomic_[component_name]_component, must exist in any atomic
|
||||
* component.
|
||||
*/
|
||||
struct mca_atomic_base_component_1_0_0_t {
|
||||
/** Base component description */
|
||||
mca_base_component_t atomic_version;
|
||||
/** Base component data block */
|
||||
mca_base_component_data_t atomic_data;
|
||||
|
||||
/** Component initialization function */
|
||||
mca_atomic_base_component_init_fn_t atomic_init;
|
||||
mca_atomic_base_component_finalize_fn_t atomic_finalize;
|
||||
mca_atomic_base_component_query_fn_t atomic_query;
|
||||
};
|
||||
typedef struct mca_atomic_base_component_1_0_0_t mca_atomic_base_component_1_0_0_t;
|
||||
|
||||
/** Per guidance in mca.h, use the unversioned struct name if you just
|
||||
want to always keep up with the most recent version of the
|
||||
interface. */
|
||||
typedef struct mca_atomic_base_component_1_0_0_t mca_atomic_base_component_t;
|
||||
|
||||
/**
|
||||
* Atomic module interface
|
||||
*
|
||||
*/
|
||||
struct mca_atomic_base_module_1_0_0_t {
|
||||
/** Atomic modules all inherit from opal_object */
|
||||
opal_object_t super;
|
||||
|
||||
/* Atomic function pointers */
|
||||
int (*atomic_fadd)(void *target,
|
||||
void *prev,
|
||||
const void *value,
|
||||
size_t nlong,
|
||||
int pe,
|
||||
struct oshmem_op_t *op);
|
||||
int (*atomic_cswap)(void *target,
|
||||
void *prev,
|
||||
const void *cond,
|
||||
const void *value,
|
||||
size_t nlong,
|
||||
int pe);
|
||||
};
|
||||
typedef struct mca_atomic_base_module_1_0_0_t mca_atomic_base_module_1_0_0_t;
|
||||
|
||||
/** Per guidance in mca.h, use the unversioned struct name if you just
|
||||
want to always keep up with the most recent version of the
|
||||
interface. */
|
||||
typedef struct mca_atomic_base_module_1_0_0_t mca_atomic_base_module_t;
|
||||
OSHMEM_DECLSPEC OBJ_CLASS_DECLARATION(mca_atomic_base_module_t);
|
||||
|
||||
/* ******************************************************************** */
|
||||
|
||||
/*
|
||||
* Macro for use in components
|
||||
*/
|
||||
#define MCA_ATOMIC_BASE_VERSION_2_0_0 \
|
||||
MCA_BASE_VERSION_2_0_0, \
|
||||
"atomic", 1, 0, 0
|
||||
|
||||
/* ******************************************************************** */
|
||||
|
||||
OSHMEM_DECLSPEC extern mca_atomic_base_component_t mca_atomic_base_selected_component;
|
||||
OSHMEM_DECLSPEC extern mca_atomic_base_module_t mca_atomic;
|
||||
#define MCA_ATOMIC_CALL(a) mca_atomic.atomic_ ## a
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* OSHMEM_MCA_ATOMIC_H */
|
19
oshmem/mca/atomic/base/Makefile.am
Обычный файл
19
oshmem/mca/atomic/base/Makefile.am
Обычный файл
@ -0,0 +1,19 @@
|
||||
#
|
||||
# Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
AM_CFLAGS = $(OSHMEM_CFLAGS)
|
||||
|
||||
headers += \
|
||||
base/base.h
|
||||
|
||||
libmca_atomic_la_SOURCES += \
|
||||
base/atomic_base_frame.c \
|
||||
base/atomic_base_available.c \
|
||||
base/atomic_base_select.c
|
139
oshmem/mca/atomic/base/atomic_base_available.c
Обычный файл
139
oshmem/mca/atomic/base/atomic_base_available.c
Обычный файл
@ -0,0 +1,139 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "oshmem_config.h"
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/mca/base/mca_base_component_repository.h"
|
||||
|
||||
#include "oshmem/constants.h"
|
||||
#include "oshmem/mca/atomic/atomic.h"
|
||||
#include "oshmem/mca/atomic/base/base.h"
|
||||
|
||||
/*
|
||||
* Private functions
|
||||
*/
|
||||
static int init_query(const mca_base_component_t * ls,
|
||||
bool enable_progress_threads,
|
||||
bool enable_threads);
|
||||
|
||||
/*
|
||||
* Scan down the list of successfully opened components and query each of
|
||||
* them (the opened list will be one or more components. If the user
|
||||
* requested a specific component, it will be the only component in the
|
||||
* opened list). Create and populate the available list of all
|
||||
* components who indicate that they want to be considered for selection.
|
||||
* Close all components who do not want to be considered for selection,
|
||||
* and destroy the opened list.
|
||||
*
|
||||
* Also find the basic component while we're doing all of this, and save
|
||||
* it in a global variable so that we can find it easily later (e.g.,
|
||||
* during scope selection).
|
||||
*/
|
||||
int mca_atomic_base_find_available(bool enable_progress_threads,
|
||||
bool enable_threads)
|
||||
{
|
||||
mca_base_component_list_item_t *cli, *next;
|
||||
const mca_base_component_t *component;
|
||||
|
||||
OPAL_LIST_FOREACH_SAFE(cli, next, &oshmem_atomic_base_framework.framework_components, mca_base_component_list_item_t) {
|
||||
component = cli->cli_component;
|
||||
|
||||
/* Call a subroutine to do the work, because the component may
|
||||
represent different versions of the coll MCA. */
|
||||
|
||||
if (OSHMEM_SUCCESS != init_query(component, enable_progress_threads,
|
||||
enable_threads)) {
|
||||
/* If the component doesn't want to run, then close it.
|
||||
Now close it out and release it from the DSO repository (if it's there). */
|
||||
opal_list_remove_item(&oshmem_atomic_base_framework.framework_components, &cli->super);
|
||||
mca_base_component_close(component, oshmem_atomic_base_framework.framework_output);
|
||||
OBJ_RELEASE(cli);
|
||||
}
|
||||
}
|
||||
|
||||
/* If we have no collective components available, it's an error.
|
||||
Thanks for playing! */
|
||||
|
||||
if (opal_list_get_size(&oshmem_atomic_base_framework.framework_components) == 0) {
|
||||
ATOMIC_VERBOSE(10,
|
||||
"atomic:find_available: no components available!");
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
/* All done */
|
||||
|
||||
return mca_atomic_base_select();
|
||||
}
|
||||
|
||||
/*
|
||||
* Query a component, see if it wants to run at all. If it does, save
|
||||
* some information. If it doesn't, close it.
|
||||
*/
|
||||
static int init_query(const mca_base_component_t * component,
|
||||
bool enable_progress_threads,
|
||||
bool enable_threads)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ATOMIC_VERBOSE(10,
|
||||
"atomic:find_available: querying atomic component %s",
|
||||
component->mca_component_name);
|
||||
|
||||
/* This component has already been successfully opened. So now
|
||||
query it. */
|
||||
|
||||
if (1 == component->mca_type_major_version
|
||||
&& 0 == component->mca_type_minor_version
|
||||
&& 0 == component->mca_type_release_version) {
|
||||
|
||||
mca_atomic_base_component_t *atomic =
|
||||
(mca_atomic_base_component_t *) component;
|
||||
|
||||
ret = atomic->atomic_init(enable_progress_threads, enable_threads);
|
||||
} else {
|
||||
/* Unrecognized coll API version */
|
||||
|
||||
ATOMIC_VERBOSE(10,
|
||||
"atomic:find_available: unrecognized atomic API version (%d.%d.%d, ignored)",
|
||||
component->mca_type_major_version,
|
||||
component->mca_type_minor_version,
|
||||
component->mca_type_release_version);
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
/* Query done -- look at the return value to see what happened */
|
||||
|
||||
if (OSHMEM_SUCCESS != ret) {
|
||||
ATOMIC_VERBOSE(10,
|
||||
"atomic:find_available: atomic component %s is not available",
|
||||
component->mca_component_name);
|
||||
if (NULL != component->mca_close_component) {
|
||||
component->mca_close_component();
|
||||
}
|
||||
} else {
|
||||
ATOMIC_VERBOSE(10,
|
||||
"atomic:find_available: atomic component %s is available",
|
||||
component->mca_component_name);
|
||||
}
|
||||
|
||||
/* All done */
|
||||
|
||||
return ret;
|
||||
}
|
91
oshmem/mca/atomic/base/atomic_base_frame.c
Обычный файл
91
oshmem/mca/atomic/base/atomic_base_frame.c
Обычный файл
@ -0,0 +1,91 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "oshmem_config.h"
|
||||
|
||||
#include "oshmem/constants.h"
|
||||
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
|
||||
#include "oshmem/mca/atomic/atomic.h"
|
||||
#include "oshmem/mca/atomic/base/base.h"
|
||||
|
||||
/*
|
||||
* The following file was created by configure. It contains extern
|
||||
* statements and the definition of an array of pointers to each
|
||||
* component's public mca_base_component_t struct.
|
||||
*/
|
||||
|
||||
#include "oshmem/mca/atomic/base/static-components.h"
|
||||
|
||||
/*
|
||||
* Global variables; most of which are loaded by back-ends of MCA
|
||||
* variables
|
||||
*/
|
||||
|
||||
/*
|
||||
* Ensure all function pointers are NULL'ed out to start with
|
||||
*/
|
||||
static void atomic_base_module_construct(mca_atomic_base_module_t *m)
|
||||
{
|
||||
/* Atomic function pointers */
|
||||
m->atomic_fadd = NULL;
|
||||
m->atomic_cswap = NULL;
|
||||
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(mca_atomic_base_module_t, opal_object_t,
|
||||
atomic_base_module_construct, NULL);
|
||||
|
||||
/*
 * Framework registration hook.  The atomic base registers nothing of
 * its own, so this only reports success.
 */
static int mca_atomic_base_register(mca_base_register_flag_t flags)
{
    (void) flags;   /* intentionally unused */

    return OSHMEM_SUCCESS;
}
|
||||
|
||||
static int mca_atomic_base_close(void)
|
||||
{
|
||||
mca_base_component_list_item_t *cli, *next;
|
||||
const mca_base_component_t *component;
|
||||
|
||||
OPAL_LIST_FOREACH_SAFE(cli, next, &oshmem_atomic_base_framework.framework_components, mca_base_component_list_item_t) {
|
||||
component = cli->cli_component;
|
||||
mca_atomic_base_component_t *atomic =
|
||||
(mca_atomic_base_component_t *) component;
|
||||
|
||||
if (NULL != atomic->atomic_finalize) {
|
||||
atomic->atomic_finalize();
|
||||
}
|
||||
}
|
||||
|
||||
/* Close all remaining available components */
|
||||
return mca_base_framework_components_close(&oshmem_atomic_base_framework, NULL);
|
||||
}
|
||||
|
||||
/*
 * Framework open hook: opens all available atomic components.
 *
 * Maps the OPAL result of mca_base_framework_components_open() onto the
 * OSHMEM return codes: OSHMEM_SUCCESS on OPAL_SUCCESS, OSHMEM_ERROR for
 * anything else.
 */
static int mca_atomic_base_open(mca_base_open_flag_t flags)
{
    const int opal_rc =
        mca_base_framework_components_open(&oshmem_atomic_base_framework, flags);

    return (OPAL_SUCCESS == opal_rc) ? OSHMEM_SUCCESS : OSHMEM_ERROR;
}
|
||||
|
||||
MCA_BASE_FRAMEWORK_DECLARE(oshmem, atomic,
|
||||
"OSHMEM ATOMIC",
|
||||
mca_atomic_base_register,
|
||||
mca_atomic_base_open,
|
||||
mca_atomic_base_close,
|
||||
mca_atomic_base_static_components,
|
||||
0);
|
242
oshmem/mca/atomic/base/atomic_base_select.c
Обычный файл
242
oshmem/mca/atomic/base/atomic_base_select.c
Обычный файл
@ -0,0 +1,242 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "oshmem_config.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "oshmem/constants.h"
|
||||
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/mca/base/mca_base_component_repository.h"
|
||||
|
||||
#include "oshmem/mca/atomic/atomic.h"
|
||||
#include "oshmem/mca/atomic/base/base.h"
|
||||
|
||||
/*
|
||||
* Global variables; most of which are loaded by back-ends of MCA
|
||||
* variables
|
||||
*/
|
||||
mca_atomic_base_module_t mca_atomic;
|
||||
|
||||
/*
|
||||
* Local types
|
||||
*/
|
||||
struct avail_com_t {
|
||||
opal_list_item_t super;
|
||||
|
||||
int ac_priority;
|
||||
mca_atomic_base_module_t *ac_module;
|
||||
};
|
||||
typedef struct avail_com_t avail_com_t;
|
||||
|
||||
/*
|
||||
* Local functions
|
||||
*/
|
||||
static opal_list_t *check_components(opal_list_t * components);
|
||||
static int check_one_component(const mca_base_component_t * component,
|
||||
mca_atomic_base_module_1_0_0_t ** module);
|
||||
|
||||
static int query(const mca_base_component_t * component,
|
||||
int *priority,
|
||||
mca_atomic_base_module_1_0_0_t ** module);
|
||||
|
||||
static int query_1_0_0(const mca_atomic_base_component_1_0_0_t * atomic_component,
|
||||
int *priority,
|
||||
mca_atomic_base_module_1_0_0_t ** module);
|
||||
|
||||
/*
|
||||
* Stuff for the OBJ interface
|
||||
*/
|
||||
static OBJ_CLASS_INSTANCE(avail_com_t, opal_list_item_t, NULL, NULL);
|
||||
|
||||
/*
|
||||
* This function is called at the initialization.
|
||||
* It is used to select which atomic component will be
|
||||
* active for a given group.
|
||||
*/
|
||||
int mca_atomic_base_select(void)
|
||||
{
|
||||
opal_list_t *selectable;
|
||||
opal_list_item_t *item;
|
||||
|
||||
/* Announce */
|
||||
ATOMIC_VERBOSE(10,
|
||||
"atomic:base:atomic_select: Checking all available modules");
|
||||
selectable = check_components(&oshmem_atomic_base_framework.framework_components);
|
||||
|
||||
/* Upon return from the above, the modules list will contain the
|
||||
list of modules that returned (priority >= 0). If we have no
|
||||
atomic modules available, then print error and return. */
|
||||
if (NULL == selectable) {
|
||||
/* There's no modules available */
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
/* do the selection loop */
|
||||
for (item = opal_list_remove_first(selectable); NULL != item; item =
|
||||
opal_list_remove_first(selectable)) {
|
||||
avail_com_t *avail = (avail_com_t *) item;
|
||||
|
||||
/* Set module having the highest priority */
|
||||
memcpy(&mca_atomic, avail->ac_module, sizeof(mca_atomic));
|
||||
|
||||
OBJ_RELEASE(avail->ac_module);
|
||||
OBJ_RELEASE(avail);
|
||||
/* check correctness */
|
||||
if (!(mca_atomic.atomic_fadd) || !(mca_atomic.atomic_cswap)) {
|
||||
return OSHMEM_ERR_NOT_FOUND;
|
||||
}
|
||||
}
|
||||
|
||||
/* Done with the list from the check_components() call so release it. */
|
||||
OBJ_RELEASE(selectable);
|
||||
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
/*
 * Comparison callback for opal_list_sort(): orders two avail_com_t
 * entries by their ac_priority.
 *
 * Returns 1 when *a has the larger priority, -1 when it has the smaller
 * one and 0 when both priorities are equal.
 */
static int avail_com_compare (opal_list_item_t **a,
                              opal_list_item_t **b)
{
    const int lhs = ((avail_com_t *) *a)->ac_priority;
    const int rhs = ((avail_com_t *) *b)->ac_priority;

    /* Each comparison yields 0 or 1, so the difference is -1, 0 or 1. */
    return (lhs > rhs) - (lhs < rhs);
}
|
||||
|
||||
/*
|
||||
* For each module in the list, check and see if it wants to run, and
|
||||
* do the resulting priority comparison. Make a list of modules to be
|
||||
* only those who returned that they want to run, and put them in
|
||||
* priority order.
|
||||
*/
|
||||
static opal_list_t *check_components(opal_list_t *components)
|
||||
{
|
||||
int priority;
|
||||
const mca_base_component_t *component;
|
||||
mca_base_component_list_item_t *cli;
|
||||
mca_atomic_base_module_1_0_0_t *module;
|
||||
opal_list_t *selectable;
|
||||
avail_com_t *avail;
|
||||
|
||||
/* Make a list of the components that query successfully */
|
||||
selectable = OBJ_NEW(opal_list_t);
|
||||
|
||||
/* Scan through the list of components */
|
||||
OPAL_LIST_FOREACH(cli, &oshmem_atomic_base_framework.framework_components, mca_base_component_list_item_t) {
|
||||
component = cli->cli_component;
|
||||
|
||||
priority = check_one_component(component, &module);
|
||||
if (priority >= 0) {
|
||||
/* We have a component that indicated that it wants to run
|
||||
by giving us a module */
|
||||
avail = OBJ_NEW(avail_com_t);
|
||||
avail->ac_priority = priority;
|
||||
avail->ac_module = module;
|
||||
|
||||
opal_list_append(selectable, &avail->super);
|
||||
}
|
||||
}
|
||||
|
||||
/* If we didn't find any available components, return an error */
|
||||
if (0 == opal_list_get_size(selectable)) {
|
||||
OBJ_RELEASE(selectable);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Put this list in priority order */
|
||||
opal_list_sort(selectable, avail_com_compare);
|
||||
|
||||
/* All done */
|
||||
return selectable;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check a single component
|
||||
*/
|
||||
static int check_one_component(const mca_base_component_t *component,
|
||||
mca_atomic_base_module_1_0_0_t **module)
|
||||
{
|
||||
int err;
|
||||
int priority = -1;
|
||||
|
||||
err = query(component, &priority, module);
|
||||
|
||||
if (OSHMEM_SUCCESS == err) {
|
||||
priority = (priority < 100) ? priority : 100;
|
||||
ATOMIC_VERBOSE(10,
|
||||
"atomic:base:atomic_select: component available: %s, priority: %d",
|
||||
component->mca_component_name, priority);
|
||||
|
||||
} else {
|
||||
priority = -1;
|
||||
ATOMIC_VERBOSE(10,
|
||||
"atomic:base:atomic_select: component not available: %s",
|
||||
component->mca_component_name);
|
||||
}
|
||||
|
||||
return priority;
|
||||
}
|
||||
|
||||
/**************************************************************************
|
||||
* Query functions
|
||||
**************************************************************************/
|
||||
|
||||
/*
|
||||
* Take any version of a atomic module, query it, and return the right
|
||||
* module struct
|
||||
*/
|
||||
static int query(const mca_base_component_t *component,
|
||||
int *priority,
|
||||
mca_atomic_base_module_1_0_0_t **module)
|
||||
{
|
||||
*module = NULL;
|
||||
if (1 == component->mca_type_major_version
|
||||
&& 0 == component->mca_type_minor_version
|
||||
&& 0 == component->mca_type_release_version) {
|
||||
const mca_atomic_base_component_1_0_0_t *atomic100 =
|
||||
(mca_atomic_base_component_1_0_0_t *) component;
|
||||
|
||||
return query_1_0_0(atomic100, priority, module);
|
||||
}
|
||||
|
||||
/* Unknown atomic API version -- return error */
|
||||
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
static int query_1_0_0(const mca_atomic_base_component_1_0_0_t *component,
|
||||
int *priority,
|
||||
mca_atomic_base_module_1_0_0_t **module)
|
||||
{
|
||||
mca_atomic_base_module_1_0_0_t *ret;
|
||||
|
||||
/* There's currently no need for conversion */
|
||||
|
||||
ret = component->atomic_query(priority);
|
||||
if (NULL != ret) {
|
||||
*module = ret;
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
return OSHMEM_ERROR;
|
||||
}
|
52
oshmem/mca/atomic/base/base.h
Обычный файл
52
oshmem/mca/atomic/base/base.h
Обычный файл
@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef MCA_ATOMIC_BASE_H
|
||||
#define MCA_ATOMIC_BASE_H
|
||||
|
||||
#include "oshmem_config.h"
|
||||
|
||||
#include "oshmem/mca/atomic/atomic.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
|
||||
/*
|
||||
* Global functions for MCA overall atomic open and close
|
||||
*/
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
int mca_atomic_base_find_available(bool enable_progress_threads,
|
||||
bool enable_threads);
|
||||
|
||||
int mca_atomic_base_select(void);
|
||||
|
||||
/*
|
||||
* MCA framework
|
||||
*/
|
||||
OSHMEM_DECLSPEC extern mca_base_framework_t oshmem_atomic_base_framework;
|
||||
|
||||
/* ******************************************************************** */
|
||||
#ifdef __BASE_FILE__
|
||||
#define __ATOMIC_FILE__ __BASE_FILE__
|
||||
#else
|
||||
#define __ATOMIC_FILE__ __FILE__
|
||||
#endif
|
||||
|
||||
#define ATOMIC_VERBOSE(level, format, ...) \
|
||||
opal_output_verbose(level, oshmem_atomic_base_framework.framework_output, "%s:%d - %s() " format, \
|
||||
__ATOMIC_FILE__, __LINE__, __FUNCTION__, ## __VA_ARGS__)
|
||||
|
||||
#define ATOMIC_ERROR(format, ... ) \
|
||||
opal_output_verbose(0, oshmem_atomic_base_framework.framework_output, "Error: %s:%d - %s() " format, \
|
||||
__ATOMIC_FILE__, __LINE__, __FUNCTION__, ## __VA_ARGS__)
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* MCA_ATOMIC_BASE_H */
|
40
oshmem/mca/atomic/basic/Makefile.am
Обычный файл
40
oshmem/mca/atomic/basic/Makefile.am
Обычный файл
@ -0,0 +1,40 @@
|
||||
#
|
||||
# Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
AM_CFLAGS = $(OSHMEM_CFLAGS)
|
||||
|
||||
sources = \
|
||||
atomic_basic.h \
|
||||
atomic_basic_module.c \
|
||||
atomic_basic_component.c \
|
||||
atomic_basic_fadd.c \
|
||||
atomic_basic_cswap.c
|
||||
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
# (for static builds).
|
||||
|
||||
if MCA_BUILD_oshmem_atomic_basic_DSO
|
||||
component_noinst =
|
||||
component_install = mca_atomic_basic.la
|
||||
else
|
||||
component_noinst = libmca_atomic_basic.la
|
||||
component_install =
|
||||
endif
|
||||
|
||||
mcacomponentdir = $(pkglibdir)
|
||||
mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_atomic_basic_la_SOURCES = $(sources)
|
||||
mca_atomic_basic_la_LDFLAGS = -module -avoid-version
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_atomic_basic_la_SOURCES =$(sources)
|
||||
libmca_atomic_basic_la_LDFLAGS = -module -avoid-version
|
59
oshmem/mca/atomic/basic/atomic_basic.h
Обычный файл
59
oshmem/mca/atomic/basic/atomic_basic.h
Обычный файл
@ -0,0 +1,59 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef MCA_ATOMIC_BASIC_H
|
||||
#define MCA_ATOMIC_BASIC_H
|
||||
|
||||
#include "oshmem_config.h"
|
||||
|
||||
#include "opal/mca/mca.h"
|
||||
#include "oshmem/mca/atomic/atomic.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/* Globally exported variables */
|
||||
|
||||
OSHMEM_MODULE_DECLSPEC extern mca_atomic_base_component_1_0_0_t
|
||||
mca_atomic_basic_component;
|
||||
|
||||
extern int mca_atomic_basic_priority_param;
|
||||
|
||||
OSHMEM_DECLSPEC void atomic_basic_lock(int pe);
|
||||
OSHMEM_DECLSPEC void atomic_basic_unlock(int pe);
|
||||
|
||||
/* API functions */
|
||||
|
||||
int mca_atomic_basic_init(bool enable_progress_threads, bool enable_threads);
|
||||
int mca_atomic_basic_finalize(void);
|
||||
mca_atomic_base_module_t*
|
||||
mca_atomic_basic_query(int *priority);
|
||||
|
||||
int mca_atomic_basic_fadd(void *target,
|
||||
void *prev,
|
||||
const void *value,
|
||||
size_t nlong,
|
||||
int pe,
|
||||
struct oshmem_op_t *op);
|
||||
int mca_atomic_basic_cswap(void *target,
|
||||
void *prev,
|
||||
const void *cond,
|
||||
const void *value,
|
||||
size_t nlong,
|
||||
int pe);
|
||||
|
||||
struct mca_atomic_basic_module_t {
|
||||
mca_atomic_base_module_t super;
|
||||
};
|
||||
typedef struct mca_atomic_basic_module_t mca_atomic_basic_module_t;
|
||||
OBJ_CLASS_DECLARATION(mca_atomic_basic_module_t);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* MCA_ATOMIC_BASIC_H */
|
86
oshmem/mca/atomic/basic/atomic_basic_component.c
Обычный файл
86
oshmem/mca/atomic/basic/atomic_basic_component.c
Обычный файл
@ -0,0 +1,86 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "oshmem_config.h"
|
||||
|
||||
#include "oshmem/constants.h"
|
||||
#include "oshmem/mca/atomic/atomic.h"
|
||||
#include "oshmem/mca/atomic/base/base.h"
|
||||
#include "atomic_basic.h"
|
||||
|
||||
/*
|
||||
* Public string showing the atomic basic component version number
|
||||
*/
|
||||
const char *mca_atomic_basic_component_version_string =
|
||||
"Open SHMEM basic atomic MCA component version " OSHMEM_VERSION;
|
||||
|
||||
/*
|
||||
* Global variable
|
||||
*/
|
||||
int mca_atomic_basic_priority_param = -1;
|
||||
|
||||
/*
|
||||
* Local function
|
||||
*/
|
||||
static int __basic_open(void);
|
||||
|
||||
/*
|
||||
* Instantiate the public struct with all of our public information
|
||||
* and pointers to our public functions in it
|
||||
*/
|
||||
|
||||
mca_atomic_base_component_t mca_atomic_basic_component = {
|
||||
|
||||
/* First, the mca_component_t struct containing meta information
|
||||
about the component itself */
|
||||
|
||||
{
|
||||
MCA_ATOMIC_BASE_VERSION_2_0_0,
|
||||
|
||||
/* Component name and version */
|
||||
"basic",
|
||||
OSHMEM_MAJOR_VERSION,
|
||||
OSHMEM_MINOR_VERSION,
|
||||
OSHMEM_RELEASE_VERSION,
|
||||
|
||||
/* Component open and close functions */
|
||||
__basic_open,
|
||||
NULL
|
||||
},
|
||||
{
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
},
|
||||
|
||||
/* Initialization / querying functions */
|
||||
|
||||
mca_atomic_basic_init,
|
||||
mca_atomic_basic_finalize,
|
||||
mca_atomic_basic_query
|
||||
};
|
||||
|
||||
static int __basic_open(void)
|
||||
{
|
||||
mca_atomic_basic_priority_param = 75;
|
||||
(void) mca_base_component_var_register(&mca_atomic_basic_component.atomic_version,
|
||||
"priority",
|
||||
"Priority of the atomic:basic component",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_atomic_basic_priority_param);
|
||||
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(mca_atomic_basic_module_t,
|
||||
mca_atomic_base_module_t,
|
||||
NULL,
|
||||
NULL);
|
48
oshmem/mca/atomic/basic/atomic_basic_cswap.c
Обычный файл
48
oshmem/mca/atomic/basic/atomic_basic_cswap.c
Обычный файл
@ -0,0 +1,48 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "oshmem_config.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "oshmem/constants.h"
|
||||
#include "oshmem/mca/spml/spml.h"
|
||||
#include "oshmem/mca/atomic/atomic.h"
|
||||
#include "oshmem/mca/atomic/base/base.h"
|
||||
#include "atomic_basic.h"
|
||||
|
||||
int mca_atomic_basic_cswap(void *target,
|
||||
void *prev,
|
||||
const void *cond,
|
||||
const void *value,
|
||||
size_t nlong,
|
||||
int pe)
|
||||
{
|
||||
int rc = OSHMEM_SUCCESS;
|
||||
|
||||
if (!prev) {
|
||||
rc = OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
if (rc == OSHMEM_SUCCESS) {
|
||||
atomic_basic_lock(pe);
|
||||
|
||||
rc = MCA_SPML_CALL(get(target, nlong, prev, pe));
|
||||
|
||||
if ((rc == OSHMEM_SUCCESS) && (!cond || !memcmp(prev, cond, nlong))) {
|
||||
rc = MCA_SPML_CALL(put(target, nlong, (void*)value, pe));
|
||||
shmem_quiet();
|
||||
}
|
||||
|
||||
atomic_basic_unlock(pe);
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
58
oshmem/mca/atomic/basic/atomic_basic_fadd.c
Обычный файл
58
oshmem/mca/atomic/basic/atomic_basic_fadd.c
Обычный файл
@ -0,0 +1,58 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "oshmem_config.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "oshmem/constants.h"
|
||||
#include "oshmem/op/op.h"
|
||||
#include "oshmem/mca/spml/spml.h"
|
||||
#include "oshmem/mca/atomic/atomic.h"
|
||||
#include "oshmem/mca/atomic/base/base.h"
|
||||
#include "atomic_basic.h"
|
||||
|
||||
int mca_atomic_basic_fadd(void *target,
|
||||
void *prev,
|
||||
const void *value,
|
||||
size_t nlong,
|
||||
int pe,
|
||||
struct oshmem_op_t *op)
|
||||
{
|
||||
int rc = OSHMEM_SUCCESS;
|
||||
|
||||
if (!target || !value) {
|
||||
rc = OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
if (rc == OSHMEM_SUCCESS) {
|
||||
long long temp_value = 0;
|
||||
|
||||
atomic_basic_lock(pe);
|
||||
|
||||
rc = MCA_SPML_CALL(get(target, nlong, (void*)&temp_value, pe));
|
||||
|
||||
if (prev)
|
||||
memcpy(prev, (void*) &temp_value, nlong);
|
||||
|
||||
op->o_func.c_fn((void*) value,
|
||||
(void*) &temp_value,
|
||||
nlong / op->dt_size);
|
||||
|
||||
if (rc == OSHMEM_SUCCESS) {
|
||||
rc = MCA_SPML_CALL(put(target, nlong, (void*)&temp_value, pe));
|
||||
shmem_quiet();
|
||||
}
|
||||
|
||||
atomic_basic_unlock(pe);
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
179
oshmem/mca/atomic/basic/atomic_basic_module.c
Обычный файл
179
oshmem/mca/atomic/basic/atomic_basic_module.c
Обычный файл
@ -0,0 +1,179 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "oshmem_config.h"
|
||||
#include <stdio.h>
|
||||
|
||||
#include "oshmem/constants.h"
|
||||
#include "oshmem/mca/atomic/atomic.h"
|
||||
#include "oshmem/mca/spml/spml.h"
|
||||
#include "oshmem/mca/memheap/memheap.h"
|
||||
#include "oshmem/proc/proc.h"
|
||||
#include "atomic_basic.h"
|
||||
|
||||
static char *atomic_lock_sync;
|
||||
static int *atomic_lock_turn;
|
||||
static char *local_lock_sync;
|
||||
static int *local_lock_turn;
|
||||
|
||||
enum {
|
||||
ATOMIC_LOCK_IDLE = 0,
|
||||
ATOMIC_LOCK_WAITING = 1,
|
||||
ATOMIC_LOCK_ACTIVE = 2
|
||||
};
|
||||
|
||||
/*
|
||||
* Initial query function that is invoked during initialization, allowing
|
||||
* this module to indicate what level of thread support it provides.
|
||||
*/
|
||||
int mca_atomic_basic_init(bool enable_progress_threads, bool enable_threads)
|
||||
{
|
||||
int rc = OSHMEM_SUCCESS;
|
||||
void* ptr = NULL;
|
||||
int num_pe = oshmem_num_procs();
|
||||
|
||||
rc = MCA_MEMHEAP_CALL(private_alloc((num_pe * sizeof(char)), &ptr));
|
||||
if (rc == OSHMEM_SUCCESS) {
|
||||
atomic_lock_sync = (char*) ptr;
|
||||
memset(atomic_lock_sync, ATOMIC_LOCK_IDLE, sizeof(char) * num_pe);
|
||||
|
||||
rc = MCA_MEMHEAP_CALL(private_alloc(sizeof(int), &ptr));
|
||||
if (rc == OSHMEM_SUCCESS) {
|
||||
atomic_lock_turn = (int*) ptr;
|
||||
*atomic_lock_turn = 0;
|
||||
if (rc == OSHMEM_SUCCESS) {
|
||||
local_lock_sync = (char*) malloc(num_pe * sizeof(char));
|
||||
local_lock_turn = (int*) malloc(sizeof(int));
|
||||
if (!local_lock_sync || !local_lock_turn) {
|
||||
rc = OSHMEM_ERR_OUT_OF_RESOURCE;
|
||||
} else {
|
||||
memcpy((void*) local_lock_sync,
|
||||
(void*) atomic_lock_sync,
|
||||
sizeof(char) * num_pe);
|
||||
*local_lock_turn = *atomic_lock_turn;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
int mca_atomic_basic_finalize(void)
|
||||
{
|
||||
void* ptr = NULL;
|
||||
|
||||
ptr = (void*) atomic_lock_sync;
|
||||
MCA_MEMHEAP_CALL(private_free(ptr));
|
||||
atomic_lock_sync = NULL;
|
||||
|
||||
ptr = (void*) atomic_lock_turn;
|
||||
MCA_MEMHEAP_CALL(private_free(ptr));
|
||||
atomic_lock_turn = NULL;
|
||||
|
||||
if (local_lock_sync) {
|
||||
free((void*) local_lock_sync);
|
||||
local_lock_sync = NULL;
|
||||
}
|
||||
|
||||
if (local_lock_turn) {
|
||||
free((void*) local_lock_turn);
|
||||
local_lock_turn = NULL;
|
||||
}
|
||||
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
mca_atomic_base_module_t *
|
||||
mca_atomic_basic_query(int *priority)
|
||||
{
|
||||
mca_atomic_basic_module_t *module;
|
||||
|
||||
*priority = mca_atomic_basic_priority_param;
|
||||
|
||||
module = OBJ_NEW(mca_atomic_basic_module_t);
|
||||
if (module) {
|
||||
module->super.atomic_fadd = mca_atomic_basic_fadd;
|
||||
module->super.atomic_cswap = mca_atomic_basic_cswap;
|
||||
return &(module->super);
|
||||
}
|
||||
|
||||
return NULL ;
|
||||
}
|
||||
|
||||
void atomic_basic_lock(int pe)
|
||||
{
|
||||
int index = -1;
|
||||
int me = oshmem_my_proc_id();
|
||||
int num_pe = oshmem_num_procs();
|
||||
char lock_required = ATOMIC_LOCK_WAITING;
|
||||
char lock_active = ATOMIC_LOCK_ACTIVE;
|
||||
int root_pe = pe;
|
||||
|
||||
do {
|
||||
/* announce that we need the resource */
|
||||
do {
|
||||
MCA_SPML_CALL(put((void*)(atomic_lock_sync + me), sizeof(lock_required), (void*)&lock_required, root_pe));
|
||||
MCA_SPML_CALL(get((void*)atomic_lock_sync, num_pe * sizeof(*atomic_lock_sync), (void*)local_lock_sync, root_pe));
|
||||
} while (local_lock_sync[me] != lock_required);
|
||||
|
||||
MCA_SPML_CALL(get((void*)atomic_lock_turn, sizeof(index), (void*)&index, root_pe));
|
||||
while (index != me) {
|
||||
if (local_lock_sync[index] != ATOMIC_LOCK_IDLE) {
|
||||
MCA_SPML_CALL(get((void*)atomic_lock_turn, sizeof(index), (void*)&index, root_pe));
|
||||
MCA_SPML_CALL(get((void*)atomic_lock_sync, num_pe * sizeof(*atomic_lock_sync), (void*)local_lock_sync, root_pe));
|
||||
} else {
|
||||
index = (index + 1) % num_pe;
|
||||
}
|
||||
}
|
||||
|
||||
/* now tentatively claim the resource */
|
||||
do {
|
||||
MCA_SPML_CALL(put((void*)(atomic_lock_sync + me), sizeof(lock_active), (void*)&lock_active, root_pe));
|
||||
MCA_SPML_CALL(get((void*)atomic_lock_sync, num_pe * sizeof(*atomic_lock_sync), (void*)local_lock_sync, root_pe));
|
||||
} while (local_lock_sync[me] != lock_active);
|
||||
|
||||
index = 0;
|
||||
while ((index < num_pe)
|
||||
&& ((index == me)
|
||||
|| (local_lock_sync[index] != ATOMIC_LOCK_ACTIVE))) {
|
||||
index = index + 1;
|
||||
}
|
||||
|
||||
MCA_SPML_CALL(get((void*)atomic_lock_turn, sizeof(*atomic_lock_turn), (void*)local_lock_turn, root_pe));
|
||||
} while (!((index >= num_pe)
|
||||
&& ((*local_lock_turn == me)
|
||||
|| (local_lock_sync[*local_lock_turn] == ATOMIC_LOCK_IDLE))));
|
||||
|
||||
MCA_SPML_CALL(put((void*)atomic_lock_turn, sizeof(me), (void*)&me, root_pe));
|
||||
}
|
||||
|
||||
void atomic_basic_unlock(int pe)
|
||||
{
|
||||
int index = -1;
|
||||
int me = oshmem_my_proc_id();
|
||||
int num_pe = oshmem_num_procs();
|
||||
char lock_idle = ATOMIC_LOCK_IDLE;
|
||||
int root_pe = pe;
|
||||
|
||||
MCA_SPML_CALL(get((void*)atomic_lock_sync, num_pe * sizeof(*atomic_lock_sync), (void*)local_lock_sync, root_pe));
|
||||
MCA_SPML_CALL(get((void*)atomic_lock_turn, sizeof(index), (void*)&index, root_pe));
|
||||
|
||||
do {
|
||||
index = (index + 1) % num_pe;
|
||||
} while (local_lock_sync[index] == ATOMIC_LOCK_IDLE);
|
||||
|
||||
MCA_SPML_CALL(put((void*)atomic_lock_turn, sizeof(index), (void*)&index, root_pe));
|
||||
|
||||
do {
|
||||
MCA_SPML_CALL(put((void*)(atomic_lock_sync + me), sizeof(lock_idle), (void*)&lock_idle, root_pe));
|
||||
MCA_SPML_CALL(get((void*)atomic_lock_sync, num_pe * sizeof(*atomic_lock_sync), (void*)local_lock_sync, root_pe));
|
||||
} while (local_lock_sync[me] != lock_idle);
|
||||
}
|
13
oshmem/mca/atomic/basic/configure.params
Обычный файл
13
oshmem/mca/atomic/basic/configure.params
Обычный файл
@ -0,0 +1,13 @@
|
||||
# -*- shell-script -*-
|
||||
# Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# Specific to this module
|
||||
|
||||
PARAM_CONFIG_FILES="Makefile"
|
42
oshmem/mca/atomic/mxm/Makefile.am
Обычный файл
42
oshmem/mca/atomic/mxm/Makefile.am
Обычный файл
@ -0,0 +1,42 @@
|
||||
#
|
||||
# Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
AM_CFLAGS = $(OSHMEM_CFLAGS)
|
||||
AM_CPPFLAGS = $(atomic_mxm_CPPFLAGS)
|
||||
|
||||
mxm_sources = \
|
||||
atomic_mxm.h \
|
||||
atomic_mxm_module.c \
|
||||
atomic_mxm_component.c \
|
||||
atomic_mxm_fadd.c \
|
||||
atomic_mxm_cswap.c
|
||||
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
# (for static builds).
|
||||
|
||||
if MCA_BUILD_oshmem_atomic_mxm_DSO
|
||||
component_noinst =
|
||||
component_install = mca_atomic_mxm.la
|
||||
else
|
||||
component_noinst = libmca_atomic_mxm.la
|
||||
component_install =
|
||||
endif
|
||||
|
||||
mcacomponentdir = $(pkglibdir)
|
||||
mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_atomic_mxm_la_SOURCES = $(mxm_sources)
|
||||
mca_atomic_mxm_la_LIBADD = $(atomic_mxm_LIBS)
|
||||
mca_atomic_mxm_la_LDFLAGS = -module -avoid-version $(atomic_mxm_LDFLAGS)
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_atomic_mxm_la_SOURCES =$(mxm_sources)
|
||||
libmca_atomic_mxm_la_LDFLAGS = -module -avoid-version $(atomic_mxm_LDFLAGS)
|
66
oshmem/mca/atomic/mxm/atomic_mxm.h
Обычный файл
66
oshmem/mca/atomic/mxm/atomic_mxm.h
Обычный файл
@ -0,0 +1,66 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef MCA_ATOMIC_MXM_H
|
||||
#define MCA_ATOMIC_MXM_H
|
||||
|
||||
#include "oshmem_config.h"
|
||||
|
||||
#include "opal/mca/mca.h"
|
||||
#include "oshmem/mca/atomic/atomic.h"
|
||||
|
||||
/* This component uses SPML:IKRIT */
|
||||
#include "oshmem/mca/spml/ikrit/spml_ikrit.h"
|
||||
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/* Globally exported variables */
|
||||
|
||||
OSHMEM_MODULE_DECLSPEC extern mca_atomic_base_component_1_0_0_t
|
||||
mca_atomic_mxm_component;
|
||||
|
||||
extern int mca_atomic_mxm_priority_param;
|
||||
|
||||
/* this component works with spml:ikrit only */
|
||||
extern mca_spml_ikrit_t *mca_spml_self;
|
||||
|
||||
OSHMEM_DECLSPEC void atomic_mxm_lock(int pe);
|
||||
OSHMEM_DECLSPEC void atomic_mxm_unlock(int pe);
|
||||
|
||||
/* API functions */
|
||||
|
||||
int mca_atomic_mxm_init(bool enable_progress_threads, bool enable_threads);
|
||||
int mca_atomic_mxm_finalize(void);
|
||||
mca_atomic_base_module_t*
|
||||
mca_atomic_mxm_query(int *priority);
|
||||
|
||||
int mca_atomic_mxm_fadd(void *target,
|
||||
void *prev,
|
||||
const void *value,
|
||||
size_t nlong,
|
||||
int pe,
|
||||
struct oshmem_op_t *op);
|
||||
int mca_atomic_mxm_cswap(void *target,
|
||||
void *prev,
|
||||
const void *cond,
|
||||
const void *value,
|
||||
size_t nlong,
|
||||
int pe);
|
||||
|
||||
struct mca_atomic_mxm_module_t {
|
||||
mca_atomic_base_module_t super;
|
||||
};
|
||||
typedef struct mca_atomic_mxm_module_t mca_atomic_mxm_module_t;
|
||||
OBJ_CLASS_DECLARATION(mca_atomic_mxm_module_t);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* MCA_ATOMIC_MXM_H */
|
101
oshmem/mca/atomic/mxm/atomic_mxm_component.c
Обычный файл
101
oshmem/mca/atomic/mxm/atomic_mxm_component.c
Обычный файл
@ -0,0 +1,101 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "oshmem_config.h"
|
||||
|
||||
#include "oshmem/constants.h"
|
||||
#include "oshmem/mca/atomic/atomic.h"
|
||||
#include "oshmem/mca/atomic/base/base.h"
|
||||
#include "oshmem/mca/spml/base/base.h"
|
||||
|
||||
#include "atomic_mxm.h"
|
||||
|
||||
|
||||
/*
|
||||
* Public string showing the atomic mxm component version number
|
||||
*/
|
||||
const char *mca_atomic_mxm_component_version_string =
|
||||
"Open SHMEM mxm atomic MCA component version " OSHMEM_VERSION;
|
||||
|
||||
/*
|
||||
* Global variable
|
||||
*/
|
||||
int mca_atomic_mxm_priority_param = -1;
|
||||
mca_spml_ikrit_t *mca_spml_self = NULL;
|
||||
|
||||
/*
|
||||
* Local function
|
||||
*/
|
||||
static int __mxm_open(void);
|
||||
|
||||
/*
|
||||
* Instantiate the public struct with all of our public information
|
||||
* and pointers to our public functions in it
|
||||
*/
|
||||
|
||||
mca_atomic_base_component_t mca_atomic_mxm_component = {
|
||||
|
||||
/* First, the mca_component_t struct containing meta information
|
||||
about the component itself */
|
||||
|
||||
{
|
||||
MCA_ATOMIC_BASE_VERSION_2_0_0,
|
||||
|
||||
/* Component name and version */
|
||||
"mxm",
|
||||
OSHMEM_MAJOR_VERSION,
|
||||
OSHMEM_MINOR_VERSION,
|
||||
OSHMEM_RELEASE_VERSION,
|
||||
|
||||
/* Component open and close functions */
|
||||
__mxm_open,
|
||||
NULL
|
||||
},
|
||||
{
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
},
|
||||
|
||||
/* Initialization / querying functions */
|
||||
|
||||
mca_atomic_mxm_init,
|
||||
mca_atomic_mxm_finalize,
|
||||
mca_atomic_mxm_query
|
||||
};
|
||||
|
||||
static int __mxm_open(void)
|
||||
{
|
||||
/*
|
||||
* This component is able to work using spml:ikrit component only
|
||||
* (this check is added instead of !mca_spml_ikrit.enabled)
|
||||
*/
|
||||
if (strcmp(mca_spml_base_selected_component.spmlm_version.mca_component_name, "ikrit")) {
|
||||
ATOMIC_VERBOSE(5,
|
||||
"Can not use atomic/mxm because spml ikrit component disabled");
|
||||
return OSHMEM_ERR_NOT_AVAILABLE;
|
||||
}
|
||||
mca_spml_self = (mca_spml_ikrit_t *)mca_spml.self;
|
||||
|
||||
mca_atomic_mxm_priority_param = 100;
|
||||
(void) mca_base_component_var_register(&mca_atomic_mxm_component.atomic_version,
|
||||
"priority",
|
||||
"Priority of the basic atomic:mxm component",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_atomic_mxm_priority_param);
|
||||
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(mca_atomic_mxm_module_t,
|
||||
mca_atomic_base_module_t,
|
||||
NULL,
|
||||
NULL);
|
140
oshmem/mca/atomic/mxm/atomic_mxm_cswap.c
Обычный файл
140
oshmem/mca/atomic/mxm/atomic_mxm_cswap.c
Обычный файл
@ -0,0 +1,140 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "oshmem_config.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "oshmem/constants.h"
|
||||
#include "oshmem/mca/spml/spml.h"
|
||||
#include "oshmem/mca/atomic/atomic.h"
|
||||
#include "oshmem/mca/atomic/base/base.h"
|
||||
#include "oshmem/mca/memheap/memheap.h"
|
||||
#include "oshmem/runtime/runtime.h"
|
||||
|
||||
#include "atomic_mxm.h"
|
||||
|
||||
|
||||
int mca_atomic_mxm_cswap(void *target,
|
||||
void *prev,
|
||||
const void *cond,
|
||||
const void *value,
|
||||
size_t nlong,
|
||||
int pe)
|
||||
{
|
||||
unsigned my_pe;
|
||||
uint8_t nlong_order;
|
||||
uint64_t remote_addr;
|
||||
int ptl_id;
|
||||
mxm_send_req_t sreq;
|
||||
mxm_error_t mxm_err;
|
||||
|
||||
my_pe = oshmem_my_proc_id();
|
||||
ptl_id = -1;
|
||||
mxm_err = MXM_OK;
|
||||
|
||||
if (!prev || !target || !value) {
|
||||
ATOMIC_ERROR("[#%d] Whether target, value or prev are not defined",
|
||||
my_pe);
|
||||
oshmem_shmem_abort(-1);
|
||||
return OSHMEM_ERR_BAD_PARAM;
|
||||
}
|
||||
if ((pe < 0) || (pe >= oshmem_num_procs())) {
|
||||
ATOMIC_ERROR("[#%d] PE=%d not valid", my_pe, pe);
|
||||
oshmem_shmem_abort(-1);
|
||||
return OSHMEM_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
switch (nlong) {
|
||||
case 1:
|
||||
nlong_order = 0;
|
||||
break;
|
||||
case 2:
|
||||
nlong_order = 1;
|
||||
break;
|
||||
case 4:
|
||||
nlong_order = 2;
|
||||
break;
|
||||
case 8:
|
||||
nlong_order = 3;
|
||||
break;
|
||||
default:
|
||||
ATOMIC_ERROR("[#%d] Type size must be 1/2/4 or 8 bytes.", my_pe);
|
||||
oshmem_shmem_abort(-1);
|
||||
return OSHMEM_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
ptl_id = oshmem_proc_group_all(pe)->transport_ids[0];
|
||||
if (MXM_PTL_SHM == ptl_id) {
|
||||
ptl_id = MXM_PTL_RDMA;
|
||||
}
|
||||
|
||||
if (!mca_memheap.memheap_get_cached_mkey(pe,
|
||||
(unsigned long) target,
|
||||
ptl_id,
|
||||
&remote_addr)) {
|
||||
ATOMIC_ERROR("[#%d] %p is not address of symmetric variable",
|
||||
my_pe, target);
|
||||
oshmem_shmem_abort(-1);
|
||||
return OSHMEM_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
/* mxm request init */
|
||||
sreq.base.state = MXM_REQ_NEW;
|
||||
sreq.base.mq = mca_spml_self->mxm_mq;
|
||||
sreq.base.conn = mca_spml_self->mxm_peers[pe]->mxm_conn;
|
||||
sreq.base.completed_cb = NULL;
|
||||
sreq.base.data_type = MXM_REQ_DATA_BUFFER;
|
||||
|
||||
/* set data */
|
||||
sreq.base.data.buffer.ptr = (void *) value;
|
||||
sreq.base.data.buffer.length = nlong;
|
||||
sreq.base.data.buffer.memh = MXM_INVALID_MEM_HANDLE;
|
||||
|
||||
sreq.op.atomic.remote_vaddr = (uintptr_t) remote_addr;
|
||||
#if MXM_API < MXM_VERSION(2,0)
|
||||
sreq.base.flags = 0;
|
||||
sreq.op.atomic.remote_memh = MXM_INVALID_MEM_HANDLE;
|
||||
#else
|
||||
sreq.flags = 0;
|
||||
sreq.op.atomic.remote_mkey = MXM_INVALID_MEM_HANDLE;
|
||||
#endif
|
||||
sreq.op.atomic.order = nlong_order;
|
||||
|
||||
if (NULL == cond) {
|
||||
sreq.opcode = MXM_REQ_OP_ATOMIC_SWAP;
|
||||
} else {
|
||||
#if MXM_API < MXM_VERSION(2,0)
|
||||
memcpy(&sreq.op.atomic.value8, cond, nlong);
|
||||
#else
|
||||
memcpy(&sreq.op.atomic.value, cond, nlong);
|
||||
#endif
|
||||
sreq.opcode = MXM_REQ_OP_ATOMIC_CSWAP;
|
||||
}
|
||||
|
||||
if (MXM_OK != (mxm_err = mxm_req_send(&sreq))) {
|
||||
ATOMIC_ERROR("[#%d] mxm_req_send failed, mxm_error = %d",
|
||||
my_pe, mxm_err);
|
||||
oshmem_shmem_abort(-1);
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
mxm_req_wait(&sreq.base);
|
||||
if (MXM_OK != sreq.base.error) {
|
||||
ATOMIC_ERROR("[#%d] mxm_req_wait got non MXM_OK error: %d",
|
||||
my_pe, sreq.base.error);
|
||||
oshmem_shmem_abort(-1);
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
memcpy(prev, value, nlong);
|
||||
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
147
oshmem/mca/atomic/mxm/atomic_mxm_fadd.c
Обычный файл
147
oshmem/mca/atomic/mxm/atomic_mxm_fadd.c
Обычный файл
@ -0,0 +1,147 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "oshmem_config.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "oshmem/constants.h"
|
||||
#include "oshmem/op/op.h"
|
||||
#include "oshmem/mca/spml/spml.h"
|
||||
#include "oshmem/mca/atomic/atomic.h"
|
||||
#include "oshmem/mca/atomic/base/base.h"
|
||||
#include "oshmem/mca/memheap/memheap.h"
|
||||
#include "oshmem/runtime/runtime.h"
|
||||
|
||||
#include "atomic_mxm.h"
|
||||
|
||||
|
||||
int mca_atomic_mxm_fadd(void *target,
|
||||
void *prev,
|
||||
const void *value,
|
||||
size_t nlong,
|
||||
int pe,
|
||||
struct oshmem_op_t *op)
|
||||
{
|
||||
unsigned my_pe;
|
||||
uint8_t nlong_order;
|
||||
uint64_t remote_addr;
|
||||
int ptl_id;
|
||||
mxm_send_req_t sreq;
|
||||
mxm_error_t mxm_err;
|
||||
|
||||
my_pe = oshmem_my_proc_id();
|
||||
ptl_id = -1;
|
||||
mxm_err = MXM_OK;
|
||||
|
||||
if (!target || !value) {
|
||||
ATOMIC_ERROR("[#%d] target or value are not defined", my_pe);
|
||||
oshmem_shmem_abort(-1);
|
||||
return OSHMEM_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
if ((pe < 0) || (pe >= oshmem_num_procs())) {
|
||||
ATOMIC_ERROR("[#%d] PE=%d not valid", my_pe, pe);
|
||||
oshmem_shmem_abort(-1);
|
||||
return OSHMEM_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
switch (nlong) {
|
||||
case 1:
|
||||
nlong_order = 0;
|
||||
break;
|
||||
case 2:
|
||||
nlong_order = 1;
|
||||
break;
|
||||
case 4:
|
||||
nlong_order = 2;
|
||||
break;
|
||||
case 8:
|
||||
nlong_order = 3;
|
||||
break;
|
||||
default:
|
||||
ATOMIC_ERROR("[#%d] Type size must be 1/2/4 or 8 bytes.", my_pe);
|
||||
oshmem_shmem_abort(-1);
|
||||
return OSHMEM_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
ptl_id = oshmem_proc_group_all(pe)->transport_ids[0];
|
||||
if (MXM_PTL_SHM == ptl_id) {
|
||||
ptl_id = MXM_PTL_RDMA;
|
||||
}
|
||||
|
||||
if (!mca_memheap.memheap_get_cached_mkey(pe,
|
||||
(unsigned long) target,
|
||||
ptl_id,
|
||||
&remote_addr)) {
|
||||
ATOMIC_ERROR("[#%d] %p is not address of symmetric variable",
|
||||
my_pe, target);
|
||||
oshmem_shmem_abort(-1);
|
||||
return OSHMEM_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
/* mxm request init */
|
||||
sreq.base.state = MXM_REQ_NEW;
|
||||
sreq.base.mq = mca_spml_self->mxm_mq;
|
||||
sreq.base.conn = mca_spml_self->mxm_peers[pe]->mxm_conn;
|
||||
sreq.base.completed_cb = NULL;
|
||||
sreq.base.data_type = MXM_REQ_DATA_BUFFER;
|
||||
|
||||
sreq.op.atomic.remote_vaddr = (uintptr_t) remote_addr;
|
||||
#if MXM_API < MXM_VERSION(2,0)
|
||||
sreq.op.atomic.remote_memh = MXM_INVALID_MEM_HANDLE;
|
||||
#else
|
||||
sreq.op.atomic.remote_mkey = MXM_INVALID_MEM_HANDLE;
|
||||
#endif
|
||||
sreq.op.atomic.order = nlong_order;
|
||||
memcpy(&sreq.op.atomic.value8, value, nlong);
|
||||
|
||||
/* Do we need atomic 'add' or atomic 'fetch and add'? */
|
||||
if (NULL == prev) {
|
||||
sreq.base.data.buffer.ptr = NULL;
|
||||
sreq.base.data.buffer.length = 0;
|
||||
sreq.base.data.buffer.memh = MXM_INVALID_MEM_HANDLE;
|
||||
#if MXM_API < MXM_VERSION(2,0)
|
||||
sreq.base.flags = MXM_REQ_FLAG_SEND_SYNC;
|
||||
sreq.opcode = MXM_REQ_OP_ATOMIC_ADD;
|
||||
#else
|
||||
sreq.flags = 0;
|
||||
sreq.opcode = MXM_REQ_OP_ATOMIC_ADD_SYNC;
|
||||
#endif
|
||||
} else {
|
||||
sreq.base.data.buffer.ptr = prev;
|
||||
sreq.base.data.buffer.length = nlong;
|
||||
sreq.base.data.buffer.memh = MXM_INVALID_MEM_HANDLE;
|
||||
#if MXM_API < MXM_VERSION(2,0)
|
||||
sreq.base.flags = 0;
|
||||
#else
|
||||
sreq.flags = 0;
|
||||
#endif
|
||||
|
||||
sreq.opcode = MXM_REQ_OP_ATOMIC_FADD;
|
||||
}
|
||||
|
||||
if (MXM_OK != (mxm_err = mxm_req_send(&sreq))) {
|
||||
ATOMIC_ERROR("[#%d] mxm_req_send failed, mxm_error = %d",
|
||||
my_pe, mxm_err);
|
||||
oshmem_shmem_abort(-1);
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
mxm_req_wait(&sreq.base);
|
||||
if (MXM_OK != sreq.base.error) {
|
||||
ATOMIC_ERROR("[#%d] mxm_req_wait got non MXM_OK error: %d",
|
||||
my_pe, sreq.base.error);
|
||||
oshmem_shmem_abort(-1);
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
50
oshmem/mca/atomic/mxm/atomic_mxm_module.c
Обычный файл
50
oshmem/mca/atomic/mxm/atomic_mxm_module.c
Обычный файл
@ -0,0 +1,50 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "oshmem_config.h"
|
||||
#include <stdio.h>
|
||||
|
||||
#include "oshmem/constants.h"
|
||||
#include "oshmem/mca/atomic/atomic.h"
|
||||
#include "oshmem/mca/spml/spml.h"
|
||||
#include "oshmem/mca/memheap/memheap.h"
|
||||
#include "oshmem/proc/proc.h"
|
||||
#include "atomic_mxm.h"
|
||||
|
||||
/*
|
||||
* Initial query function that is invoked during initialization, allowing
|
||||
* this module to indicate what level of thread support it provides.
|
||||
*/
|
||||
int mca_atomic_mxm_init(bool enable_progress_threads, bool enable_threads)
|
||||
{
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
int mca_atomic_mxm_finalize(void)
|
||||
{
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
mca_atomic_base_module_t *
|
||||
mca_atomic_mxm_query(int *priority)
|
||||
{
|
||||
mca_atomic_mxm_module_t *module;
|
||||
|
||||
*priority = mca_atomic_mxm_priority_param;
|
||||
|
||||
module = OBJ_NEW(mca_atomic_mxm_module_t);
|
||||
if (module) {
|
||||
module->super.atomic_fadd = mca_atomic_mxm_fadd;
|
||||
module->super.atomic_cswap = mca_atomic_mxm_cswap;
|
||||
return &(module->super);
|
||||
}
|
||||
|
||||
return NULL ;
|
||||
}
|
59
oshmem/mca/atomic/mxm/configure.m4
Обычный файл
59
oshmem/mca/atomic/mxm/configure.m4
Обычный файл
@ -0,0 +1,59 @@
|
||||
#
# Copyright (c) 2013 Mellanox Technologies, Inc.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
|
||||
|
||||
# MCA_oshmem_atomic_mxm_CONFIG([action-if-can-compile],
|
||||
# [action-if-cant-compile])
|
||||
# ------------------------------------------------
|
||||
AC_DEFUN([MCA_oshmem_atomic_mxm_CONFIG],[
|
||||
AC_CONFIG_FILES([oshmem/mca/atomic/mxm/Makefile])
|
||||
OMPI_CHECK_MXM([atomic_mxm],
|
||||
[save_CPPFLAGS="$CPPFLAGS"
|
||||
save_LDFLAGS="$LDFLAGS"
|
||||
save_LIBS="$LIBS"
|
||||
|
||||
CPPFLAGS="$CPPFLAGS -I$ompi_check_mxm_dir/include"
|
||||
LDFLAGS="$LDFLAGS -L$ompi_check_mxm_dir/lib"
|
||||
LIBS="$LIBS -lmxm"
|
||||
AC_TRY_RUN([
|
||||
#include <mxm/api/mxm_api.h>
|
||||
int main() {
|
||||
if (mxm_get_version() < MXM_VERSION(1,5) )
|
||||
return 1;
|
||||
|
||||
/* if the compiler sees these constants then mxm has atomic support */
|
||||
int add_index = MXM_REQ_OP_ATOMIC_ADD;
|
||||
int swap_index = MXM_REQ_OP_ATOMIC_SWAP;
|
||||
return 0;
|
||||
}],
|
||||
[AC_DEFINE([OSHMEM_HAS_ATOMIC_MXM], [1], [mxm support is available]) atomic_mxm_happy="yes"],
|
||||
[atomic_mxm_happy="no"])
|
||||
CPPFLAGS=$save_CPPFLAGS
|
||||
LDFLAGS=$save_LDFLAGS
|
||||
LIBS=$save_LIBS
|
||||
],
|
||||
[atomic_mxm_happy="no"])
|
||||
|
||||
AS_IF([test "$atomic_mxm_happy" = "yes"],
|
||||
[atomic_mxm_WRAPPER_EXTRA_LDFLAGS="$atomic_mxm_LDFLAGS"
|
||||
atomic_mxm_WRAPPER_EXTRA_LIBS="$atomic_mxm_LIBS"
|
||||
$1],
|
||||
[$2])
|
||||
|
||||
|
||||
# substitute in the things needed to build mxm
|
||||
AC_SUBST([atomic_mxm_CFLAGS])
|
||||
AC_SUBST([atomic_mxm_CPPFLAGS])
|
||||
AC_SUBST([atomic_mxm_LDFLAGS])
|
||||
AC_SUBST([atomic_mxm_LIBS])
|
||||
|
||||
AC_MSG_CHECKING([if oshmem/atomic/mxm component can be compiled])
|
||||
AC_MSG_RESULT([$atomic_mxm_happy])
|
||||
])dnl
|
||||
|
13
oshmem/mca/atomic/mxm/configure.params
Обычный файл
13
oshmem/mca/atomic/mxm/configure.params
Обычный файл
@ -0,0 +1,13 @@
|
||||
# -*- shell-script -*-
|
||||
# Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# Specific to this module
|
||||
|
||||
PARAM_CONFIG_FILES="Makefile"
|
39
oshmem/mca/memheap/Makefile.am
Обычный файл
39
oshmem/mca/memheap/Makefile.am
Обычный файл
@ -0,0 +1,39 @@
|
||||
#
|
||||
# Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# main library setup
|
||||
noinst_LTLIBRARIES = libmca_memheap.la
|
||||
libmca_memheap_la_SOURCES =
|
||||
libmca_memheap_la_LDFLAGS =
|
||||
libmca_memheap_la_LIBADD =
|
||||
|
||||
# header setup
|
||||
nobase_oshmem_HEADERS =
|
||||
nobase_nodist_oshmem_HEADERS =
|
||||
|
||||
dist_pkgdata_DATA =
|
||||
|
||||
# local files
|
||||
headers = memheap.h
|
||||
libmca_memheap_la_SOURCES += $(headers) $(nodist_headers)
|
||||
|
||||
# Conditionally install the header files
|
||||
if WANT_INSTALL_HEADERS
|
||||
nobase_oshmem_HEADERS += $(headers)
|
||||
nobase_nodist_oshmem_HEADERS += $(nodist_headers)
|
||||
oshmemdir = $(includedir)/oshmem/oshmem/mca/memheap
|
||||
else
|
||||
oshmemdir = $(includedir)
|
||||
endif
|
||||
|
||||
include base/Makefile.am
|
||||
|
||||
distclean-local:
|
||||
rm -f base/static-components.h
|
50
oshmem/mca/memheap/README
Обычный файл
50
oshmem/mca/memheap/README
Обычный файл
@ -0,0 +1,50 @@
|
||||
# Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
# All rights reserved
|
||||
# $COPYRIGHT$
|
||||
MEMHEAP Infrastructure documentation
|
||||
------------------------------------
|
||||
|
||||
MEMHEAP Infrastructure is responsible for managing the symmetric heap.
|
||||
The framework currently has the following components: buddy and ptmalloc. The buddy component uses a buddy allocator to manage memory allocations on the symmetric heap. The ptmalloc component is an adaptation of ptmalloc3.
|
||||
|
||||
Additional components may be added easily to the framework by defining the component's and the module's base and extended structures, and their functionalities.
|
||||
|
||||
The buddy allocator has the following data structures:
|
||||
1. Base component - of type struct mca_memheap_base_component_2_0_0_t
|
||||
2. Base module - of type struct mca_memheap_base_module_t
|
||||
3. Buddy component - of type struct mca_memheap_base_component_2_0_0_t
|
||||
4. Buddy module - of type struct mca_memheap_buddy_module_t extending the base module (struct mca_memheap_base_module_t)
|
||||
|
||||
Each data structure includes the following fields:
|
||||
1. Base component - memheap_version, memheap_data and memheap_init
|
||||
2. Base module - Holds pointers to the base component and to the functions: alloc, free and finalize
|
||||
3. Buddy component - is a base component.
|
||||
4. Buddy module - Extends the base module and holds additional data on the component's priority, buddy allocator,
|
||||
maximal order of the symmetric heap, symmetric heap, pointer to the symmetric heap and hashtable maintaining the size of each allocated address.
|
||||
|
||||
In the case that the user decides to implement additional components, the Memheap infrastructure chooses a component with the maximal priority.
|
||||
Handling the component opening is done under the base directory, in three stages:
|
||||
1. Open all available components. Implemented by memheap_base_open.c and called from shmem_init.
|
||||
2. Select the maximal priority component. This procedure involves the initialization of all components and then their
|
||||
finalization, except for the chosen component. It is implemented by memheap_base_select.c and called from shmem_init.
|
||||
3. Close the max priority active component. Implemented by memheap_base_close.c and called from shmem finalize.
|
||||
|
||||
|
||||
Buddy Component/Module
|
||||
----------------------
|
||||
|
||||
Responsible for handling the entire activities of the symmetric heap.
|
||||
The supported activities are:
|
||||
- buddy_init (Initialization)
|
||||
- buddy_alloc (Allocates a variable on the symmetric heap)
|
||||
- buddy_free (frees a variable previously allocated on the symmetric heap)
|
||||
- buddy_finalize (Finalization).
|
||||
|
||||
Data members of buddy module: - priority. The module's priority.
|
||||
- buddy allocator: bits, num_free, lock and the maximal order (log2 of the maximal size)
|
||||
of a variable on the symmetric heap. Buddy Allocator gives the offset in the symmetric heap
|
||||
where a variable should be allocated.
|
||||
- symmetric_heap: a range of reserved addresses (equal in all executing PE's) dedicated to "shared memory" allocation.
|
||||
- symmetric_heap_hashtable (holding the size of an allocated variable on the symmetric heap.
|
||||
used to free an allocated variable on the symmetric heap)
|
||||
|
27
oshmem/mca/memheap/base/Makefile.am
Обычный файл
27
oshmem/mca/memheap/base/Makefile.am
Обычный файл
@ -0,0 +1,27 @@
|
||||
# Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
AM_CFLAGS = $(OSHMEM_CFLAGS)
|
||||
AM_CPPFLAGS = $(openib_CPPFLAGS)
|
||||
|
||||
dist_pkgdata_DATA += base/help-shmem-mca.txt
|
||||
|
||||
headers += \
|
||||
base/base.h
|
||||
|
||||
libmca_memheap_la_SOURCES += \
|
||||
base/memheap_base_frame.c \
|
||||
base/memheap_base_select.c \
|
||||
base/memheap_base_alloc.c \
|
||||
base/memheap_base_static.c \
|
||||
base/memheap_base_register.c \
|
||||
base/memheap_base_mkey.c
|
||||
|
||||
libmca_memheap_la_LDFLAGS += -module -avoid-version $(openib_LDFLAGS)
|
||||
libmca_memheap_la_LIBADD += $(openib_LIBS)
|
191
oshmem/mca/memheap/base/base.h
Обычный файл
191
oshmem/mca/memheap/base/base.h
Обычный файл
@ -0,0 +1,191 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
*/
|
||||
#ifndef MCA_MEMHEAP_BASE_H
|
||||
#define MCA_MEMHEAP_BASE_H
|
||||
|
||||
#include "oshmem_config.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/class/opal_value_array.h"
|
||||
#include "opal/mca/mca.h"
|
||||
|
||||
#include "oshmem/mca/memheap/memheap.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/*
|
||||
* Global functions for MCA: overall MEMHEAP open and close
|
||||
*/
|
||||
OSHMEM_DECLSPEC int mca_memheap_base_select(void);
|
||||
|
||||
/*
|
||||
* Globals
|
||||
*/
|
||||
OSHMEM_DECLSPEC extern struct mca_memheap_base_module_t* mca_memheap_base_module_initialized;
|
||||
|
||||
/* only used within base -- no need to DECLSPEC */
|
||||
#define MEMHEAP_BASE_MIN_ORDER 3 /* forces 64 bit alignment */
|
||||
#define MEMHEAP_BASE_PAGE_ORDER 21
|
||||
#define MEMHEAP_BASE_PRIVATE_SIZE (1ULL << MEMHEAP_BASE_PAGE_ORDER) /* should be at least the same as a huge page size */
|
||||
#define MEMHEAP_BASE_MIN_SIZE (1ULL << MEMHEAP_BASE_PAGE_ORDER) /* must fit into at least one huge page */
|
||||
|
||||
extern unsigned long long mca_memheap_base_start_address;
|
||||
extern char* mca_memheap_base_include;
|
||||
extern char* mca_memheap_base_exclude;
|
||||
extern int mca_memheap_base_already_opened;
|
||||
extern int mca_memheap_base_alloc_type;
|
||||
extern int mca_memheap_base_key_exchange;
|
||||
extern int mca_memheap_base_mr_interleave_factor;
|
||||
|
||||
#define MCA_MEMHEAP_MAX_SEGMENTS 256
|
||||
#define HEAP_SEG_INDEX 0
|
||||
#define SYMB_SEG_INDEX 1
|
||||
|
||||
#define MEMHEAP_SHM_INVALID (-1)
|
||||
|
||||
#define MEMHEAP_SHM_CODE( type, id ) ((((uint64_t)(type)) << 32) | ((uint32_t)(id)))
|
||||
#define MEMHEAP_SHM_GET_TYPE( x ) (((uint32_t)((x) >> 32)) & 0xFFFFFFFF)
|
||||
#define MEMHEAP_SHM_GET_ID( x ) ((uint32_t)((x) & 0xFFFFFFFF))
|
||||
|
||||
/*
 * Kind of backing store behind a map_segment_t.  The numeric values are
 * spelled out so that they are obvious when a type is printed as an integer
 * in verbose output.
 */
typedef enum {
    MAP_SEGMENT_STATIC     = 0, /* first enumerator; not assigned by the allocation code */
    MAP_SEGMENT_ALLOC_MMAP = 1, /* segment obtained with mmap() */
    MAP_SEGMENT_ALLOC_SHM  = 2, /* segment obtained with shmget()/shmat() */
    MAP_SEGMENT_ALLOC_IBV  = 3, /* segment obtained through ibv_reg_mr() */
    MAP_SEGMENT_UNKNOWN    = 4  /* segment has not been allocated yet */
} segment_type_t;
|
||||
|
||||
#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
#include <infiniband/verbs.h>

/*
 * State kept for the InfiniBand device that backs a symmetric heap allocated
 * through ibv_reg_mr().  Only compiled when MPAGE_ENABLE is set.
 */
typedef struct openib_device_t {
    struct ibv_device **ib_devs;        /* list returned by ibv_get_device_list() */
    struct ibv_device *ib_dev;          /* device selected from ib_devs */
    struct ibv_context *ib_dev_context; /* context returned by ibv_open_device() */
    struct ibv_device_attr ib_dev_attr; /* attributes filled in by ibv_query_device() */
    struct ibv_pd *ib_pd;               /* protection domain from ibv_alloc_pd() */
    opal_value_array_t ib_mr_array;     /* every registered struct ibv_mr *, in registration order */
    struct ibv_mr *ib_mr_shared;        /* the region created by ibv_reg_mr() */
} openib_device_t;
#endif /* MPAGE_ENABLE */
|
||||
|
||||
typedef struct map_segment_t {
|
||||
mca_spml_mkey_t **mkeys_cache; /* includes remote segment bases in va_base */
|
||||
mca_spml_mkey_t *mkeys; /* includes local segment bases in va_base */
|
||||
int is_active; /* enable/disable flag */
|
||||
int shmid;
|
||||
|
||||
uint64_t start; /* base address of the segment */
|
||||
uint64_t end; /* final address of the segment */
|
||||
size_t size; /* length of the segment */
|
||||
|
||||
segment_type_t type; /* type of the segment */
|
||||
void *context; /* additional data related the segment */
|
||||
} map_segment_t;
|
||||
|
||||
typedef struct mca_memheap_map {
|
||||
map_segment_t mem_segs[MCA_MEMHEAP_MAX_SEGMENTS]; /* TODO: change into pointer array */
|
||||
int n_segments;
|
||||
int num_transports;
|
||||
} mca_memheap_map_t;
|
||||
|
||||
extern mca_memheap_map_t mca_memheap_base_map;
|
||||
|
||||
int mca_memheap_base_alloc_init(mca_memheap_map_t *, size_t);
|
||||
void mca_memheap_base_alloc_exit(mca_memheap_map_t *);
|
||||
int mca_memheap_base_static_init(mca_memheap_map_t *);
|
||||
void mca_memheap_base_static_exit(mca_memheap_map_t *);
|
||||
int mca_memheap_base_reg(mca_memheap_map_t *);
|
||||
int mca_memheap_base_dereg(mca_memheap_map_t *);
|
||||
int memheap_oob_init(mca_memheap_map_t *);
|
||||
void memheap_oob_destruct(void);
|
||||
|
||||
OSHMEM_DECLSPEC uint64_t mca_memheap_base_find_offset(int pe,
|
||||
int tr_id,
|
||||
unsigned long va,
|
||||
uint64_t rva);
|
||||
OSHMEM_DECLSPEC int mca_memheap_base_is_symmetric_addr(unsigned long va);
|
||||
OSHMEM_DECLSPEC mca_spml_mkey_t *mca_memheap_base_get_mkey(unsigned long va,
|
||||
int tr_id);
|
||||
OSHMEM_DECLSPEC mca_spml_mkey_t * mca_memheap_base_get_cached_mkey(int pe,
|
||||
unsigned long va,
|
||||
int btl_id,
|
||||
uint64_t *rva);
|
||||
OSHMEM_DECLSPEC void mca_memheap_modex_recv_all(void);
|
||||
|
||||
/*
 * Address classification codes (internal usage only):
 *   ADDR_INVALID (0) - addr is not a symmetric address
 *   ADDR_USER    (1) - addr is part of the user memheap
 *   ADDR_PRIVATE (2) - addr is part of the private memheap
 *   ADDR_STATIC  (3) - addr is a static variable
 */
typedef enum {
    ADDR_INVALID = 0,
    ADDR_USER    = 1,
    ADDR_PRIVATE = 2,
    ADDR_STATIC  = 3,
} addr_type_t;
|
||||
|
||||
OSHMEM_DECLSPEC int mca_memheap_base_detect_addr_type(unsigned long va);
|
||||
|
||||
/*
 * Return ceil(log2(val)): the smallest order such that (1 << order) >= val.
 * Both 0 and 1 yield 0.
 */
static inline unsigned memheap_log2(unsigned long long val)
{
    /* more than one bit set means val is not a power of two: round up */
    const unsigned int round_up = ((val & (val - 1)) != 0) ? 1 : 0;
    unsigned int bits = 0;

    /* count the bits needed to represent val */
    for (; val != 0; val >>= 1) {
        bits++;
    }
    bits += round_up;

    return (0 == bits) ? 0 : bits - 1;
}
|
||||
|
||||
/*
 * Round addr down to a multiple of (1 << shift) by clearing its low
 * `shift` bits.
 *
 * The mask is built from an unsigned value: left-shifting the negative
 * signed value ~(intptr_t)0 is undefined behaviour in C.  shift must be
 * smaller than the width of a pointer.
 */
static inline void *memheap_down_align_addr(void* addr, unsigned int shift)
{
    const uintptr_t high_mask = (~(uintptr_t) 0) << shift;

    return (void*) (((uintptr_t) addr) & high_mask);
}
|
||||
|
||||
/*
 * Set the low `shift` bits of addr, i.e. return the address of the LAST byte
 * of the (1 << shift)-sized block that contains addr (0x1000 with shift 12
 * gives 0x1FFF, not 0x2000).
 *
 * The mask is built from an unsigned value: left-shifting the negative
 * signed value ~(intptr_t)0 is undefined behaviour in C.  shift must be
 * smaller than the width of a pointer.
 */
static inline void *memheap_up_align_addr(void*addr, unsigned int shift)
{
    const uintptr_t low_mask = ~((~(uintptr_t) 0) << shift);

    return (void*) (((uintptr_t) addr) | low_mask);
}
|
||||
|
||||
static inline unsigned long long memheap_align(unsigned long top)
|
||||
{
|
||||
return ((top + MEMHEAP_BASE_MIN_SIZE - 1) & ~(MEMHEAP_BASE_MIN_SIZE - 1));
|
||||
}
|
||||
|
||||
/*
|
||||
* MCA framework
|
||||
*/
|
||||
OSHMEM_DECLSPEC extern mca_base_framework_t oshmem_memheap_base_framework;
|
||||
|
||||
/* ******************************************************************** */
|
||||
#ifdef __BASE_FILE__
|
||||
#define __SPML_FILE__ __BASE_FILE__
|
||||
#else
|
||||
#define __SPML_FILE__ __FILE__
|
||||
#endif
|
||||
|
||||
#define MEMHEAP_VERBOSE(level, format, ...) \
|
||||
opal_output_verbose(level, oshmem_memheap_base_framework.framework_output, "%s:%d - %s() " format, \
|
||||
__SPML_FILE__, __LINE__, __FUNCTION__, ## __VA_ARGS__)
|
||||
|
||||
#define MEMHEAP_ERROR(format, ... ) \
|
||||
opal_output_verbose(0, oshmem_memheap_base_framework.framework_output, "Error: %s:%d - %s() " format, \
|
||||
__SPML_FILE__, __LINE__, __FUNCTION__, ## __VA_ARGS__)
|
||||
|
||||
#define MEMHEAP_WARN(format, ... ) \
|
||||
opal_output_verbose(0, oshmem_memheap_base_framework.framework_output, "Warning: %s:%d - %s() " format, \
|
||||
__SPML_FILE__, __LINE__, __FUNCTION__, ## __VA_ARGS__)
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* MCA_MEMHEAP_BASE_H */
|
23
oshmem/mca/memheap/base/help-shmem-mca.txt
Обычный файл
23
oshmem/mca/memheap/base/help-shmem-mca.txt
Обычный файл
@ -0,0 +1,23 @@
|
||||
# -*- text -*-
|
||||
#
|
||||
# Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
# This is the US/English help file for Open SHMEM MCA error messages.
|
||||
#
|
||||
[find-available:none-found]
|
||||
No available %s components were found!
|
||||
|
||||
This means that there are no components of this type installed on your
|
||||
system or all the components reported that they could not be used.
|
||||
|
||||
This is a fatal error; your SHMEM process is likely to abort. Check the
|
||||
output of the "ompi_info" command and ensure that components of this
|
||||
type are available on your system. You may also wish to check the
|
||||
value of the "component_path" MCA parameter and ensure that it has at
|
||||
least one directory that contains valid MCA components.
|
553
oshmem/mca/memheap/base/memheap_base_alloc.c
Обычный файл
553
oshmem/mca/memheap/base/memheap_base_alloc.c
Обычный файл
@ -0,0 +1,553 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "oshmem_config.h"
|
||||
|
||||
#include "opal/util/output.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "oshmem/mca/memheap/memheap.h"
|
||||
#include "oshmem/mca/memheap/base/base.h"
|
||||
|
||||
#ifdef HAVE_SYS_MMAN_H
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
|
||||
#include <sys/ipc.h>
|
||||
#include <sys/shm.h>
|
||||
|
||||
#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
|
||||
#include <infiniband/verbs.h>
|
||||
#endif /* MPAGE_ENABLE */
|
||||
|
||||
extern char* mca_memheap_base_param_hca_name;
|
||||
|
||||
static int __shm_attach(map_segment_t *, size_t, int, int);
|
||||
static void __shm_detach(map_segment_t *);
|
||||
|
||||
static int __mmap_attach(map_segment_t *, size_t);
|
||||
static void __mmap_detach(map_segment_t *);
|
||||
|
||||
#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
|
||||
static int __ibv_attach(map_segment_t *, size_t);
|
||||
static void __ibv_detach(map_segment_t *);
|
||||
#endif /* MPAGE_ENABLE */
|
||||
|
||||
static int __adaptive_attach(map_segment_t *, size_t);
|
||||
|
||||
/*
 * Allocate the symmetric heap and record it as the first segment of map.
 *
 * The allocation strategy is chosen by mca_memheap_base_alloc_type:
 *     0 - SysV shm, no huge pages, shmid removed right after attach
 *     1 - SysV shm with huge pages, falling back to regular pages
 *     2 - SysV shm with huge pages only, shmid removed right after attach
 *     3 - SysV shm with huge pages only, shmid kept
 *     4 - SysV shm, no huge pages, shmid kept
 *     5 - (MPAGE_ENABLE builds only) ibv registration, falling back to SysV shm
 *   100 - mmap()
 *   101 - SysV shm with huge pages, falling back to mmap(); shmid is not recorded
 *   102 - SysV shm with huge pages only; shmid is not recorded
 *  else - try the methods one after another (__adaptive_attach)
 *
 * @param map   segment table; map->n_segments must equal HEAP_SEG_INDEX
 * @param size  number of bytes to allocate
 * @return OSHMEM_SUCCESS, or the error code of the attach routine that failed
 */
int mca_memheap_base_alloc_init(mca_memheap_map_t *map, size_t size)
{
    const int method = mca_memheap_base_alloc_type;
    int rc = OSHMEM_SUCCESS;
    map_segment_t *seg;

    assert(map);
    assert(HEAP_SEG_INDEX == map->n_segments);

    MEMHEAP_VERBOSE(5,
                    "memheap method : %d",
                    method);

    /* Start from a blank descriptor; memset() already zeroes is_active,
     * start, end and size, so only the non-zero defaults are set by hand. */
    seg = &map->mem_segs[map->n_segments];
    memset(seg, 0, sizeof(*seg));
    seg->shmid = MEMHEAP_SHM_INVALID;
    seg->type = MAP_SEGMENT_UNKNOWN;
    seg->context = NULL;

    switch (method) {
    case 0:
    case 4:
        /* use sysv alloc without hugepages; only method 0 removes the shmid */
        rc = __shm_attach(seg, size, 0, (0 == method) ? 1 : 0);
        break;

    case 1:
        /* huge pages first, regular pages as a fallback */
        rc = __shm_attach(seg, size, 1, 1);
        if (OSHMEM_SUCCESS != rc) {
            rc = __shm_attach(seg, size, 0, 1);
        }
        break;

    case 2:
    case 3:
        /* huge pages only; method 3 additionally keeps the shmid around */
        rc = __shm_attach(seg, size, 1, (2 == method) ? 1 : 0);
        if (OSHMEM_SUCCESS != rc) {
            MEMHEAP_ERROR("FAILED to allocated symmetric heap using hugepages fallback is disabled, errno=%d",
                          errno);
        }
        break;

#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
    case 5:
        /* use shared memory registration (mpages) */
        rc = __ibv_attach(seg, size);
        if (OSHMEM_SUCCESS != rc) {
            rc = __shm_attach(seg, size, 0, 1);
        }
        break;
#endif /* MPAGE_ENABLE */

    case 100:
        /* use mmap. It will severely impact performance of intra node communication */
        rc = __mmap_attach(seg, size);
        MEMHEAP_VERBOSE(1,
                        "mmap() memheap allocation will severely impact performance of intra node communication");
        break;

    case 101:
        rc = __shm_attach(seg, size, 1, 1);
        if (OSHMEM_SUCCESS == rc) {
            /* forget the id that __shm_attach() recorded */
            seg->shmid = MEMHEAP_SHM_INVALID;
        } else {
            MEMHEAP_ERROR("Failed to allocate hugepages. Falling back on regular allocation");
            rc = __mmap_attach(seg, size);
        }
        MEMHEAP_VERBOSE(1, "SM BTL will be always used for intranode comm\n");
        break;

    case 102:
        rc = __shm_attach(seg, size, 1, 1);
        if (OSHMEM_SUCCESS == rc) {
            /* forget the id that __shm_attach() recorded */
            seg->shmid = MEMHEAP_SHM_INVALID;
        } else {
            MEMHEAP_ERROR("FAILED to allocated symmetric heap using hugepages fallback is disabled, errno=%d",
                          errno);
        }
        break;

    default:
        rc = __adaptive_attach(seg, size);
    }

    if (OSHMEM_SUCCESS != rc) {
        return rc;
    }

    /* the heap is now a registered segment of the map */
    map->n_segments++;
    MEMHEAP_VERBOSE(1,
                    "Memheap alloc memory: %llu byte(s), %d segments by method: %d",
                    (unsigned long long)size, map->n_segments, seg->type);

    return rc;
}
|
||||
|
||||
/*
 * Release the symmetric heap segment (map->mem_segs[HEAP_SEG_INDEX]) with the
 * detach routine that matches the way it was allocated.  A NULL map is
 * ignored; a segment of any other type is only reported.
 */
void mca_memheap_base_alloc_exit(mca_memheap_map_t *map)
{
    map_segment_t *heap;

    if (NULL == map) {
        return;
    }

    heap = &map->mem_segs[HEAP_SEG_INDEX];
    assert(heap);

    if (MAP_SEGMENT_ALLOC_SHM == heap->type) {
        __shm_detach(heap);
    } else if (MAP_SEGMENT_ALLOC_MMAP == heap->type) {
        __mmap_detach(heap);
#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
    } else if (MAP_SEGMENT_ALLOC_IBV == heap->type) {
        __ibv_detach(heap);
#endif /* MPAGE_ENABLE */
    } else {
        MEMHEAP_ERROR("Unknown segment type: %d", (int)heap->type);
    }
}
|
||||
|
||||
/*
 * Try every available allocation method, from the most to the least
 * preferred, and stop at the first one that succeeds:
 *   1. ibv registration (MPAGE_ENABLE builds only)
 *   2. SysV shm with huge pages, shmid removed after attach
 *   3. SysV shm with regular pages, shmid removed after attach
 *   4. SysV shm with regular pages, shmid kept
 *   5. mmap()
 *
 * @param s     segment descriptor to fill in
 * @param size  number of bytes to allocate
 * @return OSHMEM_SUCCESS, or the error code of the last method tried
 */
static int __adaptive_attach(map_segment_t *s, size_t size)
{
    /* Nothing is allocated yet, so start from a failure code.  Starting from
     * OSHMEM_SUCCESS made builds without MPAGE_ENABLE skip every fallback
     * below and report success without having allocated anything. */
    int rc = OSHMEM_ERROR;

#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
    rc = __ibv_attach(s, size);
#endif /* MPAGE_ENABLE */

    if (OSHMEM_SUCCESS != rc) {
        rc = __shm_attach(s, size, 1, 1);
    }

    if (OSHMEM_SUCCESS != rc) {
        rc = __shm_attach(s, size, 0, 1);
    }

    if (OSHMEM_SUCCESS != rc) {
        rc = __shm_attach(s, size, 0, 0);
    }

    if (OSHMEM_SUCCESS != rc) {
        rc = __mmap_attach(s, size);
    }

    return rc;
}
|
||||
|
||||
/*
 * Create a private SysV shared memory segment of `size` bytes and attach it
 * at mca_memheap_base_start_address.
 *
 * @param s        segment descriptor to fill in on success
 * @param size     number of bytes to allocate
 * @param use_hp   non-zero to request huge pages (only honoured where
 *                 SHM_HUGETLB is defined)
 * @param do_rmid  non-zero to mark the segment for removal right after attach
 * @return OSHMEM_SUCCESS, OSHMEM_ERROR when shmget() fails, or
 *         OSHMEM_ERR_OUT_OF_RESOURCE when shmat() fails
 */
static int __shm_attach(map_segment_t *s, size_t size, int use_hp, int do_rmid)
{
    /* remembers the use_hp value of the latest call; s->context points here */
    static int shm_context = 0;
    int shm_flags = IPC_CREAT | IPC_EXCL | SHM_R | SHM_W;
    int id = MEMHEAP_SHM_INVALID;
    void *base = NULL;

    assert(s);

    shm_context = use_hp;

#if defined (SHM_HUGETLB)
    if (use_hp) {
        shm_flags |= SHM_HUGETLB;
    }
#endif

    /* Create a new shared memory segment and save the shmid. */
    id = shmget(IPC_PRIVATE, size, shm_flags);
    if (MEMHEAP_SHM_INVALID == id) {
        MEMHEAP_VERBOSE(1, "Failed to get shm segment (errno=%d)", errno);
        return OSHMEM_ERROR;
    }

    /* Attach to the segment */
    base = shmat(id, (void *) mca_memheap_base_start_address, 0);
    if ((void *) -1L == base) {
        MEMHEAP_VERBOSE(1, "Failed to attach to shm segment (errno=%d)", errno);

        /* do not leave the unattached segment behind */
        shmctl(id, IPC_RMID, NULL );
        return OSHMEM_ERR_OUT_OF_RESOURCE;
    }

    MEMHEAP_VERBOSE(5, "got shmid %d", id);

    if (do_rmid) {
        shmctl(id, IPC_RMID, NULL );
    }

    s->type = MAP_SEGMENT_ALLOC_SHM;
    s->shmid = id;
    s->start = (uintptr_t) base;
    s->size = size;
    s->end = s->start + s->size;
    s->context = &shm_context;

    return OSHMEM_SUCCESS;
}
|
||||
|
||||
/*
 * Release a segment obtained with __shm_attach(): the shmid, when one is
 * recorded, is marked for removal.  The memory itself is never detached here.
 */
static void __shm_detach(map_segment_t *s)
{
    assert(s);

    if (MEMHEAP_SHM_INVALID != s->shmid) {
        shmctl(s->shmid, IPC_RMID, NULL );
    }

    /**
     * Workaround kernel panic when detaching huge pages from user space
     * simultaneously from several processes: when the segment context says
     * that huge pages were used, do not detach here; instead let the kernel
     * do it during process cleanup.
     *
     * shmdt((void *)s->start) is therefore intentionally not called.
     */
}
|
||||
|
||||
static int __mmap_attach(map_segment_t *s, size_t size)
|
||||
{
|
||||
void *addr = NULL;
|
||||
|
||||
assert(s);
|
||||
|
||||
addr = mmap((void *) mca_memheap_base_start_address,
|
||||
size,
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED |
|
||||
#if defined (__APPLE__)
|
||||
MAP_ANON
|
||||
#elif defined (__GNUC__)
|
||||
MAP_ANONYMOUS
|
||||
#endif
|
||||
| MAP_FIXED,
|
||||
0,
|
||||
0);
|
||||
|
||||
if (MAP_FAILED == addr) {
|
||||
MEMHEAP_ERROR("Failed to mmap() %llu bytes (errno=%d)",
|
||||
(unsigned long long)size, errno);
|
||||
return OSHMEM_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
s->type = MAP_SEGMENT_ALLOC_MMAP;
|
||||
s->shmid = MEMHEAP_SHM_INVALID;
|
||||
s->start = (uintptr_t) addr;
|
||||
s->size = size;
|
||||
s->end = s->start + s->size;
|
||||
s->context = NULL;
|
||||
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
/*
 * Unmap a segment obtained with __mmap_attach().  A munmap() failure is
 * reported instead of being silently dropped; nothing else can be done about
 * it at this point.
 */
static void __mmap_detach(map_segment_t *s)
{
    assert(s);

    if (0 != munmap((void *) (uintptr_t) s->start, s->size)) {
        MEMHEAP_ERROR("Failed to munmap() %llu bytes (errno=%d)",
                      (unsigned long long)s->size, errno);
    }
}
|
||||
|
||||
#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
|
||||
|
||||
/*
 * Allocate the symmetric heap through the InfiniBand verbs library:
 * open an HCA, allocate a protection domain, let ibv_reg_mr() allocate the
 * memory and then map it at mca_memheap_base_start_address with
 * ibv_reg_shared_mr().
 *
 * When mca_memheap_base_param_hca_name is set only the HCA with that name is
 * used; otherwise the first HCA that can be opened is used.
 *
 * On failure everything acquired so far is released again, so the caller may
 * simply fall back to another allocation method.
 *
 * @param s     segment descriptor to fill in on success
 * @param size  number of bytes to allocate
 * @return OSHMEM_SUCCESS, OSHMEM_ERR_NOT_SUPPORTED (no usable HCA),
 *         OSHMEM_ERR_RESOURCE_BUSY (device could not be opened/queried, or
 *         no protection domain) or OSHMEM_ERR_OUT_OF_RESOURCE (registration
 *         failed)
 */
static int __ibv_attach(map_segment_t *s, size_t size)
{
    int rc = OSHMEM_SUCCESS;
    static openib_device_t memheap_device;
    openib_device_t *device = &memheap_device;
    int num_devs = 0;
    int mr_array_ready = 0; /* set once ib_mr_array has been constructed */

    assert(s);

    memset(device, 0, sizeof(*device));

#ifdef HAVE_IBV_GET_DEVICE_LIST
    device->ib_devs = ibv_get_device_list(&num_devs);
#else
#error unsupported ibv_get_device_list in infiniband/verbs.h
#endif

    if (num_devs == 0 || !device->ib_devs)
    {
        rc = OSHMEM_ERR_NOT_SUPPORTED;
    }

    /* Open device */
    if (!rc)
    {
        const char *wanted = mca_memheap_base_param_hca_name;
        int i = 0;

        if (num_devs > 1)
        {
            if (NULL == wanted)
            {
                MEMHEAP_VERBOSE(5, "found %d HCAs, choosing the first", num_devs);
            }
            else
            {
                MEMHEAP_VERBOSE(5, "found %d HCAs, searching for %s", num_devs, wanted);
            }
        }

        for (i = 0; i < num_devs; i++)
        {
            struct ibv_context *ctx = NULL;

            device->ib_dev = device->ib_devs[i];

            ctx = ibv_open_device(device->ib_dev);
            if (NULL == ctx)
            {
                MEMHEAP_ERROR("error obtaining device context for %s errno says %d: %s",
                              ibv_get_device_name(device->ib_dev), errno, strerror(errno));
                rc = OSHMEM_ERR_RESOURCE_BUSY;
                continue;
            }

            if (NULL != wanted &&
                0 != strcmp(wanted, ibv_get_device_name(device->ib_dev)))
            {
                /* not the requested HCA: do not keep its context open */
                ibv_close_device(ctx);
                continue;
            }

            /* never hand a NULL pointer to %s */
            MEMHEAP_VERBOSE(5, "mca_memheap_base_param_hca_name = %s, selected %s as %d of %d",
                            (NULL != wanted) ? wanted : "(not set)",
                            ibv_get_device_name(device->ib_dev), i, num_devs);
            device->ib_dev_context = ctx;
            rc = OSHMEM_SUCCESS;
            break;
        }

        /* Every device could be opened but none carries the requested name.
         * (Without a requested name the first opened device is selected, so
         * `wanted` is not NULL here.) */
        if (OSHMEM_SUCCESS == rc && NULL == device->ib_dev_context)
        {
            MEMHEAP_ERROR("HCA %s was not found among %d device(s)", wanted, num_devs);
            rc = OSHMEM_ERR_NOT_SUPPORTED;
        }
    }

    /* Obtain device attributes */
    if (!rc)
    {
        if (ibv_query_device(device->ib_dev_context, &device->ib_dev_attr))
        {
            MEMHEAP_ERROR("error obtaining device attributes for %s errno says %d: %s",
                          ibv_get_device_name(device->ib_dev), errno, strerror(errno));
            rc = OSHMEM_ERR_RESOURCE_BUSY;
        }
        else
        {
            MEMHEAP_VERBOSE(5, "ibv device %s",
                            ibv_get_device_name(device->ib_dev));
        }
    }

    /* Allocate the protection domain for the device */
    if (!rc)
    {
        device->ib_pd = ibv_alloc_pd(device->ib_dev_context);
        if (NULL == device->ib_pd)
        {
            MEMHEAP_ERROR("error allocating protection domain for %s errno says %d: %s",
                          ibv_get_device_name(device->ib_dev), errno, strerror(errno));
            rc = OSHMEM_ERR_RESOURCE_BUSY;
        }
    }

    /* Allocate memory */
    if (!rc)
    {
        void *addr = NULL;
        struct ibv_mr *ib_mr = NULL;
        int access_flag = IBV_ACCESS_LOCAL_WRITE |
                          IBV_ACCESS_REMOTE_WRITE |
                          IBV_ACCESS_REMOTE_READ;

        OBJ_CONSTRUCT(&device->ib_mr_array, opal_value_array_t);
        opal_value_array_init(&device->ib_mr_array, sizeof(struct ibv_mr *));
        mr_array_ready = 1;

#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
        access_flag |= IBV_ACCESS_ALLOCATE_MR |
                       IBV_ACCESS_SHARED_MR_USER_READ |
                       IBV_ACCESS_SHARED_MR_USER_WRITE;
#endif /* MPAGE_ENABLE */

        /* addr is NULL here: the memory is expected to be provided by the
         * registration itself (IBV_ACCESS_ALLOCATE_MR) */
        ib_mr = ibv_reg_mr(device->ib_pd, addr, size, access_flag);
        if (NULL == ib_mr)
        {
            MEMHEAP_ERROR("error to ibv_reg_mr() %llu bytes errno says %d: %s",
                          (unsigned long long)size, errno, strerror(errno));
            rc = OSHMEM_ERR_OUT_OF_RESOURCE;
        }
        else
        {
            device->ib_mr_shared = ib_mr;
            opal_value_array_append_item(&device->ib_mr_array, &ib_mr);
        }

#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
        if (!rc)
        {
            access_flag = IBV_ACCESS_LOCAL_WRITE |
                          IBV_ACCESS_REMOTE_WRITE |
                          IBV_ACCESS_REMOTE_READ|
                          IBV_ACCESS_NO_RDMA;

            /* map the allocated region at the requested base address */
            addr = (void *)mca_memheap_base_start_address;
            ib_mr = ibv_reg_shared_mr(device->ib_mr_shared->handle,
                                      device->ib_pd, addr, access_flag);
            if (NULL == ib_mr)
            {
                MEMHEAP_ERROR("error to ibv_reg_shared_mr() %llu bytes errno says %d: %s",
                              (unsigned long long)size, errno, strerror(errno));
                rc = OSHMEM_ERR_OUT_OF_RESOURCE;
            }
            else
            {
                opal_value_array_append_item(&device->ib_mr_array, &ib_mr);
            }
        }
#endif /* MPAGE_ENABLE */

        if (!rc)
        {
            assert(size == device->ib_mr_shared->length);

            s->type = MAP_SEGMENT_ALLOC_IBV;
            s->shmid = device->ib_mr_shared->handle;
            s->start = (intptr_t)ib_mr->addr;
            s->size = size;
            s->end = s->start + s->size;
            s->context = &memheap_device;
        }
    }

    /* Undo a partial set-up.  The segment was not marked as
     * MAP_SEGMENT_ALLOC_IBV, so __ibv_detach() will never be called for it
     * and whatever is still held here would otherwise be leaked. */
    if (OSHMEM_SUCCESS != rc)
    {
        /* on a failure path the only region that can exist is ib_mr_shared */
        if (NULL != device->ib_mr_shared)
        {
            (void) ibv_dereg_mr(device->ib_mr_shared);
        }
        if (mr_array_ready)
        {
            OBJ_DESTRUCT(&device->ib_mr_array);
        }
        if (NULL != device->ib_pd)
        {
            (void) ibv_dealloc_pd(device->ib_pd);
        }
        if (NULL != device->ib_dev_context)
        {
            (void) ibv_close_device(device->ib_dev_context);
        }
        if (NULL != device->ib_devs)
        {
            ibv_free_device_list(device->ib_devs);
        }
        memset(device, 0, sizeof(*device));
    }

    return rc;
}
|
||||
|
||||
/*
 * Release what __ibv_attach() stored in s->context: the registered memory
 * regions, the protection domain, the device context and the device list,
 * in that order.  After the first failure the remaining resources are left
 * untouched (every region in the array is still attempted).
 */
static void __ibv_detach(map_segment_t *s)
{
    int status = OSHMEM_SUCCESS;
    openib_device_t *dev = NULL;

    assert(s);

    dev = (openib_device_t *)s->context;
    if (NULL == dev)
    {
        return;
    }

    /* memory regions */
    if (opal_value_array_get_size(&dev->ib_mr_array))
    {
        struct ibv_mr **regions =
            OPAL_VALUE_ARRAY_GET_BASE(&dev->ib_mr_array, struct ibv_mr *);

        /* always drop the head; removing it moves the next entry to index 0 */
        while (opal_value_array_get_size(&dev->ib_mr_array) > 0)
        {
            struct ibv_mr *head = regions[0];

            if (ibv_dereg_mr(head))
            {
                MEMHEAP_ERROR("error ibv_dereg_mr(): %d: %s", errno, strerror(errno));
                status = OSHMEM_ERROR;
            }
            opal_value_array_remove_item(&dev->ib_mr_array, 0);
        }

        if (OSHMEM_SUCCESS == status)
        {
            dev->ib_mr_shared = NULL;
        }
        OBJ_DESTRUCT(&dev->ib_mr_array);
    }

    /* protection domain */
    if (OSHMEM_SUCCESS == status && dev->ib_pd)
    {
        if (0 == ibv_dealloc_pd(dev->ib_pd))
        {
            dev->ib_pd = NULL;
        }
        else
        {
            MEMHEAP_ERROR("error ibv_dealloc_pd(): %d: %s", errno, strerror(errno));
            status = OSHMEM_ERROR;
        }
    }

    /* device context */
    if (OSHMEM_SUCCESS == status && dev->ib_dev_context)
    {
        if (0 == ibv_close_device(dev->ib_dev_context))
        {
            dev->ib_dev_context = NULL;
        }
        else
        {
            MEMHEAP_ERROR("error ibv_close_device(): %d: %s", errno, strerror(errno));
            status = OSHMEM_ERROR;
        }
    }

    /* device list */
    if (OSHMEM_SUCCESS == status && dev->ib_devs)
    {
        ibv_free_device_list(dev->ib_devs);
        dev->ib_devs = NULL;
    }
}
|
||||
|
||||
#endif /* MPAGE_ENABLE */
|
215
oshmem/mca/memheap/base/memheap_base_frame.c
Обычный файл
215
oshmem/mca/memheap/base/memheap_base_frame.c
Обычный файл
@ -0,0 +1,215 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "oshmem_config.h"
|
||||
#include <stdio.h>
|
||||
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
|
||||
#include "oshmem/mca/memheap/memheap.h"
|
||||
#include "oshmem/mca/memheap/base/base.h"
|
||||
|
||||
/*
|
||||
* The following file was created by configure. It contains extern
|
||||
* statements and the definition of an array of pointers to each
|
||||
* component's public mca_base_component_t struct.
|
||||
*/
|
||||
|
||||
#include "oshmem/mca/memheap/base/static-components.h"
|
||||
|
||||
#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
|
||||
int mca_memheap_base_alloc_type = 5;
|
||||
#else
|
||||
int mca_memheap_base_alloc_type = 1;
|
||||
#endif /* MPAGE_ENABLE */
|
||||
|
||||
unsigned long long int mca_memheap_base_start_address = 0xFF000000;
|
||||
int mca_memheap_base_output = -1;
|
||||
int mca_memheap_base_key_exchange = 1;
|
||||
int mca_memheap_base_mr_interleave_factor = 2;
|
||||
char* mca_memheap_base_include = NULL;
|
||||
char* mca_memheap_base_exclude = NULL;
|
||||
char* mca_memheap_base_param_hca_name = NULL;
|
||||
opal_list_t mca_memheap_base_components_opened;
|
||||
struct mca_memheap_base_module_t* mca_memheap_base_module_initialized = NULL;
|
||||
int mca_memheap_base_already_opened = 0;
|
||||
mca_memheap_map_t mca_memheap_base_map;
|
||||
|
||||
/**
 * Register the MCA variables of the memheap base framework.
 *
 * Every variable is registered read-only at info level 9 and is backed by
 * one of the file-scope globals defined above.  After registration of the
 * "include" variable, if it is still NULL the SHMEM_HEAP_TYPE environment
 * variable is consulted; the result (or an empty string) is stored as a
 * heap-allocated copy.
 *
 * @param flags  registration flags supplied by the MCA base (unused here)
 * @return OSHMEM_SUCCESS always; individual registration results are ignored
 */
static int mca_memheap_base_register(mca_base_register_flag_t flags)
{
    const char *heap_type_env;

    /* Only the help text of "alloc_type" depends on mpage support. */
#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
    const char *alloc_type_help =
        "0|1|2|5 - disabled huge pages, enabled huge pages with fallback to mmap(), do not fallback to mmap(), enabled mpages(default)";
#else
    const char *alloc_type_help =
        "0|1|2 - disabled huge pages, enabled huge pages(default) with fallback to mmap(), do not fallback to mmap()";
#endif /* MPAGE_ENABLE */

    (void) mca_base_var_register("oshmem",
                                 "memheap",
                                 "base",
                                 "alloc_type",
                                 alloc_type_help,
                                 MCA_BASE_VAR_TYPE_INT,
                                 NULL,
                                 0,
                                 0,
                                 OPAL_INFO_LVL_9,
                                 MCA_BASE_VAR_SCOPE_READONLY,
                                 &mca_memheap_base_alloc_type);

    (void) mca_base_var_register("oshmem",
                                 "memheap",
                                 "base",
                                 "start_address",
                                 "Specify base address for shared memory region",
                                 MCA_BASE_VAR_TYPE_UNSIGNED_LONG_LONG,
                                 NULL,
                                 0,
                                 0,
                                 OPAL_INFO_LVL_9,
                                 MCA_BASE_VAR_SCOPE_READONLY,
                                 &mca_memheap_base_start_address);

    (void) mca_base_var_register("oshmem",
                                 "memheap",
                                 "base",
                                 "key_exchange",
                                 "0|1 - disabled, enabled(default) force memory keys exchange",
                                 MCA_BASE_VAR_TYPE_INT,
                                 NULL,
                                 0,
                                 0,
                                 OPAL_INFO_LVL_9,
                                 MCA_BASE_VAR_SCOPE_READONLY,
                                 &mca_memheap_base_key_exchange);

    (void) mca_base_var_register("oshmem",
                                 "memheap",
                                 "base",
                                 "mr_interleave_factor",
                                 "2 - default, try to give at least N Gbytes spaces between mapped memheaps of other pes that are local to me",
                                 MCA_BASE_VAR_TYPE_INT,
                                 NULL,
                                 0,
                                 0,
                                 OPAL_INFO_LVL_9,
                                 MCA_BASE_VAR_SCOPE_READONLY,
                                 &mca_memheap_base_mr_interleave_factor);

    (void) mca_base_var_register("oshmem",
                                 "memheap",
                                 "base",
                                 "include",
                                 "Specify a specific MEMHEAP implementation to use",
                                 MCA_BASE_VAR_TYPE_STRING,
                                 NULL,
                                 0,
                                 0,
                                 OPAL_INFO_LVL_9,
                                 MCA_BASE_VAR_SCOPE_READONLY,
                                 &mca_memheap_base_include);

    /* No MCA value given: fall back to the environment, else to "". */
    if (NULL == mca_memheap_base_include) {
        heap_type_env = getenv(SHMEM_HEAP_TYPE);
        mca_memheap_base_include = strdup((NULL != heap_type_env) ? heap_type_env : "");
    }

    (void) mca_base_var_register("oshmem",
                                 "memheap",
                                 "base",
                                 "exclude",
                                 "Specify excluded MEMHEAP implementations",
                                 MCA_BASE_VAR_TYPE_STRING,
                                 NULL,
                                 0,
                                 0,
                                 OPAL_INFO_LVL_9,
                                 MCA_BASE_VAR_SCOPE_READONLY,
                                 &mca_memheap_base_exclude);

    /*
     * The help text used to be a copy of the "exclude" description.
     * NOTE(review): the wording assumes this names the HCA device to use -
     * confirm against the code that consumes mca_memheap_base_param_hca_name.
     */
    (void) mca_base_var_register("oshmem",
                                 "memheap",
                                 "base",
                                 "hca_name",
                                 "Specify the HCA device name",
                                 MCA_BASE_VAR_TYPE_STRING,
                                 NULL,
                                 0,
                                 0,
                                 OPAL_INFO_LVL_9,
                                 MCA_BASE_VAR_SCOPE_READONLY,
                                 &mca_memheap_base_param_hca_name);

    return OSHMEM_SUCCESS;
}
|
||||
|
||||
static int mca_memheap_base_close(void)
|
||||
{
|
||||
if (mca_memheap_base_already_opened <= 0) {
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
mca_memheap_base_already_opened--;
|
||||
if (mca_memheap_base_already_opened > 0) {
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
memheap_oob_destruct();
|
||||
|
||||
mca_memheap_base_dereg(&mca_memheap_base_map);
|
||||
|
||||
mca_memheap_base_alloc_exit(&mca_memheap_base_map);
|
||||
mca_memheap_base_static_exit(&mca_memheap_base_map);
|
||||
|
||||
/* Close all remaining available components */
|
||||
return mca_base_framework_components_close(&oshmem_memheap_base_framework, NULL);
|
||||
}
|
||||
|
||||
/**
 * Framework open callback.
 *
 * The first call zeroes mca_memheap_base_map and opens all available
 * components; nested calls only bump the reference counter.
 *
 * If opening the components fails the reference taken by this call is
 * dropped again.  Previously the counter stayed incremented, so a retry
 * returned OSHMEM_SUCCESS without any component having been opened.
 *
 * @param flags  open flags forwarded to mca_base_framework_components_open()
 * @return OSHMEM_SUCCESS or OSHMEM_ERROR
 */
static int mca_memheap_base_open(mca_base_open_flag_t flags)
{
    mca_memheap_base_already_opened = mca_memheap_base_already_opened + 1;
    if (mca_memheap_base_already_opened > 1) {
        return OSHMEM_SUCCESS;
    }

    /* Zeroing the map also resets n_segments and num_transports to 0. */
    memset(&mca_memheap_base_map, 0, sizeof(mca_memheap_base_map));

    /* Open up all available components */
    if (OPAL_SUCCESS !=
        mca_base_framework_components_open(&oshmem_memheap_base_framework, flags)) {
        /* Undo our reference so the framework is not left "half open". */
        mca_memheap_base_already_opened--;
        return OSHMEM_ERROR;
    }

    return OSHMEM_SUCCESS;
}
|
||||
|
||||
/*
 * Declare the "memheap" framework of the "oshmem" project, described as
 * "OSHMEM MEMHEAP".  The register/open/close callbacks defined in this file
 * and the static component table are wired in here.
 * NOTE(review): presumably this is what defines the
 * oshmem_memheap_base_framework object referenced by the open/close
 * callbacks - confirm against the macro definition.
 */
MCA_BASE_FRAMEWORK_DECLARE(oshmem, memheap,
|
||||
"OSHMEM MEMHEAP",
|
||||
mca_memheap_base_register,
|
||||
|
||||
mca_memheap_base_open,
|
||||
mca_memheap_base_close,
|
||||
mca_memheap_base_static_components,
|
||||
0);
|
630
oshmem/mca/memheap/base/memheap_base_mkey.c
Обычный файл
630
oshmem/mca/memheap/base/memheap_base_mkey.c
Обычный файл
@ -0,0 +1,630 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "oshmem_config.h"
|
||||
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/dss/dss.h"
|
||||
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/mca/rml/rml_types.h"
|
||||
#include "orte/mca/grpcomm/grpcomm.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
|
||||
#include "ompi/mca/bml/bml.h"
|
||||
#include "ompi/mca/dpm/dpm.h"
|
||||
|
||||
#include "oshmem/proc/proc.h"
|
||||
#include "oshmem/runtime/runtime.h"
|
||||
#include "oshmem/mca/memheap/memheap.h"
|
||||
#include "oshmem/mca/memheap/base/base.h"
|
||||
#include "oshmem/mca/spml/spml.h"
|
||||
|
||||
#ifdef HAVE_SYS_MMAN_H
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
|
||||
#include <sys/ipc.h>
|
||||
#include <sys/shm.h>
|
||||
|
||||
#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
|
||||
#include <infiniband/verbs.h>
|
||||
#endif /* MPAGE_ENABLE */
|
||||
|
||||
/* Turn ON/OFF debug output from build (default 0) */
|
||||
#ifndef MEMHEAP_BASE_DEBUG
|
||||
#define MEMHEAP_BASE_DEBUG 0
|
||||
#endif
|
||||
|
||||
#define MEMHEAP_RKEY_REQ 0xA1
|
||||
#define MEMHEAP_RKEY_RESP 0xA2
|
||||
#define MEMHEAP_RKEY_RESP_FAIL 0xA3
|
||||
|
||||
/*
 * State shared between a thread requesting remote memory keys and the RML
 * receive callback that delivers the answer:
 *   lck        - mutex taken around the request/response handshake
 *   cond       - condition broadcast by the receive callback when an
 *                answer (success or failure) has arrived
 *   mkeys      - array the response handler unpacks the keys into,
 *                indexed by transport id
 *   mkeys_rcvd - 0 while a request is outstanding, afterwards
 *                MEMHEAP_RKEY_RESP or MEMHEAP_RKEY_RESP_FAIL
 */
struct oob_comm {
|
||||
opal_mutex_t lck;
|
||||
opal_condition_t cond;
|
||||
|
||||
mca_spml_mkey_t *mkeys;
|
||||
int mkeys_rcvd;
|
||||
};
|
||||
|
||||
#define MEMHEAP_VERBOSE_FASTPATH(...)
|
||||
|
||||
static mca_memheap_map_t* memheap_map = NULL;
|
||||
|
||||
struct oob_comm memheap_oob;
|
||||
|
||||
/* pickup list of rkeys and remote va */
|
||||
static int memheap_oob_get_mkeys(int pe,
|
||||
uint32_t va_seg_num,
|
||||
mca_spml_mkey_t *mkey);
|
||||
|
||||
static inline unsigned long __seg2base_va(int seg)
|
||||
{
|
||||
return memheap_map->mem_segs[seg].start;
|
||||
}
|
||||
|
||||
static int __seg_cmp(const void *k, const void *v)
|
||||
{
|
||||
unsigned long va = (unsigned long) k;
|
||||
map_segment_t *s = (map_segment_t *) v;
|
||||
|
||||
if (va < s->start)
|
||||
return -1;
|
||||
if (va >= s->end)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline map_segment_t *__find_va(unsigned long va)
|
||||
{
|
||||
map_segment_t *s;
|
||||
|
||||
if (OPAL_LIKELY(va >= (unsigned long)memheap_map->mem_segs[HEAP_SEG_INDEX].start &&
|
||||
va < (unsigned long)memheap_map->mem_segs[HEAP_SEG_INDEX].end)) {
|
||||
s = &memheap_map->mem_segs[HEAP_SEG_INDEX];
|
||||
} else {
|
||||
s = bsearch((const void *) va,
|
||||
&memheap_map->mem_segs[SYMB_SEG_INDEX],
|
||||
memheap_map->n_segments - 1,
|
||||
sizeof(*s),
|
||||
__seg_cmp);
|
||||
}
|
||||
|
||||
#if MEMHEAP_BASE_DEBUG == 1
|
||||
if (s) {
|
||||
MEMHEAP_VERBOSE(5, "match seg#%02ld: 0x%llX - 0x%llX %llu bytes va=%p",
|
||||
s - memheap_map->mem_segs,
|
||||
(long long)s->start,
|
||||
(long long)s->end,
|
||||
(long long)(s->end - s->start),
|
||||
(void *)va);
|
||||
}
|
||||
#endif
|
||||
return s;
|
||||
}
|
||||
|
||||
/**
 * Build the answer to a MEMHEAP_RKEY_REQ.
 *
 * Packs into @msg: the MEMHEAP_RKEY_RESP type byte, the number of
 * transports to @pe and, for each transport, its id, the local key and base
 * address of segment @seg and (if the SPML provides the hooks) the size and
 * bytes of the remote context.
 *
 * @param msg  buffer to fill; on failure its contents are incomplete and
 *             must be discarded by the caller
 * @param pe   requesting PE
 * @param seg  index of the segment whose keys are requested
 * @return OSHMEM_SUCCESS, or OSHMEM_ERROR if the PE is unknown or a local
 *         key cannot be found
 */
static int do_mkey_req(opal_buffer_t *msg, int pe, int seg)
{
    uint8_t msg_type;
    oshmem_proc_t *proc;
    int i, n, tr_id;
    mca_spml_mkey_t *mkey;

    /* Resolve the peer first: its transport ids are dereferenced below. */
    proc = oshmem_proc_group_find(oshmem_group_all, pe);
    if (NULL == proc) {
        MEMHEAP_ERROR("failed to find proc for pe %d", pe);
        return OSHMEM_ERROR;
    }

    msg_type = MEMHEAP_RKEY_RESP;
    opal_dss.pack(msg, &msg_type, 1, OPAL_UINT8);

    /* go over all transports to remote pe and pack mkeys */
    n = oshmem_get_transport_count(pe);
    opal_dss.pack(msg, &n, 1, OPAL_UINT32);
    MEMHEAP_VERBOSE(5, "found %d transports to %d", n, pe);

    for (i = 0; i < n; i++) {
        tr_id = proc->transport_ids[i];

        mkey = mca_memheap_base_get_mkey(__seg2base_va(seg), tr_id);
        if (!mkey) {
            MEMHEAP_ERROR("seg#%d tr_id: %d failed to find local mkey",
                          seg, tr_id);
            return OSHMEM_ERROR;
        }
        opal_dss.pack(msg, &tr_id, 1, OPAL_UINT32);
        opal_dss.pack(msg, &mkey->key, 1, OPAL_UINT64);
        opal_dss.pack(msg, &mkey->va_base, 1, OPAL_UINT64);

        /* Optional SPML-specific context: size first, then the bytes. */
        if (NULL != MCA_SPML_CALL(get_remote_context_size)) {
            uint32_t context_size =
                    (mkey->spml_context == NULL ) ?
                            0 :
                            (uint32_t) MCA_SPML_CALL(get_remote_context_size(mkey->spml_context));
            opal_dss.pack(msg, &context_size, 1, OPAL_UINT32);
            if (0 != context_size) {
                opal_dss.pack(msg,
                              MCA_SPML_CALL(get_remote_context(mkey->spml_context)),
                              context_size,
                              OPAL_BYTE);
            }
        }

        MEMHEAP_VERBOSE(5,
                        "seg#%d tr_id: %d key %llx base_va %llx",
                        seg, tr_id, (unsigned long long)mkey->key, (unsigned long long)mkey->va_base);
    }

    return OSHMEM_SUCCESS;
}
|
||||
|
||||
/**
 * Attach to a remote segment that is described by a shared-memory id
 * rather than by an address.
 *
 * Such keys are recognisable by va_base == 0 together with a valid id
 * encoded in the key as (type|shmid).  Depending on the encoded type the
 * segment is attached with shmat() or, when mpage support is compiled in,
 * registered through ibv_reg_shared_mr().  On success mkey->va_base holds
 * the local mapping address.  An unknown type or a failed attach aborts the
 * job via oshmem_shmem_abort().
 *
 * @param mkey   key to inspect and update in place
 * @param tr_id  transport id, used for diagnostics only
 */
static void memheap_attach_segment(mca_spml_mkey_t *mkey, int tr_id)
{
    /* Nothing to do unless this is the (type|shmid), va_base == 0 case. */
    if (mkey->va_base
        || ((int) MEMHEAP_SHM_GET_ID(mkey->key) == MEMHEAP_SHM_INVALID)) {
        return;
    }

    MEMHEAP_VERBOSE(5,
                    "shared memory usage tr_id: %d key %llx base_va %llx shmid 0x%X|0x%X",
                    tr_id,
                    (unsigned long long)mkey->key,
                    (unsigned long long)mkey->va_base,
                    MEMHEAP_SHM_GET_TYPE(mkey->key),
                    MEMHEAP_SHM_GET_ID(mkey->key));

    if (MAP_SEGMENT_ALLOC_SHM == MEMHEAP_SHM_GET_TYPE(mkey->key)) {
        mkey->va_base = (intptr_t) shmat(MEMHEAP_SHM_GET_ID(mkey->key), 0, 0);
    } else if (MAP_SEGMENT_ALLOC_IBV == MEMHEAP_SHM_GET_TYPE(mkey->key)) {
#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
        openib_device_t *device = NULL;
        struct ibv_mr *ib_mr;
        void *addr;
        static int mr_count;

        int access_flag = IBV_ACCESS_LOCAL_WRITE |
                          IBV_ACCESS_REMOTE_WRITE |
                          IBV_ACCESS_REMOTE_READ |
                          IBV_ACCESS_NO_RDMA;

        device = (openib_device_t *)memheap_map->mem_segs[HEAP_SEG_INDEX].context;
        assert(device);

        /* workaround mtt problem - request aligned addresses */
        ++mr_count;
        addr = (void *)(mca_memheap_base_start_address + mca_memheap_base_mr_interleave_factor*1024ULL*1024ULL*1024ULL*mr_count);
        ib_mr = ibv_reg_shared_mr(MEMHEAP_SHM_GET_ID(mkey->key),
                                  device->ib_pd, addr, access_flag);
        if (NULL != ib_mr) {
            if (ib_mr->addr != addr) {
                MEMHEAP_WARN("Failed to map shared region to address %p got addr %p. Try to increase 'memheap_mr_interleave_factor' from %d", addr, ib_mr->addr, mca_memheap_base_mr_interleave_factor);
            }

            opal_value_array_append_item(&device->ib_mr_array, &ib_mr);
            mkey->va_base = (intptr_t)ib_mr->addr;
        } else {
            /* Flag the failure; it is reported by the common check below. */
            mkey->va_base = -1;
            MEMHEAP_ERROR("error to ibv_reg_shared_mr() errno says %d: %s",
                          errno, strerror(errno));
        }
#endif /* MPAGE_ENABLE */
    } else {
        MEMHEAP_ERROR("tr_id: %d key %llx attach failed: incorrect shmid 0x%X|0x%X",
                      tr_id,
                      (unsigned long long)mkey->key,
                      MEMHEAP_SHM_GET_TYPE(mkey->key),
                      MEMHEAP_SHM_GET_ID(mkey->key));
        oshmem_shmem_abort(-1);
    }

    /* Both attach paths report failure as an address of -1. */
    if ((void *) -1 == (void *) mkey->va_base) {
        MEMHEAP_ERROR("tr_id: %d key %llx attach failed: errno = %d",
                      tr_id, (unsigned long long)mkey->key, errno);
        oshmem_shmem_abort(-1);
    }
}
|
||||
|
||||
/**
 * Unpack a MEMHEAP_RKEY_RESP message into memheap_oob.mkeys.
 *
 * The message carries the number of transports followed, per transport, by
 * its id, the remote key, the remote base address and (if the SPML provides
 * the hooks) an opaque context blob.  Each unpacked key is then passed to
 * memheap_attach_segment().
 *
 * The buffer comes from a remote process, so every unpack result is checked
 * and the transport id is validated before it is used as an index: the
 * destination array is allocated with memheap_map->num_transports entries.
 * A malformed message cannot be reported through the void return, so it
 * aborts the job, as memheap_attach_segment() does for its own failures.
 *
 * @param msg  received buffer, positioned just after the message type byte
 */
static void do_mkey_resp(opal_buffer_t *msg)
{
    int32_t cnt;
    int32_t n = 0;
    int32_t tr_id = -1;
    int i;

    cnt = 1;
    if (ORTE_SUCCESS != opal_dss.unpack(msg, &n, &cnt, OPAL_UINT32)) {
        goto malformed;
    }

    for (i = 0; i < n; i++) {
        mca_spml_mkey_t *mkey;

        cnt = 1;
        if (ORTE_SUCCESS != opal_dss.unpack(msg, &tr_id, &cnt, OPAL_UINT32)) {
            goto malformed;
        }
        /* Never index mkeys[] with an unchecked value from the wire. */
        if (tr_id < 0 || tr_id >= memheap_map->num_transports) {
            goto malformed;
        }
        mkey = &memheap_oob.mkeys[tr_id];

        cnt = 1;
        if (ORTE_SUCCESS != opal_dss.unpack(msg, &mkey->key, &cnt, OPAL_UINT64)) {
            goto malformed;
        }
        cnt = 1;
        if (ORTE_SUCCESS != opal_dss.unpack(msg, &mkey->va_base, &cnt, OPAL_UINT64)) {
            goto malformed;
        }

        if (NULL != MCA_SPML_CALL(set_remote_context_size)) {
            int32_t context_size = 0;

            cnt = 1;
            if (ORTE_SUCCESS != opal_dss.unpack(msg, &context_size, &cnt, OPAL_UINT32)) {
                goto malformed;
            }
            if (context_size < 0) {
                goto malformed;
            }
            if (0 != context_size) {
                void *context;

                MCA_SPML_CALL(set_remote_context_size(&(mkey->spml_context), context_size));
                context = calloc(1, context_size);
                if (NULL == context) {
                    MEMHEAP_ERROR("failed to allocate %d bytes for remote context",
                                  (int) context_size);
                    oshmem_shmem_abort(-1);
                    return;
                }
                if (ORTE_SUCCESS != opal_dss.unpack(msg, context, &context_size, OPAL_BYTE)) {
                    free(context);
                    goto malformed;
                }
                /* NOTE(review): the buffer is not freed here, as in the
                 * original - presumably the SPML takes ownership or copies
                 * it; confirm. */
                MCA_SPML_CALL(set_remote_context(&(mkey->spml_context), context));
            }
        }

        memheap_attach_segment(mkey, tr_id);

        MEMHEAP_VERBOSE(5,
                        "tr_id: %d key %llx base_va %llx",
                        tr_id, (unsigned long long)mkey->key, (unsigned long long)mkey->va_base);
    }
    return;

malformed:
    MEMHEAP_ERROR("malformed RKEY_RESP message (transports=%d, tr_id=%d)",
                  (int) n, (int) tr_id);
    oshmem_shmem_abort(-1);
}
|
||||
|
||||
static void memheap_buddy_rml_recv_cb(int status,
|
||||
orte_process_name_t* process_name,
|
||||
opal_buffer_t* buffer,
|
||||
orte_rml_tag_t tag,
|
||||
void* cbdata)
|
||||
{
|
||||
MEMHEAP_VERBOSE(5,
|
||||
"**** get request from %u:%d",
|
||||
process_name->jobid, process_name->vpid);
|
||||
int32_t cnt = 1;
|
||||
int rc;
|
||||
opal_buffer_t *msg;
|
||||
uint8_t msg_type;
|
||||
uint32_t seg;
|
||||
|
||||
MEMHEAP_VERBOSE(5, "unpacking %d of %d", cnt, OPAL_UINT8);
|
||||
rc = opal_dss.unpack(buffer, &msg_type, &cnt, OPAL_UINT8);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto send_fail;
|
||||
}
|
||||
|
||||
switch (msg_type) {
|
||||
case MEMHEAP_RKEY_REQ:
|
||||
cnt = 1;
|
||||
rc = opal_dss.unpack(buffer, &seg, &cnt, OPAL_UINT32);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
MEMHEAP_ERROR("bad RKEY_REQ msg");
|
||||
goto send_fail;
|
||||
}
|
||||
|
||||
MEMHEAP_VERBOSE(5, "*** RKEY REQ");
|
||||
msg = OBJ_NEW(opal_buffer_t);
|
||||
if (!msg) {
|
||||
MEMHEAP_ERROR("failed to get msg buffer");
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return;
|
||||
}
|
||||
|
||||
if (OSHMEM_SUCCESS != do_mkey_req(msg, process_name->vpid, seg)) {
|
||||
OBJ_RELEASE(msg);
|
||||
goto send_fail;
|
||||
}
|
||||
|
||||
rc = orte_rml.send_buffer_nb(process_name, msg, OMPI_RML_TAG_SHMEM, orte_rml_send_callback, NULL);
|
||||
|
||||
if (0 > rc) {
|
||||
MEMHEAP_ERROR("FAILED to send rml message %d", rc);
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto send_fail;
|
||||
}
|
||||
break;
|
||||
|
||||
case MEMHEAP_RKEY_RESP:
|
||||
MEMHEAP_VERBOSE(5, "*** RKEY RESP");
|
||||
OPAL_THREAD_LOCK(&memheap_oob.lck);
|
||||
do_mkey_resp(buffer);
|
||||
memheap_oob.mkeys_rcvd = MEMHEAP_RKEY_RESP;
|
||||
opal_condition_broadcast(&memheap_oob.cond);
|
||||
OPAL_THREAD_UNLOCK(&memheap_oob.lck);
|
||||
break;
|
||||
|
||||
case MEMHEAP_RKEY_RESP_FAIL:
|
||||
MEMHEAP_VERBOSE(5, "*** RKEY RESP FAIL");
|
||||
memheap_oob.mkeys_rcvd = MEMHEAP_RKEY_RESP_FAIL;
|
||||
opal_condition_broadcast(&memheap_oob.cond);
|
||||
OPAL_THREAD_UNLOCK(&memheap_oob.lck);
|
||||
break;
|
||||
|
||||
default:
|
||||
MEMHEAP_VERBOSE(5, "Unknown message type %x", msg_type);
|
||||
goto send_fail;
|
||||
}
|
||||
return;
|
||||
|
||||
send_fail: msg = OBJ_NEW(opal_buffer_t);
|
||||
if (!msg) {
|
||||
MEMHEAP_ERROR("failed to get msg buffer");
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return;
|
||||
}
|
||||
msg_type = MEMHEAP_RKEY_RESP_FAIL;
|
||||
opal_dss.pack(msg, &msg_type, 1, OPAL_UINT8);
|
||||
|
||||
rc = orte_rml.send_buffer_nb(process_name, msg, OMPI_RML_TAG_SHMEM, orte_rml_send_callback, NULL);
|
||||
if (0 > rc) {
|
||||
MEMHEAP_ERROR("FAILED to send rml message %d", rc);
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
 * Set up the out-of-band key exchange.
 *
 * Remembers @map as the map used by the helpers in this file, constructs
 * the lock and condition of memheap_oob and posts a persistent wildcard
 * receive for OMPI_RML_TAG_SHMEM handled by memheap_buddy_rml_recv_cb().
 *
 * @return OSHMEM_SUCCESS always; the result of posting the receive is not
 *         checked
 */
int memheap_oob_init(mca_memheap_map_t *map)
{
    memheap_map = map;

    OBJ_CONSTRUCT(&memheap_oob.lck, opal_mutex_t);
    OBJ_CONSTRUCT(&memheap_oob.cond, opal_condition_t);

    orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
                            OMPI_RML_TAG_SHMEM,
                            ORTE_RML_PERSISTENT,
                            memheap_buddy_rml_recv_cb,
                            NULL);

    return OSHMEM_SUCCESS;
}
|
||||
|
||||
void memheap_oob_destruct(void)
|
||||
{
|
||||
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, OMPI_RML_TAG_SHMEM);
|
||||
OBJ_DESTRUCT(&memheap_oob.lck);
|
||||
OBJ_DESTRUCT(&memheap_oob.cond);
|
||||
}
|
||||
|
||||
/**
 * Obtain the memory keys of segment @seg on remote PE @pe.
 *
 * The SPML is asked first; if it can compute the keys locally only the base
 * address of the local segment is filled in.  Otherwise a MEMHEAP_RKEY_REQ
 * is sent over RML and the caller blocks on memheap_oob.cond until the
 * receive callback reports MEMHEAP_RKEY_RESP or MEMHEAP_RKEY_RESP_FAIL.
 *
 * Fix: memheap_oob.lck used to be locked twice in a row before the request
 * was sent, which self-deadlocks on a non-recursive mutex.  The lock is now
 * taken exactly once and released on every exit path.
 *
 * @param pe     remote PE to query
 * @param seg    segment index
 * @param mkeys  array of memheap_map->num_transports entries to fill
 * @return OSHMEM_SUCCESS or OSHMEM_ERROR
 */
static int memheap_oob_get_mkeys(int pe, uint32_t seg, mca_spml_mkey_t *mkeys)
{
    orte_process_name_t name;
    opal_buffer_t *msg;
    int rc;
    uint8_t cmd;
    int i;

    /* Fast path: the SPML can derive the keys without any communication. */
    if (OSHMEM_SUCCESS == MCA_SPML_CALL(oob_get_mkeys(pe, seg, mkeys))) {
        for (i = 0; i < memheap_map->num_transports; i++) {
            mkeys[i].va_base = __seg2base_va(seg);
            MEMHEAP_VERBOSE(5,
                            "MKEY CALCULATED BY LOCAL SPML: pe: %d tr_id: %d key %llx base_va %llx",
                            pe,
                            i,
                            (unsigned long long)mkeys[i].key,
                            (unsigned long long)mkeys[i].va_base);
        }
        return OSHMEM_SUCCESS;
    }

    OPAL_THREAD_LOCK(&memheap_oob.lck);

    /* Tell the receive callback where to store the answer. */
    memheap_oob.mkeys = mkeys;
    memheap_oob.mkeys_rcvd = 0;

    name.jobid = ORTE_PROC_MY_NAME->jobid;
    name.vpid = pe;

    msg = OBJ_NEW(opal_buffer_t);
    if (!msg) {
        OPAL_THREAD_UNLOCK(&memheap_oob.lck);
        MEMHEAP_ERROR("failed to get msg buffer");
        return OSHMEM_ERROR;
    }

    cmd = MEMHEAP_RKEY_REQ;
    opal_dss.pack(msg, &cmd, 1, OPAL_UINT8);
    opal_dss.pack(msg, &seg, 1, OPAL_UINT32);
    rc = orte_rml.send_buffer_nb(&name, msg, OMPI_RML_TAG_SHMEM, orte_rml_send_callback, NULL);
    if (0 > rc) {
        OBJ_RELEASE(msg);
        OPAL_THREAD_UNLOCK(&memheap_oob.lck);
        MEMHEAP_ERROR("FAILED to send rml message %d", rc);
        return OSHMEM_ERROR;
    }

    MEMHEAP_VERBOSE(5, "message sent: %d bytes!", rc);

    /* Wait until the callback stores either RESP or RESP_FAIL. */
    while (!memheap_oob.mkeys_rcvd) {
        opal_condition_wait(&memheap_oob.cond, &memheap_oob.lck);
    }

    if (MEMHEAP_RKEY_RESP == memheap_oob.mkeys_rcvd) {
        rc = OSHMEM_SUCCESS;
    } else {
        MEMHEAP_ERROR("failed to get rkey seg#%d pe=%d", seg, pe);
        rc = OSHMEM_ERROR;
    }

    OPAL_THREAD_UNLOCK(&memheap_oob.lck);
    return rc;
}
|
||||
|
||||
void mca_memheap_modex_recv_all(void)
|
||||
{
|
||||
int i;
|
||||
int j;
|
||||
int nprocs, my_pe;
|
||||
oshmem_proc_t *proc;
|
||||
mca_spml_mkey_t *mkey;
|
||||
uint64_t dummy_rva;
|
||||
|
||||
if (!mca_memheap_base_key_exchange)
|
||||
return;
|
||||
|
||||
/* init rkey cache */
|
||||
nprocs = oshmem_num_procs();
|
||||
my_pe = oshmem_my_proc_id();
|
||||
|
||||
/* Note:
|
||||
* Doing exchange via rml till we figure out problem with grpcomm.modex and barrier
|
||||
*/
|
||||
for (i = 0; i < nprocs; i++) {
|
||||
if (i == my_pe)
|
||||
continue;
|
||||
|
||||
proc = oshmem_proc_group_find(oshmem_group_all, i);
|
||||
for (j = 0; j < memheap_map->n_segments; j++) {
|
||||
mkey =
|
||||
mca_memheap_base_get_cached_mkey(i,
|
||||
memheap_map->mem_segs[j].start,
|
||||
proc->transport_ids[0],
|
||||
&dummy_rva);
|
||||
if (!mkey) {
|
||||
MEMHEAP_ERROR("Failed to receive mkeys");
|
||||
oshmem_shmem_abort(-1);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* There is an issue with orte_grpcomm.barrier usage as
|
||||
* ess/pmi directs to use grpcomm/pmi in case slurm srun() call grpcomm/pmi calls PMI_Barrier()
|
||||
* that is a function of external library.
|
||||
* There is no opal_progress() in such way. As a result slow PEs send a request (MEMHEAP_RKEY_REQ) to
|
||||
* fast PEs waiting on barrier and do not get a respond (MEMHEAP_RKEY_RESP).
|
||||
*
|
||||
* there are following ways to solve one:
|
||||
* 1. calculate requests from remote PEs and do ORTE_PROGRESSED_WAIT waiting for expected value;
|
||||
* 2. use shmem_barrier_all();
|
||||
* 3. rework pmi/barrier to use opal_progress();
|
||||
* 4. use orte_grpcomm.barrier carefully;
|
||||
*
|
||||
* It seems there is no need to use orte_grpcomm.barrier here
|
||||
*/
|
||||
|
||||
if (memheap_map->mem_segs[HEAP_SEG_INDEX].shmid != MEMHEAP_SHM_INVALID) {
|
||||
/* unfortunately we must do barrier here to assure that everyone are attached to our segment
|
||||
* good thing that this code path only invoked on older linuxes (-mca shmalloc_use_hugepages 3|4)
|
||||
* try to minimize damage here by waiting 5 seconds and doing progress
|
||||
*/
|
||||
shmem_barrier_all();
|
||||
/* keys exchanged, segments attached, now we can safely cleanup */
|
||||
if (memheap_map->mem_segs[HEAP_SEG_INDEX].type
|
||||
== MAP_SEGMENT_ALLOC_SHM) {
|
||||
shmctl(memheap_map->mem_segs[HEAP_SEG_INDEX].shmid,
|
||||
IPC_RMID,
|
||||
NULL );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Translate a local address into the matching remote address.
 *
 * The address is shifted by the distance between the two segment bases; the
 * two directions are handled separately so that the unsigned difference
 * never wraps.
 *
 * @param va           local virtual address
 * @param local_base   base address of the segment on this PE
 * @param remote_base  base address of the same segment on the remote PE
 * @return the remote virtual address
 */
static inline uint64_t va2rva(unsigned long va,
                              uint64_t local_base,
                              uint64_t remote_base)
{
    if (remote_base > local_base) {
        return va + (remote_base - local_base);
    }

    return va - (local_base - remote_base);
}
|
||||
|
||||
mca_spml_mkey_t * mca_memheap_base_get_cached_mkey(int pe,
|
||||
unsigned long va,
|
||||
int btl_id,
|
||||
uint64_t *rva)
|
||||
{
|
||||
map_segment_t *s;
|
||||
int rc;
|
||||
mca_spml_mkey_t *mkey;
|
||||
|
||||
MEMHEAP_VERBOSE_FASTPATH(10, "rkey: pe=%d va=%p", pe, (void *)va);
|
||||
s = __find_va(va);
|
||||
if (NULL == s)
|
||||
return NULL ;
|
||||
|
||||
if (!s->is_active)
|
||||
return NULL ;
|
||||
|
||||
if (pe == oshmem_my_proc_id()) {
|
||||
*rva = va;
|
||||
MEMHEAP_VERBOSE_FASTPATH(10, "rkey: pe=%d va=%p -> (local) %lx %p", pe, (void *)va,
|
||||
s->mkeys[btl_id].key, (void *)*rva);
|
||||
return &s->mkeys[btl_id];
|
||||
}
|
||||
|
||||
if (OPAL_LIKELY(s->mkeys_cache[pe])) {
|
||||
mkey = &s->mkeys_cache[pe][btl_id];
|
||||
*rva = va2rva(va, s->start, mkey->va_base);
|
||||
MEMHEAP_VERBOSE_FASTPATH(10, "rkey: pe=%d va=%p -> (cached) %lx %p", pe, (void *)va, mkey->key, (void *)*rva);
|
||||
return mkey;
|
||||
}
|
||||
|
||||
s->mkeys_cache[pe] = (mca_spml_mkey_t *) calloc(memheap_map->num_transports,
|
||||
sizeof(mca_spml_mkey_t));
|
||||
if (!s->mkeys_cache[pe])
|
||||
return NULL ;
|
||||
|
||||
rc = memheap_oob_get_mkeys(pe,
|
||||
s - memheap_map->mem_segs,
|
||||
s->mkeys_cache[pe]);
|
||||
if (OSHMEM_SUCCESS != rc)
|
||||
return NULL ;
|
||||
|
||||
mkey = &s->mkeys_cache[pe][btl_id];
|
||||
*rva = va2rva(va, s->start, mkey->va_base);
|
||||
|
||||
MEMHEAP_VERBOSE_FASTPATH(5, "rkey: pe=%d va=%p -> (remote lookup) %lx %p", pe, (void *)va, mkey->key, (void *)*rva);
|
||||
return mkey;
|
||||
}
|
||||
|
||||
mca_spml_mkey_t *mca_memheap_base_get_mkey(unsigned long va, int tr_id)
|
||||
{
|
||||
map_segment_t *s;
|
||||
|
||||
s = __find_va(va);
|
||||
|
||||
return ((s && s->is_active) ? &s->mkeys[tr_id] : NULL );
|
||||
}
|
||||
|
||||
uint64_t mca_memheap_base_find_offset(int pe,
|
||||
int tr_id,
|
||||
unsigned long va,
|
||||
uint64_t rva)
|
||||
{
|
||||
map_segment_t *s;
|
||||
|
||||
s = __find_va(va);
|
||||
|
||||
return ((s && s->is_active) ? (rva - s->mkeys_cache[pe][tr_id].va_base) : 0);
|
||||
}
|
||||
|
||||
/**
 * Tell whether @va lies inside one of the known segments.
 *
 * @return 1 if a containing segment exists, 0 otherwise
 */
int mca_memheap_base_is_symmetric_addr(unsigned long va)
{
    return NULL != __find_va(va);
}
|
||||
|
||||
int mca_memheap_base_detect_addr_type(unsigned long va)
|
||||
{
|
||||
int addr_type = ADDR_INVALID;
|
||||
map_segment_t *s;
|
||||
|
||||
s = __find_va(va);
|
||||
|
||||
if (s) {
|
||||
if (s->type == MAP_SEGMENT_STATIC) {
|
||||
addr_type = ADDR_STATIC;
|
||||
} else if (va >= (unsigned long) s->start
|
||||
&& va < (unsigned long) (s->start + mca_memheap.memheap_size)) {
|
||||
addr_type = ADDR_USER;
|
||||
} else {
|
||||
assert( va >= (unsigned long)(s->start + mca_memheap.memheap_size) && va < (unsigned long)s->end);
|
||||
addr_type = ADDR_PRIVATE;
|
||||
}
|
||||
}
|
||||
|
||||
return addr_type;
|
||||
}
|
131
oshmem/mca/memheap/base/memheap_base_register.c
Обычный файл
131
oshmem/mca/memheap/base/memheap_base_register.c
Обычный файл
@ -0,0 +1,131 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
#include "oshmem_config.h"
|
||||
|
||||
#include "oshmem/proc/proc.h"
|
||||
#include "oshmem/mca/memheap/memheap.h"
|
||||
#include "oshmem/mca/memheap/base/base.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
static int __dereg_segment(map_segment_t *s);
|
||||
static int __reg_segment(map_segment_t *s, int *num_btl);
|
||||
|
||||
/**
 * Register every segment of @memheap_map with the SPML.
 *
 * All segments are attempted even if one of them fails.
 *
 * Fix: the result of each registration used to overwrite the previous one,
 * so a failure was reported only if it happened on the last segment.  The
 * first error is now remembered and returned.
 *
 * @param memheap_map  map whose n_segments entries are registered; its
 *                     num_transports field is passed to the SPML as an
 *                     output parameter
 * @return OSHMEM_SUCCESS if every segment registered, otherwise the first
 *         error encountered
 */
int mca_memheap_base_reg(mca_memheap_map_t *memheap_map)
{
    int ret = OSHMEM_SUCCESS;
    int rc;
    int i;

    for (i = 0; i < memheap_map->n_segments; i++) {
        map_segment_t *s = &memheap_map->mem_segs[i];

        MEMHEAP_VERBOSE(5,
                        "register seg#%02d: 0x%llX - 0x%llX %llu bytes type=0x%X id=0x%X",
                        i,
                        (long long)s->start,
                        (long long)s->end,
                        (long long)(s->end - s->start),
                        s->type,
                        s->shmid);
        rc = __reg_segment(s, &memheap_map->num_transports);
        /* Keep the first failure; later successes must not hide it. */
        if (OSHMEM_SUCCESS != rc && OSHMEM_SUCCESS == ret) {
            ret = rc;
        }
    }

    return ret;
}
|
||||
|
||||
/**
 * Deregister every active segment of @memheap_map; inactive segments are
 * skipped.
 *
 * @return the result of the last deregistration performed, or
 *         OSHMEM_SUCCESS if there was none
 */
int mca_memheap_base_dereg(mca_memheap_map_t *memheap_map)
{
    int status = OSHMEM_SUCCESS;
    int idx;

    for (idx = 0; idx < memheap_map->n_segments; idx++) {
        map_segment_t *seg = &memheap_map->mem_segs[idx];

        if (seg->is_active) {
            MEMHEAP_VERBOSE(5,
                            "deregistering segment#%d: %llx - %llx %llu bytes",
                            idx,
                            (long long)seg->start,
                            (long long)seg->end,
                            (long long)(seg->end - seg->start));
            status = __dereg_segment(seg);
        }
    }

    return status;
}
|
||||
|
||||
/**
 * Deregister one segment from the SPML and drop its remote key cache.
 *
 * The cache slot of the local PE is not freed individually: it is set to
 * s->mkeys by __reg_segment(), and those keys are handed to the SPML
 * deregister call.
 *
 * @return OSHMEM_SUCCESS always
 */
static int __dereg_segment(map_segment_t *s)
{
    int pe;
    int nprocs = oshmem_num_procs();
    int my_pe = oshmem_my_proc_id();

    MCA_SPML_CALL(deregister(s->mkeys));

    if (s->mkeys_cache) {
        for (pe = 0; pe < nprocs; pe++) {
            if (pe != my_pe && s->mkeys_cache[pe]) {
                free(s->mkeys_cache[pe]);
                s->mkeys_cache[pe] = NULL;
            }
        }
        free(s->mkeys_cache);
        s->mkeys_cache = NULL;
    }

    s->is_active = 0;

    return OSHMEM_SUCCESS;
}
|
||||
|
||||
/**
 * Register one segment with the SPML and prepare its remote key cache.
 *
 * Allocates a zeroed cache with one slot per PE, registers the range
 * [start, end) with the SPML and, on success, points the local PE's cache
 * slot at the freshly obtained keys and marks the segment active.
 *
 * @param s        segment to register
 * @param num_btl  passed through to the SPML register call
 * @return OSHMEM_SUCCESS, or OSHMEM_ERROR if the cache cannot be allocated
 *         or the SPML refuses the registration
 */
static int __reg_segment(map_segment_t *s, int *num_btl)
{
    int nprocs = oshmem_num_procs();
    int my_pe = oshmem_my_proc_id();

    s->mkeys_cache = (mca_spml_mkey_t **) calloc(nprocs,
                                                 sizeof(mca_spml_mkey_t *));
    if (NULL == s->mkeys_cache) {
        MEMHEAP_ERROR("Failed to allocate memory for remote segments");
        return OSHMEM_ERROR;
    }

    s->mkeys = MCA_SPML_CALL(register((void *)(unsigned long)s->start,
                                      s->end - s->start,
                                      MEMHEAP_SHM_CODE(s->type, s->shmid),
                                      num_btl));
    if (NULL == s->mkeys) {
        /* Roll back the cache so the segment is left untouched. */
        free(s->mkeys_cache);
        s->mkeys_cache = NULL;

        MEMHEAP_ERROR("Failed to register segment");
        return OSHMEM_ERROR;
    }

    /* Our own keys double as the cache entry for the local PE. */
    s->mkeys_cache[my_pe] = s->mkeys;
    s->is_active = 1;

    return OSHMEM_SUCCESS;
}
|
251
oshmem/mca/memheap/base/memheap_base_select.c
Обычный файл
251
oshmem/mca/memheap/base/memheap_base_select.c
Обычный файл
@ -0,0 +1,251 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "oshmem_config.h"
|
||||
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/mca/base/mca_base_component_repository.h"
|
||||
#include "oshmem/mca/memheap/memheap.h"
|
||||
#include "oshmem/mca/memheap/base/base.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "opal/runtime/opal.h"
|
||||
|
||||
mca_memheap_base_module_t mca_memheap;
|
||||
|
||||
/**
|
||||
* Function for weeding out memheap components that shouldn't be executed.
|
||||
* Implementation inspired by btl/base.
|
||||
*
|
||||
* Call the init function on all available components to find out if
|
||||
* they want to run. Select all components that don't fail. Failing
|
||||
* components will be closed and unloaded. The selected modules will
|
||||
* be pointed to by mca_memheap_base_module_t.
|
||||
*/
|
||||
|
||||
static memheap_context_t* __memheap_create(void);
|
||||
|
||||
/**
|
||||
* Choose to init one component with the highest priority.
|
||||
 * If the include list is not empty, choose a component that appears in the list.
|
||||
* O/W choose the highest priority component not in the exclude list.
|
||||
* Include and exclude lists may be given in the shmem launcher command line.
|
||||
*/
|
||||
int mca_memheap_base_select()
|
||||
{
|
||||
int priority = 0;
|
||||
int max_priority = 0;
|
||||
mca_base_component_list_item_t *cli, *next;
|
||||
mca_memheap_base_component_t *component = NULL;
|
||||
mca_memheap_base_component_t *max_priority_component = NULL;
|
||||
mca_memheap_base_module_t *module = NULL;
|
||||
memheap_context_t *context = NULL;
|
||||
|
||||
char** include = opal_argv_split(mca_memheap_base_include, ',');
|
||||
char** exclude = opal_argv_split(mca_memheap_base_exclude, ',');
|
||||
|
||||
context = __memheap_create();
|
||||
if (!context) {
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
OPAL_LIST_FOREACH_SAFE(cli, next, &oshmem_memheap_base_framework.framework_components, mca_base_component_list_item_t) {
|
||||
component = (mca_memheap_base_component_t *) cli->cli_component;
|
||||
|
||||
/* Verify if the component is in the include or the exclude list. */
|
||||
/* If there is an include list - item must be in the list to be included */
|
||||
if (NULL != include) {
|
||||
char** argv = include;
|
||||
bool found = false;
|
||||
while (argv && *argv) {
|
||||
if (strcmp(component->memheap_version.mca_component_name, *argv)
|
||||
== 0) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
argv++;
|
||||
}
|
||||
/* If not in the list do not choose this component */
|
||||
if (found == false) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Otherwise - check the exclude list to see if this item has been specifically excluded */
|
||||
} else if (NULL != exclude) {
|
||||
char** argv = exclude;
|
||||
bool found = false;
|
||||
while (argv && *argv) {
|
||||
if (strcmp(component->memheap_version.mca_component_name, *argv)
|
||||
== 0) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
argv++;
|
||||
}
|
||||
if (found == true) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
/* Verify that the component has an init function */
|
||||
if (NULL == component->memheap_init) {
|
||||
MEMHEAP_VERBOSE(10,
|
||||
"select: no init function; for component %s. No component selected",
|
||||
component->memheap_version.mca_component_name);
|
||||
} else {
|
||||
|
||||
MEMHEAP_VERBOSE(5,
|
||||
"select: component %s size : user %d private: %d",
|
||||
component->memheap_version.mca_component_name, (int)context->user_size, (int)context->private_size);
|
||||
|
||||
/* Init the component in order to get its priority */
|
||||
module = component->memheap_init(context, &priority);
|
||||
|
||||
/* If the component didn't initialize, remove it from the opened list, remove it from the component repository and return an error */
|
||||
if (NULL == module) {
|
||||
MEMHEAP_VERBOSE(10,
|
||||
"select: init of component %s returned failure",
|
||||
component->memheap_version.mca_component_name);
|
||||
|
||||
opal_list_remove_item(&oshmem_memheap_base_framework.framework_components, &cli->super);
|
||||
mca_base_component_close((mca_base_component_t *) component,
|
||||
oshmem_memheap_base_framework.framework_output);
|
||||
}
|
||||
/* Calculate memheap size in case it was not set during component initialization */
|
||||
module->memheap_size = context->user_size;
|
||||
}
|
||||
|
||||
/* Init max priority component */
|
||||
if (NULL == max_priority_component) {
|
||||
max_priority_component = component;
|
||||
mca_memheap_base_module_initialized = module;
|
||||
max_priority = priority;
|
||||
}
|
||||
|
||||
/* Update max priority component if current component has greater priority */
|
||||
if (priority > max_priority) {
|
||||
max_priority = priority;
|
||||
max_priority_component = component;
|
||||
mca_memheap_base_module_initialized = module;
|
||||
}
|
||||
}
|
||||
|
||||
opal_argv_free(include);
|
||||
opal_argv_free(exclude);
|
||||
|
||||
/* Verify that a component was selected */
|
||||
if (NULL == max_priority_component) {
|
||||
MEMHEAP_VERBOSE(10, "select: no component selected");
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
/* Verify that some module was initialized */
|
||||
if (NULL == mca_memheap_base_module_initialized) {
|
||||
orte_show_help("help-shmem-mca.txt",
|
||||
"find-available:none-found",
|
||||
true,
|
||||
"memheap");
|
||||
orte_errmgr.abort(1, NULL );
|
||||
}
|
||||
|
||||
MEMHEAP_VERBOSE(10,
|
||||
"SELECTED %s component %s",
|
||||
max_priority_component->memheap_version.mca_type_name, max_priority_component->memheap_version.mca_component_name);
|
||||
|
||||
setenv(SHMEM_HEAP_TYPE,
|
||||
max_priority_component->memheap_version.mca_component_name,
|
||||
1);
|
||||
|
||||
mca_memheap = *mca_memheap_base_module_initialized;
|
||||
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
static size_t memheap_size(void)
|
||||
{
|
||||
char *p;
|
||||
unsigned long long factor;
|
||||
int idx;
|
||||
unsigned long long size;
|
||||
|
||||
p = getenv(SHMEM_HEAP_SIZE);
|
||||
if (!p)
|
||||
return SIZE_IN_MEGA_BYTES(DEFAULT_SYMMETRIC_HEAP_SIZE);
|
||||
|
||||
idx = strlen(p) - 1;
|
||||
if (p[idx] == 'k' || p[idx] == 'K') {
|
||||
factor = 1024;
|
||||
} else if (p[idx] == 'm' || p[idx] == 'M') {
|
||||
factor = 1024 * 1024;
|
||||
} else if (p[idx] == 'g' || p[idx] == 'G') {
|
||||
factor = 1024 * 1024 * 1024;
|
||||
} else if (p[idx] == 't' || p[idx] == 'T') {
|
||||
factor = 1024UL * 1024UL * 1024UL * 1024UL;
|
||||
} else
|
||||
factor = 1;
|
||||
|
||||
size = atoll(p);
|
||||
if (size == 0) {
|
||||
MEMHEAP_ERROR("Incorrect symmetric heap size %s. Using default heap size %d M\n",
|
||||
p, DEFAULT_SYMMETRIC_HEAP_SIZE);
|
||||
return SIZE_IN_MEGA_BYTES(DEFAULT_SYMMETRIC_HEAP_SIZE);
|
||||
}
|
||||
return (size_t) memheap_align(size * factor);
|
||||
}
|
||||
|
||||
static memheap_context_t* __memheap_create(void)
|
||||
{
|
||||
int rc = OSHMEM_SUCCESS;
|
||||
static memheap_context_t context;
|
||||
size_t user_size;
|
||||
|
||||
user_size = memheap_size();
|
||||
if (user_size < MEMHEAP_BASE_MIN_SIZE) {
|
||||
MEMHEAP_ERROR("Requested memheap size is less than minimal meamheap size (%llu < %llu)",
|
||||
(unsigned long long)user_size, MEMHEAP_BASE_MIN_SIZE);
|
||||
return NULL ;
|
||||
}
|
||||
/* Inititialize symmetric area */
|
||||
if (OSHMEM_SUCCESS == rc) {
|
||||
rc = mca_memheap_base_alloc_init(&mca_memheap_base_map,
|
||||
user_size + MEMHEAP_BASE_PRIVATE_SIZE);
|
||||
}
|
||||
|
||||
/* Inititialize static/global variables area */
|
||||
if (OSHMEM_SUCCESS == rc) {
|
||||
rc = mca_memheap_base_static_init(&mca_memheap_base_map);
|
||||
}
|
||||
|
||||
/* Memory Registration */
|
||||
if (OSHMEM_SUCCESS == rc) {
|
||||
rc = mca_memheap_base_reg(&mca_memheap_base_map);
|
||||
}
|
||||
|
||||
/* Init OOB channel */
|
||||
if (OSHMEM_SUCCESS == rc) {
|
||||
rc = memheap_oob_init(&mca_memheap_base_map);
|
||||
}
|
||||
|
||||
if (OSHMEM_SUCCESS == rc) {
|
||||
context.user_size = user_size;
|
||||
context.private_size = MEMHEAP_BASE_PRIVATE_SIZE;
|
||||
context.user_base_addr =
|
||||
(void*) ((unsigned char*) mca_memheap_base_map.mem_segs[HEAP_SEG_INDEX].start
|
||||
+ 0);
|
||||
context.private_base_addr =
|
||||
(void*) ((unsigned char*) mca_memheap_base_map.mem_segs[HEAP_SEG_INDEX].start
|
||||
+ context.user_size);
|
||||
}
|
||||
|
||||
return ((OSHMEM_SUCCESS == rc) ? &context : NULL );
|
||||
}
|
217
oshmem/mca/memheap/base/memheap_base_static.c
Обычный файл
217
oshmem/mca/memheap/base/memheap_base_static.c
Обычный файл
@ -0,0 +1,217 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
#include "oshmem_config.h"
|
||||
|
||||
#include "oshmem/proc/proc.h"
|
||||
#include "oshmem/mca/memheap/memheap.h"
|
||||
#include "oshmem/mca/memheap/base/base.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
/* One parsed line of /proc/self/maps, as filled in by __load_segments(). */
struct map_segment_desc {
|
||||
/* first address of the mapping */
uint64_t start;
|
||||
/* address parsed after the '-' separator */
uint64_t end;
|
||||
/* permission string; __check_perms() accepts "rw-p" and "rwxp" */
char perms[8];
|
||||
uint64_t offset;
|
||||
char dev[8];
|
||||
uint64_t inode;
|
||||
/* last field of the line; may be empty */
char pathname[MAXPATHLEN];
|
||||
};
|
||||
|
||||
/* Address ranges collected by __load_segments(); at most
 * MCA_MEMHEAP_MAX_SEGMENTS entries, n_segments of which are valid. */
typedef struct memheap_static_context {
|
||||
struct {
|
||||
uint64_t start;
|
||||
uint64_t end;
|
||||
} mem_segs[MCA_MEMHEAP_MAX_SEGMENTS];
|
||||
int n_segments;
|
||||
} memheap_static_context_t;
|
||||
|
||||
/* File-scope result of the last __load_segments() run. */
static memheap_static_context_t memheap_context;
|
||||
|
||||
/* Helpers defined later in this file; each __check_* returns
 * OSHMEM_SUCCESS to keep a segment and OSHMEM_ERROR to skip it. */
static int __load_segments(void);
|
||||
static int __check_perms(struct map_segment_desc *seg);
|
||||
static int __check_address(struct map_segment_desc *seg);
|
||||
static int __check_pathname(struct map_segment_desc *seg);
|
||||
|
||||
/**
 * Append the process' static/global data segments to the memheap map.
 *
 * The segments are discovered by __load_segments() (which parses
 * /proc/self/maps) and appended after the entries already present in
 * map->mem_segs; map->n_segments is advanced for each one.
 *
 * @param map memheap map to extend; must not be NULL
 * @return OSHMEM_SUCCESS, or the error returned by __load_segments()
 *
 * NOTE(review): nothing here checks map->n_segments against the capacity of
 * map->mem_segs - confirm the array is large enough for heap + static
 * segments.
 */
int mca_memheap_base_static_init(mca_memheap_map_t *map)
{
    int rc;
    int idx;
    size_t static_bytes = 0;

    assert(map);
    assert(SYMB_SEG_INDEX <= map->n_segments);

    rc = __load_segments();
    if (OSHMEM_SUCCESS != rc) {
        return rc;
    }

    for (idx = 0; idx < memheap_context.n_segments; idx++) {
        map_segment_t *dst = &map->mem_segs[map->n_segments];

        /* Start from a clean descriptor, then describe the static range. */
        memset(dst, 0, sizeof(*dst));
        dst->is_active = 0;
        dst->shmid = MEMHEAP_SHM_INVALID;
        dst->start = memheap_context.mem_segs[idx].start;
        dst->end = memheap_context.mem_segs[idx].end;
        dst->size = dst->end - dst->start;
        dst->type = MAP_SEGMENT_STATIC;
        dst->context = NULL;
        map->n_segments++;

        static_bytes += dst->end - dst->start;
    }

    MEMHEAP_VERBOSE(1,
                    "Memheap static memory: %llu byte(s), %d segments",
                    (unsigned long long)static_bytes, map->n_segments);

    return rc;
}
|
||||
|
||||
/**
 * Counterpart of mca_memheap_base_static_init().
 *
 * Nothing is released here; the function only asserts that a map was given.
 */
void mca_memheap_base_static_exit(mca_memheap_map_t *map)
{
    assert(NULL != map);
}
|
||||
|
||||
static int __check_perms(struct map_segment_desc *seg)
|
||||
{
|
||||
if (!strcmp(seg->perms, "rw-p") || !strcmp(seg->perms, "rwxp"))
|
||||
return OSHMEM_SUCCESS;
|
||||
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
static int __check_address(struct map_segment_desc *seg)
|
||||
{
|
||||
extern unsigned _end;
|
||||
unsigned long data_end = (unsigned long) &_end;
|
||||
|
||||
/**
|
||||
* SGI shmem only supports globals&static in main program.
|
||||
* It does not support them in shared objects or in dlopen()
|
||||
* (Clarified on PGAS 2011 tutorial)
|
||||
*
|
||||
* So ignored any maps that start higher then process _end
|
||||
* FIXME: make sure we do not register symmetric heap twice
|
||||
* if we decide to allow shared objects
|
||||
*/
|
||||
if (seg->start > data_end) {
|
||||
MEMHEAP_VERBOSE(100,
|
||||
"skip segment: data _end < segment start (%llx < %llx)",
|
||||
(unsigned long long)data_end, (unsigned long long)seg->start);
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
static int __check_pathname(struct map_segment_desc *seg)
|
||||
{
|
||||
/* Probably we need to check found path but
|
||||
* To press check coverity issue following code is disabled
|
||||
*/
|
||||
#if 0
|
||||
char *p;
|
||||
if ('\0' == seg->pathname[0])
|
||||
return OSHMEM_SUCCESS;
|
||||
|
||||
if (0 == strncmp(seg->pathname, "/lib", 4))
|
||||
return OSHMEM_ERROR;
|
||||
|
||||
if (0 == strncmp(seg->pathname, "/usr/lib", 8))
|
||||
return OSHMEM_ERROR;
|
||||
|
||||
if (0 == strncmp(seg->pathname, "/dev", 4))
|
||||
return OSHMEM_ERROR;
|
||||
|
||||
if (0 == strcmp(seg->pathname, "[stack]"))
|
||||
return OSHMEM_ERROR;
|
||||
|
||||
if (0 == strcmp(seg->pathname, "[vdso]"))
|
||||
return OSHMEM_ERROR;
|
||||
|
||||
if (0 == strcmp(seg->pathname, "[vsyscall]"))
|
||||
return OSHMEM_ERROR;
|
||||
|
||||
p = rindex(seg->pathname, '/');
|
||||
if (p) {
|
||||
if (0 == strncmp(p+1, "libshmem.so", 11))
|
||||
return OSHMEM_ERROR;
|
||||
|
||||
if (0 == strncmp(p+1, "libmpi.so", 9))
|
||||
return OSHMEM_ERROR;
|
||||
|
||||
if (0 == strncmp(p+1, "libmca_common_sm.so", 19))
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
#endif
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
static int __load_segments(void)
|
||||
{
|
||||
FILE *fp;
|
||||
char line[1024];
|
||||
struct map_segment_desc seg;
|
||||
|
||||
memheap_context.n_segments = 0;
|
||||
|
||||
fp = fopen("/proc/self/maps", "r");
|
||||
if (NULL == fp) {
|
||||
MEMHEAP_ERROR("Failed to open /proc/self/maps");
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
while (NULL != fgets(line, sizeof(line), fp)) {
|
||||
memset(&seg, 0, sizeof(seg));
|
||||
sscanf(line,
|
||||
"%llx-%llx %s %llx %s %llx %s",
|
||||
(long long *) &seg.start,
|
||||
(long long *) &seg.end,
|
||||
seg.perms,
|
||||
(long long *) &seg.offset,
|
||||
seg.dev,
|
||||
(long long *) &seg.inode,
|
||||
seg.pathname);
|
||||
|
||||
if (OSHMEM_ERROR == __check_address(&seg))
|
||||
continue;
|
||||
|
||||
if (OSHMEM_ERROR == __check_pathname(&seg))
|
||||
continue;
|
||||
|
||||
if (OSHMEM_ERROR == __check_perms(&seg))
|
||||
continue;
|
||||
|
||||
MEMHEAP_VERBOSE(5, "add: %s", line);
|
||||
if (MCA_MEMHEAP_MAX_SEGMENTS <= memheap_context.n_segments) {
|
||||
MEMHEAP_ERROR("too many segments (max = %d): skip %s",
|
||||
MCA_MEMHEAP_MAX_SEGMENTS, line);
|
||||
continue;
|
||||
}
|
||||
if (memheap_context.n_segments > 0
|
||||
&& seg.start
|
||||
== memheap_context.mem_segs[memheap_context.n_segments
|
||||
- 1].end) {
|
||||
MEMHEAP_VERBOSE(5, "Coalescing segment");
|
||||
memheap_context.mem_segs[memheap_context.n_segments - 1].end =
|
||||
seg.end;
|
||||
} else {
|
||||
memheap_context.mem_segs[memheap_context.n_segments].start =
|
||||
seg.start;
|
||||
memheap_context.mem_segs[memheap_context.n_segments].end = seg.end;
|
||||
memheap_context.n_segments++;
|
||||
}
|
||||
}
|
||||
|
||||
fclose(fp);
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
41
oshmem/mca/memheap/buddy/Makefile.am
Обычный файл
41
oshmem/mca/memheap/buddy/Makefile.am
Обычный файл
@ -0,0 +1,41 @@
|
||||
#
|
||||
# Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
EXTRA_DIST =
|
||||
|
||||
AM_CFLAGS = $(OSHMEM_CFLAGS)
|
||||
|
||||
buddy_sources = \
|
||||
memheap_buddy.c \
|
||||
memheap_buddy.h \
|
||||
memheap_buddy_component.c \
|
||||
memheap_buddy_component.h
|
||||
|
||||
# Build this component as a DSO or as a static convenience library according
# to its own MCA build-mode conditional (not the one of ompi/pml/ob1).
if MCA_BUILD_oshmem_memheap_buddy_DSO
component_noinst =
component_install = mca_memheap_buddy.la
else
component_noinst = libmca_memheap_buddy.la
component_install =
endif
|
||||
|
||||
mcacomponentdir = $(pkglibdir)
|
||||
mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_memheap_buddy_la_SOURCES = $(buddy_sources)
|
||||
mca_memheap_buddy_la_LDFLAGS = -module -avoid-version
|
||||
|
||||
#noinst_LTLIBRARIES = $(lib)
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_memheap_buddy_la_SOURCES = $(buddy_sources)
|
||||
libmca_memheap_buddy_la_LDFLAGS = -module -avoid-version
|
||||
|
||||
|
||||
|
13
oshmem/mca/memheap/buddy/configure.params
Обычный файл
13
oshmem/mca/memheap/buddy/configure.params
Обычный файл
@ -0,0 +1,13 @@
|
||||
# -*- shell-script -*-
|
||||
# Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# Specific to this module
|
||||
|
||||
PARAM_CONFIG_FILES="Makefile"
|
718
oshmem/mca/memheap/buddy/memheap_buddy.c
Обычный файл
718
oshmem/mca/memheap/buddy/memheap_buddy.c
Обычный файл
@ -0,0 +1,718 @@
|
||||
/* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "oshmem_config.h"
|
||||
#include "oshmem/proc/proc.h"
|
||||
#include "oshmem/mca/spml/spml.h"
|
||||
#include "oshmem/mca/memheap/memheap.h"
|
||||
#include "oshmem/mca/memheap/buddy/memheap_buddy.h"
|
||||
#include "oshmem/mca/memheap/buddy/memheap_buddy_component.h"
|
||||
#include "oshmem/mca/memheap/base/base.h"
|
||||
#include "orte/mca/grpcomm/grpcomm.h"
|
||||
#include "opal/class/opal_hash_table.h"
|
||||
#include "opal/class/opal_object.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
|
||||
static int buddy_init(mca_memheap_buddy_module_t* buddy);
|
||||
|
||||
/* The single instance of the buddy memheap module. The initializer is
 * positional; the inner braces fill the embedded base module
 * (memheap_buddy.super). */
mca_memheap_buddy_module_t memheap_buddy = {
|
||||
{
|
||||
&mca_memheap_buddy_component,
|
||||
mca_memheap_buddy_finalize,
|
||||
/* user symmetric heap entry points */
mca_memheap_buddy_alloc,
|
||||
mca_memheap_buddy_align,
|
||||
mca_memheap_buddy_realloc,
|
||||
mca_memheap_buddy_free,
|
||||
|
||||
/* private heap entry points */
mca_memheap_buddy_private_alloc,
|
||||
mca_memheap_buddy_private_free,
|
||||
|
||||
/* key/offset lookups implemented by the memheap base */
mca_memheap_base_get_cached_mkey,
|
||||
mca_memheap_base_get_mkey,
|
||||
mca_memheap_base_find_offset,
|
||||
mca_memheap_base_is_symmetric_addr,
|
||||
mca_memheap_modex_recv_all,
|
||||
|
||||
/* presumably memheap_size, which is assigned in
 * mca_memheap_buddy_module_init() - TODO confirm */
0
|
||||
},
|
||||
1 /* priority */
|
||||
};
|
||||
|
||||
/* Memory Heap Buddy Implementation */
|
||||
|
||||
/* Static inline functions */
|
||||
static inline unsigned int bits_per_long(void)
|
||||
{
|
||||
return BITS_PER_BYTE * sizeof(unsigned long);
|
||||
}
|
||||
|
||||
/*
 * Clear a bitmap of nbits bits. Whole unsigned longs are cleared, i.e.
 * BITS_TO_LONGS(nbits) words starting at dst.
 */
static inline void bitmap_zero(unsigned long *dst, unsigned long nbits)
{
    const unsigned long nbytes = sizeof(unsigned long) * BITS_TO_LONGS(nbits);

    memset(dst, 0, nbytes);
}
|
||||
|
||||
/*
|
||||
* WARNING: Non atomic version.
|
||||
*/
|
||||
/*
 * Clear bit nr in the bitmap at addr.
 *
 * The bitmap is addressed as an array of 32-bit words: word nr / 32, bit
 * nr % 32. The mask is built from an unsigned 1 so that clearing bit 31 does
 * not shift into the sign bit of a signed int.
 *
 * WARNING: non-atomic version.
 */
static inline void __clear_bit(unsigned long nr, volatile void * addr)
{
    uint32_t *word = ((uint32_t *) addr) + (nr >> 5);

    *word &= ~((uint32_t) 1 << (nr & 31));
}
|
||||
|
||||
/*
|
||||
* WARNING: non atomic version.
|
||||
*/
|
||||
/*
 * Set bit nr in the bitmap at addr.
 *
 * The bitmap is addressed as an array of 32-bit words: word nr / 32, bit
 * nr % 32. The mask is built from an unsigned 1 so that setting bit 31 does
 * not shift into the sign bit of a signed int.
 *
 * WARNING: non-atomic version.
 */
static inline void __set_bit(unsigned long nr, volatile void * addr)
{
    uint32_t *word = ((uint32_t *) addr) + (nr >> 5);

    *word |= (uint32_t) 1 << (nr & 31);
}
|
||||
|
||||
/*
 * Return 1 if bit nr of the bitmap at addr is set, 0 otherwise.
 *
 * The bitmap is read as an array of 32-bit words (word nr / 32, bit
 * nr % 32). Words are read as unsigned so that testing a word with bit 31
 * set does not right-shift a negative value.
 */
static inline int test_bit(int nr, const volatile void * addr)
{
    const uint32_t *words = (const uint32_t *) addr;

    return (int) ((words[nr >> 5] >> (nr & 31)) & 1U);
}
|
||||
|
||||
/*
|
||||
* __ffs - find first bit in word.
|
||||
* @word: The word to search
|
||||
*
|
||||
* Undefined if no bit exists, so code should check against 0 first.
|
||||
*/
|
||||
/*
 * __ffs - index of the least significant set bit of word.
 * @word: the word to search
 *
 * Binary search over halving widths: whenever the low `width` bits are all
 * zero, the index grows by `width` and the word is shifted down. The result
 * is undefined if no bit is set, so callers should check against 0 first.
 */
static inline __opal_attribute_always_inline__ unsigned long __ffs(unsigned long word)
{
    unsigned int width;
    int pos = 0;

    /* The upper-half step only exists when long is 64 bits wide. */
    if (64 == bits_per_long() && 0 == (word & 0xffffffff)) {
        pos += 32;
        word >>= 32;
    }

    for (width = 16; width > 0; width >>= 1) {
        if (0 == (word & ((1UL << width) - 1))) {
            pos += width;
            word >>= width;
        }
    }

    return pos;
}
|
||||
|
||||
/* round up to next power of two */
|
||||
/*
 * Return the buddy order for a request of `size` bytes, i.e. the smallest
 * exponent e such that (1 << e) >= size. Sizes 0 and 1 both give order 0.
 */
static inline unsigned memheap_buddy_find_order(unsigned long size)
{
    /* A size that is not a power of two needs one extra order. */
    unsigned order = (0 != (size & (size - 1))) ? 1 : 0;

    for (size >>= 1; 0 != size; size >>= 1) {
        order++;
    }

    return order;
}
|
||||
|
||||
/*
|
||||
* find the first set bit in a memory region
|
||||
* @addr: The address to base the search on
|
||||
* @offset: The bitnumber to start searching at
|
||||
* @size: The maximum size to search
|
||||
*/
|
||||
|
||||
/*
 * Find the next set bit in a memory region.
 * @addr:   the bitmap, as an array of unsigned long
 * @size:   the bitmap size in bits
 * @offset: the bit number to start searching at
 *
 * Returns the index of the first set bit at or after @offset. If there is
 * none (or @offset >= @size), a value that is not a valid bit index
 * (>= @size) is returned.
 */
static inline unsigned long find_next_bit(const unsigned long *addr,
                                          unsigned long size,
                                          unsigned long offset)
{
    /* Held as unsigned long so that ~(bpl - 1) is a full-width mask;
     * bits_per_long() returns unsigned int, whose complement would clear the
     * upper half of a 64-bit offset or size. */
    const unsigned long bpl = bits_per_long();
    const unsigned long *p = addr + BITOP_WORD(offset);
    unsigned long result = offset & ~(bpl - 1);
    unsigned long tmp;

    if (offset >= size)
        return size;
    size -= result;
    offset %= bpl;
    if (offset) {
        /* Partial first word: ignore the bits below the start offset. */
        tmp = *(p++);
        tmp &= (~0UL << offset);
        if (size < bpl)
            goto found_first;
        if (tmp)
            goto found_middle;
        size -= bpl;
        result += bpl;
    }
    /* Whole words */
    while (size & ~(bpl - 1)) {
        if ((tmp = *(p++)))
            goto found_middle;
        result += bpl;
        size -= bpl;
    }
    if (!size)
        return result;
    tmp = *p;

found_first:
    /* Partial last word: ignore the bits at and above the region end. */
    tmp &= (~0UL >> (bpl - size));
    if (tmp == 0UL) /* Are any bits set? */
        return result + size; /* Nope. */
found_middle:
    return result + __ffs(tmp);
}
|
||||
|
||||
/**
|
||||
* Initialize the Memory Heap
|
||||
*/
|
||||
/**
 * Initialize the buddy memory heap from a memheap context.
 *
 * Constructs the module lock, derives the maximum buddy order of the user
 * and private heaps from their sizes (via memheap_log2), records the base
 * addresses and sets up the buddy allocator. The private size is asserted to
 * be an exact power of two.
 *
 * @param context sizes and base addresses of the user and private areas
 * @return OSHMEM_SUCCESS; OSHMEM_ERR_BAD_PARAM if context is NULL or either
 *         size is 0; OSHMEM_ERROR if the allocator cannot be set up (the
 *         module is finalized first in that case)
 */
int mca_memheap_buddy_module_init(memheap_context_t *context)
{
    unsigned long long heap_bytes;

    if (NULL == context || 0 == context->user_size
        || 0 == context->private_size) {
        return OSHMEM_ERR_BAD_PARAM;
    }

    /* Construct a mutex object */
    OBJ_CONSTRUCT(&memheap_buddy.lock, opal_mutex_t);

    memheap_buddy.heap.max_order = memheap_log2(context->user_size);
    memheap_buddy.heap.min_order = MEMHEAP_BASE_MIN_ORDER;
    memheap_buddy.private_heap.max_order = memheap_log2(context->private_size);
    memheap_buddy.private_heap.min_order = MEMHEAP_BASE_MIN_ORDER;

    heap_bytes = 1ULL << memheap_buddy.heap.max_order;
    if (heap_bytes != context->user_size) {
        MEMHEAP_VERBOSE(1,
                        "Memheap rounded to the nearest power of two: requested %llu bytes, allocated %llu bytes",
                        (unsigned long long)context->user_size, heap_bytes);
    }

    assert(context->private_size == (1ULL << memheap_buddy.private_heap.max_order));

    memheap_buddy.heap.symmetric_heap = context->user_base_addr;
    memheap_buddy.private_heap.symmetric_heap = context->private_base_addr;

    memheap_buddy.super.memheap_size = (1ULL << memheap_buddy.heap.max_order);

    MEMHEAP_VERBOSE(1,
                    "symmetric heap memory (user+private): %llu bytes",
                    (unsigned long long)(context->user_size + context->private_size));

    /* Initialize buddy allocator; undo everything if that fails */
    if (OSHMEM_SUCCESS != buddy_init(&memheap_buddy)) {
        MEMHEAP_ERROR("Failed to setup MEMHEAP buddy allocator");
        mca_memheap_buddy_finalize();
        return OSHMEM_ERROR;
    }

    return OSHMEM_SUCCESS;
}
|
||||
|
||||
static int buddy_init(mca_memheap_buddy_module_t* buddy)
|
||||
{
|
||||
unsigned long long total_size;
|
||||
unsigned i;
|
||||
unsigned long long s;
|
||||
|
||||
/* Allocate and init Hashtable */
|
||||
memheap_buddy.heap.symmetric_heap_hashtable = OBJ_NEW(opal_hash_table_t);
|
||||
if (NULL == memheap_buddy.heap.symmetric_heap_hashtable) {
|
||||
MEMHEAP_ERROR("Opal failed to allocate hashtable object");
|
||||
goto err;
|
||||
}
|
||||
memheap_buddy.private_heap.symmetric_heap_hashtable =
|
||||
OBJ_NEW(opal_hash_table_t);
|
||||
if (NULL == memheap_buddy.private_heap.symmetric_heap_hashtable) {
|
||||
MEMHEAP_ERROR("Opal failed to allocate hashtable object");
|
||||
goto err;
|
||||
}
|
||||
|
||||
opal_hash_table_init(memheap_buddy.heap.symmetric_heap_hashtable,
|
||||
DEFAULT_HASHTABLE_SIZE);
|
||||
opal_hash_table_init(memheap_buddy.private_heap.symmetric_heap_hashtable,
|
||||
DEFAULT_HASHTABLE_SIZE);
|
||||
/* Init Buddy Allocator */
|
||||
buddy->heap.bits = (unsigned long**) calloc((buddy->heap.max_order + 1),
|
||||
sizeof(unsigned long *));
|
||||
buddy->private_heap.bits =
|
||||
(unsigned long**) calloc((buddy->private_heap.max_order + 1),
|
||||
sizeof(unsigned long *));
|
||||
buddy->heap.num_free = (unsigned int*) calloc((buddy->heap.max_order + 1),
|
||||
sizeof(unsigned int));
|
||||
buddy->private_heap.num_free =
|
||||
(unsigned int*) calloc((buddy->private_heap.max_order + 1),
|
||||
sizeof(unsigned int));
|
||||
if ((NULL == buddy->heap.bits) || (NULL == buddy->heap.num_free)
|
||||
|| (NULL == buddy->private_heap.bits)
|
||||
|| (NULL == buddy->private_heap.num_free)) {
|
||||
|
||||
MEMHEAP_ERROR("Failed to allocate buddy allocator");
|
||||
goto err;
|
||||
}
|
||||
|
||||
total_size = 0;
|
||||
for (i = buddy->heap.min_order; i <= buddy->heap.max_order; ++i) {
|
||||
s = BITS_TO_LONGS(1UL << (buddy->heap.max_order - i));
|
||||
MEMHEAP_VERBOSE(20,
|
||||
"%d: (order=%d) allocating %llu longs (sizeof long = %d)",
|
||||
i, buddy->heap.max_order, s, (int)sizeof(unsigned long));
|
||||
total_size += s * sizeof(unsigned long);
|
||||
buddy->heap.bits[i] = (unsigned long*) malloc(s
|
||||
* sizeof(unsigned long));
|
||||
if (NULL == buddy->heap.bits[i]) {
|
||||
MEMHEAP_ERROR("Failed to allocate buddy->allocator");
|
||||
goto err;
|
||||
}
|
||||
bitmap_zero(buddy->heap.bits[i], 1UL << (buddy->heap.max_order - i));
|
||||
}
|
||||
MEMHEAP_VERBOSE(5, "MEMHEAP metadata size = %llu bytes", total_size);
|
||||
|
||||
total_size = 0;
|
||||
for (i = buddy->private_heap.min_order; i <= buddy->private_heap.max_order;
|
||||
++i) {
|
||||
s = BITS_TO_LONGS(1UL << (buddy->private_heap.max_order - i));
|
||||
MEMHEAP_VERBOSE(20,
|
||||
"%d: (order=%d) allocating %llu longs (sizeof long = %d)",
|
||||
i, buddy->private_heap.max_order, s, (int)sizeof(unsigned long));
|
||||
total_size += s * sizeof(unsigned long);
|
||||
buddy->private_heap.bits[i] = (unsigned long*) malloc(s
|
||||
* sizeof(unsigned long));
|
||||
if (NULL == buddy->private_heap.bits[i]) {
|
||||
MEMHEAP_ERROR("Failed to allocate buddy->allocator");
|
||||
goto err;
|
||||
}
|
||||
bitmap_zero(buddy->private_heap.bits[i],
|
||||
1UL << (buddy->private_heap.max_order - i));
|
||||
}
|
||||
MEMHEAP_VERBOSE(5,
|
||||
"private MEMHEAP metadata size = %llu bytes",
|
||||
total_size);
|
||||
|
||||
set_bit(0, buddy->heap.bits[buddy->heap.max_order]);
|
||||
set_bit(0, buddy->private_heap.bits[buddy->private_heap.max_order]);
|
||||
buddy->heap.num_free[buddy->heap.max_order] = 1;
|
||||
buddy->private_heap.num_free[buddy->private_heap.max_order] = 1;
|
||||
|
||||
return OSHMEM_SUCCESS;
|
||||
|
||||
err: return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
/**
 * Release the allocator metadata created by buddy_init() and destruct the
 * module lock.
 *
 * Every freed pointer is reset to NULL so that a repeated call (the module
 * is finalized from the init error path as well as through its finalize
 * entry point) does not free the same memory twice.
 *
 * @param buddy module to clean up; NULL is accepted and ignored
 * @return OSHMEM_SUCCESS
 */
static int buddy_cleanup(mca_memheap_buddy_module_t* buddy)
{
    unsigned int i;

    MEMHEAP_VERBOSE(5, "buddy cleanup");
    if (NULL == buddy) {
        return OSHMEM_SUCCESS;
    }

    if (NULL != buddy->heap.bits) {
        /* free(NULL) is a no-op, so orders that were never allocated are
         * harmless */
        for (i = 0; i <= buddy->heap.max_order; ++i) {
            free(buddy->heap.bits[i]);
            buddy->heap.bits[i] = NULL;
        }
        free(buddy->heap.bits);
        buddy->heap.bits = NULL;
    }

    if (NULL != buddy->private_heap.bits) {
        for (i = 0; i <= buddy->private_heap.max_order; ++i) {
            free(buddy->private_heap.bits[i]);
            buddy->private_heap.bits[i] = NULL;
        }
        free(buddy->private_heap.bits);
        buddy->private_heap.bits = NULL;
    }

    free(buddy->heap.num_free);
    buddy->heap.num_free = NULL;

    free(buddy->private_heap.num_free);
    buddy->private_heap.num_free = NULL;

    OBJ_DESTRUCT(&buddy->lock);
    return OSHMEM_SUCCESS;
}
|
||||
|
||||
static int _buddy_alloc(unsigned order,
|
||||
uint32_t* seg,
|
||||
mca_memheap_buddy_heap_t *heap)
|
||||
{
|
||||
uint32_t o;
|
||||
uint32_t m;
|
||||
|
||||
MEMHEAP_VERBOSE(20, "order=%d size=%d", order, 1<<order);
|
||||
OPAL_THREAD_LOCK(&memheap_buddy.lock);
|
||||
for (o = order; o <= heap->max_order; ++o) {
|
||||
if (heap->num_free[o]) {
|
||||
m = 1 << (heap->max_order - o);
|
||||
*seg = find_first_bit(heap->bits[o], m);
|
||||
MEMHEAP_VERBOSE(20,
|
||||
"found free bit: order=%d, bits=0x%lx m=%d, *seg=%d",
|
||||
o, heap->bits[o][0], m, *seg);
|
||||
if (*seg < m)
|
||||
goto found;
|
||||
}
|
||||
}
|
||||
|
||||
OPAL_THREAD_UNLOCK(&memheap_buddy.lock);
|
||||
return OSHMEM_ERROR;
|
||||
|
||||
found:
|
||||
clear_bit(*seg, heap->bits[o]);
|
||||
--(heap->num_free[o]);
|
||||
|
||||
while (o > order) {
|
||||
--o;
|
||||
*seg <<= 1;
|
||||
set_bit(*seg ^ 1, heap->bits[o]);
|
||||
++(heap->num_free[o]);
|
||||
}
|
||||
|
||||
OPAL_THREAD_UNLOCK(&memheap_buddy.lock);
|
||||
*seg <<= order;
|
||||
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
static int _buddy_free(mca_memheap_buddy_module_t* buddy,
|
||||
uint32_t seg,
|
||||
unsigned order,
|
||||
mca_memheap_buddy_heap_t *heap)
|
||||
{
|
||||
MEMHEAP_VERBOSE(20, "order=%d size=%d seg=%d", order, 1<<order, seg);
|
||||
seg >>= order;
|
||||
OPAL_THREAD_LOCK(&buddy->lock);
|
||||
|
||||
while (test_bit(seg ^ 1, heap->bits[order])) {
|
||||
clear_bit(seg ^ 1, heap->bits[order]);
|
||||
--(heap->num_free[order]);
|
||||
seg >>= 1;
|
||||
++order;
|
||||
}
|
||||
|
||||
set_bit(seg, heap->bits[order]);
|
||||
++(heap->num_free[order]);
|
||||
OPAL_THREAD_UNLOCK(&buddy->lock);
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
/* Free a block of the user symmetric heap; see _buddy_free(). */
static int buddy_free(mca_memheap_buddy_module_t* buddy,
                      uint32_t seg,
                      unsigned order)
{
    mca_memheap_buddy_heap_t *user_heap = &buddy->heap;

    return _buddy_free(buddy, seg, order, user_heap);
}
|
||||
|
||||
/* Free a block of the private heap; see _buddy_free(). */
static int buddy_private_free(mca_memheap_buddy_module_t* buddy,
                              uint32_t seg,
                              unsigned order)
{
    mca_memheap_buddy_heap_t *priv_heap = &buddy->private_heap;

    return _buddy_free(buddy, seg, order, priv_heap);
}
|
||||
|
||||
static int _do_alloc(uint32_t order,
|
||||
void **p_buff,
|
||||
mca_memheap_buddy_heap_t *heap)
|
||||
{
|
||||
int rc;
|
||||
unsigned long base;
|
||||
uint32_t offset;
|
||||
unsigned long addr;
|
||||
|
||||
if (order < heap->min_order)
|
||||
order = heap->min_order;
|
||||
|
||||
*p_buff = 0;
|
||||
if (order > heap->max_order) {
|
||||
/* Test allocated size overflow */
|
||||
MEMHEAP_VERBOSE(5, "Allocation overflow of symmetric heap size");
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
base = (unsigned long) heap->symmetric_heap;
|
||||
|
||||
if (OSHMEM_SUCCESS != _buddy_alloc(order, &offset, heap)) {
|
||||
MEMHEAP_VERBOSE(5, "Buddy Allocator failed to return a base address");
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
/* Save the order of the allocated variable */
|
||||
addr = base + offset;
|
||||
|
||||
rc = opal_hash_table_set_value_uint64(heap->symmetric_heap_hashtable,
|
||||
addr,
|
||||
(void *) (unsigned long) order);
|
||||
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
MEMHEAP_VERBOSE(5, "Failed to insert order to hashtable");
|
||||
goto alloc_error;
|
||||
}
|
||||
|
||||
*p_buff = (void*) addr;
|
||||
/* no barrier because it is not required by spec! */
|
||||
return OSHMEM_SUCCESS;
|
||||
|
||||
alloc_error: _buddy_free(&memheap_buddy, offset, order, heap);
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
static int do_alloc(uint32_t order, void **p_buff)
|
||||
{
|
||||
return _do_alloc(order, p_buff, &(memheap_buddy.heap));
|
||||
}
|
||||
|
||||
static int do_private_alloc(uint32_t order, void **p_buff)
|
||||
{
|
||||
return _do_alloc(order, p_buff, &(memheap_buddy.private_heap));
|
||||
}
|
||||
|
||||
/**
|
||||
* Allocate size bytes on the symmetric heap.
|
||||
* The allocated variable is aligned to its size.
|
||||
*/
|
||||
/**
 * Allocate size bytes on the symmetric heap.
 *
 * The request is rounded up to a power-of-two block (see
 * memheap_buddy_find_order()).
 *
 * @param size   number of bytes requested
 * @param p_buff out: address of the block, 0 on failure
 * @return OSHMEM_SUCCESS or OSHMEM_ERROR
 */
int mca_memheap_buddy_alloc(size_t size, void** p_buff)
{
    return do_alloc(memheap_buddy_find_order(size), p_buff);
}
|
||||
|
||||
/**
 * Allocate size bytes on the private heap.
 *
 * @param size   number of bytes requested
 * @param p_buff out: address of the block, 0 on failure
 * @return OSHMEM_SUCCESS or OSHMEM_ERROR
 */
int mca_memheap_buddy_private_alloc(size_t size, void** p_buff)
{
    const uint32_t order = memheap_buddy_find_order(size);
    const int rc = do_private_alloc(order, p_buff);

    MEMHEAP_VERBOSE(20, "private alloc addr: %p", *p_buff);

    return rc;
}
|
||||
|
||||
int mca_memheap_buddy_private_free(void* ptr)
|
||||
{
|
||||
int rc;
|
||||
uint32_t offset;
|
||||
unsigned long addr;
|
||||
unsigned long base;
|
||||
void *order;
|
||||
|
||||
if (0 == ptr) {
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
base = (unsigned long) memheap_buddy.private_heap.symmetric_heap;
|
||||
addr = (unsigned long) ptr;
|
||||
offset = addr - base;
|
||||
|
||||
rc =
|
||||
opal_hash_table_get_value_uint64(memheap_buddy.private_heap.symmetric_heap_hashtable,
|
||||
addr,
|
||||
&order);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
buddy_private_free(&memheap_buddy,
|
||||
offset,
|
||||
(unsigned) (unsigned long) order);
|
||||
opal_hash_table_remove_value_uint64(memheap_buddy.private_heap.symmetric_heap_hashtable,
|
||||
addr);
|
||||
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
int mca_memheap_buddy_align(size_t align, size_t size, void **p_buff)
|
||||
{
|
||||
uint32_t order;
|
||||
|
||||
if (align == 0) {
|
||||
*p_buff = 0;
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
/* check that align is power of 2 */
|
||||
if (align & (align - 1)) {
|
||||
*p_buff = 0;
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
order = memheap_buddy_find_order(size);
|
||||
if ((unsigned long) align > (1UL << order))
|
||||
order = memheap_buddy_find_order(align);
|
||||
|
||||
return do_alloc(order, p_buff);
|
||||
}
|
||||
|
||||
int mca_memheap_buddy_realloc(size_t new_size, void *p_buff, void **p_new_buff)
|
||||
{
|
||||
int rc;
|
||||
unsigned long addr;
|
||||
void *order;
|
||||
size_t old_size;
|
||||
char *tmp_buf;
|
||||
|
||||
/* equiv to alloc if old ptr is null */
|
||||
if (NULL == p_buff)
|
||||
return mca_memheap_buddy_alloc(new_size, p_new_buff);
|
||||
|
||||
addr = (unsigned long) p_buff;
|
||||
|
||||
rc =
|
||||
opal_hash_table_get_value_uint64(memheap_buddy.heap.symmetric_heap_hashtable,
|
||||
addr,
|
||||
&order);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
*p_new_buff = NULL;
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
/* equiv to free if new_size is 0 */
|
||||
if (0 == new_size) {
|
||||
*p_new_buff = NULL;
|
||||
return mca_memheap_buddy_free(p_buff);
|
||||
}
|
||||
|
||||
old_size = 1UL << (unsigned long) order;
|
||||
|
||||
/* do nothing if new size is less then current size */
|
||||
if (new_size <= old_size) {
|
||||
*p_new_buff = p_buff;
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
if (new_size > (1UL << memheap_buddy.heap.max_order)) {
|
||||
*p_new_buff = NULL;
|
||||
return OSHMEM_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
if (old_size + new_size >= (1UL << memheap_buddy.heap.max_order)) {
|
||||
/* copy via temporary buffer */
|
||||
|
||||
tmp_buf = (char *) malloc(old_size);
|
||||
if (!tmp_buf)
|
||||
return OSHMEM_ERR_OUT_OF_RESOURCE;
|
||||
memcpy(tmp_buf, p_buff, old_size);
|
||||
mca_memheap_buddy_free(p_buff);
|
||||
} else
|
||||
tmp_buf = p_buff;
|
||||
|
||||
/* alloc and copy data to new buffer, free old one */
|
||||
rc = mca_memheap_buddy_alloc(new_size, p_new_buff);
|
||||
if (OSHMEM_SUCCESS != rc) {
|
||||
*p_new_buff = NULL;
|
||||
if (old_size + new_size >= (1UL << memheap_buddy.heap.max_order)
|
||||
&& tmp_buf) {
|
||||
free(tmp_buf);
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
memcpy(*p_new_buff, tmp_buf, old_size);
|
||||
|
||||
if (old_size + new_size < (1UL << memheap_buddy.heap.max_order))
|
||||
mca_memheap_buddy_free(p_buff);
|
||||
else if (tmp_buf)
|
||||
free(tmp_buf);
|
||||
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Free a variable allocated on the
|
||||
* symmetric heap.
|
||||
*/
|
||||
int mca_memheap_buddy_free(void* ptr)
|
||||
{
|
||||
int rc;
|
||||
uint32_t offset;
|
||||
unsigned long addr;
|
||||
unsigned long base;
|
||||
void *order;
|
||||
|
||||
base = (unsigned long) memheap_buddy.heap.symmetric_heap;
|
||||
addr = (unsigned long) ptr;
|
||||
offset = addr - base;
|
||||
|
||||
rc =
|
||||
opal_hash_table_get_value_uint64(memheap_buddy.heap.symmetric_heap_hashtable,
|
||||
addr,
|
||||
&order);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
buddy_free(&memheap_buddy, offset, (unsigned) (unsigned long) order);
|
||||
opal_hash_table_remove_value_uint64(memheap_buddy.heap.symmetric_heap_hashtable,
|
||||
addr);
|
||||
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
int mca_memheap_buddy_finalize()
|
||||
{
|
||||
MEMHEAP_VERBOSE(5, "deregistering symmetric heap");
|
||||
|
||||
/* was not initialized - do nothing */
|
||||
if (memheap_buddy.heap.max_order == 0)
|
||||
return OSHMEM_SUCCESS;
|
||||
|
||||
/* Destruct hashtable supporting shfree of symmetric heap variables */
|
||||
if (memheap_buddy.heap.symmetric_heap_hashtable) {
|
||||
OBJ_RELEASE(memheap_buddy.heap.symmetric_heap_hashtable);
|
||||
}
|
||||
if (memheap_buddy.private_heap.symmetric_heap_hashtable) {
|
||||
OBJ_RELEASE(memheap_buddy.private_heap.symmetric_heap_hashtable);
|
||||
}
|
||||
|
||||
buddy_cleanup(&memheap_buddy);
|
||||
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the base address of the symmetric heap.
|
||||
*/
|
||||
|
||||
static inline void* mca_memheap_buddy_get_symmetric_heap_base_addr(void)
|
||||
{
|
||||
return memheap_buddy.heap.symmetric_heap;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the end address of the symmetric heap: the base address plus the
* sizes of the user heap and the private heap.
|
||||
*/
|
||||
static inline void* mca_memheap_buddy_get_symmetric_heap_last_addr(void)
|
||||
{
|
||||
return (void*) ((unsigned char*) (memheap_buddy.heap.symmetric_heap)
|
||||
+ (1ULL << memheap_buddy.heap.max_order)
|
||||
+ (1ULL << memheap_buddy.private_heap.max_order));
|
||||
}
|
||||
|
87
oshmem/mca/memheap/buddy/memheap_buddy.h
Обычный файл
87
oshmem/mca/memheap/buddy/memheap_buddy.h
Обычный файл
@ -0,0 +1,87 @@
|
||||
/**
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
* Buddy-allocator implementation of the symmetric memory heap (memheap) framework
|
||||
*/
|
||||
#ifndef MCA_MEMHEAP_BUDDY_H
|
||||
#define MCA_MEMHEAP_BUDDY_H
|
||||
|
||||
#include "oshmem_config.h"
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/threads/mutex.h"
|
||||
#include "oshmem/mca/memheap/memheap.h"
|
||||
#include "oshmem/mca/memheap/base/base.h"
|
||||
#include "oshmem/mca/spml/spml.h"
|
||||
#include "opal/class/opal_hash_table.h"
|
||||
#include "ompi/mca/btl/btl.h"
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <math.h>
|
||||
|
||||
#define BITS_PER_BYTE 8
|
||||
#define __BITOPS_WORDSIZE 64
|
||||
#define DEFAULT_HASHTABLE_SIZE 100
|
||||
|
||||
#define BITOP_WORD(nr) ((nr) / bits_per_long())
|
||||
#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
|
||||
#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(unsigned long))
|
||||
#define __BITOPS_WORDS(bits) (((bits)+__BITOPS_WORDSIZE-1)/__BITOPS_WORDSIZE)
|
||||
#define clear_bit(x,y) __clear_bit((x), (y))
|
||||
#define set_bit(x,y) __set_bit((x), (y))
|
||||
#define find_first_bit(addr, size) find_next_bit((addr), (size), 0)
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
struct mca_memheap_buddy_heap_t {
|
||||
unsigned long **bits; /** Part of the buddy allocator */
|
||||
unsigned *num_free; /** Part of the buddy allocator */
|
||||
unsigned max_order; /** Log2 of Maximal heap size, part of the allocator */
|
||||
unsigned min_order; /** min alloc order */
|
||||
void* symmetric_heap; /** Symmetric Heap */
|
||||
opal_hash_table_t* symmetric_heap_hashtable; /** Pointer to the Symmetric heap used for moving on it */
|
||||
};
|
||||
typedef struct mca_memheap_buddy_heap_t mca_memheap_buddy_heap_t;
|
||||
|
||||
/* Structure for managing shmem symmetric heap */
|
||||
struct mca_memheap_buddy_module_t {
|
||||
mca_memheap_base_module_t super;
|
||||
|
||||
int priority; /** Module's Priority */
|
||||
mca_memheap_buddy_heap_t heap;
|
||||
mca_memheap_buddy_heap_t private_heap;
|
||||
opal_mutex_t lock; /** Part of the buddy allocator */
|
||||
};
|
||||
typedef struct mca_memheap_buddy_module_t mca_memheap_buddy_module_t;
|
||||
OSHMEM_DECLSPEC extern mca_memheap_buddy_module_t memheap_buddy;
|
||||
|
||||
/*
|
||||
* Buddy interface.
|
||||
* Please pay attention to the new differences in the interface.
|
||||
*/
|
||||
OSHMEM_DECLSPEC extern int mca_memheap_buddy_module_init(memheap_context_t *);
|
||||
OSHMEM_DECLSPEC extern int mca_memheap_buddy_alloc(size_t, void**);
|
||||
OSHMEM_DECLSPEC extern int mca_memheap_buddy_realloc(size_t, void*, void **);
|
||||
OSHMEM_DECLSPEC extern int mca_memheap_buddy_align(size_t, size_t, void**);
|
||||
OSHMEM_DECLSPEC extern int mca_memheap_buddy_free(void*);
|
||||
OSHMEM_DECLSPEC extern int mca_memheap_buddy_finalize(void);
|
||||
|
||||
/* private alloc/free functions */
|
||||
OSHMEM_DECLSPEC extern int mca_memheap_buddy_private_alloc(size_t, void**);
|
||||
OSHMEM_DECLSPEC extern int mca_memheap_buddy_private_free(void*);
|
||||
|
||||
/**
|
||||
* static/global variables support. Consider making it a separate component
|
||||
*/
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* MCA_MEMHEAP_BUDDY_H */
|
72
oshmem/mca/memheap/buddy/memheap_buddy_component.c
Обычный файл
72
oshmem/mca/memheap/buddy/memheap_buddy_component.c
Обычный файл
@ -0,0 +1,72 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
#include "oshmem_config.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "oshmem/mca/memheap/memheap.h"
|
||||
#include "oshmem/mca/memheap/base/base.h"
|
||||
#include "oshmem/mca/memheap/buddy/memheap_buddy.h"
|
||||
#include "memheap_buddy_component.h"
|
||||
|
||||
static int mca_memheap_buddy_component_close(void);
|
||||
static mca_memheap_base_module_t* mca_memheap_buddy_component_init(memheap_context_t *,
|
||||
int *);
|
||||
|
||||
static int __basic_open(void);
|
||||
|
||||
mca_memheap_base_component_t mca_memheap_buddy_component = {
|
||||
{
|
||||
MCA_MEMHEAP_BASE_VERSION_2_0_0,
|
||||
|
||||
"buddy", /* MCA component name */
|
||||
OSHMEM_MAJOR_VERSION, /* MCA component major version */
|
||||
OSHMEM_MINOR_VERSION, /* MCA component minor version */
|
||||
OSHMEM_RELEASE_VERSION, /* MCA component release version */
|
||||
|
||||
__basic_open,
|
||||
mca_memheap_buddy_component_close,
|
||||
NULL
|
||||
},
|
||||
{
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
},
|
||||
mca_memheap_buddy_component_init
|
||||
};
|
||||
|
||||
/* Open component */
|
||||
static int __basic_open(void)
|
||||
{
|
||||
/* Open callback listed in mca_memheap_buddy_component: it performs no
 * work and unconditionally reports success. */
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
/* Initialize component */
|
||||
mca_memheap_base_module_t* mca_memheap_buddy_component_init(memheap_context_t *context,
                                                            int *priority)
{
    /*
     * Report this component's priority and initialize the buddy module on
     * the supplied heap context.
     *
     * The priority is written before initialization is attempted, so the
     * caller receives it even when NULL is returned.
     */
    *priority = memheap_buddy.priority;

    /* Hand back the module's base-interface view only when init succeeded. */
    return (OSHMEM_SUCCESS == mca_memheap_buddy_module_init(context))
               ? &(memheap_buddy.super)
               : NULL;
}
|
||||
|
||||
/*
|
||||
* This function is automatically called from mca_base_components_close.
|
||||
* It releases the component's allocated memory.
|
||||
*/
|
||||
int mca_memheap_buddy_component_close()
|
||||
{
|
||||
mca_memheap_buddy_finalize();
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
26
oshmem/mca/memheap/buddy/memheap_buddy_component.h
Обычный файл
26
oshmem/mca/memheap/buddy/memheap_buddy_component.h
Обычный файл
@ -0,0 +1,26 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
*/
|
||||
|
||||
#ifndef MCA_MEMHEAP_BUDDY_COMPONENT_H
|
||||
#define MCA_MEMHEAP_BUDDY_COMPONENT_H
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/*
|
||||
* MEMHEAP module functions.
|
||||
*/
|
||||
OSHMEM_MODULE_DECLSPEC extern mca_memheap_base_component_2_0_0_t mca_memheap_buddy_component;
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
19
oshmem/mca/memheap/configure.m4
Обычный файл
19
oshmem/mca/memheap/configure.m4
Обычный файл
@ -0,0 +1,19 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
# All rights reserved.
|
||||
#
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
AC_DEFUN([MCA_oshmem_memheap_CONFIG],[
|
||||
# configure all the components
|
||||
MCA_CONFIGURE_FRAMEWORK($1, $2, 1)
|
||||
|
||||
# this is a direct callable component, so set that up.
|
||||
MCA_SETUP_DIRECT_CALL($1, $2)
|
||||
])
|
156
oshmem/mca/memheap/memheap.h
Обычный файл
156
oshmem/mca/memheap/memheap.h
Обычный файл
@ -0,0 +1,156 @@
|
||||
/**
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef MCA_MEMHEAP_H
|
||||
#define MCA_MEMHEAP_H
|
||||
#include "opal/mca/mca.h"
|
||||
#include "oshmem/constants.h"
|
||||
#include "oshmem/proc/proc.h"
|
||||
#include "oshmem/mca/spml/spml.h"
|
||||
|
||||
#define DEFAULT_SYMMETRIC_HEAP_SIZE 256
|
||||
/* Convert a size given in megabytes to bytes.  Both the argument and the whole
 * expansion are parenthesized so the macro stays correct when it is passed an
 * expression (e.g. a + b) or used inside a larger expression (e.g. x / MB(y)).
 * The arithmetic is done in the argument's own type. */
#define SIZE_IN_MEGA_BYTES(size_in_mb) ((size_in_mb) * 1024 * 1024)
|
||||
|
||||
BEGIN_C_DECLS
|
||||
struct mca_memheap_base_module_t;
|
||||
|
||||
typedef struct memheap_context
|
||||
{
|
||||
void* user_base_addr;
|
||||
void* private_base_addr;
|
||||
size_t user_size;
|
||||
size_t private_size;
|
||||
} memheap_context_t;
|
||||
|
||||
/**
|
||||
* Component initialize
|
||||
*/
|
||||
typedef struct mca_memheap_base_module_t* (*mca_memheap_base_component_init_fn_t)(memheap_context_t *,
|
||||
int *priority);
|
||||
|
||||
/*
|
||||
* Symmetric heap allocation. Malloc like interface
|
||||
*/
|
||||
typedef int (*mca_memheap_base_module_alloc_fn_t)(size_t, void**);
|
||||
|
||||
typedef int (*mca_memheap_base_module_memalign_fn_t)(size_t align,
|
||||
size_t size,
|
||||
void**);
|
||||
|
||||
typedef int (*mca_memheap_base_module_realloc_fn_t)(size_t newsize,
|
||||
void *,
|
||||
void **);
|
||||
|
||||
/*
|
||||
* Symmetric heap free.
|
||||
*/
|
||||
typedef int (*mca_memheap_base_module_free_fn_t)(void*);
|
||||
|
||||
/**
|
||||
* Service functions
|
||||
*/
|
||||
typedef uint64_t (*mca_memheap_base_module_find_offset_fn_t)(int pe,
|
||||
int tr_id,
|
||||
unsigned long va,
|
||||
uint64_t rva);
|
||||
|
||||
/**
|
||||
* @return mkey suitable to access pe via given transport id. rva is set to virtual address mapping of (va)
|
||||
* on remote pe.
|
||||
*/
|
||||
typedef mca_spml_mkey_t * (*mca_memheap_base_module_get_cached_mkey_fn_t)(int pe,
|
||||
unsigned long va,
|
||||
int transport_id,
|
||||
uint64_t *rva);
|
||||
typedef mca_spml_mkey_t * (*mca_memheap_base_module_get_local_mkey_fn_t)(unsigned long va,
|
||||
int transport_id);
|
||||
|
||||
/*
|
||||
* Symmetric heap destructor.
|
||||
*/
|
||||
typedef int (*mca_memheap_base_module_finalize_fn_t)(void);
|
||||
|
||||
typedef int (*mca_memheap_base_is_memheap_addr_fn_t)(unsigned long va);
|
||||
|
||||
/* get mkeys from all ranks */
|
||||
typedef void (*mca_memheap_base_mkey_exchange_fn_t)(void);
|
||||
|
||||
/*
|
||||
* memheap component descriptor. Contains component version, information and
|
||||
* init functions
|
||||
*/
|
||||
struct mca_memheap_base_component_2_0_0_t {
|
||||
mca_base_component_t memheap_version; /**< version */
|
||||
mca_base_component_data_t memheap_data; /**< metadata */
|
||||
mca_memheap_base_component_init_fn_t memheap_init; /**<init function */
|
||||
};
|
||||
typedef struct mca_memheap_base_component_2_0_0_t mca_memheap_base_component_2_0_0_t;
|
||||
typedef struct mca_memheap_base_component_2_0_0_t mca_memheap_base_component_t;
|
||||
|
||||
/**
|
||||
* memheap module descriptor
|
||||
*/
|
||||
struct mca_memheap_base_module_t {
|
||||
mca_memheap_base_component_t *memheap_component; /** Memory Heap Management Component */
|
||||
mca_memheap_base_module_finalize_fn_t memheap_finalize;
|
||||
mca_memheap_base_module_alloc_fn_t memheap_alloc;
|
||||
mca_memheap_base_module_memalign_fn_t memheap_memalign;
|
||||
mca_memheap_base_module_realloc_fn_t memheap_realloc;
|
||||
mca_memheap_base_module_free_fn_t memheap_free;
|
||||
|
||||
/*
|
||||
* alloc/free that should be used for internal allocation.
|
||||
* Internal memory does not count towards
|
||||
* symmetric heap memory
|
||||
*/
|
||||
mca_memheap_base_module_alloc_fn_t memheap_private_alloc;
|
||||
mca_memheap_base_module_free_fn_t memheap_private_free;
|
||||
|
||||
mca_memheap_base_module_get_cached_mkey_fn_t memheap_get_cached_mkey;
|
||||
mca_memheap_base_module_get_local_mkey_fn_t memheap_get_local_mkey;
|
||||
mca_memheap_base_module_find_offset_fn_t memheap_find_offset;
|
||||
mca_memheap_base_is_memheap_addr_fn_t memheap_is_symmetric_addr;
|
||||
mca_memheap_base_mkey_exchange_fn_t memheap_get_all_mkeys;
|
||||
|
||||
/*
|
||||
* Total size of user available memheap
|
||||
*/
|
||||
long memheap_size;
|
||||
};
|
||||
|
||||
typedef struct mca_memheap_base_module_t mca_memheap_base_module_t;
|
||||
|
||||
/*
|
||||
* Macro for use in components that are of type memheap
|
||||
*/
|
||||
#define MCA_MEMHEAP_BASE_VERSION_2_0_0 \
|
||||
MCA_BASE_VERSION_2_0_0, \
|
||||
"memheap", 2, 0, 0
|
||||
|
||||
/*
|
||||
* macro for doing direct call / call through struct
|
||||
*/
|
||||
#if MCA_oshmem_memheap_DIRECT_CALL
|
||||
|
||||
#include MCA_oshmem_memheap_DIRECT_CALL_HEADER
|
||||
|
||||
#define MCA_MEMHEAP_CALL_STAMP(a, b) mca_memheap_ ## a ## _ ## b
|
||||
#define MCA_MEMHEAP_CALL_EXPANDER(a, b) MCA_MEMHEAP_CALL_STAMP(a,b)
|
||||
#define MCA_MEMHEAP_CALL(a) MCA_MEMHEAP_CALL_EXPANDER(MCA_oshmem_memheap_DIRECT_CALL_COMPONENT, a)
|
||||
|
||||
#else
|
||||
#define MCA_MEMHEAP_CALL(a) mca_memheap.memheap_ ## a
|
||||
#endif
|
||||
|
||||
OSHMEM_DECLSPEC extern mca_memheap_base_module_t mca_memheap;
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* MCA_MEMHEAP_H */
|
43
oshmem/mca/memheap/ptmalloc/Makefile.am
Обычный файл
43
oshmem/mca/memheap/ptmalloc/Makefile.am
Обычный файл
@ -0,0 +1,43 @@
|
||||
#
|
||||
# Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
EXTRA_DIST =
|
||||
|
||||
AM_CFLAGS = $(OSHMEM_CFLAGS)
|
||||
|
||||
ptmalloc_sources = \
|
||||
malloc.c \
|
||||
memheap_ptmalloc.c \
|
||||
memheap_ptmalloc.h \
|
||||
memheap_ptmalloc_component.c \
|
||||
memheap_ptmalloc_component.h \
|
||||
malloc_defs.h
|
||||
|
||||
#if MCA_BUILD_ompi_memheap_ptmalloc_DSO
|
||||
if MCA_BUILD_ompi_pml_ob1_DSO
|
||||
component_noinst =
|
||||
component_install = mca_memheap_ptmalloc.la
|
||||
else
|
||||
component_noinst = libmca_memheap_ptmalloc.la
|
||||
component_install =
|
||||
endif
|
||||
|
||||
mcacomponentdir = $(pkglibdir)
|
||||
mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_memheap_ptmalloc_la_SOURCES = $(ptmalloc_sources)
|
||||
mca_memheap_ptmalloc_la_LDFLAGS = -module -avoid-version
|
||||
|
||||
#noinst_LTLIBRARIES = $(lib)
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_memheap_ptmalloc_la_SOURCES = $(ptmalloc_sources)
|
||||
libmca_memheap_ptmalloc_la_LDFLAGS = -module -avoid-version
|
||||
|
||||
|
||||
|
13
oshmem/mca/memheap/ptmalloc/configure.params
Обычный файл
13
oshmem/mca/memheap/ptmalloc/configure.params
Обычный файл
@ -0,0 +1,13 @@
|
||||
# -*- shell-script -*-
|
||||
# Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# Specific to this module
|
||||
|
||||
PARAM_CONFIG_FILES="Makefile"
|
5527
oshmem/mca/memheap/ptmalloc/malloc.c
Обычный файл
5527
oshmem/mca/memheap/ptmalloc/malloc.c
Обычный файл
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
32
oshmem/mca/memheap/ptmalloc/malloc_defs.h
Обычный файл
32
oshmem/mca/memheap/ptmalloc/malloc_defs.h
Обычный файл
@ -0,0 +1,32 @@
|
||||
#ifndef _MALLOC_DEFS_H
|
||||
#define _MALLOC_DEFS_H
|
||||
|
||||
#include "oshmem/runtime/runtime.h"
|
||||
|
||||
/* See malloc.c for detailed parameter description */
|
||||
#define USE_SPIN_LOCKS 0
|
||||
#define USE_DL_PREFIX
|
||||
#define ABORT oshmem_shmem_abort(-2)
|
||||
//#define ABORT abort()
|
||||
#define MORECORE mca_memheap_ptmalloc_sbrk
|
||||
#define MORECORE_CANNOT_TRIM
|
||||
#define DL_HAVE_MMAP 0
|
||||
#define DL_HAVE_MREMAP 0
|
||||
#define malloc_getpagesize mca_memheap_ptmalloc_getpagesize()
|
||||
#define REALLOC_ZERO_BYTES_FREES
|
||||
#define ABORT_ON_ASSERT_FAILURE 1
|
||||
/* next two are useful for debugging */
|
||||
#define DL_DEBUG 0
|
||||
#define FOOTERS 0
|
||||
/* print error if *alloc() is called with incorrect params */
|
||||
#define USAGE_ERROR_ACTION(m, p) do { printf("PTMALLOC: USAGE ERROR DETECTED: m=%p ptr=%p\n", m, p); } while (0)
|
||||
|
||||
int mca_memheap_ptmalloc_getpagesize(void);
|
||||
void *mca_memheap_ptmalloc_sbrk(size_t size);
|
||||
|
||||
void* dlmalloc(size_t);
|
||||
void dlfree(void*);
|
||||
void* dlrealloc(void*, size_t);
|
||||
void* dlmemalign(size_t, size_t);
|
||||
|
||||
#endif
|
179
oshmem/mca/memheap/ptmalloc/memheap_ptmalloc.c
Обычный файл
179
oshmem/mca/memheap/ptmalloc/memheap_ptmalloc.c
Обычный файл
@ -0,0 +1,179 @@
|
||||
/* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "oshmem_config.h"
|
||||
#include "oshmem/proc/proc.h"
|
||||
#include "oshmem/mca/spml/spml.h"
|
||||
#include "oshmem/mca/memheap/memheap.h"
|
||||
#include "oshmem/mca/memheap/ptmalloc/memheap_ptmalloc.h"
|
||||
#include "oshmem/mca/memheap/ptmalloc/memheap_ptmalloc_component.h"
|
||||
#include "oshmem/mca/memheap/base/base.h"
|
||||
#include "orte/mca/grpcomm/grpcomm.h"
|
||||
#include "opal/class/opal_hash_table.h"
|
||||
#include "opal/class/opal_object.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
|
||||
/* Module instance for the ptmalloc memheap.  The initializer is positional:
 * the inner braces fill the embedded mca_memheap_base_module_t ("super") in
 * field order, and the trailing value is the module priority.  The remaining
 * members (base, cur_size, max_size, max_alloc_size, lock) are not listed here
 * and are set up in mca_memheap_ptmalloc_module_init(). */
mca_memheap_ptmalloc_module_t memheap_ptmalloc = {
|
||||
{
|
||||
&mca_memheap_ptmalloc_component, /* memheap_component */
|
||||
mca_memheap_ptmalloc_finalize, /* memheap_finalize */
|
||||
mca_memheap_ptmalloc_alloc, /* memheap_alloc */
|
||||
mca_memheap_ptmalloc_align, /* memheap_memalign */
|
||||
mca_memheap_ptmalloc_realloc, /* memheap_realloc */
|
||||
mca_memheap_ptmalloc_free, /* memheap_free */
|
||||
|
||||
/* The private alloc/free slots reuse the same functions as the user heap. */
mca_memheap_ptmalloc_alloc, /* memheap_private_alloc */
|
||||
mca_memheap_ptmalloc_free, /* memheap_private_free */
|
||||
|
||||
mca_memheap_base_get_cached_mkey, /* memheap_get_cached_mkey */
|
||||
mca_memheap_base_get_mkey, /* memheap_get_local_mkey */
|
||||
mca_memheap_base_find_offset, /* memheap_find_offset */
|
||||
mca_memheap_base_is_symmetric_addr, /* memheap_is_symmetric_addr */
|
||||
mca_memheap_modex_recv_all, /* memheap_get_all_mkeys */
|
||||
|
||||
0 /* memheap_size */
|
||||
},
|
||||
100 /* priority */
|
||||
};
|
||||
|
||||
/* Memory Heap ptmalloc Implementation */
|
||||
/**
|
||||
* Initialize the Memory Heap
|
||||
*/
|
||||
int mca_memheap_ptmalloc_module_init(memheap_context_t *context)
{
    /*
     * Bind the ptmalloc module to the heap described by `context`.
     *
     * Returns OSHMEM_ERR_BAD_PARAM when the context is missing or either the
     * user or the private size is zero; OSHMEM_SUCCESS otherwise.
     */
    size_t total_size;

    if (NULL == context || 0 == context->user_size
        || 0 == context->private_size) {
        return OSHMEM_ERR_BAD_PARAM;
    }

    /* The sbrk-style provider may serve memory from the whole region
     * (user + private); a single request is capped at the user size. */
    total_size = context->user_size + context->private_size;

    /* Construct a mutex object */
    OBJ_CONSTRUCT(&memheap_ptmalloc.lock, opal_mutex_t);
    memheap_ptmalloc.base = context->user_base_addr;
    memheap_ptmalloc.cur_size = 0;
    memheap_ptmalloc.max_size = total_size;
    memheap_ptmalloc.max_alloc_size = context->user_size;

    MEMHEAP_VERBOSE(1,
                    "symmetric heap memory (user+private): %llu bytes",
                    (unsigned long long) total_size);

    /* disable till we figure out double modex&grpcomm.bad problem */
    //    memheap_modex_mkey_exchange();
    return OSHMEM_SUCCESS;
}
|
||||
|
||||
/**
|
||||
* Allocate size bytes on the symmetric heap.
|
||||
* The buffer comes straight from dlmalloc(), so its alignment is whatever
* dlmalloc() provides.
|
||||
*/
|
||||
int mca_memheap_ptmalloc_alloc(size_t size, void** p_buff)
|
||||
{
|
||||
if (size > memheap_ptmalloc.max_alloc_size) {
|
||||
*p_buff = 0;
|
||||
return OSHMEM_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
OPAL_THREAD_LOCK(&memheap_ptmalloc.lock);
|
||||
*p_buff = dlmalloc(size);
|
||||
OPAL_THREAD_UNLOCK(&memheap_ptmalloc.lock);
|
||||
|
||||
if (NULL == *p_buff)
|
||||
return OSHMEM_ERROR;
|
||||
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
int mca_memheap_ptmalloc_align(size_t align, size_t size, void **p_buff)
|
||||
{
|
||||
if (size > memheap_ptmalloc.max_alloc_size) {
|
||||
*p_buff = 0;
|
||||
return OSHMEM_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
if (align == 0) {
|
||||
*p_buff = 0;
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
/* check that align is power of 2 */
|
||||
if (align & (align - 1)) {
|
||||
*p_buff = 0;
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
OPAL_THREAD_LOCK(&memheap_ptmalloc.lock);
|
||||
*p_buff = dlmemalign(align, size);
|
||||
OPAL_THREAD_UNLOCK(&memheap_ptmalloc.lock);
|
||||
|
||||
if (NULL == *p_buff)
|
||||
return OSHMEM_ERROR;
|
||||
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
int mca_memheap_ptmalloc_realloc(size_t new_size,
|
||||
void *p_buff,
|
||||
void **p_new_buff)
|
||||
{
|
||||
if (new_size > memheap_ptmalloc.max_alloc_size) {
|
||||
*p_new_buff = 0;
|
||||
return OSHMEM_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
OPAL_THREAD_LOCK(&memheap_ptmalloc.lock);
|
||||
*p_new_buff = dlrealloc(p_buff, new_size);
|
||||
OPAL_THREAD_UNLOCK(&memheap_ptmalloc.lock);
|
||||
|
||||
if (!*p_new_buff)
|
||||
return OSHMEM_ERR_OUT_OF_RESOURCE;
|
||||
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Free a variable allocated on the
|
||||
* symmetric heap.
|
||||
*/
|
||||
int mca_memheap_ptmalloc_free(void* ptr)
|
||||
{
|
||||
/* dlfree is called with the module lock held, like every other dl* call in
 * this file. */
OPAL_THREAD_LOCK(&memheap_ptmalloc.lock);
|
||||
dlfree(ptr);
|
||||
OPAL_THREAD_UNLOCK(&memheap_ptmalloc.lock);
|
||||
/* dlfree returns nothing, so this function always reports success. */
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
int mca_memheap_ptmalloc_finalize()
|
||||
{
|
||||
/* Only a verbose message is emitted here; no module state is released. */
/* NOTE(review): the mutex constructed in mca_memheap_ptmalloc_module_init()
 * is not destructed here - confirm whether that is intentional. */
MEMHEAP_VERBOSE(5, "deregistering symmetric heap");
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
int mca_memheap_ptmalloc_getpagesize(void)
|
||||
{
|
||||
/* Fixed value of 2 MiB; malloc_defs.h maps malloc_getpagesize to this
 * function. */
return 2 * 1024 * 1024;
|
||||
}
|
||||
|
||||
/* must be same as in malloc.c */
|
||||
#define PTMALLOC_MAX_SIZE_T (~(size_t)0)
|
||||
#define PTMALLOC_MFAIL ((void*)(PTMALLOC_MAX_SIZE_T))
|
||||
void *mca_memheap_ptmalloc_sbrk(size_t size)
|
||||
{
|
||||
char *ret;
|
||||
|
||||
if (memheap_ptmalloc.cur_size + size > memheap_ptmalloc.max_size) {
|
||||
return PTMALLOC_MFAIL ;
|
||||
}
|
||||
|
||||
ret = (char *) memheap_ptmalloc.base + memheap_ptmalloc.cur_size;
|
||||
memheap_ptmalloc.cur_size += size;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
71
oshmem/mca/memheap/ptmalloc/memheap_ptmalloc.h
Обычный файл
71
oshmem/mca/memheap/ptmalloc/memheap_ptmalloc.h
Обычный файл
@ -0,0 +1,71 @@
|
||||
/**
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
* ptmalloc (dlmalloc-based) implementation of the symmetric memory heap (memheap) framework
|
||||
*/
|
||||
#ifndef MCA_MEMHEAP_PTMALLOC_H
|
||||
#define MCA_MEMHEAP_PTMALLOC_H
|
||||
|
||||
#include "oshmem_config.h"
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/threads/mutex.h"
|
||||
#include "oshmem/mca/memheap/memheap.h"
|
||||
#include "oshmem/mca/memheap/base/base.h"
|
||||
#include "oshmem/mca/spml/spml.h"
|
||||
#include "opal/class/opal_hash_table.h"
|
||||
#include "ompi/mca/btl/btl.h"
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <math.h>
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
#include "malloc_defs.h"
|
||||
/*
|
||||
* At the moment we use only dlmalloc part of the ptmalloc3. Thread safety is implemented by using locks on
|
||||
* alloc operations. Since all shmem alloc ops are collectives, malloc performance is not a problem. So it makes
|
||||
* sense to use simpler algorithm.
|
||||
*
|
||||
* Heap is allocated in one chunk, and we implement our own sbrk-like function that serves portions of the memory
|
||||
* to malloc.
|
||||
*
|
||||
* At the moment we do not support growing/returning heap based memory to OS.
|
||||
*/
|
||||
|
||||
/* Structure for managing shmem symmetric heap */
|
||||
struct mca_memheap_ptmalloc_module_t {
|
||||
mca_memheap_base_module_t super;
|
||||
int priority; /** Module's Priority */
|
||||
void *base;
|
||||
size_t cur_size;
|
||||
size_t max_size;
|
||||
size_t max_alloc_size;
|
||||
opal_mutex_t lock; /** Part of the allocator */
|
||||
};
|
||||
|
||||
typedef struct mca_memheap_ptmalloc_module_t mca_memheap_ptmalloc_module_t;
|
||||
OSHMEM_DECLSPEC extern mca_memheap_ptmalloc_module_t memheap_ptmalloc;
|
||||
|
||||
/*
|
||||
* ptmalloc interface.
|
||||
* Please pay attention to the new differences in the interface.
|
||||
*/
|
||||
OSHMEM_DECLSPEC extern int mca_memheap_ptmalloc_module_init(memheap_context_t *);
|
||||
OSHMEM_DECLSPEC extern int mca_memheap_ptmalloc_alloc(size_t, void**);
|
||||
OSHMEM_DECLSPEC extern int mca_memheap_ptmalloc_realloc(size_t, void*, void **);
|
||||
OSHMEM_DECLSPEC extern int mca_memheap_ptmalloc_align(size_t, size_t, void**);
|
||||
OSHMEM_DECLSPEC extern int mca_memheap_ptmalloc_free(void*);
|
||||
OSHMEM_DECLSPEC extern int mca_memheap_ptmalloc_finalize(void);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* MCA_MEMHEAP_PTMALLOC_H */
|
72
oshmem/mca/memheap/ptmalloc/memheap_ptmalloc_component.c
Обычный файл
72
oshmem/mca/memheap/ptmalloc/memheap_ptmalloc_component.c
Обычный файл
@ -0,0 +1,72 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
#include "oshmem_config.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "oshmem/mca/memheap/memheap.h"
|
||||
#include "oshmem/mca/memheap/base/base.h"
|
||||
#include "oshmem/mca/memheap/ptmalloc/memheap_ptmalloc.h"
|
||||
#include "memheap_ptmalloc_component.h"
|
||||
|
||||
static int mca_memheap_ptmalloc_component_close(void);
|
||||
static mca_memheap_base_module_t* mca_memheap_ptmalloc_component_init(memheap_context_t *,
|
||||
int *);
|
||||
|
||||
static int __basic_open(void);
|
||||
|
||||
mca_memheap_base_component_t mca_memheap_ptmalloc_component = {
|
||||
{
|
||||
MCA_MEMHEAP_BASE_VERSION_2_0_0,
|
||||
|
||||
"ptmalloc", /* MCA component name */
|
||||
OSHMEM_MAJOR_VERSION, /* MCA component major version */
|
||||
OSHMEM_MINOR_VERSION, /* MCA component minor version */
|
||||
OSHMEM_RELEASE_VERSION, /* MCA component release version */
|
||||
|
||||
__basic_open,
|
||||
mca_memheap_ptmalloc_component_close,
|
||||
NULL
|
||||
},
|
||||
{
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
},
|
||||
mca_memheap_ptmalloc_component_init
|
||||
};
|
||||
|
||||
/* Open component */
|
||||
static int __basic_open(void)
|
||||
{
|
||||
/* Open callback listed in mca_memheap_ptmalloc_component: it performs no
 * work and unconditionally reports success. */
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
/* Initialize component */
|
||||
mca_memheap_base_module_t* mca_memheap_ptmalloc_component_init(memheap_context_t *context,
                                                               int *priority)
{
    /*
     * Report this component's priority and initialize the ptmalloc module on
     * the supplied heap context.
     *
     * The priority is written before initialization is attempted, so the
     * caller receives it even when NULL is returned.
     */
    *priority = memheap_ptmalloc.priority;

    /* Hand back the module's base-interface view only when init succeeded. */
    return (OSHMEM_SUCCESS == mca_memheap_ptmalloc_module_init(context))
               ? &(memheap_ptmalloc.super)
               : NULL;
}
|
||||
|
||||
/*
|
||||
* This function is automatically called from mca_base_components_close.
|
||||
* It releases the component's allocated memory.
|
||||
*/
|
||||
int mca_memheap_ptmalloc_component_close()
|
||||
{
|
||||
mca_memheap_ptmalloc_finalize();
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
26
oshmem/mca/memheap/ptmalloc/memheap_ptmalloc_component.h
Обычный файл
26
oshmem/mca/memheap/ptmalloc/memheap_ptmalloc_component.h
Обычный файл
@ -0,0 +1,26 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
*/
|
||||
|
||||
#ifndef MCA_MEMHEAP_PTMALLOC_COMPONENT_H
|
||||
#define MCA_MEMHEAP_PTMALLOC_COMPONENT_H
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/*
|
||||
* MEMHEAP module functions.
|
||||
*/
|
||||
OSHMEM_MODULE_DECLSPEC extern mca_memheap_base_component_2_0_0_t mca_memheap_ptmalloc_component;
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
35
oshmem/mca/scoll/Makefile.am
Обычный файл
35
oshmem/mca/scoll/Makefile.am
Обычный файл
@ -0,0 +1,35 @@
|
||||
#
|
||||
# Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# main library setup
|
||||
noinst_LTLIBRARIES = libmca_scoll.la
|
||||
libmca_scoll_la_SOURCES =
|
||||
|
||||
# header setup
|
||||
nobase_oshmem_HEADERS =
|
||||
nobase_nodist_oshmem_HEADERS =
|
||||
|
||||
# local files
|
||||
headers = scoll.h
|
||||
libmca_scoll_la_SOURCES += $(headers) $(nodist_headers)
|
||||
|
||||
# Conditionally install the header files
|
||||
if WANT_INSTALL_HEADERS
|
||||
nobase_oshmem_HEADERS += $(headers)
|
||||
nobase_nodist_oshmem_HEADERS += $(nodist_headers)
|
||||
oshmemdir = $(includedir)/oshmem/oshmem/mca/scoll
|
||||
else
|
||||
oshmemdir = $(includedir)
|
||||
endif
|
||||
|
||||
include base/Makefile.am
|
||||
|
||||
distclean-local:
|
||||
rm -f base/static-components.h
|
19
oshmem/mca/scoll/base/Makefile.am
Обычный файл
19
oshmem/mca/scoll/base/Makefile.am
Обычный файл
@ -0,0 +1,19 @@
|
||||
#
|
||||
# Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
AM_CFLAGS = $(OSHMEM_CFLAGS)
|
||||
|
||||
headers += \
|
||||
base/base.h
|
||||
|
||||
libmca_scoll_la_SOURCES += \
|
||||
base/scoll_base_frame.c \
|
||||
base/scoll_base_available.c \
|
||||
base/scoll_base_select.c
|
64
oshmem/mca/scoll/base/base.h
Обычный файл
64
oshmem/mca/scoll/base/base.h
Обычный файл
@ -0,0 +1,64 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef MCA_SCOLL_BASE_H
|
||||
#define MCA_SCOLL_BASE_H
|
||||
|
||||
#include "oshmem_config.h"
|
||||
|
||||
#include "oshmem/mca/memheap/memheap.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
|
||||
/*
|
||||
* Global functions for MCA overall collective open and close
|
||||
*/
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/*
|
||||
* Globals
|
||||
*/
|
||||
/**
|
||||
* Special synchronization array to do barrier all.
|
||||
*/
|
||||
OSHMEM_DECLSPEC extern long* mca_scoll_sync_array;
|
||||
|
||||
OSHMEM_DECLSPEC int mca_scoll_base_find_available(bool enable_progress_threads,
|
||||
bool enable_threads);
|
||||
|
||||
OSHMEM_DECLSPEC int mca_scoll_base_select(struct oshmem_group_t *group);
|
||||
|
||||
int mca_scoll_base_group_unselect(struct oshmem_group_t *group);
|
||||
|
||||
OSHMEM_DECLSPEC int mca_scoll_enable(void);
|
||||
|
||||
/*
|
||||
* MCA framework
|
||||
*/
|
||||
OSHMEM_DECLSPEC extern mca_base_framework_t oshmem_scoll_base_framework;
|
||||
|
||||
/* ******************************************************************** */
|
||||
#ifdef __BASE_FILE__
|
||||
#define __SCOLL_FILE__ __BASE_FILE__
|
||||
#else
|
||||
#define __SCOLL_FILE__ __FILE__
|
||||
#endif
|
||||
|
||||
#define SCOLL_VERBOSE(level, format, ...) \
|
||||
opal_output_verbose(level, oshmem_scoll_base_framework.framework_output, "%s:%d - %s() " format, \
|
||||
__SCOLL_FILE__, __LINE__, __FUNCTION__, ## __VA_ARGS__)
|
||||
|
||||
#define SCOLL_ERROR(format, ... ) \
|
||||
opal_output_verbose(0, oshmem_scoll_base_framework.framework_output, "Error: %s:%d - %s() " format, \
|
||||
__SCOLL_FILE__, __LINE__, __FUNCTION__, ## __VA_ARGS__)
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* MCA_SCOLL_BASE_H */
|
139
oshmem/mca/scoll/base/scoll_base_available.c
Обычный файл
139
oshmem/mca/scoll/base/scoll_base_available.c
Обычный файл
@ -0,0 +1,139 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "oshmem_config.h"
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/mca/base/mca_base_component_repository.h"
|
||||
|
||||
#include "oshmem/constants.h"
|
||||
#include "oshmem/mca/scoll/scoll.h"
|
||||
#include "oshmem/mca/scoll/base/base.h"
|
||||
|
||||
/*
|
||||
* Private functions
|
||||
*/
|
||||
static int init_query(const mca_base_component_t * ls,
|
||||
bool enable_progress_threads,
|
||||
bool enable_threads);
|
||||
|
||||
/*
|
||||
* Scan down the list of successfully opened components and query each of
|
||||
* them (the opened list will be one or more components. If the user
|
||||
* requested a specific component, it will be the only component in the
|
||||
* opened list). Create and populate the available list of all
|
||||
* components who indicate that they want to be considered for selection.
|
||||
* Close all components who do not want to be considered for selection,
|
||||
* and destroy the opened list.
|
||||
*
|
||||
* Also find the basic component while we're doing all of this, and save
|
||||
* it in a global variable so that we can find it easily later (e.g.,
|
||||
* during scope selection).
|
||||
*/
|
||||
int mca_scoll_base_find_available(bool enable_progress_threads,
|
||||
bool enable_threads)
|
||||
{
|
||||
mca_base_component_list_item_t *cli, *next;
|
||||
const mca_base_component_t *component;
|
||||
|
||||
OPAL_LIST_FOREACH_SAFE(cli, next, &oshmem_scoll_base_framework.framework_components, mca_base_component_list_item_t) {
|
||||
component = cli->cli_component;
|
||||
|
||||
/* Call a subroutine to do the work, because the component may
|
||||
represent different versions of the coll MCA. */
|
||||
|
||||
if (OSHMEM_SUCCESS != init_query(component, enable_progress_threads,
|
||||
enable_threads)) {
|
||||
/* If the component doesn't want to run, then close it.
|
||||
Now close it out and release it from the DSO repository (if it's there). */
|
||||
opal_list_remove_item(&oshmem_scoll_base_framework.framework_components, &cli->super);
|
||||
mca_base_component_close(component, oshmem_scoll_base_framework.framework_output);
|
||||
OBJ_RELEASE(cli);
|
||||
}
|
||||
}
|
||||
|
||||
/* If we have no collective components available, it's an error.
|
||||
Thanks for playing! */
|
||||
|
||||
if (opal_list_get_size(&oshmem_scoll_base_framework.framework_components) == 0) {
|
||||
SCOLL_VERBOSE(10,
|
||||
"scoll:find_available: no components available!");
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
/* All done */
|
||||
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Query a component, see if it wants to run at all. If it does, save
|
||||
* some information. If it doesn't, close it.
|
||||
*/
|
||||
static int init_query(const mca_base_component_t * component,
|
||||
bool enable_progress_threads,
|
||||
bool enable_threads)
|
||||
{
|
||||
int ret;
|
||||
|
||||
SCOLL_VERBOSE(10,
|
||||
"scoll:find_available: querying scoll component %s",
|
||||
component->mca_component_name);
|
||||
|
||||
/* This component has already been successfully opened. So now
|
||||
query it. */
|
||||
|
||||
if (1 == component->mca_type_major_version
|
||||
&& 0 == component->mca_type_minor_version
|
||||
&& 0 == component->mca_type_release_version) {
|
||||
|
||||
mca_scoll_base_component_t *scoll =
|
||||
(mca_scoll_base_component_t *) component;
|
||||
|
||||
ret = scoll->scoll_init(enable_progress_threads, enable_threads);
|
||||
} else {
|
||||
/* Unrecognized coll API version */
|
||||
|
||||
SCOLL_VERBOSE(10,
|
||||
"scoll:find_available: unrecognized scoll API version (%d.%d.%d, ignored)",
|
||||
component->mca_type_major_version,
|
||||
component->mca_type_minor_version,
|
||||
component->mca_type_release_version);
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
/* Query done -- look at the return value to see what happened */
|
||||
|
||||
if (OSHMEM_SUCCESS != ret) {
|
||||
SCOLL_VERBOSE(10,
|
||||
"scoll:find_available: scoll component %s is not available",
|
||||
component->mca_component_name);
|
||||
if (NULL != component->mca_close_component) {
|
||||
component->mca_close_component();
|
||||
}
|
||||
} else {
|
||||
SCOLL_VERBOSE(10,
|
||||
"scoll:find_available: scoll component %s is available",
|
||||
component->mca_component_name);
|
||||
}
|
||||
|
||||
/* All done */
|
||||
|
||||
return ret;
|
||||
}
|
112
oshmem/mca/scoll/base/scoll_base_frame.c
Обычный файл
112
oshmem/mca/scoll/base/scoll_base_frame.c
Обычный файл
@ -0,0 +1,112 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "oshmem_config.h"
|
||||
|
||||
#include "oshmem/constants.h"
|
||||
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
|
||||
#include "oshmem/mca/scoll/scoll.h"
|
||||
#include "oshmem/mca/scoll/base/base.h"
|
||||
|
||||
/*
|
||||
* The following file was created by configure. It contains extern
|
||||
* statements and the definition of an array of pointers to each
|
||||
* component's public mca_base_component_t struct.
|
||||
*/
|
||||
|
||||
#include "oshmem/mca/scoll/base/static-components.h"
|
||||
|
||||
/*
|
||||
* Global variables; most of which are loaded by back-ends of MCA
|
||||
* variables
|
||||
*/
|
||||
long* mca_scoll_sync_array = NULL;
|
||||
|
||||
/*
|
||||
* Ensure all function pointers are NULL'ed out to start with
|
||||
*/
|
||||
static void scoll_base_module_construct(mca_scoll_base_module_t *m)
|
||||
{
|
||||
/* Collective function pointers */
|
||||
m->scoll_barrier = NULL;
|
||||
m->scoll_broadcast = NULL;
|
||||
m->scoll_collect = NULL;
|
||||
m->scoll_reduce = NULL;
|
||||
m->scoll_module_enable = NULL;
|
||||
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(mca_scoll_base_module_t, opal_object_t,
|
||||
scoll_base_module_construct, NULL);
|
||||
|
||||
int mca_scoll_enable(void)
|
||||
{
|
||||
int ret = OSHMEM_SUCCESS;
|
||||
|
||||
if (!mca_scoll_sync_array) {
|
||||
void* ptr = (void*) mca_scoll_sync_array;
|
||||
int i = 0;
|
||||
|
||||
MCA_MEMHEAP_CALL(private_alloc((_SHMEM_BARRIER_SYNC_SIZE * sizeof(*mca_scoll_sync_array)), &ptr));
|
||||
mca_scoll_sync_array = ptr;
|
||||
|
||||
for (i = 0; i < _SHMEM_BARRIER_SYNC_SIZE; i++) {
|
||||
mca_scoll_sync_array[i] = _SHMEM_SYNC_VALUE;
|
||||
}
|
||||
}
|
||||
|
||||
/* Note: it is done to support FCA only and we need to consider possibility to
|
||||
* find a way w/o this ugly hack
|
||||
*/
|
||||
if (OSHMEM_SUCCESS != (ret = mca_scoll_base_select(oshmem_group_all))) {
|
||||
return ret;
|
||||
}
|
||||
if (OSHMEM_SUCCESS != (ret = mca_scoll_base_select(oshmem_group_self))) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
/*
 * Framework register hook.  Nothing is registered at the base level, so
 * this always reports OSHMEM_SUCCESS.
 */
static int mca_scoll_base_register(mca_base_register_flag_t flags)
{
    (void) flags;   /* intentionally unused */

    return OSHMEM_SUCCESS;
}
|
||||
|
||||
static int mca_scoll_base_close(void)
|
||||
{
|
||||
/* This call should be done before memheap close */
|
||||
if (mca_scoll_sync_array) {
|
||||
void* ptr = (void*) mca_scoll_sync_array;
|
||||
|
||||
MCA_MEMHEAP_CALL(private_free(ptr));
|
||||
mca_scoll_sync_array = NULL;
|
||||
}
|
||||
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
/*
 * Framework open hook.  The base performs no work of its own here, so
 * this always reports OSHMEM_SUCCESS.
 */
static int mca_scoll_base_open(mca_base_open_flag_t flags)
{
    (void) flags;   /* intentionally unused */

    return OSHMEM_SUCCESS;
}
|
||||
|
||||
MCA_BASE_FRAMEWORK_DECLARE(oshmem, scoll,
|
||||
"OSHMEM SCOLL",
|
||||
mca_scoll_base_register,
|
||||
mca_scoll_base_open,
|
||||
mca_scoll_base_close,
|
||||
mca_scoll_base_static_components,
|
||||
0);
|
367
oshmem/mca/scoll/base/scoll_base_select.c
Обычный файл
367
oshmem/mca/scoll/base/scoll_base_select.c
Обычный файл
@ -0,0 +1,367 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "oshmem_config.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "oshmem/constants.h"
|
||||
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/mca/base/mca_base_component_repository.h"
|
||||
|
||||
#include "oshmem/mca/scoll/scoll.h"
|
||||
#include "oshmem/mca/scoll/base/base.h"
|
||||
#include "oshmem/proc/proc.h"
|
||||
#include "oshmem/runtime/runtime.h"
|
||||
|
||||
/*
|
||||
* Local types
|
||||
*/
|
||||
struct avail_com_t {
|
||||
opal_list_item_t super;
|
||||
|
||||
int ac_priority;
|
||||
mca_scoll_base_module_t *ac_module;
|
||||
};
|
||||
typedef struct avail_com_t avail_com_t;
|
||||
|
||||
/*
|
||||
* Local functions
|
||||
*/
|
||||
static opal_list_t *check_components(opal_list_t * components,
|
||||
oshmem_group_t * group);
|
||||
static int check_one_component(oshmem_group_t * group,
|
||||
const mca_base_component_t * component,
|
||||
mca_scoll_base_module_1_0_0_t ** module);
|
||||
|
||||
static int query(const mca_base_component_t * component,
|
||||
oshmem_group_t * group,
|
||||
int *priority,
|
||||
mca_scoll_base_module_1_0_0_t ** module);
|
||||
|
||||
static int query_1_0_0(const mca_scoll_base_component_1_0_0_t * scoll_component,
|
||||
oshmem_group_t * group,
|
||||
int *priority,
|
||||
mca_scoll_base_module_1_0_0_t ** module);
|
||||
|
||||
static int scoll_null_barrier(struct oshmem_group_t *group,
|
||||
long *pSync,
|
||||
int alg)
|
||||
{
|
||||
if (oshmem_proc_group_is_member(group)) {
|
||||
SCOLL_ERROR("internal error");
|
||||
oshmem_shmem_abort(-1);
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
static int scoll_null_broadcast(struct oshmem_group_t *group,
|
||||
int PE_root,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync,
|
||||
int alg)
|
||||
{
|
||||
if (oshmem_proc_group_is_member(group)) {
|
||||
SCOLL_ERROR("internal error");
|
||||
oshmem_shmem_abort(-1);
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
static int scoll_null_collect(struct oshmem_group_t *group,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync,
|
||||
bool nlong_type,
|
||||
int alg)
|
||||
{
|
||||
if (oshmem_proc_group_is_member(group)) {
|
||||
SCOLL_ERROR("internal error");
|
||||
oshmem_shmem_abort(-1);
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
static int scoll_null_reduce(struct oshmem_group_t *group,
|
||||
struct oshmem_op_t *op,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync,
|
||||
void *pWrk,
|
||||
int alg)
|
||||
{
|
||||
if (oshmem_proc_group_is_member(group)) {
|
||||
SCOLL_ERROR("internal error");
|
||||
oshmem_shmem_abort(-1);
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Stuff for the OBJ interface
|
||||
*/
|
||||
static OBJ_CLASS_INSTANCE(avail_com_t, opal_list_item_t, NULL, NULL);
|
||||
|
||||
#define COPY(module, group, func) \
|
||||
do { \
|
||||
if (NULL != module->scoll_ ## func) { \
|
||||
if (NULL != group->g_scoll.scoll_ ## func ## _module) { \
|
||||
OBJ_RELEASE(group->g_scoll.scoll_ ## func ## _module); \
|
||||
} \
|
||||
group->g_scoll.scoll_ ## func = module->scoll_ ## func; \
|
||||
group->g_scoll.scoll_ ## func ## _module = module; \
|
||||
OBJ_RETAIN(module); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define CLOSE(group, func) \
|
||||
do { \
|
||||
if (NULL != group->g_scoll.scoll_ ## func ## _module) { \
|
||||
OBJ_RELEASE(group->g_scoll.scoll_ ## func ## _module); \
|
||||
group->g_scoll.scoll_## func = NULL; \
|
||||
group->g_scoll.scoll_## func ## _module = NULL; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
int mca_scoll_base_group_unselect(struct oshmem_group_t * group)
|
||||
{
|
||||
/*
|
||||
* scoll close() is called before group destructors, so
|
||||
* do close group collectives if scoll modules are no longer
|
||||
* valid
|
||||
*
|
||||
* there is a memory leak here, because not doing close means
|
||||
* that we leaving object with dangling ref counts
|
||||
*/
|
||||
SCOLL_VERBOSE(10, "scoll:base:group_unselect: group: %d", group->id);
|
||||
|
||||
CLOSE(group, barrier);
|
||||
CLOSE(group, broadcast);
|
||||
CLOSE(group, collect);
|
||||
CLOSE(group, reduce);
|
||||
|
||||
/* All done */
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
/*
|
||||
* This function is called at the initialization time of every
|
||||
* group. It is used to select which coll component will be
|
||||
* active for a given group.
|
||||
*/
|
||||
int mca_scoll_base_select(struct oshmem_group_t *group)
|
||||
{
|
||||
opal_list_t *selectable;
|
||||
opal_list_item_t *item;
|
||||
int ret;
|
||||
|
||||
/* Announce */
|
||||
SCOLL_VERBOSE(10, "scoll:base:group_select: new group: %d", group->id);
|
||||
mca_scoll_base_group_unselect(group);
|
||||
memset(&group->g_scoll, 0, sizeof(mca_scoll_base_group_scoll_t));
|
||||
if (!oshmem_proc_group_is_member(group)) {
|
||||
group->g_scoll.scoll_barrier = scoll_null_barrier;
|
||||
group->g_scoll.scoll_broadcast = scoll_null_broadcast;
|
||||
group->g_scoll.scoll_collect = scoll_null_collect;
|
||||
group->g_scoll.scoll_reduce = scoll_null_reduce;
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
SCOLL_VERBOSE(10,
|
||||
"scoll:base:group_select: Checking all available modules");
|
||||
selectable = check_components(&oshmem_scoll_base_framework.framework_components, group);
|
||||
|
||||
/* Upon return from the above, the modules list will contain the
|
||||
list of modules that returned (priority >= 0). If we have no
|
||||
collective modules available, then print error and return. */
|
||||
if (NULL == selectable) {
|
||||
/* There's no modules available */
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
/* do the selection loop */
|
||||
for (item = opal_list_remove_first(selectable); NULL != item; item =
|
||||
opal_list_remove_first(selectable)) {
|
||||
avail_com_t *avail = (avail_com_t *) item;
|
||||
ret = avail->ac_module->scoll_module_enable(avail->ac_module, group);
|
||||
if (OSHMEM_SUCCESS != ret) {
|
||||
mca_scoll_base_group_unselect(group);
|
||||
} else {
|
||||
COPY(avail->ac_module, group, broadcast);
|
||||
COPY(avail->ac_module, group, collect);
|
||||
COPY(avail->ac_module, group, reduce);
|
||||
COPY(avail->ac_module, group, barrier);
|
||||
}
|
||||
OBJ_RELEASE(avail->ac_module);
|
||||
OBJ_RELEASE(avail);
|
||||
}
|
||||
|
||||
/* Done with the list from the check_components() call so release it. */
|
||||
OBJ_RELEASE(selectable);
|
||||
if ((NULL == group->g_scoll.scoll_barrier)
|
||||
|| (NULL == group->g_scoll.scoll_broadcast)
|
||||
|| (NULL == group->g_scoll.scoll_collect)
|
||||
|| (NULL == group->g_scoll.scoll_reduce)) {
|
||||
mca_scoll_base_group_unselect(group);
|
||||
return OSHMEM_ERR_NOT_FOUND;
|
||||
}
|
||||
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
/*
 * Comparison callback used when sorting the list of available modules.
 * Compares the ac_priority of the two entries and yields 1, -1 or 0 when
 * the first priority is greater than, less than or equal to the second.
 */
static int avail_coll_compare (opal_list_item_t **a,
                               opal_list_item_t **b)
{
    const int lhs = ((avail_com_t *) *a)->ac_priority;
    const int rhs = ((avail_com_t *) *b)->ac_priority;

    return (lhs > rhs) - (lhs < rhs);
}
|
||||
|
||||
/*
|
||||
* For each module in the list, check and see if it wants to run, and
|
||||
* do the resulting priority comparison. Make a list of modules to be
|
||||
* only those who returned that they want to run, and put them in
|
||||
* priority order.
|
||||
*/
|
||||
static opal_list_t *check_components(opal_list_t *components,
|
||||
oshmem_group_t *group)
|
||||
{
|
||||
int priority;
|
||||
const mca_base_component_t *component;
|
||||
mca_base_component_list_item_t *cli;
|
||||
mca_scoll_base_module_1_0_0_t *module;
|
||||
opal_list_t *selectable;
|
||||
avail_com_t *avail;
|
||||
|
||||
/* Make a list of the components that query successfully */
|
||||
selectable = OBJ_NEW(opal_list_t);
|
||||
|
||||
/* Scan through the list of components */
|
||||
OPAL_LIST_FOREACH(cli, &oshmem_scoll_base_framework.framework_components, mca_base_component_list_item_t) {
|
||||
component = cli->cli_component;
|
||||
|
||||
priority = check_one_component(group, component, &module);
|
||||
if (priority >= 0) {
|
||||
/* We have a component that indicated that it wants to run
|
||||
by giving us a module */
|
||||
avail = OBJ_NEW(avail_com_t);
|
||||
avail->ac_priority = priority;
|
||||
avail->ac_module = module;
|
||||
|
||||
opal_list_append(selectable, &avail->super);
|
||||
}
|
||||
}
|
||||
|
||||
/* If we didn't find any available components, return an error */
|
||||
if (0 == opal_list_get_size(selectable)) {
|
||||
OBJ_RELEASE(selectable);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Put this list in priority order */
|
||||
opal_list_sort(selectable, avail_coll_compare);
|
||||
|
||||
/* All done */
|
||||
return selectable;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check a single component
|
||||
*/
|
||||
static int check_one_component(oshmem_group_t *group,
|
||||
const mca_base_component_t *component,
|
||||
mca_scoll_base_module_1_0_0_t **module)
|
||||
{
|
||||
int err;
|
||||
int priority = -1;
|
||||
|
||||
err = query(component, group, &priority, module);
|
||||
|
||||
if (OSHMEM_SUCCESS == err) {
|
||||
priority = (priority < 100) ? priority : 100;
|
||||
SCOLL_VERBOSE(10,
|
||||
"scoll:base:group_select: component available: %s, priority: %d",
|
||||
component->mca_component_name, priority);
|
||||
|
||||
} else {
|
||||
priority = -1;
|
||||
SCOLL_VERBOSE(10,
|
||||
"scoll:base:group_select: component not available: %s",
|
||||
component->mca_component_name);
|
||||
}
|
||||
|
||||
return priority;
|
||||
}
|
||||
|
||||
/**************************************************************************
|
||||
* Query functions
|
||||
**************************************************************************/
|
||||
|
||||
/*
|
||||
* Take any version of a coll module, query it, and return the right
|
||||
* module struct
|
||||
*/
|
||||
static int query(const mca_base_component_t * component,
|
||||
oshmem_group_t *group,
|
||||
int *priority,
|
||||
mca_scoll_base_module_1_0_0_t **module)
|
||||
{
|
||||
*module = NULL;
|
||||
if (1 == component->mca_type_major_version
|
||||
&& 0 == component->mca_type_minor_version
|
||||
&& 0 == component->mca_type_release_version) {
|
||||
const mca_scoll_base_component_1_0_0_t *coll100 =
|
||||
(mca_scoll_base_component_1_0_0_t *) component;
|
||||
|
||||
return query_1_0_0(coll100, group, priority, module);
|
||||
}
|
||||
|
||||
/* Unknown coll API version -- return error */
|
||||
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
static int query_1_0_0(const mca_scoll_base_component_1_0_0_t *component,
|
||||
oshmem_group_t *group,
|
||||
int *priority,
|
||||
mca_scoll_base_module_1_0_0_t **module)
|
||||
{
|
||||
mca_scoll_base_module_1_0_0_t *ret;
|
||||
|
||||
/* There's currently no need for conversion */
|
||||
|
||||
ret = component->scoll_query(group, priority);
|
||||
if (NULL != ret) {
|
||||
*module = ret;
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
return OSHMEM_ERROR;
|
||||
}
|
42
oshmem/mca/scoll/basic/Makefile.am
Обычный файл
42
oshmem/mca/scoll/basic/Makefile.am
Обычный файл
@ -0,0 +1,42 @@
|
||||
#
|
||||
# Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
AM_CFLAGS = $(OSHMEM_CFLAGS)
|
||||
|
||||
sources = \
|
||||
scoll_basic.h \
|
||||
scoll_basic_module.c \
|
||||
scoll_basic_component.c \
|
||||
scoll_basic_barrier.c \
|
||||
scoll_basic_broadcast.c \
|
||||
scoll_basic_collect.c \
|
||||
scoll_basic_reduce.c
|
||||
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
# (for static builds).
|
||||
|
||||
if MCA_BUILD_oshmem_scoll_basic_DSO
|
||||
component_noinst =
|
||||
component_install = mca_scoll_basic.la
|
||||
else
|
||||
component_noinst = libmca_scoll_basic.la
|
||||
component_install =
|
||||
endif
|
||||
|
||||
mcacomponentdir = $(pkglibdir)
|
||||
mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_scoll_basic_la_SOURCES = $(sources)
|
||||
mca_scoll_basic_la_LDFLAGS = -module -avoid-version
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_scoll_basic_la_SOURCES =$(sources)
|
||||
libmca_scoll_basic_la_LDFLAGS = -module -avoid-version
|
13
oshmem/mca/scoll/basic/configure.params
Обычный файл
13
oshmem/mca/scoll/basic/configure.params
Обычный файл
@ -0,0 +1,13 @@
|
||||
# -*- shell-script -*-
|
||||
# Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# Specific to this module
|
||||
|
||||
PARAM_CONFIG_FILES="Makefile"
|
89
oshmem/mca/scoll/basic/scoll_basic.h
Обычный файл
89
oshmem/mca/scoll/basic/scoll_basic.h
Обычный файл
@ -0,0 +1,89 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef MCA_SCOLL_BASIC_H
|
||||
#define MCA_SCOLL_BASIC_H
|
||||
|
||||
#include "oshmem_config.h"
|
||||
|
||||
#include "opal/mca/mca.h"
|
||||
#include "oshmem/mca/scoll/scoll.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/* Globally exported variables */
|
||||
|
||||
OSHMEM_MODULE_DECLSPEC extern mca_scoll_base_component_1_0_0_t
|
||||
mca_scoll_basic_component;
|
||||
|
||||
extern int mca_scoll_basic_priority_param;
|
||||
OSHMEM_DECLSPEC extern int mca_scoll_basic_param_barrier_algorithm;
|
||||
extern int mca_scoll_basic_param_broadcast_algorithm;
|
||||
extern int mca_scoll_basic_param_collect_algorithm;
|
||||
extern int mca_scoll_basic_param_reduce_algorithm;
|
||||
|
||||
/* API functions */
|
||||
|
||||
int mca_scoll_basic_init(bool enable_progress_threads, bool enable_threads);
|
||||
mca_scoll_base_module_t*
|
||||
mca_scoll_basic_query(struct oshmem_group_t *group, int *priority);
|
||||
|
||||
enum {
|
||||
SHMEM_SYNC_INIT = _SHMEM_SYNC_VALUE,
|
||||
SHMEM_SYNC_WAIT = -2,
|
||||
SHMEM_SYNC_RUN = -3,
|
||||
SHMEM_SYNC_READY = -4,
|
||||
};
|
||||
|
||||
int mca_scoll_basic_barrier(struct oshmem_group_t *group, long *pSync, int alg);
|
||||
int mca_scoll_basic_broadcast(struct oshmem_group_t *group,
|
||||
int PE_root,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync,
|
||||
int alg);
|
||||
int mca_scoll_basic_collect(struct oshmem_group_t *group,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync,
|
||||
bool nlong_type,
|
||||
int alg);
|
||||
int mca_scoll_basic_reduce(struct oshmem_group_t *group,
|
||||
struct oshmem_op_t *op,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync,
|
||||
void *pWrk,
|
||||
int alg);
|
||||
|
||||
/*
 * Integer base-2 logarithm, rounded down: the index of the highest set
 * bit of val.  Both 0 and 1 yield 0.
 */
static inline unsigned int scoll_log2(unsigned long val)
{
    unsigned int exponent = 0;

    /* Each shift that still leaves bits behind adds one to the result */
    for (val >>= 1; val != 0; val >>= 1) {
        exponent++;
    }

    return exponent;
}
|
||||
|
||||
struct mca_scoll_basic_module_t {
|
||||
mca_scoll_base_module_t super;
|
||||
};
|
||||
typedef struct mca_scoll_basic_module_t mca_scoll_basic_module_t;
|
||||
OBJ_CLASS_DECLARATION(mca_scoll_basic_module_t);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* MCA_SCOLL_BASIC_H */
|
585
oshmem/mca/scoll/basic/scoll_basic_barrier.c
Обычный файл
585
oshmem/mca/scoll/basic/scoll_basic_barrier.c
Обычный файл
@ -0,0 +1,585 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "oshmem_config.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "orte/mca/grpcomm/grpcomm.h"
|
||||
|
||||
#include "oshmem/constants.h"
|
||||
#include "oshmem/mca/spml/spml.h"
|
||||
#include "oshmem/mca/scoll/scoll.h"
|
||||
#include "oshmem/mca/scoll/base/base.h"
|
||||
#include "oshmem/proc/proc.h"
|
||||
#include "scoll_basic.h"
|
||||
|
||||
static int __algorithm_central_counter(struct oshmem_group_t *group,
|
||||
long *pSync);
|
||||
static int __algorithm_tournament(struct oshmem_group_t *group, long *pSync);
|
||||
static int __algorithm_recursive_doubling(struct oshmem_group_t *group,
|
||||
long *pSync);
|
||||
static int __algorithm_dissemination(struct oshmem_group_t *group, long *pSync);
|
||||
static int __algorithm_basic(struct oshmem_group_t *group, long *pSync);
|
||||
static int __algorithm_adaptive(struct oshmem_group_t *group, long *pSync);
|
||||
|
||||
int mca_scoll_basic_barrier(struct oshmem_group_t *group, long *pSync, int alg)
|
||||
{
|
||||
int rc = OSHMEM_SUCCESS;
|
||||
|
||||
/* Arguments validation */
|
||||
if (!group) {
|
||||
SCOLL_ERROR("Active set (group) of PE is not defined");
|
||||
rc = OSHMEM_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
if ((rc == OSHMEM_SUCCESS) && oshmem_proc_group_is_member(group)) {
|
||||
if (pSync) {
|
||||
alg = (alg == SCOLL_DEFAULT_ALG ?
|
||||
mca_scoll_basic_param_barrier_algorithm : alg);
|
||||
switch (alg) {
|
||||
case SCOLL_ALG_BARRIER_CENTRAL_COUNTER:
|
||||
{
|
||||
rc = __algorithm_central_counter(group, pSync);
|
||||
break;
|
||||
}
|
||||
case SCOLL_ALG_BARRIER_TOURNAMENT:
|
||||
{
|
||||
rc = __algorithm_tournament(group, pSync);
|
||||
break;
|
||||
}
|
||||
case SCOLL_ALG_BARRIER_RECURSIVE_DOUBLING:
|
||||
{
|
||||
rc = __algorithm_recursive_doubling(group, pSync);
|
||||
break;
|
||||
}
|
||||
case SCOLL_ALG_BARRIER_DISSEMINATION:
|
||||
{
|
||||
rc = __algorithm_dissemination(group, pSync);
|
||||
break;
|
||||
}
|
||||
case SCOLL_ALG_BARRIER_BASIC:
|
||||
{
|
||||
rc = __algorithm_basic(group, pSync);
|
||||
break;
|
||||
}
|
||||
case SCOLL_ALG_BARRIER_ADAPTIVE:
|
||||
{
|
||||
rc = __algorithm_adaptive(group, pSync);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
rc = __algorithm_recursive_doubling(group, pSync);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
SCOLL_ERROR("Incorrect argument pSync");
|
||||
rc = OSHMEM_ERR_BAD_PARAM;
|
||||
}
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
This algorithm is quite simple and straightforward. But because of itТs obvious simplicity and
|
||||
the naive prove for correctness it is implemented quite often. One node asks peers if they are
|
||||
achieve barrier state. When all processors are ready it signals to go ahead.
|
||||
Outlay:
|
||||
NP-1 competing network transfers are needed to implement the counter
|
||||
The memory usage is constant (1 byte) per node.
|
||||
*/
|
||||
static int __algorithm_central_counter(struct oshmem_group_t *group,
|
||||
long *pSync)
|
||||
{
|
||||
int rc = OSHMEM_SUCCESS;
|
||||
long value = SHMEM_SYNC_INIT;
|
||||
int root_id = 0;
|
||||
int PE_root = oshmem_proc_pe(group->proc_array[root_id]);
|
||||
int i = 0;
|
||||
|
||||
SCOLL_VERBOSE(12, "[#%d] Barrier algorithm: Central Counter", group->my_pe);
|
||||
SCOLL_VERBOSE(15, "[#%d] pSync[0] = %ld", group->my_pe, pSync[0]);
|
||||
|
||||
/* Set current state as WAIT */
|
||||
pSync[0] = SHMEM_SYNC_WAIT;
|
||||
|
||||
/* Root processes synchronization */
|
||||
if (PE_root == group->my_pe) {
|
||||
int pe_cur = 0;
|
||||
long wait_pe_count = 0;
|
||||
int* wait_pe_array = NULL;
|
||||
|
||||
wait_pe_array = malloc(sizeof(*wait_pe_array) * group->proc_count);
|
||||
if (wait_pe_array) {
|
||||
SCOLL_VERBOSE(14, "[#%d] PE is the root", group->my_pe);
|
||||
|
||||
wait_pe_count = group->proc_count;
|
||||
for (i = 0; i < group->proc_count; i++) {
|
||||
wait_pe_array[i] = oshmem_proc_pe(group->proc_array[i]);
|
||||
}
|
||||
wait_pe_array[root_id] = OSHMEM_PE_INVALID;
|
||||
wait_pe_count--;
|
||||
|
||||
while (wait_pe_count) {
|
||||
for (i = 0; (i < group->proc_count) && (rc == OSHMEM_SUCCESS);
|
||||
i++) {
|
||||
pe_cur = wait_pe_array[i];
|
||||
if (pe_cur != OSHMEM_PE_INVALID) {
|
||||
rc = MCA_SPML_CALL(get((void*)pSync, sizeof(value), (void*)&value, pe_cur));
|
||||
if ((rc == OSHMEM_SUCCESS)
|
||||
&& (value == SHMEM_SYNC_WAIT)) {
|
||||
wait_pe_array[i] = OSHMEM_PE_INVALID;
|
||||
wait_pe_count--;
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] PE#%d is ready (wait list counter: %d)",
|
||||
group->my_pe, pe_cur, (int)wait_pe_count);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SCOLL_VERBOSE(14, "[#%d] PE signals to all", group->my_pe);
|
||||
value = SHMEM_SYNC_RUN;
|
||||
for (i = 0; (i < group->proc_count) && (rc == OSHMEM_SUCCESS);
|
||||
i++) {
|
||||
pe_cur = oshmem_proc_pe(group->proc_array[i]);
|
||||
if (pe_cur != PE_root) {
|
||||
rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, pe_cur));
|
||||
}
|
||||
}
|
||||
|
||||
free(wait_pe_array);
|
||||
} else {
|
||||
rc = OSHMEM_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* Possibly this is unnecessary...
|
||||
But imagine the scenario when you have 2 sequential barriers and the root PE is the fastest one.
|
||||
The root could leave the first barrier and in the second barrier it could get SHMEM_SYNC_WAIT value on
|
||||
remote node before the remote node receives its SHMEM_SYNC_RUN value in the first barrier
|
||||
*/
|
||||
/* TODO: actually it must be quiet */
|
||||
MCA_SPML_CALL(fence());
|
||||
}
|
||||
/* Wait for RUN signal */
|
||||
else {
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] PE waits for a signal from root",
|
||||
group->my_pe);
|
||||
|
||||
value = SHMEM_SYNC_RUN;
|
||||
rc = MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_EQ, (void*)&value, SHMEM_LONG));
|
||||
}
|
||||
|
||||
/* Restore initial values */
|
||||
SCOLL_VERBOSE(12,
|
||||
"[#%d] Restore special synchronization array",
|
||||
group->my_pe);
|
||||
for (i = 0; pSync && (i < _SHMEM_BARRIER_SYNC_SIZE); i++) {
|
||||
pSync[i] = _SHMEM_SYNC_VALUE;
|
||||
}
|
||||
|
||||
SCOLL_VERBOSE(15, "[#%d] pSync[0] = %ld", group->my_pe, pSync[0]);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
The Tournament Barrier, proposed by Hensgen, Finkel and Manber, is mostly suitable for shared memory
|
||||
multiprocessors because it benefits from several caching mechanisms.
|
||||
The algorithm is similar to a tournament game. In each round two
|
||||
nodes play against each other. The winner is known in advance and waits until the loser arrives. The
|
||||
winners play against each other in the next round. The overall winner (the champion) notifies all others
|
||||
about the end of the barrier.
|
||||
Outlay:
|
||||
The game scales with log2(NP) and uses 1 byte of memory.
|
||||
*/
|
||||
static int __algorithm_tournament(struct oshmem_group_t *group, long *pSync)
|
||||
{
|
||||
int rc = OSHMEM_SUCCESS;
|
||||
int round = 0;
|
||||
int exit_flag = group->proc_count - 1;
|
||||
long value = SHMEM_SYNC_INIT;
|
||||
int my_id = oshmem_proc_group_find_id(group, group->my_pe);
|
||||
int peer_id = 0;
|
||||
int peer_pe = 0;
|
||||
int i = 0;
|
||||
|
||||
SCOLL_VERBOSE(12, "[#%d] Barrier algorithm: Tournament", group->my_pe);
|
||||
SCOLL_VERBOSE(15, "[#%d] pSync[0] = %ld", group->my_pe, pSync[0]);
|
||||
|
||||
/* Set current state as WAIT */
|
||||
pSync[0] = SHMEM_SYNC_WAIT;
|
||||
|
||||
while (exit_flag && (rc == OSHMEM_SUCCESS)) {
|
||||
/* Define a peer for competition */
|
||||
peer_id = my_id ^ (1 << round);
|
||||
|
||||
/* Update exit condition and round counter */
|
||||
exit_flag >>= 1;
|
||||
round++;
|
||||
|
||||
/* Do not have peer for tournament */
|
||||
if (peer_id >= group->proc_count)
|
||||
continue;
|
||||
|
||||
if (my_id < peer_id) {
|
||||
pSync[0] = peer_id;
|
||||
value = my_id;
|
||||
|
||||
SCOLL_VERBOSE(14, "[#%d] round = %d wait", group->my_pe, round);
|
||||
rc = MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_EQ, (void*)&value, SHMEM_LONG));
|
||||
} else {
|
||||
peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
|
||||
|
||||
#if 1 /* It is ugly implementation of compare and swap operation
|
||||
Usage of this hack does not give performance improvement but
|
||||
it is expected that shmem_long_cswap() will make it faster.
|
||||
*/
|
||||
do {
|
||||
MCA_SPML_CALL(get((void*)pSync, sizeof(value), (void*)&value, peer_pe));
|
||||
} while (value != my_id);
|
||||
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] round = %d signals to #%d",
|
||||
group->my_pe, round, peer_pe);
|
||||
value = peer_id;
|
||||
rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe));
|
||||
#else
|
||||
SCOLL_VERBOSE(14, "[#%d] round = %d signals to #%d", group->my_pe, round, peer_pe);
|
||||
do
|
||||
{
|
||||
rc = MCA_ATOMIC_CALL(cswap((void*)pSync, (void*)&value, (const void*)&my_id, (const void*)&peer_id, sizeof(value), peer_pe));
|
||||
}while (value != my_id);
|
||||
#endif
|
||||
SCOLL_VERBOSE(14, "[#%d] round = %d wait", group->my_pe, round);
|
||||
value = SHMEM_SYNC_RUN;
|
||||
rc = MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_EQ, (void*)&value, SHMEM_LONG));
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Restore initial values */
|
||||
SCOLL_VERBOSE(12,
|
||||
"[#%d] Restore special synchronization array",
|
||||
group->my_pe);
|
||||
for (i = 0; pSync && (i < _SHMEM_BARRIER_SYNC_SIZE); i++) {
|
||||
pSync[i] = _SHMEM_SYNC_VALUE;
|
||||
}
|
||||
|
||||
/* Send result to all PE in group */
|
||||
if ((my_id == 0) && (rc == OSHMEM_SUCCESS)) {
|
||||
SCOLL_VERBOSE(14, "[#%d] signals to all", group->my_pe);
|
||||
|
||||
value = SHMEM_SYNC_RUN;
|
||||
for (peer_id = 1;
|
||||
(peer_id < group->proc_count) && (rc == OSHMEM_SUCCESS);
|
||||
peer_id++) {
|
||||
peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
|
||||
rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe));
|
||||
}
|
||||
}
|
||||
|
||||
SCOLL_VERBOSE(15, "[#%d] pSync[0] = %ld", group->my_pe, pSync[0]);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
Pairwise Exchange With Recursive Doubling.
|
||||
Rinka Gupta, Vinod Tipparaju, Jare Nieplocha, and Dhabaleswar Panda. Efficient Barrier
|
||||
using Remote Memory Operations on VIA-Based Clusters. In 2002 IEEE International
|
||||
Conference on Cluster Computing (CLUSTER 2002), page 83. IEEE Computer Society, 2002.
|
||||
Outlay:
|
||||
The algorithm uses a maximum of log2(NP) + 2 network writes and P bytes memory per node.
|
||||
*/
|
||||
static int __algorithm_recursive_doubling(struct oshmem_group_t *group,
|
||||
long *pSync)
|
||||
{
|
||||
int rc = OSHMEM_SUCCESS;
|
||||
int round = 0;
|
||||
int floor2_proc = 0;
|
||||
int exit_flag = 0;
|
||||
long value = SHMEM_SYNC_INIT;
|
||||
int my_id = oshmem_proc_group_find_id(group, group->my_pe);
|
||||
int peer_id = 0;
|
||||
int peer_pe = 0;
|
||||
int i = 0;
|
||||
|
||||
floor2_proc = 1;
|
||||
i = group->proc_count;
|
||||
i >>= 1;
|
||||
while (i) {
|
||||
i >>= 1;
|
||||
floor2_proc <<= 1;
|
||||
}
|
||||
|
||||
SCOLL_VERBOSE(12,
|
||||
"[#%d] Barrier algorithm: Recursive Doubling",
|
||||
group->my_pe);
|
||||
SCOLL_VERBOSE(15,
|
||||
"[#%d] pSync[0] = %ld floor2_proc = %d",
|
||||
group->my_pe, pSync[0], floor2_proc);
|
||||
|
||||
if (my_id >= floor2_proc) {
|
||||
/* I am in extra group, my partner is node (my_id-y) in basic group */
|
||||
peer_id = my_id - floor2_proc;
|
||||
peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
|
||||
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] is extra and signal to #%d",
|
||||
group->my_pe, peer_pe);
|
||||
value = SHMEM_SYNC_WAIT;
|
||||
rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe));
|
||||
|
||||
SCOLL_VERBOSE(14, "[#%d] wait", group->my_pe);
|
||||
value = SHMEM_SYNC_RUN;
|
||||
rc = MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_EQ, (void*)&value, SHMEM_LONG));
|
||||
|
||||
/* Restore initial values */
|
||||
SCOLL_VERBOSE(12,
|
||||
"[#%d] Restore special synchronization array",
|
||||
group->my_pe);
|
||||
for (i = 0; pSync && (i < _SHMEM_BARRIER_SYNC_SIZE); i++) {
|
||||
pSync[i] = _SHMEM_SYNC_VALUE;
|
||||
}
|
||||
} else {
|
||||
/* Wait for a peer from extra group */
|
||||
if ((group->proc_count - floor2_proc) > my_id) {
|
||||
/* I am in basic group, my partner is node (my_id+y) in extra group */
|
||||
peer_id = my_id + floor2_proc;
|
||||
peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
|
||||
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] wait a signal from #%d",
|
||||
group->my_pe, peer_pe);
|
||||
value = SHMEM_SYNC_WAIT;
|
||||
rc = MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_EQ, (void*)&value, SHMEM_LONG));
|
||||
}
|
||||
|
||||
/* Pairwise exchange */
|
||||
exit_flag = floor2_proc - 1;
|
||||
pSync[0] = round;
|
||||
while (exit_flag && (rc == OSHMEM_SUCCESS)) {
|
||||
/* Define a peer for competition */
|
||||
peer_id = my_id ^ (1 << round);
|
||||
|
||||
/* Update exit condition and round counter */
|
||||
exit_flag >>= 1;
|
||||
round++;
|
||||
|
||||
peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
|
||||
|
||||
#if 1 /* It is ugly implementation of compare and swap operation
|
||||
Usage of this hack does not give performance improvement but
|
||||
it is expected that shmem_long_cswap() will make it faster.
|
||||
*/
|
||||
do {
|
||||
MCA_SPML_CALL(get((void*)pSync, sizeof(value), (void*)&value, peer_pe));
|
||||
} while (value != (round - 1));
|
||||
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] round = %d signals to #%d",
|
||||
group->my_pe, round, peer_pe);
|
||||
value = round;
|
||||
rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe));
|
||||
#else
|
||||
SCOLL_VERBOSE(14, "[#%d] round = %d signals to #%d", group->my_pe, round, peer_pe);
|
||||
{
|
||||
long cond = round - 1;
|
||||
do
|
||||
{
|
||||
rc = MCA_ATOMIC_CALL(cswap((void*)pSync, (void*)&value, (const void*)&cond, (const void*)&round, sizeof(value), peer_pe));
|
||||
}while (value != (round-1));
|
||||
}
|
||||
#endif
|
||||
|
||||
SCOLL_VERBOSE(14, "[#%d] round = %d wait", group->my_pe, round);
|
||||
value = round;
|
||||
rc = MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_GE, (void*)&value, SHMEM_LONG));
|
||||
}
|
||||
|
||||
/* Restore initial values */
|
||||
SCOLL_VERBOSE(12,
|
||||
"[#%d] Restore special synchronization array",
|
||||
group->my_pe);
|
||||
for (i = 0; pSync && (i < _SHMEM_BARRIER_SYNC_SIZE); i++) {
|
||||
pSync[i] = _SHMEM_SYNC_VALUE;
|
||||
}
|
||||
|
||||
/* Notify a peer from extra group */
|
||||
if ((group->proc_count - floor2_proc) > my_id) {
|
||||
/* I am in basic group, my partner is node (my_id+y) in extra group */
|
||||
peer_id = my_id + floor2_proc;
|
||||
peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
|
||||
|
||||
SCOLL_VERBOSE(14, "[#%d] signals to #%d", group->my_pe, peer_pe);
|
||||
value = SHMEM_SYNC_RUN;
|
||||
rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe));
|
||||
}
|
||||
}
|
||||
|
||||
SCOLL_VERBOSE(15, "[#%d] pSync[0] = %ld", group->my_pe, pSync[0]);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
The Dissemination Barrier, introduced by Hengsen, Finkel and Manser in 1998.
|
||||
The algorithm is mostly an improvement of the Butterfly Barrier for non power of two processor counts.
|
||||
It uses the same pairwise synchronization but with other partners.
|
||||
Outlay:
|
||||
The game scales with log2(NP) and uses 1 byte of memory.
|
||||
*/
|
||||
static int __algorithm_dissemination(struct oshmem_group_t *group, long *pSync)
|
||||
{
|
||||
int rc = OSHMEM_SUCCESS;
|
||||
int round = 0;
|
||||
int log2_proc = 0;
|
||||
long value = SHMEM_SYNC_INIT;
|
||||
int my_id = oshmem_proc_group_find_id(group, group->my_pe);
|
||||
int peer_id = 0;
|
||||
int peer_pe = 0;
|
||||
int i = 0;
|
||||
|
||||
log2_proc = scoll_log2((unsigned long) group->proc_count);
|
||||
|
||||
SCOLL_VERBOSE(12, "[#%d] Barrier algorithm: Dissemination", group->my_pe);
|
||||
SCOLL_VERBOSE(15,
|
||||
"[#%d] pSync[0] = %ld floor2_proc = %d",
|
||||
group->my_pe, pSync[0], log2_proc);
|
||||
|
||||
pSync[0] = round;
|
||||
for (round = 0; (round <= log2_proc) && (rc == OSHMEM_SUCCESS); round++) {
|
||||
/* Define a peer to send signal */
|
||||
peer_id = (my_id + (1 << round)) % group->proc_count;
|
||||
|
||||
peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
|
||||
|
||||
#if 1 /* It is ugly implementation of compare and swap operation
|
||||
Usage of this hack does not give performance improvement but
|
||||
it is expected that shmem_long_cswap() will make it faster.
|
||||
*/
|
||||
do {
|
||||
MCA_SPML_CALL(get((void*)pSync, sizeof(value), (void*)&value, peer_pe));
|
||||
} while (value != round);
|
||||
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] round = %d signals to #%d",
|
||||
group->my_pe, round, peer_pe);
|
||||
value = round + 1;
|
||||
rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe));
|
||||
#endif
|
||||
|
||||
SCOLL_VERBOSE(14, "[#%d] round = %d wait", group->my_pe, round);
|
||||
value = round + 1;
|
||||
rc = MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_GE, (void*)&value, SHMEM_LONG));
|
||||
}
|
||||
|
||||
/* Restore initial values */
|
||||
SCOLL_VERBOSE(12,
|
||||
"[#%d] Restore special synchronization array",
|
||||
group->my_pe);
|
||||
for (i = 0; pSync && (i < _SHMEM_BARRIER_SYNC_SIZE); i++) {
|
||||
pSync[i] = _SHMEM_SYNC_VALUE;
|
||||
}
|
||||
|
||||
SCOLL_VERBOSE(15, "[#%d] pSync[0] = %ld", group->my_pe, pSync[0]);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int __algorithm_basic(struct oshmem_group_t *group, long *pSync)
|
||||
{
|
||||
int rc = OSHMEM_SUCCESS;
|
||||
int root_id = 0;
|
||||
int PE_root = oshmem_proc_pe(group->proc_array[root_id]);
|
||||
int i = 0;
|
||||
|
||||
SCOLL_VERBOSE(12, "[#%d] Barrier algorithm: Basic", group->my_pe);
|
||||
|
||||
if (PE_root != group->my_pe) {
|
||||
rc = MCA_SPML_CALL(send(NULL, 0, PE_root, MCA_SPML_BASE_PUT_STANDARD));
|
||||
if (OSHMEM_SUCCESS != rc) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
rc = MCA_SPML_CALL(recv(NULL, 0, PE_root));
|
||||
if (OSHMEM_SUCCESS != rc) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
/* The root collects and broadcasts the messages. */
|
||||
|
||||
else {
|
||||
int pe_cur = 0;
|
||||
|
||||
for (i = 0; (i < group->proc_count) && (rc == OSHMEM_SUCCESS); i++) {
|
||||
pe_cur = oshmem_proc_pe(group->proc_array[i]);
|
||||
if (pe_cur != PE_root) {
|
||||
rc = MCA_SPML_CALL(recv(NULL, 0, SHMEM_ANY_SOURCE));
|
||||
}
|
||||
if (OSHMEM_SUCCESS != rc) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; (i < group->proc_count) && (rc == OSHMEM_SUCCESS); i++) {
|
||||
pe_cur = oshmem_proc_pe(group->proc_array[i]);
|
||||
if (pe_cur != PE_root) {
|
||||
rc = MCA_SPML_CALL(send(NULL, 0, pe_cur, MCA_SPML_BASE_PUT_STANDARD));
|
||||
}
|
||||
if (OSHMEM_SUCCESS != rc) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int __algorithm_adaptive(struct oshmem_group_t *group, long *pSync)
|
||||
{
|
||||
int rc = OSHMEM_SUCCESS;
|
||||
bool local_peers_only = true;
|
||||
|
||||
SCOLL_VERBOSE(12, "[#%d] Barrier algorithm: Adaptive", group->my_pe);
|
||||
|
||||
/* check if we have only local peers */
|
||||
{
|
||||
int i = 0;
|
||||
|
||||
for (i = 0; i < group->proc_count; i++) {
|
||||
if (i == group->id)
|
||||
continue;
|
||||
|
||||
if (!OPAL_PROC_ON_LOCAL_NODE(group->proc_array[i]->proc_flags)) {
|
||||
local_peers_only = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Select algorithm we use:
|
||||
* use send/recv way for group in the same node and for np < 32
|
||||
* otherwise use put/get way
|
||||
*/
|
||||
if (local_peers_only || (group->proc_count < 32)) {
|
||||
rc = __algorithm_basic(group, pSync);
|
||||
} else {
|
||||
rc = __algorithm_recursive_doubling(group, pSync);
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
250
oshmem/mca/scoll/basic/scoll_basic_broadcast.c
Обычный файл
250
oshmem/mca/scoll/basic/scoll_basic_broadcast.c
Обычный файл
@ -0,0 +1,250 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "oshmem_config.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "orte/mca/grpcomm/grpcomm.h"
|
||||
|
||||
#include "opal/util/bit_ops.h"
|
||||
|
||||
#include "oshmem/constants.h"
|
||||
#include "oshmem/mca/spml/spml.h"
|
||||
#include "oshmem/mca/scoll/scoll.h"
|
||||
#include "oshmem/mca/scoll/base/base.h"
|
||||
#include "scoll_basic.h"
|
||||
|
||||
static int __algorithm_central_counter(struct oshmem_group_t *group,
|
||||
int PE_root,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync);
|
||||
static int __algorithm_binomial_tree(struct oshmem_group_t *group,
|
||||
int PE_root,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync);
|
||||
|
||||
int mca_scoll_basic_broadcast(struct oshmem_group_t *group,
|
||||
int PE_root,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync,
|
||||
int alg)
|
||||
{
|
||||
int rc = OSHMEM_SUCCESS;
|
||||
|
||||
/* Arguments validation */
|
||||
if (!group) {
|
||||
SCOLL_ERROR("Active set (group) of PE is not defined");
|
||||
rc = OSHMEM_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
/* Check if this PE is part of the group */
|
||||
if ((rc == OSHMEM_SUCCESS) && oshmem_proc_group_is_member(group)) {
|
||||
int i = 0;
|
||||
|
||||
if (pSync) {
|
||||
alg = (alg == SCOLL_DEFAULT_ALG ?
|
||||
mca_scoll_basic_param_broadcast_algorithm : alg);
|
||||
switch (alg) {
|
||||
case SCOLL_ALG_BROADCAST_CENTRAL_COUNTER:
|
||||
{
|
||||
rc = __algorithm_central_counter(group,
|
||||
PE_root,
|
||||
target,
|
||||
source,
|
||||
nlong,
|
||||
pSync);
|
||||
break;
|
||||
}
|
||||
case SCOLL_ALG_BROADCAST_BINOMIAL:
|
||||
{
|
||||
rc = __algorithm_binomial_tree(group,
|
||||
PE_root,
|
||||
target,
|
||||
source,
|
||||
nlong,
|
||||
pSync);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
rc = __algorithm_binomial_tree(group,
|
||||
PE_root,
|
||||
target,
|
||||
source,
|
||||
nlong,
|
||||
pSync);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
SCOLL_ERROR("Incorrect argument pSync");
|
||||
rc = OSHMEM_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
/* Restore initial values */
|
||||
SCOLL_VERBOSE(12,
|
||||
"[#%d] Restore special synchronization array",
|
||||
group->my_pe);
|
||||
for (i = 0; pSync && (i < _SHMEM_BCAST_SYNC_SIZE); i++) {
|
||||
pSync[i] = _SHMEM_SYNC_VALUE;
|
||||
}
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
This algorithm is quite simple and straightforward. But because of itТs obvious simplicity and
|
||||
the naive prove for correctness it is implemented quite often. The root send data to all.
|
||||
Outlay:
|
||||
NP-1 competing network transfers are needed to implement the counter
|
||||
The memory usage is constant (1 byte) per node.
|
||||
*/
|
||||
static int __algorithm_central_counter(struct oshmem_group_t *group,
|
||||
int PE_root,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync)
|
||||
{
|
||||
int rc = OSHMEM_SUCCESS;
|
||||
int i = 0;
|
||||
|
||||
SCOLL_VERBOSE(12,
|
||||
"[#%d] Broadcast algorithm: Central Counter",
|
||||
group->my_pe);
|
||||
SCOLL_VERBOSE(15,
|
||||
"[#%d] pSync[0] = %ld root = #%d",
|
||||
group->my_pe, pSync[0], PE_root);
|
||||
|
||||
/* Check if this PE is the root */
|
||||
if (PE_root == group->my_pe) {
|
||||
int pe_cur = 0;
|
||||
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] send data to all PE in the group",
|
||||
group->my_pe);
|
||||
for (i = 0; (i < group->proc_count) && (rc == OSHMEM_SUCCESS); i++) {
|
||||
pe_cur = oshmem_proc_pe(group->proc_array[i]);
|
||||
if (pe_cur != PE_root) {
|
||||
SCOLL_VERBOSE(15,
|
||||
"[#%d] send data to #%d",
|
||||
group->my_pe, pe_cur);
|
||||
rc = MCA_SPML_CALL(put(target, nlong, (void *)source, pe_cur));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Wait for operation completion to set needed size */
|
||||
if (rc == OSHMEM_SUCCESS) {
|
||||
SCOLL_VERBOSE(14, "[#%d] Wait for operation completion", group->my_pe);
|
||||
rc = group->g_scoll.scoll_barrier(group,
|
||||
(pSync + 1),
|
||||
SCOLL_DEFAULT_ALG);
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
The Binomial Spanning Tree algorithm.
|
||||
Outlay:
|
||||
The game scales with log2(NP) and uses 1 byte of memory.
|
||||
*/
|
||||
static int __algorithm_binomial_tree(struct oshmem_group_t *group,
|
||||
int PE_root,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync)
|
||||
{
|
||||
int rc = OSHMEM_SUCCESS;
|
||||
long value = SHMEM_SYNC_INIT;
|
||||
int root_id = oshmem_proc_group_find_id(group, PE_root);
|
||||
int my_id = oshmem_proc_group_find_id(group, group->my_pe);
|
||||
int peer_id = 0;
|
||||
int peer_pe = 0;
|
||||
int vrank;
|
||||
int dim = opal_cube_dim(group->proc_count);
|
||||
int hibit;
|
||||
int mask;
|
||||
int i = 0;
|
||||
|
||||
SCOLL_VERBOSE(12, "[#%d] Broadcast algorithm: Tree", group->my_pe);
|
||||
SCOLL_VERBOSE(15,
|
||||
"[#%d] pSync[0] = %ld root = #%d",
|
||||
group->my_pe, pSync[0], PE_root);
|
||||
|
||||
vrank = (my_id + group->proc_count - root_id) % group->proc_count;
|
||||
hibit = opal_hibit(vrank, dim);
|
||||
|
||||
SCOLL_VERBOSE(15,
|
||||
"[#%d] dim = %d vrank = %d hibit = %d",
|
||||
group->my_pe, dim, vrank, hibit);
|
||||
|
||||
dim--;
|
||||
|
||||
pSync[0] = SHMEM_SYNC_READY;
|
||||
/* Receive data from parent in the tree. */
|
||||
if (vrank > 0) {
|
||||
value = SHMEM_SYNC_READY;
|
||||
|
||||
SCOLL_VERBOSE(14, "[#%d] wait", group->my_pe);
|
||||
rc = MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_NE, (void*)&value, SHMEM_LONG));
|
||||
while ((value = pSync[0]) < 0) {
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] Broadcast size is a negative value (%li)\n",
|
||||
group->my_pe, pSync[0]);
|
||||
MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_NE, (void*)&value, SHMEM_LONG));
|
||||
}
|
||||
if (OSHMEM_SUCCESS != rc) {
|
||||
return rc;
|
||||
}
|
||||
nlong = (size_t) pSync[0];
|
||||
}
|
||||
|
||||
/* Send data to the children. */
|
||||
for (i = hibit + 1, mask = 1 << i; i <= dim; ++i, mask <<= 1) {
|
||||
peer_id = vrank | mask;
|
||||
|
||||
if (peer_id < group->proc_count) {
|
||||
/* Wait for the child to be ready to receive (pSync must have the initial value) */
|
||||
peer_id = (peer_id + root_id) % group->proc_count;
|
||||
peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
|
||||
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] check remote pe is ready to receive #%d",
|
||||
group->my_pe, peer_pe);
|
||||
do {
|
||||
rc = MCA_SPML_CALL(get((void*)pSync, sizeof(long), (void*)pSync, peer_pe));
|
||||
} while ((OSHMEM_SUCCESS == rc) && (pSync[0] != SHMEM_SYNC_READY));
|
||||
|
||||
SCOLL_VERBOSE(14, "[#%d] send data to #%d", group->my_pe, peer_pe);
|
||||
rc = MCA_SPML_CALL(put(target, nlong, (my_id == root_id ? (void *)source : target), peer_pe));
|
||||
|
||||
MCA_SPML_CALL(fence());
|
||||
|
||||
SCOLL_VERBOSE(14, "[#%d] signals to #%d", group->my_pe, peer_pe);
|
||||
value = nlong;
|
||||
rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe));
|
||||
if (OSHMEM_SUCCESS != rc) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
625
oshmem/mca/scoll/basic/scoll_basic_collect.c
Обычный файл
625
oshmem/mca/scoll/basic/scoll_basic_collect.c
Обычный файл
@ -0,0 +1,625 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "oshmem_config.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "orte/mca/grpcomm/grpcomm.h"
|
||||
|
||||
#include "oshmem/constants.h"
|
||||
#include "oshmem/mca/spml/spml.h"
|
||||
#include "oshmem/mca/scoll/scoll.h"
|
||||
#include "oshmem/mca/scoll/base/base.h"
|
||||
#include "scoll_basic.h"
|
||||
|
||||
static int __algorithm_central_collector(struct oshmem_group_t *group,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync);
|
||||
static int __algorithm_f_central_counter(struct oshmem_group_t *group,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync);
|
||||
static int __algorithm_f_tournament(struct oshmem_group_t *group,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync);
|
||||
static int __algorithm_f_recursive_doubling(struct oshmem_group_t *group,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync);
|
||||
static int __algorithm_f_ring(struct oshmem_group_t *group,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync);
|
||||
|
||||
int mca_scoll_basic_collect(struct oshmem_group_t *group,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync,
|
||||
bool nlong_type,
|
||||
int alg)
|
||||
{
|
||||
int rc = OSHMEM_SUCCESS;
|
||||
|
||||
/* Arguments validation */
|
||||
if (!group) {
|
||||
SCOLL_ERROR("Active set (group) of PE is not defined");
|
||||
rc = OSHMEM_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
/* Check if this PE is part of the group */
|
||||
if ((rc == OSHMEM_SUCCESS) && oshmem_proc_group_is_member(group)) {
|
||||
int i = 0;
|
||||
|
||||
if (nlong_type) {
|
||||
alg = (alg == SCOLL_DEFAULT_ALG ?
|
||||
mca_scoll_basic_param_collect_algorithm : alg);
|
||||
switch (alg) {
|
||||
case SCOLL_ALG_COLLECT_CENTRAL_COUNTER:
|
||||
{
|
||||
rc = __algorithm_f_central_counter(group,
|
||||
target,
|
||||
source,
|
||||
nlong,
|
||||
pSync);
|
||||
break;
|
||||
}
|
||||
case SCOLL_ALG_COLLECT_TOURNAMENT:
|
||||
{
|
||||
rc = __algorithm_f_tournament(group,
|
||||
target,
|
||||
source,
|
||||
nlong,
|
||||
pSync);
|
||||
break;
|
||||
}
|
||||
case SCOLL_ALG_COLLECT_RECURSIVE_DOUBLING:
|
||||
{
|
||||
rc = __algorithm_f_recursive_doubling(group,
|
||||
target,
|
||||
source,
|
||||
nlong,
|
||||
pSync);
|
||||
break;
|
||||
}
|
||||
case SCOLL_ALG_COLLECT_RING:
|
||||
{
|
||||
rc = __algorithm_f_ring(group,
|
||||
target,
|
||||
source,
|
||||
nlong,
|
||||
pSync);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
rc = __algorithm_f_central_counter(group,
|
||||
target,
|
||||
source,
|
||||
nlong,
|
||||
pSync);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
rc = __algorithm_central_collector(group,
|
||||
target,
|
||||
source,
|
||||
nlong,
|
||||
pSync);
|
||||
}
|
||||
|
||||
/* Restore initial values */
|
||||
SCOLL_VERBOSE(12,
|
||||
"[#%d] Restore special synchronization array",
|
||||
group->my_pe);
|
||||
for (i = 0; pSync && (i < _SHMEM_COLLECT_SYNC_SIZE); i++) {
|
||||
pSync[i] = _SHMEM_SYNC_VALUE;
|
||||
}
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
This algorithm is quite simple and straightforward for PEs with identical data size.
|
||||
One node gathers data from peers and send final result to them.
|
||||
Outlay:
|
||||
NP-1 competing network transfers are needed.
|
||||
*/
|
||||
static int __algorithm_f_central_counter(struct oshmem_group_t *group,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync)
|
||||
{
|
||||
int rc = OSHMEM_SUCCESS;
|
||||
int i = 0;
|
||||
int PE_root = oshmem_proc_pe(group->proc_array[0]);
|
||||
|
||||
SCOLL_VERBOSE(12,
|
||||
"[#%d] Collect algorithm: Central Counter (identical size)",
|
||||
group->my_pe);
|
||||
SCOLL_VERBOSE(15, "[#%d] pSync[0] = %ld", group->my_pe, pSync[0]);
|
||||
|
||||
if (PE_root == group->my_pe) {
|
||||
int pe_cur = 0;
|
||||
|
||||
memcpy((void*) ((unsigned char*) target + 0 * nlong),
|
||||
(void *) source,
|
||||
nlong);
|
||||
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] Gather data from all PEs in the group",
|
||||
group->my_pe);
|
||||
for (i = 0; (i < group->proc_count) && (rc == OSHMEM_SUCCESS); i++) {
|
||||
/* Get PE ID of a peer from the group */
|
||||
pe_cur = oshmem_proc_pe(group->proc_array[i]);
|
||||
|
||||
if (pe_cur == group->my_pe)
|
||||
continue;
|
||||
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] Gather data (%d bytes) from #%d",
|
||||
group->my_pe, (int)nlong, pe_cur);
|
||||
|
||||
/* Get data from the current peer */
|
||||
rc = MCA_SPML_CALL(get((void *)source, nlong, (void*)((unsigned char*)target + i * nlong), pe_cur));
|
||||
}
|
||||
}
|
||||
|
||||
/* Send result to all PE in group */
|
||||
if (rc == OSHMEM_SUCCESS) {
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] Broadcast from the root #%d",
|
||||
group->my_pe, PE_root);
|
||||
rc = group->g_scoll.scoll_broadcast(group,
|
||||
PE_root,
|
||||
target,
|
||||
target,
|
||||
group->proc_count * nlong,
|
||||
(pSync + 1),
|
||||
SCOLL_DEFAULT_ALG);
|
||||
}
|
||||
|
||||
SCOLL_VERBOSE(15, "[#%d] pSync[0] = %ld", group->my_pe, pSync[0]);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int __algorithm_f_tournament(struct oshmem_group_t *group,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync)
|
||||
{
|
||||
int rc = OSHMEM_SUCCESS;
|
||||
int round = 0;
|
||||
int exit_flag = group->proc_count - 1;
|
||||
long value = SHMEM_SYNC_INIT;
|
||||
int my_id = oshmem_proc_group_find_id(group, group->my_pe);
|
||||
int peer_id = 0;
|
||||
int peer_pe = 0;
|
||||
int PE_root = oshmem_proc_pe(group->proc_array[0]);
|
||||
|
||||
SCOLL_VERBOSE(12,
|
||||
"[#%d] Collect algorithm: Tournament (identical size)",
|
||||
group->my_pe);
|
||||
SCOLL_VERBOSE(15, "[#%d] pSync[0] = %ld", group->my_pe, pSync[0]);
|
||||
|
||||
/* Set current state as WAIT */
|
||||
pSync[0] = SHMEM_SYNC_WAIT;
|
||||
|
||||
/* Copy data to itself */
|
||||
memcpy((void*) ((unsigned char*) target + my_id * nlong),
|
||||
(void *) source,
|
||||
nlong);
|
||||
|
||||
while (exit_flag && (rc == OSHMEM_SUCCESS)) {
|
||||
/* Define a peer for competition */
|
||||
peer_id = my_id ^ (1 << round);
|
||||
|
||||
/* Update exit condition and round counter */
|
||||
exit_flag >>= 1;
|
||||
round++;
|
||||
|
||||
/* Do not have peer for tournament */
|
||||
if (peer_id >= group->proc_count)
|
||||
continue;
|
||||
|
||||
if (my_id < peer_id) {
|
||||
pSync[0] = peer_id;
|
||||
value = my_id;
|
||||
|
||||
SCOLL_VERBOSE(14, "[#%d] round = %d wait", group->my_pe, round);
|
||||
rc = MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_EQ, (void*)&value, SHMEM_LONG));
|
||||
} else {
|
||||
peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
|
||||
|
||||
#if 1 /* It is ugly implementation of compare and swap operation
|
||||
Usage of this hack does not give performance improvement but
|
||||
it is expected that shmem_long_cswap() will make it faster.
|
||||
*/
|
||||
do {
|
||||
MCA_SPML_CALL(get((void*)pSync, sizeof(value), (void*)&value, peer_pe));
|
||||
} while (value != my_id);
|
||||
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] round = %d send data to #%d",
|
||||
group->my_pe, round, peer_pe);
|
||||
rc = MCA_SPML_CALL(put((void*)((unsigned char*)target + my_id * nlong), (1 << (round - 1)) * nlong, (void*)((unsigned char*)target + my_id * nlong), peer_pe));
|
||||
|
||||
MCA_SPML_CALL(fence());
|
||||
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] round = %d signals to #%d",
|
||||
group->my_pe, round, peer_pe);
|
||||
value = peer_id;
|
||||
rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe));
|
||||
#endif
|
||||
SCOLL_VERBOSE(14, "[#%d] round = %d wait", group->my_pe, round);
|
||||
value = SHMEM_SYNC_RUN;
|
||||
rc = MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_EQ, (void*)&value, SHMEM_LONG));
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Send result to all PE in group */
|
||||
if ((my_id == 0) && (rc == OSHMEM_SUCCESS)) {
|
||||
SCOLL_VERBOSE(14, "[#%d] signals to all", group->my_pe);
|
||||
|
||||
value = SHMEM_SYNC_RUN;
|
||||
for (peer_id = 1;
|
||||
(peer_id < group->proc_count) && (rc == OSHMEM_SUCCESS);
|
||||
peer_id++) {
|
||||
peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
|
||||
rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe));
|
||||
}
|
||||
}
|
||||
|
||||
/* Send result to all PE in group */
|
||||
if (rc == OSHMEM_SUCCESS) {
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] Broadcast from the root #%d",
|
||||
group->my_pe, PE_root);
|
||||
rc = group->g_scoll.scoll_broadcast(group,
|
||||
PE_root,
|
||||
target,
|
||||
target,
|
||||
group->proc_count * nlong,
|
||||
(pSync + 1),
|
||||
SCOLL_DEFAULT_ALG);
|
||||
}
|
||||
|
||||
SCOLL_VERBOSE(15, "[#%d] pSync[0] = %ld", group->my_pe, pSync[0]);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int __algorithm_f_ring(struct oshmem_group_t *group,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync)
|
||||
{
|
||||
int rc = OSHMEM_SUCCESS;
|
||||
int i = 0;
|
||||
long value = SHMEM_SYNC_INIT;
|
||||
int my_id = oshmem_proc_group_find_id(group, group->my_pe);
|
||||
int data_index = 0;
|
||||
int peer_id = 0;
|
||||
int peer_pe = 0;
|
||||
|
||||
SCOLL_VERBOSE(12,
|
||||
"[#%d] Collect algorithm: Ring (identical size)",
|
||||
group->my_pe);
|
||||
SCOLL_VERBOSE(15, "[#%d] pSync[0] = %ld", group->my_pe, pSync[0]);
|
||||
|
||||
peer_id = (my_id + 1) % group->proc_count;
|
||||
peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
|
||||
memcpy((void*) ((unsigned char*) target + my_id * nlong),
|
||||
(void *) source,
|
||||
nlong);
|
||||
data_index = my_id;
|
||||
|
||||
for (i = 0; (i < (group->proc_count - 1)) && (rc == OSHMEM_SUCCESS); i++) {
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] round = %d send data to #%d by index = %d",
|
||||
group->my_pe, i, peer_pe, data_index);
|
||||
rc = MCA_SPML_CALL(put((void*)((unsigned char*)target + data_index * nlong), nlong, (void*)((unsigned char*)target + data_index * nlong), peer_pe));
|
||||
|
||||
MCA_SPML_CALL(fence());
|
||||
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] round = %d signals to #%d",
|
||||
group->my_pe, i, peer_pe);
|
||||
value = i;
|
||||
rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe));
|
||||
|
||||
data_index = (data_index ? (data_index - 1) : (group->proc_count - 1));
|
||||
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] round = %d wait for data by index = %d",
|
||||
group->my_pe, i, data_index);
|
||||
if (i == 0) {
|
||||
value = _SHMEM_SYNC_VALUE;
|
||||
rc = MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_NE, (void*)&value, SHMEM_LONG));
|
||||
} else {
|
||||
value = i;
|
||||
rc = MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_GE, (void*)&value, SHMEM_LONG));
|
||||
}
|
||||
}
|
||||
|
||||
SCOLL_VERBOSE(15, "[#%d] pSync[0] = %ld", group->my_pe, pSync[0]);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int __algorithm_f_recursive_doubling(struct oshmem_group_t *group,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync)
|
||||
{
|
||||
int rc = OSHMEM_SUCCESS;
|
||||
int round = 0;
|
||||
int floor2_proc = 0;
|
||||
int exit_flag = 0;
|
||||
long value = SHMEM_SYNC_INIT;
|
||||
int my_id = oshmem_proc_group_find_id(group, group->my_pe);
|
||||
int data_index = 0;
|
||||
int peer_id = 0;
|
||||
int peer_pe = 0;
|
||||
int i = 0;
|
||||
|
||||
floor2_proc = 1;
|
||||
i = group->proc_count;
|
||||
i >>= 1;
|
||||
while (i) {
|
||||
i >>= 1;
|
||||
floor2_proc <<= 1;
|
||||
}
|
||||
|
||||
SCOLL_VERBOSE(12,
|
||||
"[#%d] Collect algorithm: Recursive Doubling (identical size)",
|
||||
group->my_pe);
|
||||
SCOLL_VERBOSE(15,
|
||||
"[#%d] pSync[0] = %ld floor2_proc = %d",
|
||||
group->my_pe, pSync[0], floor2_proc);
|
||||
|
||||
memcpy((void*) ((unsigned char*) target + my_id * nlong),
|
||||
(void *) source,
|
||||
nlong);
|
||||
data_index = my_id;
|
||||
|
||||
if (my_id >= floor2_proc) {
|
||||
int pe_cur = 0;
|
||||
|
||||
/* I am in extra group, my partner is node (my_id-y) in basic group */
|
||||
peer_id = my_id - floor2_proc;
|
||||
peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
|
||||
|
||||
for (i = 0; (i < group->proc_count) && (rc == OSHMEM_SUCCESS); i++) {
|
||||
if (i == my_id)
|
||||
continue;
|
||||
|
||||
pe_cur = oshmem_proc_pe(group->proc_array[i]);
|
||||
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] is extra send data to #%d",
|
||||
group->my_pe, pe_cur);
|
||||
rc = MCA_SPML_CALL(put((void*)((unsigned char*)target + data_index * nlong), nlong, (void *)source, pe_cur));
|
||||
}
|
||||
|
||||
MCA_SPML_CALL(fence());
|
||||
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] is extra and signal to #%d",
|
||||
group->my_pe, peer_pe);
|
||||
value = SHMEM_SYNC_RUN;
|
||||
rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe));
|
||||
|
||||
SCOLL_VERBOSE(14, "[#%d] wait", group->my_pe);
|
||||
value = SHMEM_SYNC_RUN;
|
||||
rc = MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_EQ, (void*)&value, SHMEM_LONG));
|
||||
} else {
|
||||
/* Wait for a peer from extra group */
|
||||
if ((group->proc_count - floor2_proc) > my_id) {
|
||||
/* I am in basic group, my partner is node (my_id+y) in extra group */
|
||||
peer_id = my_id + floor2_proc;
|
||||
peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
|
||||
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] wait a signal from #%d",
|
||||
group->my_pe, peer_pe);
|
||||
value = SHMEM_SYNC_RUN;
|
||||
rc = MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_EQ, (void*)&value, SHMEM_LONG));
|
||||
}
|
||||
|
||||
/* Pairwise exchange */
|
||||
exit_flag = floor2_proc - 1;
|
||||
pSync[0] = round;
|
||||
while (exit_flag && (rc == OSHMEM_SUCCESS)) {
|
||||
/* Define a peer for competition */
|
||||
peer_id = my_id ^ (1 << round);
|
||||
|
||||
/* Update exit condition and round counter */
|
||||
exit_flag >>= 1;
|
||||
round++;
|
||||
|
||||
peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
|
||||
|
||||
#if 1 /* It is ugly implementation of compare and swap operation
|
||||
Usage of this hack does not give performance improvement but
|
||||
it is expected that shmem_long_cswap() will make it faster.
|
||||
*/
|
||||
do {
|
||||
MCA_SPML_CALL(get((void*)pSync, sizeof(value), (void*)&value, peer_pe));
|
||||
} while (value != (round - 1));
|
||||
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] round = %d send data to #%d by index = %d",
|
||||
group->my_pe, round, peer_pe, data_index);
|
||||
rc = MCA_SPML_CALL(put((void*)((unsigned char*)target + data_index * nlong), (1 << (round - 1)) * nlong, (void*)((unsigned char*)target + data_index * nlong), peer_pe));
|
||||
|
||||
MCA_SPML_CALL(fence());
|
||||
|
||||
data_index = (my_id / (1 << round)) * (1 << round);
|
||||
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] round = %d signals to #%d",
|
||||
group->my_pe, round, peer_pe);
|
||||
value = SHMEM_SYNC_RUN;
|
||||
rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe));
|
||||
#endif
|
||||
|
||||
SCOLL_VERBOSE(14, "[#%d] round = %d wait", group->my_pe, round);
|
||||
value = SHMEM_SYNC_RUN;
|
||||
rc = MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_EQ, (void*)&value, SHMEM_LONG));
|
||||
|
||||
pSync[0] = round;
|
||||
}
|
||||
|
||||
/* Notify a peer from extra group */
|
||||
if ((group->proc_count - floor2_proc) > my_id) {
|
||||
/* I am in basic group, my partner is node (my_id+y) in extra group */
|
||||
peer_id = my_id + floor2_proc;
|
||||
peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
|
||||
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] is extra send data to #%d",
|
||||
group->my_pe, peer_pe);
|
||||
rc = MCA_SPML_CALL(put(target, group->proc_count * nlong, target, peer_pe));
|
||||
|
||||
MCA_SPML_CALL(fence());
|
||||
|
||||
SCOLL_VERBOSE(14, "[#%d] signals to #%d", group->my_pe, peer_pe);
|
||||
value = SHMEM_SYNC_RUN;
|
||||
rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe));
|
||||
}
|
||||
}
|
||||
|
||||
SCOLL_VERBOSE(15, "[#%d] pSync[0] = %ld", group->my_pe, pSync[0]);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
This algorithm is quite simple and straightforward. It allows to have different data size on PEs.
|
||||
One node gathers data from peers and send final result to them.
|
||||
Outlay:
|
||||
NP-1 competing network transfers are needed.
|
||||
*/
|
||||
static int __algorithm_central_collector(struct oshmem_group_t *group,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync)
|
||||
{
|
||||
int rc = OSHMEM_SUCCESS;
|
||||
size_t offset = 0;
|
||||
int i = 0;
|
||||
int PE_root = oshmem_proc_pe(group->proc_array[0]);
|
||||
|
||||
SCOLL_VERBOSE(12,
|
||||
"[#%d] Collect algorithm: Central Counter (vary size)",
|
||||
group->my_pe);
|
||||
|
||||
/* Set own data size */
|
||||
pSync[0] = nlong;
|
||||
|
||||
if (PE_root == group->my_pe) {
|
||||
long value = 0;
|
||||
int pe_cur = 0;
|
||||
long wait_pe_count = 0;
|
||||
size_t* wait_pe_array = NULL;
|
||||
|
||||
wait_pe_count = group->proc_count;
|
||||
wait_pe_array = malloc(sizeof(*wait_pe_array) * wait_pe_count);
|
||||
if (wait_pe_array) {
|
||||
memset((void*) wait_pe_array,
|
||||
0,
|
||||
sizeof(*wait_pe_array) * wait_pe_count);
|
||||
wait_pe_array[0] = nlong;
|
||||
wait_pe_count--;
|
||||
|
||||
while (wait_pe_count) {
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] Gather data size info from all PEs in the group",
|
||||
group->my_pe);
|
||||
for (i = 1; (i < group->proc_count) && (rc == OSHMEM_SUCCESS);
|
||||
i++) {
|
||||
if (wait_pe_array[i] == 0) {
|
||||
pe_cur = oshmem_proc_pe(group->proc_array[i]);
|
||||
value = 0;
|
||||
rc = MCA_SPML_CALL(get((void*)pSync, sizeof(value), (void*)&value, pe_cur));
|
||||
if ((rc == OSHMEM_SUCCESS)
|
||||
&& (value != _SHMEM_SYNC_VALUE)
|
||||
&& (value > 0)) {
|
||||
wait_pe_array[i] = (size_t) value;
|
||||
wait_pe_count--;
|
||||
SCOLL_VERBOSE(14,
|
||||
"Got source data size as %d from #%d (wait list counter: %d)",
|
||||
(int)value, pe_cur, (int)wait_pe_count);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
memcpy((void*) ((unsigned char*) target + 0 * nlong),
|
||||
(void *) source,
|
||||
nlong);
|
||||
offset += nlong;
|
||||
|
||||
for (i = 1; (i < group->proc_count) && (rc == OSHMEM_SUCCESS);
|
||||
i++) {
|
||||
/* Get PE ID of a peer from the group */
|
||||
pe_cur = oshmem_proc_pe(group->proc_array[i]);
|
||||
|
||||
/* Get data from the current peer */
|
||||
rc = MCA_SPML_CALL(get((void *)source, wait_pe_array[i], (void*)((unsigned char*)target + offset), pe_cur));
|
||||
|
||||
SCOLL_VERBOSE(14,
|
||||
"Got %d bytes of data from #%d (offset: %d)",
|
||||
(int)wait_pe_array[i], pe_cur, (int)offset);
|
||||
|
||||
offset += wait_pe_array[i];
|
||||
}
|
||||
|
||||
free(wait_pe_array);
|
||||
} else {
|
||||
rc = OSHMEM_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
}
|
||||
|
||||
/* Send result to all PE in group */
|
||||
if (rc == OSHMEM_SUCCESS) {
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] Broadcast from the root #%d",
|
||||
group->my_pe, PE_root);
|
||||
rc = group->g_scoll.scoll_broadcast(group,
|
||||
PE_root,
|
||||
target,
|
||||
target,
|
||||
offset,
|
||||
(pSync + 1),
|
||||
SCOLL_DEFAULT_ALG);
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
164
oshmem/mca/scoll/basic/scoll_basic_component.c
Обычный файл
164
oshmem/mca/scoll/basic/scoll_basic_component.c
Обычный файл
@ -0,0 +1,164 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "oshmem_config.h"
|
||||
|
||||
#include "oshmem/constants.h"
|
||||
#include "oshmem/mca/scoll/scoll.h"
|
||||
#include "oshmem/mca/scoll/base/base.h"
|
||||
#include "scoll_basic.h"
|
||||
|
||||
/*
|
||||
* Public string showing the scoll basic component version number
|
||||
*/
|
||||
const char *mca_scoll_basic_component_version_string =
|
||||
"Open SHMEM basic collective MCA component version " OSHMEM_VERSION;
|
||||
|
||||
/*
|
||||
* Global variable
|
||||
*/
|
||||
int mca_scoll_basic_priority_param = -1;
|
||||
int mca_scoll_basic_param_barrier_algorithm = SCOLL_ALG_BARRIER_ADAPTIVE;
|
||||
int mca_scoll_basic_param_broadcast_algorithm = SCOLL_ALG_BROADCAST_BINOMIAL;
|
||||
int mca_scoll_basic_param_collect_algorithm =
|
||||
SCOLL_ALG_COLLECT_RECURSIVE_DOUBLING;
|
||||
int mca_scoll_basic_param_reduce_algorithm = SCOLL_ALG_REDUCE_RECURSIVE_DOUBLING;
|
||||
|
||||
/*
|
||||
* Local function
|
||||
*/
|
||||
static int basic_register(void);
|
||||
static int basic_open(void);
|
||||
static int basic_close(void);
|
||||
|
||||
/*
|
||||
* Instantiate the public struct with all of our public information
|
||||
* and pointers to our public functions in it
|
||||
*/
|
||||
|
||||
mca_scoll_base_component_t mca_scoll_basic_component = {
|
||||
|
||||
/* First, the mca_component_t struct containing meta information
|
||||
about the component itself */
|
||||
|
||||
{
|
||||
MCA_SCOLL_BASE_VERSION_2_0_0,
|
||||
|
||||
/* Component name and version */
|
||||
"basic",
|
||||
OSHMEM_MAJOR_VERSION,
|
||||
OSHMEM_MINOR_VERSION,
|
||||
OSHMEM_RELEASE_VERSION,
|
||||
|
||||
/* Component open and close functions */
|
||||
basic_open,
|
||||
basic_close,
|
||||
NULL,
|
||||
basic_register
|
||||
},
|
||||
{
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
},
|
||||
|
||||
/* Initialization / querying functions */
|
||||
|
||||
mca_scoll_basic_init,
|
||||
mca_scoll_basic_query
|
||||
};
|
||||
|
||||
static int basic_register(void)
|
||||
{
|
||||
char help_msg[200];
|
||||
mca_base_component_t *comp = &mca_scoll_basic_component.scoll_version;
|
||||
|
||||
mca_scoll_basic_priority_param = 75;
|
||||
(void) mca_base_component_var_register(comp,
|
||||
"priority",
|
||||
"Priority of the basic scoll:basic component",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_scoll_basic_priority_param);
|
||||
|
||||
sprintf(help_msg,
|
||||
"Algoritm selection for Barrier (%d - Central Counter, %d - Tournament, %d - Recursive Doubling, %d - Dissemination, %d - Basic, %d - Adaptive)",
|
||||
SCOLL_ALG_BARRIER_CENTRAL_COUNTER,
|
||||
SCOLL_ALG_BARRIER_TOURNAMENT,
|
||||
SCOLL_ALG_BARRIER_RECURSIVE_DOUBLING,
|
||||
SCOLL_ALG_BARRIER_DISSEMINATION,
|
||||
SCOLL_ALG_BARRIER_BASIC,
|
||||
SCOLL_ALG_BARRIER_ADAPTIVE);
|
||||
(void) mca_base_component_var_register(comp,
|
||||
"barrier_alg",
|
||||
help_msg,
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_scoll_basic_param_barrier_algorithm);
|
||||
|
||||
sprintf(help_msg,
|
||||
"Algoritm selection for Broadcast (%d - Central Counter, %d - Binomial)",
|
||||
SCOLL_ALG_BROADCAST_CENTRAL_COUNTER,
|
||||
SCOLL_ALG_BROADCAST_BINOMIAL);
|
||||
(void) mca_base_component_var_register(comp,
|
||||
"broadcast_alg",
|
||||
help_msg,
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_scoll_basic_param_broadcast_algorithm);
|
||||
|
||||
sprintf(help_msg,
|
||||
"Algoritm selection for Collect (%d - Central Counter, %d - Tournament, %d - Recursive Doubling, %d - Ring)",
|
||||
SCOLL_ALG_COLLECT_CENTRAL_COUNTER,
|
||||
SCOLL_ALG_COLLECT_TOURNAMENT,
|
||||
SCOLL_ALG_COLLECT_RECURSIVE_DOUBLING,
|
||||
SCOLL_ALG_COLLECT_RING);
|
||||
(void) mca_base_component_var_register(comp,
|
||||
"collect_alg",
|
||||
help_msg,
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_scoll_basic_param_collect_algorithm);
|
||||
|
||||
sprintf(help_msg,
|
||||
"Algoritm selection for Reduce (%d - Central Counter, %d - Tournament, %d - Recursive Doubling %d - Linear %d - Log)",
|
||||
SCOLL_ALG_REDUCE_CENTRAL_COUNTER,
|
||||
SCOLL_ALG_REDUCE_TOURNAMENT,
|
||||
SCOLL_ALG_REDUCE_RECURSIVE_DOUBLING,
|
||||
SCOLL_ALG_REDUCE_LEGACY_LINEAR,
|
||||
SCOLL_ALG_REDUCE_LEGACY_LOG);
|
||||
(void) mca_base_component_var_register(comp,
|
||||
"reduce_alg",
|
||||
help_msg,
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_scoll_basic_param_reduce_algorithm);
|
||||
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
static int basic_open(void)
|
||||
{
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
static int basic_close(void)
|
||||
{
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
/*
 * Class instance for the basic module type: derived from
 * mca_scoll_base_module_t, with no constructor and no destructor.
 */
OBJ_CLASS_INSTANCE(mca_scoll_basic_module_t,
                   mca_scoll_base_module_t,
                   NULL,
                   NULL);
|
60
oshmem/mca/scoll/basic/scoll_basic_module.c
Обычный файл
60
oshmem/mca/scoll/basic/scoll_basic_module.c
Обычный файл
@ -0,0 +1,60 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "oshmem_config.h"
|
||||
|
||||
#include "oshmem/constants.h"
|
||||
#include "oshmem/mca/scoll/scoll.h"
|
||||
#include "oshmem/mca/scoll/base/base.h"
|
||||
#include "scoll_basic.h"
|
||||
|
||||
/*
|
||||
* Initial query function that is invoked during initialization, allowing
|
||||
* this module to indicate what level of thread support it provides.
|
||||
*/
|
||||
int mca_scoll_basic_init(bool enable_progress_threads, bool enable_threads)
|
||||
{
|
||||
/* Nothing to do */
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Invoked when there's a new communicator that has been created.
|
||||
* Look at the communicator and decide which set of functions and
|
||||
* priority we want to return.
|
||||
*/
|
||||
static int mca_scoll_basic_enable(mca_scoll_base_module_t *module,
|
||||
struct oshmem_group_t *comm)
|
||||
{
|
||||
/*nothing to do here*/
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
mca_scoll_base_module_t *
|
||||
mca_scoll_basic_query(struct oshmem_group_t *group, int *priority)
|
||||
{
|
||||
mca_scoll_basic_module_t *module;
|
||||
|
||||
*priority = mca_scoll_basic_priority_param;
|
||||
|
||||
module = OBJ_NEW(mca_scoll_basic_module_t);
|
||||
if (module) {
|
||||
module->super.scoll_barrier = mca_scoll_basic_barrier;
|
||||
module->super.scoll_broadcast = mca_scoll_basic_broadcast;
|
||||
module->super.scoll_collect = mca_scoll_basic_collect;
|
||||
module->super.scoll_reduce = mca_scoll_basic_reduce;
|
||||
module->super.scoll_module_enable = mca_scoll_basic_enable;
|
||||
return &(module->super);
|
||||
}
|
||||
|
||||
return NULL ;
|
||||
}
|
810
oshmem/mca/scoll/basic/scoll_basic_reduce.c
Обычный файл
810
oshmem/mca/scoll/basic/scoll_basic_reduce.c
Обычный файл
@ -0,0 +1,810 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "oshmem_config.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "opal/util/bit_ops.h"
|
||||
|
||||
#include "oshmem/constants.h"
|
||||
#include "oshmem/op/op.h"
|
||||
#include "oshmem/mca/spml/spml.h"
|
||||
#include "oshmem/mca/scoll/scoll.h"
|
||||
#include "oshmem/mca/scoll/base/base.h"
|
||||
#include "scoll_basic.h"
|
||||
|
||||
static int __algorithm_central_counter(struct oshmem_group_t *group,
|
||||
struct oshmem_op_t *op,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync,
|
||||
void *pWrk);
|
||||
static int __algorithm_tournament(struct oshmem_group_t *group,
|
||||
struct oshmem_op_t *op,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync,
|
||||
void *pWrk);
|
||||
static int __algorithm_recursive_doubling(struct oshmem_group_t *group,
|
||||
struct oshmem_op_t *op,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync,
|
||||
void *pWrk);
|
||||
static int __algorithm_linear(struct oshmem_group_t *group,
|
||||
struct oshmem_op_t *op,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync,
|
||||
void *pWrk);
|
||||
static int __algorithm_log(struct oshmem_group_t *group,
|
||||
struct oshmem_op_t *op,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync,
|
||||
void *pWrk);
|
||||
|
||||
int mca_scoll_basic_reduce(struct oshmem_group_t *group,
|
||||
struct oshmem_op_t *op,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync,
|
||||
void *pWrk,
|
||||
int alg)
|
||||
{
|
||||
int rc = OSHMEM_SUCCESS;
|
||||
|
||||
/* Arguments validation */
|
||||
if (!group) {
|
||||
SCOLL_ERROR("Active set (group) of PE is not defined");
|
||||
rc = OSHMEM_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
/* Check if this PE is part of the group */
|
||||
if ((rc == OSHMEM_SUCCESS) && oshmem_proc_group_is_member(group)) {
|
||||
int i = 0;
|
||||
|
||||
if (pSync) {
|
||||
alg = (alg == SCOLL_DEFAULT_ALG ?
|
||||
mca_scoll_basic_param_reduce_algorithm : alg);
|
||||
switch (alg) {
|
||||
case SCOLL_ALG_REDUCE_CENTRAL_COUNTER:
|
||||
{
|
||||
rc = __algorithm_central_counter(group,
|
||||
op,
|
||||
target,
|
||||
source,
|
||||
nlong,
|
||||
pSync,
|
||||
pWrk);
|
||||
break;
|
||||
}
|
||||
case SCOLL_ALG_REDUCE_TOURNAMENT:
|
||||
{
|
||||
rc = __algorithm_tournament(group,
|
||||
op,
|
||||
target,
|
||||
source,
|
||||
nlong,
|
||||
pSync,
|
||||
pWrk);
|
||||
break;
|
||||
}
|
||||
case SCOLL_ALG_REDUCE_RECURSIVE_DOUBLING:
|
||||
{
|
||||
rc = __algorithm_recursive_doubling(group,
|
||||
op,
|
||||
target,
|
||||
source,
|
||||
nlong,
|
||||
pSync,
|
||||
pWrk);
|
||||
break;
|
||||
}
|
||||
case SCOLL_ALG_REDUCE_LEGACY_LINEAR:
|
||||
{
|
||||
rc = __algorithm_linear(group,
|
||||
op,
|
||||
target,
|
||||
source,
|
||||
nlong,
|
||||
pSync,
|
||||
pWrk);
|
||||
break;
|
||||
}
|
||||
case SCOLL_ALG_REDUCE_LEGACY_LOG:
|
||||
{
|
||||
rc = __algorithm_log(group,
|
||||
op,
|
||||
target,
|
||||
source,
|
||||
nlong,
|
||||
pSync,
|
||||
pWrk);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
rc = __algorithm_central_counter(group,
|
||||
op,
|
||||
target,
|
||||
source,
|
||||
nlong,
|
||||
pSync,
|
||||
pWrk);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
SCOLL_ERROR("Incorrect argument pSync");
|
||||
rc = OSHMEM_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
/* Restore initial values */
|
||||
SCOLL_VERBOSE(12,
|
||||
"PE#%d Restore special synchronization array",
|
||||
group->my_pe);
|
||||
for (i = 0; pSync && (i < _SHMEM_REDUCE_SYNC_SIZE); i++) {
|
||||
pSync[i] = _SHMEM_SYNC_VALUE;
|
||||
}
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
This algorithm is quite simple and straightforward for PEs with identical data size.
|
||||
One node gathers data from peers and send final result to them.
|
||||
Outlay:
|
||||
NP-1 competing network transfers are needed.
|
||||
*/
|
||||
static int __algorithm_central_counter(struct oshmem_group_t *group,
|
||||
struct oshmem_op_t *op,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync,
|
||||
void *pWrk)
|
||||
{
|
||||
int rc = OSHMEM_SUCCESS;
|
||||
int i = 0;
|
||||
int PE_root = oshmem_proc_pe(group->proc_array[0]);
|
||||
|
||||
SCOLL_VERBOSE(12, "[#%d] Reduce algorithm: Central Counter", group->my_pe);
|
||||
|
||||
if (PE_root == group->my_pe) {
|
||||
int pe_cur = 0;
|
||||
void *target_cur = NULL;
|
||||
|
||||
target_cur = malloc(nlong);
|
||||
if (target_cur) {
|
||||
memcpy(target, (void *) source, nlong);
|
||||
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] Gather data from all PEs in the group",
|
||||
group->my_pe);
|
||||
for (i = 0; (i < group->proc_count) && (rc == OSHMEM_SUCCESS);
|
||||
i++) {
|
||||
/* Get PE ID of a peer from the group */
|
||||
pe_cur = oshmem_proc_pe(group->proc_array[i]);
|
||||
|
||||
if (pe_cur == group->my_pe)
|
||||
continue;
|
||||
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] Gather data (%d bytes) from #%d",
|
||||
group->my_pe, (int)nlong, pe_cur);
|
||||
|
||||
/* Clean up temporary buffer */
|
||||
memset(target_cur, 0, nlong);
|
||||
|
||||
/* Get data from the current peer */
|
||||
rc = MCA_SPML_CALL(get((void *)source, nlong, target_cur, pe_cur));
|
||||
|
||||
/* Do reduction operation */
|
||||
if (rc == OSHMEM_SUCCESS) {
|
||||
op->o_func.c_fn(target_cur, target, nlong / op->dt_size);
|
||||
}
|
||||
}
|
||||
|
||||
free(target_cur);
|
||||
} else {
|
||||
rc = OSHMEM_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
}
|
||||
|
||||
/* Send result to all PE in group */
|
||||
if (rc == OSHMEM_SUCCESS) {
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] Broadcast from the root #%d",
|
||||
group->my_pe, PE_root);
|
||||
rc = group->g_scoll.scoll_broadcast(group,
|
||||
PE_root,
|
||||
target,
|
||||
target,
|
||||
nlong,
|
||||
(pSync + 1),
|
||||
SCOLL_DEFAULT_ALG);
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int __algorithm_tournament(struct oshmem_group_t *group,
|
||||
struct oshmem_op_t *op,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync,
|
||||
void *pWrk)
|
||||
{
|
||||
int rc = OSHMEM_SUCCESS;
|
||||
int round = 0;
|
||||
int exit_flag = group->proc_count - 1;
|
||||
long value = SHMEM_SYNC_INIT;
|
||||
int my_id = oshmem_proc_group_find_id(group, group->my_pe);
|
||||
int peer_id = 0;
|
||||
int peer_pe = 0;
|
||||
void *target_cur = NULL;
|
||||
int PE_root = oshmem_proc_pe(group->proc_array[0]);
|
||||
|
||||
SCOLL_VERBOSE(12, "[#%d] Reduce algorithm: Tournament", group->my_pe);
|
||||
SCOLL_VERBOSE(15, "[#%d] pSync[0] = %ld", group->my_pe, pSync[0]);
|
||||
|
||||
/* Set current state as WAIT */
|
||||
pSync[0] = SHMEM_SYNC_WAIT;
|
||||
|
||||
target_cur = malloc(nlong);
|
||||
if (target_cur) {
|
||||
memcpy(target_cur, (void *) source, nlong);
|
||||
} else {
|
||||
return OSHMEM_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
while (exit_flag && (rc == OSHMEM_SUCCESS)) {
|
||||
/* Define a peer for competition */
|
||||
peer_id = my_id ^ (1 << round);
|
||||
|
||||
/* Update exit condition and round counter */
|
||||
exit_flag >>= 1;
|
||||
round++;
|
||||
|
||||
/* Do not have peer for tournament */
|
||||
if (peer_id >= group->proc_count)
|
||||
continue;
|
||||
|
||||
if (my_id < peer_id) {
|
||||
pSync[0] = peer_id;
|
||||
value = my_id;
|
||||
|
||||
SCOLL_VERBOSE(14, "[#%d] round = %d wait", group->my_pe, round);
|
||||
rc = MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_EQ, (void*)&value, SHMEM_LONG));
|
||||
|
||||
/* Do reduction operation */
|
||||
if (rc == OSHMEM_SUCCESS) {
|
||||
op->o_func.c_fn(target, target_cur, nlong / op->dt_size);
|
||||
}
|
||||
} else {
|
||||
peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
|
||||
|
||||
#if 1 /* It is ugly implementation of compare and swap operation
|
||||
Usage of this hack does not give performance improvement but
|
||||
it is expected that shmem_long_cswap() will make it faster.
|
||||
*/
|
||||
do {
|
||||
MCA_SPML_CALL(get((void*)pSync, sizeof(value), (void*)&value, peer_pe));
|
||||
} while (value != my_id);
|
||||
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] round = %d send data to #%d",
|
||||
group->my_pe, round, peer_pe);
|
||||
rc = MCA_SPML_CALL(put(target, nlong, target_cur, peer_pe));
|
||||
|
||||
MCA_SPML_CALL(fence());
|
||||
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] round = %d signals to #%d",
|
||||
group->my_pe, round, peer_pe);
|
||||
value = peer_id;
|
||||
rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe));
|
||||
#endif
|
||||
SCOLL_VERBOSE(14, "[#%d] round = %d wait", group->my_pe, round);
|
||||
value = SHMEM_SYNC_RUN;
|
||||
rc = MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_EQ, (void*)&value, SHMEM_LONG));
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Send result to all PE in group */
|
||||
if ((my_id == 0) && (rc == OSHMEM_SUCCESS)) {
|
||||
SCOLL_VERBOSE(14, "[#%d] signals to all", group->my_pe);
|
||||
|
||||
memcpy(target, target_cur, nlong);
|
||||
|
||||
value = SHMEM_SYNC_RUN;
|
||||
for (peer_id = 1;
|
||||
(peer_id < group->proc_count) && (rc == OSHMEM_SUCCESS);
|
||||
peer_id++) {
|
||||
peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
|
||||
rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe));
|
||||
}
|
||||
}
|
||||
|
||||
/* Send result to all PE in group */
|
||||
if (rc == OSHMEM_SUCCESS) {
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] Broadcast from the root #%d",
|
||||
group->my_pe, PE_root);
|
||||
rc = group->g_scoll.scoll_broadcast(group,
|
||||
PE_root,
|
||||
target,
|
||||
target,
|
||||
nlong,
|
||||
(pSync + 1),
|
||||
SCOLL_DEFAULT_ALG);
|
||||
}
|
||||
|
||||
free(target_cur);
|
||||
|
||||
SCOLL_VERBOSE(15, "[#%d] pSync[0] = %ld", group->my_pe, pSync[0]);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int __algorithm_recursive_doubling(struct oshmem_group_t *group,
|
||||
struct oshmem_op_t *op,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync,
|
||||
void *pWrk)
|
||||
{
|
||||
int rc = OSHMEM_SUCCESS;
|
||||
int round = 0;
|
||||
int floor2_proc = 0;
|
||||
int exit_flag = 0;
|
||||
long value = SHMEM_SYNC_INIT;
|
||||
void *target_cur = NULL;
|
||||
int my_id = oshmem_proc_group_find_id(group, group->my_pe);
|
||||
int peer_id = 0;
|
||||
int peer_pe = 0;
|
||||
int i = 0;
|
||||
|
||||
floor2_proc = 1;
|
||||
i = group->proc_count;
|
||||
i >>= 1;
|
||||
while (i) {
|
||||
i >>= 1;
|
||||
floor2_proc <<= 1;
|
||||
}
|
||||
|
||||
target_cur = malloc(nlong);
|
||||
if (target_cur) {
|
||||
memcpy(target_cur, (void *) source, nlong);
|
||||
} else {
|
||||
return OSHMEM_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
SCOLL_VERBOSE(12,
|
||||
"[#%d] Reduce algorithm: Recursive Doubling",
|
||||
group->my_pe);
|
||||
SCOLL_VERBOSE(15,
|
||||
"[#%d] pSync[0] = %ld floor2_proc = %d",
|
||||
group->my_pe, pSync[0], floor2_proc);
|
||||
|
||||
if (my_id >= floor2_proc) {
|
||||
/* I am in extra group, my partner is node (my_id-y) in basic group */
|
||||
peer_id = my_id - floor2_proc;
|
||||
peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
|
||||
|
||||
/* Special procedure is needed in case target and source are the same */
|
||||
if (source == target) {
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] wait for peer #%d is ready",
|
||||
group->my_pe, peer_pe);
|
||||
value = SHMEM_SYNC_WAIT;
|
||||
rc = MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_EQ, (void*)&value, SHMEM_LONG));
|
||||
}
|
||||
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] is extra send data to #%d",
|
||||
group->my_pe, peer_pe);
|
||||
rc = MCA_SPML_CALL(put(target, nlong, target_cur, peer_pe));
|
||||
|
||||
MCA_SPML_CALL(fence());
|
||||
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] is extra and signal to #%d",
|
||||
group->my_pe, peer_pe);
|
||||
value = SHMEM_SYNC_RUN;
|
||||
rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe));
|
||||
|
||||
SCOLL_VERBOSE(14, "[#%d] wait", group->my_pe);
|
||||
value = SHMEM_SYNC_RUN;
|
||||
rc = MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_EQ, (void*)&value, SHMEM_LONG));
|
||||
} else {
|
||||
/* Wait for a peer from extra group */
|
||||
if ((group->proc_count - floor2_proc) > my_id) {
|
||||
/* I am in basic group, my partner is node (my_id+y) in extra group */
|
||||
peer_id = my_id + floor2_proc;
|
||||
peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
|
||||
|
||||
/* Special procedure is needed in case target and source are the same */
|
||||
if (source == target) {
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] signal to #%d that I am ready",
|
||||
group->my_pe, peer_pe);
|
||||
value = SHMEM_SYNC_WAIT;
|
||||
rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe));
|
||||
}
|
||||
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] wait a signal from #%d",
|
||||
group->my_pe, peer_pe);
|
||||
value = SHMEM_SYNC_RUN;
|
||||
rc = MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_EQ, (void*)&value, SHMEM_LONG));
|
||||
|
||||
/* Do reduction operation */
|
||||
if (rc == OSHMEM_SUCCESS) {
|
||||
op->o_func.c_fn(target, target_cur, nlong / op->dt_size);
|
||||
}
|
||||
}
|
||||
|
||||
/* Pairwise exchange */
|
||||
exit_flag = floor2_proc - 1;
|
||||
pSync[0] = round;
|
||||
while (exit_flag && (rc == OSHMEM_SUCCESS)) {
|
||||
/* Define a peer for competition */
|
||||
peer_id = my_id ^ (1 << round);
|
||||
|
||||
/* Update exit condition and round counter */
|
||||
exit_flag >>= 1;
|
||||
round++;
|
||||
|
||||
peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
|
||||
|
||||
#if 1 /* It is ugly implementation of compare and swap operation
|
||||
Usage of this hack does not give performance improvement but
|
||||
it is expected that shmem_long_cswap() will make it faster.
|
||||
*/
|
||||
do {
|
||||
MCA_SPML_CALL(get((void*)pSync, sizeof(value), (void*)&value, peer_pe));
|
||||
} while (value != (round - 1));
|
||||
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] round = %d send data to #%d",
|
||||
group->my_pe, round, peer_pe);
|
||||
rc = MCA_SPML_CALL(put(target, nlong, target_cur, peer_pe));
|
||||
|
||||
MCA_SPML_CALL(fence());
|
||||
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] round = %d signals to #%d",
|
||||
group->my_pe, round, peer_pe);
|
||||
value = SHMEM_SYNC_RUN;
|
||||
rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe));
|
||||
#endif
|
||||
|
||||
SCOLL_VERBOSE(14, "[#%d] round = %d wait", group->my_pe, round);
|
||||
value = SHMEM_SYNC_RUN;
|
||||
rc = MCA_SPML_CALL(wait((void*)pSync, SHMEM_CMP_EQ, (void*)&value, SHMEM_LONG));
|
||||
|
||||
/* Do reduction operation */
|
||||
if (rc == OSHMEM_SUCCESS) {
|
||||
op->o_func.c_fn(target, target_cur, nlong / op->dt_size);
|
||||
}
|
||||
|
||||
pSync[0] = round;
|
||||
}
|
||||
|
||||
memcpy(target, target_cur, nlong);
|
||||
|
||||
/* Notify a peer from extra group */
|
||||
if ((group->proc_count - floor2_proc) > my_id) {
|
||||
/* I am in basic group, my partner is node (my_id+y) in extra group */
|
||||
peer_id = my_id + floor2_proc;
|
||||
peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
|
||||
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] is extra send data to #%d",
|
||||
group->my_pe, peer_pe);
|
||||
rc = MCA_SPML_CALL(put(target, nlong, target_cur, peer_pe));
|
||||
|
||||
MCA_SPML_CALL(fence());
|
||||
|
||||
SCOLL_VERBOSE(14, "[#%d] signals to #%d", group->my_pe, peer_pe);
|
||||
value = SHMEM_SYNC_RUN;
|
||||
rc = MCA_SPML_CALL(put((void*)pSync, sizeof(value), (void*)&value, peer_pe));
|
||||
}
|
||||
}
|
||||
|
||||
free(target_cur);
|
||||
|
||||
SCOLL_VERBOSE(15, "[#%d] pSync[0] = %ld", group->my_pe, pSync[0]);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int __algorithm_linear(struct oshmem_group_t *group,
|
||||
struct oshmem_op_t *op,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync,
|
||||
void *pWrk)
|
||||
{
|
||||
int rc = OSHMEM_SUCCESS;
|
||||
int i, rank, size;
|
||||
char *free_buffer = NULL;
|
||||
char *pml_buffer = NULL;
|
||||
char *inbuf;
|
||||
int peer_id = 0;
|
||||
int peer_pe = 0;
|
||||
|
||||
/* Initialize */
|
||||
rank = group->my_pe;
|
||||
size = group->proc_count;
|
||||
int root_id = size - 1;
|
||||
int root_pe = oshmem_proc_pe(group->proc_array[root_id]);
|
||||
|
||||
SCOLL_VERBOSE(12, "[#%d] Reduce algorithm: Basic", group->my_pe);
|
||||
|
||||
/* If not root, send data to the root. */
|
||||
|
||||
if (rank != root_pe) {
|
||||
rc = MCA_SPML_CALL(send((void*)source, nlong, root_pe, MCA_SPML_BASE_PUT_STANDARD));
|
||||
} else {
|
||||
|
||||
/* for reducing buffer allocation lengths.... */
|
||||
|
||||
if (size > 1) {
|
||||
free_buffer = (char*) malloc(nlong);
|
||||
if (NULL == free_buffer) {
|
||||
return OSHMEM_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
pml_buffer = free_buffer;
|
||||
}
|
||||
|
||||
/* Initialize the receive buffer. */
|
||||
|
||||
if (root_id == (size - 1)) {
|
||||
memcpy(target, (void *) source, nlong);
|
||||
} else {
|
||||
peer_id = size - 1;
|
||||
peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
|
||||
rc = MCA_SPML_CALL(recv(target, nlong, peer_pe));
|
||||
}
|
||||
if (OSHMEM_SUCCESS != rc) {
|
||||
if (NULL != free_buffer) {
|
||||
free(free_buffer);
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Loop receiving and calling reduction function (C or Fortran). */
|
||||
|
||||
for (i = size - 2; i >= 0; --i) {
|
||||
if (root_id == i) {
|
||||
inbuf = (char*) source;
|
||||
} else {
|
||||
peer_id = i;
|
||||
peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
|
||||
rc = MCA_SPML_CALL(recv(pml_buffer, nlong, peer_pe));
|
||||
if (OSHMEM_SUCCESS != rc) {
|
||||
if (NULL != free_buffer) {
|
||||
free(free_buffer);
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
inbuf = pml_buffer;
|
||||
}
|
||||
|
||||
/* Perform the reduction */
|
||||
op->o_func.c_fn(inbuf, target, nlong / op->dt_size);
|
||||
}
|
||||
|
||||
if (NULL != free_buffer) {
|
||||
free(free_buffer);
|
||||
}
|
||||
}
|
||||
|
||||
/* Send result to all PE in group */
|
||||
if (rc == OSHMEM_SUCCESS) {
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] Broadcast from the root #%d",
|
||||
group->my_pe, root_pe);
|
||||
rc = group->g_scoll.scoll_broadcast(group,
|
||||
root_pe,
|
||||
target,
|
||||
target,
|
||||
nlong,
|
||||
(pSync + 1),
|
||||
SCOLL_DEFAULT_ALG);
|
||||
}
|
||||
|
||||
/* All done */
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int __algorithm_log(struct oshmem_group_t *group,
|
||||
struct oshmem_op_t *op,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync,
|
||||
void *pWrk)
|
||||
{
|
||||
int rc = OSHMEM_SUCCESS;
|
||||
int i, size, rank, vrank;
|
||||
int mask;
|
||||
void *sbuf = (void*) source;
|
||||
void *rbuf = target;
|
||||
char *free_buffer = NULL;
|
||||
char *free_rbuf = NULL;
|
||||
char *pml_buffer = NULL;
|
||||
char *snd_buffer = NULL;
|
||||
char *rcv_buffer = (char*) rbuf;
|
||||
int my_id = oshmem_proc_group_find_id(group, group->my_pe);
|
||||
int peer_id = 0;
|
||||
int peer_pe = 0;
|
||||
int root_id = 0;
|
||||
int root_pe = oshmem_proc_pe(group->proc_array[root_id]);
|
||||
int dim = 0;
|
||||
|
||||
/* Initialize */
|
||||
rank = group->my_pe;
|
||||
size = group->proc_count;
|
||||
dim = opal_cube_dim(group->proc_count);
|
||||
vrank = (my_id + size - root_id) % size;
|
||||
|
||||
SCOLL_VERBOSE(12, "[#%d] Reduce algorithm: Log", rank);
|
||||
|
||||
/* Allocate the incoming and resulting message buffers. See lengthy
|
||||
* rationale above. */
|
||||
|
||||
free_buffer = (char*) malloc(nlong);
|
||||
if (NULL == free_buffer) {
|
||||
return OSHMEM_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
pml_buffer = free_buffer;
|
||||
rcv_buffer = pml_buffer;
|
||||
|
||||
/* Allocate sendbuf in case the MPI_IN_PLACE option has been used. See lengthy
|
||||
* rationale above. */
|
||||
|
||||
snd_buffer = (char*) sbuf;
|
||||
|
||||
if (my_id != root_id && 0 == (vrank & 1)) {
|
||||
/* root is the only one required to provide a valid rbuf.
|
||||
* Assume rbuf is invalid for all other ranks, so fix it up
|
||||
* here to be valid on all non-leaf ranks */
|
||||
free_rbuf = (char*) malloc(nlong);
|
||||
if (NULL == free_rbuf) {
|
||||
rc = OSHMEM_ERR_OUT_OF_RESOURCE;
|
||||
goto cleanup_and_return;
|
||||
}
|
||||
rbuf = free_rbuf;
|
||||
}
|
||||
|
||||
/* Loop over cube dimensions. High processes send to low ones in the
|
||||
* dimension. */
|
||||
|
||||
for (i = 0, mask = 1; i < dim; ++i, mask <<= 1) {
|
||||
|
||||
/* A high-proc sends to low-proc and stops. */
|
||||
if (vrank & mask) {
|
||||
peer_id = vrank & ~mask;
|
||||
peer_id = (peer_id + root_id) % size;
|
||||
peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
|
||||
|
||||
rc = MCA_SPML_CALL(send((void*)snd_buffer, nlong, peer_pe, MCA_SPML_BASE_PUT_STANDARD));
|
||||
if (OSHMEM_SUCCESS != rc) {
|
||||
goto cleanup_and_return;
|
||||
}
|
||||
snd_buffer = (char*) rbuf;
|
||||
break;
|
||||
}
|
||||
|
||||
/* A low-proc receives, reduces, and moves to a higher
|
||||
* dimension. */
|
||||
|
||||
else {
|
||||
peer_id = vrank | mask;
|
||||
if (peer_id >= size) {
|
||||
continue;
|
||||
}
|
||||
peer_id = (peer_id + root_id) % size;
|
||||
peer_pe = oshmem_proc_pe(group->proc_array[peer_id]);
|
||||
|
||||
/* Most of the time (all except the first one for commutative
|
||||
* operations) we receive in the user provided buffer
|
||||
* (rbuf). But the exception is here to allow us to dont have
|
||||
* to copy from the sbuf to a temporary location. If the
|
||||
* operation is commutative we dont care in which order we
|
||||
* apply the operation, so for the first time we can receive
|
||||
* the data in the pml_buffer and then apply to operation
|
||||
* between this buffer and the user provided data. */
|
||||
|
||||
rc = MCA_SPML_CALL(recv(rcv_buffer, nlong, peer_pe));
|
||||
if (OSHMEM_SUCCESS != rc) {
|
||||
goto cleanup_and_return;
|
||||
}
|
||||
/* Perform the operation. The target is always the user
|
||||
* provided buffer We do the operation only if we receive it
|
||||
* not in the user buffer */
|
||||
if (snd_buffer != sbuf) {
|
||||
/* the target buffer is the locally allocated one */
|
||||
op->o_func.c_fn(rcv_buffer, pml_buffer, nlong / op->dt_size);
|
||||
} else {
|
||||
/* If we're commutative, we don't care about the order of
|
||||
* operations and we can just reduce the operations now.
|
||||
* If we are not commutative, we have to copy the send
|
||||
* buffer into a temp buffer (pml_buffer) and then reduce
|
||||
* what we just received against it. */
|
||||
{
|
||||
op->o_func.c_fn(sbuf, pml_buffer, nlong / op->dt_size);
|
||||
}
|
||||
/* now we have to send the buffer containing the computed data */
|
||||
snd_buffer = pml_buffer;
|
||||
/* starting from now we always receive in the user
|
||||
* provided buffer */
|
||||
rcv_buffer = (char*) rbuf;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Get the result to the root if needed. */
|
||||
rc = OSHMEM_SUCCESS;
|
||||
if (0 == vrank) {
|
||||
if (root_id == my_id) {
|
||||
memcpy(rbuf, snd_buffer, nlong);
|
||||
} else {
|
||||
rc = MCA_SPML_CALL(send((void*)snd_buffer, nlong, root_pe, MCA_SPML_BASE_PUT_STANDARD));
|
||||
}
|
||||
} else if (my_id == root_id) {
|
||||
rc = MCA_SPML_CALL(recv(rcv_buffer, nlong, root_pe));
|
||||
if (rcv_buffer != rbuf) {
|
||||
op->o_func.c_fn(rcv_buffer, rbuf, nlong / op->dt_size);
|
||||
}
|
||||
}
|
||||
|
||||
cleanup_and_return: if (NULL != free_buffer) {
|
||||
free(free_buffer);
|
||||
}
|
||||
if (NULL != free_rbuf) {
|
||||
free(free_rbuf);
|
||||
}
|
||||
|
||||
/* Send result to all PE in group */
|
||||
if (rc == OSHMEM_SUCCESS) {
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] Broadcast from the root #%d",
|
||||
rank, root_pe);
|
||||
rc = group->g_scoll.scoll_broadcast(group,
|
||||
root_pe,
|
||||
target,
|
||||
target,
|
||||
nlong,
|
||||
(pSync + 1),
|
||||
SCOLL_DEFAULT_ALG);
|
||||
}
|
||||
|
||||
/* All done */
|
||||
return rc;
|
||||
}
|
38
oshmem/mca/scoll/fca/Makefile.am
Обычный файл
38
oshmem/mca/scoll/fca/Makefile.am
Обычный файл
@ -0,0 +1,38 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
#
|
||||
# Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
#
|
||||
# Preprocessor flags for the scoll/fca component.
# The FCA variables used here are the scoll_fca_* ones substituted by this
# component's own configure.m4 (scoll_fca_CPPFLAGS, scoll_fca_HOME), so the
# component does not depend on variables owned by another component.
# The C macro keeps its COLL_FCA_HOME name.
AM_CPPFLAGS = $(OSHMEM_CFLAGS) $(scoll_fca_CPPFLAGS) \
    -DCOLL_FCA_HOME=\"$(scoll_fca_HOME)\" \
    -I$(scoll_fca_HOME)/include/fca \
    -I$(scoll_fca_HOME)/include/fca_core

# Sources shared by the DSO and the static (noinst) builds.
scoll_fca_sources = \
    scoll_fca.h \
    scoll_fca_debug.h \
    scoll_fca_api.h \
    scoll_fca_module.c \
    scoll_fca_component.c \
    scoll_fca_ops.c

# Build either an installable plugin (mca_scoll_fca.la) or a convenience
# library (libmca_scoll_fca.la), depending on the Automake conditional.
if MCA_BUILD_oshmem_scoll_fca_DSO
component_noinst =
component_install = mca_scoll_fca.la
else
component_noinst = libmca_scoll_fca.la
component_install =
endif

# Installable plugin.
mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_scoll_fca_la_SOURCES = $(scoll_fca_sources)
mca_scoll_fca_la_LIBADD = $(scoll_fca_LIBS)
mca_scoll_fca_la_LDFLAGS = -module -avoid-version $(scoll_fca_LDFLAGS)

# Non-installed convenience library.
noinst_LTLIBRARIES = $(component_noinst)
libmca_scoll_fca_la_SOURCES = $(scoll_fca_sources)
libmca_scoll_fca_la_LIBADD = $(scoll_fca_LIBS)
libmca_scoll_fca_la_LDFLAGS = -module -avoid-version $(scoll_fca_LDFLAGS)
|
39
oshmem/mca/scoll/fca/configure.m4
Обычный файл
39
oshmem/mca/scoll/fca/configure.m4
Обычный файл
@ -0,0 +1,39 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
#
|
||||
# Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
|
||||
# MCA_oshmem_scoll_fca_CONFIG([action-if-can-compile],
#                             [action-if-cant-compile])
# ------------------------------------------------
# Configure the OSHMEM scoll/fca component: register its Makefile, probe
# for FCA through OMPI_CHECK_FCA using the scoll_fca variable prefix, and
# run the first argument when FCA is usable or the second one otherwise.
AC_DEFUN([MCA_oshmem_scoll_fca_CONFIG],[
    AC_CONFIG_FILES([oshmem/mca/scoll/fca/Makefile])

    OMPI_CHECK_FCA([scoll_fca],
                   [scoll_fca_happy="yes"],
                   [scoll_fca_happy="no"])

    # On success export the wrapper compiler flags and run action 1
    AS_IF([test "$scoll_fca_happy" = "yes"],
          [scoll_fca_WRAPPER_EXTRA_LDFLAGS="$scoll_fca_LDFLAGS"
           scoll_fca_CPPFLAGS="$scoll_fca_CPPFLAGS"
           scoll_fca_WRAPPER_EXTRA_CPPFLAGS="$scoll_fca_CPPFLAGS"
           scoll_fca_WRAPPER_EXTRA_LIBS="$scoll_fca_LIBS"
           $1],
          [$2])

    # Make the build settings available to the component Makefile
    AC_SUBST([scoll_fca_CFLAGS])
    AC_SUBST([scoll_fca_CPPFLAGS])
    AC_SUBST([scoll_fca_LDFLAGS])
    AC_SUBST([scoll_fca_LIBS])
    AC_SUBST([scoll_fca_HOME], ["$ompi_check_fca_dir"])
])dnl
|
||||
|
13
oshmem/mca/scoll/fca/configure.params
Обычный файл
13
oshmem/mca/scoll/fca/configure.params
Обычный файл
@ -0,0 +1,13 @@
|
||||
# -*- shell-script -*-
|
||||
# Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# Specific to this module
|
||||
|
||||
# NOTE(review): presumably the list of files, relative to this component
# directory, that configure should generate - confirm against the build
# scripts that read configure.params.
PARAM_CONFIG_FILES="Makefile"
|
137
oshmem/mca/scoll/fca/scoll_fca.h
Обычный файл
137
oshmem/mca/scoll/fca/scoll_fca.h
Обычный файл
@ -0,0 +1,137 @@
|
||||
/**
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
* */
|
||||
|
||||
#ifndef MCA_SCOLL_FCA_H
|
||||
#define MCA_SCOLL_FCA_H
|
||||
#include "oshmem_config.h"
|
||||
#include "oshmem/constants.h"
|
||||
#include "shmem.h"
|
||||
#include "opal/mca/mca.h"
|
||||
#include "oshmem/mca/scoll/scoll.h"
|
||||
#include "scoll_fca_api.h"
|
||||
#include "scoll_fca_debug.h"
|
||||
|
||||
/*
 * FCA_IS_LOCAL_PROCESS(n): test the locality value `n` of a process.
 * Uses the OMPI_PROC_FLAG_LOCAL bit when that flag is defined and falls
 * back to OPAL_PROC_ON_LOCAL_NODE() otherwise.
 */
#if defined(OMPI_PROC_FLAG_LOCAL)
#define FCA_IS_LOCAL_PROCESS(n) ((n) & OMPI_PROC_FLAG_LOCAL)
#else
#define FCA_IS_LOCAL_PROCESS(n) OPAL_PROC_ON_LOCAL_NODE(n)
#endif
|
||||
|
||||
BEGIN_C_DECLS
|
||||
struct mca_scoll_fca_component_t {
|
||||
/** Base coll component */
|
||||
mca_scoll_base_component_1_0_0_t super;
|
||||
|
||||
/** MCA parameter: Priority of this component */
|
||||
int fca_priority;
|
||||
|
||||
/** MCA parameter: Verbose level of this component */
|
||||
int fca_verbose;
|
||||
|
||||
/** MCA parameter: Path to fca spec file */
|
||||
char* fca_spec_file;
|
||||
|
||||
/** MCA parameter: FCA device */
|
||||
char* fca_dev;
|
||||
|
||||
/** MCA parameter: Enable FCA */
|
||||
int fca_enable;
|
||||
|
||||
/** MCA parameter: Enable FCA Barrier */
|
||||
int fca_enable_barrier;
|
||||
|
||||
/** MCA parameter: Enable FCA Bcast */
|
||||
int fca_enable_bcast;
|
||||
|
||||
/** MCA parameter: Enable FCA Allreduce */
|
||||
int fca_enable_allreduce;
|
||||
|
||||
/** MCA parameter: Enable FCA Allgather */
|
||||
int fca_enable_allgather;
|
||||
|
||||
/** MCA parameter: Enable FCA Allgatherv */
|
||||
int fca_enable_allgatherv;
|
||||
|
||||
/** MCA parameter: FCA NP */
|
||||
int fca_np;
|
||||
|
||||
/* FCA global stuff */
|
||||
fca_t *fca_context; /* FCA context handle */
|
||||
|
||||
/*These vars are used as symmetric objects during __fca_comm_new. The proper amount of memory
|
||||
is allocated only once during fca_comm_query*/
|
||||
int *ret;
|
||||
int *rcounts;
|
||||
void *my_info_exchangeable;
|
||||
void *fca_comm_desc_exchangeable;
|
||||
};
|
||||
typedef struct mca_scoll_fca_component_t mca_scoll_fca_component_t;
|
||||
|
||||
OSHMEM_MODULE_DECLSPEC extern mca_scoll_fca_component_t mca_scoll_fca_component;
|
||||
|
||||
struct mca_scoll_fca_module_t {
|
||||
mca_scoll_base_module_t super;
|
||||
struct oshmem_group_t *comm;
|
||||
int rank;
|
||||
int local_proc_idx;
|
||||
int num_local_procs;
|
||||
int *local_ranks;
|
||||
fca_comm_t *fca_comm;
|
||||
fca_comm_desc_t fca_comm_desc;
|
||||
fca_comm_caps_t fca_comm_caps;
|
||||
|
||||
/* Saved handlers - for fallback */
|
||||
mca_scoll_base_module_barrier_fn_t previous_barrier;
|
||||
mca_scoll_base_module_t *previous_barrier_module;
|
||||
mca_scoll_base_module_broadcast_fn_t previous_broadcast;
|
||||
mca_scoll_base_module_t *previous_broadcast_module;
|
||||
mca_scoll_base_module_collect_fn_t previous_collect;
|
||||
mca_scoll_base_module_t *previous_collect_module;
|
||||
mca_scoll_base_module_reduce_fn_t previous_reduce;
|
||||
mca_scoll_base_module_t *previous_reduce_module;
|
||||
};
|
||||
typedef struct mca_scoll_fca_module_t mca_scoll_fca_module_t;
|
||||
OBJ_CLASS_DECLARATION(mca_scoll_fca_module_t);
|
||||
|
||||
/* API functions */
|
||||
int mca_scoll_fca_init_query(bool enable_progress_threads,
|
||||
bool enable_mpi_threads);
|
||||
mca_scoll_base_module_t *mca_scoll_fca_comm_query(struct oshmem_group_t *comm,
|
||||
int *priority);
|
||||
int mca_scoll_fca_get_fca_lib(struct oshmem_group_t *comm);
|
||||
|
||||
int mca_scoll_fca_barrier(struct oshmem_group_t *group,
|
||||
long *pSync,
|
||||
int algorithm_type);
|
||||
int mca_scoll_fca_broadcast(struct oshmem_group_t *group,
|
||||
int PE_root,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync,
|
||||
int algorithm_type);
|
||||
int mca_scoll_fca_collect(struct oshmem_group_t *group,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync,
|
||||
bool nlong_type,
|
||||
int algorithm_type);
|
||||
int mca_scoll_fca_reduce(struct oshmem_group_t *group,
|
||||
struct oshmem_op_t *op,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync,
|
||||
void *pWrk,
|
||||
int algorithm_type);
|
||||
OBJ_CLASS_DECLARATION(mca_coll_fca_module_t);
|
||||
END_C_DECLS
|
||||
#endif
|
81
oshmem/mca/scoll/fca/scoll_fca_api.h
Обычный файл
81
oshmem/mca/scoll/fca/scoll_fca_api.h
Обычный файл
@ -0,0 +1,81 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/*
 * Include guard: this header defines a static inline function and several
 * macros, so it must be safe to include more than once per translation
 * unit (directly and through scoll_fca.h).
 */
#ifndef MCA_SCOLL_FCA_API_H
#define MCA_SCOLL_FCA_API_H

#include "oshmem_config.h"

#include <fca_api.h>
#include <fca_version.h>
#include <config/fca_parse_specfile.h>

/*
 * FCA API version seen by OSHMEM. When FCA_API is not defined by the
 * headers above, version 12 is assumed.
 */
#ifndef FCA_API
#define OSHMEM_FCA_VERSION 12
#else
#define OSHMEM_FCA_VERSION FCA_API
#endif

/*
 * FCA API compatibility layer.
 * The OSHMEM_FCA_* macros below describe which collectives the detected
 * FCA version provides, and mca_scoll_fca_comm_init() hides the
 * difference in the fca_comm_init() calling convention.
 */

#define OSHMEM_FCA_BARRIER            1
#define OSHMEM_FCA_BCAST              1
#define OSHMEM_FCA_ALLREDUCE          1

#if OSHMEM_FCA_VERSION == 12

#define OSHMEM_FCA_ALLGATHER          0
#define FCA_API_ABI_MAJOR           1
#define FCA_API_ABI_MINOR           2
#define FCA_MAJOR_BIT               24ul
#define FCA_MINOR_BIT               16ul
#define EUSESHMEM                   287

/*
 * Create an FCA communicator (FCA API version 12).
 * This API version takes the values as separate arguments; `rank` is
 * accepted for signature compatibility but is not passed on.
 * Returns the result of fca_comm_init().
 */
static inline int mca_scoll_fca_comm_init(fca_t *fca_context,
                                          int rank,
                                          int comm_size,
                                          int local_proc_idx,
                                          int num_local_procs,
                                          fca_comm_desc_t *comm_desc,
                                          fca_comm_t **fca_comm)
{
    return fca_comm_init(fca_context,
                         local_proc_idx,
                         num_local_procs,
                         comm_size,
                         comm_desc,
                         fca_comm);
}

#elif OSHMEM_FCA_VERSION >= 20

#define OSHMEM_FCA_ALLGATHER          1
#define OSHMEM_FCA_ALLGATHERV         1

#define OSHMEM_FCA_PROGRESS           1
#define EUSESHMEM                   287

/*
 * Create an FCA communicator (FCA API version 20 and later).
 * This API version takes the values packed in an fca_comm_init_spec_t.
 * Returns the result of fca_comm_init().
 */
static inline int mca_scoll_fca_comm_init(fca_t *fca_context, int rank, int comm_size,
                                          int local_proc_idx, int num_local_procs,
                                          fca_comm_desc_t *comm_desc,
                                          fca_comm_t **fca_comm)
{
    fca_comm_init_spec_t spec;

    spec.rank = rank;
    spec.size = comm_size;
    spec.desc = *comm_desc;
    spec.proc_idx = local_proc_idx;
    spec.num_procs = num_local_procs;
    return fca_comm_init(fca_context, &spec, fca_comm);
}

#else

#error "FCA API version is unsupported"

#endif

#endif /* MCA_SCOLL_FCA_API_H */
|
Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше
Загрузка…
Ссылка в новой задаче
Block a user