This commit is contained in:
Ralph Castain 2016-09-16 15:49:34 -05:00
parent 408199ce20 295eec7059
Commit 9c3ae64297
707 changed files with 49917 additions and 23042 deletions

13
.gitignore vendored
View file

@ -305,6 +305,15 @@ opal/mca/pmix/pmix*/pmix/include/pmix/autogen/config.h
opal/mca/pmix/pmix*/pmix/include/pmix/autogen/config.h.in
opal/mca/pmix/pmix*/pmix/src/include/private/autogen/config.h.in
opal/mca/pmix/pmix*/pmix/src/include/private/autogen/config.h
opal/mca/hwloc/base/static-components.h.new.extern
opal/mca/hwloc/base/static-components.h.new.struct
opal/mca/pmix/pmix3x/pmix/src/include/frameworks.h
opal/mca/pmix/pmix3x/pmix/src/mca/pinstalldirs/config/pinstall_dirs.h
opal/mca/pmix/pmix3x/pmix/config/autogen_found_items.m4
opal/mca/pmix/pmix3x/pmix/src/include/pmix_config.h
opal/mca/pmix/pmix3x/pmix/src/include/pmix_config.h.in
opal/mca/pmix/pmix3x/pmix/include/pmix_rename.h
opal/mca/pmix/pmix3x/pmix/include/pmix_version.h
opal/tools/opal-checkpoint/opal-checkpoint
opal/tools/opal-checkpoint/opal-checkpoint.1
@ -395,6 +404,8 @@ orte/test/mpi/pconnect
orte/test/mpi/thread_init
orte/test/mpi/memcached-dummy
orte/test/mpi/coll_test
orte/test/mpi/badcoll
orte/test/mpi/iof
orte/test/system/radix
orte/test/system/sigusr_trap
@ -451,6 +462,7 @@ orte/test/system/opal_hwloc
orte/test/system/opal_db
orte/test/system/ulfm
orte/test/system/pmixtool
orte/test/system/orte_notify
orte/tools/orte-checkpoint/orte-checkpoint
orte/tools/orte-checkpoint/orte-checkpoint.1
@ -573,6 +585,7 @@ test/class/opal_value_array
test/datatype/ddt_test
test/datatype/ddt_pack
test/datatype/external32
test/datatype/to_self
test/datatype/checksum
test/datatype/position

View file

@ -1,8 +1,19 @@
# This file exists to help map usernames to proper names and email addresses
# in the Open MPI github mirror of the canonical SVN repository. The github
# mirror can be found here:
# This file exists to help consolidate names and email addresses
# (e.g., when people accidentally commit with an incorrect or local
# email address). Two common use cases:
#
# https://github.com/open-mpi/ompi-svn-mirror
# 1. Consolidate multiple email addresses from a single person.
# Example: one commit from John Smith is from
# <john.smith@his.work.com> and another is from
# <jsmith@workstation.his.work.com>, and a third is from
# <rocketman9982@users.noreply.github.com>. But they're all from
# the same John Smith person.
#
# 2. Consolidate misspellings / alternate names from a single person.
# Example: one commit is from "John Smith" and another is from
# "John Smith, CONTRACTOR", and a third is from "RocketMan 9982". But
# these are all really the same person, who can be listed once in
# AUTHORS as "John Smith".
#
# The format of this file is documented in git-shortlog(1). Specifically,
# a line like this:
@ -12,46 +23,79 @@
# means that when git sees "commit@email.xx" it will display
# "Proper Name <proper@email.xx>" instead in certain circumstances. Those
# circumstances include:
#
# - git shortlog
# - git blame
# - git log --format=tformat:"%aN <%aE>" (and similar)
#
# A copy of this file should be present on each branch in SVN which is being
# tracked in the Git mirror.
# Jeff accidentally stomped on his $HOME/.gitconfig for a short while:
Jeff Squyres <jsquyres@cisco.com> --quiet <--quiet>
# Commits from people with protected Github account address
Jeff Squyres <jsquyres@cisco.com> <jsquyres@users.noreply.github.com>
Jeff Squyres <jsquyres@cisco.com> --quiet <--quiet>
Jeff Squyres <no-author@open-mpi.org>
George Bosilca <bosilca@icl.utk.edu> <bosilca@users.noreply.github.com>
Howard Pritchard <howardp@lanl.gov> <hppritcha@users.noreply.github.com>
Howard Pritchard <howardp@lanl.gov> <howardp@lanl.gov>
Andrew Friedley <andrew.friedley@intel.com> <afriedle-intel@users.noreply.github.com>
Devendar Bureddy <devendar@mellanox.com> <bureddy@users.noreply.github.com>
Edgar Gabriel <egabriel@central.uh.edu> <edgargabriel@users.noreply.github.com>
Edgar Gabriel <gabriel@cs.uh.edu> <gabriel@Peggys-MacBook-Air.local>
Gilles Gouaillardet <gilles@rist.or.jp> <ggouaillardet@users.noreply.github.com>
Matias A Cabral <matias.a.cabral@intel.com> <matcabral@users.noreply.github.com>
Matias A Cabral <matias.a.cabral@intel.com> <matias.a.cabral@intel.com>
Pavel Shamis <shamisp@ornl.gov> <shamisp@ornl.gov>
Pavel Shamis <shamisp@ornl.gov> <shamisp@users.noreply.github.com>
Todd Kordenbrock <thkgcode@gmail.com> <tkordenbrock@users.noreply.github.com>
Yohann Burette <yohann.burette@intel.com> <yburette@users.noreply.github.com>
Pavel Shamis <pasharesearch@gmail.com> <pasharesearch@gmail.com>
Todd Kordenbrock <thkgcode@gmail.com> <tkordenbrock@users.noreply.github.com>
Yohann Burette <yohann.burette@intel.com> <yburette@users.noreply.github.com>
Yohann Burette <yohann.burette@intel.com> <yohann.burette@intel.com>
MPI Team (bot) <mpiteam@open-mpi.org> <mpiteam@open-mpi.org>
# Fix what look like accidental name misspellings / common-name-isms
Yossi Itigin <yosefe@mellanox.com> <yosefe@mellanox.com>
Josh Hursey <jjhursey@open-mpi.org> <jjhursey@open-mpi.org>
Josh Hursey <jhursey@us.ibm.com> <jhursey@us.ibm.com>
Adrian Reber <adrian@lisas.de> <adrian@lisas.de>
Elena <elena.elkina@itseez.com> <elena.elkina89@gmail.com>
Howard Pritchard <howardp@lanl.gov> <howardp@lanl.gov>
Elena Elkina <elena.elkina@itseez.com> <elena.elkina@itseez.com>
Elena Elkina <elena.elkina@itseez.com> <elena.elkina89@gmail.com>
Igor Ivanov <igor.ivanov.va@gmail.com> <igor.ivanov.va@gmail.com>
Igor Ivanov <Igor.Ivanov@itseez.com> <Igor.Ivanov@itseez.com>
Matias A Cabral <matias.a.cabral@intel.com> <matias.a.cabral@intel.com>
Mangala Jyothi Bhaskar <mjbhaskar@uh.edu> <mjbhaskar@uh.edu>
Mangala Jyothi Bhaskar <mjbhaskar@uh.edu> <mjbhaskar@crill.cs.uh.edu>
Ralph Castain <rhc@open-mpi.org> <rhc@open-mpi.org>
Ralph Castain <rhc@open-mpi.org> <rhc@odin.cs.indiana.edu>
Rolf vandeVaart <rvandevaart@nvidia.com> <rvandevaart@nvidia.com>
Yohann Burette <yohann.burette@intel.com> <yohann.burette@intel.com>
Karol Mroz <mroz.karol@gmail.com> <mroz.karol@gmail.com>
Nadezhda Kogteva <nadezhda.kogteva@itseez.com> <nadezhda@mngx-orion-01.dmz.e2e.mlnx>
Nysal Jan <jnysal@in.ibm.com> <jnysal@in.ibm.com>
Nysal Jan <jnysal@in.ibm.com> <jnysal@gmail.com>
Thananon Patinyasakdikul <apatinya@cisco.com> <apatinya@savbu-usnic-a.cisco.com>
Nysal Jan K A <jnysal@in.ibm.com> <jnysal@in.ibm.com>
Nysal Jan K A <jnysal@in.ibm.com> <jnysal@gmail.com>
Zhi Ming Wang <wangzm@cn.ibm.com> <wangzm@cn.ibm.com>
Annapurna Dasari <annapurna.dasari@intel.com> <annapurna.dasari@intel.com>
L. R. Rajeshnarayanan <l.r.rajeshnarayanan@intel.com> <l.r.rajeshnarayanan@intel.com>
Aurélien Bouteiller <bouteill@icl.utk.edu> <bouteill@icl.utk.edu>
Aurélien Bouteiller <bouteill@icl.utk.edu> <darter4.nics.utk.edu>

41
AUTHORS
View file

@ -8,6 +8,8 @@ Github.com pull request). Note that these email addresses are not
guaranteed to be current; they are simply a unique indicator of the
individual who committed them.
-----
Abhishek Joshi, Broadcom
abhishek.joshi@broadcom.com
Abhishek Kulkarni, Indiana University
@ -31,7 +33,6 @@ Anandhi S Jayakumar, Intel
Andreas Knüpfer, Technische Universitaet Dresden
andreas.knuepfer@tu-dresden.de
Andrew Friedley, Indiana University, Sandia National Laboratory, Intel
afriedle-intel@users.noreply.github.com
afriedle@osl.iu.edu
andrew.friedley@intel.com
Andrew Lumsdaine, Indiana University
@ -42,7 +43,7 @@ Anya Tatashina, Sun
anya.tatashina@sun.com
Artem Polyakov, Individual, Mellanox
artpol84@gmail.com
Aurélien Bouteiller, University of Tennessee-Knoxville
Aurélien Bouteiller, University of Tennessee-Knoxville
bouteill@icl.utk.edu
darter4.nics.utk.edu
Avneesh Pant, QLogic
@ -51,6 +52,8 @@ Bert Wesarg, Technische Universitaet Dresden
bert.wesarg@tu-dresden.de
Bill D'Amico, Cisco
bdamico@cisco.com
Boris Karasev, Mellanox
karasev.b@gmail.com
Brad Benton, IBM, AMD
brad.benton@us.ibm.com
Brad Penoff, University of British Columbia
@ -58,7 +61,7 @@ Brad Penoff, University of British Columbia
Brian Barrett, Indiana University, Los Alamos National Laboratory, Sandia National Laboratory
brbarret@open-mpi.org
Brice Goglin, INRIA
Brice.Goglin@inria.fr
brice.goglin@inria.fr
Camille Coti, University of Tennessee-Knoxville, INRIA
ccoti@icl.utk.edu
Christian Bell, QLogic
@ -79,7 +82,6 @@ David Daniel, Los Alamos National Laboratory
Denis Dimick, Los Alamos National Laboratory
dgdimick@lnal.gov
Devendar Bureddy, Mellanox
bureddy@users.noreply.github.com
devendar@mellanox.com
Dimitar Pashov, Individual
d.pashov@gmail.com
@ -87,13 +89,11 @@ Donald Kerr, Sun, Oracle
donald.kerr@oracle.com
Doron Shoham, Mellanox
dorons@mellanox.com
Edagr Gabriel, High Performance Computing Center, Stuttgart, University of Tennessee-Knoxville, University of Houston
gabriel@Peggys-MacBook-Air.local
edgargabriel@users.noreply.github.com
Edgar Gabriel, High Performance Computing Center, Stuttgart, University of Tennessee-Knoxville, University of Houston
gabriel@cs.uh.edu
Elena Elkina, Mellanox
elena.elkina@itseez.com
elena.elkina89@gmail.com
elena.elkina@itseez.com
Ethan Mallove, Sun, Oracle
ethan.mallove@oracle.com
Eugene Loh, Sun, Oracle
@ -112,9 +112,7 @@ Geoffrey Paulsen, IBM
George Bosilca, University of Tennessee-Knoxville
bosilca@eecs.utk.edu
bosilca@icl.utk.edu
bosilca@users.noreply.github.com
Gilles Gouaillardet, Research Organization for Information Science and Technology
ggouaillardet@users.noreply.github.com
gilles.gouaillardet@iferc.org
gilles@rist.or.jp
Ginger Young, Los Alamos National Laboratory
@ -136,7 +134,6 @@ Hadi Montakhabi, University of Houston
Howard Pritchard, Los Alamos National Laboratory
howardp@lanl.gov
hppritcha@gmail.com
hppritcha@users.noreply.github.com
Iain Bason, Sun, Oracle
iain.bason@oracle.com
Igor Ivanov, Mellanox
@ -147,7 +144,6 @@ Igor Usarov, Mellanox
Jeff Squyres, University of Indiana, Cisco
jeff@squyres.com
jsquyres@cisco.com
jsquyres@users.noreply.github.com
Jelena Pjesivac-Grbovic, University of Tennessee-Knoxville
pjesa@icl.iu.edu
Jithin Jose, Intel
@ -174,9 +170,11 @@ Kenneth Matney, Oak Ridge National Laboratory
matneykdsr@ornl.gov
L. R. Rajeshnarayanan, Intel
l.r.rajeshnarayanan@intel.com
LANL OMPI Bot, Los Alamos National Laboratory
openmpihpp@gmail.com
Laura Casswell, Los Alamos National Laboratory
lcasswell@lanl.gov
Lenny Verkhovsky, Volataine
Lenny Verkhovsky, Voltaire
lennyb@voltaire.com
Leobardo Ruiz Rountree, Individual
lruizrountree@gmail.com
@ -197,7 +195,6 @@ Mark Taylor, Los Alamos National Laboratory
mt@lanl.gov
Matias A Cabral, Intel
matias.a.cabral@intel.com
matcabral@users.noreply.github.com
Matthias Jurenz, Technische Universitaet Dresden
matthias.jurenz@tu-dresden.de
Maximilien Levesque, Individual
@ -209,10 +206,10 @@ Mitch Sukalski, Sandia National Laboratory
Mohamad Chaarawi, University of Houston
mschaara@cs.uh.edu
Nadezhda Kogteva, Mellanox
nadezhda@mngx-orion-01.dmz.e2e.mlnx
nadezhda.kogteva@itseez.com
nadezhda@mngx-orion-01.dmz.e2e.mlnx
Nadia Derbey, Bull
Nadia.Derbey@bull.net
nadia.derbey@bull.net
Nathan Hjelm, Los Alamos National Laboratory
hjelmn@cs.unm.edu
hjelmn@lanl.gov
@ -222,6 +219,8 @@ Nathaniel Graham, Los Alamos National Laboratory
nrgraham23@gmail.com
Nick Papior Andersen, Individual
nickpapior@gmail.com
Nicolas Chevalier, Bull
nicolas.chevalier@bull.net
Nysal Jan K A, IBM
jnysal@gmail.com
jnysal@in.ibm.com
@ -231,11 +230,12 @@ Oscar Vega-Gisbert, Universitat Politecnica de Valencia
ovega@dsic.upv.es
Pak Lui, Sun
pak.lui@sun.com
Pascal Deveze, Bull
pascal.deveze@atos.net
Patrick Geoffray, Myricom
patrick@myri.com
Pavel Shamis, Mellanox, Oak Ridge National Laboratory
shamisp@ornl.gov
shamisp@users.noreply.github.com
Pierre Lemarinier, University of Tennessee-Knoxville
lemarini@icl.utk.edu
Piotr Lesnicki, Bull
@ -248,7 +248,6 @@ Rainer Keller, High Performance Computing Center, Stuttgart, Oak Ridge National
rainer.keller@hft-stuttgart.de
rainer.keller@hlrs.de
Ralph Castain, Los Alamos National Laboratory, Cisco, Greenplum, Intel
rhc@odin.cs.indiana.edu
rhc@open-mpi.org
Reese Faucette, Cisco
rfaucett@cisco.com
@ -258,7 +257,7 @@ Rob Awles, Los Alamos National Laboratory
rta@lanl.gov
Rob Latham, Argonne National Laboratory
robl@mcs.anl.gov
Rolf vandeVaart, NVIDIA
Rolf vandeVaart, Sun, Oracle, NVIDIA
rvandevaart@nvidia.com
Ron Brightwell, Sandia National Laboratory
rbbrigh@sandia.gov
@ -271,7 +270,7 @@ Samuel Gutierrez, Los Alamos National Laboratory
samuel@lanl.gov
Sayantan Sur, The Ohio State University
surs@osu.edu
Sharon Melamed, Volataire
Sharon Melamed, Voltaire
sharonm@voltaire.com
Shiqing Fan, High Performance Computing Center, Stuttgart
shiqing@hlrs.de
@ -307,7 +306,6 @@ Tim Woodall, Los Alamos National Laboratory
Todd Kordenbrock, Sandia National Laboratory
thkgcode@gmail.com
thkorde@sandia.gov
tkordenbrock@users.noreply.github.com
Tom Naughton, Oak Ridge National Laboratory
naughtont@ornl.gov
Tomislav Janjusic, Mellanox
@ -333,7 +331,6 @@ Yael Dayan, Mellanox
Yevgeny Kliteynik, Mellanox
kliteyn@mellanox.co.il
Yohann Burette, Intel
yburette@users.noreply.github.com
yohann.burette@intel.com
Yossi Itigin, Mellanox
yosefe@mellanox.com

20
NEWS
View file

@ -216,7 +216,25 @@ Master (not on release branches yet)
Alastair McKinstry for reporting.
1.10.3 - DATE
1.10.4 - 01 Sept 2016
------
- Fix assembler support for MIPS
- Improve memory handling for temp buffers in collectives
- Fix [all]reduce with non-zero lower bound datatypes
Thanks Hristo Iliev for the report
- Fix non-standard ddt handling. Thanks Yuki Matsumoto for the report
- Various libnbc fixes. Thanks Yuki Matsumoto for the report
- Fix typos in request RMA bindings for Fortran. Thanks to @alazzaro
and @vondele for the assist
- Various bug fixes and enhancements to collective support
- Fix predefined types mapping in hcoll
- Revive the coll/sync component to resolve unexpected message issues
during tight loops across collectives
- Fix typo in wrapper compiler for Fortran static builds
1.10.3 - 15 June 2016
------
- Minor manpage cleanups

58
README
View file

@ -36,8 +36,8 @@ sign up on the user's and/or developer's mailing list (for user-level
and developer-level questions; when in doubt, send to the user's
list):
users@open-mpi.org
devel@open-mpi.org
users@lists.open-mpi.org
devel@lists.open-mpi.org
Because of spam, only subscribers are allowed to post to these lists
(ensure that you subscribe with and post from exactly the same e-mail
@ -45,8 +45,8 @@ address -- joe@example.com is considered different than
joe@mycomputer.example.com!). Visit these pages to subscribe to the
lists:
http://www.open-mpi.org/mailman/listinfo.cgi/users
http://www.open-mpi.org/mailman/listinfo.cgi/devel
http://lists.open-mpi.org/mailman/listinfo/users
http://lists.open-mpi.org/mailman/listinfo/devel
Thanks for your time.
@ -54,7 +54,7 @@ Thanks for your time.
Much, much more information is also available in the Open MPI FAQ:
http://www.open-mpi.org/faq/
https://www.open-mpi.org/faq/
===========================================================================
@ -108,7 +108,7 @@ General notes
- The majority of Open MPI's documentation is here in this file, the
included man pages, and on the web site FAQ
(http://www.open-mpi.org/).
(https://www.open-mpi.org/).
- Note that Open MPI documentation uses the word "component"
frequently; the word "plugin" is probably more familiar to most
@ -131,7 +131,7 @@ General notes
- Linux (various flavors/distros), 32 bit, with gcc
- Linux (various flavors/distros), 64 bit (x86), with gcc, Absoft,
Intel, and Portland (*)
- OS X (10.6, 10.7, 10.8, 10.9, 10.10), 32 and 64 bit (x86_64), with
- OS X (10.8, 10.9, 10.10, 10.11), 32 and 64 bit (x86_64), with
XCode and Absoft compilers (*)
(*) Be sure to read the Compiler Notes, below.
@ -141,7 +141,7 @@ General notes
- ARMv4, ARMv5, ARMv6, ARMv7, ARMv8
- Other 64 bit platforms (e.g., Linux on PPC64)
- Oracle Solaris 10 and 11, 32 and 64 bit (SPARC, i386, x86_64),
with Oracle Solaris Studio 12.2, 12.3, and 12.4
with Oracle Solaris Studio 12.5
Compiler Notes
--------------
@ -410,10 +410,10 @@ General Run-Time Support Notes
is a shared library), unless using the --prefix or
--enable-mpirun-prefix-by-default functionality (see below).
- Open MPI's run-time behavior can be customized via MCA ("MPI
Component Architecture") parameters (see below for more information
on how to get/set MCA parameter values). Some MCA parameters can be
set in a way that renders Open MPI inoperable (see notes about MCA
- Open MPI's run-time behavior can be customized via MPI Component
Architecture (MCA) parameters (see below for more information on how
to get/set MCA parameter values). Some MCA parameters can be set in
a way that renders Open MPI inoperable (see notes about MCA
parameters later in this file). In particular, some parameters have
required options that must be included.
@ -421,7 +421,7 @@ General Run-Time Support Notes
component, or Open MPI will not be able to deliver messages to the
same rank as the sender. For example: "mpirun --mca btl tcp,self
..."
- If specified, the "btl_tcp_if_exclude" paramater must include the
- If specified, the "btl_tcp_if_exclude" parameter must include the
loopback device ("lo" on many Linux platforms), or Open MPI will
not be able to route MPI messages using the TCP BTL. For example:
"mpirun --mca btl_tcp_if_exclude lo,eth1 ..."
@ -480,7 +480,7 @@ MPI Functionality and Features
by default (it can be disabled via the --disable-libompitrace
flag). This library provides a simplistic tracing of select MPI
function calls via the MPI profiling interface. Linking it in to
your appliation via (e.g., via -lompitrace) will automatically
your application via (e.g., via -lompitrace) will automatically
output to stderr when some MPI functions are invoked:
shell$ cd examples/
@ -595,9 +595,7 @@ Network Support
MXM transport.
- "ob1" supports a variety of networks that can be used in
combination with each other (per OS constraints; e.g., there are
reports that the GM and OpenFabrics kernel drivers do not operate
well together):
combination with each other:
- OpenFabrics: InfiniBand, iWARP, and RoCE
- Loopback (send-to-self)
@ -715,7 +713,7 @@ Open MPI Extensions
a string that contains what resources a process is bound to. See
its man page for more details.
- cr: Provides routines to access to checkpoint restart routines.
See ompi/mpiext/cr/mpiext_cr_c.h for a listing of availble
See ompi/mpiext/cr/mpiext_cr_c.h for a listing of available
functions.
- cuda: When the library is compiled with CUDA-aware support, it provides
two things. First, a macro MPIX_CUDA_AWARE_SUPPORT. Secondly, the
@ -1087,7 +1085,7 @@ MISCELLANEOUS SUPPORT LIBRARIES
installation to use
By default (or if --with-libevent is specified with no VALUE), Open
MPI will build and use the copy of libeveny that it has in its
MPI will build and use the copy of libevent that it has in its
source tree. However, if the VALUE is "external", Open MPI will
look for the relevant libevent header file and library in default
compiler / linker locations. Or, VALUE can be a directory tree
@ -1452,7 +1450,7 @@ format. Each of the three numbers has a specific meaning:
change in the code base and/or end-user functionality, and also
indicate a break from backwards compatibility. Specifically: Open
MPI releases with different major version numbers are not
backwards compatibile with each other.
backwards compatible with each other.
CAVEAT: This rule does not extend to versions prior to v1.10.0.
Specifically: v1.10.x is not guaranteed to be backwards
@ -1767,7 +1765,7 @@ configure script. They are not necessary for MPI applications, but
may be used by applications that use Open MPI's lower layer support
libraries.
orte: Open MPI Run-Time Environment applicaions
orte: Open MPI Run-Time Environment applications
opal: Open Portable Access Layer applications
===========================================================================
@ -1886,7 +1884,7 @@ Back-end run-time environment (RTE) component frameworks:
dfs - Distributed file system
errmgr - RTE error manager
ess - RTE environment-specfic services
ess - RTE environment-specific services
filem - Remote file management
grpcomm - RTE group communications
iof - I/O forwarding
@ -1918,7 +1916,7 @@ hwloc - Hardware locality (hwloc) versioning support
if - OS IP interface support
installdirs - Installation directory relocation services
memchecker - Run-time memory checking
memcpy - Memopy copy support
memcpy - Memory copy support
memory - Memory management hooks
mpool - Memory pooling
patcher - Symbol patcher hooks
@ -2027,7 +2025,7 @@ passed on the mpirun command line will override an environment
variable; an environment variable will override the system-wide
defaults.
Each component typically activates itself when relavant. For example,
Each component typically activates itself when relevant. For example,
the MX component will detect that MX devices are present and will
automatically be used for MPI communications. The SLURM component
will automatically detect when running inside a SLURM job and activate
@ -2066,7 +2064,7 @@ Common Questions
Many common questions about building and using Open MPI are answered
on the FAQ:
http://www.open-mpi.org/faq/
https://www.open-mpi.org/faq/
===========================================================================
@ -2080,24 +2078,24 @@ When submitting questions and problems, be sure to include as much
extra information as possible. This web page details all the
information that we request in order to provide assistance:
http://www.open-mpi.org/community/help/
https://www.open-mpi.org/community/help/
User-level questions and comments should generally be sent to the
user's mailing list (users@open-mpi.org). Because of spam, only
user's mailing list (users@lists.open-mpi.org). Because of spam, only
subscribers are allowed to post to this list (ensure that you
subscribe with and post from *exactly* the same e-mail address --
joe@example.com is considered different than
joe@mycomputer.example.com!). Visit this page to subscribe to the
user's list:
http://www.open-mpi.org/mailman/listinfo.cgi/users
http://lists.open-mpi.org/mailman/listinfo/users
Developer-level bug reports, questions, and comments should generally
be sent to the developer's mailing list (devel@open-mpi.org). Please
be sent to the developer's mailing list (devel@lists.open-mpi.org). Please
do not post the same question to both lists. As with the user's list,
only subscribers are allowed to post to the developer's list. Visit
the following web page to subscribe:
http://www.open-mpi.org/mailman/listinfo.cgi/devel
http://lists.open-mpi.org/mailman/listinfo/devel
Make today an Open MPI day!

View file

@ -3,6 +3,7 @@
# Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
# Copyright (c) 2013 Mellanox Technologies, Inc.
# All rights reserved.
# Copyright (c) 2016 IBM Corporation. All rights reserved.
# This is the VERSION file for Open MPI, describing the precise
# version of Open MPI in this distribution. The various components of
@ -91,6 +92,7 @@ libopen_rte_so_version=0:0:0
libopen_pal_so_version=0:0:0
libmpi_java_so_version=0:0:0
liboshmem_so_version=0:0:0
libompitrace_so_version=0:0:0
# "Common" components install standalone libraries that are run-time
# linked by one or more components. So they need to be versioned as
@ -98,6 +100,7 @@ liboshmem_so_version=0:0:0
# components-don't-affect-the-build-system abstraction.
# OMPI layer
libmca_ompi_common_ompio_so_version=0:0:0
# ORTE layer
libmca_orte_common_alps_so_version=0:0:0

View file

@ -9,7 +9,7 @@
- tmp_inherited_linker_flags=`$ECHO "$inherited_linker_flags" | $SED 's/-framework \([^ $]*\)/\1.ltframework/g'`
+ case "$CC" in
+ nagfor*)
+ tmp_inherited_linker_flags=`$ECHO "$inherited_linker_flags" | $SED 's/-framework \([^ $]*\)/\1.ltframework/g' | $SED 's/-pthread/-Wl,-pthread'`;;
+ tmp_inherited_linker_flags=`$ECHO "$inherited_linker_flags" | $SED 's/-framework \([^ $]*\)/\1.ltframework/g' | $SED 's/-pthread/-Wl,-pthread/g'`;;
+ *)
+ tmp_inherited_linker_flags=`$ECHO "$inherited_linker_flags" | $SED 's/-framework \([^ $]*\)/\1.ltframework/g'`;;
+ esac

View file

@ -18,23 +18,45 @@
# --------------------------------------------------------
# check if cma support is wanted.
AC_DEFUN([OPAL_CHECK_CMA],[
if test -z "$ompi_check_cma_happy" ; then
OPAL_VAR_SCOPE_PUSH([ompi_check_cma_need_defs ompi_check_cma_kernel_version ompi_check_cma_CFLAGS])
AC_ARG_WITH([cma],
[AC_HELP_STRING([--with-cma],
[Build Cross Memory Attach support (default: autodetect)])])
AC_ARG_WITH([cma],
[AC_HELP_STRING([--with-cma],
[Build Cross Memory Attach support (default: autodetect)])])
# We only need to do the back-end test once
if test -z "$opal_check_cma_happy" ; then
OPAL_CHECK_CMA_BACKEND
fi
# Enable CMA support by default if process_vm_readv is defined in glibc
AC_CHECK_FUNC(process_vm_readv, [ompi_check_cma_need_defs=0],
[ompi_check_cma_need_defs=1])
AS_IF([test $opal_check_cma_happy -eq 1],
[$2],
[if test "$with_cma" = "yes"; then
AC_MSG_WARN([--with-cma support requested, but not available])
AC_MSG_ERROR([Cannot continue])
fi
$3])
])
if test $ompi_check_cma_need_defs = 1 ; then
ompi_check_cma_CFLAGS="$CFLAGS"
# Need some extra include paths to locate the appropriate headers
CFLAGS="$CFLAGS -I${srcdir} -I${srcdir}/opal/include"
AC_MSG_CHECKING([if internal syscall numbers for Linux CMA work])
AC_RUN_IFELSE([AC_LANG_PROGRAM([[
AC_DEFUN([OPAL_CHECK_CMA_BACKEND],
[
OPAL_VAR_SCOPE_PUSH([opal_check_cma_need_defs opal_check_cma_kernel_version opal_check_cma_CFLAGS opal_check_cma_msg])
# Some systems have process_vm_readv() in libc, which means CMA is
# supported. Other systems do not have process_vm_readv() in
# libc, but have support for it in the kernel if we invoke it
# directly. Check for both.
AC_CHECK_HEADERS([sys/prctl.h])
AC_CHECK_FUNC([process_vm_readv], [opal_check_cma_need_defs=0],
[opal_check_cma_need_defs=1])
AC_DEFINE_UNQUOTED([OPAL_CMA_NEED_SYSCALL_DEFS],
[$opal_check_cma_need_defs],
[Need CMA syscalls defined])
if test $opal_check_cma_need_defs -eq 1 ; then
opal_check_cma_CFLAGS=$CFLAGS
# Need some extra include paths to locate the appropriate headers
CFLAGS="$CFLAGS -I${srcdir} -I${srcdir}/opal/include"
AC_MSG_CHECKING([if internal syscall numbers for Linux CMA work])
AC_RUN_IFELSE([AC_LANG_PROGRAM([[
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
@ -88,31 +110,23 @@ static void do_check (pid_t pid, int *in, int *out)
/* all good */
return 0;
]])],
[AC_MSG_RESULT([yes])
ompi_check_cma_happy="yes"],
[AC_MSG_RESULT([no])
ompi_check_cma_happy="no"],
[AC_MSG_RESULT([no (cross-compiling)])
ompi_check_cma_happy="no"])
CFLAGS="$ompi_check_cma_CFLAGS"
else
ompi_check_cma_happy="yes"
fi
# If the user specifically requests CMA go ahead and enable it even
# if the glibc version does not support process_vm_readv
if test "x$with_cma" = "xyes" || test "$ompi_check_cma_happy" = "yes" ; then
ompi_check_cma_happy="yes"
AC_DEFINE_UNQUOTED([OPAL_CMA_NEED_SYSCALL_DEFS],
[$ompi_check_cma_need_defs],
[Need CMA syscalls defined])
AC_CHECK_HEADERS([sys/prctl.h])
fi
OPAL_VAR_SCOPE_POP
OPAL_SUMMARY_ADD([[Transports]],[[Shared memory/Linux CMA]],[$1],[$ompi_check_cma_happy])
[AC_MSG_RESULT([yes])
opal_check_cma_happy=1],
[AC_MSG_RESULT([no])
opal_check_cma_happy=0],
[AC_MSG_RESULT([no (cross-compiling)])
opal_check_cma_happy=0])
CFLAGS=$opal_check_cma_CFLAGS
else
# If we didn't need the defs, then we have process_vm_readv(),
# and CMA is happy.
opal_check_cma_happy=1
fi
AS_IF([test "$ompi_check_cma_happy" = "yes"], [$2], [$3])
OPAL_VAR_SCOPE_POP
AS_IF([test $opal_check_cma_happy -eq 1],
[opal_check_cma_msg=yes],
[opal_check_cma_msg=no])
OPAL_SUMMARY_ADD([[Transports]],[[Shared memory/Linux CMA]],[$1],[$opal_check_cma_msg])
])
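For orientation, a minimal sketch of how the --with-cma option handled by this macro behaves at configure time (invocations shown here are illustrative, not taken from this diff):
    # Autodetect Cross Memory Attach support (the default)
    shell$ ./configure
    # Force CMA on; configure aborts with "Cannot continue" if the back-end test fails
    shell$ ./configure --with-cma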

View file

@ -13,7 +13,7 @@ dnl All rights reserved.
dnl Copyright (c) 2012-2015 Cisco Systems, Inc. All rights reserved.
dnl Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved.
dnl Copyright (c) 2014 Intel, Inc. All rights reserved.
dnl Copyright (c) 2015 Research Organization for Information Science
dnl Copyright (c) 2015-2016 Research Organization for Information Science
dnl and Technology (RIST). All rights reserved.
dnl $COPYRIGHT$
dnl
@ -31,15 +31,18 @@ AC_DEFUN([_OPAL_CHECK_PACKAGE_HEADER], [
# cache variable for the library check. one should not copy this
# code into other places unless you want much pain and suffering
AS_VAR_PUSHDEF([opal_Header], [ac_cv_header_$2])
OPAL_VAR_SCOPE_PUSH([dir_prefix])
# so this sucks, but there's no way to get through the progression
# of header includes without killing off the cache variable and trying
# again...
unset opal_Header
# get rid of the trailing slash(es)
dir_prefix=$(echo $3 | sed -e 'sX/*$XXg')
opal_check_package_header_happy="no"
AS_IF([test "$3" = "/usr" || \
test "$3" = "/usr/local"],
AS_IF([test "$dir_prefix" = "/usr" || \
test "$dir_prefix" = "/usr/local"],
[ # try as is...
AC_VERBOSE([looking for header without includes])
AC_CHECK_HEADERS([$2], [opal_check_package_header_happy="yes"], [])
@ -48,14 +51,15 @@ AC_DEFUN([_OPAL_CHECK_PACKAGE_HEADER], [
unset opal_Header])])
AS_IF([test "$opal_check_package_header_happy" = "no"],
[AS_IF([test "$3" != ""],
[$1_CPPFLAGS="$$1_CPPFLAGS -I$3/include"
CPPFLAGS="$CPPFLAGS -I$3/include"])
[AS_IF([test "$dir_prefix" != ""],
[$1_CPPFLAGS="$$1_CPPFLAGS -I$dir_prefix/include"
CPPFLAGS="$CPPFLAGS -I$dir_prefix/include"])
AC_CHECK_HEADERS([$2], [opal_check_package_header_happy="yes"], [], [$6])
AS_IF([test "$opal_check_package_header_happy" = "yes"], [$4], [$5])],
[$4])
unset opal_check_package_header_happy
OPAL_VAR_SCOPE_POP([dir_prefix])
AS_VAR_POPDEF([opal_Header])dnl
])
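The dir_prefix cleanup added above strips trailing slashes so that prefixes such as "/usr/local/" and "/usr/local" compare equal; a quick shell sketch of the sed expression's effect (the input path is hypothetical):
    shell$ echo "/usr/local///" | sed -e 'sX/*$XXg'
    /usr/local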

View file

@ -227,6 +227,8 @@ AC_DEFUN([OPAL_CHECK_PMI],[
AC_DEFUN([OPAL_CHECK_PMIX],[
OPAL_VAR_SCOPE_PUSH([opal_external_pmix_save_CPPFLAGS opal_external_pmix_save_LDFLAGS opal_external_pmix_save_LIBS])
AC_ARG_WITH([pmix],
[AC_HELP_STRING([--with-pmix(=DIR)],
[Build PMIx support. DIR can take one of three values: "internal", "external", or a valid directory name. "internal" (or no DIR value) forces Open MPI to use its internal copy of PMIx. "external" forces Open MPI to use an external installation of PMIx. Supplying a valid directory name also forces Open MPI to use an external installation of PMIx, and adds DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries. Note that Open MPI does not support --without-pmix.])])
@ -241,14 +243,88 @@ AC_DEFUN([OPAL_CHECK_PMIX],[
AS_IF([test -z "$with_pmix" || test "$with_pmix" = "yes" || test "$with_pmix" = "internal"],
[AC_MSG_RESULT([no])
opal_external_pmix_happy=no],
[AC_MSG_RESULT([yes])
# check for external pmix lib */
AS_IF([test "$with_pmix" = "external"],
[pmix_ext_install_dir=/usr],
[pmix_ext_install_dir=$with_pmix])
# Make sure we have the headers and libs in the correct location
OPAL_CHECK_WITHDIR([external-pmix], [$pmix_ext_install_dir/include], [pmix.h])
OPAL_CHECK_WITHDIR([external-libpmix], [$pmix_ext_install_dir/lib], [libpmix.*])
# check the version
opal_external_pmix_save_CPPFLAGS=$CPPFLAGS
opal_external_pmix_save_LDFLAGS=$LDFLAGS
opal_external_pmix_save_LIBS=$LIBS
# if the pmix_version.h file does not exist, then
# this must be from a pre-1.1.5 version
AC_MSG_CHECKING([PMIx version])
CPPFLAGS="-I$pmix_ext_install_dir/include $CPPFLAGS"
AS_IF([test "x`ls $pmix_ext_install_dir/include/pmix_version.h 2> /dev/null`" = "x"],
[AC_MSG_RESULT([version file not found - assuming v1.1.4])
opal_external_pmix_version_found=1
opal_external_pmix_version=114],
[AC_MSG_RESULT([version file found])
opal_external_pmix_version_found=0])
# if it does exist, then we need to parse it to find
# the actual release series
AS_IF([test "$opal_external_pmix_version_found" = "0"],
[AC_MSG_CHECKING([version 3x])
AC_PREPROC_IFELSE([AC_LANG_PROGRAM([
#include <pmix_version.h>
#if (PMIX_VERSION_MAJOR != 3L)
#error "not version 3"
#endif
], [])],
[AC_MSG_RESULT([found])
opal_external_pmix_version=3X
opal_external_pmix_version_found=1],
[AC_MSG_RESULT([not found])])])
AS_IF([test "$opal_external_pmix_version_found" = "0"],
[AC_MSG_CHECKING([version 2x])
AC_PREPROC_IFELSE([AC_LANG_PROGRAM([
#include <pmix_version.h>
#if (PMIX_VERSION_MAJOR != 2L)
#error "not version 2"
#endif
], [])],
[AC_MSG_RESULT([found])
opal_external_pmix_version=2X
opal_external_pmix_version_found=1],
[AC_MSG_RESULT([not found])])])
AS_IF([test "$opal_external_pmix_version_found" = "0"],
[AC_MSG_CHECKING([version 1x])
AC_PREPROC_IFELSE([AC_LANG_PROGRAM([
#include <pmix_version.h>
#if (PMIX_VERSION_MAJOR != 1L)
#error "not version 1"
#endif
], [])],
[AC_MSG_RESULT([found])
opal_external_pmix_version=1X
opal_external_pmix_version_found=1],
[AC_MSG_RESULT([not found])])])
AS_IF([test "x$opal_external_pmix_version" = "x"],
[AC_MSG_WARN([External PMIx support requested, but version])
AC_MSG_WARN([information of the external lib could not])
AC_MSG_WARN([be detected])
AC_MSG_ERROR([cannot continue])])
CPPFLAGS=$opal_external_pmix_save_CPPFLAGS
LDFLAGS=$opal_external_pmix_save_LDFLAGS
LIBS=$opal_external_pmix_save_LIBS
opal_external_pmix_CPPFLAGS="-I$pmix_ext_install_dir/include"
opal_external_pmix_LDFLAGS=-L$pmix_ext_install_dir/lib
opal_external_pmix_LIBS=-lpmix
opal_external_pmix_happy=yes])
OPAL_VAR_SCOPE_POP
])
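A hedged sketch of how the --with-pmix values described in the help string map to configure invocations (the /opt/pmix path is an assumption for illustration):
    # Use the PMIx copy bundled in the Open MPI source tree (the default)
    shell$ ./configure --with-pmix=internal
    # Use an external PMIx installation (treated as rooted at /usr)
    shell$ ./configure --with-pmix=external
    # Use an external PMIx rooted at a specific prefix
    shell$ ./configure --with-pmix=/opt/pmix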

View file

@ -848,34 +848,28 @@ AC_DEFUN([OPAL_CHECK_INLINE_C_GCC],[
AC_MSG_CHECKING([if $CC supports GCC inline assembly])
if test "$opal_cv_c_compiler_vendor" = "portland group" ; then
# PGI seems to have some issues with our inline assembly.
# Disable for now.
asm_result="no (Portland Group)"
if test ! "$assembly" = "" ; then
AC_RUN_IFELSE([AC_LANG_PROGRAM([AC_INCLUDES_DEFAULT],[[
int ret = 1;
int negone = -1;
__asm__ __volatile__ ($assembly);
return ret;
]])],
[asm_result="yes"], [asm_result="no"],
[asm_result="unknown"])
else
if test ! "$assembly" = "" ; then
AC_RUN_IFELSE([AC_LANG_PROGRAM([
AC_INCLUDES_DEFAULT],
[[int ret = 1;
int negone = -1;
__asm__ __volatile__ ($assembly);
return ret;]])],
[asm_result="yes"], [asm_result="no"],
[asm_result="unknown"])
else
assembly="test skipped - assuming no"
fi
assembly="test skipped - assuming no"
fi
# if we're cross compiling, just try to compile and figure good enough
if test "$asm_result" = "unknown" ; then
AC_LINK_IFELSE([AC_LANG_PROGRAM([
AC_INCLUDES_DEFAULT],
[[int ret = 1;
# if we're cross compiling, just try to compile and figure good enough
if test "$asm_result" = "unknown" ; then
AC_LINK_IFELSE([AC_LANG_PROGRAM([AC_INCLUDES_DEFAULT],[[
int ret = 1;
int negone = -1;
__asm__ __volatile__ ($assembly);
return ret;]])],
[asm_result="yes"], [asm_result="no"])
fi
return ret;
]])],
[asm_result="yes"], [asm_result="no"])
fi
AC_MSG_RESULT([$asm_result])

View file

@ -11,7 +11,7 @@ dnl University of Stuttgart. All rights reserved.
dnl Copyright (c) 2004-2005 The Regents of the University of California.
dnl All rights reserved.
dnl Copyright (c) 2006-2010 Oracle and/or its affiliates. All rights reserved.
dnl Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved.
dnl Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved.
dnl Copyright (c) 2015-2016 Research Organization for Information Science
dnl and Technology (RIST). All rights reserved.
dnl $COPYRIGHT$
@ -247,7 +247,7 @@ m4_ifdef([project_ompi],[
# (because if script A sources script B, and B calls "exit", then both
# B and A will exit). Instead, we have to send the output to a file
# and then source that.
$OPAL_TOP_BUILDDIR/libtool --tag=FC--config > $rpath_outfile
$OPAL_TOP_BUILDDIR/libtool --tag=FC --config > $rpath_outfile
chmod +x $rpath_outfile
. ./$rpath_outfile

View file

@ -22,6 +22,7 @@
# Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
# Copyright (c) 2014-2016 Research Organization for Information Science
# and Technology (RIST). All rights reserved.
# Copyright (c) 2016 IBM Corporation. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@ -134,7 +135,8 @@ m4_ifdef([project_ompi],
AC_SUBST(libmpi_usempi_tkr_so_version)
AC_SUBST(libmpi_usempi_ignore_tkr_so_version)
AC_SUBST(libmpi_usempif08_so_version)
AC_SUBST(libmpi_java_so_version)])
AC_SUBST(libmpi_java_so_version)
AC_SUBST(libompitrace_so_version)])
m4_ifdef([project_orte],
[AC_SUBST(libopen_rte_so_version)])
m4_ifdef([project_oshmem],
@ -151,6 +153,7 @@ AC_SUBST(libmca_opal_common_sm_so_version)
AC_SUBST(libmca_opal_common_ugni_so_version)
AC_SUBST(libmca_opal_common_verbs_so_version)
AC_SUBST(libmca_orte_common_alps_so_version)
AC_SUBST(libmca_ompi_common_ompio_so_version)
#
# Get the versions of the autotools that were used to bootstrap us
@ -249,6 +252,7 @@ m4_ifdef([project_oshmem],
OPAL_CONFIGURE_OPTIONS
OPAL_CHECK_OS_FLAVORS
OPAL_CHECK_CUDA
OPAL_CHECK_PMIX
m4_ifdef([project_orte], [ORTE_CONFIGURE_OPTIONS])
m4_ifdef([project_ompi], [OMPI_CONFIGURE_OPTIONS])
m4_ifdef([project_oshmem], [OSHMEM_CONFIGURE_OPTIONS])

167
contrib/build-server/hwloc-nightly-coverity.pl Executable file
View file

@ -0,0 +1,167 @@
#!/usr/bin/env perl
use warnings;
use strict;
use Getopt::Long;
use File::Temp qw/ tempfile tempdir /;
use File::Basename;
my $coverity_project = "hwloc";
# Coverity changes this URL periodically
my $coverity_tool_url = "https://scan.coverity.com/download/cxx/linux64";
my $filename_arg;
my $coverity_token_arg;
my $dry_run_arg = 0;
my $verbose_arg = 0;
my $debug_arg = 0;
my $logfile_dir_arg;
my $configure_args = "";
my $make_args = "-j 32";
my $help_arg = 0;
&Getopt::Long::Configure("bundling");
my $ok = Getopt::Long::GetOptions("filename=s" => \$filename_arg,
"coverity-token=s" => \$coverity_token_arg,
"logfile-dir=s" => \$logfile_dir_arg,
"configure-args=s" => \$configure_args,
"make-args=s" => \$make_args,
"dry-run!" => \$dry_run_arg,
"verbose!" => \$verbose_arg,
"debug!" => \$debug_arg,
"help|h" => \$help_arg);
$ok = 0
if (!defined($filename_arg));
$ok = 0
if (!defined($coverity_token_arg));
if (!$ok || $help_arg) {
print "Usage: $0 --filename=FILENAME --coverity-token=TOKEN [--dry-run] [--verbose] [--help]\n";
exit($ok);
}
die "Cannot read $filename_arg"
if (! -r $filename_arg);
$verbose_arg = 1
if ($debug_arg);
######################################################################
sub verbose {
print @_
if ($verbose_arg);
}
# run a command and save the stdout / stderr
sub safe_system {
my $allowed_to_fail = shift;
my $cmd = shift;
my $stdout_file = shift;
# Redirect stdout if requested or not verbose
if (defined($stdout_file)) {
$stdout_file = "$logfile_dir_arg/$stdout_file";
unlink($stdout_file);
$cmd .= " >$stdout_file";
} elsif (!$debug_arg) {
$cmd .= " >/dev/null";
}
$cmd .= " 2>&1";
my $rc = system($cmd);
if (0 != $rc && !$allowed_to_fail) {
# If we die/fail, ensure to change out of the temp tree so
# that it can be removed upon exit.
chdir("/");
print "Command $cmd failed: exit status $rc\n";
if (defined($stdout_file) && -f $stdout_file) {
print "Last command output:\n";
system("cat $stdout_file");
}
die "Cannot continue";
}
system("cat $stdout_file")
if ($debug_arg && defined($stdout_file) && -f $stdout_file);
}
######################################################################
# Make an area to work
my $dir = tempdir(CLEANUP => 0);
chdir($dir);
verbose "*** Working in $dir\n";
######################################################################
# Get the coverity tool, put it in our path
my $cdir = "$ENV{HOME}/coverity";
safe_system(0, "mkdir $cdir")
if (! -d $cdir);
# Optimization: the tool is pretty large. If our local copy is less
# than a day old, just use that without re-downloading.
my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
$atime,$mtime,$ctime,$blksize,$blocks) =
stat("$cdir/coverity_tool.tgz");
my $now = time();
if (!defined($mtime) || $mtime < $now - 24*60*60) {
verbose "*** Downloading new copy of the coverity tool\n";
safe_system(0, "wget $coverity_tool_url --post-data \"token=$coverity_token_arg&project=$coverity_project\" -O coverity_tool.tgz");
safe_system(0, "cp coverity_tool.tgz $cdir");
}
verbose "*** Expanding coverity tool tarball\n";
safe_system(0, "tar xf $cdir/coverity_tool.tgz");
opendir(my $dh, ".") ||
die "Can't opendir .";
my @files = grep { /^cov/ && -d "./$_" } readdir($dh);
closedir($dh);
my $cov_dir = "$dir/$files[0]/bin";
$ENV{PATH} = "$cov_dir:$ENV{PATH}";
######################################################################
# Expand the HWLOC tarball, build it
verbose "*** Extracting HWLOC tarball\n";
safe_system(0, "tar xf $filename_arg");
my $tarball_filename = basename($filename_arg);
$tarball_filename =~ m/^hwloc-(.+)\.tar.+$/;
my $hwloc_ver = $1;
chdir("hwloc-$hwloc_ver");
verbose "*** Configuring HWLOC tarball\n";
safe_system(0, "./configure $configure_args", "configure");
verbose "*** Building HWLOC tarball\n";
safe_system(0, "cov-build --dir cov-int make $make_args", "cov-build");
# Tar up the Coverity results
verbose "*** Tarring up results\n";
safe_system(0, "tar jcf $hwloc_ver-analyzed.tar.bz2 cov-int");
# If not dry-run, submit to Coverity
if ($dry_run_arg) {
verbose "*** Would have submitted, but this is a dry run\n";
} else {
verbose "*** Submitting results\n";
safe_system(0, "curl --form token=$coverity_token_arg " .
"--form email=brice.goglin\@labri.fr " .
"--form file=\@$hwloc_ver-analyzed.tar.bz2 " .
"--form version=$hwloc_ver " .
"--form description=nightly-master " .
"https://scan.coverity.com/builds?project=hwloc",
"coverity-submit");
}
verbose("*** All done\n");
# Chdir out of the tempdir so that it can be removed
chdir("/");
exit(0);
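A plausible invocation of this script, mirroring how the nightly tarball script elsewhere in this commit calls it (the tarball path and the <snapshot> placeholder are assumptions; the token file matches the one referenced in hwloc-nightly-tarball.sh):
    shell$ ./hwloc-nightly-coverity.pl \
        --filename=$HOME/hwloc/nightly/master/hwloc-<snapshot>.tar.bz2 \
        --coverity-token=`cat $HOME/coverity/hwloc-token.txt` \
        --logfile-dir=$HOME/coverity \
        --make-args="-j8" \
        --verbose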

View file

@ -7,30 +7,39 @@
#####
# e-mail address to send results to
results_addr=hwloc-devel@open-mpi.org
results_addr=hwloc-devel@lists.open-mpi.org
#results_addr=rhc@open-mpi.org
# svn repository uri
code_uri=http://svn.open-mpi.org/svn/hwloc
# git repository URL
code_uri=https://github.com/open-mpi/hwloc.git
raw_uri=https://raw.github.com/open-mpi/hwloc
# where to put built tarballs
outputroot=/l/osl/www/www.open-mpi.org/software/hwloc/nightly
outputroot=$HOME/hwloc/nightly
# where to find the build script
script_uri=${code_uri}/trunk/contrib/nightly/create_tarball.sh
script_uri=contrib/nightly/create_tarball.sh
script_uri=contrib/nightly/make_snapshot_tarball
# helper scripts dir
script_dir=$HOME/ompi/contrib/build-server
# The tarballs to make
if [ $# -eq 0 ] ; then
dirs="/trunk /branches/v1.5 /branches/v1.4 /branches/v1.3 /branches/v1.2 /branches/v1.1 /branches/v1.0"
# Branches v1.6 and earlier were not updated to build nightly
# snapshots from git, so only check v1.7 and later
branches="master v1.11"
else
dirs=$@
branches=$@
fi
# Build root - scratch space
build_root=/home/mpiteam/hwloc/nightly-tarball-build-root
build_root=$HOME/hwloc/nightly-tarball-build-root
export PATH=$HOME/local/bin:$PATH
export LD_LIBRARY_PATH=$HOME/local/lib:$LD_LIBRARY_PATH
# Coverity stuff
coverity_token=`cat $HOME/coverity/hwloc-token.txt`
export PATH=$HOME_PREFIX/bin:$PATH
export LD_LIBRARY_PATH=$HOME_PREFIX/lib:$LD_LIBRARY_PATH
#####
#
@ -39,23 +48,29 @@ export LD_LIBRARY_PATH=$HOME/local/lib:$LD_LIBRARY_PATH
#####
# load the modules configuration
. /etc/profile.d/modules.sh
module use ~/modules
. $MODULE_INIT
module use $AUTOTOOL_MODULE
# get our nightly build script
mkdir -p $build_root
cd $build_root
# Loop making them
for dir in $dirs; do
# Remove leading /
safe_dirname=`echo $dir | sed -e 's/^\///g'`
# Convert remaining /'s to -'s
safe_dirname=`echo $safe_dirname | sed -e 's/\//-/g'`
# Now form a URL-specific script name
script=$safe_dirname-`basename $script_uri`
pending_coverity=$build_root/tarballs-to-run-through-coverity.txt
rm -f $pending_coverity
touch $pending_coverity
wget --quiet --no-check-certificate --tries=10 $code_uri/$dir/$script_uri -O $script
# Loop making them
module unload autotools
for branch in $branches; do
echo "=== Branch: $branch"
# Get the last tarball version that was made
prev_snapshot=`cat $outputroot/$branch/latest_snapshot.txt`
# Form a URL-specific script name
script=$branch-`basename $script_uri`
echo "=== Getting script from: $raw_uri"
wget --quiet --no-check-certificate --tries=10 $raw_uri/$branch/$script_uri -O $script
if test ! $? -eq 0 ; then
echo "wget of hwloc nightly tarball create script failed."
if test -f $script ; then
@ -67,16 +82,60 @@ for dir in $dirs; do
fi
chmod +x $script
ver=`basename $dir`
module load "autotools/hwloc-$branch"
# module load "tex-live/hwloc-$branch"
module load "autotools/hwloc-$ver"
module load "tex-live/hwloc-$ver"
echo "=== Running script..."
./$script \
$build_root/$branch \
$results_addr \
$outputroot/$branch \
$code_uri \
$branch \
>/dev/null 2>&1
./$script \
$build_root/$ver \
$results_addr \
$code_uri/$dir \
$outputroot/$ver >/dev/null 2>&1
module unload autotools
echo "=== Done running script"
# Did the script generate a new tarball? If so, save it so that we can
# spawn the coverity checker on it afterwards. Only for this for the
# master (for now).
latest_snapshot=`cat $outputroot/$branch/latest_snapshot.txt`
echo "=== Latest snapshot: $latest_snapshot"
if test "$prev_snapshot" != "$latest_snapshot"; then
if test "$branch" = "master"; then
echo "=== Saving output for a Coverity run"
echo "$outputroot/$branch/hwloc-$latest_snapshot.tar.bz2" >> $pending_coverity
else
echo "=== NOT saving output for a Coverity run"
fi
echo "=== Posting tarball to open-mpi.org"
# tell the web server to cleanup old nightly tarballs
ssh -p 2222 ompiteam@192.185.39.252 "git/ompi/contrib/build-server/remove-old.pl 7 public_html/software/hwloc/nightly/$branch"
# upload the new ones
scp -P 2222 $outputroot/$branch/hwloc-$latest_snapshot.tar.* ompiteam@192.185.39.252:public_html/software/hwloc/nightly/$branch/
scp -P 2222 $outputroot/$branch/latest_snapshot.txt ompiteam@192.185.39.252:public_html/software/hwloc/nightly/$branch/
# direct the web server to regenerate the checksums
ssh -p 2222 ompiteam@192.185.39.252 "cd public_html/software/hwloc/nightly/$branch && md5sum hwloc* > md5sums.txt"
ssh -p 2222 ompiteam@192.185.39.252 "cd public_html/software/hwloc/nightly/$branch && sha1sum hwloc* > sha1sums.txt"
fi
# Failed builds are not removed. But if a human forgets to come
# in here and clean up the old failed builds, we can accumulate
# many over time. So remove any old failed builds that are over
# 4 weeks old.
${script_dir}/remove-old.pl 7 $build_root/$branch
module unload autotools tex-live
done
# If we had any new snapshots to send to coverity, process them now
for tarball in `cat $pending_coverity`; do
${script_dir}/hwloc-nightly-coverity.pl \
--filename=$tarball \
--coverity-token=$coverity_token \
--verbose \
--logfile-dir=$HOME/coverity \
--make-args="-j8"
done
rm -f $pending_coverity

View file

@ -8,6 +8,8 @@ use File::Temp qw/ tempfile tempdir /;
use File::Basename;
my $coverity_project = "Open+MPI";
# Coverity changes this URL periodically
my $coverity_tool_url = "https://scan.coverity.com/download/cxx/linux64";
my $filename_arg;
my $coverity_token_arg;
@ -103,7 +105,7 @@ my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
my $now = time();
if (!defined($mtime) || $mtime < $now - 24*60*60) {
verbose "*** Downloading new copy of the coverity tool\n";
safe_system(0, "wget https://scan.coverity.com/download/linux-64 --post-data \"token=$coverity_token_arg\&project=$coverity_project\" -O coverity_tool.tgz");
safe_system(0, "wget $coverity_tool_url --post-data \"token=$coverity_token_arg&project=$coverity_project\" -O coverity_tool.tgz");
safe_system(0, "cp coverity_tool.tgz $cdir");
}

View file

@ -7,7 +7,8 @@
#####
# e-mail address to send results to
results_addr=testing@open-mpi.org
results_addr=testing@lists.open-mpi.org
#results_addr=rhc@open-mpi.org
# svn repository uri
master_code_uri=https://github.com/open-mpi/ompi.git
@ -15,32 +16,33 @@ master_raw_uri=https://raw.github.com/open-mpi/ompi
release_code_uri=https://github.com/open-mpi/ompi-release.git
release_raw_uri=https://raw.github.com/open-mpi/ompi-release
# where to put built tarballs
outputroot=/l/osl/www/www.open-mpi.org/nightly
# where to put built tarballs - needs to be
# adjusted to match your site!
outputroot=$HOME/openmpi/nightly
# where to find the build script
script_uri=contrib/nightly/create_tarball.sh
# helper scripts dir
script_dir=/u/mpiteam/scripts
script_dir=$HOME/ompi/contrib/build-server
# The tarballs to make
if [ $# -eq 0 ] ; then
# We're no longer ever checking the 1.0 - 1.6 branches anymore
branches="master v1.8 v1.10 v2.x"
# We're no longer ever checking the 1.0 - 1.8 branches anymore
branches="master v1.10 v2.x v2.0.x"
else
branches=$@
fi
# Build root - scratch space
build_root=/home/mpiteam/openmpi/nightly-tarball-build-root
build_root=$HOME/openmpi/nightly-tarball-build-root
# Coverity stuff
coverity_token=`cat $HOME/coverity/openmpi-token.txt`
coverity_configure_args="--enable-debug --enable-mpi-fortran --enable-mpi-java --enable-oshmem --enable-oshmem-fortran --enable-oshmem-java --with-mxm=/opt/mellanox/mxm --with-psm --with-usnic --with-libfabric=/u/mpiteam/libfabric-current/install"
coverity_configure_args="--enable-debug --enable-mpi-fortran --enable-mpi-java --enable-oshmem --enable-oshmem-fortran --with-psm --with-usnic --with-libfabric"
export PATH=$HOME/local/bin:$PATH
export LD_LIBRARY_PATH=$HOME/local/lib:$LD_LIBRARY_PATH
export PATH=$HOME_PREFIX/bin:$PATH
export LD_LIBRARY_PATH=$HOME_PREFIX/lib:$LD_LIBRARY_PATH
#####
#
@ -49,8 +51,8 @@ export LD_LIBRARY_PATH=$HOME/local/lib:$LD_LIBRARY_PATH
#####
# load the modules configuration
. /etc/profile.d/modules.sh
module use ~/modules
. $MODULE_INIT
module use $AUTOTOOL_MODULE
# get our nightly build script
mkdir -p $build_root
@ -111,26 +113,38 @@ for branch in $branches; do
# master (for now).
latest_snapshot=`cat $outputroot/$branch/latest_snapshot.txt`
echo "=== Latest snapshot: $latest_snapshot"
if test "$prev_snapshot" != "$latest_snapshot" && \
test "$branch" = "master"; then
echo "=== Saving output for a Coverity run"
echo "$outputroot/$branch/openmpi-$latest_snapshot.tar.bz2" >> $pending_coverity
else
echo "=== NOT saving output for a Coverity run"
if test "$prev_snapshot" != "$latest_snapshot"; then
if test "$branch" = "master"; then
echo "=== Saving output for a Coverity run"
echo "$outputroot/$branch/openmpi-$latest_snapshot.tar.bz2" >> $pending_coverity
else
echo "=== NOT saving output for a Coverity run"
fi
echo "=== Posting tarball to open-mpi.org"
# tell the web server to cleanup old nightly tarballs
ssh -p 2222 ompiteam@192.185.39.252 "git/ompi/contrib/build-server/remove-old.pl 7 public_html/nightly/$branch"
# upload the new ones
scp -P 2222 $outputroot/$branch/openmpi-$latest_snapshot.tar.* ompiteam@192.185.39.252:public_html/nightly/$branch/
scp -P 2222 $outputroot/$branch/latest_snapshot.txt ompiteam@192.185.39.252:public_html/nightly/$branch/
# direct the web server to regenerate the checksums
ssh -p 2222 ompiteam@192.185.39.252 "cd public_html/nightly/$branch && md5sum openmpi* > md5sums.txt"
ssh -p 2222 ompiteam@192.185.39.252 "cd public_html/nightly/$branch && sha1sum openmpi* > sha1sums.txt"
fi
# Failed builds are not removed. But if a human forgets to come
# in here and clean up the old failed builds, we can accumulate
# many over time. So remove any old failed bbuilds that are over
# many over time. So remove any old failed builds that are over
# 4 weeks old.
${script_dir}/remove-old.pl 7 $build_root/$branch
done
# If we had any new snapshots to send to coverity, process them now
for tarball in `cat $pending_coverity`; do
echo "=== Submitting $tarball to Coverity..."
$HOME/scripts/openmpi-nightly-coverity.pl \
${script_dir}/openmpi-nightly-coverity.pl \
--filename=$tarball \
--coverity-token=$coverity_token \
--verbose \

162
contrib/build-server/pmix-nightly-coverity.pl Executable file
View file

@ -0,0 +1,162 @@
#!/usr/bin/env perl
use warnings;
use strict;
use Getopt::Long;
use File::Temp qw/ tempfile tempdir /;
use File::Basename;
my $coverity_project = "open-mpi%2Fpmix";
# Coverity changes this URL periodically
my $coverity_tool_url = "https://scan.coverity.com/download/cxx/linux64";
my $filename_arg;
my $coverity_token_arg;
my $dry_run_arg = 0;
my $verbose_arg = 0;
my $debug_arg = 0;
my $logfile_dir_arg = "/tmp";
my $configure_args = "";
my $make_args = "-j 32";
my $help_arg = 0;
&Getopt::Long::Configure("bundling");
my $ok = Getopt::Long::GetOptions("filename=s" => \$filename_arg,
"coverity-token=s" => \$coverity_token_arg,
"logfile-dir=s" => \$logfile_dir_arg,
"configure-args=s" => \$configure_args,
"make-args=s" => \$make_args,
"dry-run!" => \$dry_run_arg,
"verbose!" => \$verbose_arg,
"debug!" => \$debug_arg,
"help|h" => \$help_arg);
$ok = 0
if (!defined($filename_arg));
$ok = 0
if (!defined($coverity_token_arg));
if (!$ok || $help_arg) {
print "Usage: $0 --filename=FILENAME --coverity-token=TOKEN [--dry-run] [--verbose] [--help]\n";
exit($ok);
}
die "Cannot read $filename_arg"
if (! -r $filename_arg);
$verbose_arg = 1
if ($debug_arg);
######################################################################
sub verbose {
print @_
if ($verbose_arg);
}
# run a command and save the stdout / stderr
sub safe_system {
my $allowed_to_fail = shift;
my $cmd = shift;
my $stdout_file = shift;
# Redirect stdout if requested or not verbose
if (defined($stdout_file)) {
$stdout_file = "$logfile_dir_arg/$stdout_file";
unlink($stdout_file);
$cmd .= " >$stdout_file";
} elsif (!$debug_arg) {
$cmd .= " >/dev/null";
}
$cmd .= " 2>&1";
my $rc = system($cmd);
if (0 != $rc && !$allowed_to_fail) {
# If we die/fail, ensure to change out of the temp tree so
# that it can be removed upon exit.
chdir("/");
die "Command $cmd failed: exit status $rc";
}
system("cat $stdout_file")
if ($debug_arg && defined($stdout_file) && -f $stdout_file);
}
######################################################################
# Make an area to work
my $dir = tempdir(CLEANUP => 1);
chdir($dir);
verbose "*** Working in $dir\n";
######################################################################
# Get the coverity tool, put it in our path.
my $cdir = "/home/common/mpiteam/coverity";
safe_system(0, "mkdir $cdir")
if (! -d $cdir);
# Optimization: the tool is pretty large. If our local copy is less
# than a day old, just use that without re-downloading.
my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
$atime,$mtime,$ctime,$blksize,$blocks) =
stat("$cdir/coverity_tool.tgz");
my $now = time();
if (!defined($mtime) || $mtime < $now - 24*60*60) {
verbose "*** Downloading new copy of the coverity tool\n";
safe_system(0, "wget $coverity_tool_url --post-data \"token=$coverity_token_arg&project=$coverity_project\" -O coverity_tool.tgz");
safe_system(0, "cp coverity_tool.tgz $cdir");
}
verbose "*** Expanding coverity tool tarball\n";
safe_system(0, "tar xf $cdir/coverity_tool.tgz");
opendir(my $dh, ".") ||
die "Can't opendir .";
my @files = grep { /^cov/ && -d "./$_" } readdir($dh);
closedir($dh);
my $cov_dir = "$dir/$files[0]/bin";
$ENV{PATH} = "$cov_dir:$ENV{PATH}";
######################################################################
# Expand the PMIX tarball, build it
verbose "*** Extracting PMIX tarball\n";
safe_system(0, "tar xf $filename_arg");
my $tarball_filename = basename($filename_arg);
$tarball_filename =~ m/^pmix-(.+)\.tar.+$/;
my $pmix_ver = $1;
chdir("pmix-$pmix_ver");
verbose "*** Configuring PMIX tarball\n";
safe_system(0, "./configure $configure_args", "configure");
verbose "*** Building PMIX tarball\n";
safe_system(0, "cov-build --dir cov-int make $make_args", "cov-build");
# Tar up the Coverity results
verbose "*** Tarring up results\n";
safe_system(0, "tar jcf $pmix_ver-analyzed.tar.bz2 cov-int");
# If not dry-run, submit to Coverity
if ($dry_run_arg) {
verbose "*** Would have submitted, but this is a dry run\n";
} else {
verbose "*** Submitting results\n";
safe_system(0, "curl --form token=$coverity_token_arg " .
"--form email=rhc\@open-mpi.org " .
"--form file=\@$pmix_ver-analyzed.tar.bz2 " .
"--form version=$pmix_ver " .
"--form description=nightly-master " .
"https://scan.coverity.com/builds?project=$coverity_project",
"coverity-submit");
}
verbose("*** All done\n");
# Chdir out of the tempdir so that it can be removed
chdir("/");
exit(0);

152
contrib/build-server/pmix-nightly-tarball.sh Executable file
View file

@ -0,0 +1,152 @@
#!/bin/sh
#####
#
# Configuration options
#
#####
# e-mail address to send results to
#results_addr=testing@lists.open-mpi.org
results_addr=rhc@open-mpi.org
# svn repository uri
master_code_uri=https://github.com/pmix/master.git
master_raw_uri=https://raw.github.com/pmix/master
release_code_uri=https://github.com/pmix/releases.git
release_raw_uri=https://raw.github.com/pmix/releases
# where to put built tarballs
outputroot=$HOME/pmix/nightly
# where to find the build script
script_uri=contrib/nightly/create_tarball.sh
# helper scripts dir
script_dir=$HOME/ompi/contrib/build-server
# The tarballs to make
if [ $# -eq 0 ] ; then
branches="master"
else
branches=$@
fi
# Build root - scratch space
build_root=$HOME/pmix/nightly-tarball-build-root
# Coverity stuff
coverity_token=`cat /home/common/mpiteam/coverity/pmix-token.txt`
coverity_configure_args="--with-libevent=/home/common/local"
export PATH=$HOME_PREFIX/bin:$PATH
export LD_LIBRARY_PATH=$HOME_PREFIX/lib:$LD_LIBRARY_PATH
#####
#
# Actually do stuff
#
#####
# load the modules configuration
. $MODULE_INIT
module use $AUTOTOOL_MODULE
# get our nightly build script
mkdir -p $build_root
cd $build_root
pending_coverity=$build_root/tarballs-to-run-through-coverity.txt
rm -f $pending_coverity
touch $pending_coverity
# Loop making the tarballs
module unload autotools
for branch in $branches; do
echo "=== Branch: $branch"
# Get the last tarball version that was made
prev_snapshot=`cat $outputroot/$branch/latest_snapshot.txt`
echo "=== Previous snapshot: $prev_snapshot"
if test "$branch" = "master"; then
code_uri=$master_code_uri
raw_uri=$master_raw_uri
else
code_uri=$release_code_uri
raw_uri=$release_raw_uri
fi
# Form a URL-specific script name
script=$branch-`basename $script_uri`
echo "=== Getting script from: $raw_uri"
wget --quiet --no-check-certificate --tries=10 $raw_uri/$branch/$script_uri -O $script
if test ! $? -eq 0 ; then
echo "wget of PMIX nightly tarball create script failed."
if test -f $script ; then
echo "Using older version of $script for this run."
else
echo "No build script available. Aborting."
exit 1
fi
fi
chmod +x $script
module load "autotools/pmix-$branch"
# module load "libevent/pmix-$branch"
echo "=== Running script..."
./$script \
$build_root/$branch \
$results_addr \
$outputroot/$branch \
$code_uri \
$branch \
>/dev/null 2>&1
module unload autotools
echo "=== Done running script"
# Did the script generate a new tarball? If so, save it so that we can
# spawn the coverity checker on it afterwards. Only do this for the
# master (for now).
latest_snapshot=`cat $outputroot/$branch/latest_snapshot.txt`
echo "=== Latest snapshot: $latest_snapshot"
if test "$prev_snapshot" != "$latest_snapshot"; then
if test "$branch" = "master"; then
echo "=== Saving output for a Coverity run"
echo "$outputroot/$branch/pmix-$latest_snapshot.tar.bz2" >> $pending_coverity
else
echo "=== NOT saving output for a Coverity run"
fi
echo "=== Posting tarball to open-mpi.org"
# tell the web server to clean up old nightly tarballs
ssh -p 2222 ompiteam@192.185.39.252 "git/ompi/contrib/build-server/remove-old.pl 7 public_html/software/pmix/nightly/$branch"
# upload the new ones
scp -P 2222 $outputroot/$branch/pmix-$latest_snapshot.tar.* ompiteam@192.185.39.252:public_html/software/pmix/nightly/$branch/
scp -P 2222 $outputroot/$branch/latest_snapshot.txt ompiteam@192.185.39.252:public_html/software/pmix/nightly/$branch/
# direct the web server to regenerate the checksums
ssh -p 2222 ompiteam@192.185.39.252 "cd public_html/software/pmix/nightly/$branch && md5sum pmix* > md5sums.txt"
ssh -p 2222 ompiteam@192.185.39.252 "cd public_html/software/pmix/nightly/$branch && sha1sum pmix* > sha1sums.txt"
fi
# Failed builds are not removed. But if a human forgets to come
# in here and clean up the old failed builds, we can accumulate
# many over time. So remove any old failed builds that are over
# 4 weeks old.
${script_dir}/remove-old.pl 28 $build_root/$branch
done
# If we had any new snapshots to send to coverity, process them now
for tarball in `cat $pending_coverity`; do
echo "=== Submitting $tarball to Coverity..."
${script_dir}/pmix-nightly-coverity.pl \
--filename=$tarball \
--coverity-token=$coverity_token \
--verbose \
--logfile-dir=$HOME/coverity \
--make-args=-j8 \
--configure-args="$coverity_configure_args"
done
rm -f $pending_coverity

contrib/build-server/pmix-release.sh (new executable file)

@ -0,0 +1,52 @@
#!/bin/sh -x
# The tarballs to make
if [ $# -eq 0 ] ; then
branches="v1.0"
else
branches=$1
shift
fi
# Build root - scratch space
build_root=/home/mpiteam/pmix/release
# Script to execute
script=contrib/make_dist_tarball
export PATH=$HOME/local/bin:$PATH
export LD_LIBRARY_PATH=$HOME/local/lib:$LD_LIBRARY_PATH
#####
#
# Actually do stuff
#
#####
# load the modules configuration
. /etc/profile.d/modules.sh
module use ~/modules
# move to the directory
# Loop making them
for branch in $branches; do
cd $build_root/$branch
module load "autotools/pmix-$branch"
module load libevent/pmix-$branch
./$script $@ >dist.out 2>&1
if test "$?" != "0"; then
cat <<EOF
=============================================================================
== Dist failure
== Last few lines of output (full results in dist.out file):
=============================================================================
EOF
tail -n 20 dist.out
exit 1
fi
module unload libevent
module unload autotools
done

contrib/cleanperms (new executable file)

@ -0,0 +1,11 @@
#!/usr/bin/bash
find . -type f -name "*.c" -perm /u+x -print -exec chmod -x {} \;
find . -type f -name Makefile.am -perm /u+x -print -exec chmod -x {} \;
find . -type f -name "*.h" -perm /u+x -print -exec chmod -x {} \;
find . -type f -name Makefile.include -perm /u+x -print -exec chmod -x {} \;
find . -type f -name Makefile -perm /u+x -print -exec chmod -x {} \;
find . -type f -name "*.m4" -perm /u+x -print -exec chmod -x {} \;
find . -type f -name "*.ac" -perm /u+x -print -exec chmod -x {} \;
find . -type f -name "*.txt" -perm /u+x -print -exec chmod -x {} \;
find . -type f -name "*.l" -perm /u+x -print -exec chmod -x {} \;

contrib/dist/make-authors.pl (vendored)

@ -1,9 +1,10 @@
#!/usr/bin/env perl
#
# Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2008-2016 Cisco Systems, Inc. All rights reserved.
#
use strict;
use Data::Dumper;
# Ensure that we're in the root of a writeable Git clone
@ -14,149 +15,213 @@ $in_git_clone = 0
######################################################################
my $header_sep = "-----";
my $unknown_org = "********* NO ORGANIZATION SET ********";
my $people;
######################################################################
# Run git log to get a list of committers
my $committers;
open (GIT, "git log --pretty=format:%ae|") || die "Can't run 'git log'.";
open (GIT, "git log --format=tformat:'%aN <%aE>'|") || die "Can't run 'git log'.";
while (<GIT>) {
chomp;
m/^\s*([\S]+)\s*$/;
m/^\s*(.+)\s+<(.+)>\s*$/;
if (!exists($committers->{$1})) {
$committers->{$1} = { };
print "Found Git commit email: $1\n";
if (!exists($people->{$1})) {
# The person doesn't exist, so save a new entry
$people->{$1} = {
name => $1,
org => $unknown_org,
emails => {
lc($2) => 1,
}
};
print "Found Git committer: $1 <$2>\n";
} else {
# The person already exists, so just add (or overwrite) this
# email address
$people->{$1}->{emails}->{$2} = 1;
}
}
close(GIT);
# Read the existing AUTHORS file to get the header, footer, and Git
# email ID -> (gecos, affiliation) mappings.
######################################################################
# Read the existing AUTHORS file
my $header;
my $footer;
print "Matching Git emails to existing names/affiliations...\n";
sub save {
my $current = shift;
print "Saving person from AUTHORS: $current->{name}\n";
# We may overwrite an entry written from the git log, but that's
# ok
$people->{$current->{name}} = $current;
}
open (AUTHORS, "AUTHORS") || die "Can't open AUTHORS file";
my $in_header = 1;
my $in_footer = 0;
my $current = undef;
while (<AUTHORS>) {
chomp;
my $line = $_;
# Slurp down header lines until we hit a line that begins with an
# Git email
# Slurp down header lines until we hit a line that begins with
# $header_sep
if ($in_header) {
foreach my $git_email (keys(%{$committers})) {
if ($line =~ /$git_email\s+/) {
$in_header = 0;
}
}
if ($in_header) {
$header .= "$_\n";
$header .= "$line\n";
if ($_ =~ /^$header_sep/) {
$in_header = 0;
# There should be a blank line after this, too
$header .= "\n";
}
next;
}
# If we're in the body, parse to get the existing Git emails, gecos,
# and affiliations
if (!$in_header && !$in_footer) {
# Skip blank lines
next
if ($line =~ /^\s*$/);
# Make sure we have a line that begins with an Git email;
# otherwise, fall through to the footer.
my $found = undef;
my $git_email;
foreach $git_email (keys(%{$committers})) {
if ($line =~ /$git_email\s+/) {
$found = $git_email;
last;
}
}
if (!$found) {
$in_footer = 1;
# Format of body:
#
# NAME, Affiliation 1[, Affiliation 2[...]]
# Email address 1
# [Email address 2]
# [...]
# NAME, Affiliation 1[, Affiliation 2[...]]
# Email address 1
# [Email address 2]
# [...]
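#
# A purely hypothetical entry (example names, not taken from this
# repository) following that format would look like:
#
#   Jane Doe, Example University
#     jane.doe@example.edu
#     jdoe@users.noreply.github.com
#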
# Found a new email address for an existing person
if ($line =~ /^ /) {
m/^ (.+)$/;
$current->{emails}->{lc($1)} = 1;
next;
} else {
# Found a new person; save the old entry
save($current)
if (defined($current));
$current = undef;
$current->{org} = $unknown_org;
if ($line =~ m/^(.+?),\s+(.+)$/) {
$current->{name} = $1;
$current->{org} = $2;
} else {
$line =~ m/^$found\s+(.+?)\s{2,}(.+)$/;
my $gecos = $1;
my $aff = $2;
if ($gecos =~ /^\s+$/) {
$gecos = "<UNKNOWN>";
} else {
$committers->{$found}->{gecos} = $gecos;
}
if ($aff =~ /^\s+$/) {
$aff = "<UNKNOWN>";
} else {
$committers->{$found}->{affiliation} = $aff;
}
print "Git email $found matches: $gecos / $aff\n";
$current->{name} = $line;
}
}
# If we're in the footer, just save all the lines
if ($in_footer) {
$footer .= "$_\n";
next;
}
}
save($current)
if (defined($current));
close(AUTHORS);
# Figure out the 3 column widths. The last line of the header
# contains -'s for each of the columns.
######################################################################
$header =~ m/\n([\-\s]+?)$/m;
my $div_line = $1;
my @divs = split(/ /, $div_line);
my $id_col = length($divs[0]);
my $gecos_col = length($divs[1]);
my $aff_col = length($divs[2]);
# Output a new AUTHORS file
# Print out a new AUTHORS file
open (AUTHORS, ">AUTHORS.new") || die "Can't write to AUTHORS file";
print AUTHORS $header;
my $i;
my $have_unknowns = 0;
foreach my $git_email (sort(keys(%${committers}))) {
# Skip the automated accounts
next
if ($git_email eq "no-author\@open-mpi.org" ||
$git_email eq "mpiteam\@open-mpi.org");
print AUTHORS $git_email;
$i = length($git_email);
while ($i <= $id_col) {
print AUTHORS ' ';
++$i;
}
my @people_with_unknown_orgs;
my $email_dups;
# if we have gecos/affiliation, print them. Otherwise, just end
# the line here
if ((exists($committers->{$git_email}->{gecos}) &&
$committers->{$git_email}->{gecos} !~ /^\s+$/) ||
(exists($committers->{$git_email}->{affiliation}) &&
$committers->{$git_email}->{affiliation} !~ /^\s+$/)) {
print AUTHORS $committers->{$git_email}->{gecos};
$i = length($committers->{$git_email}->{gecos});
while ($i <= $gecos_col) {
print AUTHORS ' ';
++$i;
}
my @sorted_people = sort(keys(%{$people}));
foreach my $p (@sorted_people) {
print AUTHORS $p;
if (exists($people->{$p}->{org})) {
print AUTHORS ", $people->{$p}->{org}";
print AUTHORS $committers->{$git_email}->{affiliation}
if (exists($committers->{$git_email}->{affiliation}));
} else {
$have_unknowns = 1;
# Record this so that we can warn about it
push(@people_with_unknown_orgs, $p)
if ($people->{$p}->{org} eq $unknown_org);
}
print AUTHORS "\n";
foreach my $e (sort(keys(%{$people->{$p}->{emails}}))) {
# Sanity check: make sure this email address does not show up
# with any other person/name
my $dup;
foreach my $p2 (@sorted_people) {
next
if ($p eq $p2);
foreach my $e2 (keys(%{$people->{$p2}->{emails}})) {
if ($e eq $e2) {
$dup = $p2;
# Record this so that we can warn about it
if ($p le $p2) {
$email_dups->{$p} = $p2;
} else {
$email_dups->{$p2} = $p;
}
last;
}
}
last
if (defined($dup));
}
print AUTHORS " $e";
print AUTHORS " (**** DUPLICATE EMAIL ADDRESS WITH $dup ***)"
if (defined($dup));
print AUTHORS "\n";
}
}
print AUTHORS $footer;
close(AUTHORS);
# We have a new AUTHORS file! Replace the old one.
unlink("AUTHORS");
rename("AUTHORS.new", "AUTHORS");
print "New AUTHORS file written.\n";
if ($have_unknowns) {
print "*** WARNING: There were Git committers with unknown real names and/or\n*** affiliations. You *MUST* edit the AUTHORS file to fill them in!\n";
} else {
print "All Git emails were matched! No need to hand-edit the AUTHORS file.\n";
######################################################################
# Output any relevant warnings
my $warned = 0;
if ($#people_with_unknown_orgs >= 0) {
$warned = 1;
print "\n*** WARNING: The following people have unspecified organizations:\n";
foreach my $p (@people_with_unknown_orgs) {
print "*** $p\n";
}
}
my @k = sort(keys(%{$email_dups}));
if ($#k >= 0) {
$warned = 1;
print "\n*** WARNING: The following people had the same email address:\n";
foreach my $p (@k) {
print "*** $p, $email_dups->{$p}\n";
}
}
if ($warned) {
print "
*******************************************************************************
*** YOU SHOULD EDIT THE .mailmap AND/OR AUTHORS FILE TO RESOLVE THESE WARNINGS!
*******************************************************************************\n";
}
exit($warned);


@ -811,7 +811,7 @@ int ompi_comm_split_type (ompi_communicator_t *comm, int split_type, int key,
tmp[3] = -key;
rc = comm->c_coll.coll_allreduce (MPI_IN_PLACE, &tmp, 4, MPI_INT, MPI_MAX, comm,
comm->c_coll.coll_allgather_module);
comm->c_coll.coll_allreduce_module);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
return rc;
}


@ -103,10 +103,6 @@ struct ompi_comm_allreduce_context_t {
ompi_comm_cid_context_t *cid_context;
int *tmpbuf;
/* for intercomm allreduce */
int *rcounts;
int *rdisps;
/* for group allreduce */
int peers_comm[3];
};
@ -121,8 +117,6 @@ static void ompi_comm_allreduce_context_construct (ompi_comm_allreduce_context_t
static void ompi_comm_allreduce_context_destruct (ompi_comm_allreduce_context_t *context)
{
free (context->tmpbuf);
free (context->rcounts);
free (context->rdisps);
}
OBJ_CLASS_INSTANCE (ompi_comm_allreduce_context_t, opal_object_t,
@ -181,6 +175,7 @@ static ompi_comm_cid_context_t *mca_comm_cid_context_alloc (ompi_communicator_t
context->newcomm = newcomm;
context->comm = comm;
context->bridgecomm = bridgecomm;
context->pml_tag = 0;
/* Determine which implementation of allreduce we have to use
* for the current mode. */
@ -245,8 +240,8 @@ static int ompi_comm_allreduce_getnextcid (ompi_comm_request_t *request);
static int ompi_comm_checkcid (ompi_comm_request_t *request);
/* verify that the cid was available globally */
static int ompi_comm_nextcid_check_flag (ompi_comm_request_t *request);
/* lock the cid generator */
static int ompi_comm_cid_lock (ompi_comm_request_t *request);
static volatile int64_t ompi_comm_cid_lowest_id = INT64_MAX;
int ompi_comm_nextcid_nb (ompi_communicator_t *newcomm, ompi_communicator_t *comm,
ompi_communicator_t *bridgecomm, const void *arg0, const void *arg1,
@ -271,7 +266,7 @@ int ompi_comm_nextcid_nb (ompi_communicator_t *newcomm, ompi_communicator_t *com
request->context = &context->super;
ompi_comm_request_schedule_append (request, ompi_comm_cid_lock, NULL, 0);
ompi_comm_request_schedule_append (request, ompi_comm_allreduce_getnextcid, NULL, 0);
ompi_comm_request_start (request);
*req = &request->super;
@ -299,30 +294,33 @@ int ompi_comm_nextcid (ompi_communicator_t *newcomm, ompi_communicator_t *comm,
return rc;
}
static int ompi_comm_cid_lock (ompi_comm_request_t *request)
{
if (!OPAL_THREAD_TRYLOCK(&ompi_cid_lock)) {
return ompi_comm_request_schedule_append (request, ompi_comm_allreduce_getnextcid, NULL, 0);
}
return ompi_comm_request_schedule_append (request, ompi_comm_cid_lock, NULL, 0);
}
static int ompi_comm_allreduce_getnextcid (ompi_comm_request_t *request)
{
ompi_comm_cid_context_t *context = (ompi_comm_cid_context_t *) request->context;
int64_t my_id = ((int64_t) ompi_comm_get_cid (context->comm) << 32 | context->pml_tag);
ompi_request_t *subreq;
bool flag;
int ret;
if (OPAL_THREAD_TRYLOCK(&ompi_cid_lock)) {
return ompi_comm_request_schedule_append (request, ompi_comm_allreduce_getnextcid, NULL, 0);
}
if (ompi_comm_cid_lowest_id < my_id) {
OPAL_THREAD_UNLOCK(&ompi_cid_lock);
return ompi_comm_request_schedule_append (request, ompi_comm_allreduce_getnextcid, NULL, 0);
}
ompi_comm_cid_lowest_id = my_id;
/**
* This is the real algorithm described in the doc
*/
flag = false;
context->nextlocal_cid = mca_pml.pml_max_contextid;
for (unsigned int i = context->start ; i < mca_pml.pml_max_contextid ; ++i) {
flag = opal_pointer_array_test_and_set_item(&ompi_mpi_communicators,
i, context->comm);
flag = opal_pointer_array_test_and_set_item (&ompi_mpi_communicators, i,
context->comm);
if (true == flag) {
context->nextlocal_cid = i;
break;
@ -332,6 +330,7 @@ static int ompi_comm_allreduce_getnextcid (ompi_comm_request_t *request)
ret = context->allreduce_fn (&context->nextlocal_cid, &context->nextcid, 1, MPI_MAX,
context, &subreq);
if (OMPI_SUCCESS != ret) {
ompi_comm_cid_lowest_id = INT64_MAX;
OPAL_THREAD_UNLOCK(&ompi_cid_lock);
return ret;
}
@ -341,10 +340,12 @@ static int ompi_comm_allreduce_getnextcid (ompi_comm_request_t *request)
if (flag) {
opal_pointer_array_test_and_set_item(&ompi_mpi_communicators, context->nextlocal_cid, NULL);
}
ompi_comm_cid_lowest_id = INT64_MAX;
OPAL_THREAD_UNLOCK(&ompi_cid_lock);
return OMPI_ERR_OUT_OF_RESOURCE;
}
OPAL_THREAD_UNLOCK(&ompi_cid_lock);
/* next we want to verify that the resulting commid is ok */
return ompi_comm_request_schedule_append (request, ompi_comm_checkcid, &subreq, 1);
@ -356,6 +357,10 @@ static int ompi_comm_checkcid (ompi_comm_request_t *request)
ompi_request_t *subreq;
int ret;
if (OPAL_THREAD_TRYLOCK(&ompi_cid_lock)) {
return ompi_comm_request_schedule_append (request, ompi_comm_checkcid, NULL, 0);
}
context->flag = (context->nextcid == context->nextlocal_cid);
if (!context->flag) {
@ -367,11 +372,13 @@ static int ompi_comm_checkcid (ompi_comm_request_t *request)
++context->iter;
ret = context->allreduce_fn (&context->flag, &context->rflag, 1, MPI_MAX, context, &subreq);
ret = context->allreduce_fn (&context->flag, &context->rflag, 1, MPI_MIN, context, &subreq);
if (OMPI_SUCCESS == ret) {
ompi_comm_request_schedule_append (request, ompi_comm_nextcid_check_flag, &subreq, 1);
}
OPAL_THREAD_UNLOCK(&ompi_cid_lock);
return ret;
}
@ -379,12 +386,17 @@ static int ompi_comm_nextcid_check_flag (ompi_comm_request_t *request)
{
ompi_comm_cid_context_t *context = (ompi_comm_cid_context_t *) request->context;
if (OPAL_THREAD_TRYLOCK(&ompi_cid_lock)) {
return ompi_comm_request_schedule_append (request, ompi_comm_nextcid_check_flag, NULL, 0);
}
if (1 == context->rflag) {
/* set the according values to the newcomm */
context->newcomm->c_contextid = context->nextcid;
opal_pointer_array_set_item (&ompi_mpi_communicators, context->nextcid, context->newcomm);
/* unlock the cid generator */
ompi_comm_cid_lowest_id = INT64_MAX;
OPAL_THREAD_UNLOCK(&ompi_cid_lock);
/* done! */
@ -399,6 +411,8 @@ static int ompi_comm_nextcid_check_flag (ompi_comm_request_t *request)
++context->iter;
OPAL_THREAD_UNLOCK(&ompi_cid_lock);
/* try again */
return ompi_comm_allreduce_getnextcid (request);
}
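A minimal, self-contained sketch of the gating used in the hunks above (an assumed simplification; the helper names below are made up and are not OMPI symbols): only the request whose 64-bit id, built from the parent cid and the pml tag, is the lowest pending one may allocate a CID, while the others reschedule themselves and retry.
#include <stdint.h>
#include <stdio.h>

static int64_t lowest_pending_id = INT64_MAX;   /* INT64_MAX means nobody is allocating */

/* Returns 1 if this request may allocate a CID now, 0 if it must retry later. */
static int may_allocate(int64_t my_id)
{
    if (lowest_pending_id < my_id) {
        return 0;                   /* a lower-id creation is still in flight */
    }
    lowest_pending_id = my_id;      /* claim (or re-claim) the allocation round */
    return 1;
}

static void allocation_done(void)
{
    lowest_pending_id = INT64_MAX;  /* release for the next contender */
}

int main(void)
{
    int64_t req_a = ((int64_t) 4 << 32) | 0;   /* parent cid 4, tag 0 */
    int64_t req_b = ((int64_t) 7 << 32) | 0;   /* parent cid 7, tag 0 */

    printf("A may allocate: %d\n", may_allocate(req_a));  /* 1 */
    printf("B may allocate: %d\n", may_allocate(req_b));  /* 0, so B reschedules */
    allocation_done();                                    /* A finished */
    printf("B may allocate: %d\n", may_allocate(req_b));  /* 1 */
    return 0;
}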
@ -464,7 +478,7 @@ int ompi_comm_activate_nb (ompi_communicator_t **newcomm, ompi_communicator_t *c
/* Step 1: the barrier, after which it is allowed to
* send messages over the new communicator
*/
ret = context->allreduce_fn (&context->ok, &context->ok, 1, MPI_MAX, context,
ret = context->allreduce_fn (&context->ok, &context->ok, 1, MPI_MIN, context,
&subreq);
if (OMPI_SUCCESS != ret) {
ompi_comm_request_return (request);
@ -582,7 +596,7 @@ static int ompi_comm_allreduce_intra_nb (int *inbuf, int *outbuf, int count, str
/* Non-blocking version of ompi_comm_allreduce_inter */
static int ompi_comm_allreduce_inter_leader_exchange (ompi_comm_request_t *request);
static int ompi_comm_allreduce_inter_leader_reduce (ompi_comm_request_t *request);
static int ompi_comm_allreduce_inter_allgather (ompi_comm_request_t *request);
static int ompi_comm_allreduce_inter_bcast (ompi_comm_request_t *request);
static int ompi_comm_allreduce_inter_nb (int *inbuf, int *outbuf,
int count, struct ompi_op_t *op,
@ -616,18 +630,19 @@ static int ompi_comm_allreduce_inter_nb (int *inbuf, int *outbuf,
rsize = ompi_comm_remote_size (intercomm);
local_rank = ompi_comm_rank (intercomm);
context->tmpbuf = (int *) calloc (count, sizeof(int));
context->rdisps = (int *) calloc (rsize, sizeof(int));
context->rcounts = (int *) calloc (rsize, sizeof(int));
if (OPAL_UNLIKELY (NULL == context->tmpbuf || NULL == context->rdisps || NULL == context->rcounts)) {
ompi_comm_request_return (request);
return OMPI_ERR_OUT_OF_RESOURCE;
if (0 == local_rank) {
context->tmpbuf = (int *) calloc (count, sizeof(int));
if (OPAL_UNLIKELY (NULL == context->tmpbuf)) {
ompi_comm_request_return (request);
return OMPI_ERR_OUT_OF_RESOURCE;
}
}
/* Execute the inter-allreduce: the result from the local group will be in the buffer of the remote group
* and vice versa. */
rc = intercomm->c_coll.coll_iallreduce (inbuf, context->tmpbuf, count, MPI_INT, op, intercomm,
&subreq, intercomm->c_coll.coll_iallreduce_module);
rc = intercomm->c_local_comm->c_coll.coll_ireduce (inbuf, context->tmpbuf, count, MPI_INT, op, 0,
intercomm->c_local_comm, &subreq,
intercomm->c_local_comm->c_coll.coll_ireduce_module);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
ompi_comm_request_return (request);
return rc;
@ -636,7 +651,7 @@ static int ompi_comm_allreduce_inter_nb (int *inbuf, int *outbuf,
if (0 == local_rank) {
ompi_comm_request_schedule_append (request, ompi_comm_allreduce_inter_leader_exchange, &subreq, 1);
} else {
ompi_comm_request_schedule_append (request, ompi_comm_allreduce_inter_allgather, &subreq, 1);
ompi_comm_request_schedule_append (request, ompi_comm_allreduce_inter_bcast, &subreq, 1);
}
ompi_comm_request_start (request);
@ -676,33 +691,20 @@ static int ompi_comm_allreduce_inter_leader_reduce (ompi_comm_request_t *request
ompi_op_reduce (context->op, context->tmpbuf, context->outbuf, context->count, MPI_INT);
return ompi_comm_allreduce_inter_allgather (request);
return ompi_comm_allreduce_inter_bcast (request);
}
static int ompi_comm_allreduce_inter_allgather (ompi_comm_request_t *request)
static int ompi_comm_allreduce_inter_bcast (ompi_comm_request_t *request)
{
ompi_comm_allreduce_context_t *context = (ompi_comm_allreduce_context_t *) request->context;
ompi_communicator_t *intercomm = context->cid_context->comm;
ompi_communicator_t *comm = context->cid_context->comm->c_local_comm;
ompi_request_t *subreq;
int scount = 0, rc;
/* distribute the overall result to all processes in the other group.
Instead of using bcast, we are using here allgatherv, to avoid the
possible deadlock. Else, we need an algorithm to determine,
which group sends first in the inter-bcast and which receives
the result first.
*/
if (0 != ompi_comm_rank (intercomm)) {
context->rcounts[0] = context->count;
} else {
scount = context->count;
}
rc = intercomm->c_coll.coll_iallgatherv (context->outbuf, scount, MPI_INT, context->outbuf,
context->rcounts, context->rdisps, MPI_INT, intercomm,
&subreq, intercomm->c_coll.coll_iallgatherv_module);
/* both roots have the same result. broadcast to the local group */
rc = comm->c_coll.coll_ibcast (context->outbuf, context->count, MPI_INT, 0, comm,
&subreq, comm->c_coll.coll_ibcast_module);
if (OMPI_SUCCESS != rc) {
return rc;
}
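A standalone sketch of the pattern the hunks above adopt for the inter-communicator allreduce (illustrative values and helper names, not the OMPI code): each group reduces to its local leader, the two leaders exchange and combine their partial results, and each leader broadcasts the combined value inside its own group.
#include <stdio.h>

#define GROUP_SIZE 3

/* combine operation used for this example: MAX */
static int reduce_max(const int *vals, int n)
{
    int m = vals[0];
    for (int i = 1; i < n; i++) {
        if (vals[i] > m) m = vals[i];
    }
    return m;
}

int main(void)
{
    int group_a[GROUP_SIZE] = { 3, 9, 4 };   /* local contributions, group A */
    int group_b[GROUP_SIZE] = { 7, 2, 5 };   /* local contributions, group B */

    /* step 1: each group reduces to its local leader (rank 0) */
    int leader_a = reduce_max(group_a, GROUP_SIZE);
    int leader_b = reduce_max(group_b, GROUP_SIZE);

    /* step 2: the leaders exchange partial results and combine them */
    int global = (leader_a > leader_b) ? leader_a : leader_b;

    /* step 3: each leader broadcasts the combined result within its group */
    printf("every rank in both groups ends up with %d\n", global);  /* 9 */
    return 0;
}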
@ -840,8 +842,6 @@ static int ompi_comm_allreduce_pmix_reduce_complete (ompi_comm_request_t *reques
opal_buffer_t sbuf;
int rc;
fprintf (stderr, "reduce complete\n");
OBJ_CONSTRUCT(&sbuf, opal_buffer_t);
if (OPAL_SUCCESS != (rc = opal_dss.pack(&sbuf, context->tmpbuf, (int32_t)context->count, OPAL_INT))) {
@ -871,8 +871,6 @@ static int ompi_comm_allreduce_pmix_reduce_complete (ompi_comm_request_t *reques
cid_context->iter);
}
fprintf (stderr, "%s, %s\n", info.key, pdat.value.key);
/* this macro is not actually non-blocking. if a non-blocking version becomes available this function
* needs to be reworked to take advantage of it. */
OPAL_PMIX_EXCHANGE(rc, &info, &pdat, 600); // give them 10 minutes


@ -11,6 +11,7 @@
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2016 IBM Corporation. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@ -42,4 +43,4 @@ libompitrace_la_SOURCES = \
send.c \
sendrecv.c
libompitrace_la_LDFLAGS = -version-info 0:0:0
libompitrace_la_LDFLAGS = -version-info $(libompitrace_so_version)


@ -109,7 +109,7 @@ ompi_predefined_datatype_t ompi_mpi_cxx_bool = OMPI_DATATYPE_INIT_PREDEFIN
/*
* Complex datatypes for C (base types), C++, and fortran
*/
ompi_predefined_datatype_t ompi_mpi_c_float_complex = OMPI_DATATYPE_INIT_PREDEFINED (C_FLOAT_COMPLEX, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_COMPLEX );
ompi_predefined_datatype_t ompi_mpi_c_float_complex = OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE (C_FLOAT_COMPLEX, C_COMPLEX, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_COMPLEX );
ompi_predefined_datatype_t ompi_mpi_c_complex = OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE (C_FLOAT_COMPLEX, C_COMPLEX, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_COMPLEX );
ompi_predefined_datatype_t ompi_mpi_c_double_complex = OMPI_DATATYPE_INIT_PREDEFINED (C_DOUBLE_COMPLEX, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_COMPLEX );
#if HAVE_LONG_DOUBLE
@ -255,7 +255,7 @@ ompi_predefined_datatype_t ompi_mpi_integer8 = OMPI_DATATYPE_INIT_UNAVAILA
#if OMPI_HAVE_FORTRAN_INTEGER16
ompi_predefined_datatype_t ompi_mpi_integer16 = OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE_FORTRAN (INT, INTEGER16, OMPI_SIZEOF_FORTRAN_INTEGER16, OMPI_ALIGNMENT_FORTRAN_INTEGER16, OMPI_DATATYPE_FLAG_DATA_INT);
#else
ompi_predefined_datatype_t ompi_mpi_integer16 = OMPI_DATATYPE_INIT_UNAVAILABLE (INTEGER8, OMPI_DATATYPE_FLAG_DATA_FORTRAN | OMPI_DATATYPE_FLAG_DATA_INT);
ompi_predefined_datatype_t ompi_mpi_integer16 = OMPI_DATATYPE_INIT_UNAVAILABLE (INTEGER16, OMPI_DATATYPE_FLAG_DATA_FORTRAN | OMPI_DATATYPE_FLAG_DATA_INT);
#endif
/*
@ -533,7 +533,7 @@ int32_t ompi_datatype_init( void )
}
/*
* This MUST match the order of ompi/include/mpif-common.h
* This MUST match the order of ompi/include/mpif-values.pl
* Any change will break binary compatibility of Fortran programs.
*/
MOOG(datatype_null, 0);
@ -614,7 +614,7 @@ int32_t ompi_datatype_init( void )
MOOG(uint64_t, 65);
MOOG(aint, 66);
MOOG(offset, 67);
MOOG(c_complex, 68);
MOOG(c_bool, 68);
MOOG(c_float_complex, 69);
MOOG(c_double_complex, 70);
MOOG(c_long_double_complex, 71);


@ -14,6 +14,7 @@
* Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2016 University of Houston. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -31,6 +32,10 @@
#include "ompi/mca/io/base/base.h"
#include "ompi/info/info.h"
opal_mutex_t ompi_mpi_file_bootstrap_mutex = OPAL_MUTEX_STATIC_INIT;
/*
* Table for Fortran <-> C file handle conversion
*/
@ -102,6 +107,7 @@ int ompi_file_open(struct ompi_communicator_t *comm, const char *filename,
return OMPI_ERR_OUT_OF_RESOURCE;
}
/* Save the params */
file->f_comm = comm;
@ -127,6 +133,9 @@ int ompi_file_open(struct ompi_communicator_t *comm, const char *filename,
return OMPI_ERR_OUT_OF_RESOURCE;
}
/* Create the mutex */
OBJ_CONSTRUCT(&file->f_mutex, opal_mutex_t);
/* Select a module and actually open the file */
if (OMPI_SUCCESS != (ret = mca_io_base_file_select(file, NULL))) {
@ -146,6 +155,9 @@ int ompi_file_open(struct ompi_communicator_t *comm, const char *filename,
*/
int ompi_file_close(ompi_file_t **file)
{
OBJ_DESTRUCT(&(*file)->f_mutex);
(*file)->f_flags |= OMPI_FILE_ISCLOSED;
OBJ_RELEASE(*file);
*file = &ompi_mpi_file_null.file;


@ -14,6 +14,7 @@
* Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2016 University of Houston. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -78,6 +79,10 @@ struct ompi_file_t {
indicates what member to look at in the union, below) */
mca_io_base_version_t f_io_version;
/** Mutex to be used to protect access to the selected component
on a per file-handle basis */
opal_mutex_t f_mutex;
/** The selected component (note that this is a union) -- we need
this to add and remove the component from the list of
components currently in use by the io framework for


@ -9,7 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007-2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2007-2016 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2009-2012 Oak Rigde National Laboratory. All rights reserved.
* Copyright (c) 2011 Sandia National Laboratories. All rights reserved.
@ -434,8 +434,8 @@ typedef int (MPI_Grequest_cancel_function)(void *, int);
/*
* More constants
*/
#define MPI_UNWEIGHTED ((void *) 2) /* unweighted graph */
#define MPI_WEIGHTS_EMPTY ((void *) 3) /* empty weights */
#define MPI_UNWEIGHTED ((int *) 2) /* unweighted graph */
#define MPI_WEIGHTS_EMPTY ((int *) 3) /* empty weights */
#define MPI_BOTTOM ((void *) 0) /* base reference address */
#define MPI_IN_PLACE ((void *) 1) /* in place buffer */
#define MPI_BSEND_OVERHEAD 128 /* size of bsend header + ptr */
@ -1006,7 +1006,6 @@ OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_aint;
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_offset;
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_count;
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_c_bool;
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_c_complex;
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_c_float_complex;
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_c_double_complex;
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_c_long_double_complex;
@ -1153,7 +1152,7 @@ OMPI_DECLSPEC extern MPI_Fint *MPI_F_STATUSES_IGNORE;
#define MPI_OFFSET OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_offset)
#define MPI_C_BOOL OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_c_bool)
#if HAVE_FLOAT__COMPLEX
#define MPI_C_COMPLEX OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_c_complex)
#define MPI_C_COMPLEX OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_c_float_complex)
#define MPI_C_FLOAT_COMPLEX OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_c_float_complex)
#endif
#if HAVE_DOUBLE__COMPLEX
@ -1163,6 +1162,7 @@ OMPI_DECLSPEC extern MPI_Fint *MPI_F_STATUSES_IGNORE;
#define MPI_C_LONG_DOUBLE_COMPLEX OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_c_long_double_complex)
#endif
#define MPI_CXX_BOOL OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_cxx_bool)
#define MPI_CXX_COMPLEX OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_cxx_cplex)
#define MPI_CXX_FLOAT_COMPLEX OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_cxx_cplex)
#define MPI_CXX_DOUBLE_COMPLEX OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_cxx_dblcplex)
#define MPI_CXX_LONG_DOUBLE_COMPLEX OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_cxx_ldblcplex)


@ -3,6 +3,7 @@
# Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2016 Research Organization for Information Science
# and Technology (RIST). All rights reserved.
# Copyright (c) 2016 FUJITSU LIMITED. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@ -105,6 +106,7 @@ $handles->{MPI_BXOR} = 10;
$handles->{MPI_MAXLOC} = 11;
$handles->{MPI_MINLOC} = 12;
$handles->{MPI_REPLACE} = 13;
$handles->{MPI_NO_OP} = 14;
$handles->{MPI_COMM_NULL} = 2;
$handles->{MPI_DATATYPE_NULL} = 0;
@ -160,20 +162,22 @@ $handles->{MPI_UNSIGNED} = 40;
$handles->{MPI_LONG} = 41;
$handles->{MPI_UNSIGNED_LONG} = 42;
$handles->{MPI_LONG_LONG_INT} = 43;
$handles->{MPI_LONG_LONG} = $handles->{MPI_LONG_LONG_INT};
$handles->{MPI_UNSIGNED_LONG_LONG} = 44;
$handles->{MPI_FLOAT} = 45;
$handles->{MPI_DOUBLE} = 46;
$handles->{MPI_LONG_DOUBLE} = 47;
$handles->{MPI_FLOAT_INT} = 48;
$handles->{MPI_DOUBLE_INT} = 49;
$handles->{MPI_LONGDBL_INT} = 50;
$handles->{MPI_LONG_DOUBLE_INT} = 50;
$handles->{MPI_LONG_INT} = 51;
$handles->{MPI_2INT} = 52;
$handles->{MPI_SHORT_INT} = 53;
$handles->{MPI_CXX_BOOL} = 54;
$handles->{MPI_CXX_CPLEX} = 55;
$handles->{MPI_CXX_DBLCPLEX} = 56;
$handles->{MPI_CXX_LDBLCPLEX} = 57;
$handles->{MPI_CXX_FLOAT_COMPLEX} = 55;
$handles->{MPI_CXX_COMPLEX} = $handles->{MPI_CXX_FLOAT_COMPLEX};
$handles->{MPI_CXX_DOUBLE_COMPLEX} = 56;
$handles->{MPI_CXX_LONG_DOUBLE_COMPLEX} = 57;
$handles->{MPI_INT8_T} = 58;
$handles->{MPI_UINT8_T} = 59;
$handles->{MPI_INT16_T} = 60;
@ -184,8 +188,9 @@ $handles->{MPI_INT64_T} = 64;
$handles->{MPI_UINT64_T} = 65;
$handles->{MPI_AINT} = 66;
$handles->{MPI_OFFSET} = 67;
$handles->{MPI_C_COMPLEX} = 68;
$handles->{MPI_C_FLOAT_COMPLEX} = 69;
$handles->{MPI_C_BOOL} = 68;
$handles->{MPI_C_COMPLEX} = 69;
$handles->{MPI_C_FLOAT_COMPLEX} = $handles->{MPI_C_COMPLEX};
$handles->{MPI_C_DOUBLE_COMPLEX} = 70;
$handles->{MPI_C_LONG_DOUBLE_COMPLEX} = 71;
$handles->{MPI_COUNT} = 72;
@ -235,6 +240,8 @@ $constants->{MPI_WIN_FLAVOR_CREATE} = 1;
$constants->{MPI_WIN_FLAVOR_ALLOCATE} = 2;
$constants->{MPI_WIN_FLAVOR_DYNAMIC} = 3;
$constants->{MPI_WIN_FLAVOR_SHARED} = 4;
$constants->{MPI_WIN_UNIFIED} = 0;
$constants->{MPI_WIN_SEPARATE} = 1;
$constants->{MPI_BSEND_OVERHEAD} = 128;
$constants->{MPI_ORDER_C} = 0;


@ -188,7 +188,7 @@ static int basesmuma_open(void)
* Make sure that the number of banks is a power of 2
*/
cs->basesmuma_num_mem_banks=
roundup_to_power_radix(2,cs->basesmuma_num_mem_banks, &dummy);
ompi_roundup_to_power_radix(2,cs->basesmuma_num_mem_banks, &dummy);
if ( 0 == cs->basesmuma_num_mem_banks ) {
ret=OMPI_ERROR;
goto exit_ERROR;
@ -198,7 +198,7 @@ static int basesmuma_open(void)
* Make sure that the number of buffers is a power of 2
*/
cs->basesmuma_num_regions_per_bank=
roundup_to_power_radix(2,cs->basesmuma_num_regions_per_bank, &dummy);
ompi_roundup_to_power_radix(2,cs->basesmuma_num_regions_per_bank, &dummy);
if ( 0 == cs->basesmuma_num_regions_per_bank ) {
ret=OMPI_ERROR;
goto exit_ERROR;


@ -409,10 +409,10 @@ int base_bcol_basesmuma_setup_ctl_struct(
cs->basesmuma_num_regions_per_bank;
ctl_mgmt->size_of_group=
sm_bcol_module->super.sbgp_partner_module->group_size;
roundup_to_power_radix(2,cs->basesmuma_num_regions_per_bank,&n_levels);
ompi_roundup_to_power_radix(2,cs->basesmuma_num_regions_per_bank,&n_levels);
ctl_mgmt->log2_num_buffs_per_mem_bank=n_levels;
roundup_to_power_radix(2,n_ctl_structs,&n_levels);
ompi_roundup_to_power_radix(2,n_ctl_structs,&n_levels);
ctl_mgmt->log2_number_of_buffs=n_levels;
ctl_mgmt->mask=n_ctl_structs-1;
sm_bcol_module->super.n_poll_loops=cs->n_poll_loops;


@ -34,17 +34,6 @@
#include "coll_base_topo.h"
#include "coll_base_util.h"
/* valid values for coll_base_allgatherv_forced_algorithm */
mca_base_var_enum_value_t coll_base_allgatherv_algorithms[] = {
{0, "ignore"},
{1, "default"},
{2, "bruck"},
{3, "ring"},
{4, "neighbor"},
{5, "two_proc"},
{0, NULL}
};
/*
* ompi_coll_base_allgatherv_intra_bruck
*


@ -10,7 +10,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All Rights
* Copyright (c) 2013-2016 Los Alamos National Security, LLC. All Rights
* reserved.
* Copyright (c) 2014-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
@ -343,7 +343,8 @@ int ompi_coll_base_alltoall_intra_linear_sync(const void *sbuf, int scount,
mca_coll_base_module_t *module,
int max_outstanding_reqs)
{
int line, error, ri, si, rank, size, nreqs, nrreqs, nsreqs, total_reqs;
int line, error, ri, si, rank, size, nrreqs, nsreqs, total_reqs;
int nreqs = 0;
char *psnd, *prcv;
ptrdiff_t slb, sext, rlb, rext;
@ -565,7 +566,8 @@ int ompi_coll_base_alltoall_intra_basic_linear(const void *sbuf, int scount,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int i, rank, size, err, nreqs, line;
int i, rank, size, err, line;
int nreqs = 0;
char *psnd, *prcv;
MPI_Aint lb, sndinc, rcvinc;
ompi_request_t **req, **sreq, **rreq;


@ -39,54 +39,33 @@ int ompi_coll_base_sendrecv_nonzero_actual( void* sendbuf, size_t scount,
ompi_status_public_t* status )
{ /* post receive first, then send, then wait... should be fast (I hope) */
int err, line = 0, nreqs = 0;
size_t typesize;
ompi_request_t* reqs[2], **req = reqs;
ompi_status_public_t statuses[2];
int err, line = 0;
size_t rtypesize, stypesize;
ompi_request_t *req;
ompi_status_public_t rstatus;
/* post new irecv */
ompi_datatype_type_size(rdatatype, &typesize);
if (0 != rcount && 0 != typesize) {
ompi_datatype_type_size(rdatatype, &rtypesize);
if (0 != rcount && 0 != rtypesize) {
err = MCA_PML_CALL(irecv( recvbuf, rcount, rdatatype, source, rtag,
comm, req++));
++nreqs;
comm, &req));
if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; }
}
/* send data to children */
ompi_datatype_type_size(sdatatype, &typesize);
if (0 != scount && 0 != typesize) {
err = MCA_PML_CALL(isend( sendbuf, scount, sdatatype, dest, stag,
MCA_PML_BASE_SEND_STANDARD, comm, req++));
++nreqs;
ompi_datatype_type_size(sdatatype, &stypesize);
if (0 != scount && 0 != stypesize) {
err = MCA_PML_CALL(send( sendbuf, scount, sdatatype, dest, stag,
MCA_PML_BASE_SEND_STANDARD, comm));
if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; }
}
if (0 != nreqs) {
err = ompi_request_wait_all( nreqs, reqs, statuses );
if( MPI_ERR_IN_STATUS == err ) { line = __LINE__;
/* As we use wait_all we will get MPI_ERR_IN_STATUS which is not an error
* code that we can propagate up the stack. Instead, look for the real
* error code from the MPI_ERROR in the status.
*/
int err_index = 0;
if( MPI_SUCCESS == statuses[0].MPI_ERROR
|| MPI_ERR_PENDING == statuses[0].MPI_ERROR ) {
err_index = 1;
}
if (MPI_STATUS_IGNORE != status) {
*status = statuses[err_index];
}
err = statuses[err_index].MPI_ERROR;
OPAL_OUTPUT ((ompi_coll_base_framework.framework_output, "%s:%d: Error %d occurred in the %s"
" stage of ompi_coll_base_sendrecv_zero\n",
__FILE__, line, err, (0 == err_index ? "receive" : "send")));
return err;
}
if (0 != rcount && 0 != rtypesize) {
err = ompi_request_wait( &req, &rstatus);
if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; }
if (MPI_STATUS_IGNORE != status) {
*status = statuses[0];
*status = rstatus;
}
} else {
if( MPI_STATUS_IGNORE != status )
@ -96,7 +75,7 @@ int ompi_coll_base_sendrecv_nonzero_actual( void* sendbuf, size_t scount,
return (MPI_SUCCESS);
error_handler:
/* Error discovered during the posting of the irecv or isend,
/* Error discovered during the posting of the irecv or send,
* and no status is available.
*/
OPAL_OUTPUT ((ompi_coll_base_framework.framework_output, "%s:%d: Error %d occurred\n",


@ -44,7 +44,7 @@ mca_coll_basic_alltoallw_intra_inplace(const void *rbuf, const int *rcounts, con
int i, j, size, rank, err = MPI_SUCCESS, max_size;
ompi_request_t **preq, **reqs = NULL;
char *tmp_buffer, *save_buffer = NULL;
ptrdiff_t ext, gap;
ptrdiff_t ext, gap = 0;
/* Initialize. */


@ -365,11 +365,9 @@ mca_coll_basic_reduce_scatter_inter(const void *sbuf, void *rbuf, const int *rco
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int err, i, rank, root = 0, rsize, lsize;
int totalcounts;
int err, i, rank, root = 0, rsize, lsize, totalcounts;
char *tmpbuf = NULL, *tmpbuf2 = NULL, *lbuf = NULL, *buf;
ptrdiff_t gap, span;
char *tmpbuf = NULL, *tmpbuf2 = NULL;
char *lbuf, *buf;
ompi_request_t *req;
int *disps = NULL;
@ -472,9 +470,9 @@ mca_coll_basic_reduce_scatter_inter(const void *sbuf, void *rbuf, const int *rco
/* Now do a scatterv on the local communicator */
err = comm->c_local_comm->c_coll.coll_scatterv(lbuf, rcounts, disps, dtype,
rbuf, rcounts[rank], dtype, 0,
comm->c_local_comm,
comm->c_local_comm->c_coll.coll_scatterv_module);
rbuf, rcounts[rank], dtype, 0,
comm->c_local_comm,
comm->c_local_comm->c_coll.coll_scatterv_module);
exit:
if (NULL != tmpbuf) {


@ -140,7 +140,8 @@ ompi_dtype_2_hcoll_dtype( ompi_datatype_t *dtype,
int opal_type_id = dtype->super.id;
dte_data_representation_t dte_data_rep = DTE_ZERO;
if (ompi_type_id < OMPI_DATATYPE_MPI_MAX_PREDEFINED) {
if (ompi_type_id < OMPI_DATATYPE_MPI_MAX_PREDEFINED &&
dtype->super.flags & OMPI_DATATYPE_FLAG_PREDEFINED) {
if (opal_type_id > 0 && opal_type_id < OPAL_DATATYPE_MAX_PREDEFINED) {
dte_data_rep = *ompi_datatype_2_dte_data_rep[opal_type_id];
}


@ -486,7 +486,6 @@ static inline int NBC_Type_intrinsic(MPI_Datatype type) {
/* let's give a try to inline functions */
static inline int NBC_Copy(const void *src, int srccount, MPI_Datatype srctype, void *tgt, int tgtcount, MPI_Datatype tgttype, MPI_Comm comm) {
int size, pos, res;
OPAL_PTRDIFF_TYPE ext, lb;
void *packbuf;
#if OPAL_CUDA_SUPPORT
@ -496,13 +495,10 @@ static inline int NBC_Copy(const void *src, int srccount, MPI_Datatype srctype,
#endif /* OPAL_CUDA_SUPPORT */
/* if we have the same types and they are contiguous (intrinsic
* types are contiguous), we can just use a single memcpy */
res = ompi_datatype_get_extent(srctype, &lb, &ext);
if (OMPI_SUCCESS != res) {
NBC_Error ("MPI Error in MPI_Type_extent() (%i)", res);
return res;
}
ptrdiff_t gap, span;
span = opal_datatype_span(&srctype->super, srccount, &gap);
memcpy(tgt, src, srccount*ext);
memcpy(tgt, src, span);
} else {
/* we have to pack and unpack */
res = PMPI_Pack_size(srccount, srctype, comm, &size);


@ -14,11 +14,12 @@
*
*/
#include "opal/include/opal/align.h"
#include "ompi/op/op.h"
#include "nbc_internal.h"
static inline int red_sched_binomial (int rank, int p, int root, const void *sendbuf, void *redbuf, int count, MPI_Datatype datatype,
static inline int red_sched_binomial (int rank, int p, int root, const void *sendbuf, void *redbuf, char tmpredbuf, int count, MPI_Datatype datatype,
MPI_Op op, char inplace, NBC_Schedule *schedule, NBC_Handle *handle);
static inline int red_sched_chain (int rank, int p, int root, const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype,
MPI_Op op, int ext, size_t size, NBC_Schedule *schedule, NBC_Handle *handle, int fragsize);
@ -55,6 +56,7 @@ int ompi_coll_libnbc_ireduce(const void* sendbuf, void* recvbuf, int count, MPI_
MPI_Aint ext;
NBC_Schedule *schedule;
char *redbuf=NULL, inplace;
char tmpredbuf = 0;
enum { NBC_RED_BINOMIAL, NBC_RED_CHAIN } alg;
NBC_Handle *handle;
ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;
@ -104,8 +106,10 @@ int ompi_coll_libnbc_ireduce(const void* sendbuf, void* recvbuf, int count, MPI_
redbuf = recvbuf;
} else {
/* recvbuf may not be valid on non-root nodes */
handle->tmpbuf = malloc (2*span);
redbuf = (char*) handle->tmpbuf + span - gap;
ptrdiff_t span_align = OPAL_ALIGN(span, datatype->super.align, ptrdiff_t);
handle->tmpbuf = malloc (span_align + span);
redbuf = (char*)span_align - gap;
tmpredbuf = 1;
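/* Worked example with illustrative numbers (not from this commit): if span is 12
 * and the datatype alignment is 8, OPAL_ALIGN rounds span up to span_align = 16,
 * so malloc(span_align + span) = 28 bytes holds two regions and the second one
 * starts at offset 16, i.e. on an aligned boundary. */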
}
} else {
handle->tmpbuf = malloc (span);
@ -142,7 +146,7 @@ int ompi_coll_libnbc_ireduce(const void* sendbuf, void* recvbuf, int count, MPI_
switch(alg) {
case NBC_RED_BINOMIAL:
res = red_sched_binomial(rank, p, root, sendbuf, redbuf, count, datatype, op, inplace, schedule, handle);
res = red_sched_binomial(rank, p, root, sendbuf, redbuf, tmpredbuf, count, datatype, op, inplace, schedule, handle);
break;
case NBC_RED_CHAIN:
res = red_sched_chain(rank, p, root, sendbuf, recvbuf, count, datatype, op, ext, size, schedule, handle, segsize);
@ -289,10 +293,10 @@ int ompi_coll_libnbc_ireduce_inter(const void* sendbuf, void* recvbuf, int count
if (vrank == 0) rank = root; \
if (vrank == root) rank = 0; \
}
static inline int red_sched_binomial (int rank, int p, int root, const void *sendbuf, void *redbuf, int count, MPI_Datatype datatype,
static inline int red_sched_binomial (int rank, int p, int root, const void *sendbuf, void *redbuf, char tmpredbuf, int count, MPI_Datatype datatype,
MPI_Op op, char inplace, NBC_Schedule *schedule, NBC_Handle *handle) {
int vroot, vrank, vpeer, peer, res, maxr;
char *rbuf, *lbuf, *buf;
char *rbuf, *lbuf, *buf, tmpbuf;
int tmprbuf, tmplbuf;
ptrdiff_t gap;
(void)opal_datatype_span(&datatype->super, count, &gap);
@ -305,17 +309,21 @@ static inline int red_sched_binomial (int rank, int p, int root, const void *sen
RANK2VRANK(rank, vrank, vroot);
maxr = (int)ceil((log((double)p)/LOG2));
if (rank != root) {
inplace = 0;
}
/* ensure the result ends up in redbuf on vrank 0 */
if (0 == (maxr%2)) {
rbuf = (void *)(-gap);
tmprbuf = true;
lbuf = redbuf;
tmplbuf = false;
tmplbuf = tmpredbuf;
} else {
lbuf = (void *)(-gap);
tmplbuf = true;
rbuf = redbuf;
tmprbuf = false;
tmprbuf = tmpredbuf;
if (inplace) {
res = NBC_Copy(rbuf, count, datatype, ((char *)handle->tmpbuf)-gap, count, datatype, MPI_COMM_SELF);
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
@ -352,7 +360,7 @@ static inline int red_sched_binomial (int rank, int p, int root, const void *sen
}
/* swap left and right buffers */
buf = rbuf; rbuf = lbuf ; lbuf = buf;
tmprbuf ^= 1; tmplbuf ^= 1;
tmpbuf = tmprbuf; tmprbuf = tmplbuf; tmplbuf = tmpbuf;
}
} else {
/* we have to send this round */
@ -377,9 +385,9 @@ static inline int red_sched_binomial (int rank, int p, int root, const void *sen
/* send to root if vroot ! root */
if (vroot != root) {
if (0 == rank) {
res = NBC_Sched_send (redbuf, false, count, datatype, root, schedule, false);
res = NBC_Sched_send (redbuf, tmpredbuf, count, datatype, root, schedule, false);
} else if (root == rank) {
res = NBC_Sched_recv (redbuf, false, count, datatype, vroot, schedule, false);
res = NBC_Sched_recv (redbuf, tmpredbuf, count, datatype, vroot, schedule, false);
}
}


@ -16,6 +16,8 @@
* Author(s): Torsten Hoefler <htor@cs.indiana.edu>
*
*/
#include "opal/include/opal/align.h"
#include "nbc_internal.h"
/* a reduce_scatter schedule cannot be cached easily because the contents
@ -40,7 +42,7 @@ int ompi_coll_libnbc_ireduce_scatter(const void* sendbuf, void* recvbuf, const i
struct mca_coll_base_module_2_1_0_t *module) {
int peer, rank, maxr, p, res, count;
MPI_Aint ext;
ptrdiff_t gap, span;
ptrdiff_t gap, span, span_align;
char *sbuf, inplace;
NBC_Schedule *schedule;
NBC_Handle *handle;
@ -84,14 +86,15 @@ int ompi_coll_libnbc_ireduce_scatter(const void* sendbuf, void* recvbuf, const i
maxr = (int) ceil ((log((double) p) / LOG2));
span = opal_datatype_span(&datatype->super, count, &gap);
handle->tmpbuf = malloc (span * 2);
span_align = OPAL_ALIGN(span, datatype->super.align, ptrdiff_t);
handle->tmpbuf = malloc (span_align + span);
if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) {
NBC_Return_handle (handle);
return OMPI_ERR_OUT_OF_RESOURCE;
}
rbuf = (char *)(-gap);
lbuf = (char *)(span - gap);
lbuf = (char *)(span_align - gap);
schedule = OBJ_NEW(NBC_Schedule);
if (OPAL_UNLIKELY(NULL == schedule)) {
@ -205,7 +208,7 @@ int ompi_coll_libnbc_ireduce_scatter_inter (const void* sendbuf, void* recvbuf,
struct mca_coll_base_module_2_1_0_t *module) {
int rank, res, count, lsize, rsize;
MPI_Aint ext;
ptrdiff_t gap, span;
ptrdiff_t gap, span, span_align;
NBC_Schedule *schedule;
NBC_Handle *handle;
ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;
@ -226,6 +229,7 @@ int ompi_coll_libnbc_ireduce_scatter_inter (const void* sendbuf, void* recvbuf,
}
span = opal_datatype_span(&datatype->super, count, &gap);
span_align = OPAL_ALIGN(span, datatype->super.align, ptrdiff_t);
res = NBC_Init_handle(comm, &handle, libnbc_module);
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
@ -233,7 +237,7 @@ int ompi_coll_libnbc_ireduce_scatter_inter (const void* sendbuf, void* recvbuf,
}
if (count > 0) {
handle->tmpbuf = malloc (2 * span);
handle->tmpbuf = malloc (span_align + span);
if (OPAL_UNLIKELY(NULL == handle->tmpbuf)) {
NBC_Return_handle (handle);
return OMPI_ERR_OUT_OF_RESOURCE;
@ -259,7 +263,7 @@ int ompi_coll_libnbc_ireduce_scatter_inter (const void* sendbuf, void* recvbuf,
if (0 == rank) {
char *lbuf, *rbuf;
lbuf = (char *)(-gap);
rbuf = (char *)(span-gap);
rbuf = (char *)(span_align-gap);
res = NBC_Sched_recv (lbuf, true, count, datatype, 0, schedule, true);
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
NBC_Return_handle (handle);


@ -14,6 +14,8 @@
* Author(s): Torsten Hoefler <htor@cs.indiana.edu>
*
*/
#include "opal/include/opal/align.h"
#include "nbc_internal.h"
/* a reduce_scatter schedule cannot be cached easily because the contents
@ -75,9 +77,11 @@ int ompi_coll_libnbc_ireduce_scatter_block(const void* sendbuf, void* recvbuf, i
if (0 < count) {
char *rbuf, *lbuf, *buf;
ptrdiff_t span_align;
span = opal_datatype_span(&datatype->super, count, &gap);
handle->tmpbuf = malloc (2*span);
span_align = OPAL_ALIGN(span, datatype->super.align, ptrdiff_t);
handle->tmpbuf = malloc (span_align + span);
if (NULL == handle->tmpbuf) {
OMPI_COLL_LIBNBC_REQUEST_RETURN(handle);
OBJ_RELEASE(schedule);
@ -85,8 +89,8 @@ int ompi_coll_libnbc_ireduce_scatter_block(const void* sendbuf, void* recvbuf, i
}
rbuf = (void *)(-gap);
lbuf = (char *)(span - gap);
redbuf = (char *) handle->tmpbuf + span - gap;
lbuf = (char *)(span_align - gap);
redbuf = (char *) handle->tmpbuf + span_align - gap;
/* copy data to redbuf if we only have a single node */
if ((p == 1) && !inplace) {
@ -206,7 +210,7 @@ int ompi_coll_libnbc_ireduce_scatter_block_inter(const void *sendbuf, void *recv
ompi_request_t **request, struct mca_coll_base_module_2_1_0_t *module) {
int rank, res, count, lsize, rsize;
MPI_Aint ext;
ptrdiff_t gap, span;
ptrdiff_t gap, span, span_align;
NBC_Schedule *schedule;
NBC_Handle *handle;
ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;
@ -229,9 +233,10 @@ int ompi_coll_libnbc_ireduce_scatter_block_inter(const void *sendbuf, void *recv
count = rcount * lsize;
span = opal_datatype_span(&dtype->super, count, &gap);
span_align = OPAL_ALIGN(span, dtype->super.align, ptrdiff_t);
if (count > 0) {
handle->tmpbuf = malloc (2 * span);
handle->tmpbuf = malloc (span_align + span);
if (NULL == handle->tmpbuf) {
NBC_Return_handle (handle);
return OMPI_ERR_OUT_OF_RESOURCE;
@ -257,7 +262,7 @@ int ompi_coll_libnbc_ireduce_scatter_block_inter(const void *sendbuf, void *recv
if (0 == rank) {
char *lbuf, *rbuf;
lbuf = (char *)(-gap);
rbuf = (char *)(span-gap);
rbuf = (char *)(span_align-gap);
res = NBC_Sched_recv (lbuf, true, count, dtype, 0, schedule, true);
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
NBC_Return_handle (handle);


@ -179,12 +179,12 @@ static int mca_coll_ml_verify_params(void)
/* Make sure that the number of memory banks is a power of 2 */
mca_coll_ml_component.n_payload_mem_banks =
roundup_to_power_radix(2, mca_coll_ml_component.n_payload_mem_banks,
ompi_roundup_to_power_radix(2, mca_coll_ml_component.n_payload_mem_banks,
&dummy);
/* Make sure that the number of buffers is a power of 2 */
mca_coll_ml_component.n_payload_buffs_per_bank =
roundup_to_power_radix(2, mca_coll_ml_component.n_payload_buffs_per_bank,
ompi_roundup_to_power_radix(2, mca_coll_ml_component.n_payload_buffs_per_bank,
&dummy);
return OMPI_SUCCESS;


@ -65,6 +65,7 @@ struct mca_coll_portals4_component_t {
opal_free_list_t requests; /* request free list for the i collectives */
ptl_ni_limits_t ni_limits;
ptl_size_t portals_max_msg_size;
int use_binomial_gather_algorithm;
@ -314,7 +315,7 @@ is_reduce_optimizable(struct ompi_datatype_t *dtype, size_t length, struct ompi_
}
*ptl_dtype = ompi_coll_portals4_atomic_datatype[dtype->id];
if (*ptl_dtype == COLL_PORTALS4_NO_DTYPE){
if (*ptl_dtype == COLL_PORTALS4_NO_DTYPE) {
opal_output_verbose(50, ompi_coll_base_framework.framework_output,
"datatype %d not supported\n",
dtype->id);


@ -265,7 +265,7 @@ allreduce_kary_tree_top(const void *sendbuf, void *recvbuf, int count,
ompi_coll_portals4_get_peer(comm, child[i]),
mca_coll_portals4_component.pt_idx,
match_bits_rtr, 0, NULL, 0)) != PTL_OK)
return opal_stderr("Put RTR failed", __FILE__, __LINE__, ret);
return opal_stderr("Put RTR failed %d", __FILE__, __LINE__, ret);
}
}
}
@ -408,7 +408,7 @@ int ompi_coll_portals4_iallreduce_intra(const void* sendbuf, void* recvbuf, int
allreduce_kary_tree_top(sendbuf, recvbuf, count,
dtype, op, comm, request, portals4_module);
puts("iallreduce");
opal_output_verbose(10, ompi_coll_base_framework.framework_output, "iallreduce");
return (OMPI_SUCCESS);
}


@ -147,9 +147,31 @@ barrier_hypercube_top(struct ompi_communicator_t *comm,
}
if (is_sync) {
/* Send a put to self when we've received all our messages... */
ret = PtlCTWait(request->u.barrier.rtr_ct_h, num_msgs, &event);
/* Each process has a pending PtlTriggeredPut. To be sure this request will be triggered, we must
call PtlTriggeredCTInc twice. Otherwise, we could free the CT too early and the Put wouldn't be triggered */
ptl_ct_event_t ct_inc;
ct_inc.success = 1;
ct_inc.failure = 0;
if ((ret = PtlTriggeredCTInc(request->u.barrier.rtr_ct_h, ct_inc,
request->u.barrier.rtr_ct_h, num_msgs)) != 0) {
return opal_stderr("PtlTriggeredCTInc failed", __FILE__, __LINE__, ret);
}
if ((ret = PtlTriggeredCTInc(request->u.barrier.rtr_ct_h, ct_inc,
request->u.barrier.rtr_ct_h, num_msgs + 1)) != 0) {
return opal_stderr("PtlTriggeredCTInc failed", __FILE__, __LINE__, ret);
}
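/* Illustrative count (assumed numbers, not from this commit): with num_msgs = 4
 * the counter reaches 4 from the arriving messages and each of the two triggered
 * increments above adds one more, so the PtlCTWait below on num_msgs + 2 == 6
 * returns only after both increments have fired. */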
ret = PtlCTWait(request->u.barrier.rtr_ct_h, num_msgs + 2, &event);
if (PTL_OK != ret) {
opal_output_verbose(1, ompi_coll_base_framework.framework_output,
"%s:%d: PtlCTWait failed: %d\n",
__FILE__, __LINE__, ret);
return OMPI_ERROR;
}
}
else {
/* Send a put to self when we've received all our messages... */


@ -89,12 +89,20 @@ static int prepare_bcast_data (struct ompi_communicator_t *comm,
}
/* Number of segments */
request->u.bcast.segment_nb = (request->u.bcast.tmpsize > COLL_PORTALS4_MAX_BW) ?
(((request->u.bcast.tmpsize + COLL_PORTALS4_MAX_BW -1) / COLL_PORTALS4_MAX_BW) < COLL_PORTALS4_MAX_SEGMENT ?
((request->u.bcast.tmpsize + COLL_PORTALS4_MAX_BW -1) / COLL_PORTALS4_MAX_BW) :
COLL_PORTALS4_MAX_SEGMENT) :
{
size_t max_msg_size = (COLL_PORTALS4_MAX_BW > mca_coll_portals4_component.ni_limits.max_msg_size) ?
mca_coll_portals4_component.ni_limits.max_msg_size :
COLL_PORTALS4_MAX_BW;
// TODO: Either make the Portals size limits compatible with COLL_PORTALS4_MAX_SEGMENT or remove COLL_PORTALS4_MAX_SEGMENT
request->u.bcast.segment_nb = (request->u.bcast.tmpsize > max_msg_size) ?
(((request->u.bcast.tmpsize + max_msg_size -1) / max_msg_size) < COLL_PORTALS4_MAX_SEGMENT ?
((request->u.bcast.tmpsize + max_msg_size -1) / max_msg_size) : COLL_PORTALS4_MAX_SEGMENT) :
1;
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
"seg_number=%d , seg_size_max=%lu", request->u.bcast.segment_nb, max_msg_size));
}
if (request->u.bcast.segment_nb > COLL_PORTALS4_BCAST_ALGO_THRESHOLD) {
request->u.bcast.algo = OMPI_COLL_PORTALS4_BCAST_PIPELINE_ALGO;
}
@ -137,9 +145,9 @@ bcast_kary_tree_top(void *buff, int count,
mca_coll_portals4_module_t *portals4_module)
{
bool is_sync = request->is_sync;
int ret, seg;
unsigned int i;
int segment_nb = request->u.bcast.segment_nb;
int ret;
unsigned int i, seg, seg_size, nb_long;
unsigned int segment_nb = request->u.bcast.segment_nb;
unsigned int child_nb;
int size = ompi_comm_size(comm);
int rank = ompi_comm_rank(comm);
@ -201,15 +209,22 @@ bcast_kary_tree_top(void *buff, int count,
COLL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_cid(comm), 0, 0,
COLL_PORTALS4_BCAST, 0, internal_count);
/* The data will be cut in segment_nb segments.
* nb_long segments will have a size of (seg_size + 1)
* and (segment_nb - nb_long) segments will have a size of seg_size
*/
seg_size = request->u.bcast.tmpsize / segment_nb;
nb_long = request->u.bcast.tmpsize % segment_nb;
opal_output_verbose(10, ompi_coll_base_framework.framework_output, "seg_size=%d nb_long=%d segment_nb=%d", seg_size, nb_long, segment_nb);
if (rank != root) {
for (seg = 1, offset = 0, length = 0 ;
seg <= segment_nb ;
seg++, offset += length) {
/* Divide buffer into segments */
length = (seg < segment_nb) ?
(request->u.bcast.tmpsize + segment_nb - 1) / segment_nb :
request->u.bcast.tmpsize - ((request->u.bcast.tmpsize + segment_nb - 1) / segment_nb) * (segment_nb - 1);
if (seg <= nb_long) length = seg_size + 1;
else length = seg_size;
/*
** Prepare Data ME
@ -352,13 +367,14 @@ bcast_kary_tree_top(void *buff, int count,
seg++, offset += length) {
/* Divide buffer into segments */
length = (seg < segment_nb) ?
(request->u.bcast.tmpsize + segment_nb - 1) / segment_nb :
request->u.bcast.tmpsize - ((request->u.bcast.tmpsize + segment_nb - 1) / segment_nb) * (segment_nb - 1);
if (seg <= nb_long) length = seg_size + 1;
else length = seg_size;
opal_output_verbose(10, ompi_coll_base_framework.framework_output,
"bcast with k-ary tree : segment of size %ld", length);
/* compute the triggering threshold to send data to the children */
trig_thr = (rank == root) ? (segment_nb) :
(segment_nb + seg);
trig_thr = segment_nb + seg - 1; /* To make sure the set of DATA PtlTriggeredPuts is executed in order */
if (rank != root) trig_thr ++;
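/* Illustrative thresholds with segment_nb = 4: trig_thr takes the values
 * 4, 5, 6, 7 for seg = 1..4 at the root and 5, 6, 7, 8 on non-root ranks;
 * the strictly increasing thresholds are what keep the data
 * PtlTriggeredPuts ordered. */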
/*
** Send Data to children
@ -381,6 +397,17 @@ bcast_kary_tree_top(void *buff, int count,
}
}
if (rank == root) {
trig_thr = segment_nb;
ct_inc.success = segment_nb;
ct_inc.failure = 0;
if ((ret = PtlTriggeredCTInc(request->u.bcast.trig_ct_h, ct_inc,
request->u.bcast.trig_ct_h, trig_thr)) != 0) {
return opal_stderr("PtlTriggeredCTInc failed", __FILE__, __LINE__, ret);
}
}
ack_thr = child_nb;
if (is_sync) {
@ -409,9 +436,28 @@ bcast_kary_tree_top(void *buff, int count,
*/
if (rank != root) {
ack_thr = segment_nb;
trig_thr = segment_nb;
if (is_sync) {
if ((ret = PtlCTWait(request->u.bcast.trig_ct_h, ack_thr, &ct)) != 0) {
/* Each leaf has a pending PtlTriggeredPut (to send the final ACK). We must call PtlTriggeredCTInc twice.
Otherwise, we could pass the PtlCTWait and then free the CT too early, so the Put would never be triggered.
This is necessary because portals4 does not ensure the order of triggered operations associated
with the same threshold. In the case where PtlCTWait is not called (the else case), this is not necessary. */
ct_inc.success = 1;
ct_inc.failure = 0;
if ((ret = PtlTriggeredCTInc(request->u.bcast.trig_ct_h, ct_inc,
request->u.bcast.trig_ct_h, trig_thr)) != 0) {
return opal_stderr("PtlTriggeredCTInc failed", __FILE__, __LINE__, ret);
}
if ((ret = PtlTriggeredCTInc(request->u.bcast.trig_ct_h, ct_inc,
request->u.bcast.trig_ct_h, trig_thr + 1)) != 0) {
return opal_stderr("PtlTriggeredCTInc failed", __FILE__, __LINE__, ret);
}
if ((ret = PtlCTWait(request->u.bcast.trig_ct_h, trig_thr + 2, &ct)) != 0) {
opal_stderr("PtlCTWait failed", __FILE__, __LINE__, ret);
}
}
@ -421,7 +467,7 @@ bcast_kary_tree_top(void *buff, int count,
mca_coll_portals4_component.finish_pt_idx,
0, 0, NULL, (uintptr_t) request,
request->u.bcast.trig_ct_h,
ack_thr)) != 0) {
trig_thr)) != 0) {
return opal_stderr("PtlTriggeredPut failed", __FILE__, __LINE__, ret);
}
@ -440,8 +486,9 @@ bcast_pipeline_top(void *buff, int count,
mca_coll_portals4_module_t *portals4_module)
{
bool is_sync = request->is_sync;
int ret, seg;
int segment_nb = request->u.bcast.segment_nb;
int ret;
unsigned int seg, seg_size, nb_long;
unsigned int segment_nb = request->u.bcast.segment_nb;
int size = ompi_comm_size(comm);
int rank = ompi_comm_rank(comm);
ptl_rank_t parent, child;
@ -492,6 +539,13 @@ bcast_pipeline_top(void *buff, int count,
COLL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_cid(comm), 0, 0,
COLL_PORTALS4_BCAST, 0, internal_count);
/* The data will be cut in segment_nb segments.
* nb_long segments will have a size of (seg_size + 1)
* and (segment_nb - nb_long) segments will have a size of seg_size
*/
seg_size = request->u.bcast.tmpsize / segment_nb;
nb_long = request->u.bcast.tmpsize % segment_nb;
opal_output_verbose(10, ompi_coll_base_framework.framework_output, "seg_size=%d nb_long=%d", seg_size, nb_long);
if (rank != root) {
for (seg = 1, offset = 0, length = 0 ;
@ -499,9 +553,8 @@ bcast_pipeline_top(void *buff, int count,
seg++, offset += length) {
/* Divide buffer into segments */
length = (seg < segment_nb) ?
(request->u.bcast.tmpsize + segment_nb - 1) / segment_nb :
request->u.bcast.tmpsize - ((request->u.bcast.tmpsize + segment_nb - 1) / segment_nb) * (segment_nb - 1);
if (seg <= nb_long) length = seg_size + 1;
else length = seg_size;
/*
** Prepare Data ME
@ -642,13 +695,14 @@ bcast_pipeline_top(void *buff, int count,
seg++, offset += length) {
/* Divide buffer into segments */
length = (seg < segment_nb) ?
(request->u.bcast.tmpsize + segment_nb - 1) / segment_nb :
request->u.bcast.tmpsize - ((request->u.bcast.tmpsize + segment_nb - 1) / segment_nb) * (segment_nb - 1);
if (seg <= nb_long) length = seg_size + 1;
else length = seg_size;
opal_output_verbose(10, ompi_coll_base_framework.framework_output,
"bcast with pipeline : segment of size %ld \n", length);
/* compute the triggering threshold to send data to the children */
trig_thr = (rank == root) ? (segment_nb) :
(segment_nb + seg);
trig_thr = segment_nb + seg - 1; /* To make sure the PtlTriggeredPuts are executed in order */
if (rank != root) trig_thr ++;
/*
** Send Data to children
@ -668,6 +722,16 @@ bcast_pipeline_top(void *buff, int count,
}
}
}
if (rank == root) {
trig_thr = segment_nb;
ct_inc.success = segment_nb;
ct_inc.failure = 0;
if ((ret = PtlTriggeredCTInc(request->u.bcast.trig_ct_h, ct_inc,
request->u.bcast.trig_ct_h, trig_thr)) != 0) {
return opal_stderr("PtlTriggeredCTInc failed", __FILE__, __LINE__, ret);
}
}
if (is_sync) {
if ((ret = PtlCTWait(request->u.bcast.ack_ct_h, 1, &ct)) != 0) {
@ -696,8 +760,29 @@ bcast_pipeline_top(void *buff, int count,
*/
if (rank != root) {
trig_thr = segment_nb;
if (is_sync) {
if ((ret = PtlCTWait(request->u.bcast.trig_ct_h, segment_nb, &ct)) != 0) {
/* Each leaf has a pending PtlTriggeredPut (to send the final ACK). We must call PtlTriggeredCTInc twice.
Otherwise, we could pass the PtlCTWait and then free the CT too early, so the Put would never be triggered.
This is necessary because portals4 does not ensure the order of triggered operations associated
with the same threshold. In the case where PtlCTWait is not called (the else case), this is not necessary. */
ct_inc.success = 1;
ct_inc.failure = 0;
if ((ret = PtlTriggeredCTInc(request->u.bcast.trig_ct_h, ct_inc,
request->u.bcast.trig_ct_h, trig_thr)) != 0) {
return opal_stderr("PtlTriggeredCTInc failed", __FILE__, __LINE__, ret);
}
if ((ret = PtlTriggeredCTInc(request->u.bcast.trig_ct_h, ct_inc,
request->u.bcast.trig_ct_h, trig_thr + 1)) != 0) {
return opal_stderr("PtlTriggeredCTInc failed", __FILE__, __LINE__, ret);
}
if ((ret = PtlCTWait(request->u.bcast.trig_ct_h, trig_thr + 2, &ct)) != 0) {
opal_stderr("PtlCTWait failed", __FILE__, __LINE__, ret);
}
}
@ -707,7 +792,7 @@ bcast_pipeline_top(void *buff, int count,
mca_coll_portals4_component.finish_pt_idx,
0, 0, NULL, (uintptr_t) request,
request->u.bcast.trig_ct_h,
segment_nb)) != 0) {
trig_thr)) != 0) {
return opal_stderr("PtlTriggeredPut failed", __FILE__, __LINE__, ret);
}
}
@ -831,7 +916,7 @@ ompi_coll_portals4_ibcast_intra(void *buff, int count,
return OMPI_ERROR;
}
puts("ibcast");
opal_output_verbose(10, ompi_coll_base_framework.framework_output, "ibcast_intra");
return (OMPI_SUCCESS);
}
@ -860,5 +945,6 @@ ompi_coll_portals4_ibcast_intra_fini(ompi_coll_portals4_request_t *request)
ompi_request_complete(&request->super, true);
OPAL_THREAD_UNLOCK(&ompi_request_lock);
opal_output_verbose(10, ompi_coll_base_framework.framework_output, "ibcast_intra_fini");
return (OMPI_SUCCESS);
}


@ -211,6 +211,16 @@ portals4_register(void)
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_portals4_component.use_binomial_gather_algorithm);
mca_coll_portals4_component.portals_max_msg_size = PTL_SIZE_MAX;
(void) mca_base_component_var_register(&mca_coll_portals4_component.super.collm_version,
"max_msg_size",
"Max size supported by portals4 (above that, a message is cut into messages less than that size)",
MCA_BASE_VAR_TYPE_UNSIGNED_LONG,
NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_portals4_component.portals_max_msg_size);
return OMPI_SUCCESS;
}
@ -369,7 +379,13 @@ portals4_init_query(bool enable_progress_threads,
__FILE__, __LINE__, ret);
return OMPI_ERROR;
}
opal_output_verbose(10, ompi_coll_base_framework.framework_output,
"ni_limits.max_atomic_size=%ld", mca_coll_portals4_component.ni_limits.max_atomic_size);
if (mca_coll_portals4_component.portals_max_msg_size < mca_coll_portals4_component.ni_limits.max_msg_size)
mca_coll_portals4_component.ni_limits.max_msg_size = mca_coll_portals4_component.portals_max_msg_size;
opal_output_verbose(10, ompi_coll_base_framework.framework_output,
"ni_limits.max_msg_size=%lu", mca_coll_portals4_component.ni_limits.max_msg_size);
ret = PtlGetId(mca_coll_portals4_component.ni_h, &mca_coll_portals4_component.id);
if (PTL_OK != ret) {


@ -21,6 +21,7 @@
#include "coll_portals4.h"
#include "coll_portals4_request.h"
#include <string.h> // included for ffs in get_tree_numdescendants_of
#undef RTR_USES_TRIGGERED_PUT
@ -55,6 +56,22 @@
* |
* 15
*/
static int32_t get_tree_numdescendants_of(struct ompi_communicator_t* comm,
int vrank)
{
int max;
int size = ompi_comm_size(comm);
if (0 == vrank) {
return size - 1;
} else {
max = 1 << ffs(vrank - 1);
return ((vrank + max <= size ) ? max : size - vrank) -1;
}
}
static ompi_coll_portals4_tree_t*
ompi_coll_portals4_build_in_order_bmtree( struct ompi_communicator_t* comm,
int root )
@ -506,8 +523,10 @@ ompi_coll_portals4_gather_intra_binomial_top(const void *sbuf, int scount, struc
int32_t expected_ops =0;
int32_t expected_acks=0;
ptl_size_t number_of_fragment_gathered = 0;
ptl_size_t number_of_fragment_send = 1;
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
"coll:portals4:gather_intra_binomial_top enter rank %d", request->u.gather.my_rank));
request->type = OMPI_COLL_PORTALS4_TYPE_GATHER;
@ -579,6 +598,23 @@ ompi_coll_portals4_gather_intra_binomial_top(const void *sbuf, int scount, struc
ret = setup_sync_handles(comm, request, portals4_module);
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
"%s:%d: packed_size=%lu, fragment_size=%lu",
__FILE__, __LINE__, request->u.gather.packed_size, mca_coll_portals4_component.ni_limits.max_msg_size));
for (int i =0; i < bmtree->tree_nextsize; i++) {
int child_vrank = VRANK(bmtree->tree_next[i], request->u.gather.root_rank, request->u.gather.size);
int sub_tree_size = get_tree_numdescendants_of(comm, child_vrank) + 1;
ptl_size_t local_number_of_fragment = ((sub_tree_size * request->u.gather.packed_size) + mca_coll_portals4_component.ni_limits.max_msg_size -1) / mca_coll_portals4_component.ni_limits.max_msg_size;
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
"%s:%d: %d is child of %d(%d) with %d descendants (nb_frag += %lu)",
__FILE__, __LINE__, bmtree->tree_next[i], vrank, request->u.gather.root_rank , sub_tree_size, local_number_of_fragment));
number_of_fragment_gathered += local_number_of_fragment;
}
number_of_fragment_send = (request->u.gather.gather_bytes + mca_coll_portals4_component.ni_limits.max_msg_size -1) / mca_coll_portals4_component.ni_limits.max_msg_size;
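/* Worked example (illustrative numbers): with packed_size = 3 MB and
 * ni_limits.max_msg_size = 4 MB, a child whose subtree holds 2 ranks
 * contributes ceil(2 * 3 MB / 4 MB) = 2 fragments and a leaf child
 * contributes ceil(3 MB / 4 MB) = 1; number_of_fragment_gathered sums these
 * over the direct children, and number_of_fragment_send applies the same
 * ceiling to this rank's own gather_bytes. */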
/***********************************************/
/* Chain the RTR and Recv-ACK to the Gather CT */
/***********************************************/
@ -603,7 +639,7 @@ ompi_coll_portals4_gather_intra_binomial_top(const void *sbuf, int scount, struc
if (vrank == 0) {
/* root, so do nothing */
expected_ops=bmtree->tree_nextsize; /* gather put from each child */
expected_ops=number_of_fragment_gathered ; /* gather put from each child */
expected_acks=0;
} else {
@ -617,22 +653,32 @@ ompi_coll_portals4_gather_intra_binomial_top(const void *sbuf, int scount, struc
__FILE__, __LINE__, vrank,
remote_offset, vrank, vparent, request->u.gather.packed_size);
expected_ops=bmtree->tree_nextsize + 1; /* gather put from each child + a chained RTR */
expected_ops=number_of_fragment_gathered + 1; /* gather puts from each child + a chained RTR */
expected_acks=1; /* Recv-ACK from parent */
ret = PtlTriggeredPut(request->u.gather.gather_mdh,
request->u.gather.gather_offset,
request->u.gather.gather_bytes,
ptl_size_t size_sent = 0;
ptl_size_t size_left = request->u.gather.gather_bytes;
for (ptl_size_t i = 0 ; i < number_of_fragment_send; i++) {
ptl_size_t frag_size = (size_left > mca_coll_portals4_component.ni_limits.max_msg_size) ?
mca_coll_portals4_component.ni_limits.max_msg_size:
size_left;
ret = PtlTriggeredPut(request->u.gather.gather_mdh,
request->u.gather.gather_offset + size_sent,
frag_size,
PTL_NO_ACK_REQ,
ompi_coll_portals4_get_peer(comm, parent),
mca_coll_portals4_component.pt_idx,
request->u.gather.gather_match_bits,
remote_offset,
remote_offset + size_sent,
NULL,
0,
request->u.gather.gather_cth,
expected_ops);
if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
size_left -= frag_size;
size_sent += frag_size;
}
}
/************************************/
@ -734,7 +780,7 @@ ompi_coll_portals4_gather_intra_binomial_top(const void *sbuf, int scount, struc
ompi_coll_portals4_destroy_tree(&(portals4_module->cached_in_order_bmtree));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
"coll:portals4:gather_intra_binomial_top exit rank %d", request->u.gather.my_rank));
return OMPI_SUCCESS;
@ -773,8 +819,9 @@ ompi_coll_portals4_gather_intra_linear_top(const void *sbuf, int scount, struct
int32_t expected_ops =0;
int32_t expected_acks=0;
ptl_size_t number_of_fragment = 1;
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
"coll:portals4:gather_intra_linear_top enter rank %d", request->u.gather.my_rank));
request->type = OMPI_COLL_PORTALS4_TYPE_GATHER;
@ -843,6 +890,13 @@ ompi_coll_portals4_gather_intra_linear_top(const void *sbuf, int scount, struct
ret = setup_sync_handles(comm, request, portals4_module);
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }
number_of_fragment = (request->u.gather.packed_size > mca_coll_portals4_component.ni_limits.max_msg_size) ?
(request->u.gather.packed_size + mca_coll_portals4_component.ni_limits.max_msg_size - 1) / mca_coll_portals4_component.ni_limits.max_msg_size :
1;
opal_output_verbose(90, ompi_coll_base_framework.framework_output,
"%s:%d:rank %d:number_of_fragment = %lu",
__FILE__, __LINE__, request->u.gather.my_rank, number_of_fragment);
/***********************************************/
/* Chain the RTR and Recv-ACK to the Gather CT */
/***********************************************/
@ -867,11 +921,13 @@ ompi_coll_portals4_gather_intra_linear_top(const void *sbuf, int scount, struct
if (i_am_root) {
/* root, so do nothing */
expected_ops=request->u.gather.size-1; /* gather put from all other ranks */
expected_ops=(request->u.gather.size-1) * number_of_fragment; /* gather put from all other ranks */
expected_acks=0;
} else {
ptl_size_t remote_offset=request->u.gather.my_rank * request->u.gather.packed_size;
ptl_size_t split_offset = 0;
ptl_size_t size_left = request->u.gather.gather_bytes;
opal_output_verbose(30, ompi_coll_base_framework.framework_output,
"%s:%d:rank(%d): remote_offset(%lu)=rank(%d) * packed_size(%ld)",
@ -881,19 +937,34 @@ ompi_coll_portals4_gather_intra_linear_top(const void *sbuf, int scount, struct
expected_ops=1; /* chained RTR */
expected_acks=1; /* Recv-ACK from root */
ret = PtlTriggeredPut(request->u.gather.gather_mdh,
request->u.gather.gather_offset,
request->u.gather.gather_bytes,
for (ptl_size_t j=0; j<number_of_fragment; j++) {
ptl_size_t frag_size = (size_left > mca_coll_portals4_component.ni_limits.max_msg_size) ?
mca_coll_portals4_component.ni_limits.max_msg_size :
size_left;
opal_output_verbose(10, ompi_coll_base_framework.framework_output,
"%s:%d:rank(%d): frag(%lu),offset_frag (%lu) frag_size(%lu)",
__FILE__, __LINE__, request->u.gather.my_rank,
j, split_offset, frag_size);
ret = PtlTriggeredPut(request->u.gather.gather_mdh,
request->u.gather.gather_offset + split_offset,
frag_size,
PTL_NO_ACK_REQ,
ompi_coll_portals4_get_peer(comm, request->u.gather.root_rank),
mca_coll_portals4_component.pt_idx,
request->u.gather.gather_match_bits,
remote_offset,
remote_offset + split_offset,
NULL,
0,
request->u.gather.gather_cth,
expected_ops);
if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
size_left -= frag_size;
split_offset += frag_size;
}
}
/*****************************************/
@ -997,7 +1068,7 @@ ompi_coll_portals4_gather_intra_linear_top(const void *sbuf, int scount, struct
"completed CTWait(expected_ops=%d)\n", expected_ops);
}
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
"coll:portals4:gather_intra_linear_top exit rank %d", request->u.gather.my_rank));
return OMPI_SUCCESS;
@ -1020,7 +1091,7 @@ ompi_coll_portals4_gather_intra_binomial_bottom(struct ompi_communicator_t *comm
int ret, line;
int i;
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
"coll:portals4:gather_intra_binomial_bottom enter rank %d", request->u.gather.my_rank));
ret = cleanup_gather_handles(request);
@ -1065,7 +1136,7 @@ ompi_coll_portals4_gather_intra_binomial_bottom(struct ompi_communicator_t *comm
ompi_request_complete(&request->super, true);
OPAL_THREAD_UNLOCK(&ompi_request_lock);
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
"coll:portals4:gather_intra_binomial_bottom exit rank %d", request->u.gather.my_rank));
return OMPI_SUCCESS;
@ -1090,7 +1161,7 @@ ompi_coll_portals4_gather_intra_linear_bottom(struct ompi_communicator_t *comm,
int ret, line;
int i;
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
"coll:portals4:gather_intra_linear_bottom enter rank %d", request->u.gather.my_rank));
ret = cleanup_gather_handles(request);
@ -1128,7 +1199,7 @@ ompi_coll_portals4_gather_intra_linear_bottom(struct ompi_communicator_t *comm,
ompi_request_complete(&request->super, true);
OPAL_THREAD_UNLOCK(&ompi_request_lock);
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
"coll:portals4:gather_intra_linear_bottom exit rank %d", request->u.gather.my_rank));
return OMPI_SUCCESS;
@ -1157,7 +1228,7 @@ ompi_coll_portals4_gather_intra(const void *sbuf, int scount, struct ompi_dataty
ompi_coll_portals4_request_t *request;
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
"coll:portals4:gather_intra enter rank %d", ompi_comm_rank(comm)));
/*
@ -1204,7 +1275,7 @@ ompi_coll_portals4_gather_intra(const void *sbuf, int scount, struct ompi_dataty
*/
OMPI_COLL_PORTALS4_REQUEST_RETURN(request);
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
"coll:portals4:gather_intra exit rank %d", request->u.gather.my_rank));
return OMPI_SUCCESS;
@ -1230,7 +1301,7 @@ ompi_coll_portals4_igather_intra(const void *sbuf, int scount, struct ompi_datat
ompi_coll_portals4_request_t *request;
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
"coll:portals4:igather_intra enter rank %d", ompi_comm_rank(comm)));
/*
@ -1267,7 +1338,7 @@ ompi_coll_portals4_igather_intra(const void *sbuf, int scount, struct ompi_datat
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }
}
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
"coll:portals4:igather_intra exit rank %d", request->u.gather.my_rank));
return OMPI_SUCCESS;
@ -1286,7 +1357,7 @@ ompi_coll_portals4_igather_intra_fini(ompi_coll_portals4_request_t *request)
{
int ret, line;
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
"coll:portals4:igather_intra_fini enter rank %d", request->u.gather.my_rank));
/*
@ -1300,7 +1371,7 @@ ompi_coll_portals4_igather_intra_fini(ompi_coll_portals4_request_t *request)
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }
}
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
"coll:portals4:igather_intra_fini exit rank %d", request->u.gather.my_rank));
return OMPI_SUCCESS;


@ -385,10 +385,10 @@ ompi_coll_portals4_reduce_intra(const void *sendbuf, void *recvbuf, int count,
ret = reduce_kary_tree_top(sendbuf, recvbuf, count,
dtype, op, root, comm, request, portals4_module);
if (OMPI_SUCCESS != ret)
return ret;
return ret;
ret = reduce_kary_tree_bottom(request);
if (OMPI_SUCCESS != ret)
return ret;
return ret;
OMPI_COLL_PORTALS4_REQUEST_RETURN(request);
return (OMPI_SUCCESS);
@ -422,13 +422,13 @@ ompi_coll_portals4_ireduce_intra(const void* sendbuf, void* recvbuf, int count,
ret = reduce_kary_tree_top(sendbuf, recvbuf, count,
dtype, op, root, comm, request, portals4_module);
if (OMPI_SUCCESS != ret)
return ret;
return ret;
if (!request->u.reduce.is_optim) {
OMPI_COLL_PORTALS4_REQUEST_RETURN(request);
}
puts("ireduce");
opal_output_verbose(10, ompi_coll_base_framework.framework_output, "ireduce");
return (OMPI_SUCCESS);
}
@ -439,7 +439,7 @@ ompi_coll_portals4_ireduce_intra_fini(ompi_coll_portals4_request_t *request)
ret = reduce_kary_tree_bottom(request);
if (OMPI_SUCCESS != ret)
return ret;
return ret;
OPAL_THREAD_LOCK(&ompi_request_lock);
ompi_request_complete(&request->super, true);


@ -127,7 +127,7 @@ setup_scatter_handles(struct ompi_communicator_t *comm,
ptl_me_t me;
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
"coll:portals4:setup_scatter_handles enter rank %d", request->u.scatter.my_rank));
/**********************************/
@ -136,7 +136,7 @@ setup_scatter_handles(struct ompi_communicator_t *comm,
COLL_PORTALS4_SET_BITS(request->u.scatter.scatter_match_bits, ompi_comm_get_cid(comm),
0, 0, COLL_PORTALS4_SCATTER, 0, request->u.scatter.coll_count);
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
"coll:portals4:setup_scatter_handles rank(%d) scatter_match_bits(0x%016lX)",
request->u.scatter.my_rank, request->u.scatter.scatter_match_bits));
@ -166,7 +166,7 @@ setup_scatter_handles(struct ompi_communicator_t *comm,
&request->u.scatter.scatter_meh);
if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
"coll:portals4:setup_scatter_handles exit rank %d", request->u.scatter.my_rank));
return OMPI_SUCCESS;
@ -188,7 +188,7 @@ setup_sync_handles(struct ompi_communicator_t *comm,
ptl_me_t me;
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
"coll:portals4:setup_sync_handles enter rank %d", request->u.scatter.my_rank));
/**********************************/
@ -197,7 +197,7 @@ setup_sync_handles(struct ompi_communicator_t *comm,
COLL_PORTALS4_SET_BITS(request->u.scatter.sync_match_bits, ompi_comm_get_cid(comm),
0, 1, COLL_PORTALS4_SCATTER, 0, request->u.scatter.coll_count);
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
"coll:portals4:setup_sync_handles rank(%d) sync_match_bits(0x%016lX)",
request->u.scatter.my_rank, request->u.scatter.sync_match_bits));
@ -227,7 +227,7 @@ setup_sync_handles(struct ompi_communicator_t *comm,
&request->u.scatter.sync_meh);
if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
"coll:portals4:setup_sync_handles exit rank %d", request->u.scatter.my_rank));
return OMPI_SUCCESS;
@ -245,7 +245,7 @@ cleanup_scatter_handles(ompi_coll_portals4_request_t *request)
{
int ret, line;
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
"coll:portals4:cleanup_scatter_handles enter rank %d", request->u.scatter.my_rank));
/**********************************/
@ -265,7 +265,7 @@ cleanup_scatter_handles(ompi_coll_portals4_request_t *request)
ret = PtlCTFree(request->u.scatter.scatter_cth);
if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
"coll:portals4:cleanup_scatter_handles exit rank %d", request->u.scatter.my_rank));
return OMPI_SUCCESS;
@ -284,7 +284,7 @@ cleanup_sync_handles(ompi_coll_portals4_request_t *request)
int ret, line;
int ptl_ret;
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
"coll:portals4:cleanup_sync_handles enter rank %d", request->u.scatter.my_rank));
/**********************************/
@ -304,7 +304,7 @@ cleanup_sync_handles(ompi_coll_portals4_request_t *request)
ret = PtlCTFree(request->u.scatter.sync_cth);
if (PTL_OK != ret) { ptl_ret = ret; ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
"coll:portals4:cleanup_sync_handles exit rank %d", request->u.scatter.my_rank));
return OMPI_SUCCESS;
@ -341,8 +341,9 @@ ompi_coll_portals4_scatter_intra_linear_top(const void *sbuf, int scount, struct
int32_t expected_chained_rtrs = 0;
int32_t expected_chained_acks = 0;
ptl_size_t number_of_fragment = 1;
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
"coll:portals4:scatter_intra_linear_top enter rank %d", request->u.scatter.my_rank));
request->type = OMPI_COLL_PORTALS4_TYPE_SCATTER;
@ -409,6 +410,13 @@ ompi_coll_portals4_scatter_intra_linear_top(const void *sbuf, int scount, struct
ret = setup_sync_handles(comm, request, portals4_module);
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }
number_of_fragment = (request->u.scatter.packed_size > mca_coll_portals4_component.ni_limits.max_msg_size) ?
(request->u.scatter.packed_size + mca_coll_portals4_component.ni_limits.max_msg_size - 1) / mca_coll_portals4_component.ni_limits.max_msg_size :
1;
opal_output_verbose(90, ompi_coll_base_framework.framework_output,
"%s:%d:rank %d:number_of_fragment = %lu",
__FILE__, __LINE__, request->u.scatter.my_rank, number_of_fragment);
/**********************************/
/* do the scatter */
/**********************************/
@ -445,25 +453,42 @@ ompi_coll_portals4_scatter_intra_linear_top(const void *sbuf, int scount, struct
}
ptl_size_t offset = request->u.scatter.packed_size * i;
ptl_size_t size_sent = 0;
ptl_size_t size_left = request->u.scatter.packed_size;
opal_output_verbose(30, ompi_coll_base_framework.framework_output,
opal_output_verbose(10, ompi_coll_base_framework.framework_output,
"%s:%d:rank(%d): offset(%lu)=rank(%d) * packed_size(%ld)",
__FILE__, __LINE__, request->u.scatter.my_rank,
offset, i, request->u.scatter.packed_size);
ret = PtlTriggeredPut(request->u.scatter.scatter_mdh,
(ptl_size_t)request->u.scatter.scatter_buf + offset,
request->u.scatter.packed_size,
PTL_NO_ACK_REQ,
ompi_coll_portals4_get_peer(comm, i),
mca_coll_portals4_component.pt_idx,
request->u.scatter.scatter_match_bits,
0,
NULL,
0,
request->u.scatter.scatter_cth,
expected_chained_rtrs);
if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
for (ptl_size_t j=0; j<number_of_fragment; j++) {
ptl_size_t frag_size = (size_left > mca_coll_portals4_component.ni_limits.max_msg_size) ?
mca_coll_portals4_component.ni_limits.max_msg_size :
size_left;
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
"%s:%d:rank(%d): frag(%lu),offset_frag (%lu) frag_size(%lu)",
__FILE__, __LINE__, request->u.scatter.my_rank,
j, size_sent, frag_size));
ret = PtlTriggeredPut(request->u.scatter.scatter_mdh,
(ptl_size_t)request->u.scatter.scatter_buf + offset + size_sent,
frag_size,
PTL_NO_ACK_REQ,
ompi_coll_portals4_get_peer(comm, i),
mca_coll_portals4_component.pt_idx,
request->u.scatter.scatter_match_bits,
size_sent,
NULL,
0,
request->u.scatter.scatter_cth,
expected_chained_rtrs);
if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
size_left -= frag_size;
size_sent += frag_size;
}
}
} else {
/* non-root, so do nothing */
@ -473,7 +498,7 @@ ompi_coll_portals4_scatter_intra_linear_top(const void *sbuf, int scount, struct
expected_acks = 0;
/* operations on the scatter counter */
expected_puts = 1; /* scatter put from root */
expected_puts = number_of_fragment; /* scatter put from root */
expected_chained_rtrs = 0;
expected_chained_acks = 0;
}
@ -552,7 +577,7 @@ ompi_coll_portals4_scatter_intra_linear_top(const void *sbuf, int scount, struct
"completed CTWait(expected_ops=%d)\n", expected_ops);
}
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
"coll:portals4:scatter_intra_linear_top exit rank %d", request->u.scatter.my_rank));
return OMPI_SUCCESS;
@ -574,7 +599,7 @@ ompi_coll_portals4_scatter_intra_linear_bottom(struct ompi_communicator_t *comm,
{
int ret, line;
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
"coll:portals4:scatter_intra_linear_bottom enter rank %d", request->u.scatter.my_rank));
ret = cleanup_scatter_handles(request);
@ -616,7 +641,7 @@ ompi_coll_portals4_scatter_intra_linear_bottom(struct ompi_communicator_t *comm,
ompi_request_complete(&request->super, true);
OPAL_THREAD_UNLOCK(&ompi_request_lock);
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
"coll:portals4:scatter_intra_linear_bottom exit rank %d", request->u.scatter.my_rank));
return OMPI_SUCCESS;
@ -645,7 +670,7 @@ ompi_coll_portals4_scatter_intra(const void *sbuf, int scount, struct ompi_datat
ompi_coll_portals4_request_t *request;
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
"coll:portals4:scatter_intra enter rank %d", ompi_comm_rank(comm)));
/*
@ -679,7 +704,7 @@ ompi_coll_portals4_scatter_intra(const void *sbuf, int scount, struct ompi_datat
*/
OMPI_COLL_PORTALS4_REQUEST_RETURN(request);
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
"coll:portals4:scatter_intra exit rank %d", request->u.scatter.my_rank));
return OMPI_SUCCESS;
@ -705,7 +730,7 @@ ompi_coll_portals4_iscatter_intra(const void *sbuf, int scount, struct ompi_data
ompi_coll_portals4_request_t *request;
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
"coll:portals4:iscatter_intra enter rank %d", ompi_comm_rank(comm)));
/*
@ -732,7 +757,7 @@ ompi_coll_portals4_iscatter_intra(const void *sbuf, int scount, struct ompi_data
module);
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
"coll:portals4:iscatter_intra exit rank %d", request->u.scatter.my_rank));
return OMPI_SUCCESS;
@ -751,7 +776,7 @@ ompi_coll_portals4_iscatter_intra_fini(ompi_coll_portals4_request_t *request)
{
int ret, line;
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
"coll:portals4:iscatter_intra_fini enter rank %d", request->u.scatter.my_rank));
/*
@ -760,7 +785,7 @@ ompi_coll_portals4_iscatter_intra_fini(ompi_coll_portals4_request_t *request)
ret = ompi_coll_portals4_scatter_intra_linear_bottom(request->super.req_mpi_object.comm, request);
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
"coll:portals4:iscatter_intra_fini exit rank %d", request->u.scatter.my_rank));
return OMPI_SUCCESS;

ompi/mca/coll/sync/Makefile.am

@ -0,0 +1,52 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2016 Intel, Inc. All rights reserved
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
dist_ompidata_DATA = help-coll-sync.txt
sources = \
coll_sync.h \
coll_sync_component.c \
coll_sync_module.c \
coll_sync_bcast.c \
coll_sync_exscan.c \
coll_sync_gather.c \
coll_sync_gatherv.c \
coll_sync_reduce.c \
coll_sync_reduce_scatter.c \
coll_sync_scan.c \
coll_sync_scatter.c \
coll_sync_scatterv.c
if MCA_BUILD_ompi_coll_sync_DSO
component_noinst =
component_install = mca_coll_sync.la
else
component_noinst = libmca_coll_sync.la
component_install =
endif
mcacomponentdir = $(ompilibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_coll_sync_la_SOURCES = $(sources)
mca_coll_sync_la_LDFLAGS = -module -avoid-version
noinst_LTLIBRARIES = $(component_noinst)
libmca_coll_sync_la_SOURCES = $(sources)
libmca_coll_sync_la_LDFLAGS = -module -avoid-version

ompi/mca/coll/sync/coll_sync.h

@ -0,0 +1,184 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_COLL_SYNC_EXPORT_H
#define MCA_COLL_SYNC_EXPORT_H
#include "ompi_config.h"
#include "mpi.h"
#include "opal/class/opal_object.h"
#include "opal/mca/mca.h"
#include "opal/util/output.h"
#include "ompi/constants.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/base.h"
#include "ompi/communicator/communicator.h"
BEGIN_C_DECLS
/* API functions */
int mca_coll_sync_init_query(bool enable_progress_threads,
bool enable_mpi_threads);
mca_coll_base_module_t
*mca_coll_sync_comm_query(struct ompi_communicator_t *comm,
int *priority);
int mca_coll_sync_module_enable(mca_coll_base_module_t *module,
struct ompi_communicator_t *comm);
int mca_coll_sync_barrier(struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_sync_bcast(void *buff, int count,
struct ompi_datatype_t *datatype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_sync_exscan(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_sync_gather(const void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,
struct ompi_datatype_t *rdtype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_sync_gatherv(const void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, const int *rcounts, const int *disps,
struct ompi_datatype_t *rdtype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_sync_reduce(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_sync_reduce_scatter(const void *sbuf, void *rbuf,
const int *rcounts,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_sync_scan(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_sync_scatter(const void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,
struct ompi_datatype_t *rdtype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_sync_scatterv(const void *sbuf, const int *scounts, const int *disps,
struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,
struct ompi_datatype_t *rdtype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_sync_ft_event(int status);
/* Types */
/* Module */
typedef struct mca_coll_sync_module_t {
mca_coll_base_module_t super;
/* Pointers to all the "real" collective functions */
mca_coll_base_comm_coll_t c_coll;
/* How many ops we've executed */
int before_num_operations;
/* How many ops we've executed (it's easier to have 2) */
int after_num_operations;
/* Avoid recursion of syncs */
bool in_operation;
} mca_coll_sync_module_t;
OBJ_CLASS_DECLARATION(mca_coll_sync_module_t);
/* Component */
typedef struct mca_coll_sync_component_t {
mca_coll_base_component_2_0_0_t super;
/* Priority of this component */
int priority;
/* Do a sync *before* each Nth collective */
int barrier_before_nops;
/* Do a sync *after* each Nth collective */
int barrier_after_nops;
} mca_coll_sync_component_t;
/* Globally exported variables */
OMPI_MODULE_DECLSPEC extern mca_coll_sync_component_t mca_coll_sync_component;
/* Macro used in most of the collectives */
#define COLL_SYNC(m, op) \
do { \
int err = MPI_SUCCESS; \
(m)->in_operation = true; \
if (OPAL_UNLIKELY(++((m)->before_num_operations) == \
mca_coll_sync_component.barrier_before_nops)) { \
(m)->before_num_operations = 0; \
err = (m)->c_coll.coll_barrier(comm, (m)->c_coll.coll_barrier_module); \
} \
if (OPAL_LIKELY(MPI_SUCCESS == err)) { \
err = op; \
} \
if (OPAL_UNLIKELY(++((m)->after_num_operations) == \
mca_coll_sync_component.barrier_after_nops) && \
OPAL_LIKELY(MPI_SUCCESS == err)) { \
(m)->after_num_operations = 0; \
err = (m)->c_coll.coll_barrier(comm, (m)->c_coll.coll_barrier_module); \
} \
(m)->in_operation = false; \
return err; \
} while(0)
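/* Example behaviour (illustrative, assuming the usual coll_sync_* MCA name
 * prefix): with barrier_after = 10 and barrier_before = 0, every 10th wrapped
 * collective on a communicator is followed by a barrier on the underlying
 * module; the in_operation flag makes collectives invoked from inside another
 * wrapped collective skip this extra synchronization. */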
END_C_DECLS
#endif /* MCA_COLL_SYNC_EXPORT_H */

ompi/mca/coll/sync/coll_sync_bcast.c

@ -0,0 +1,47 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "mpi.h"
#include "coll_sync.h"
/*
* bcast
*
* Function: - broadcast
* Accepts: - same arguments as MPI_Bcast()
* Returns: - MPI_SUCCESS or error code
*/
int mca_coll_sync_bcast(void *buff, int count,
struct ompi_datatype_t *datatype, int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_sync_module_t *s = (mca_coll_sync_module_t*) module;
if (s->in_operation) {
return s->c_coll.coll_bcast(buff, count, datatype, root, comm,
s->c_coll.coll_bcast_module);
} else {
COLL_SYNC(s, s->c_coll.coll_bcast(buff, count, datatype, root, comm,
s->c_coll.coll_bcast_module));
}
}

ompi/mca/coll/sync/coll_sync_component.c

@ -0,0 +1,104 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <string.h>
#include "opal/util/output.h"
#include "mpi.h"
#include "ompi/constants.h"
#include "coll_sync.h"
/*
* Public string showing the coll ompi_sync component version number
*/
const char *mca_coll_sync_component_version_string =
"Open MPI sync collective MCA component version " OMPI_VERSION;
/*
* Local function
*/
static int sync_register(void);
/*
* Instantiate the public struct with all of our public information
* and pointers to our public functions in it
*/
mca_coll_sync_component_t mca_coll_sync_component = {
{
/* First, the mca_component_t struct containing meta information
* about the component itself */
.collm_version = {
MCA_COLL_BASE_VERSION_2_0_0,
/* Component name and version */
.mca_component_name = "sync",
MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION,
OMPI_RELEASE_VERSION),
/* Component open and close functions */
.mca_register_component_params = sync_register
},
.collm_data = {
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
/* Initialization / querying functions */
.collm_init_query = mca_coll_sync_init_query,
.collm_comm_query = mca_coll_sync_comm_query
},
};
static int sync_register(void)
{
mca_base_component_t *c = &mca_coll_sync_component.super.collm_version;
mca_coll_sync_component.priority = 50;
(void) mca_base_component_var_register(c, "priority",
"Priority of the sync coll component; only relevant if barrier_before or barrier_after is > 0",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_sync_component.priority);
mca_coll_sync_component.barrier_before_nops = 0;
(void) mca_base_component_var_register(c, "barrier_before",
"Do a synchronization before each Nth collective",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_sync_component.barrier_before_nops);
mca_coll_sync_component.barrier_after_nops = 0;
(void) mca_base_component_var_register(c, "barrier_after",
"Do a synchronization after each Nth collective",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_sync_component.barrier_after_nops);
return OMPI_SUCCESS;
}
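Since barrier_before and barrier_after both default to 0 and comm_query disqualifies the component when both are 0, the component only takes effect when at least one of them is set. Assuming the standard MCA names derived from the registrations above (coll_sync_barrier_before, coll_sync_barrier_after, coll_sync_priority; the full names are not spelled out in the patch), a run such as

    mpirun --mca coll_sync_barrier_after 100 ./app

would insert a barrier after every 100th collective on each communicator.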

ompi/mca/coll/sync/coll_sync_exscan.c

@ -0,0 +1,47 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "coll_sync.h"
/*
* exscan
*
* Function: - exscan
* Accepts: - same arguments as MPI_Exscan()
* Returns: - MPI_SUCCESS or error code
*/
int mca_coll_sync_exscan(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_sync_module_t *s = (mca_coll_sync_module_t*) module;
if (s->in_operation) {
return s->c_coll.coll_exscan(sbuf, rbuf, count, dtype, op, comm,
s->c_coll.coll_exscan_module);
} else {
COLL_SYNC(s, s->c_coll.coll_exscan(sbuf, rbuf, count, dtype, op, comm,
s->c_coll.coll_exscan_module));
}
}

ompi/mca/coll/sync/coll_sync_gather.c

@ -0,0 +1,50 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "coll_sync.h"
/*
* gather
*
* Function: - gather
* Accepts: - same arguments as MPI_Gather()
* Returns: - MPI_SUCCESS or error code
*/
int mca_coll_sync_gather(const void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,
struct ompi_datatype_t *rdtype,
int root, struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_sync_module_t *s = (mca_coll_sync_module_t*) module;
if (s->in_operation) {
return s->c_coll.coll_gather(sbuf, scount, sdtype,
rbuf, rcount, rdtype, root, comm,
s->c_coll.coll_gather_module);
} else {
COLL_SYNC(s, s->c_coll.coll_gather(sbuf, scount, sdtype,
rbuf, rcount, rdtype, root, comm,
s->c_coll.coll_gather_module));
}
}

ompi/mca/coll/sync/coll_sync_gatherv.c

@ -0,0 +1,51 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "coll_sync.h"
/*
* gatherv
*
* Function: - gatherv
* Accepts: - same arguments as MPI_Gatherv()
* Returns: - MPI_SUCCESS or error code
*/
int mca_coll_sync_gatherv(const void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, const int *rcounts, const int *disps,
struct ompi_datatype_t *rdtype, int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_sync_module_t *s = (mca_coll_sync_module_t*) module;
if (s->in_operation) {
return s->c_coll.coll_gatherv(sbuf, scount, sdtype,
rbuf, rcounts, disps, rdtype, root, comm,
s->c_coll.coll_gatherv_module);
} else {
COLL_SYNC(s, s->c_coll.coll_gatherv(sbuf, scount, sdtype,
rbuf, rcounts, disps, rdtype,
root, comm,
s->c_coll.coll_gatherv_module));
}
}

ompi/mca/coll/sync/coll_sync_module.c

@ -0,0 +1,200 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#include <stdio.h>
#include "coll_sync.h"
#include "mpi.h"
#include "orte/util/show_help.h"
#include "orte/util/proc_info.h"
#include "ompi/constants.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/base.h"
#include "coll_sync.h"
static void mca_coll_sync_module_construct(mca_coll_sync_module_t *module)
{
memset(&(module->c_coll), 0, sizeof(module->c_coll));
module->before_num_operations = 0;
module->after_num_operations = 0;
module->in_operation = false;
}
static void mca_coll_sync_module_destruct(mca_coll_sync_module_t *module)
{
OBJ_RELEASE(module->c_coll.coll_bcast_module);
OBJ_RELEASE(module->c_coll.coll_gather_module);
OBJ_RELEASE(module->c_coll.coll_gatherv_module);
OBJ_RELEASE(module->c_coll.coll_reduce_module);
OBJ_RELEASE(module->c_coll.coll_reduce_scatter_module);
OBJ_RELEASE(module->c_coll.coll_scatter_module);
OBJ_RELEASE(module->c_coll.coll_scatterv_module);
/* If the exscan module is not NULL, then this was an
intracommunicator, and therefore scan will have a module as
well. */
if (NULL != module->c_coll.coll_exscan_module) {
OBJ_RELEASE(module->c_coll.coll_exscan_module);
OBJ_RELEASE(module->c_coll.coll_scan_module);
}
}
OBJ_CLASS_INSTANCE(mca_coll_sync_module_t, mca_coll_base_module_t,
mca_coll_sync_module_construct,
mca_coll_sync_module_destruct);
/*
* Initial query function that is invoked during MPI_INIT, allowing
* this component to disqualify itself if it doesn't support the
* required level of thread support.
*/
int mca_coll_sync_init_query(bool enable_progress_threads,
bool enable_mpi_threads)
{
/* Nothing to do */
return OMPI_SUCCESS;
}
/*
* Invoked when there's a new communicator that has been created.
* Look at the communicator and decide which set of functions and
* priority we want to return.
*/
mca_coll_base_module_t *
mca_coll_sync_comm_query(struct ompi_communicator_t *comm,
int *priority)
{
mca_coll_sync_module_t *sync_module;
sync_module = OBJ_NEW(mca_coll_sync_module_t);
if (NULL == sync_module) {
return NULL;
}
/* If both MCA params are 0, then disqualify us */
if (0 == mca_coll_sync_component.barrier_before_nops &&
0 == mca_coll_sync_component.barrier_after_nops) {
return NULL;
}
*priority = mca_coll_sync_component.priority;
/* Choose whether to use [intra|inter] */
sync_module->super.coll_module_enable = mca_coll_sync_module_enable;
sync_module->super.ft_event = mca_coll_sync_ft_event;
/* The "all" versions are already synchronous. So no need for an
additional barrier there. */
sync_module->super.coll_allgather = NULL;
sync_module->super.coll_allgatherv = NULL;
sync_module->super.coll_allreduce = NULL;
sync_module->super.coll_alltoall = NULL;
sync_module->super.coll_alltoallv = NULL;
sync_module->super.coll_alltoallw = NULL;
sync_module->super.coll_barrier = NULL;
sync_module->super.coll_bcast = mca_coll_sync_bcast;
sync_module->super.coll_exscan = mca_coll_sync_exscan;
sync_module->super.coll_gather = mca_coll_sync_gather;
sync_module->super.coll_gatherv = mca_coll_sync_gatherv;
sync_module->super.coll_reduce = mca_coll_sync_reduce;
sync_module->super.coll_reduce_scatter = mca_coll_sync_reduce_scatter;
sync_module->super.coll_scan = mca_coll_sync_scan;
sync_module->super.coll_scatter = mca_coll_sync_scatter;
sync_module->super.coll_scatterv = mca_coll_sync_scatterv;
return &(sync_module->super);
}
/*
* Init module on the communicator
*/
int mca_coll_sync_module_enable(mca_coll_base_module_t *module,
struct ompi_communicator_t *comm)
{
bool good = true;
char *msg = NULL;
mca_coll_sync_module_t *s = (mca_coll_sync_module_t*) module;
/* Save the prior layer of coll functions */
s->c_coll = comm->c_coll;
#define CHECK_AND_RETAIN(name) \
if (NULL == s->c_coll.coll_ ## name ## _module) { \
good = false; \
msg = #name; \
} else if (good) { \
OBJ_RETAIN(s->c_coll.coll_ ## name ## _module); \
}
CHECK_AND_RETAIN(bcast);
CHECK_AND_RETAIN(gather);
CHECK_AND_RETAIN(gatherv);
CHECK_AND_RETAIN(reduce);
CHECK_AND_RETAIN(reduce_scatter);
CHECK_AND_RETAIN(scatter);
CHECK_AND_RETAIN(scatterv);
if (!OMPI_COMM_IS_INTER(comm)) {
/* MPI does not define scan/exscan on intercommunicators */
CHECK_AND_RETAIN(exscan);
CHECK_AND_RETAIN(scan);
}
/* All done */
if (good) {
return OMPI_SUCCESS;
} else {
orte_show_help("help-coll-sync.txt", "missing collective", true,
orte_process_info.nodename,
mca_coll_sync_component.priority, msg);
return OMPI_ERR_NOT_FOUND;
}
}
int mca_coll_sync_ft_event(int state)
{
if (OPAL_CRS_CHECKPOINT == state) {
;
}
else if (OPAL_CRS_CONTINUE == state) {
;
}
else if (OPAL_CRS_RESTART == state) {
;
}
else if (OPAL_CRS_TERM == state ) {
;
}
else {
;
}
return OMPI_SUCCESS;
}

ompi/mca/coll/sync/coll_sync_reduce.c

@ -0,0 +1,47 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "coll_sync.h"
/*
* reduce
*
* Function: - reduce
* Accepts: - same as MPI_Reduce()
* Returns: - MPI_SUCCESS or error code
*/
int mca_coll_sync_reduce(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
int root, struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_sync_module_t *s = (mca_coll_sync_module_t*) module;
if (s->in_operation) {
return s->c_coll.coll_reduce(sbuf, rbuf, count, dtype, op, root, comm,
s->c_coll.coll_reduce_module);
} else {
COLL_SYNC(s, s->c_coll.coll_reduce(sbuf, rbuf, count, dtype,
op, root, comm,
s->c_coll.coll_reduce_module));
}
}


@ -0,0 +1,50 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "coll_sync.h"
/*
* reduce_scatter
*
* Function: - reduce then scatter
* Accepts: - same as MPI_Reduce_scatter()
* Returns: - MPI_SUCCESS or error code
*/
int mca_coll_sync_reduce_scatter(const void *sbuf, void *rbuf, const int *rcounts,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_sync_module_t *s = (mca_coll_sync_module_t*) module;
if (s->in_operation) {
return s->c_coll.coll_reduce_scatter(sbuf, rbuf, rcounts,
dtype, op, comm,
s->c_coll.coll_reduce_scatter_module);
} else {
COLL_SYNC(s, s->c_coll.coll_reduce_scatter(sbuf, rbuf, rcounts,
dtype, op, comm,
s->c_coll.coll_reduce_scatter_module));
}
}

ompi/mca/coll/sync/coll_sync_scan.c

@ -0,0 +1,46 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "coll_sync.h"
/*
* scan
*
* Function: - scan
* Accepts: - same arguments as MPI_Scan()
* Returns: - MPI_SUCCESS or error code
*/
int mca_coll_sync_scan(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_sync_module_t *s = (mca_coll_sync_module_t*) module;
if (s->in_operation) {
return s->c_coll.coll_scan(sbuf, rbuf, count, dtype, op, comm,
s->c_coll.coll_scan_module);
} else {
COLL_SYNC(s, s->c_coll.coll_scan(sbuf, rbuf, count, dtype, op, comm,
s->c_coll.coll_scan_module));
}
}

50
ompi/mca/coll/sync/coll_sync_scatter.c Normal file

@ -0,0 +1,50 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "coll_sync.h"
/*
* scatter
*
* Function: - scatter
* Accepts: - same arguments as MPI_Scatter()
* Returns: - MPI_SUCCESS or error code
*/
int mca_coll_sync_scatter(const void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,
struct ompi_datatype_t *rdtype,
int root, struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_sync_module_t *s = (mca_coll_sync_module_t*) module;
if (s->in_operation) {
return s->c_coll.coll_scatter(sbuf, scount, sdtype,
rbuf, rcount, rdtype, root, comm,
s->c_coll.coll_scatter_module);
} else {
COLL_SYNC(s, s->c_coll.coll_scatter(sbuf, scount, sdtype,
rbuf, rcount, rdtype, root, comm,
s->c_coll.coll_scatter_module));
}
}

50
ompi/mca/coll/sync/coll_sync_scatterv.c Normal file

@ -0,0 +1,50 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "coll_sync.h"
/*
* scatterv
*
* Function: - scatterv
* Accepts: - same arguments as MPI_Scatterv()
* Returns: - MPI_SUCCESS or error code
*/
int mca_coll_sync_scatterv(const void *sbuf, const int *scounts,
const int *disps, struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,
struct ompi_datatype_t *rdtype, int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_sync_module_t *s = (mca_coll_sync_module_t*) module;
if (s->in_operation) {
return s->c_coll.coll_scatterv(sbuf, scounts, disps, sdtype,
rbuf, rcount, rdtype, root, comm,
s->c_coll.coll_scatterv_module);
} else {
COLL_SYNC(s, s->c_coll.coll_scatterv(sbuf, scounts, disps, sdtype,
rbuf, rcount, rdtype, root, comm,
s->c_coll.coll_scatterv_module));
}
}

22
ompi/mca/coll/sync/help-coll-sync.txt Normal file

@ -0,0 +1,22 @@
# -*- text -*-
#
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This is the US/English general help file for Open MPI's sync
# collective component.
#
[missing collective]
The sync collective component in Open MPI was activated on a
communicator where it did not find an underlying collective operation
defined. This usually means that the sync collective module's
priority was not set high enough. Please try increasing sync's
priority.
Local host: %s
Sync coll module priority: %d
First discovered missing collective: %s
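(For reference, a typical way to raise the priority at run time, assuming
the standard coll_<component>_priority MCA parameter naming, is:

    mpirun --mca coll_sync_priority 100 ...

The exact parameter name and its default can be verified with
"ompi_info --param coll sync".)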

7
ompi/mca/coll/sync/owner.txt Normal file

@ -0,0 +1,7 @@
#
# owner/status file
# owner: institution that is responsible for this package
# status: e.g. active, maintenance, unmaintained
#
owner: Intel
status: maintenance

100
ompi/mca/common/ompio/Makefile.am Normal file

@ -0,0 +1,100 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2007 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2008-2016 University of Houston. All rights reserved.
#
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
if OMPI_PROVIDE_MPI_FILE_INTERFACE
headers = \
common_ompio_print_queue.h \
common_ompio.h
sources = \
common_ompio_print_queue.c \
common_ompio_file_open.c \
common_ompio_file_view.c \
common_ompio_file_read.c \
common_ompio_file_write.c
# To simplify components that link to this library, we will *always*
# have an output libtool library named libmca_<type>_<name>.la -- even
# for case 2) described above (i.e., so there's no conditional logic
# necessary in component Makefile.am's that link to this library).
# Hence, if we're creating a noinst version of this library (i.e.,
# case 2), we sym link it to the libmca_<type>_<name>.la name
# (libtool will do the Right Things under the covers). See the
# all-local and clean-local rules, below, for how this is effected.
lib_LTLIBRARIES =
noinst_LTLIBRARIES =
comp_inst = lib@OPAL_LIB_PREFIX@mca_common_ompio.la
comp_noinst = lib@OPAL_LIB_PREFIX@mca_common_ompio_noinst.la
if MCA_BUILD_ompi_common_ompio_DSO
lib_LTLIBRARIES += $(comp_inst)
else
noinst_LTLIBRARIES += $(comp_noinst)
endif
lib@OPAL_LIB_PREFIX@mca_common_ompio_la_SOURCES = $(headers) $(sources)
lib@OPAL_LIB_PREFIX@mca_common_ompio_la_CPPFLAGS = $(common_ompio_CPPFLAGS)
lib@OPAL_LIB_PREFIX@mca_common_ompio_la_LDFLAGS = \
-version-info $(libmca_ompi_common_ompio_so_version) \
$(common_ompio_LDFLAGS)
lib@OPAL_LIB_PREFIX@mca_common_ompio_la_LIBADD = $(common_ompio_LIBS)
lib@OPAL_LIB_PREFIX@mca_common_ompio_noinst_la_SOURCES = $(headers) $(sources)
# Conditionally install the header files
if WANT_INSTALL_HEADERS
ompidir = $(ompiincludedir)/ompi/mca/common/ompio
ompi_HEADERS = $(headers)
else
ompidir = $(includedir)
endif
# These two rules will sym link the "noinst" libtool library filename
# to the installable libtool library filename in the case where we are
# compiling this component statically (case 2, described above).
V=0
OMPI_V_LN_SCOMP = $(ompi__v_LN_SCOMP_$V)
ompi__v_LN_SCOMP_ = $(ompi__v_LN_SCOMP_$AM_DEFAULT_VERBOSITY)
ompi__v_LN_SCOMP_0 = @echo " LN_S " `basename $(comp_inst)`;
all-local:
$(OMPI_V_LN_SCOMP) if test -z "$(lib_LTLIBRARIES)"; then \
rm -f "$(comp_inst)"; \
$(LN_S) "$(comp_noinst)" "$(comp_inst)"; \
fi
clean-local:
if test -z "$(lib_LTLIBRARIES)"; then \
rm -f "$(comp_inst)"; \
fi
else
# Need to have empty targets because AM can't handle having an
# AM_CONDITIONAL with targets in the "if" statement but not in the
# "else". :-(
all-local:
clean-local:
endif

91
ompi/mca/common/ompio/common_ompio.h Normal file

@ -0,0 +1,91 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2007 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2016 University of Houston. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_COMMON_OMPIO_H
#define MCA_COMMON_OMPIO_H
#include "ompi/mca/common/ompio/common_ompio_print_queue.h"
#include "ompi/mca/io/ompio/io_ompio.h"
OMPI_DECLSPEC int mca_common_ompio_file_write (mca_io_ompio_file_t *fh, const void *buf, int count,
struct ompi_datatype_t *datatype,
ompi_status_public_t *status);
OMPI_DECLSPEC int mca_common_ompio_file_write_at (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset, const void *buf,
int count, struct ompi_datatype_t *datatype,
ompi_status_public_t *status);
OMPI_DECLSPEC int mca_common_ompio_file_iwrite (mca_io_ompio_file_t *fh, const void *buf, int count,
struct ompi_datatype_t *datatype, ompi_request_t **request);
OMPI_DECLSPEC int mca_common_ompio_file_iwrite_at (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset,
const void *buf, int count, struct ompi_datatype_t *datatype,
ompi_request_t **request);
OMPI_DECLSPEC int mca_common_ompio_file_write_at_all (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset, const void *buf,
int count, struct ompi_datatype_t *datatype,
ompi_status_public_t *status);
OMPI_DECLSPEC int mca_common_ompio_file_iwrite_at_all (mca_io_ompio_file_t *fp, OMPI_MPI_OFFSET_TYPE offset, const void *buf,
int count, struct ompi_datatype_t *datatype, ompi_request_t **request);
OMPI_DECLSPEC int mca_common_ompio_build_io_array ( mca_io_ompio_file_t *fh, int index, int cycles,
size_t bytes_per_cycle, int max_data, uint32_t iov_count,
struct iovec *decoded_iov, int *ii, int *jj, size_t *tbw );
OMPI_DECLSPEC int mca_common_ompio_file_read (mca_io_ompio_file_t *fh, void *buf, int count,
struct ompi_datatype_t *datatype, ompi_status_public_t *status);
OMPI_DECLSPEC int mca_common_ompio_file_read_at (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset, void *buf,
int count, struct ompi_datatype_t *datatype,
ompi_status_public_t * status);
OMPI_DECLSPEC int mca_common_ompio_file_iread (mca_io_ompio_file_t *fh, void *buf, int count,
struct ompi_datatype_t *datatype, ompi_request_t **request);
OMPI_DECLSPEC int mca_common_ompio_file_iread_at (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset,
void *buf, int count, struct ompi_datatype_t *datatype,
ompi_request_t **request);
OMPI_DECLSPEC int mca_common_ompio_file_read_at_all (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset,
void *buf, int count, struct ompi_datatype_t *datatype,
ompi_status_public_t * status);
OMPI_DECLSPEC int mca_common_ompio_file_iread_at_all (mca_io_ompio_file_t *fp, OMPI_MPI_OFFSET_TYPE offset,
void *buf, int count, struct ompi_datatype_t *datatype,
ompi_request_t **request);
OMPI_DECLSPEC int mca_common_ompio_file_open (ompi_communicator_t *comm, const char *filename,
int amode, ompi_info_t *info,
mca_io_ompio_file_t *ompio_fh, bool use_sharedfp);
OMPI_DECLSPEC int mca_common_ompio_file_close (mca_io_ompio_file_t *ompio_fh);
OMPI_DECLSPEC int mca_common_ompio_file_get_size (mca_io_ompio_file_t *ompio_fh, OMPI_MPI_OFFSET_TYPE *size);
OMPI_DECLSPEC int mca_common_ompio_file_get_position (mca_io_ompio_file_t *fh,OMPI_MPI_OFFSET_TYPE *offset);
OMPI_DECLSPEC int mca_common_ompio_set_explicit_offset (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset);
OMPI_DECLSPEC int mca_common_ompio_set_file_defaults (mca_io_ompio_file_t *fh);
OMPI_DECLSPEC int mca_common_ompio_set_view (mca_io_ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE disp,
ompi_datatype_t *etype, ompi_datatype_t *filetype, const char *datarep,
ompi_info_t *info);
#endif /* MCA_COMMON_OMPIO_H */


@ -0,0 +1,459 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2016 University of Houston. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2016 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/communicator/communicator.h"
#include "ompi/info/info.h"
#include "ompi/file/file.h"
#include "ompi/mca/io/base/base.h"
#include "ompi/mca/fs/fs.h"
#include "ompi/mca/fs/base/base.h"
#include "ompi/mca/fcoll/fcoll.h"
#include "ompi/mca/fcoll/base/base.h"
#include "ompi/mca/fbtl/fbtl.h"
#include "ompi/mca/fbtl/base/base.h"
#include "ompi/mca/sharedfp/sharedfp.h"
#include "ompi/mca/sharedfp/base/base.h"
#include <unistd.h>
#include <math.h>
#include "common_ompio.h"
#include "ompi/mca/topo/topo.h"
int mca_common_ompio_file_open (ompi_communicator_t *comm,
const char *filename,
int amode,
ompi_info_t *info,
mca_io_ompio_file_t *ompio_fh, bool use_sharedfp)
{
int ret = OMPI_SUCCESS;
int remote_arch;
ompio_fh->f_iov_type = MPI_DATATYPE_NULL;
ompio_fh->f_comm = MPI_COMM_NULL;
if ( ((amode&MPI_MODE_RDONLY)?1:0) + ((amode&MPI_MODE_RDWR)?1:0) +
((amode&MPI_MODE_WRONLY)?1:0) != 1 ) {
return MPI_ERR_AMODE;
}
if ((amode & MPI_MODE_RDONLY) &&
((amode & MPI_MODE_CREATE) || (amode & MPI_MODE_EXCL))) {
return MPI_ERR_AMODE;
}
if ((amode & MPI_MODE_RDWR) && (amode & MPI_MODE_SEQUENTIAL)) {
return MPI_ERR_AMODE;
}
ompio_fh->f_rank = ompi_comm_rank (comm);
ompio_fh->f_size = ompi_comm_size (comm);
remote_arch = opal_local_arch;
ompio_fh->f_convertor = opal_convertor_create (remote_arch, 0);
if ( true == use_sharedfp ) {
ret = ompi_comm_dup (comm, &ompio_fh->f_comm);
if ( OMPI_SUCCESS != ret ) {
goto fn_fail;
}
}
else {
/* No need to duplicate the communicator if the file_open is called
from the sharedfp component, since the comm used as an input
is already a dup of the user level comm. */
ompio_fh->f_flags |= OMPIO_SHAREDFP_IS_SET;
ompio_fh->f_comm = comm;
}
ompio_fh->f_fstype = NONE;
ompio_fh->f_amode = amode;
ompio_fh->f_info = info;
ompio_fh->f_atomicity = 0;
mca_common_ompio_set_file_defaults (ompio_fh);
ompio_fh->f_filename = filename;
ompio_fh->f_split_coll_req = NULL;
ompio_fh->f_split_coll_in_use = false;
/* Initialize the print queues here! */
mca_common_ompio_initialize_print_queue(&ompio_fh->f_coll_write_time);
mca_common_ompio_initialize_print_queue(&ompio_fh->f_coll_read_time);
/* set some function pointers required for fcoll, fbtls and sharedfp modules*/
ompio_fh->f_decode_datatype=ompi_io_ompio_decode_datatype;
ompio_fh->f_generate_current_file_view=ompi_io_ompio_generate_current_file_view;
ompio_fh->f_get_num_aggregators=mca_io_ompio_get_num_aggregators;
ompio_fh->f_get_bytes_per_agg=mca_io_ompio_get_bytes_per_agg;
ompio_fh->f_set_aggregator_props=mca_io_ompio_set_aggregator_props;
/* This fix is needed for data sieving to work with
two-phase collective I/O */
if ((amode & MPI_MODE_WRONLY)){
amode -= MPI_MODE_WRONLY;
amode += MPI_MODE_RDWR;
}
/*--------------------------------------------------*/
if (OMPI_SUCCESS != (ret = mca_fs_base_file_select (ompio_fh,
NULL))) {
opal_output(1, "mca_fs_base_file_select() failed\n");
goto fn_fail;
}
if (OMPI_SUCCESS != (ret = mca_fbtl_base_file_select (ompio_fh,
NULL))) {
opal_output(1, "mca_fbtl_base_file_select() failed\n");
goto fn_fail;
}
if (OMPI_SUCCESS != (ret = mca_fcoll_base_file_select (ompio_fh,
NULL))) {
opal_output(1, "mca_fcoll_base_file_select() failed\n");
goto fn_fail;
}
ompio_fh->f_sharedfp_component = NULL; /*component*/
ompio_fh->f_sharedfp = NULL; /*module*/
ompio_fh->f_sharedfp_data = NULL; /*data*/
if ( true == use_sharedfp ) {
if (OMPI_SUCCESS != (ret = mca_sharedfp_base_file_select (ompio_fh, NULL))) {
opal_output ( ompi_io_base_framework.framework_output,
"mca_sharedfp_base_file_select() failed\n");
ompio_fh->f_sharedfp = NULL; /*module*/
/* It's OK not to have a shared file pointer module as long as the shared file
** pointer operations are not used. However, the first call to any file_read/write_shared
** function will return an error code.
*/
}
/* open the file once more for the shared file pointer if required.
** By default, the shared file pointer specific actions are however
** only performed on first access of the shared file pointer, except
** for the addproc sharedfp component.
**
** Lazy open does not work for the addproc sharedfp
** component since it starts by spawning a process using MPI_Comm_spawn.
** For this, the first operation has to be collective, which we cannot
** guarantee outside of the MPI_File_open operation.
*/
if ( NULL != ompio_fh->f_sharedfp &&
true == use_sharedfp &&
(!mca_io_ompio_sharedfp_lazy_open ||
!strcmp (ompio_fh->f_sharedfp_component->mca_component_name,
"addproc") )) {
ret = ompio_fh->f_sharedfp->sharedfp_file_open(comm,
filename,
amode,
info,
ompio_fh);
if ( OMPI_SUCCESS != ret ) {
goto fn_fail;
}
}
}
/*Determine topology information if set*/
if (ompio_fh->f_comm->c_flags & OMPI_COMM_CART){
ret = mca_io_ompio_cart_based_grouping(ompio_fh);
if(OMPI_SUCCESS != ret ){
ret = MPI_ERR_FILE;
}
}
ret = ompio_fh->f_fs->fs_file_open (comm,
filename,
amode,
info,
ompio_fh);
if ( OMPI_SUCCESS != ret ) {
ret = MPI_ERR_FILE;
goto fn_fail;
}
/* If file has been opened in the append mode, move the internal
file pointer of OMPIO to the very end of the file. */
if ( ompio_fh->f_amode & MPI_MODE_APPEND ) {
OMPI_MPI_OFFSET_TYPE current_size;
ompio_fh->f_fs->fs_file_get_size( ompio_fh,
&current_size);
mca_common_ompio_set_explicit_offset (ompio_fh, current_size);
}
return OMPI_SUCCESS;
fn_fail:
/* no need to free resources here, since the destructor
* is calling mca_io_ompio_file_close, which actually gets
* rid of all allocated memory items */
return ret;
}
int mca_common_ompio_file_close (mca_io_ompio_file_t *ompio_fh)
{
int ret = OMPI_SUCCESS;
int delete_flag = 0;
char name[256];
ret = ompio_fh->f_comm->c_coll.coll_barrier ( ompio_fh->f_comm, ompio_fh->f_comm->c_coll.coll_barrier_module);
if ( OMPI_SUCCESS != ret ) {
/* Not sure what to do */
opal_output (1,"mca_common_ompio_file_close: error in Barrier \n");
return ret;
}
if(mca_io_ompio_coll_timing_info){
strcpy (name, "WRITE");
if (!mca_common_ompio_empty_print_queue(ompio_fh->f_coll_write_time)){
ret = mca_common_ompio_print_time_info(ompio_fh->f_coll_write_time,
name,
ompio_fh);
if (OMPI_SUCCESS != ret){
printf("Error in print_time_info ");
}
}
strcpy (name, "READ");
if (!mca_common_ompio_empty_print_queue(ompio_fh->f_coll_read_time)){
ret = mca_common_ompio_print_time_info(ompio_fh->f_coll_read_time,
name,
ompio_fh);
if (OMPI_SUCCESS != ret){
printf("Error in print_time_info ");
}
}
}
if ( ompio_fh->f_amode & MPI_MODE_DELETE_ON_CLOSE ) {
delete_flag = 1;
}
/*close the sharedfp file*/
if( NULL != ompio_fh->f_sharedfp ){
ret = ompio_fh->f_sharedfp->sharedfp_file_close(ompio_fh);
}
if ( NULL != ompio_fh->f_fs ) {
/* The pointer might not be set if file_close() is
** called from the file destructor in case of an error
** during file_open()
*/
ret = ompio_fh->f_fs->fs_file_close (ompio_fh);
}
if ( delete_flag && 0 == ompio_fh->f_rank ) {
mca_io_ompio_file_delete ( ompio_fh->f_filename, MPI_INFO_NULL );
}
if ( NULL != ompio_fh->f_fs ) {
mca_fs_base_file_unselect (ompio_fh);
}
if ( NULL != ompio_fh->f_fbtl ) {
mca_fbtl_base_file_unselect (ompio_fh);
}
if ( NULL != ompio_fh->f_fcoll ) {
mca_fcoll_base_file_unselect (ompio_fh);
}
if ( NULL != ompio_fh->f_sharedfp) {
mca_sharedfp_base_file_unselect (ompio_fh);
}
if (NULL != ompio_fh->f_io_array) {
free (ompio_fh->f_io_array);
ompio_fh->f_io_array = NULL;
}
if (NULL != ompio_fh->f_init_procs_in_group) {
free (ompio_fh->f_init_procs_in_group);
ompio_fh->f_init_procs_in_group = NULL;
}
if (NULL != ompio_fh->f_procs_in_group) {
free (ompio_fh->f_procs_in_group);
ompio_fh->f_procs_in_group = NULL;
}
if (NULL != ompio_fh->f_decoded_iov) {
free (ompio_fh->f_decoded_iov);
ompio_fh->f_decoded_iov = NULL;
}
if (NULL != ompio_fh->f_convertor) {
free (ompio_fh->f_convertor);
ompio_fh->f_convertor = NULL;
}
if (NULL != ompio_fh->f_datarep) {
free (ompio_fh->f_datarep);
ompio_fh->f_datarep = NULL;
}
if ( NULL != ompio_fh->f_coll_write_time ) {
free ( ompio_fh->f_coll_write_time );
ompio_fh->f_coll_write_time = NULL;
}
if ( NULL != ompio_fh->f_coll_read_time ) {
free ( ompio_fh->f_coll_read_time );
ompio_fh->f_coll_read_time = NULL;
}
if (MPI_DATATYPE_NULL != ompio_fh->f_iov_type) {
ompi_datatype_destroy (&ompio_fh->f_iov_type);
}
if ( MPI_DATATYPE_NULL != ompio_fh->f_etype ) {
ompi_datatype_destroy (&ompio_fh->f_etype);
}
if ( MPI_DATATYPE_NULL != ompio_fh->f_filetype ){
ompi_datatype_destroy (&ompio_fh->f_filetype);
}
if ( MPI_DATATYPE_NULL != ompio_fh->f_orig_filetype ){
ompi_datatype_destroy (&ompio_fh->f_orig_filetype);
}
if (MPI_COMM_NULL != ompio_fh->f_comm && (ompio_fh->f_flags & OMPIO_SHAREDFP_IS_SET) ) {
ompi_comm_free (&ompio_fh->f_comm);
}
return ret;
}
int mca_common_ompio_file_get_size (mca_io_ompio_file_t *ompio_fh,
OMPI_MPI_OFFSET_TYPE *size)
{
int ret = OMPI_SUCCESS;
ret = ompio_fh->f_fs->fs_file_get_size (ompio_fh, size);
return ret;
}
int mca_common_ompio_file_get_position (mca_io_ompio_file_t *fh,
OMPI_MPI_OFFSET_TYPE *offset)
{
OMPI_MPI_OFFSET_TYPE off;
/* No. of copies of the entire file view */
off = (fh->f_offset - fh->f_disp)/fh->f_view_extent;
/* No. of elements per view */
off *= (fh->f_view_size / fh->f_etype_size);
/* No. of elements used in the current copy of the view */
off += fh->f_total_bytes / fh->f_etype_size;
*offset = off;
return OMPI_SUCCESS;
}
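/* Worked example (illustrative only, values assumed): with f_disp = 0,
 * f_view_extent = 1024, f_view_size = 512, f_etype_size = 4, f_offset = 2048
 * and f_total_bytes = 100, the computation above yields
 *   (2048 - 0) / 1024 = 2 complete copies of the file view,
 *   2 * (512 / 4)     = 256 etypes covered by those copies,
 *   256 + 100 / 4     = 281,
 * i.e. the reported position is 281 etypes into the file view.
 */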
int mca_common_ompio_set_file_defaults (mca_io_ompio_file_t *fh)
{
if (NULL != fh) {
ompi_datatype_t *types[2];
int blocklen[2] = {1, 1};
OPAL_PTRDIFF_TYPE d[2], base;
int i;
fh->f_io_array = NULL;
fh->f_perm = OMPIO_PERM_NULL;
fh->f_flags = 0;
fh->f_bytes_per_agg = mca_io_ompio_bytes_per_agg;
fh->f_datarep = strdup ("native");
fh->f_offset = 0;
fh->f_disp = 0;
fh->f_position_in_file_view = 0;
fh->f_index_in_file_view = 0;
fh->f_total_bytes = 0;
fh->f_init_procs_per_group = -1;
fh->f_init_procs_in_group = NULL;
fh->f_procs_per_group = -1;
fh->f_procs_in_group = NULL;
fh->f_init_num_aggrs = -1;
fh->f_init_aggr_list = NULL;
/* Default file View */
fh->f_iov_type = MPI_DATATYPE_NULL;
fh->f_stripe_size = mca_io_ompio_bytes_per_agg;
/*Decoded iovec of the file-view*/
fh->f_decoded_iov = NULL;
fh->f_etype = NULL;
fh->f_filetype = NULL;
fh->f_orig_filetype = NULL;
mca_common_ompio_set_view(fh,
0,
&ompi_mpi_byte.dt,
&ompi_mpi_byte.dt,
"native",
fh->f_info);
/*Create a derived datatype for the created iovec */
types[0] = &ompi_mpi_long.dt;
types[1] = &ompi_mpi_long.dt;
d[0] = (OPAL_PTRDIFF_TYPE) fh->f_decoded_iov;
d[1] = (OPAL_PTRDIFF_TYPE) &fh->f_decoded_iov[0].iov_len;
base = d[0];
for (i=0 ; i<2 ; i++) {
d[i] -= base;
}
ompi_datatype_create_struct (2,
blocklen,
d,
types,
&fh->f_iov_type);
ompi_datatype_commit (&fh->f_iov_type);
return OMPI_SUCCESS;
}
else {
return OMPI_ERROR;
}
}


@ -0,0 +1,388 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2016 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2016 University of Houston. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/communicator/communicator.h"
#include "ompi/info/info.h"
#include "ompi/file/file.h"
#include "ompi/mca/fs/fs.h"
#include "ompi/mca/fs/base/base.h"
#include "ompi/mca/fcoll/fcoll.h"
#include "ompi/mca/fcoll/base/base.h"
#include "ompi/mca/fbtl/fbtl.h"
#include "ompi/mca/fbtl/base/base.h"
#include "common_ompio.h"
#include "ompi/mca/io/ompio/io_ompio.h"
#include "ompi/mca/io/ompio/io_ompio_request.h"
#include "math.h"
#include <unistd.h>
/* Read and write routines are split into two interfaces.
** The
** mca_io_ompio_file_read/write[_at]
**
** routines are the ones registered with the ompio modules.
** The
**
** mca_common_ompio_file_read/write[_at]
**
** routines are used e.g. from the shared file pointer modules.
** The main difference is that the first one takes an ompi_file_t
** as a file pointer argument, while the second uses the ompio internal
** mca_io_ompio_file_t structure.
*/
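/* Illustrative sketch (not part of this file): a wrapper registered with
 * the ompio io module would typically just unpack the ompi_file_t handle
 * and forward to the common routine, roughly:
 *
 *   int mca_io_ompio_file_read (ompi_file_t *fp, void *buf, int count,
 *                               struct ompi_datatype_t *datatype,
 *                               ompi_status_public_t *status)
 *   {
 *       mca_io_ompio_data_t *data = (mca_io_ompio_data_t *) fp->f_io_selected_data;
 *       return mca_common_ompio_file_read (&data->ompio_fh, buf, count,
 *                                          datatype, status);
 *   }
 *
 * The names mca_io_ompio_data_t, f_io_selected_data and ompio_fh are
 * assumptions for illustration; the actual wrappers live in
 * ompi/mca/io/ompio/.
 */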
int mca_common_ompio_file_read (mca_io_ompio_file_t *fh,
void *buf,
int count,
struct ompi_datatype_t *datatype,
ompi_status_public_t *status)
{
int ret = OMPI_SUCCESS;
size_t total_bytes_read = 0; /* total bytes that have been read*/
size_t bytes_per_cycle = 0; /* total read in each cycle by each process*/
int index = 0;
int cycles = 0;
uint32_t iov_count = 0;
struct iovec *decoded_iov = NULL;
size_t max_data=0, real_bytes_read=0;
ssize_t ret_code=0;
int i = 0; /* index into the decoded iovec of the buffer */
int j = 0; /* index into the file view iovec */
if ( 0 == count ) {
if ( MPI_STATUS_IGNORE != status ) {
status->_ucount = 0;
}
return ret;
}
if (fh->f_amode & MPI_MODE_WRONLY){
printf("Improper use of FILE Mode, Using WRONLY for Read!\n");
ret = OMPI_ERROR;
return ret;
}
ompi_io_ompio_decode_datatype (fh,
datatype,
count,
buf,
&max_data,
&decoded_iov,
&iov_count);
if ( -1 == mca_io_ompio_cycle_buffer_size ) {
bytes_per_cycle = max_data;
}
else {
bytes_per_cycle = mca_io_ompio_cycle_buffer_size;
}
cycles = ceil((float)max_data/bytes_per_cycle);
#if 0
printf ("Bytes per Cycle: %d Cycles: %d max_data:%d \n",bytes_per_cycle, cycles, max_data);
#endif
j = fh->f_index_in_file_view;
for (index = 0; index < cycles; index++) {
mca_common_ompio_build_io_array ( fh,
index,
cycles,
bytes_per_cycle,
max_data,
iov_count,
decoded_iov,
&i,
&j,
&total_bytes_read);
if (fh->f_num_of_io_entries) {
ret_code = fh->f_fbtl->fbtl_preadv (fh);
if ( 0<= ret_code ) {
real_bytes_read+=(size_t)ret_code;
}
}
fh->f_num_of_io_entries = 0;
if (NULL != fh->f_io_array) {
free (fh->f_io_array);
fh->f_io_array = NULL;
}
}
if (NULL != decoded_iov) {
free (decoded_iov);
decoded_iov = NULL;
}
if ( MPI_STATUS_IGNORE != status ) {
status->_ucount = real_bytes_read;
}
return ret;
}
int mca_common_ompio_file_read_at (mca_io_ompio_file_t *fh,
OMPI_MPI_OFFSET_TYPE offset,
void *buf,
int count,
struct ompi_datatype_t *datatype,
ompi_status_public_t * status)
{
int ret = OMPI_SUCCESS;
OMPI_MPI_OFFSET_TYPE prev_offset;
mca_common_ompio_file_get_position (fh, &prev_offset );
mca_common_ompio_set_explicit_offset (fh, offset);
ret = mca_common_ompio_file_read (fh,
buf,
count,
datatype,
status);
// An explicit offset file operation is not supposed to modify
// the internal file pointer. So reset the pointer
// to the previous value
mca_common_ompio_set_explicit_offset (fh, prev_offset);
return ret;
}
int mca_common_ompio_file_iread (mca_io_ompio_file_t *fh,
void *buf,
int count,
struct ompi_datatype_t *datatype,
ompi_request_t **request)
{
int ret = OMPI_SUCCESS;
mca_ompio_request_t *ompio_req=NULL;
ompio_req = OBJ_NEW(mca_ompio_request_t);
ompio_req->req_type = MCA_OMPIO_REQUEST_READ;
ompio_req->req_ompi.req_state = OMPI_REQUEST_ACTIVE;
if ( 0 == count ) {
ompio_req->req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS;
ompio_req->req_ompi.req_status._ucount = 0;
ompi_request_complete (&ompio_req->req_ompi, false);
*request = (ompi_request_t *) ompio_req;
return OMPI_SUCCESS;
}
if ( NULL != fh->f_fbtl->fbtl_ipreadv ) {
// This fbtl has support for non-blocking operations
size_t total_bytes_read = 0; /* total bytes that have been read*/
uint32_t iov_count = 0;
struct iovec *decoded_iov = NULL;
size_t max_data = 0;
int i = 0; /* index into the decoded iovec of the buffer */
int j = 0; /* index into the file view iovec */
ompi_io_ompio_decode_datatype (fh,
datatype,
count,
buf,
&max_data,
&decoded_iov,
&iov_count);
// Non-blocking operations have to occur in a single cycle
j = fh->f_index_in_file_view;
mca_common_ompio_build_io_array ( fh,
0, // index
1, // no. of cycles
max_data, // setting bytes per cycle to match data
max_data,
iov_count,
decoded_iov,
&i,
&j,
&total_bytes_read);
if (fh->f_num_of_io_entries) {
fh->f_fbtl->fbtl_ipreadv (fh, (ompi_request_t *) ompio_req);
}
if ( false == mca_io_ompio_progress_is_registered ) {
// Lazy initialization of progress function to minimize impact
// on other ompi functionality in case it's not used.
opal_progress_register (mca_io_ompio_component_progress);
mca_io_ompio_progress_is_registered=true;
}
fh->f_num_of_io_entries = 0;
if (NULL != fh->f_io_array) {
free (fh->f_io_array);
fh->f_io_array = NULL;
}
if (NULL != decoded_iov) {
free (decoded_iov);
decoded_iov = NULL;
}
}
else {
// This fbtl does not support non-blocking operations
ompi_status_public_t status;
ret = mca_common_ompio_file_read (fh, buf, count, datatype, &status);
ompio_req->req_ompi.req_status.MPI_ERROR = ret;
ompio_req->req_ompi.req_status._ucount = status._ucount;
ompi_request_complete (&ompio_req->req_ompi, false);
}
*request = (ompi_request_t *) ompio_req;
return ret;
}
int mca_common_ompio_file_iread_at (mca_io_ompio_file_t *fh,
OMPI_MPI_OFFSET_TYPE offset,
void *buf,
int count,
struct ompi_datatype_t *datatype,
ompi_request_t **request)
{
int ret = OMPI_SUCCESS;
OMPI_MPI_OFFSET_TYPE prev_offset;
mca_common_ompio_file_get_position (fh, &prev_offset );
mca_common_ompio_set_explicit_offset (fh, offset);
ret = mca_common_ompio_file_iread (fh,
buf,
count,
datatype,
request);
/* An explicit offset file operation is not supposed to modify
** the internal file pointer. So reset the pointer
** to the previous value.
** It is OK to reset the position already here, although
** the operation might still be pending/ongoing, since
** the entire array of <offset, length, memaddress> has
** already been constructed in the file_iread operation
*/
mca_common_ompio_set_explicit_offset (fh, prev_offset);
return ret;
}
/* Infrastructure for collective operations */
int mca_common_ompio_file_read_at_all (mca_io_ompio_file_t *fh,
OMPI_MPI_OFFSET_TYPE offset,
void *buf,
int count,
struct ompi_datatype_t *datatype,
ompi_status_public_t * status)
{
int ret = OMPI_SUCCESS;
OMPI_MPI_OFFSET_TYPE prev_offset;
mca_common_ompio_file_get_position (fh, &prev_offset );
mca_common_ompio_set_explicit_offset (fh, offset);
ret = fh->f_fcoll->fcoll_file_read_all (fh,
buf,
count,
datatype,
status);
mca_common_ompio_set_explicit_offset (fh, prev_offset);
return ret;
}
int mca_common_ompio_file_iread_at_all (mca_io_ompio_file_t *fp,
OMPI_MPI_OFFSET_TYPE offset,
void *buf,
int count,
struct ompi_datatype_t *datatype,
ompi_request_t **request)
{
int ret = OMPI_SUCCESS;
OMPI_MPI_OFFSET_TYPE prev_offset;
mca_common_ompio_file_get_position (fp, &prev_offset );
mca_common_ompio_set_explicit_offset (fp, offset);
if ( NULL != fp->f_fcoll->fcoll_file_iread_all ) {
ret = fp->f_fcoll->fcoll_file_iread_all (fp,
buf,
count,
datatype,
request);
}
else {
/* this fcoll component does not support non-blocking
collective I/O operations. We fake it with
individual non-blocking I/O operations. */
ret = mca_common_ompio_file_iread ( fp, buf, count, datatype, request );
}
mca_common_ompio_set_explicit_offset (fp, prev_offset);
return ret;
}
int mca_common_ompio_set_explicit_offset (mca_io_ompio_file_t *fh,
OMPI_MPI_OFFSET_TYPE offset)
{
int i = 0;
int k = 0;
if ( fh->f_view_size > 0 ) {
/* starting offset of the current copy of the file view */
fh->f_offset = (fh->f_view_extent *
((offset*fh->f_etype_size) / fh->f_view_size)) + fh->f_disp;
/* number of bytes used within the current copy of the file view */
fh->f_total_bytes = (offset*fh->f_etype_size) % fh->f_view_size;
i = fh->f_total_bytes;
/* Initialize the block id and the starting offset of the current block
within the current copy of the file view to zero */
fh->f_index_in_file_view = 0;
fh->f_position_in_file_view = 0;
/* determine block id that the offset is located in and
the starting offset of that block */
k = fh->f_decoded_iov[fh->f_index_in_file_view].iov_len;
while (i >= k) {
fh->f_position_in_file_view = k;
fh->f_index_in_file_view++;
k += fh->f_decoded_iov[fh->f_index_in_file_view].iov_len;
}
}
return OMPI_SUCCESS;
}


@ -0,0 +1,280 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2016 University of Houston. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "opal/datatype/opal_convertor.h"
#include "ompi/datatype/ompi_datatype.h"
#include <stdlib.h>
#include <stdio.h>
#include "common_ompio.h"
#include "ompi/mca/fcoll/base/base.h"
static OMPI_MPI_OFFSET_TYPE get_contiguous_chunk_size (mca_io_ompio_file_t *);
static int datatype_duplicate (ompi_datatype_t *oldtype, ompi_datatype_t **newtype );
static int datatype_duplicate (ompi_datatype_t *oldtype, ompi_datatype_t **newtype )
{
ompi_datatype_t *type;
if( ompi_datatype_is_predefined(oldtype) ) {
OBJ_RETAIN(oldtype);
*newtype = oldtype;
return OMPI_SUCCESS;
}
if ( OMPI_SUCCESS != ompi_datatype_duplicate (oldtype, &type)){
ompi_datatype_destroy (&type);
return MPI_ERR_INTERN;
}
ompi_datatype_set_args( type, 0, NULL, 0, NULL, 1, &oldtype, MPI_COMBINER_DUP );
*newtype = type;
return OMPI_SUCCESS;
}
int mca_common_ompio_set_view (mca_io_ompio_file_t *fh,
OMPI_MPI_OFFSET_TYPE disp,
ompi_datatype_t *etype,
ompi_datatype_t *filetype,
const char *datarep,
ompi_info_t *info)
{
size_t max_data = 0;
int i;
int num_groups = 0;
mca_io_ompio_contg *contg_groups;
size_t ftype_size;
OPAL_PTRDIFF_TYPE ftype_extent, lb, ub;
ompi_datatype_t *newfiletype;
if ( NULL != fh->f_etype ) {
ompi_datatype_destroy (&fh->f_etype);
}
if ( NULL != fh->f_filetype ) {
ompi_datatype_destroy (&fh->f_filetype);
}
if ( NULL != fh->f_orig_filetype ) {
ompi_datatype_destroy (&fh->f_orig_filetype);
}
if (NULL != fh->f_decoded_iov) {
free (fh->f_decoded_iov);
fh->f_decoded_iov = NULL;
}
if (NULL != fh->f_datarep) {
free (fh->f_datarep);
fh->f_datarep = NULL;
}
/* Reset the flags first */
fh->f_flags = 0;
fh->f_flags |= OMPIO_FILE_VIEW_IS_SET;
fh->f_datarep = strdup (datarep);
datatype_duplicate (filetype, &fh->f_orig_filetype );
opal_datatype_get_extent(&filetype->super, &lb, &ftype_extent);
opal_datatype_type_size (&filetype->super, &ftype_size);
if ( etype == filetype &&
ompi_datatype_is_predefined (filetype ) &&
ftype_extent == (OPAL_PTRDIFF_TYPE)ftype_size ){
ompi_datatype_create_contiguous(MCA_IO_DEFAULT_FILE_VIEW_SIZE,
&ompi_mpi_byte.dt,
&newfiletype);
ompi_datatype_commit (&newfiletype);
}
else {
newfiletype = filetype;
}
fh->f_iov_count = 0;
fh->f_disp = disp;
fh->f_offset = disp;
fh->f_total_bytes = 0;
fh->f_index_in_file_view=0;
fh->f_position_in_file_view=0;
ompi_io_ompio_decode_datatype (fh,
newfiletype,
1,
NULL,
&max_data,
&fh->f_decoded_iov,
&fh->f_iov_count);
opal_datatype_get_extent(&newfiletype->super, &lb, &fh->f_view_extent);
opal_datatype_type_ub (&newfiletype->super, &ub);
opal_datatype_type_size (&etype->super, &fh->f_etype_size);
opal_datatype_type_size (&newfiletype->super, &fh->f_view_size);
datatype_duplicate (etype, &fh->f_etype);
// This file type is our own representation. The original is stored
// in f_orig_filetype; no need to set args on this one.
ompi_datatype_duplicate (newfiletype, &fh->f_filetype);
fh->f_cc_size = get_contiguous_chunk_size (fh);
if (opal_datatype_is_contiguous_memory_layout(&etype->super,1)) {
if (opal_datatype_is_contiguous_memory_layout(&filetype->super,1) &&
fh->f_view_extent == (OPAL_PTRDIFF_TYPE)fh->f_view_size ) {
fh->f_flags |= OMPIO_CONTIGUOUS_FVIEW;
}
}
contg_groups = (mca_io_ompio_contg*) calloc ( 1, fh->f_size * sizeof(mca_io_ompio_contg));
if (NULL == contg_groups) {
opal_output (1, "OUT OF MEMORY\n");
return OMPI_ERR_OUT_OF_RESOURCE;
}
for( i = 0; i < fh->f_size; i++){
contg_groups[i].procs_in_contg_group = (int*)calloc (1,fh->f_size * sizeof(int));
if(NULL == contg_groups[i].procs_in_contg_group){
int j;
opal_output (1, "OUT OF MEMORY\n");
for(j=0; j<i; j++) {
free(contg_groups[j].procs_in_contg_group);
}
free(contg_groups);
return OMPI_ERR_OUT_OF_RESOURCE;
}
}
if ( SIMPLE != mca_io_ompio_grouping_option ) {
if( OMPI_SUCCESS != mca_io_ompio_fview_based_grouping(fh,
&num_groups,
contg_groups)){
opal_output(1, "mca_common_ompio_set_view: mca_io_ompio_fview_based_grouping failed\n");
free(contg_groups);
return OMPI_ERROR;
}
}
else {
if( OMPI_SUCCESS != mca_io_ompio_simple_grouping(fh,
&num_groups,
contg_groups)){
opal_output(1, "mca_common_ompio_set_view: mca_io_ompio_simple_grouping failed\n");
free(contg_groups);
return OMPI_ERROR;
}
}
if ( OMPI_SUCCESS != mca_io_ompio_finalize_initial_grouping(fh,
num_groups,
contg_groups) ){
opal_output(1, "mca_common_ompio_set_view: mca_io_ompio_finalize_initial_grouping failed\n");
free(contg_groups);
return OMPI_ERROR;
}
for( i = 0; i < fh->f_size; i++){
free(contg_groups[i].procs_in_contg_group);
}
free(contg_groups);
if ( etype == filetype &&
ompi_datatype_is_predefined (filetype ) &&
ftype_extent == (OPAL_PTRDIFF_TYPE)ftype_size ){
ompi_datatype_destroy ( &newfiletype );
}
if (OMPI_SUCCESS != mca_fcoll_base_file_select (fh, NULL)) {
opal_output(1, "mca_common_ompio_set_view: mca_fcoll_base_file_select() failed\n");
return OMPI_ERROR;
}
return OMPI_SUCCESS;
}
OMPI_MPI_OFFSET_TYPE get_contiguous_chunk_size (mca_io_ompio_file_t *fh)
{
int uniform = 0;
OMPI_MPI_OFFSET_TYPE avg[3] = {0,0,0};
OMPI_MPI_OFFSET_TYPE global_avg[3] = {0,0,0};
int i = 0;
/* This function does two things: first, it determines the average data chunk
** size in the file view for each process and across all processes.
** Second, it establishes whether the view across all processes is uniform.
** By definition, uniform means:
** 1. the file view of each process has the same number of contiguous sections
** 2. each section in the file view has exactly the same size
*/
for (i=0 ; i<(int)fh->f_iov_count ; i++) {
avg[0] += fh->f_decoded_iov[i].iov_len;
if (i && 0 == uniform) {
if (fh->f_decoded_iov[i].iov_len != fh->f_decoded_iov[i-1].iov_len) {
uniform = 1;
}
}
}
if ( 0 != fh->f_iov_count ) {
avg[0] = avg[0]/fh->f_iov_count;
}
avg[1] = (OMPI_MPI_OFFSET_TYPE) fh->f_iov_count;
avg[2] = (OMPI_MPI_OFFSET_TYPE) uniform;
fh->f_comm->c_coll.coll_allreduce (avg,
global_avg,
3,
OMPI_OFFSET_DATATYPE,
MPI_SUM,
fh->f_comm,
fh->f_comm->c_coll.coll_allreduce_module);
global_avg[0] = global_avg[0]/fh->f_size;
global_avg[1] = global_avg[1]/fh->f_size;
#if 0
/* Disabling the feature since we are not using it anyway. Saves us one allreduce operation. */
int global_uniform=0;
if ( global_avg[0] == avg[0] &&
global_avg[1] == avg[1] &&
0 == avg[2] &&
0 == global_avg[2] ) {
uniform = 0;
}
else {
uniform = 1;
}
/* second confirmation round to see whether all processes agree
** on having a uniform file view or not
*/
fh->f_comm->c_coll.coll_allreduce (&uniform,
&global_uniform,
1,
MPI_INT,
MPI_MAX,
fh->f_comm,
fh->f_comm->c_coll.coll_allreduce_module);
if ( 0 == global_uniform ){
/* yes, everybody agrees on having a uniform file view */
fh->f_flags |= OMPIO_UNIFORM_FVIEW;
}
#endif
return global_avg[0];
}


@ -0,0 +1,448 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2016 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2016 University of Houston. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/communicator/communicator.h"
#include "ompi/info/info.h"
#include "ompi/file/file.h"
#include "ompi/mca/fcoll/fcoll.h"
#include "ompi/mca/fcoll/base/base.h"
#include "ompi/mca/fbtl/fbtl.h"
#include "ompi/mca/fbtl/base/base.h"
#include "common_ompio.h"
#include "ompi/mca/io/ompio/io_ompio.h"
#include "ompi/mca/io/ompio/io_ompio_request.h"
#include "math.h"
#include <unistd.h>
int mca_common_ompio_file_write (mca_io_ompio_file_t *fh,
const void *buf,
int count,
struct ompi_datatype_t *datatype,
ompi_status_public_t *status)
{
int ret = OMPI_SUCCESS;
int index = 0;
int cycles = 0;
uint32_t iov_count = 0;
struct iovec *decoded_iov = NULL;
size_t bytes_per_cycle=0;
size_t total_bytes_written = 0;
size_t max_data=0, real_bytes_written=0;
ssize_t ret_code=0;
int i = 0; /* index into the decoded iovec of the buffer */
int j = 0; /* index into the file view iovec */
if ( 0 == count ) {
if ( MPI_STATUS_IGNORE != status ) {
status->_ucount = 0;
}
return ret;
}
ompi_io_ompio_decode_datatype (fh,
datatype,
count,
buf,
&max_data,
&decoded_iov,
&iov_count);
if ( -1 == mca_io_ompio_cycle_buffer_size ) {
bytes_per_cycle = max_data;
}
else {
bytes_per_cycle = mca_io_ompio_cycle_buffer_size;
}
cycles = ceil((float)max_data/bytes_per_cycle);
#if 0
printf ("Bytes per Cycle: %d Cycles: %d\n", bytes_per_cycle, cycles);
#endif
j = fh->f_index_in_file_view;
for (index = 0; index < cycles; index++) {
mca_common_ompio_build_io_array ( fh,
index,
cycles,
bytes_per_cycle,
max_data,
iov_count,
decoded_iov,
&i,
&j,
&total_bytes_written);
if (fh->f_num_of_io_entries) {
ret_code =fh->f_fbtl->fbtl_pwritev (fh);
if ( 0<= ret_code ) {
real_bytes_written+= (size_t)ret_code;
}
}
fh->f_num_of_io_entries = 0;
if (NULL != fh->f_io_array) {
free (fh->f_io_array);
fh->f_io_array = NULL;
}
}
if (NULL != decoded_iov) {
free (decoded_iov);
decoded_iov = NULL;
}
if ( MPI_STATUS_IGNORE != status ) {
status->_ucount = real_bytes_written;
}
return ret;
}
int mca_common_ompio_file_write_at (mca_io_ompio_file_t *fh,
OMPI_MPI_OFFSET_TYPE offset,
const void *buf,
int count,
struct ompi_datatype_t *datatype,
ompi_status_public_t *status)
{
int ret = OMPI_SUCCESS;
OMPI_MPI_OFFSET_TYPE prev_offset;
mca_common_ompio_file_get_position (fh, &prev_offset );
mca_common_ompio_set_explicit_offset (fh, offset);
ret = mca_common_ompio_file_write (fh,
buf,
count,
datatype,
status);
// An explicit offset file operation is not supposed to modify
// the internal file pointer. So reset the pointer
// to the previous value
mca_common_ompio_set_explicit_offset (fh, prev_offset );
return ret;
}
int mca_common_ompio_file_iwrite (mca_io_ompio_file_t *fh,
const void *buf,
int count,
struct ompi_datatype_t *datatype,
ompi_request_t **request)
{
int ret = OMPI_SUCCESS;
mca_ompio_request_t *ompio_req=NULL;
ompio_req = OBJ_NEW(mca_ompio_request_t);
ompio_req->req_type = MCA_OMPIO_REQUEST_WRITE;
ompio_req->req_ompi.req_state = OMPI_REQUEST_ACTIVE;
if ( 0 == count ) {
ompio_req->req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS;
ompio_req->req_ompi.req_status._ucount = 0;
ompi_request_complete (&ompio_req->req_ompi, false);
*request = (ompi_request_t *) ompio_req;
return OMPI_SUCCESS;
}
if ( NULL != fh->f_fbtl->fbtl_ipwritev ) {
/* This fbtl has support for non-blocking operations */
uint32_t iov_count = 0;
struct iovec *decoded_iov = NULL;
size_t max_data = 0;
size_t total_bytes_written =0;
int i = 0; /* index into the decoded iovec of the buffer */
int j = 0; /* index into the file view iovec */
ompi_io_ompio_decode_datatype (fh,
datatype,
count,
buf,
&max_data,
&decoded_iov,
&iov_count);
j = fh->f_index_in_file_view;
/* Non-blocking operations have to occur in a single cycle */
mca_common_ompio_build_io_array ( fh,
0, // index of current cycle iteration
1, // number of cycles
max_data, // setting bytes_per_cycle to max_data
max_data,
iov_count,
decoded_iov,
&i,
&j,
&total_bytes_written);
if (fh->f_num_of_io_entries) {
fh->f_fbtl->fbtl_ipwritev (fh, (ompi_request_t *) ompio_req);
}
if ( false == mca_io_ompio_progress_is_registered ) {
// Lazy initialization of progress function to minimize impact
// on other ompi functionality in case it's not used.
opal_progress_register (mca_io_ompio_component_progress);
mca_io_ompio_progress_is_registered=true;
}
fh->f_num_of_io_entries = 0;
if (NULL != fh->f_io_array) {
free (fh->f_io_array);
fh->f_io_array = NULL;
}
if (NULL != decoded_iov) {
free (decoded_iov);
decoded_iov = NULL;
}
}
else {
// This fbtl does not support non-blocking write operations
ompi_status_public_t status;
ret = mca_common_ompio_file_write(fh,buf,count,datatype, &status);
ompio_req->req_ompi.req_status.MPI_ERROR = ret;
ompio_req->req_ompi.req_status._ucount = status._ucount;
ompi_request_complete (&ompio_req->req_ompi, false);
}
*request = (ompi_request_t *) ompio_req;
return ret;
}
int mca_common_ompio_file_iwrite_at (mca_io_ompio_file_t *fh,
OMPI_MPI_OFFSET_TYPE offset,
const void *buf,
int count,
struct ompi_datatype_t *datatype,
ompi_request_t **request)
{
int ret = OMPI_SUCCESS;
OMPI_MPI_OFFSET_TYPE prev_offset;
mca_common_ompio_file_get_position (fh, &prev_offset );
mca_common_ompio_set_explicit_offset (fh, offset);
ret = mca_common_ompio_file_iwrite (fh,
buf,
count,
datatype,
request);
/* An explicit offset file operation is not supposed to modify
** the internal file pointer. So reset the pointer
** to the previous value.
** It is OK to reset the position already here, although
** the operation might still be pending/ongoing, since
** the entire array of <offset, length, memaddress> has
** already been constructed in the file_iwrite operation
*/
mca_common_ompio_set_explicit_offset (fh, prev_offset);
return ret;
}
/* Collective operations */
/******************************************************************/
int mca_common_ompio_file_write_at_all (mca_io_ompio_file_t *fh,
OMPI_MPI_OFFSET_TYPE offset,
const void *buf,
int count,
struct ompi_datatype_t *datatype,
ompi_status_public_t *status)
{
int ret = OMPI_SUCCESS;
OMPI_MPI_OFFSET_TYPE prev_offset;
mca_common_ompio_file_get_position (fh, &prev_offset );
mca_common_ompio_set_explicit_offset (fh, offset);
ret = fh->f_fcoll->fcoll_file_write_all (fh,
buf,
count,
datatype,
status);
mca_common_ompio_set_explicit_offset (fh, prev_offset);
return ret;
}
int mca_common_ompio_file_iwrite_at_all (mca_io_ompio_file_t *fp,
OMPI_MPI_OFFSET_TYPE offset,
const void *buf,
int count,
struct ompi_datatype_t *datatype,
ompi_request_t **request)
{
int ret = OMPI_SUCCESS;
OMPI_MPI_OFFSET_TYPE prev_offset;
mca_common_ompio_file_get_position (fp, &prev_offset );
mca_common_ompio_set_explicit_offset (fp, offset);
if ( NULL != fp->f_fcoll->fcoll_file_iwrite_all ) {
ret = fp->f_fcoll->fcoll_file_iwrite_all (fp,
buf,
count,
datatype,
request);
}
else {
/* this fcoll component does not support non-blocking
collective I/O operations. We fake it with
individual non-blocking I/O operations. */
ret = mca_common_ompio_file_iwrite ( fp, buf, count, datatype, request );
}
mca_common_ompio_set_explicit_offset (fp, prev_offset);
return ret;
}
/* Helper function used by both read and write operations */
/**************************************************************/
int mca_common_ompio_build_io_array ( mca_io_ompio_file_t *fh, int index, int cycles,
size_t bytes_per_cycle, int max_data, uint32_t iov_count,
struct iovec *decoded_iov, int *ii, int *jj, size_t *tbw )
{
OPAL_PTRDIFF_TYPE disp;
int block = 1;
size_t total_bytes_written = *tbw; /* total bytes that have been written*/
size_t bytes_to_write_in_cycle = 0; /* left to be written in a cycle*/
size_t sum_previous_counts = 0;
size_t sum_previous_length = 0;
int k = 0; /* index into the io_array */
int i = *ii;
int j = *jj;
sum_previous_length = fh->f_position_in_file_view;
if ((index == cycles-1) && (max_data % bytes_per_cycle)) {
bytes_to_write_in_cycle = max_data % bytes_per_cycle;
}
else {
bytes_to_write_in_cycle = bytes_per_cycle;
}
fh->f_io_array = (mca_io_ompio_io_array_t *)malloc
(OMPIO_IOVEC_INITIAL_SIZE * sizeof (mca_io_ompio_io_array_t));
if (NULL == fh->f_io_array) {
opal_output(1, "OUT OF MEMORY\n");
return OMPI_ERR_OUT_OF_RESOURCE;
}
while (bytes_to_write_in_cycle) {
/* reallocate if needed */
if (OMPIO_IOVEC_INITIAL_SIZE*block <= k) {
block ++;
fh->f_io_array = (mca_io_ompio_io_array_t *)realloc
(fh->f_io_array, OMPIO_IOVEC_INITIAL_SIZE *
block * sizeof (mca_io_ompio_io_array_t));
if (NULL == fh->f_io_array) {
opal_output(1, "OUT OF MEMORY\n");
return OMPI_ERR_OUT_OF_RESOURCE;
}
}
if (decoded_iov[i].iov_len -
(total_bytes_written - sum_previous_counts) <= 0) {
sum_previous_counts += decoded_iov[i].iov_len;
i = i + 1;
}
disp = (OPAL_PTRDIFF_TYPE)decoded_iov[i].iov_base +
(total_bytes_written - sum_previous_counts);
fh->f_io_array[k].memory_address = (IOVBASE_TYPE *)disp;
if (decoded_iov[i].iov_len -
(total_bytes_written - sum_previous_counts) >=
bytes_to_write_in_cycle) {
fh->f_io_array[k].length = bytes_to_write_in_cycle;
}
else {
fh->f_io_array[k].length = decoded_iov[i].iov_len -
(total_bytes_written - sum_previous_counts);
}
if (! (fh->f_flags & OMPIO_CONTIGUOUS_FVIEW)) {
if (fh->f_decoded_iov[j].iov_len -
(fh->f_total_bytes - sum_previous_length) <= 0) {
sum_previous_length += fh->f_decoded_iov[j].iov_len;
j = j + 1;
if (j == (int)fh->f_iov_count) {
j = 0;
sum_previous_length = 0;
fh->f_offset += fh->f_view_extent;
fh->f_position_in_file_view = sum_previous_length;
fh->f_index_in_file_view = j;
fh->f_total_bytes = 0;
}
}
}
disp = (OPAL_PTRDIFF_TYPE)fh->f_decoded_iov[j].iov_base +
(fh->f_total_bytes - sum_previous_length);
fh->f_io_array[k].offset = (IOVBASE_TYPE *)(intptr_t)(disp + fh->f_offset);
if (! (fh->f_flags & OMPIO_CONTIGUOUS_FVIEW)) {
if (fh->f_decoded_iov[j].iov_len -
(fh->f_total_bytes - sum_previous_length)
< fh->f_io_array[k].length) {
fh->f_io_array[k].length = fh->f_decoded_iov[j].iov_len -
(fh->f_total_bytes - sum_previous_length);
}
}
total_bytes_written += fh->f_io_array[k].length;
fh->f_total_bytes += fh->f_io_array[k].length;
bytes_to_write_in_cycle -= fh->f_io_array[k].length;
k = k + 1;
}
fh->f_position_in_file_view = sum_previous_length;
fh->f_index_in_file_view = j;
fh->f_num_of_io_entries = k;
#if 0
if (fh->f_rank == 0) {
int d;
printf("*************************** %d\n", fh->f_num_of_io_entries);
for (d=0 ; d<fh->f_num_of_io_entries ; d++) {
printf(" ADDRESS: %p OFFSET: %p LENGTH: %d\n",
fh->f_io_array[d].memory_address,
fh->f_io_array[d].offset,
fh->f_io_array[d].length);
}
}
#endif
*ii = i;
*jj = j;
*tbw = total_bytes_written;
return OMPI_SUCCESS;
}


@ -0,0 +1,230 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2013 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2016 University of Houston. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/communicator/communicator.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/mca/common/ompio/common_ompio.h"
#include "ompi/mca/io/ompio/io_ompio.h"
/* Print queue related function implementations */
int mca_common_ompio_initialize_print_queue( struct mca_common_ompio_print_queue **r){
struct mca_common_ompio_print_queue *q=NULL;
int ret = OMPI_SUCCESS;
q = (struct mca_common_ompio_print_queue *) malloc ( sizeof(mca_common_ompio_print_queue));
if ( NULL == q ) {
    /* bail out before touching a NULL queue pointer */
    return OMPI_ERR_OUT_OF_RESOURCE;
}
q->first = 0;
q->last = MCA_COMMON_OMPIO_QUEUESIZE - 1;
q->count = 0;
*r = q;
return ret;
}
int mca_common_ompio_register_print_entry ( struct mca_common_ompio_print_queue *q,
mca_common_ompio_print_entry x)
{
if (q->count >= MCA_COMMON_OMPIO_QUEUESIZE){
return OMPI_ERROR;
}
else{
q->last = (q->last + 1) % MCA_COMMON_OMPIO_QUEUESIZE;
q->entry[q->last] = x;
q->count = q->count + 1;
}
return OMPI_SUCCESS;
}
int mca_common_ompio_unregister_print_entry ( struct mca_common_ompio_print_queue *q,
mca_common_ompio_print_entry *x)
{
if (q->count <= 0){
return OMPI_ERROR;
}
else{
*x = q->entry[q->first];
q->first = (q->first+1) % MCA_COMMON_OMPIO_QUEUESIZE;
q->count = q->count - 1;
}
return OMPI_SUCCESS;
}
int mca_common_ompio_empty_print_queue( struct mca_common_ompio_print_queue *q)
{
if (q->count == 0) {
return 1;
}
return 0;
}
int mca_common_ompio_full_print_queue( struct mca_common_ompio_print_queue *q)
{
if (q->count < MCA_COMMON_OMPIO_QUEUESIZE) {
return 0;
}
return 1;
}
int mca_common_ompio_print_time_info( struct mca_common_ompio_print_queue *q,
char *name,
struct mca_io_ompio_file_t *fh){
int i = 0, j=0, nprocs_for_coll = 0, ret = OMPI_SUCCESS, count = 0;
double *time_details = NULL, *final_sum = NULL;
double *final_max = NULL, *final_min = NULL;
double *final_time_details=NULL;
nprocs_for_coll = q->entry[0].nprocs_for_coll;
time_details = (double *) calloc (4,sizeof(double));
if ( NULL == time_details){
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto exit;
}
if (!fh->f_rank){
final_min = (double *) malloc (3*sizeof(double));
if ( NULL == final_min){
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto exit;
}
final_max = (double *) malloc (3*sizeof(double));
if ( NULL == final_max){
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto exit;
}
final_sum = (double *) malloc (3*sizeof(double));
if ( NULL == final_sum){
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto exit;
}
final_time_details = (double *)calloc (fh->f_size, 4 * sizeof(double));
if (NULL == final_time_details){
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto exit;
}
count = 4 * fh->f_size;
}
if (q->count > 0){
for (i=0; i < q->count; i++){
for (j=0;j<3;j++){
if (!fh->f_rank){
final_min[j] = 100000.0;
final_max[j] = 0.0;
final_sum[j] = 0.0;
}
time_details[j] += q->entry[i].time[j];
}
time_details[3] = q->entry[i].aggregator;
}
}
ret = fh->f_comm->c_coll.coll_gather(time_details,
4,
MPI_DOUBLE,
final_time_details,
4,
MPI_DOUBLE,
0,
fh->f_comm,
fh->f_comm->c_coll.coll_gather_module);
if ( OMPI_SUCCESS != ret ) {
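/* no recovery attempted for a failed gather; fall through and
   print whatever timing data is available */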
}
if (!fh->f_rank){
for (i=0;i<count;i+=4){
if (final_time_details[i+3] == 1){
final_sum[0] += final_time_details[i];
final_sum[1] += final_time_details[i+1];
final_sum[2] += final_time_details[i+2];
if ( final_time_details[i] < final_min[0])
final_min[0] = final_time_details[i];
if ( final_time_details[i+1] < final_min[1])
final_min[1] = final_time_details[i+1];
if ( final_time_details[i+2] < final_min[2])
final_min[2] = final_time_details[i+2];
if ( final_time_details[i] > final_max[0])
final_max[0] = final_time_details[i];
if ( final_time_details[i+1] > final_max[1])
final_max[1] = final_time_details[i+1];
if ( final_time_details[i+2] > final_max[2])
final_max[2] = final_time_details[i+2];
}
}
printf ("\n# MAX-%s AVG-%s MIN-%s MAX-COMM AVG-COMM MIN-COMM",
name, name, name);
printf (" MAX-EXCH AVG-EXCH MIN-EXCH\n");
printf (" %f %f %f %f %f %f %f %f %f\n\n",
final_max[0], final_sum[0]/nprocs_for_coll, final_min[0],
final_max[1], final_sum[1]/nprocs_for_coll, final_min[1],
final_max[2], final_sum[2]/nprocs_for_coll, final_min[2]);
}
exit:
if ( NULL != final_max){
free(final_max);
final_max = NULL;
}
if (NULL != final_min){
free(final_min);
final_min = NULL;
}
if (NULL != final_sum){
free(final_sum);
final_sum = NULL;
}
if (NULL != time_details){
free(time_details);
time_details = NULL;
}
return ret;
}


@ -0,0 +1,70 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2007 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2016 University of Houston. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_COMMON_OMPIO_PRINT_QUEUE_H
#define MCA_COMMON_OMPIO_PRINT_QUEUE_H
#include "mpi.h"
OMPI_DECLSPEC extern int mca_io_ompio_coll_timing_info;
struct mca_io_ompio_file_t;
#define MCA_COMMON_OMPIO_QUEUESIZE 2048
/*To extract time-information */
struct mca_common_ompio_print_entry{
double time[3];
int nprocs_for_coll;
int aggregator;
};
typedef struct mca_common_ompio_print_entry mca_common_ompio_print_entry;
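/* Bounded queue of timing entries: first/last track the head and tail
 * slots, count the number of valid entries. */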
struct mca_common_ompio_print_queue {
mca_common_ompio_print_entry entry[MCA_COMMON_OMPIO_QUEUESIZE + 1];
int first;
int last;
int count;
};
typedef struct mca_common_ompio_print_queue mca_common_ompio_print_queue;
OMPI_DECLSPEC int mca_common_ompio_register_print_entry (struct mca_common_ompio_print_queue *q,
mca_common_ompio_print_entry x);
OMPI_DECLSPEC int mca_common_ompio_unregister_print_entry (struct mca_common_ompio_print_queue *q,
mca_common_ompio_print_entry *x);
OMPI_DECLSPEC int mca_common_ompio_empty_print_queue( struct mca_common_ompio_print_queue *q);
OMPI_DECLSPEC int mca_common_ompio_full_print_queue( struct mca_common_ompio_print_queue *q);
OMPI_DECLSPEC int mca_common_ompio_initialize_print_queue(struct mca_common_ompio_print_queue **q);
OMPI_DECLSPEC int mca_common_ompio_print_time_info( struct mca_common_ompio_print_queue *q,
char *name_operation, struct mca_io_ompio_file_t *fh);
END_C_DECLS
#endif /* MCA_COMMON_OMPIO_PRINT_QUEUE_H */
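As a point of reference, a minimal usage sketch of the queue API declared above. This is hedged: the timing variables, num_aggregators, and the use of fh->f_coll_write_time are taken from the fcoll call sites later in this commit, not mandated by the header itself.

/* Hedged sketch (not part of the commit): record one collective-write
 * timing entry and let rank 0 print the aggregated MAX/AVG/MIN values. */
mca_common_ompio_print_entry nentry;
nentry.time[0] = write_time;        /* time spent writing               */
nentry.time[1] = comm_time;         /* time spent in communication      */
nentry.time[2] = exch_write;        /* time spent in the exchange phase */
nentry.aggregator = 1;              /* this rank acted as an aggregator */
nentry.nprocs_for_coll = num_aggregators;
if (!mca_common_ompio_full_print_queue (fh->f_coll_write_time)) {
    mca_common_ompio_register_print_entry (fh->f_coll_write_time, nentry);
}
/* Later, for example at file close, the statistics are reduced and printed: */
mca_common_ompio_print_time_info (fh->f_coll_write_time, "WRITE", fh);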


@ -27,7 +27,7 @@
#include "opal/mca/base/base.h"
#include "ompi/mca/fbtl/fbtl.h"
#include "ompi/mca/fbtl/base/base.h"
#include "ompi/mca/io/ompio/io_ompio.h"
#include "ompi/mca/common/ompio/common_ompio.h"
/*
* This structure is needed so that we can close the modules


@ -24,12 +24,12 @@
#include <stdlib.h>
#include "mpi.h"
#include "ompi/mca/io/ompio/io_ompio.h"
#include "opal/util/show_help.h"
#include "ompi/mca/mca.h"
#include "opal/mca/base/base.h"
#include "ompi/mca/fbtl/fbtl.h"
#include "ompi/mca/fbtl/base/base.h"
#include "ompi/mca/common/ompio/common_ompio.h"
int mca_fbtl_base_file_unselect(mca_io_ompio_file_t *file)
{


@ -23,7 +23,7 @@
#include "ompi_config.h"
#include "ompi/mca/mca.h"
#include "ompi/mca/fbtl/fbtl.h"
#include "ompi/mca/io/ompio/io_ompio.h"
#include "ompi/mca/common/ompio/common_ompio.h"
#include <plfs.h>
extern int mca_fbtl_plfs_priority;


@ -9,7 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2015 University of Houston. All rights reserved.
* Copyright (c) 2008-2016 University of Houston. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -23,7 +23,7 @@
#include "ompi_config.h"
#include "ompi/mca/mca.h"
#include "ompi/mca/fbtl/fbtl.h"
#include "ompi/mca/io/ompio/io_ompio.h"
#include "ompi/mca/common/ompio/common_ompio.h"
#include "ompi/mca/io/ompio/io_ompio_request.h"
extern int mca_fbtl_posix_priority;


@ -23,7 +23,7 @@
#include "ompi_config.h"
#include "ompi/mca/mca.h"
#include "ompi/mca/fbtl/fbtl.h"
#include "ompi/mca/io/ompio/io_ompio.h"
#include "ompi/mca/common/ompio/common_ompio.h"
#include "ompi/mca/fs/pvfs2/fs_pvfs2.h"
#include "pvfs2.h"
#include "pvfs2-compat.h"


@ -26,4 +26,5 @@ libmca_fcoll_la_SOURCES += \
base/fcoll_base_file_select.c \
base/fcoll_base_file_unselect.c \
base/fcoll_base_find_available.c \
base/fcoll_base_sort.c \
base/fcoll_base_coll_array.c


@ -47,6 +47,8 @@ OMPI_DECLSPEC int mca_fcoll_base_find_available(bool enable_progress_threads,
OMPI_DECLSPEC int mca_fcoll_base_init_file (struct mca_io_ompio_file_t *file);
OMPI_DECLSPEC int mca_fcoll_base_get_param (struct mca_io_ompio_file_t *file, int keyval);
OMPI_DECLSPEC int fcoll_base_sort_iovec (struct iovec *iov, int num_entries, int *sorted);
/*
* Globals
*/


@ -29,7 +29,7 @@
#include <math.h>
#include "ompi/mca/fcoll/base/fcoll_base_coll_array.h"
#include "ompi/mca/io/ompio/io_ompio.h"
#include "ompi/mca/common/ompio/common_ompio.h"
int fcoll_base_coll_allgatherv_array (void *sbuf,


@ -27,7 +27,7 @@
#include "opal/mca/base/base.h"
#include "ompi/mca/fcoll/fcoll.h"
#include "ompi/mca/fcoll/base/base.h"
#include "ompi/mca/io/ompio/io_ompio.h"
#include "ompi/mca/common/ompio/common_ompio.h"
/*
* This structure is needed so that we can close the modules


@ -9,7 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2011 University of Houston. All rights reserved.
* Copyright (c) 2008-2016 University of Houston. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -24,12 +24,12 @@
#include <stdlib.h>
#include "mpi.h"
#include "ompi/mca/io/ompio/io_ompio.h"
#include "opal/util/show_help.h"
#include "ompi/mca/mca.h"
#include "opal/mca/base/base.h"
#include "ompi/mca/fcoll/fcoll.h"
#include "ompi/mca/fcoll/base/base.h"
#include "ompi/mca/common/ompio/common_ompio.h"
int mca_fcoll_base_file_unselect(mca_io_ompio_file_t *file)
{

ompi/mca/fcoll/base/fcoll_base_sort.c (new file, 131 lines)

@ -0,0 +1,131 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2007 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2016 University of Houston. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "base.h"
#include "ompi/mca/common/ompio/common_ompio.h"
int fcoll_base_sort_iovec (struct iovec *iov,
int num_entries,
int *sorted)
{
int i = 0;
int j = 0;
int left = 0;
int right = 0;
int largest = 0;
int heap_size = num_entries - 1;
int temp = 0;
unsigned char done = 0;
int* temp_arr = NULL;
if (0 == num_entries) {
return OMPI_SUCCESS;
}
temp_arr = (int*)malloc(num_entries*sizeof(int));
if (NULL == temp_arr) {
opal_output (1, "OUT OF MEMORY\n");
return OMPI_ERR_OUT_OF_RESOURCE;
}
temp_arr[0] = 0;
for (i = 1; i < num_entries; ++i) {
temp_arr[i] = i;
}
/* num_entries can be large, so build the max-heap iteratively (no recursion), keyed on iov_base. */
for (i = num_entries/2-1 ; i>=0 ; i--) {
done = 0;
j = i;
largest = j;
while (!done) {
left = j*2+1;
right = j*2+2;
if ((left <= heap_size) &&
(iov[temp_arr[left]].iov_base > iov[temp_arr[j]].iov_base)) {
largest = left;
}
else {
largest = j;
}
if ((right <= heap_size) &&
(iov[temp_arr[right]].iov_base >
iov[temp_arr[largest]].iov_base)) {
largest = right;
}
if (largest != j) {
temp = temp_arr[largest];
temp_arr[largest] = temp_arr[j];
temp_arr[j] = temp;
j = largest;
}
else {
done = 1;
}
}
}
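/* Extraction phase: swap the heap root (current maximum) to the end of the
 * active region, shrink the heap, and sift the new root down to restore the
 * heap property. */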
for (i = num_entries-1; i >=1; --i) {
temp = temp_arr[0];
temp_arr[0] = temp_arr[i];
temp_arr[i] = temp;
heap_size--;
done = 0;
j = 0;
largest = j;
while (!done) {
left = j*2+1;
right = j*2+2;
if ((left <= heap_size) &&
(iov[temp_arr[left]].iov_base >
iov[temp_arr[j]].iov_base)) {
largest = left;
}
else {
largest = j;
}
if ((right <= heap_size) &&
(iov[temp_arr[right]].iov_base >
iov[temp_arr[largest]].iov_base)) {
largest = right;
}
if (largest != j) {
temp = temp_arr[largest];
temp_arr[largest] = temp_arr[j];
temp_arr[j] = temp;
j = largest;
}
else {
done = 1;
}
}
sorted[i] = temp_arr[i];
}
sorted[0] = temp_arr[0];
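/* sorted[i] now holds the index of the entry with the i-th smallest iov_base. */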
if (NULL != temp_arr) {
free(temp_arr);
temp_arr = NULL;
}
return OMPI_SUCCESS;
}
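For context, a hedged illustration of how the fcoll callers further down in this commit use the routine; global_iov_array, total_fview_count, and the index array are named after those call sites, and error paths are abbreviated:

/* Sketch only: obtain a permutation that visits the file-view iovec
 * entries in ascending iov_base (i.e., file offset) order. */
int *sorted = (int *) malloc (total_fview_count * sizeof(int));
if (NULL == sorted) {
    return OMPI_ERR_OUT_OF_RESOURCE;
}
fcoll_base_sort_iovec (global_iov_array, total_fview_count, sorted);
for (i = 0; i < total_fview_count; i++) {
    struct iovec *ent = &global_iov_array[sorted[i]];
    /* ent->iov_base / ent->iov_len are now processed in offset order */
}
free (sorted);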


@ -9,7 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2014 University of Houston. All rights reserved.
* Copyright (c) 2008-2016 University of Houston. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -28,7 +28,7 @@
#include "ompi/mca/mca.h"
#include "ompi/mca/fcoll/fcoll.h"
#include "ompi/mca/fcoll/base/base.h"
#include "ompi/mca/io/ompio/io_ompio.h"
#include "ompi/mca/common/ompio/common_ompio.h"
BEGIN_C_DECLS


@ -104,7 +104,7 @@ mca_fcoll_dynamic_file_read_all (mca_io_ompio_file_t *fh,
double read_time = 0.0, start_read_time = 0.0, end_read_time = 0.0;
double rcomm_time = 0.0, start_rcomm_time = 0.0, end_rcomm_time = 0.0;
double read_exch = 0.0, start_rexch = 0.0, end_rexch = 0.0;
mca_io_ompio_print_entry nentry;
mca_common_ompio_print_entry nentry;
#endif
/**************************************************************************
@ -307,7 +307,7 @@ mca_fcoll_dynamic_file_read_all (mca_io_ompio_file_t *fh,
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto exit;
}
fh->f_sort_iovec (global_iov_array, total_fview_count, sorted);
fcoll_base_sort_iovec (global_iov_array, total_fview_count, sorted);
}
if (NULL != local_iov_array) {
@ -866,9 +866,9 @@ mca_fcoll_dynamic_file_read_all (mca_io_ompio_file_t *fh,
else
nentry.aggregator = 0;
nentry.nprocs_for_coll = dynamic_num_io_procs;
if (!fh->f_full_print_queue(READ_PRINT_QUEUE)){
fh->f_register_print_entry(READ_PRINT_QUEUE,
nentry);
if (!mca_common_ompio_full_print_queue(fh->f_coll_read_time)){
mca_common_ompio_register_print_entry(fh->f_coll_read_time,
nentry);
}
#endif


@ -108,7 +108,7 @@ mca_fcoll_dynamic_file_write_all (mca_io_ompio_file_t *fh,
double write_time = 0.0, start_write_time = 0.0, end_write_time = 0.0;
double comm_time = 0.0, start_comm_time = 0.0, end_comm_time = 0.0;
double exch_write = 0.0, start_exch = 0.0, end_exch = 0.0;
mca_io_ompio_print_entry nentry;
mca_common_ompio_print_entry nentry;
#endif
opal_datatype_type_size ( &datatype->super, &ftype_size );
@ -327,7 +327,7 @@ mca_fcoll_dynamic_file_write_all (mca_io_ompio_file_t *fh,
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto exit;
}
fh->f_sort_iovec (global_iov_array, total_fview_count, sorted);
fcoll_base_sort_iovec (global_iov_array, total_fview_count, sorted);
}
if (NULL != local_iov_array){
@ -979,9 +979,9 @@ mca_fcoll_dynamic_file_write_all (mca_io_ompio_file_t *fh,
else
nentry.aggregator = 0;
nentry.nprocs_for_coll = dynamic_num_io_procs;
if (!fh->f_full_print_queue(WRITE_PRINT_QUEUE)){
fh->f_register_print_entry(WRITE_PRINT_QUEUE,
nentry);
if (!mca_common_ompio_full_print_queue(fh->f_coll_write_time)){
mca_common_ompio_register_print_entry(fh->f_coll_write_time,
nentry);
}
#endif


@ -28,7 +28,7 @@
#include "ompi/mca/mca.h"
#include "ompi/mca/fcoll/fcoll.h"
#include "ompi/mca/fcoll/base/base.h"
#include "ompi/mca/io/ompio/io_ompio.h"
#include "ompi/mca/common/ompio/common_ompio.h"
BEGIN_C_DECLS


@ -104,7 +104,7 @@ mca_fcoll_dynamic_gen2_file_read_all (mca_io_ompio_file_t *fh,
double read_time = 0.0, start_read_time = 0.0, end_read_time = 0.0;
double rcomm_time = 0.0, start_rcomm_time = 0.0, end_rcomm_time = 0.0;
double read_exch = 0.0, start_rexch = 0.0, end_rexch = 0.0;
mca_io_ompio_print_entry nentry;
mca_common_ompio_print_entry nentry;
#endif
/**************************************************************************
@ -307,7 +307,7 @@ mca_fcoll_dynamic_gen2_file_read_all (mca_io_ompio_file_t *fh,
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto exit;
}
fh->f_sort_iovec (global_iov_array, total_fview_count, sorted);
fcoll_base_sort_iovec (global_iov_array, total_fview_count, sorted);
}
if (NULL != local_iov_array) {
@ -866,9 +866,9 @@ mca_fcoll_dynamic_gen2_file_read_all (mca_io_ompio_file_t *fh,
else
nentry.aggregator = 0;
nentry.nprocs_for_coll = dynamic_gen2_num_io_procs;
if (!fh->f_full_print_queue(READ_PRINT_QUEUE)){
fh->f_register_print_entry(READ_PRINT_QUEUE,
nentry);
if (!mca_common_ompio_full_print_queue(fh->f_coll_read_time)){
mca_common_ompio_register_print_entry(fh->f_coll_read_time,
nentry);
}
#endif


@ -152,7 +152,7 @@ int mca_fcoll_dynamic_gen2_file_write_all (mca_io_ompio_file_t *fh,
double write_time = 0.0, start_write_time = 0.0, end_write_time = 0.0;
double comm_time = 0.0, start_comm_time = 0.0, end_comm_time = 0.0;
double exch_write = 0.0, start_exch = 0.0, end_exch = 0.0;
mca_io_ompio_print_entry nentry;
mca_common_ompio_print_entry nentry;
#endif
@ -455,7 +455,7 @@ int mca_fcoll_dynamic_gen2_file_write_all (mca_io_ompio_file_t *fh,
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto exit;
}
fh->f_sort_iovec (aggr_data[i]->global_iov_array, total_fview_count, aggr_data[i]->sorted);
fcoll_base_sort_iovec (aggr_data[i]->global_iov_array, total_fview_count, aggr_data[i]->sorted);
}
if (NULL != local_iov_array){
@ -470,7 +470,7 @@ int mca_fcoll_dynamic_gen2_file_write_all (mca_io_ompio_file_t *fh,
#if DEBUG_ON
if (my_aggregator == fh->f_rank) {
if (aggregators[i] == fh->f_rank) {
uint32_t tv=0;
for (tv=0 ; tv<total_fview_count ; tv++) {
printf("%d: OFFSET: %lld LENGTH: %ld\n",
@ -591,10 +591,17 @@ int mca_fcoll_dynamic_gen2_file_write_all (mca_io_ompio_file_t *fh,
/* Write data for iteration i-1 */
for ( i=0; i<dynamic_gen2_num_io_procs; i++ ) {
#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN
start_write_time = MPI_Wtime();
#endif
ret = write_init (fh, aggregators[i], aggr_data[i], write_chunksize );
if (OMPI_SUCCESS != ret){
goto exit;
}
#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN
end_write_time = MPI_Wtime();
write_time += end_write_time - start_write_time;
#endif
if (!aggr_data[i]->prev_sendbuf_is_contiguous && aggr_data[i]->prev_bytes_sent) {
free (aggr_data[i]->prev_send_buf);
@ -617,10 +624,17 @@ int mca_fcoll_dynamic_gen2_file_write_all (mca_io_ompio_file_t *fh,
/* Write data for iteration i=cycles-1 */
for ( i=0; i<dynamic_gen2_num_io_procs; i++ ) {
#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN
start_write_time = MPI_Wtime();
#endif
ret = write_init (fh, aggregators[i], aggr_data[i], write_chunksize );
if (OMPI_SUCCESS != ret){
goto exit;
}
#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN
end_write_time = MPI_Wtime();
write_time += end_write_time - start_write_time;
#endif
if (!aggr_data[i]->prev_sendbuf_is_contiguous && aggr_data[i]->prev_bytes_sent) {
free (aggr_data[i]->prev_send_buf);
@ -635,14 +649,15 @@ int mca_fcoll_dynamic_gen2_file_write_all (mca_io_ompio_file_t *fh,
nentry.time[0] = write_time;
nentry.time[1] = comm_time;
nentry.time[2] = exch_write;
if (my_aggregator == fh->f_rank)
nentry.aggregator = 0;
for ( i=0; i<dynamic_gen2_num_io_procs; i++ ) {
if (aggregators[i] == fh->f_rank)
nentry.aggregator = 1;
else
nentry.aggregator = 0;
}
nentry.nprocs_for_coll = dynamic_gen2_num_io_procs;
if (!fh->f_full_print_queue(WRITE_PRINT_QUEUE)){
fh->f_register_print_entry(WRITE_PRINT_QUEUE,
nentry);
if (!mca_common_ompio_full_print_queue(fh->f_coll_write_time)){
mca_common_ompio_register_print_entry(fh->f_coll_write_time,
nentry);
}
#endif
@ -725,19 +740,12 @@ static int write_init (mca_io_ompio_file_t *fh, int aggregator, mca_io_ompio_agg
aggr_data->prev_num_io_entries,
&last_array_pos, &last_pos,
write_chunksize );
#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN
start_write_time = MPI_Wtime();
#endif
if ( 0 > fh->f_fbtl->fbtl_pwritev (fh)) {
free ( aggr_data->prev_io_array);
opal_output (1, "dynamic_gen2_write_all: fbtl_pwritev failed\n");
ret = OMPI_ERROR;
goto exit;
}
#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN
end_write_time = MPI_Wtime();
write_time += end_write_time - start_write_time;
#endif
}
free ( fh->f_io_array );
free ( aggr_data->prev_io_array);
@ -1087,9 +1095,9 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i
printf("%d : global_count : %ld, bytes_sent : %d\n",
rank,global_count, bytes_sent);
#endif
#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN
start_comm_time = MPI_Wtime();
#endif
//#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN
// start_comm_time = MPI_Wtime();
//#endif
/*************************************************************************
*** 7e. Perform the actual communication
*************************************************************************/
@ -1198,10 +1206,10 @@ static int shuffle_init ( int index, int cycles, int aggregator, int rank, mca_i
}
#endif
#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN
end_comm_time = MPI_Wtime();
comm_time += (end_comm_time - start_comm_time);
#endif
//#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN
// end_comm_time = MPI_Wtime();
// comm_time += (end_comm_time - start_comm_time);
//#endif
/**********************************************************
*** 7f. Create the io array, and pass it to fbtl
*********************************************************/


@ -9,7 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2014 University of Houston. All rights reserved.
* Copyright (c) 2008-2016 University of Houston. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -28,7 +28,7 @@
#include "ompi/mca/mca.h"
#include "ompi/mca/fcoll/fcoll.h"
#include "ompi/mca/fcoll/base/base.h"
#include "ompi/mca/io/ompio/io_ompio.h"
#include "ompi/mca/common/ompio/common_ompio.h"
BEGIN_C_DECLS


@ -23,8 +23,7 @@
#include "mpi.h"
#include "ompi/constants.h"
#include "ompi/mca/fcoll/fcoll.h"
#include "ompi/mca/io/ompio/io_ompio.h"
#include "ompi/mca/io/io.h"
#include "ompi/mca/common/ompio/common_ompio.h"
#include "math.h"
#include <unistd.h>
@ -38,5 +37,5 @@ mca_fcoll_individual_file_read_all (mca_io_ompio_file_t *fh,
struct ompi_datatype_t *datatype,
ompi_status_public_t *status)
{
return ompio_io_ompio_file_read( fh, buf, count, datatype, status);
return mca_common_ompio_file_read( fh, buf, count, datatype, status);
}


@ -25,8 +25,7 @@
#include "mpi.h"
#include "ompi/constants.h"
#include "ompi/mca/fcoll/fcoll.h"
#include "ompi/mca/io/ompio/io_ompio.h"
#include "ompi/mca/io/io.h"
#include "ompi/mca/common/ompio/common_ompio.h"
#include "math.h"
#include <unistd.h>
@ -37,5 +36,5 @@ int mca_fcoll_individual_file_write_all (mca_io_ompio_file_t *fh,
struct ompi_datatype_t *datatype,
ompi_status_public_t *status)
{
return ompio_io_ompio_file_write (fh, buf, count, datatype, status);
return mca_common_ompio_file_write (fh, buf, count, datatype, status);
}


@ -9,7 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2015 University of Houston. All rights reserved.
* Copyright (c) 2008-2016 University of Houston. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -28,7 +28,7 @@
#include "ompi/mca/mca.h"
#include "ompi/mca/fcoll/fcoll.h"
#include "ompi/mca/fcoll/base/base.h"
#include "ompi/mca/io/ompio/io_ompio.h"
#include "ompi/mca/common/ompio/common_ompio.h"
BEGIN_C_DECLS


@ -103,7 +103,7 @@ mca_fcoll_static_file_read_all (mca_io_ompio_file_t *fh,
double read_time = 0.0, start_read_time = 0.0, end_read_time = 0.0;
double rcomm_time = 0.0, start_rcomm_time = 0.0, end_rcomm_time = 0.0;
double read_exch = 0.0, start_rexch = 0.0, end_rexch = 0.0;
mca_io_ompio_print_entry nentry;
mca_common_ompio_print_entry nentry;
#endif
#if DEBUG_ON
MPI_Aint gc_in;
@ -919,9 +919,9 @@ mca_fcoll_static_file_read_all (mca_io_ompio_file_t *fh,
else
nentry.aggregator = 0;
nentry.nprocs_for_coll = static_num_io_procs;
if (!fh->f_full_print_queue(READ_PRINT_QUEUE)){
fh->f_register_print_entry(READ_PRINT_QUEUE,
nentry);
if (!mca_common_ompio_full_print_queue(fh->f_coll_read_time)){
mca_common_ompio_register_print_entry(fh->f_coll_read_time,
nentry);
}
#endif


@ -104,7 +104,7 @@ mca_fcoll_static_file_write_all (mca_io_ompio_file_t *fh,
double write_time = 0.0, start_write_time = 0.0, end_write_time = 0.0;
double comm_time = 0.0, start_comm_time = 0.0, end_comm_time = 0.0;
double exch_write = 0.0, start_exch = 0.0, end_exch = 0.0;
mca_io_ompio_print_entry nentry;
mca_common_ompio_print_entry nentry;
#endif
@ -952,9 +952,9 @@ mca_fcoll_static_file_write_all (mca_io_ompio_file_t *fh,
else
nentry.aggregator = 0;
nentry.nprocs_for_coll = static_num_io_procs;
if (!fh->f_full_print_queue(WRITE_PRINT_QUEUE)){
fh->f_register_print_entry(WRITE_PRINT_QUEUE,
nentry);
if (!mca_common_ompio_full_print_queue(fh->f_coll_write_time)){
mca_common_ompio_register_print_entry(fh->f_coll_write_time,
nentry);
}
#endif


@ -9,7 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2014 University of Houston. All rights reserved.
* Copyright (c) 2008-2016 University of Houston. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -28,7 +28,7 @@
#include "ompi/mca/mca.h"
#include "ompi/mca/fcoll/fcoll.h"
#include "ompi/mca/fcoll/base/base.h"
#include "ompi/mca/io/ompio/io_ompio.h"
#include "ompi/mca/common/ompio/common_ompio.h"
BEGIN_C_DECLS


@ -140,7 +140,7 @@ mca_fcoll_two_phase_file_read_all (mca_io_ompio_file_t *fh,
Flatlist_node *flat_buf=NULL;
mca_io_ompio_access_array_t *my_req=NULL, *others_req=NULL;
#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN
mca_io_ompio_print_entry nentry;
mca_common_ompio_print_entry nentry;
#endif
// if (opal_datatype_is_predefined(&datatype->super)) {
// fh->f_flags = fh->f_flags | OMPIO_CONTIGUOUS_MEMORY;
@ -479,9 +479,9 @@ mca_fcoll_two_phase_file_read_all (mca_io_ompio_file_t *fh,
nentry.nprocs_for_coll = two_phase_num_io_procs;
if (!fh->f_full_print_queue(READ_PRINT_QUEUE)){
fh->f_register_print_entry(READ_PRINT_QUEUE,
nentry);
if (!mca_common_ompio_full_print_queue(fh->f_coll_read_time)){
mca_common_ompio_register_print_entry(fh->f_coll_read_time,
nentry);
}
#endif


@ -168,7 +168,7 @@ mca_fcoll_two_phase_file_write_all (mca_io_ompio_file_t *fh,
mca_io_ompio_access_array_t *my_req=NULL, *others_req=NULL;
MPI_Aint send_buf_addr;
#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN
mca_io_ompio_print_entry nentry;
mca_common_ompio_print_entry nentry;
#endif
@ -543,9 +543,9 @@ mca_fcoll_two_phase_file_write_all (mca_io_ompio_file_t *fh,
nentry.aggregator = 0;
}
nentry.nprocs_for_coll = two_phase_num_io_procs;
if (!fh->f_full_print_queue(WRITE_PRINT_QUEUE)){
fh->f_register_print_entry(WRITE_PRINT_QUEUE,
nentry);
if (!mca_common_ompio_full_print_queue(fh->f_coll_write_time)){
mca_common_ompio_register_print_entry(fh->f_coll_write_time,
nentry);
}
#endif
