ROMIO 3.1.4 refresh: import romio from mpich 3.1.4 tarball
Этот коммит содержится в:
родитель
e2e91142d5
Коммит
eacd434a02
55
ompi/mca/io/romio314/romio/.codingcheck
Обычный файл
55
ompi/mca/io/romio314/romio/.codingcheck
Обычный файл
@ -0,0 +1,55 @@
|
||||
# Here are names that at least at one point were used within ROMIO.
|
||||
# We should look at these and decide which we wish to allow and which
|
||||
# should be replaced with something more ROMIO-specific.
|
||||
%romioDefines = ( 'ROMIO_[A-Za-z0-9_]+' => romio,
|
||||
'PROFILE' => romio,
|
||||
'PRINT_ERR_MSG' => romio,
|
||||
'HPUX' => romio,
|
||||
'SPPUX'=> romio,
|
||||
'SX4'=> romio,
|
||||
'AIO_SUN'=> romio,
|
||||
'AIO_HANDLE_IN_AIOCB'=> romio,
|
||||
'NO_FD_IN_AIOCB'=> romio,
|
||||
'NO_AIO'=> romio,
|
||||
'AIO_PRIORITY_DEFAULT'=> romio,
|
||||
'AIO_SIGNOTIFY_NONE'=> romio,
|
||||
'MPISGI'=> romio,
|
||||
'CRAY'=> romio,
|
||||
'PARAGON'=> romio,
|
||||
'FREEBSD'=> romio,
|
||||
'LINUX'=> romio,
|
||||
'tflops'=> romio,
|
||||
'NFS'=> romio,
|
||||
'XFS'=> romio,
|
||||
'CB_CONFIG_LIST_DEBUG'=> romio,
|
||||
'SFS'=> romio,
|
||||
'HFS'=> romio,
|
||||
'UFS'=> romio,
|
||||
'PVFS_.+' => romio,
|
||||
'MPI_hpux'=> romio,
|
||||
'FORTRANCAPS'=> romio,
|
||||
'MPILAM'=> romio,
|
||||
'NEEDS_ADIOCB_T'=> romio,
|
||||
'AGG_DEBUG'=> romio,
|
||||
'SOLARIS'=> romio,
|
||||
'IRIX'=> romio,
|
||||
'AIX'=> romio,
|
||||
'DEC'=> romio,
|
||||
'NEEDS_MPI_TEST'=> romio,
|
||||
'PFS'=> romio,
|
||||
'PIOFS'=> romio,
|
||||
'MPICH'=> romio,
|
||||
'MPICH' => romio,
|
||||
'MPI_OFFSET_IS_INT'=> romio,
|
||||
'MPI_COMBINER_NAMED'=> romio,
|
||||
'_UNICOS'=> romio,
|
||||
'MPIHP'=> romio,
|
||||
);
|
||||
|
||||
# Only invoke this function if the function is defined (in case the
|
||||
# user removed the cpp defines check with -rmchecks=cppdefines)
|
||||
if (defined(&PushDefinesNames)) {
|
||||
&PushDefinesNames( "romioDefines", "tree", "add" );
|
||||
}
|
||||
|
||||
1;
|
38
ompi/mca/io/romio314/romio/.config_params
Обычный файл
38
ompi/mca/io/romio314/romio/.config_params
Обычный файл
@ -0,0 +1,38 @@
|
||||
__sun4_
|
||||
__rs6000_
|
||||
__paragon_
|
||||
__solaris_
|
||||
__solaris86_
|
||||
__tflop_
|
||||
__tflops_
|
||||
__hpux_
|
||||
__sppux_
|
||||
__SX4_
|
||||
__sgi_
|
||||
__sgi5_
|
||||
__IRIX_
|
||||
__IRIX32_
|
||||
__IRIXN32_
|
||||
__IRIX64_
|
||||
__alpha_
|
||||
__ALPHA_
|
||||
__freebsd_
|
||||
__netbsd_
|
||||
__LINUX_
|
||||
__LINUX_ALPHA_
|
||||
__CRAY_
|
||||
__Darwin_
|
||||
__nfs_
|
||||
__ufs_
|
||||
__pfs_
|
||||
__piofs_
|
||||
__pvfs_
|
||||
__testfs_
|
||||
__xfs_
|
||||
__hfs_
|
||||
__sfs_
|
||||
__mpich_mpi
|
||||
__sgi_mpi
|
||||
__hp_mpi
|
||||
__cray_mpi
|
||||
__lam_mpi
|
41
ompi/mca/io/romio314/romio/COPYRIGHT
Обычный файл
41
ompi/mca/io/romio314/romio/COPYRIGHT
Обычный файл
@ -0,0 +1,41 @@
|
||||
COPYRIGHT
|
||||
|
||||
The following is a notice of limited availability of the code and
|
||||
disclaimer, which must be included in the prologue of the code and in
|
||||
all source listings of the code.
|
||||
|
||||
Copyright (C) 1997 University of Chicago
|
||||
|
||||
Permission is hereby granted to use, reproduce, prepare derivative
|
||||
works, and to redistribute to others.
|
||||
|
||||
The University of Chicago makes no representations as to the suitability,
|
||||
operability, accuracy, or correctness of this software for any purpose.
|
||||
It is provided "as is" without express or implied warranty.
|
||||
|
||||
This software was authored by:
|
||||
Rajeev Thakur: (630) 252-1682; thakur@mcs.anl.gov
|
||||
Mathematics and Computer Science Division
|
||||
Argonne National Laboratory, Argonne IL 60439, USA
|
||||
|
||||
|
||||
GOVERNMENT LICENSE
|
||||
|
||||
Portions of this material resulted from work developed under a U.S.
|
||||
Government Contract and are subject to the following license: the
|
||||
Government is granted for itself and others acting on its behalf a
|
||||
paid-up, nonexclusive, irrevocable worldwide license in this computer
|
||||
software to reproduce, prepare derivative works, and perform publicly
|
||||
and display publicly.
|
||||
|
||||
DISCLAIMER
|
||||
|
||||
This computer code material was prepared, in part, as an account of
|
||||
work sponsored by an agency of the United States Government. Neither
|
||||
the United States Government, nor the University of Chicago, nor any
|
||||
of their employees, makes any warranty express or implied, or assumes
|
||||
any legal liability or responsibility for the accuracy, completeness,
|
||||
or usefulness of any information, apparatus, product, or process
|
||||
disclosed, or represents that its use would not infringe privately
|
||||
owned rights.
|
||||
|
172
ompi/mca/io/romio314/romio/Makefile.am
Обычный файл
172
ompi/mca/io/romio314/romio/Makefile.am
Обычный файл
@ -0,0 +1,172 @@
|
||||
# -*- Mode: Makefile; -*-
|
||||
#
|
||||
# (C) 2011 by Argonne National Laboratory.
|
||||
# See COPYRIGHT in top-level directory.
|
||||
#
|
||||
|
||||
## TODO: need to write an automakefile that handles two primary cases:
|
||||
## 1) that ROMIO is being embedded within the MPI library, as in MPICH or Open
|
||||
## MPI
|
||||
## 2) that ROMIO is being built standalone, old-school style. This case is
|
||||
## basically unused in modern practice.
|
||||
|
||||
# help autoreconf and friends realize where the macros live
|
||||
ACLOCAL_AMFLAGS = -I confdb
|
||||
|
||||
# empty variable initializations so that later code can append (+=)
|
||||
include_HEADERS =
|
||||
nodist_include_HEADERS =
|
||||
noinst_HEADERS =
|
||||
AM_CPPFLAGS =
|
||||
EXTRA_DIST =
|
||||
SUFFIXES =
|
||||
doc1_src_txt =
|
||||
|
||||
# ------------------------------------------------------------------------
|
||||
# variables to be populated by the included Makefile.mk fragments:
|
||||
|
||||
# These are files that contain MPI routines (e.g., MPI_File_open).
|
||||
# In MPICH these will have an MPI_ and a PMPI_ version. Other implementations
|
||||
# (like OMPI) only want these to be MPI_ routines, possibly with some
|
||||
# name-shifting prefix.
|
||||
romio_mpi_sources =
|
||||
|
||||
# regular old source files that implement ROMIO, such as ADIO code
|
||||
romio_other_sources =
|
||||
|
||||
# code that may need to be "up" called from the MPI library and/or is
|
||||
# MPI-implementation-specific in some way
|
||||
glue_sources =
|
||||
|
||||
# ------------------------------------------------------------------------
|
||||
# when building under MPICH we must be able to find mpi.h
|
||||
AM_CPPFLAGS += $(MPI_H_INCLUDE)
|
||||
|
||||
# ------------------------------------------------------------------------
|
||||
# handle the "include" directory here
|
||||
AM_CPPFLAGS += -I$(top_builddir)/include -I$(top_srcdir)/include
|
||||
# nodist_ b/c these are created by config.status and should not be distributed
|
||||
nodist_include_HEADERS += include/mpio.h include/mpiof.h
|
||||
|
||||
# ------------------------------------------------------------------------
|
||||
|
||||
SUBDIRS =
|
||||
DIST_SUBDIRS = test test-internal
|
||||
|
||||
# for the sake of parallel make and avoiding an excessive number of convenience
|
||||
# libs, we use a subdir automake fragment strategy
|
||||
include mpi-io/Makefile.mk
|
||||
include adio/Makefile.mk
|
||||
|
||||
EXTRA_DIST += autogen.sh
|
||||
|
||||
if BUILD_ROMIO_EMBEDDED
|
||||
# Build a libtool convenience library that the enclosing MPI implementation can
|
||||
# use by adding it to the right _LIBADD variable.
|
||||
noinst_LTLIBRARIES = libromio.la
|
||||
libromio_la_SOURCES = $(romio_mpi_sources) $(romio_other_sources) $(glue_sources)
|
||||
|
||||
## NOTE: ROMIO's old build system builds a bunch of _foo.o objects that contain
|
||||
## PMPI_ implementations as well as calls to only other PMPI routines. In
|
||||
## MPICH, these are the objects that need to go into libmpi, while the foo.o
|
||||
## objects should go into libpmpi. Furthermore, the -D option for ROMIO's
|
||||
## source files is different and inverted (in the boolean sense) compared with
|
||||
## MPICH's definition. And ROMIO was dumping all of the symbols into the main
|
||||
## libmpi library, regardless of the separate profiling library's existence.
|
||||
##
|
||||
## Annoying, right?
|
||||
if BUILD_PROFILING_LIB
|
||||
# The current best strategy for now is to build the PMPI symbols as a separate
|
||||
# convenience lib to permit adding the special "-D..." argument for all objects.
|
||||
# MPICH will then link in both convenience libraries into libmpi, since it
|
||||
# won't work very well the other way around.
|
||||
noinst_LTLIBRARIES += libpromio.la
|
||||
libpromio_la_SOURCES = $(romio_mpi_sources)
|
||||
libpromio_la_CPPFLAGS = $(AM_CPPFLAGS) -DMPIO_BUILD_PROFILING
|
||||
endif BUILD_PROFILING_LIB
|
||||
|
||||
else !BUILD_ROMIO_EMBEDDED
|
||||
lib_LTLIBRARIES = libromio.la
|
||||
libromio_la_SOURCES = $(romio_mpi_sources) $(romio_other_sources) $(glue_sources)
|
||||
if BUILD_PROFILING_LIB
|
||||
libpromio_la_SOURCES = $(romio_mpi_sources)
|
||||
libpromio_la_CPPFLAGS = $(AM_CPPFLAGS) -DMPIO_BUILD_PROFILING
|
||||
endif BUILD_PROFILING_LIB
|
||||
|
||||
endif
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
.PHONY: coverage
|
||||
# Sources handed to gcov by the "coverage" target below: everything that is
# compiled into libromio (the variable is defined in both the embedded and
# the standalone branch of the BUILD_ROMIO_EMBEDDED conditional above).
gcov_sources = $(libromio_la_SOURCES)
|
||||
# assumes that these sources were compiled appropriately ("-fprofile-arcs"
|
||||
# and "-ftest-coverage")
|
||||
coverage:
|
||||
@for file in $(gcov_sources) ; do \
|
||||
dir=`dirname $$file` ; \
|
||||
bname=`basename $$file` ; \
|
||||
aux=`echo $$bname | sed -e 's,\.*$$,,'` ; \
|
||||
echo "( $(GCOV) -b -f -o $$file $$file && mv $${bname}.gcov $$dir )" ; \
|
||||
( $(GCOV) -b -f -o $$file $$file && mv $${bname}.gcov $$dir ) ; \
|
||||
rm -f *.gcov ; \
|
||||
done
|
||||
for subdir in $(SUBDIRS) - ; do \
|
||||
if test $$subdir = "-" ; then break ; fi ; \
|
||||
( cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) coverage ) ; \
|
||||
done
|
||||
# --------------------------------------------------------------------------
|
||||
.PHONY: mandoc mandoc-local htmldoc htmldoc-local
|
||||
SUFFIXES += .man-phony .html-phony .man1-phony .html1-phony .txt
|
||||
|
||||
# "make V=1" support for our documentation recipes
|
||||
doctextman_verbose = $(doctextman_verbose_$(V))
|
||||
doctextman_verbose_ = $(doctextman_verbose_$(AM_DEFAULT_VERBOSITY))
|
||||
doctextman_verbose_0 = @echo " DOCTEXTMAN " $@;
|
||||
doctexthtml_verbose = $(doctexthtml_verbose_$(V))
|
||||
doctexthtml_verbose_ = $(doctexthtml_verbose_$(AM_DEFAULT_VERBOSITY))
|
||||
doctexthtml_verbose_0 = @echo " DOCTEXTHTML " $@;
|
||||
|
||||
# Build dir paths where the man pages will be created. Will usually be
|
||||
# overridden by MPICH make.
|
||||
mandoc_path1=$(abs_top_builddir)/man/man1
|
||||
mandoc_path3=$(abs_top_builddir)/man/man3
|
||||
htmldoc_path1=$(abs_top_builddir)/www/www1
|
||||
htmldoc_path3=$(abs_top_builddir)/www/www3
|
||||
doctext_docnotes=
|
||||
# Provide an easily replaced url root for the generated index file.
|
||||
# You can override this with URL desired in the index file generated by doctext.
|
||||
# You can ignore this if you don't use mapnames or tohtml to add links
|
||||
# to the MPI manual pages to documents.
|
||||
htmldoc_root3="--your-url-here--"
|
||||
|
||||
.c.man-phony:
|
||||
$(doctextman_verbose)$(DOCTEXT) -man -mpath $(mandoc_path3) -ext 3 \
|
||||
-heading MPI -quotefmt -nolocation $(doctext_docnotes) $<
|
||||
.c.html-phony:
|
||||
$(doctexthtml_verbose)$(DOCTEXT) -html -mpath $(htmldoc_path3) \
|
||||
-heading MPI -quotefmt -nolocation \
|
||||
-index $(htmldoc_path3)/mpi.cit -indexdir $(htmldoc_root3) \
|
||||
$(doctext_docnotes) $<
|
||||
|
||||
.txt.man1-phony:
|
||||
$(doctextman_verbose)$(DOCTEXT) -man -mpath $(mandoc_path1) -ext 1 \
|
||||
-heading MPI -quotefmt -nolocation $(doctext_docnotes) $<
|
||||
.txt.html1-phony:
|
||||
$(doctexthtml_verbose)$(DOCTEXT) -html -mpath $(htmldoc_path1) \
|
||||
-heading MPI -quotefmt -nolocation $(doctext_docnotes) $<
|
||||
|
||||
# use mandoc-local target to force directory creation before running DOCTEXT
|
||||
mandoc:
|
||||
test -d $(mandoc_path1) || $(MKDIR_P) $(mandoc_path1)
|
||||
test -d $(mandoc_path3) || $(MKDIR_P) $(mandoc_path3)
|
||||
$(MAKE) $(AM_MAKEFLAGS) mandoc-local
|
||||
mandoc-local: $(romio_mpi_sources:.c=.man-phony) $(doc1_src_txt:.txt=.man1-phony)
|
||||
|
||||
# use htmldoc-local target to force directory creation before running DOCTEXT
|
||||
# Create the directories that the .html-phony / .html1-phony rules actually
# write into ($(htmldoc_path1) and $(htmldoc_path3)), so that overriding those
# variables is honored here as well; this mirrors the mandoc target.
htmldoc:
|
||||
test -d $(htmldoc_path1) || $(MKDIR_P) $(htmldoc_path1)
|
||||
test -d $(htmldoc_path3) || $(MKDIR_P) $(htmldoc_path3)
|
||||
$(MAKE) $(AM_MAKEFLAGS) htmldoc-local
|
||||
htmldoc-local: $(romio_mpi_sources:.c=.html-phony) $(doc1_src_txt:.txt=.html1-phony)
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
|
660
ompi/mca/io/romio314/romio/README
Обычный файл
660
ompi/mca/io/romio314/romio/README
Обычный файл
@ -0,0 +1,660 @@
|
||||
ROMIO: A High-Performance, Portable MPI-IO Implementation
|
||||
|
||||
Version 2008-03-09
|
||||
|
||||
Major Changes in this version:
|
||||
------------------------------
|
||||
* Fixed performance problems with the darray and subarray datatypes
|
||||
when using MPICH.
|
||||
|
||||
* Better support for building against existing MPICH versions.
|
||||
|
||||
When building against an existing MPICH installation, use the
|
||||
"--with-mpi=mpich" option to ROMIO configure. This will allow ROMIO
|
||||
to take advantage of internal features of that implementation.
|
||||
|
||||
* Deprecation of SFS, HFS, and PIOFS implementations.
|
||||
|
||||
These are no longer actively supported, although the code will continue
|
||||
to be distributed for now.
|
||||
|
||||
* Initial support for the Panasas PanFS filesystem.
|
||||
|
||||
PanFS allows users to specify the layout of a file at file-creation time.
|
||||
Layout information includes the number of StorageBlades (SB)
|
||||
across which the data is stored, the number of SBs across which a
|
||||
parity stripe is written, and the number of consecutive stripes that
|
||||
are placed on the same set of SBs. The panfs_layout_* hints are only
|
||||
used if supplied at file-creation time.
|
||||
|
||||
panfs_layout_type - Specifies the layout of a file:
|
||||
2 = RAID0
|
||||
3 = RAID5 Parity Stripes
|
||||
panfs_layout_stripe_unit - The size of the stripe unit in bytes
|
||||
panfs_layout_total_num_comps - The total number of StorageBlades a file
|
||||
is striped across.
|
||||
panfs_layout_parity_stripe_width - If the layout type is RAID5 Parity
|
||||
Stripes, this hint specifies the
|
||||
number of StorageBlades in a parity
|
||||
stripe.
|
||||
panfs_layout_parity_stripe_depth - If the layout type is RAID5 Parity
|
||||
Stripes, this hint specifies the
|
||||
number of contiguous parity stripes written
|
||||
across the same set of SBs.
|
||||
panfs_layout_visit_policy - If the layout type is RAID5 Parity Stripes,
|
||||
the policy used to determine the parity
|
||||
stripe a given file offset is written to:
|
||||
1 = Round Robin
|
||||
|
||||
PanFS supports the "concurrent write" (CW) mode, where groups of cooperating
|
||||
clients can disable the PanFS consistency mechanisms and use their own
|
||||
consistency protocol. Clients participating in concurrent write mode use
|
||||
application specific information to improve performance while maintaining
|
||||
file consistency. All clients accessing the file(s) must enable concurrent
|
||||
write mode. If any client does not enable concurrent write mode, then the
|
||||
PanFS consistency protocol will be invoked. Once a file is opened in CW mode
|
||||
on a machine, attempts to open a file in non-CW mode will fail with
|
||||
EACCES. If a file is already opened in non-CW mode, attempts to open
|
||||
the file in CW mode will fail with EACCES. The following hint is
|
||||
used to enable concurrent write mode.
|
||||
|
||||
panfs_concurrent_write - If set to 1 at file open time, the file
|
||||
is opened using the PanFS concurrent write
|
||||
mode flag. Concurrent write mode is not a
|
||||
persistent attribute of the file.
|
||||
|
||||
Below is an example PanFS layout using the following parameters:
|
||||
|
||||
- panfs_layout_type = 3
|
||||
- panfs_layout_total_num_comps = 100
|
||||
- panfs_layout_parity_stripe_width = 10
|
||||
- panfs_layout_parity_stripe_depth = 8
|
||||
- panfs_layout_visit_policy = 1
|
||||
|
||||
Parity Stripe Group 1 Parity Stripe Group 2 . . . Parity Stripe Group 10
|
||||
---------------------- ---------------------- --------------------
|
||||
SB1 SB2 ... SB10 SB11 SB12 ... SB20 ... SB91 SB92 ... SB100
|
||||
----------------------- ----------------------- ---------------------
|
||||
D1 D2 ... D10 D91 D92 ... D100 D181 D182 ... D190
|
||||
D11 D12 D20 D101 D102 D110 D191 D192 D193
|
||||
D21 D22 D30 . . . . . .
|
||||
D31 D32 D40
|
||||
D41 D42 D50
|
||||
D51 D52 D60
|
||||
D61 D62 D70
|
||||
D71 D72 D80
|
||||
D81 D82 D90 D171 D172 D180 D261 D262 D270
|
||||
D271 D272 D273 . . . . . .
|
||||
...
|
||||
|
||||
* Initial support for the Globus GridFTP filesystem. Work contributed by Troy
|
||||
Baer (troy@osc.edu).
|
||||
|
||||
Major Changes in Version 1.2.5:
|
||||
------------------------------
|
||||
|
||||
* Initial support for MPICH-2
|
||||
|
||||
* fix for a bug in which ROMIO would get confused for some permutations
|
||||
of the aggregator list
|
||||
|
||||
* direct io on IRIX's XFS should work now
|
||||
|
||||
* fixed an issue with the Fortran bindings that would cause them to fail
|
||||
when some compilers tried to build them.
|
||||
|
||||
* Initial support for deferred opens
|
||||
|
||||
Major Changes in Version 1.2.4:
|
||||
------------------------------
|
||||
* Added section describing ROMIO MPI_FILE_SYNC and MPI_FILE_CLOSE behavior to
|
||||
User's Guide
|
||||
|
||||
* Bug removed from PVFS ADIO implementation regarding resize operations
|
||||
|
||||
* Added support for PVFS listio operations, including hints to control use
|
||||
|
||||
|
||||
Major Changes in Version 1.2.3:
|
||||
-------------------------------
|
||||
* Enhanced aggregation control via cb_config_list, romio_cb_read,
|
||||
and romio_cb_write hints
|
||||
|
||||
* Asynchronous IO can be enabled under Linux with the --enable-aio argument
|
||||
to configure
|
||||
|
||||
* Additional PVFS support
|
||||
|
||||
* Additional control over data sieving with romio_ds_read hint
|
||||
|
||||
* NTFS ADIO implementation integrated into source tree
|
||||
|
||||
* testfs ADIO implementation added for debugging purposes
|
||||
|
||||
|
||||
Major Changes in Version 1.0.3:
|
||||
-------------------------------
|
||||
|
||||
* When used with MPICH 1.2.1, the MPI-IO functions return proper error codes
|
||||
and classes, and the status object is filled in.
|
||||
|
||||
* On SGI's XFS file system, ROMIO can use direct I/O even if the
|
||||
user's request does not meet the various restrictions needed to use
|
||||
direct I/O. ROMIO does this by doing part of the request with
|
||||
buffered I/O (until all the restrictions are met) and doing the rest
|
||||
with direct I/O. (This feature hasn't been tested rigorously. Please
|
||||
check for errors.)
|
||||
|
||||
By default, ROMIO will use only buffered I/O. Direct I/O can be
|
||||
enabled either by setting the environment variables MPIO_DIRECT_READ
|
||||
and/or MPIO_DIRECT_WRITE to TRUE, or on a per-file basis by using
|
||||
the info keys "direct_read" and "direct_write".
|
||||
|
||||
Direct I/O will result in higher performance only if you are
|
||||
accessing a high-bandwidth disk system. Otherwise, buffered I/O is
|
||||
better and is therefore used as the default.
|
||||
|
||||
* Miscellaneous bug fixes.
|
||||
|
||||
|
||||
Major Changes in Version 1.0.2:
|
||||
---------------------------
|
||||
|
||||
* Implemented the shared file pointer functions and
|
||||
split collective I/O functions. Therefore, the main
|
||||
components of the MPI I/O chapter not yet implemented are
|
||||
file interoperability and error handling.
|
||||
|
||||
* Added support for using "direct I/O" on SGI's XFS file system.
|
||||
Direct I/O is an optional feature of XFS in which data is moved
|
||||
directly between the user's buffer and the storage devices, bypassing
|
||||
the file-system cache. This can improve performance significantly on
|
||||
systems with high disk bandwidth. Without high disk bandwidth,
|
||||
regular I/O (that uses the file-system cache) performs better.
|
||||
ROMIO, therefore, does not use direct I/O by default. The user can
|
||||
turn on direct I/O (separately for reading and writing) either by
|
||||
using environment variables or by using MPI's hints mechanism (info).
|
||||
To use the environment-variables method, do
|
||||
setenv MPIO_DIRECT_READ TRUE
|
||||
setenv MPIO_DIRECT_WRITE TRUE
|
||||
To use the hints method, the two keys are "direct_read" and "direct_write".
|
||||
By default their values are "false". To turn on direct I/O, set the values
|
||||
to "true". The environment variables have priority over the info keys.
|
||||
In other words, if the environment variables are set to TRUE, direct I/O
|
||||
will be used even if the info keys say "false", and vice versa.
|
||||
Note that direct I/O must be turned on separately for reading
|
||||
and writing.
|
||||
The environment-variables method assumes that the environment
|
||||
variables can be read by each process in the MPI job. This is
|
||||
not guaranteed by the MPI Standard, but it works with SGI's MPI
|
||||
and the ch_shmem device of MPICH.
|
||||
|
||||
* Added support (new ADIO device, ad_pvfs) for the PVFS parallel
|
||||
file system for Linux clusters, developed at Clemson University
|
||||
(see http://www.parl.clemson.edu/pvfs ). To use it, you must first install
|
||||
PVFS and then when configuring ROMIO, specify "-file_system=pvfs" in
|
||||
addition to any other options to "configure". (As usual, you can configure
|
||||
for multiple file systems by using "+"; for example,
|
||||
"-file_system=pvfs+ufs+nfs".) You will need to specify the path
|
||||
to the PVFS include files via the "-cflags" option to configure,
|
||||
for example, "configure -cflags=-I/usr/pvfs/include". You
|
||||
will also need to specify the full path name of the PVFS library.
|
||||
The best way to do this is via the "-lib" option to MPICH's
|
||||
configure script (assuming you are using ROMIO from within MPICH).
|
||||
|
||||
* Uses weak symbols (where available) for building the profiling version,
|
||||
i.e., the PMPI routines. As a result, the size of the library is reduced
|
||||
considerably.
|
||||
|
||||
* The Makefiles use "virtual paths" if supported by the make utility. GNU make
|
||||
supports it, for example. This feature allows you to untar the
|
||||
distribution in some directory, say a slow NFS directory,
|
||||
and compile the library (the .o files) in another
|
||||
directory, say on a faster local disk. For example, if the tar file
|
||||
has been untarred in an NFS directory called /home/thakur/romio,
|
||||
one can compile it in a different directory, say /tmp/thakur, as follows:
|
||||
cd /tmp/thakur
|
||||
/home/thakur/romio/configure
|
||||
make
|
||||
The .o files will be created in /tmp/thakur; the library will be created in
|
||||
/home/thakur/romio/lib/$ARCH/libmpio.a .
|
||||
This method works only if the make utility supports virtual paths.
|
||||
If the default make does not, you can install GNU make which does,
|
||||
and specify it to configure as
|
||||
/home/thakur/romio/configure -make=/usr/gnu/bin/gmake (or whatever)
|
||||
|
||||
* Lots of miscellaneous bug fixes and other enhancements.
|
||||
|
||||
* This version is included in MPICH 1.2.0. If you are using MPICH, you
|
||||
need not download ROMIO separately; it gets built as part of MPICH.
|
||||
The previous version of ROMIO is included in LAM, HP MPI, SGI MPI, and
|
||||
NEC MPI. NEC has also implemented the MPI-IO functions missing
|
||||
in ROMIO, and therefore NEC MPI has a complete implementation
|
||||
of MPI-IO.
|
||||
|
||||
|
||||
Major Changes in Version 1.0.1:
|
||||
------------------------------
|
||||
|
||||
* This version is included in MPICH 1.1.1 and HP MPI 1.4.
|
||||
|
||||
* Added support for NEC SX-4 and created a new device ad_sfs for
|
||||
NEC SFS file system.
|
||||
|
||||
* New devices ad_hfs for HP/Convex HFS file system and ad_xfs for
|
||||
SGI XFS file system.
|
||||
|
||||
* Users no longer need to prefix the filename with the type of
|
||||
file system; ROMIO determines the file-system type on its own.
|
||||
|
||||
* Added support for 64-bit file sizes on IBM PIOFS, SGI XFS,
|
||||
HP/Convex HFS, and NEC SFS file systems.
|
||||
|
||||
* MPI_Offset is an 8-byte integer on machines that support 8-byte integers.
|
||||
It is of type "long long" in C and "integer*8" in Fortran.
|
||||
With a Fortran 90 compiler, you can use either integer*8 or
|
||||
integer(kind=MPI_OFFSET_KIND).
|
||||
If you printf an MPI_Offset in C, remember to use %lld
|
||||
or %ld as required by your compiler. (See what is used in the test
|
||||
program romio/test/misc.c.)
|
||||
|
||||
* On some machines, ROMIO detects at configure time that "long long" is
|
||||
either not supported by the C compiler or it doesn't work properly.
|
||||
In such cases, configure sets MPI_Offset to long in C and integer in
|
||||
Fortran. This happens on Intel Paragon, Sun4, and FreeBSD.
|
||||
|
||||
* Added support for passing hints to the implementation via the MPI_Info
|
||||
parameter. ROMIO understands the following hints (keys in MPI_Info object):
|
||||
|
||||
/* on all file systems */
|
||||
cb_buffer_size - buffer size for collective I/O
|
||||
cb_nodes - no. of processes that actually perform I/O in collective I/O
|
||||
ind_rd_buffer_size - buffer size for data sieving in independent reads
|
||||
|
||||
/* on all file systems except IBM PIOFS */
|
||||
ind_wr_buffer_size - buffer size for data sieving in independent writes
|
||||
/* ind_wr_buffer_size is ignored on PIOFS because data sieving
|
||||
cannot be done for writes since PIOFS doesn't support file locking */
|
||||
|
||||
/* on Intel PFS and IBM PIOFS only. These hints are understood only if
|
||||
supplied at file-creation time. */
|
||||
striping_factor - no. of I/O devices to stripe the file across
|
||||
striping_unit - the striping unit in bytes
|
||||
start_iodevice - the number of the I/O device from which to start
|
||||
striping (between 0 and (striping_factor-1))
|
||||
|
||||
/* on Intel PFS only. */
|
||||
pfs_svr_buf - turn on or off PFS server buffering by setting the value
|
||||
to "true" or "false", case-sensitive.
|
||||
|
||||
If ROMIO doesn't understand a hint, or if the value is invalid, the hint
|
||||
will be ignored. The values of hints being used by ROMIO at any time
|
||||
can be obtained via MPI_File_get_info.
|
||||
|
||||
|
||||
|
||||
General Information
|
||||
-------------------
|
||||
|
||||
ROMIO is a high-performance, portable implementation of MPI-IO (the
|
||||
I/O chapter in MPI). ROMIO's home page is at
|
||||
http://www.mcs.anl.gov/romio . The MPI standard is available at
|
||||
http://www.mpi-forum.org/docs/docs.html .
|
||||
|
||||
This version of ROMIO includes everything defined in the MPI I/O
|
||||
chapter except support for file interoperability and
|
||||
user-defined error handlers for files. The subarray and
|
||||
distributed array datatype constructor functions from Chapter 4
|
||||
(Sec. 4.14.4 & 4.14.5) have been implemented. They are useful for
|
||||
accessing arrays stored in files. The functions MPI_File_f2c and
|
||||
MPI_File_c2f (Sec. 4.12.4) are also implemented.
|
||||
|
||||
C, Fortran, and profiling interfaces are provided for all functions
|
||||
that have been implemented.
|
||||
|
||||
Please read the limitations of this version of ROMIO that are listed
|
||||
below (e.g., MPIO_Request object, restriction to homogeneous
|
||||
environments).
|
||||
|
||||
This version of ROMIO runs on at least the following machines: IBM SP;
|
||||
Intel Paragon; HP Exemplar; SGI Origin2000; Cray T3E; NEC SX-4; other
|
||||
symmetric multiprocessors from HP, SGI, DEC, Sun, and IBM; and networks of
|
||||
workstations (Sun, SGI, HP, IBM, DEC, Linux, and FreeBSD). Supported
|
||||
file systems are IBM PIOFS, Intel PFS, HP/Convex HFS, SGI XFS, NEC
|
||||
SFS, PVFS, NFS, and any Unix file system (UFS).
|
||||
|
||||
This version of ROMIO is included in MPICH 1.2.3; an earlier version
|
||||
is included in at least the following MPI implementations: LAM, HP
|
||||
MPI, SGI MPI, and NEC MPI.
|
||||
|
||||
Note that proper I/O error codes and classes are returned and the
|
||||
status variable is filled only when used with MPICH 1.2.1 or later.
|
||||
|
||||
You can open files on multiple file systems in the same program. The
|
||||
only restriction is that the directory where the file is to be opened
|
||||
must be accessible from the process opening the file. For example, a
|
||||
process running on one workstation may not be able to access a
|
||||
directory on the local disk of another workstation, and therefore
|
||||
ROMIO will not be able to open a file in such a directory. NFS-mounted
|
||||
files can be accessed.
|
||||
|
||||
An MPI-IO file created by ROMIO is no different than any other file
|
||||
created by the underlying file system. Therefore, you may use any of
|
||||
the commands provided by the file system to access the file, e.g., ls,
|
||||
mv, cp, rm, ftp.
|
||||
|
||||
|
||||
Using ROMIO on NFS
|
||||
------------------
|
||||
|
||||
To use ROMIO on NFS, file locking with fcntl must work correctly on
|
||||
the NFS installation. On some installations, fcntl locks don't work.
|
||||
To get them to work, you need to use Version 3 of NFS, ensure that the
|
||||
lockd daemon is running on all the machines, and have the system
|
||||
administrator mount the NFS file system with the "noac" option (no
|
||||
attribute caching). Turning off attribute caching may reduce
|
||||
performance, but it is necessary for correct behavior.
|
||||
|
||||
The following are some instructions we received from Ian Wells of HP
|
||||
for setting the noac option on NFS. We have not tried them
|
||||
ourselves. We are including them here because you may find
|
||||
them useful. Note that some of the steps may be specific to HP
|
||||
systems, and you may need root permission to execute some of the
|
||||
commands.
|
||||
|
||||
>1. first confirm you are running nfs version 3
|
||||
>
|
||||
>rpcinfo -p `hostname` | grep nfs
|
||||
>
|
||||
>ie
|
||||
> goedel >rpcinfo -p goedel | grep nfs
|
||||
> 100003 2 udp 2049 nfs
|
||||
> 100003 3 udp 2049 nfs
|
||||
>
|
||||
>
|
||||
>2. then edit /etc/fstab for each nfs directory read/written by MPIO
|
||||
> on each machine used for multihost MPIO.
|
||||
>
|
||||
> Here is an example of a correct fstab entry for /epm1:
|
||||
>
|
||||
> ie grep epm1 /etc/fstab
|
||||
>
|
||||
> ROOOOT 11>grep epm1 /etc/fstab
|
||||
> gershwin:/epm1 /rmt/gershwin/epm1 nfs bg,intr,noac 0 0
|
||||
>
|
||||
> if the noac option is not present, add it
|
||||
> and then remount this directory
|
||||
> on each of the machines that will be used to share MPIO files
|
||||
>
|
||||
>ie
|
||||
>
|
||||
>ROOOOT >umount /rmt/gershwin/epm1
|
||||
>ROOOOT >mount /rmt/gershwin/epm1
|
||||
>
|
||||
>3. Confirm that the directory is mounted noac:
|
||||
>
|
||||
>ROOOOT >grep gershwin /etc/mnttab
|
||||
>gershwin:/epm1 /rmt/gershwin/epm1 nfs
|
||||
>noac,acregmin=0,acregmax=0,acdirmin=0,acdirmax=0 0 0 899911504
|
||||
|
||||
|
||||
|
||||
|
||||
ROMIO Installation Instructions
|
||||
-------------------------------
|
||||
|
||||
Since ROMIO is included in MPICH, LAM, HP MPI, SGI MPI, and NEC MPI,
|
||||
you don't need to install it separately if you are using any of these
|
||||
MPI implementations. If you are using some other MPI, you can
|
||||
configure and build ROMIO as follows:
|
||||
|
||||
Untar the tar file as
|
||||
|
||||
gunzip -c romio.tar.gz | tar xvf -
|
||||
|
||||
OR
|
||||
|
||||
zcat romio.tar.Z | tar xvf -
|
||||
|
||||
THEN
|
||||
|
||||
cd romio
|
||||
./configure
|
||||
make
|
||||
|
||||
Some example programs and a Makefile are provided in the romio/test directory.
|
||||
Run the examples the way you would run any MPI program. Each program takes
|
||||
the filename as a command-line argument "-fname filename".
|
||||
|
||||
The configure script by default configures ROMIO for the file systems
|
||||
most likely to be used on the given machine. If you wish, you can
|
||||
explicitly specify the file systems by using the "-file_system" option
|
||||
to configure. Multiple file systems can be specified by using "+" as a
|
||||
separator. For example,
|
||||
|
||||
./configure -file_system=xfs+nfs
|
||||
|
||||
For the entire list of options to configure do
|
||||
|
||||
./configure -h | more
|
||||
|
||||
After building a specific version as above, you can install it in a
|
||||
particular directory with
|
||||
|
||||
make install PREFIX=/usr/local/romio (or whatever directory you like)
|
||||
|
||||
or just
|
||||
|
||||
make install (if you used -prefix at configure time)
|
||||
|
||||
If you intend to leave ROMIO where you built it, you should NOT install it
|
||||
(install is used only to move the necessary parts of a built ROMIO to
|
||||
another location). The installed copy will have the include files,
|
||||
libraries, man pages, and a few other odds and ends, but not the whole
|
||||
source tree. It will have a test directory for testing the
|
||||
installation and a location-independent Makefile built during
|
||||
installation, which users can copy and modify to compile and link
|
||||
against the installed copy.
|
||||
|
||||
To rebuild ROMIO with a different set of configure options, do
|
||||
|
||||
make distclean
|
||||
|
||||
to clean everything including the Makefiles created by configure.
|
||||
Then run configure again with the new options, followed by make.
|
||||
|
||||
|
||||
|
||||
Testing ROMIO
|
||||
-------------
|
||||
|
||||
To test if the installation works, do
|
||||
|
||||
make testing
|
||||
|
||||
in the romio/test directory. This calls a script that runs the test
|
||||
programs and compares the results with what they should be. By
|
||||
default, "make testing" causes the test programs to create files in
|
||||
the current directory and use whatever file system that corresponds
|
||||
to. To test with other file systems, you need to specify a filename in
|
||||
a directory corresponding to that file system as follows:
|
||||
|
||||
make testing TESTARGS="-fname=/foo/piofs/test"
|
||||
|
||||
|
||||
|
||||
Compiling and Running MPI-IO Programs
|
||||
-------------------------------------
|
||||
|
||||
If ROMIO is not already included in the MPI implementation, you need
|
||||
to include the file mpio.h for C or mpiof.h for Fortran in your MPI-IO
|
||||
program.
|
||||
|
||||
Note that on HP machines running HPUX and on NEC SX-4, you need to
|
||||
compile Fortran programs with mpifort, because the f77 compilers on
|
||||
these machines don't support 8-byte integers.
|
||||
|
||||
With MPICH, HP MPI, or NEC MPI, you can compile MPI-IO programs as
|
||||
mpicc foo.c
|
||||
or
|
||||
mpif77 foo.f
|
||||
or
|
||||
mpifort foo.f
|
||||
|
||||
As mentioned above, mpifort is preferred over mpif77 on HPUX and NEC
|
||||
because the f77 compilers on those machines do not support 8-byte integers.
|
||||
|
||||
With SGI MPI, you can compile MPI-IO programs as
|
||||
cc foo.c -lmpi
|
||||
or
|
||||
f77 foo.f -lmpi
|
||||
or
|
||||
f90 foo.f -lmpi
|
||||
|
||||
With LAM, you can compile MPI-IO programs as
|
||||
hcc foo.c -lmpi
|
||||
or
|
||||
hf77 foo.f -lmpi
|
||||
|
||||
If you have built ROMIO with some other MPI implementation, you can
|
||||
compile MPI-IO programs by explicitly giving the path to the include
|
||||
file mpio.h or mpiof.h and explicitly specifying the path to the
|
||||
library libmpio.a, which is located in $(ROMIO_HOME)/lib/$(ARCH)/libmpio.a .
|
||||
|
||||
|
||||
Run the program as you would run any MPI program on the machine. If
|
||||
you use mpirun, make sure you use the correct mpirun for the MPI
|
||||
implementation you are using. For example, if you are using MPICH on
|
||||
an SGI machine, make sure that you use MPICH's mpirun and not SGI's
|
||||
mpirun.
|
||||
|
||||
The Makefile in the romio/test directory illustrates how to compile
|
||||
and link MPI-IO programs.
|
||||
|
||||
|
||||
|
||||
Limitations of this version of ROMIO
|
||||
------------------------------------
|
||||
|
||||
* When used with any MPI implementation other than MPICH 1.2.1 (or later),
|
||||
the "status" argument is not filled in any MPI-IO function. Consequently,
|
||||
MPI_Get_count and MPI_Get_elements will not work when passed the status
|
||||
object from an MPI-IO operation.
|
||||
|
||||
* All nonblocking I/O functions use a ROMIO-defined "MPIO_Request"
|
||||
object instead of the usual "MPI_Request" object. Accordingly, two
|
||||
functions, MPIO_Test and MPIO_Wait, are provided to wait and test on
|
||||
these MPIO_Request objects. They have the same semantics as MPI_Test
|
||||
and MPI_Wait.
|
||||
|
||||
int MPIO_Test(MPIO_Request *request, int *flag, MPI_Status *status);
|
||||
int MPIO_Wait(MPIO_Request *request, MPI_Status *status);
|
||||
|
||||
The usual functions MPI_Test, MPI_Wait, MPI_Testany, etc., will not
|
||||
work for nonblocking I/O.
|
||||
|
||||
* This version works only on a homogeneous cluster of machines,
|
||||
and only the "native" file data representation is supported.
|
||||
|
||||
* When used with any MPI implementation other than MPICH 1.2.1 (or later),
|
||||
all MPI-IO functions return only two possible error codes---MPI_SUCCESS
|
||||
on success and MPI_ERR_UNKNOWN on failure.
|
||||
|
||||
* Shared file pointers are not supported on PVFS and IBM PIOFS file
|
||||
systems because they don't support fcntl file locks, and ROMIO uses
|
||||
that feature to implement shared file pointers.
|
||||
|
||||
* On HP machines running HPUX and on NEC SX-4, you need to compile
|
||||
Fortran programs with mpifort instead of mpif77, because the f77
|
||||
compilers on these machines don't support 8-byte integers.
|
||||
|
||||
* The file-open mode MPI_MODE_EXCL does not work on the Intel PFS file system,
|
||||
due to a bug in PFS.
|
||||
|
||||
|
||||
|
||||
Usage Tips
|
||||
----------
|
||||
|
||||
* When using ROMIO with SGI MPI, you may sometimes get an error
|
||||
message from SGI MPI: ``MPI has run out of internal datatype
|
||||
entries. Please set the environment variable MPI_TYPE_MAX for
|
||||
additional space.'' If you get this error message, add this line to
|
||||
your .cshrc file:
|
||||
setenv MPI_TYPE_MAX 65536
|
||||
Use a larger number if you still get the error message.
|
||||
|
||||
* If a Fortran program uses a file handle created using ROMIO's C
|
||||
interface, or vice-versa, you must use the functions MPI_File_c2f
|
||||
or MPI_File_f2c. Such a situation occurs,
|
||||
for example, if a Fortran program uses an I/O library written in C
|
||||
with MPI-IO calls. Similar functions MPIO_Request_f2c and
|
||||
MPIO_Request_c2f are also provided.
|
||||
|
||||
* For Fortran programs on the Intel Paragon, you may need
|
||||
to provide the complete path to mpif.h in the include statement, e.g.,
|
||||
include '/usr/local/mpich/include/mpif.h'
|
||||
instead of
|
||||
include 'mpif.h'
|
||||
This is because the -I option to the Paragon Fortran compiler if77
|
||||
doesn't work correctly. It always looks in the default directories first
|
||||
and, therefore, picks up Intel's mpif.h, which is actually the
|
||||
mpif.h of an older version of MPICH.
|
||||
|
||||
|
||||
|
||||
ROMIO Users Mailing List
|
||||
------------------------
|
||||
|
||||
Please register your copy of ROMIO with us by sending email
|
||||
to majordomo@mcs.anl.gov with the message
|
||||
|
||||
subscribe romio-users
|
||||
|
||||
This will enable us to notify you of new releases of ROMIO as well as
|
||||
bug fixes.
|
||||
|
||||
|
||||
|
||||
Reporting Bugs
|
||||
--------------
|
||||
|
||||
If you have trouble, first check the users guide (in
|
||||
romio/doc/users-guide.ps.gz). Then check the on-line list of known
|
||||
bugs and patches at http://www.mcs.anl.gov/romio .
|
||||
Finally, if you still have problems, send a detailed message containing:
|
||||
|
||||
The type of system (often, uname -a)
|
||||
The output of configure
|
||||
The output of make
|
||||
Any programs or tests
|
||||
|
||||
to romio-maint@mcs.anl.gov .
|
||||
|
||||
|
||||
|
||||
ROMIO Internals
|
||||
---------------
|
||||
|
||||
A key component of ROMIO that enables such a portable MPI-IO
|
||||
implementation is an internal abstract I/O device layer called
|
||||
ADIO. Most users of ROMIO will not need to deal with the ADIO layer at
|
||||
all. However, ADIO is useful to those who want to port ROMIO to some
|
||||
other file system. The ROMIO source code and the ADIO paper
|
||||
(see doc/README) will help you get started.
|
||||
|
||||
MPI-IO implementation issues are discussed in our IOPADS '99 paper,
|
||||
"On Implementing MPI-IO Portably and with High Performance."
|
||||
All ROMIO-related papers are available online from
|
||||
http://www.mcs.anl.gov/romio.
|
||||
|
||||
|
||||
Learning MPI-IO
|
||||
---------------
|
||||
|
||||
The book "Using MPI-2: Advanced Features of the Message-Passing
|
||||
Interface," published by MIT Press, provides a tutorial introduction to
|
||||
all aspects of MPI-2, including parallel I/O. It has lots of example
|
||||
programs. See http://www.mcs.anl.gov/mpi/usingmpi2 for further
|
||||
information about the book.
|
46
ompi/mca/io/romio314/romio/adio/Makefile.mk
Обычный файл
46
ompi/mca/io/romio314/romio/adio/Makefile.mk
Обычный файл
@ -0,0 +1,46 @@
|
||||
## -*- Mode: Makefile; -*-
|
||||
## vim: set ft=automake :
|
||||
##
|
||||
## (C) 2011 by Argonne National Laboratory.
|
||||
## See COPYRIGHT in top-level directory.
|
||||
##
|
||||
|
||||
AM_CPPFLAGS += -I$(top_builddir)/adio/include -I$(top_srcdir)/adio/include
|
||||
|
||||
noinst_HEADERS += \
|
||||
adio/include/adio.h \
|
||||
adio/include/adio_cb_config_list.h \
|
||||
adio/include/adio_extern.h \
|
||||
adio/include/adioi.h \
|
||||
adio/include/adioi_errmsg.h \
|
||||
adio/include/adioi_error.h \
|
||||
adio/include/adioi_fs_proto.h \
|
||||
adio/include/heap-sort.h \
|
||||
adio/include/mpio_error.h \
|
||||
adio/include/mpipr.h \
|
||||
adio/include/mpiu_greq.h \
|
||||
adio/include/nopackage.h \
|
||||
adio/include/mpiu_external32.h \
|
||||
adio/include/hint_fns.h
|
||||
|
||||
include $(top_srcdir)/adio/ad_gpfs/Makefile.mk
|
||||
include $(top_srcdir)/adio/ad_gpfs/bg/Makefile.mk
|
||||
include $(top_srcdir)/adio/ad_gpfs/pe/Makefile.mk
|
||||
include $(top_srcdir)/adio/ad_gridftp/Makefile.mk
|
||||
include $(top_srcdir)/adio/ad_hfs/Makefile.mk
|
||||
include $(top_srcdir)/adio/ad_lustre/Makefile.mk
|
||||
include $(top_srcdir)/adio/ad_nfs/Makefile.mk
|
||||
## NTFS builds are handled entirely by the separate Windows build system
|
||||
##include $(top_srcdir)/adio/ad_ntfs/Makefile.mk
|
||||
include $(top_srcdir)/adio/ad_panfs/Makefile.mk
|
||||
include $(top_srcdir)/adio/ad_pfs/Makefile.mk
|
||||
include $(top_srcdir)/adio/ad_piofs/Makefile.mk
|
||||
include $(top_srcdir)/adio/ad_pvfs/Makefile.mk
|
||||
include $(top_srcdir)/adio/ad_pvfs2/Makefile.mk
|
||||
include $(top_srcdir)/adio/ad_sfs/Makefile.mk
|
||||
include $(top_srcdir)/adio/ad_testfs/Makefile.mk
|
||||
include $(top_srcdir)/adio/ad_ufs/Makefile.mk
|
||||
include $(top_srcdir)/adio/ad_xfs/Makefile.mk
|
||||
include $(top_srcdir)/adio/ad_zoidfs/Makefile.mk
|
||||
include $(top_srcdir)/adio/common/Makefile.mk
|
||||
|
11
ompi/mca/io/romio314/romio/adio/ad_gpfs/.gitignore
поставляемый
Обычный файл
11
ompi/mca/io/romio314/romio/adio/ad_gpfs/.gitignore
поставляемый
Обычный файл
@ -0,0 +1,11 @@
|
||||
/Makefile
|
||||
/.deps
|
||||
/*.bb
|
||||
/*.bbg
|
||||
/*.gcda
|
||||
/*.gcno
|
||||
/.libs
|
||||
/.libstamp*
|
||||
/*.lo
|
||||
/.*-cache
|
||||
/.state-cache
|
26
ompi/mca/io/romio314/romio/adio/ad_gpfs/Makefile.mk
Обычный файл
26
ompi/mca/io/romio314/romio/adio/ad_gpfs/Makefile.mk
Обычный файл
@ -0,0 +1,26 @@
|
||||
## -*- Mode: Makefile; -*-
|
||||
## vim: set ft=automake :
|
||||
##
|
||||
## (C) 2012 by Argonne National Laboratory.
|
||||
## See COPYRIGHT in top-level directory.
|
||||
##
|
||||
|
||||
if BUILD_AD_GPFS
|
||||
|
||||
noinst_HEADERS += \
|
||||
adio/ad_gpfs/ad_gpfs_aggrs.h \
|
||||
adio/ad_gpfs/ad_gpfs.h \
|
||||
adio/ad_gpfs/ad_gpfs_tuning.h
|
||||
|
||||
romio_other_sources += \
|
||||
adio/ad_gpfs/ad_gpfs_aggrs.c \
|
||||
adio/ad_gpfs/ad_gpfs_close.c \
|
||||
adio/ad_gpfs/ad_gpfs_flush.c \
|
||||
adio/ad_gpfs/ad_gpfs_tuning.c \
|
||||
adio/ad_gpfs/ad_gpfs.c \
|
||||
adio/ad_gpfs/ad_gpfs_open.c \
|
||||
adio/ad_gpfs/ad_gpfs_hints.c \
|
||||
adio/ad_gpfs/ad_gpfs_rdcoll.c \
|
||||
adio/ad_gpfs/ad_gpfs_wrcoll.c
|
||||
|
||||
endif BUILD_AD_GPFS
|
61
ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs.c
Обычный файл
61
ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs.c
Обычный файл
@ -0,0 +1,61 @@
|
||||
/* ---------------------------------------------------------------- */
|
||||
/* (C)Copyright IBM Corp. 2007, 2008 */
|
||||
/* ---------------------------------------------------------------- */
|
||||
/**
|
||||
* \file ad_gpfs.c
|
||||
* \brief ???
|
||||
*/
|
||||
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
* Copyright (C) 2001 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
#include "ad_gpfs.h"
|
||||
|
||||
/* adioi.h has the ADIOI_Fns_struct define */
|
||||
#include "adioi.h"
|
||||
|
||||
struct ADIOI_Fns_struct ADIO_GPFS_operations = {
|
||||
ADIOI_GPFS_Open, /* Open */
|
||||
ADIOI_GEN_OpenColl, /* Collective open */
|
||||
ADIOI_GEN_ReadContig, /* ReadContig */
|
||||
ADIOI_GEN_WriteContig, /* WriteContig */
|
||||
ADIOI_GPFS_ReadStridedColl, /* ReadStridedColl */
|
||||
ADIOI_GPFS_WriteStridedColl, /* WriteStridedColl */
|
||||
ADIOI_GEN_SeekIndividual, /* SeekIndividual */
|
||||
ADIOI_GEN_Fcntl, /* Fcntl */
|
||||
#if defined(BGQPLATFORM) || defined(PEPLATFORM)
|
||||
ADIOI_GPFS_SetInfo, /* SetInfo for BlueGene or PE */
|
||||
#else
|
||||
ADIOI_GEN_SetInfo, /* SetInfo for any platform besides BlueGene or PE */
|
||||
#endif
|
||||
ADIOI_GEN_ReadStrided, /* ReadStrided */
|
||||
ADIOI_GEN_WriteStrided, /* WriteStrided */
|
||||
ADIOI_GPFS_Close, /* Close */
|
||||
#ifdef ROMIO_HAVE_WORKING_AIO
|
||||
#warning Consider BG support for NFS before enabling this.
|
||||
ADIOI_GEN_IreadContig, /* IreadContig */
|
||||
ADIOI_GEN_IwriteContig, /* IwriteContig */
|
||||
#else
|
||||
ADIOI_FAKE_IreadContig, /* IreadContig */
|
||||
ADIOI_FAKE_IwriteContig, /* IwriteContig */
|
||||
#endif
|
||||
ADIOI_GEN_IODone, /* ReadDone */
|
||||
ADIOI_GEN_IODone, /* WriteDone */
|
||||
ADIOI_GEN_IOComplete, /* ReadComplete */
|
||||
ADIOI_GEN_IOComplete, /* WriteComplete */
|
||||
ADIOI_GEN_IreadStrided, /* IreadStrided */
|
||||
ADIOI_GEN_IwriteStrided, /* IwriteStrided */
|
||||
ADIOI_GPFS_Flush, /* Flush */
|
||||
ADIOI_GEN_Resize, /* Resize */
|
||||
ADIOI_GEN_Delete, /* Delete */
|
||||
ADIOI_GEN_Feature, /* Features */
|
||||
#ifdef BGQPLATFORM
|
||||
"GPFS+BGQ: IBM GPFS for Blue Gene",
|
||||
#elif PEPLATFORM
|
||||
"GPFS+PE: IBM GPFS for PE",
|
||||
#else
|
||||
"GPFS: IBM GPFS"
|
||||
#endif
|
||||
};
|
71
ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs.h
Обычный файл
71
ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs.h
Обычный файл
@ -0,0 +1,71 @@
|
||||
/* ---------------------------------------------------------------- */
|
||||
/* (C)Copyright IBM Corp. 2007, 2008 */
|
||||
/* ---------------------------------------------------------------- */
|
||||
/**
|
||||
* \file ad_gpfs.h
|
||||
* \brief ???
|
||||
*/
|
||||
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#ifndef AD_GPFS_INCLUDE
|
||||
#define AD_GPFS_INCLUDE
|
||||
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/types.h>
|
||||
#include <fcntl.h>
|
||||
#include "adio.h"
|
||||
|
||||
#ifdef HAVE_SIGNAL_H
|
||||
#include <signal.h>
|
||||
#endif
|
||||
#ifdef HAVE_AIO_H
|
||||
#include <aio.h>
|
||||
#endif
|
||||
|
||||
|
||||
void ADIOI_GPFS_Open(ADIO_File fd, int *error_code);
|
||||
|
||||
void ADIOI_GPFS_Close(ADIO_File fd, int *error_code);
|
||||
|
||||
void ADIOI_GPFS_ReadContig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int
|
||||
*error_code);
|
||||
void ADIOI_GPFS_WriteContig(ADIO_File fd, const void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int
|
||||
*error_code);
|
||||
|
||||
void ADIOI_GPFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code);
|
||||
|
||||
void ADIOI_GPFS_WriteStrided(ADIO_File fd, const void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int
|
||||
*error_code);
|
||||
void ADIOI_GPFS_ReadStrided(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int
|
||||
*error_code);
|
||||
|
||||
void ADIOI_GPFS_ReadStridedColl(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int
|
||||
*error_code);
|
||||
|
||||
void ADIOI_GPFS_WriteStridedColl(ADIO_File fd, const void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int
|
||||
*error_code);
|
||||
|
||||
void ADIOI_GPFS_Flush(ADIO_File fd, int *error_code);
|
||||
|
||||
#include "ad_gpfs_tuning.h"
|
||||
|
||||
|
||||
#endif
|
846
ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_aggrs.c
Обычный файл
846
ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_aggrs.c
Обычный файл
@ -0,0 +1,846 @@
|
||||
/* ---------------------------------------------------------------- */
|
||||
/* (C)Copyright IBM Corp. 2007, 2008 */
|
||||
/* ---------------------------------------------------------------- */
|
||||
/**
|
||||
* \file ad_gpfs_aggrs.c
|
||||
* \brief The externally used function from this file is declared in ad_gpfs_aggrs.h
|
||||
*/
|
||||
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
* Copyright (C) 1997-2001 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
|
||||
#include "adio.h"
|
||||
#include "adio_cb_config_list.h"
|
||||
#include "ad_gpfs.h"
|
||||
#include "ad_gpfs_aggrs.h"
|
||||
|
||||
#ifdef AGGREGATION_PROFILE
|
||||
#include "mpe.h"
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef USE_DBG_LOGGING
|
||||
#define AGG_DEBUG 1
|
||||
#endif
|
||||
|
||||
#ifndef TRACE_ERR
|
||||
# define TRACE_ERR(format...)
|
||||
#endif
|
||||
|
||||
/* Comments copied from common:
|
||||
* This file contains four functions:
|
||||
*
|
||||
* ADIOI_Calc_aggregator()
|
||||
* ADIOI_Calc_file_domains()
|
||||
* ADIOI_Calc_my_req()
|
||||
* ADIOI_Calc_others_req()
|
||||
*
|
||||
* The last three of these were originally in ad_read_coll.c, but they are
|
||||
* also shared with ad_write_coll.c. I felt that they were better kept with
|
||||
* the rest of the shared aggregation code.
|
||||
*/
|
||||
|
||||
/* Discussion of values available from above:
|
||||
*
|
||||
* ADIO_Offset st_offsets[0..nprocs-1]
|
||||
* ADIO_Offset end_offsets[0..nprocs-1]
|
||||
* These contain a list of start and end offsets for each process in
|
||||
* the communicator. For example, an access at loc 10, size 10 would
|
||||
* have a start offset of 10 and end offset of 19.
|
||||
* int nprocs
|
||||
* number of processors in the collective I/O communicator
|
||||
* ADIO_Offset min_st_offset
|
||||
* ADIO_Offset fd_start[0..nprocs_for_coll-1]
|
||||
* starting location of "file domain"; region that a given process will
|
||||
* perform aggregation for (i.e. actually do I/O)
|
||||
* ADIO_Offset fd_end[0..nprocs_for_coll-1]
|
||||
* start + size - 1 roughly, but it can be less, or 0, in the case of
|
||||
* uneven distributions
|
||||
*/
|
||||
|
||||
/* Description from common/ad_aggregate.c. (Does it completely apply to bg?)
|
||||
* ADIOI_Calc_aggregator()
|
||||
*
|
||||
* The intention here is to implement a function which provides basically
|
||||
* the same functionality as in Rajeev's original version of
|
||||
* ADIOI_Calc_my_req(). He used a ceiling division approach to assign the
|
||||
* file domains, and we use the same approach here when calculating the
|
||||
* location of an offset/len in a specific file domain. Further we assume
|
||||
* this same distribution when calculating the rank_index, which is later
|
||||
* used to map to a specific process rank in charge of the file domain.
|
||||
*
|
||||
* A better (i.e. more general) approach would be to use the list of file
|
||||
* domains only. This would be slower in the case where the
|
||||
* original ceiling division was used, but it would allow for arbitrary
|
||||
* distributions of regions to aggregators. We'd need to know the
|
||||
* nprocs_for_coll in that case though, which we don't have now.
|
||||
*
|
||||
* Note a significant difference between this function and Rajeev's old code:
|
||||
* this code doesn't necessarily return a rank in the range
|
||||
* 0..nprocs_for_coll; instead you get something in 0..nprocs. This is a
|
||||
* result of the rank mapping; any set of ranks in the communicator could be
|
||||
* used now.
|
||||
*
|
||||
* Returns an integer representing a rank in the collective I/O communicator.
|
||||
*
|
||||
* The "len" parameter is also modified to indicate the amount of data
|
||||
* actually available in this file domain.
|
||||
*/
|
||||
/*
|
||||
* This is a more general aggregator search function that does not rely on the assumption
|
||||
* that each aggregator hosts the file domain with the same size
|
||||
*/
|
||||
int ADIOI_GPFS_Calc_aggregator(ADIO_File fd,
|
||||
ADIO_Offset off,
|
||||
ADIO_Offset min_off,
|
||||
ADIO_Offset *len,
|
||||
ADIO_Offset fd_size,
|
||||
ADIO_Offset *fd_start,
|
||||
ADIO_Offset *fd_end)
|
||||
{
|
||||
int rank_index, rank;
|
||||
ADIO_Offset avail_bytes;
|
||||
TRACE_ERR("Entering ADIOI_GPFS_Calc_aggregator\n");
|
||||
|
||||
ADIOI_Assert ( (off <= fd_end[fd->hints->cb_nodes-1] && off >= min_off && fd_start[0] >= min_off ) );
|
||||
|
||||
/* binary search --> rank_index is returned */
|
||||
int ub = fd->hints->cb_nodes;
|
||||
int lb = 0;
|
||||
/* get an index into our array of aggregators */
|
||||
/* Common code for striping - bg doesn't use it but it's
|
||||
here to make diff'ing easier.
|
||||
rank_index = (int) ((off - min_off + fd_size)/ fd_size - 1);
|
||||
|
||||
if (fd->hints->striping_unit > 0) {
|
||||
* wkliao: implementation for file domain alignment
|
||||
fd_start[] and fd_end[] have been aligned with file lock
|
||||
boundaries when returned from ADIOI_Calc_file_domains() so cannot
|
||||
just use simple arithmatic as above *
|
||||
rank_index = 0;
|
||||
while (off > fd_end[rank_index]) rank_index++;
|
||||
}
|
||||
bg does it's own striping below
|
||||
*/
|
||||
rank_index = fd->hints->cb_nodes / 2;
|
||||
while ( off < fd_start[rank_index] || off > fd_end[rank_index] ) {
|
||||
if ( off > fd_end [rank_index] ) {
|
||||
lb = rank_index;
|
||||
rank_index = (rank_index + ub) / 2;
|
||||
}
|
||||
else
|
||||
if ( off < fd_start[rank_index] ) {
|
||||
ub = rank_index;
|
||||
rank_index = (rank_index + lb) / 2;
|
||||
}
|
||||
}
|
||||
/* we index into fd_end with rank_index, and fd_end was allocated to be no
|
||||
* bigger than fd->hins->cb_nodes. If we ever violate that, we're
|
||||
* overrunning arrays. Obviously, we should never ever hit this abort */
|
||||
if (rank_index >= fd->hints->cb_nodes || rank_index < 0) {
|
||||
FPRINTF(stderr, "Error in ADIOI_Calc_aggregator(): rank_index(%d) >= fd->hints->cb_nodes (%d) fd_size=%lld off=%lld\n",
|
||||
rank_index,fd->hints->cb_nodes,fd_size,off);
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
/* DBG_FPRINTF ("ADIOI_GPFS_Calc_aggregator: rank_index = %d\n",
|
||||
rank_index ); */
|
||||
|
||||
/*
|
||||
* remember here that even in Rajeev's original code it was the case that
|
||||
* different aggregators could end up with different amounts of data to
|
||||
* aggregate. here we use fd_end[] to make sure that we know how much
|
||||
* data this aggregator is working with.
|
||||
*
|
||||
* the +1 is to take into account the end vs. length issue.
|
||||
*/
|
||||
avail_bytes = fd_end[rank_index] + 1 - off;
|
||||
if (avail_bytes < *len && avail_bytes > 0) {
|
||||
/* this file domain only has part of the requested contig. region */
|
||||
|
||||
*len = avail_bytes;
|
||||
}
|
||||
|
||||
/* map our index to a rank */
|
||||
/* NOTE: FOR NOW WE DON'T HAVE A MAPPING...JUST DO 0..NPROCS_FOR_COLL */
|
||||
rank = fd->hints->ranklist[rank_index];
|
||||
TRACE_ERR("Leaving ADIOI_GPFS_Calc_aggregator\n");
|
||||
|
||||
return rank;
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute a dynamic access range based file domain partition among I/O aggregators,
|
||||
* which align to the GPFS block size
|
||||
* Divide the I/O workload among "nprocs_for_coll" processes. This is
|
||||
* done by (logically) dividing the file into file domains (FDs); each
|
||||
* process may directly access only its own file domain.
|
||||
* Additional effort is to make sure that each I/O aggregator gets
|
||||
* a file domain that aligns to the GPFS block size. So, there will
|
||||
* not be any false sharing of GPFS file blocks among multiple I/O nodes.
|
||||
*
|
||||
* The common version of this now accepts a min_fd_size and striping_unit.
|
||||
* It doesn't seem necessary here (using GPFS block sizes) but keep it in mind
|
||||
* (e.g. we could pass striping unit instead of using fs_ptr->blksize).
|
||||
*/
|
||||
void ADIOI_GPFS_Calc_file_domains(ADIO_File fd,
|
||||
ADIO_Offset *st_offsets,
|
||||
ADIO_Offset *end_offsets,
|
||||
int nprocs,
|
||||
int nprocs_for_coll,
|
||||
ADIO_Offset *min_st_offset_ptr,
|
||||
ADIO_Offset **fd_start_ptr,
|
||||
ADIO_Offset **fd_end_ptr,
|
||||
ADIO_Offset *fd_size_ptr,
|
||||
void *fs_ptr)
|
||||
{
|
||||
ADIO_Offset min_st_offset, max_end_offset, *fd_start, *fd_end, *fd_size;
|
||||
int i, aggr;
|
||||
TRACE_ERR("Entering ADIOI_GPFS_Calc_file_domains\n");
|
||||
blksize_t blksize;
|
||||
|
||||
#ifdef AGGREGATION_PROFILE
|
||||
MPE_Log_event (5004, 0, NULL);
|
||||
#endif
|
||||
|
||||
# if AGG_DEBUG
|
||||
static char myname[] = "ADIOI_GPFS_Calc_file_domains";
|
||||
DBG_FPRINTF(stderr, "%s(%d): %d aggregator(s)\n",
|
||||
myname,__LINE__,nprocs_for_coll);
|
||||
# endif
|
||||
if (fd->blksize <= 0)
|
||||
/* default to 1M if blksize unset */
|
||||
fd->blksize = 1048576;
|
||||
blksize = fd->blksize;
|
||||
|
||||
# if AGG_DEBUG
|
||||
DBG_FPRINTF(stderr,"%s(%d): Blocksize=%ld\n",myname,__LINE__,blksize);
|
||||
# endif
|
||||
/* find min of start offsets and max of end offsets of all processes */
|
||||
min_st_offset = st_offsets [0];
|
||||
max_end_offset = end_offsets[0];
|
||||
for (i=1; i<nprocs; i++) {
|
||||
min_st_offset = ADIOI_MIN(min_st_offset, st_offsets[i]);
|
||||
max_end_offset = ADIOI_MAX(max_end_offset, end_offsets[i]);
|
||||
}
|
||||
|
||||
/* DBG_FPRINTF(stderr, "_calc_file_domains, min_st_offset, max_
|
||||
= %qd, %qd\n", min_st_offset, max_end_offset );*/
|
||||
|
||||
/* determine the "file domain (FD)" of each process, i.e., the portion of
|
||||
the file that will be "owned" by each process */
|
||||
|
||||
ADIO_Offset gpfs_ub = (max_end_offset +blksize-1) / blksize * blksize - 1;
|
||||
ADIO_Offset gpfs_lb = min_st_offset / blksize * blksize;
|
||||
ADIO_Offset gpfs_ub_rdoff = (max_end_offset +blksize-1) / blksize * blksize - 1 - max_end_offset;
|
||||
ADIO_Offset gpfs_lb_rdoff = min_st_offset - min_st_offset / blksize * blksize;
|
||||
ADIO_Offset fd_gpfs_range = gpfs_ub - gpfs_lb + 1;
|
||||
|
||||
int naggs = nprocs_for_coll;
|
||||
|
||||
/* Tweak the file domains so that no fd is smaller than a threshold. We
|
||||
* have to strike a balance between efficency and parallelism: somewhere
|
||||
* between 10k processes sending 32-byte requests and one process sending a
|
||||
* 320k request is a (system-dependent) sweet spot
|
||||
|
||||
This is from the common code - the new min_fd_size parm that we didn't implement.
|
||||
(And common code uses a different declaration of fd_size so beware)
|
||||
|
||||
if (fd_size < min_fd_size)
|
||||
fd_size = min_fd_size;
|
||||
*/
|
||||
fd_size = (ADIO_Offset *) ADIOI_Malloc(nprocs_for_coll * sizeof(ADIO_Offset));
|
||||
*fd_start_ptr = (ADIO_Offset *) ADIOI_Malloc(nprocs_for_coll * sizeof(ADIO_Offset));
|
||||
*fd_end_ptr = (ADIO_Offset *) ADIOI_Malloc(nprocs_for_coll * sizeof(ADIO_Offset));
|
||||
fd_start = *fd_start_ptr;
|
||||
fd_end = *fd_end_ptr;
|
||||
|
||||
/* each process will have a file domain of some number of gpfs blocks, but
|
||||
* the division of blocks is not likely to be even. Some file domains will
|
||||
* be "large" and others "small"
|
||||
*
|
||||
* Example: consider 17 blocks distributed over 3 aggregators.
|
||||
* nb_cn_small = 17/3 = 5
|
||||
* naggs_large = 17 - 3*(17/3) = 17 - 15 = 2
|
||||
* naggs_small = 3 - 2 = 1
|
||||
*
|
||||
* and you end up with file domains of {5-blocks, 6-blocks, 6-blocks}
|
||||
*
|
||||
* what about (relatively) small files? say, a file of 1000 blocks
|
||||
* distributed over 2064 aggregators:
|
||||
* nb_cn_small = 1000/2064 = 0
|
||||
* naggs_large = 1000 - 2064*(1000/2064) = 1000
|
||||
* naggs_small = 2064 - 1000 = 1064
|
||||
* and you end up with domains of {0, 0, 0, ... 1, 1, 1 ...}
|
||||
*
|
||||
* it might be a good idea instead of having all the zeros up front, to
|
||||
* "mix" those zeros into the fd_size array. that way, no pset/bridge-set
|
||||
* is left with zero work. In fact, even if the small file domains aren't
|
||||
* zero, it's probably still a good idea to mix the "small" file domains
|
||||
* across the fd_size array to keep the io nodes in balance */
|
||||
|
||||
|
||||
ADIO_Offset n_gpfs_blk = fd_gpfs_range / blksize;
|
||||
ADIO_Offset nb_cn_small = n_gpfs_blk/naggs;
|
||||
ADIO_Offset naggs_large = n_gpfs_blk - naggs * (n_gpfs_blk/naggs);
|
||||
ADIO_Offset naggs_small = naggs - naggs_large;
|
||||
|
||||
#ifdef BGQPLATFORM
|
||||
if (gpfsmpio_balancecontig == 1) {
|
||||
/* File domains blocks are assigned to aggregators in a breadth-first
|
||||
* fashion relative to the ions - additionally, file domains on the
|
||||
* aggregators sharing the same bridgeset and ion have contiguous
|
||||
* offsets. */
|
||||
|
||||
// initialize everything to small
|
||||
for (i=0; i<naggs; i++)
|
||||
fd_size[i] = nb_cn_small * blksize;
|
||||
|
||||
// go thru and distribute the large across the bridges
|
||||
|
||||
/* bridelistoffset: agg rank list offsets using the bridgelist - each
|
||||
* entry is created by adding up the indexes for the aggs from all
|
||||
* previous bridges */
|
||||
int *bridgelistoffset =
|
||||
(int *) ADIOI_Malloc(fd->hints->fs_hints.bg.numbridges*sizeof(int));
|
||||
/* tmpbridgelistnum: copy of the bridgelistnum whose entries can be
|
||||
* decremented to keep track of bridge assignments during the actual
|
||||
* large block assignments to the agg rank list*/
|
||||
int *tmpbridgelistnum =
|
||||
(int *) ADIOI_Malloc(fd->hints->fs_hints.bg.numbridges*sizeof(int));
|
||||
|
||||
int j;
|
||||
for (j=0;j<fd->hints->fs_hints.bg.numbridges;j++) {
|
||||
int k, bridgerankoffset = 0;
|
||||
for (k=0;k<j;k++) {
|
||||
bridgerankoffset += fd->hints->fs_hints.bg.bridgelistnum[k];
|
||||
}
|
||||
bridgelistoffset[j] = bridgerankoffset;
|
||||
}
|
||||
|
||||
for (j=0;j<fd->hints->fs_hints.bg.numbridges;j++)
|
||||
tmpbridgelistnum[j] = fd->hints->fs_hints.bg.bridgelistnum[j];
|
||||
int bridgeiter = 0;
|
||||
|
||||
/* distribute the large blocks across the aggs going breadth-first
|
||||
* across the bridgelist - this distributes the fd sizes across the
|
||||
* ions, so later in the file domain assignment when it iterates thru
|
||||
* the ranklist the offsets will be contiguous within the bridge and
|
||||
* ion as well */
|
||||
for (j=0;j<naggs_large;j++) {
|
||||
int foundbridge = 0;
|
||||
int numbridgelistpasses = 0;
|
||||
while (!foundbridge) {
|
||||
if (tmpbridgelistnum[bridgeiter] > 0) {
|
||||
foundbridge = 1;
|
||||
/*
|
||||
printf("bridgeiter is %d tmpbridgelistnum[bridgeiter] is %d bridgelistoffset[bridgeiter] is %d\n",bridgeiter,tmpbridgelistnum[bridgeiter],bridgelistoffset[bridgeiter]);
|
||||
printf("naggs is %d bridgeiter is %d bridgelistoffset[bridgeiter] is %d tmpbridgelistnum[bridgeiter] is %d\n",naggs, bridgeiter,bridgelistoffset[bridgeiter],tmpbridgelistnum[bridgeiter]);
|
||||
printf("naggs is %d bridgeiter is %d setting fd_size[%d]\n",naggs, bridgeiter,bridgelistoffset[bridgeiter]+(fd->hints->bridgelistnum[bridgeiter]-tmpbridgelistnum[bridgeiter]));
|
||||
*/
|
||||
int currentbridgelistnum =
|
||||
(fd->hints->fs_hints.bg.bridgelistnum[bridgeiter]-
|
||||
tmpbridgelistnum[bridgeiter]);
|
||||
int currentfdsizeindex = bridgelistoffset[bridgeiter] +
|
||||
currentbridgelistnum;
|
||||
fd_size[currentfdsizeindex] = (nb_cn_small+1) * blksize;
|
||||
tmpbridgelistnum[bridgeiter]--;
|
||||
}
|
||||
if (bridgeiter == (fd->hints->fs_hints.bg.numbridges-1)) {
|
||||
/* guard against infinite loop - should only ever make 1 pass
|
||||
* thru bridgelist */
|
||||
ADIOI_Assert(numbridgelistpasses == 0);
|
||||
numbridgelistpasses++;
|
||||
bridgeiter = 0;
|
||||
}
|
||||
else
|
||||
bridgeiter++;
|
||||
}
|
||||
}
|
||||
ADIOI_Free(tmpbridgelistnum);
|
||||
ADIOI_Free(bridgelistoffset);
|
||||
|
||||
} else {
|
||||
/* BG/L- and BG/P-style distribution of file domains: simple allocation of
|
||||
* file domins to each aggregator */
|
||||
for (i=0; i<naggs; i++) {
|
||||
if (i < naggs_large) {
|
||||
fd_size[i] = (nb_cn_small+1) * blksize;
|
||||
} else {
|
||||
fd_size[i] = nb_cn_small * blksize;
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifdef balancecontigtrace
|
||||
int myrank;
|
||||
MPI_Comm_rank(fd->comm,&myrank);
|
||||
if (myrank == 0) {
|
||||
fprintf(stderr,"naggs_small is %d nb_cn_small is %d\n",naggs_small,nb_cn_small);
|
||||
for (i=0; i<naggs; i++) {
|
||||
fprintf(stderr,"fd_size[%d] set to %d agg rank is %d\n",i,fd_size[i],fd->hints->ranklist[i]);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#else // not BGQ platform
|
||||
for (i=0; i<naggs; i++) {
|
||||
if (i < naggs_large) {
|
||||
fd_size[i] = (nb_cn_small+1) * blksize;
|
||||
} else {
|
||||
fd_size[i] = nb_cn_small * blksize;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
# if AGG_DEBUG
|
||||
DBG_FPRINTF(stderr,"%s(%d): "
|
||||
"gpfs_ub %llu, "
|
||||
"gpfs_lb %llu, "
|
||||
"gpfs_ub_rdoff %llu, "
|
||||
"gpfs_lb_rdoff %llu, "
|
||||
"fd_gpfs_range %llu, "
|
||||
"n_gpfs_blk %llu, "
|
||||
"nb_cn_small %llu, "
|
||||
"naggs_large %llu, "
|
||||
"naggs_small %llu, "
|
||||
"\n",
|
||||
myname,__LINE__,
|
||||
gpfs_ub ,
|
||||
gpfs_lb ,
|
||||
gpfs_ub_rdoff,
|
||||
gpfs_lb_rdoff,
|
||||
fd_gpfs_range,
|
||||
n_gpfs_blk ,
|
||||
nb_cn_small ,
|
||||
naggs_large ,
|
||||
naggs_small
|
||||
);
|
||||
# endif
|
||||
|
||||
fd_size[0] -= gpfs_lb_rdoff;
|
||||
fd_size[naggs-1] -= gpfs_ub_rdoff;
|
||||
|
||||
/* compute the file domain for each aggr */
|
||||
ADIO_Offset offset = min_st_offset;
|
||||
for (aggr=0; aggr<naggs; aggr++) {
|
||||
fd_start[aggr] = offset;
|
||||
fd_end [aggr] = offset + fd_size[aggr] - 1;
|
||||
offset += fd_size[aggr];
|
||||
}
|
||||
|
||||
*fd_size_ptr = fd_size[0];
|
||||
*min_st_offset_ptr = min_st_offset;
|
||||
|
||||
#ifdef AGGREGATION_PROFILE
|
||||
MPE_Log_event (5005, 0, NULL);
|
||||
#endif
|
||||
ADIOI_Free (fd_size);
|
||||
TRACE_ERR("Leaving ADIOI_GPFS_Calc_file_domains\n");
|
||||
}
|
||||
|
||||
/*
|
||||
* ADIOI_GPFS_Calc_my_req() overrides ADIOI_Calc_my_req for the default implementation
|
||||
* is specific for static file domain partitioning.
|
||||
*
|
||||
* ADIOI_Calc_my_req() - calculate what portions of the access requests
|
||||
* of this process are located in the file domains of various processes
|
||||
* (including this one)
|
||||
*/
|
||||
void ADIOI_GPFS_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list, ADIO_Offset *len_list,
|
||||
int contig_access_count, ADIO_Offset
|
||||
min_st_offset, ADIO_Offset *fd_start,
|
||||
ADIO_Offset *fd_end, ADIO_Offset fd_size,
|
||||
int nprocs,
|
||||
int *count_my_req_procs_ptr,
|
||||
int **count_my_req_per_proc_ptr,
|
||||
ADIOI_Access **my_req_ptr,
|
||||
int **buf_idx_ptr)
|
||||
/* Possibly reconsider if buf_idx's are ok as int's, or should they be aints/offsets?
|
||||
They are used as memory buffer indices so it seems like the 2G limit is in effect */
|
||||
{
|
||||
int *count_my_req_per_proc, count_my_req_procs, *buf_idx;
|
||||
int i, l, proc;
|
||||
ADIO_Offset fd_len, rem_len, curr_idx, off;
|
||||
ADIOI_Access *my_req;
|
||||
TRACE_ERR("Entering ADIOI_GPFS_Calc_my_req\n");
|
||||
|
||||
#ifdef AGGREGATION_PROFILE
|
||||
MPE_Log_event (5024, 0, NULL);
|
||||
#endif
|
||||
*count_my_req_per_proc_ptr = (int *) ADIOI_Calloc(nprocs,sizeof(int));
|
||||
count_my_req_per_proc = *count_my_req_per_proc_ptr;
|
||||
/* count_my_req_per_proc[i] gives the no. of contig. requests of this
|
||||
process in process i's file domain. calloc initializes to zero.
|
||||
I'm allocating memory of size nprocs, so that I can do an
|
||||
MPI_Alltoall later on.*/
|
||||
|
||||
buf_idx = (int *) ADIOI_Malloc(nprocs*sizeof(int));
|
||||
/* buf_idx is relevant only if buftype_is_contig.
|
||||
buf_idx[i] gives the index into user_buf where data received
|
||||
from proc. i should be placed. This allows receives to be done
|
||||
without extra buffer. This can't be done if buftype is not contig. */
|
||||
|
||||
/* initialize buf_idx to -1 */
|
||||
for (i=0; i < nprocs; i++) buf_idx[i] = -1;
|
||||
|
||||
/* one pass just to calculate how much space to allocate for my_req;
|
||||
* contig_access_count was calculated way back in ADIOI_Calc_my_off_len()
|
||||
*/
|
||||
for (i=0; i < contig_access_count; i++) {
|
||||
/* short circuit offset/len processing if len == 0
|
||||
* (zero-byte read/write */
|
||||
if (len_list[i] == 0)
|
||||
continue;
|
||||
off = offset_list[i];
|
||||
fd_len = len_list[i];
|
||||
/* note: we set fd_len to be the total size of the access. then
|
||||
* ADIOI_Calc_aggregator() will modify the value to return the
|
||||
* amount that was available from the file domain that holds the
|
||||
* first part of the access.
|
||||
*/
|
||||
/* BES */
|
||||
proc = ADIOI_GPFS_Calc_aggregator(fd, off, min_st_offset, &fd_len, fd_size,
|
||||
fd_start, fd_end);
|
||||
count_my_req_per_proc[proc]++;
|
||||
|
||||
/* figure out how much data is remaining in the access (i.e. wasn't
|
||||
* part of the file domain that had the starting byte); we'll take
|
||||
* care of this data (if there is any) in the while loop below.
|
||||
*/
|
||||
rem_len = len_list[i] - fd_len;
|
||||
|
||||
while (rem_len > 0) {
|
||||
off += fd_len; /* point to first remaining byte */
|
||||
fd_len = rem_len; /* save remaining size, pass to calc */
|
||||
proc = ADIOI_GPFS_Calc_aggregator(fd, off, min_st_offset, &fd_len,
|
||||
fd_size, fd_start, fd_end);
|
||||
|
||||
count_my_req_per_proc[proc]++;
|
||||
rem_len -= fd_len; /* reduce remaining length by amount from fd */
|
||||
}
|
||||
}
|
||||
|
||||
/* now allocate space for my_req, offset, and len */
|
||||
|
||||
*my_req_ptr = (ADIOI_Access *)
|
||||
ADIOI_Malloc(nprocs*sizeof(ADIOI_Access));
|
||||
my_req = *my_req_ptr;
|
||||
|
||||
count_my_req_procs = 0;
|
||||
for (i=0; i < nprocs; i++) {
|
||||
if (count_my_req_per_proc[i]) {
|
||||
my_req[i].offsets = (ADIO_Offset *)
|
||||
ADIOI_Malloc(count_my_req_per_proc[i] * sizeof(ADIO_Offset));
|
||||
my_req[i].lens =
|
||||
ADIOI_Malloc(count_my_req_per_proc[i] * sizeof(ADIO_Offset));
|
||||
count_my_req_procs++;
|
||||
}
|
||||
my_req[i].count = 0; /* will be incremented where needed
|
||||
later */
|
||||
}
|
||||
|
||||
/* now fill in my_req */
|
||||
curr_idx = 0;
|
||||
for (i=0; i<contig_access_count; i++) {
|
||||
/* short circuit offset/len processing if len == 0
|
||||
* (zero-byte read/write */
|
||||
if (len_list[i] == 0)
|
||||
continue;
|
||||
off = offset_list[i];
|
||||
fd_len = len_list[i];
|
||||
proc = ADIOI_GPFS_Calc_aggregator(fd, off, min_st_offset, &fd_len, fd_size,
|
||||
fd_start, fd_end);
|
||||
|
||||
/* for each separate contiguous access from this process */
|
||||
if (buf_idx[proc] == -1)
|
||||
{
|
||||
ADIOI_Assert(curr_idx == (int) curr_idx);
|
||||
buf_idx[proc] = (int) curr_idx;
|
||||
}
|
||||
|
||||
l = my_req[proc].count;
|
||||
curr_idx += fd_len;
|
||||
|
||||
rem_len = len_list[i] - fd_len;
|
||||
|
||||
/* store the proc, offset, and len information in an array
|
||||
* of structures, my_req. Each structure contains the
|
||||
* offsets and lengths located in that process's FD,
|
||||
* and the associated count.
|
||||
*/
|
||||
my_req[proc].offsets[l] = off;
|
||||
my_req[proc].lens[l] = fd_len;
|
||||
my_req[proc].count++;
|
||||
|
||||
while (rem_len > 0) {
|
||||
off += fd_len;
|
||||
fd_len = rem_len;
|
||||
proc = ADIOI_GPFS_Calc_aggregator(fd, off, min_st_offset, &fd_len,
|
||||
fd_size, fd_start, fd_end);
|
||||
|
||||
if (buf_idx[proc] == -1)
|
||||
{
|
||||
ADIOI_Assert(curr_idx == (int) curr_idx);
|
||||
buf_idx[proc] = (int) curr_idx;
|
||||
}
|
||||
|
||||
l = my_req[proc].count;
|
||||
curr_idx += fd_len;
|
||||
rem_len -= fd_len;
|
||||
|
||||
my_req[proc].offsets[l] = off;
|
||||
my_req[proc].lens[l] = fd_len;
|
||||
my_req[proc].count++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
#ifdef AGG_DEBUG
|
||||
for (i=0; i<nprocs; i++) {
|
||||
if (count_my_req_per_proc[i] > 0) {
|
||||
DBG_FPRINTF(stderr, "data needed from %d (count = %d):\n", i,
|
||||
my_req[i].count);
|
||||
for (l=0; l < my_req[i].count; l++) {
|
||||
DBG_FPRINTF(stderr, " off[%d] = %lld, len[%d] = %lld\n", l,
|
||||
my_req[i].offsets[l], l, my_req[i].lens[l]);
|
||||
}
|
||||
}
|
||||
DBG_FPRINTF(stderr, "buf_idx[%d] = 0x%x\n", i, buf_idx[i]);
|
||||
}
|
||||
#endif
|
||||
|
||||
*count_my_req_procs_ptr = count_my_req_procs;
|
||||
*buf_idx_ptr = buf_idx;
|
||||
#ifdef AGGREGATION_PROFILE
|
||||
MPE_Log_event (5025, 0, NULL);
|
||||
#endif
|
||||
TRACE_ERR("Leaving ADIOI_GPFS_Calc_my_req\n");
|
||||
}
|
||||
|
||||
/*
|
||||
* ADIOI_Calc_others_req (copied to bg and switched to all to all for performance)
|
||||
*
|
||||
* param[in] count_my_req_procs Number of processes whose file domain my
|
||||
* request touches.
|
||||
* param[in] count_my_req_per_proc count_my_req_per_proc[i] gives the no. of
|
||||
* contig. requests of this process in
|
||||
* process i's file domain.
|
||||
* param[in] my_req A structure defining my request
|
||||
* param[in] nprocs Number of nodes in the block
|
||||
* param[in] myrank Rank of this node
|
||||
* param[out] count_others_req_proc_ptr Number of processes whose requests lie in
|
||||
* my process's file domain (including my
|
||||
* process itself)
|
||||
* param[out] others_req_ptr Array of other process' requests that lie
|
||||
* in my process's file domain
|
||||
*/
|
||||
void ADIOI_GPFS_Calc_others_req(ADIO_File fd, int count_my_req_procs,
|
||||
int *count_my_req_per_proc,
|
||||
ADIOI_Access *my_req,
|
||||
int nprocs, int myrank,
|
||||
int *count_others_req_procs_ptr,
|
||||
ADIOI_Access **others_req_ptr)
|
||||
{
|
||||
TRACE_ERR("Entering ADIOI_GPFS_Calc_others_req\n");
|
||||
/* determine what requests of other processes lie in this process's
|
||||
file domain */
|
||||
|
||||
/* count_others_req_procs = number of processes whose requests lie in
|
||||
this process's file domain (including this process itself)
|
||||
count_others_req_per_proc[i] indicates how many separate contiguous
|
||||
requests of proc. i lie in this process's file domain. */
|
||||
|
||||
int *count_others_req_per_proc, count_others_req_procs;
|
||||
int i;
|
||||
ADIOI_Access *others_req;
|
||||
|
||||
/* Parameters for MPI_Alltoallv */
|
||||
int *scounts, *sdispls, *rcounts, *rdispls;
|
||||
|
||||
/* Parameters for MPI_Alltoallv. These are the buffers, which
|
||||
* are later computed to be the lowest address of all buffers
|
||||
* to be sent/received for offsets and lengths. Initialize to
|
||||
* the highest possible address which is the current minimum.
|
||||
*/
|
||||
void *sendBufForOffsets=(void*)0xFFFFFFFFFFFFFFFF,
|
||||
*sendBufForLens =(void*)0xFFFFFFFFFFFFFFFF,
|
||||
*recvBufForOffsets=(void*)0xFFFFFFFFFFFFFFFF,
|
||||
*recvBufForLens =(void*)0xFFFFFFFFFFFFFFFF;
|
||||
|
||||
/* first find out how much to send/recv and from/to whom */
|
||||
#ifdef AGGREGATION_PROFILE
|
||||
MPE_Log_event (5026, 0, NULL);
|
||||
#endif
|
||||
/* Send 1 int to each process. count_my_req_per_proc[i] is the number of
|
||||
* requests that my process will do to the file domain owned by process[i].
|
||||
* Receive 1 int from each process. count_others_req_per_proc[i] is the number of
|
||||
* requests that process[i] will do to the file domain owned by my process.
|
||||
*/
|
||||
count_others_req_per_proc = (int *) ADIOI_Malloc(nprocs*sizeof(int));
|
||||
/* cora2a1=timebase(); */
|
||||
/*for(i=0;i<nprocs;i++) ?*/
|
||||
MPI_Alltoall(count_my_req_per_proc, 1, MPI_INT,
|
||||
count_others_req_per_proc, 1, MPI_INT, fd->comm);
|
||||
|
||||
/* total_cora2a+=timebase()-cora2a1; */
|
||||
|
||||
/* Allocate storage for an array of other nodes' accesses of our
|
||||
* node's file domain. Also allocate storage for the alltoallv
|
||||
* parameters.
|
||||
*/
|
||||
*others_req_ptr = (ADIOI_Access *)
|
||||
ADIOI_Malloc(nprocs*sizeof(ADIOI_Access));
|
||||
others_req = *others_req_ptr;
|
||||
|
||||
scounts = ADIOI_Malloc(nprocs*sizeof(int));
|
||||
sdispls = ADIOI_Malloc(nprocs*sizeof(int));
|
||||
rcounts = ADIOI_Malloc(nprocs*sizeof(int));
|
||||
rdispls = ADIOI_Malloc(nprocs*sizeof(int));
|
||||
|
||||
/* If process[i] has any requests in my file domain,
|
||||
* initialize an ADIOI_Access structure that will describe each request
|
||||
* from process[i]. The offsets, lengths, and buffer pointers still need
|
||||
* to be obtained to complete the setting of this structure.
|
||||
*/
|
||||
count_others_req_procs = 0;
|
||||
for (i=0; i<nprocs; i++) {
|
||||
if (count_others_req_per_proc[i])
|
||||
{
|
||||
others_req[i].count = count_others_req_per_proc[i];
|
||||
|
||||
others_req[i].offsets = (ADIO_Offset *)
|
||||
ADIOI_Malloc(count_others_req_per_proc[i]*sizeof(ADIO_Offset));
|
||||
others_req[i].lens =
|
||||
ADIOI_Malloc(count_others_req_per_proc[i]*sizeof(ADIO_Offset));
|
||||
|
||||
if ( (MPIR_Upint)others_req[i].offsets < (MPIR_Upint)recvBufForOffsets )
|
||||
recvBufForOffsets = others_req[i].offsets;
|
||||
if ( (MPIR_Upint)others_req[i].lens < (MPIR_Upint)recvBufForLens )
|
||||
recvBufForLens = others_req[i].lens;
|
||||
|
||||
others_req[i].mem_ptrs = (MPI_Aint *)
|
||||
ADIOI_Malloc(count_others_req_per_proc[i]*sizeof(MPI_Aint));
|
||||
|
||||
count_others_req_procs++;
|
||||
}
|
||||
else
|
||||
{
|
||||
others_req[i].count = 0;
|
||||
others_req[i].offsets = NULL;
|
||||
others_req[i].lens = NULL;
|
||||
}
|
||||
}
|
||||
/* If no recv buffer was allocated in the loop above, make it NULL */
|
||||
if ( recvBufForOffsets == (void*)0xFFFFFFFFFFFFFFFF) recvBufForOffsets = NULL;
|
||||
if ( recvBufForLens == (void*)0xFFFFFFFFFFFFFFFF) recvBufForLens = NULL;
|
||||
|
||||
/* Now send the calculated offsets and lengths to respective processes */
|
||||
|
||||
/************************/
|
||||
/* Exchange the offsets */
|
||||
/************************/
|
||||
|
||||
/* Determine the lowest sendBufForOffsets/Lens */
|
||||
for (i=0; i<nprocs; i++)
|
||||
{
|
||||
if ( (my_req[i].count) &&
|
||||
((MPIR_Upint)my_req[i].offsets <= (MPIR_Upint)sendBufForOffsets) )
|
||||
{
|
||||
sendBufForOffsets = my_req[i].offsets;
|
||||
}
|
||||
|
||||
if ( (my_req[i].count) &&
|
||||
((MPIR_Upint)my_req[i].lens <= (MPIR_Upint)sendBufForLens) )
|
||||
{
|
||||
sendBufForLens = my_req[i].lens;
|
||||
}
|
||||
}
|
||||
|
||||
/* If no send buffer was found in the loop above, make it NULL */
|
||||
if ( sendBufForOffsets == (void*)0xFFFFFFFFFFFFFFFF) sendBufForOffsets = NULL;
|
||||
if ( sendBufForLens == (void*)0xFFFFFFFFFFFFFFFF) sendBufForLens = NULL;
|
||||
|
||||
/* Calculate the displacements from the sendBufForOffsets/Lens */
|
||||
for (i=0; i<nprocs; i++)
|
||||
{
|
||||
/* Send these offsets to process i.*/
|
||||
scounts[i] = count_my_req_per_proc[i];
|
||||
if ( scounts[i] == 0 )
|
||||
sdispls[i] = 0;
|
||||
else
|
||||
sdispls[i] = (int)
|
||||
( ( (MPIR_Upint)my_req[i].offsets -
|
||||
(MPIR_Upint)sendBufForOffsets ) /
|
||||
(MPIR_Upint)sizeof(ADIO_Offset) );
|
||||
|
||||
/* Receive these offsets from process i.*/
|
||||
rcounts[i] = count_others_req_per_proc[i];
|
||||
if ( rcounts[i] == 0 )
|
||||
rdispls[i] = 0;
|
||||
else
|
||||
rdispls[i] = (int)
|
||||
( ( (MPIR_Upint)others_req[i].offsets -
|
||||
(MPIR_Upint)recvBufForOffsets ) /
|
||||
(MPIR_Upint)sizeof(ADIO_Offset) );
|
||||
}
|
||||
|
||||
/* Exchange the offsets */
|
||||
MPI_Alltoallv(sendBufForOffsets,
|
||||
scounts, sdispls, ADIO_OFFSET,
|
||||
recvBufForOffsets,
|
||||
rcounts, rdispls, ADIO_OFFSET,
|
||||
fd->comm);
|
||||
|
||||
/************************/
|
||||
/* Exchange the lengths */
|
||||
/************************/
|
||||
|
||||
for (i=0; i<nprocs; i++)
|
||||
{
|
||||
/* Send these lengths to process i.*/
|
||||
scounts[i] = count_my_req_per_proc[i];
|
||||
if ( scounts[i] == 0 )
|
||||
sdispls[i] = 0;
|
||||
else
|
||||
sdispls[i] = (int)
|
||||
( ( (MPIR_Upint)my_req[i].lens -
|
||||
(MPIR_Upint)sendBufForLens ) /
|
||||
(MPIR_Upint) sizeof(ADIO_Offset) );
|
||||
|
||||
/* Receive these offsets from process i. */
|
||||
rcounts[i] = count_others_req_per_proc[i];
|
||||
if ( rcounts[i] == 0 )
|
||||
rdispls[i] = 0;
|
||||
else
|
||||
rdispls[i] = (int)
|
||||
( ( (MPIR_Upint)others_req[i].lens -
|
||||
(MPIR_Upint)recvBufForLens ) /
|
||||
(MPIR_Upint) sizeof(ADIO_Offset) );
|
||||
}
|
||||
|
||||
/* Exchange the lengths */
|
||||
MPI_Alltoallv(sendBufForLens,
|
||||
scounts, sdispls, ADIO_OFFSET,
|
||||
recvBufForLens,
|
||||
rcounts, rdispls, ADIO_OFFSET,
|
||||
fd->comm);
|
||||
|
||||
/* Clean up */
|
||||
ADIOI_Free(count_others_req_per_proc);
|
||||
ADIOI_Free (scounts);
|
||||
ADIOI_Free (sdispls);
|
||||
ADIOI_Free (rcounts);
|
||||
ADIOI_Free (rdispls);
|
||||
|
||||
*count_others_req_procs_ptr = count_others_req_procs;
|
||||
#ifdef AGGREGATION_PROFILE
|
||||
MPE_Log_event (5027, 0, NULL);
|
||||
#endif
|
||||
TRACE_ERR("Leaving ADIOI_GPFS_Calc_others_req\n");
|
||||
}
|
86
ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_aggrs.h
Обычный файл
86
ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_aggrs.h
Обычный файл
@ -0,0 +1,86 @@
|
||||
/* ---------------------------------------------------------------- */
|
||||
/* (C)Copyright IBM Corp. 2007, 2008 */
|
||||
/* ---------------------------------------------------------------- */
|
||||
/**
|
||||
* \file ad_gpfs_aggrs.h
|
||||
* \brief Declarations of the aggregation functions optimized for GPFS
|
||||
*/
|
||||
|
||||
/*
|
||||
* File: ad_gpfs_aggrs.h
|
||||
*
|
||||
* Declares functions optimized specifically for GPFS parallel I/O solution.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef AD_GPFS_AGGRS_H_
|
||||
#define AD_GPFS_AGGRS_H_
|
||||
|
||||
#include "adio.h"
|
||||
#include <sys/stat.h>
|
||||
|
||||
#ifdef HAVE_GPFS_H
|
||||
#include <gpfs.h>
|
||||
#endif
|
||||
|
||||
|
||||
/* overriding ADIOI_Calc_file_domains() to apply 'aligned file domain partitioning'. */
|
||||
void ADIOI_GPFS_Calc_file_domains(ADIO_File fd,
|
||||
ADIO_Offset *st_offsets,
|
||||
ADIO_Offset *end_offsets,
|
||||
int nprocs,
|
||||
int nprocs_for_coll,
|
||||
ADIO_Offset *min_st_offset_ptr,
|
||||
ADIO_Offset **fd_start_ptr,
|
||||
ADIO_Offset **fd_end_ptr,
|
||||
ADIO_Offset *fd_size_ptr,
|
||||
void *fs_ptr);
|
||||
|
||||
/* overriding ADIOI_Calc_aggregator() for the default implementation is specific for
|
||||
static file domain partitioning */
|
||||
int ADIOI_GPFS_Calc_aggregator(ADIO_File fd,
|
||||
ADIO_Offset off,
|
||||
ADIO_Offset min_off,
|
||||
ADIO_Offset *len,
|
||||
ADIO_Offset fd_size,
|
||||
ADIO_Offset *fd_start,
|
||||
ADIO_Offset *fd_end);
|
||||
|
||||
/* overriding ADIOI_Calc_my_req for the default implementation is specific for
|
||||
static file domain partitioning */
|
||||
void ADIOI_GPFS_Calc_my_req ( ADIO_File fd, ADIO_Offset *offset_list, ADIO_Offset *len_list,
|
||||
int contig_access_count, ADIO_Offset
|
||||
min_st_offset, ADIO_Offset *fd_start,
|
||||
ADIO_Offset *fd_end, ADIO_Offset fd_size,
|
||||
int nprocs,
|
||||
int *count_my_req_procs_ptr,
|
||||
int **count_my_req_per_proc_ptr,
|
||||
ADIOI_Access **my_req_ptr,
|
||||
int **buf_idx_ptr);
|
||||
|
||||
/*
|
||||
* ADIOI_Calc_others_req
|
||||
*
|
||||
* param[in] count_my_req_procs Number of processes whose file domain my
|
||||
* request touches.
|
||||
* param[in] count_my_req_per_proc count_my_req_per_proc[i] gives the no. of
|
||||
* contig. requests of this process in
|
||||
* process i's file domain.
|
||||
* param[in] my_req A structure defining my request
|
||||
* param[in] nprocs Number of nodes in the block
|
||||
* param[in] myrank Rank of this node
|
||||
* param[out] count_others_req_proc_ptr Number of processes whose requests lie in
|
||||
* my process's file domain (including my
|
||||
* process itself)
|
||||
* param[out] others_req_ptr Array of other process' requests that lie
|
||||
* in my process's file domain
|
||||
*/
|
||||
void ADIOI_GPFS_Calc_others_req(ADIO_File fd, int count_my_req_procs,
|
||||
int *count_my_req_per_proc,
|
||||
ADIOI_Access *my_req,
|
||||
int nprocs, int myrank,
|
||||
int *count_others_req_procs_ptr,
|
||||
ADIOI_Access **others_req_ptr);
|
||||
|
||||
|
||||
#endif /* AD_GPFS_AGGRS_H_ */
|
57
ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_close.c
Обычный файл
57
ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_close.c
Обычный файл
@ -0,0 +1,57 @@
|
||||
/* ---------------------------------------------------------------- */
|
||||
/* (C)Copyright IBM Corp. 2007, 2008 */
|
||||
/* ---------------------------------------------------------------- */
|
||||
/**
|
||||
* \file ad_gpfs_close.c
|
||||
* \brief Close operation for files opened through the GPFS ADIO driver
|
||||
*/
|
||||
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_gpfs.h"
|
||||
#include "ad_gpfs_tuning.h"
|
||||
#include <unistd.h>
|
||||
|
||||
/*
 * ADIOI_GPFS_Close - close the file descriptors held by an ADIO file.
 *
 * Closes fd->null_fd (if open), fd->fd_sys and fd->fd_direct (if open),
 * releases fd->fs_ptr and marks fd_sys/fd_direct as closed (-1).
 *
 * On return *error_code is MPI_SUCCESS, or an MPI_ERR_IO error code built
 * from the errno of the failing close() if closing fd_sys or fd_direct
 * failed.  If both fail, the errno of the fd_direct close is reported.
 */
void ADIOI_GPFS_Close(ADIO_File fd, int *error_code)
{
    int err, derr=0;
    /* errno of the most recent failing close(); captured right away because
     * the calls made between the close() and the error report may change
     * errno */
    int close_errno = 0;
    static char myname[] = "ADIOI_GPFS_CLOSE";

#ifdef PROFILE
    MPE_Log_event(9, 0, "start close");
#endif

    /* the result of closing null_fd is deliberately not reported */
    if (fd->null_fd >= 0)
        close(fd->null_fd);

    err = close(fd->fd_sys);
    if (err == -1)
        close_errno = errno;

    if (fd->fd_direct >= 0)
    {
        derr = close(fd->fd_direct);
        if (derr == -1)
            close_errno = errno;
    }

#ifdef PROFILE
    MPE_Log_event(10, 0, "end close");
#endif

    /* FPRINTF(stderr,"%s(%d):'%s'. Free %#X\n",myname,__LINE__,fd->filename,(int)fd->fs_ptr);*/
    if (fd->fs_ptr != NULL) {
        ADIOI_Free(fd->fs_ptr);
        fd->fs_ptr = NULL;
    }
    fd->fd_sys = -1;
    fd->fd_direct = -1;

    if (err == -1 || derr == -1)
    {
        *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__, MPI_ERR_IO,
                                           "**io",
                                           "**io %s", strerror(close_errno));
    }
    else *error_code = MPI_SUCCESS;
}
|
68
ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_flush.c
Обычный файл
68
ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_flush.c
Обычный файл
@ -0,0 +1,68 @@
|
||||
/* ---------------------------------------------------------------- */
|
||||
/* (C)Copyright IBM Corp. 2007, 2008 */
|
||||
/* ---------------------------------------------------------------- */
|
||||
/**
|
||||
* \file ad_gpfs_flush.c
|
||||
* \brief Scalable flush for GPFS
|
||||
*/
|
||||
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_gpfs.h"
|
||||
|
||||
/*
 * ADIOI_GPFS_Flush - flush a file to storage from a single process.
 *
 * All processes of fd->comm synchronize with a barrier; the process whose
 * rank equals fd->hints->ranklist[0] then calls fsync() on fd->fd_sys and
 * broadcasts the outcome (0, or the errno of the failed fsync) to everyone.
 *
 * On return *error_code is MPI_SUCCESS, or an MPI_ERR_IO error code on every
 * process if the fsync failed.  In the failure case errno is also set to the
 * broadcast value before the error code is created.
 */
void ADIOI_GPFS_Flush(ADIO_File fd, int *error_code)
{
    int err=0;
    static char myname[] = "ADIOI_GPFS_FLUSH";

    int rank;

    MPI_Comm_rank(fd->comm, &rank);

    /* the old logic about who is an fsync aggregator and who is not fell down
     * when deferred open was enabled.  Instead, make this look more like
     * ad_pvfs2_flush.  If one day the I/O aggregators have something they need
     * to flush, we can consult the 'fd->hints->ranklist[]' array.  For now, a
     * flush from one process should suffice */

    /* ensure all other processes are done writing.  On many platforms
     * MPI_Reduce is fastest because it has the lightest constraints.  On Blue
     * Gene, BARRIER is optimized */
    MPI_Barrier(fd->comm);

    if (rank == fd->hints->ranklist[0]) {
        int rc = fsync(fd->fd_sys);
        /* capture errno before anything else (including the debug print
         * below) gets a chance to modify it */
        int fsync_errno = errno;
        DBG_FPRINTF(stderr,"aggregation:fsync %s, err=%#X, errno=%#X\n",fd->filename, rc, fsync_errno);
        /* We want errno, not the return code if it failed */
        err = (rc == -1) ? fsync_errno : 0;
    }
    /* err is an int, so broadcast it with the matching MPI datatype */
    MPI_Bcast(&err, 1, MPI_INT, fd->hints->ranklist[0], fd->comm);
    DBGV_FPRINTF(stderr,"aggregation result:fsync %s, errno %#X,\n",fd->filename, err);

    /* --BEGIN ERROR HANDLING-- */
    if (err)    /* if it's non-zero, it must be an errno */
    {
        const int flush_errno = err;

        errno = flush_errno;
        err = -1;
        *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__, MPI_ERR_IO,
                                           "**io",
                                           "**io %s", strerror(flush_errno));
        DBGT_FPRINTF(stderr,"fsync %s, err=%#X, errno=%#X\n",fd->filename, err, flush_errno);
        return;
    }
    /* --END ERROR HANDLING-- */

    *error_code = MPI_SUCCESS;
}
|
||||
|
288
ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_hints.c
Обычный файл
288
ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_hints.c
Обычный файл
@ -0,0 +1,288 @@
|
||||
/* ---------------------------------------------------------------- */
|
||||
/* (C)Copyright IBM Corp. 2007, 2008 */
|
||||
/* ---------------------------------------------------------------- */
|
||||
/**
|
||||
* \file ad_gpfs_hints.c
|
||||
* \brief GPFS hint processing - for now, only used for BlueGene and PE platforms
|
||||
*/
|
||||
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "adio.h"
|
||||
#include "adio_extern.h"
|
||||
#include "hint_fns.h"
|
||||
|
||||
#include "ad_gpfs.h"
|
||||
|
||||
#define ADIOI_GPFS_CB_BUFFER_SIZE_DFLT "16777216"
|
||||
#define ADIOI_GPFS_IND_RD_BUFFER_SIZE_DFLT "4194304"
|
||||
#define ADIOI_GPFS_IND_WR_BUFFER_SIZE_DFLT "4194304"
|
||||
|
||||
#ifdef BGQPLATFORM
|
||||
#define ADIOI_BG_NAGG_IN_PSET_HINT_NAME "bg_nodes_pset"
|
||||
#endif
|
||||
|
||||
/** \page mpiio_vars MPIIO Configuration
|
||||
*
|
||||
* GPFS MPIIO configuration and performance tuning. Used by ad_gpfs ADIO.
|
||||
*
|
||||
* Used for BlueGene and PE platforms, which each have their own aggregator selection
|
||||
* algorithms that ignore user provided cb_config_list.
|
||||
*
|
||||
* \section hint_sec Hints
|
||||
* - bg_nodes_pset - BlueGene only - specify how many aggregators to use per pset.
|
||||
* This hint will override the cb_nodes hint based on BlueGene psets.
|
||||
* - N - Use N nodes per pset as aggregators.
|
||||
* - Default is based on partition configuration and cb_nodes.
|
||||
*
|
||||
* The following default key/value pairs may differ from other platform defaults.
|
||||
*
|
||||
* - key = cb_buffer_size value = 16777216
|
||||
* - key = romio_cb_read value = enable
|
||||
* - key = romio_cb_write value = enable
|
||||
* - key = ind_rd_buffer_size value = 4194304
|
||||
* - key = ind_wr_buffer_size value = 4194304
|
||||
*/
|
||||
|
||||
#ifdef BGQPLATFORM
|
||||
/* Compute the aggregator-related parameters that are required in 2-phase collective IO of ADIO. */
|
||||
extern int
|
||||
ADIOI_BG_gen_agg_ranklist(ADIO_File fd, int n_proxy_per_pset);
|
||||
#elif PEPLATFORM
|
||||
extern int
|
||||
ADIOI_PE_gen_agg_ranklist(ADIO_File fd);
|
||||
#endif
|
||||
|
||||
/**
 * Install default and user-supplied hints on a GPFS file.
 *
 * - If fd->info is MPI_INFO_NULL a new info object is created.
 * - If fd->hints has not been initialized yet, both fd->info and fd->hints
 *   are filled with the GPFS defaults (and the tuning environment variables
 *   are read via ad_gpfs_get_env_vars()).
 * - If users_info is not MPI_INFO_NULL, every hint in it that this routine
 *   understands overrides the current value.
 * - On BGQPLATFORM / PEPLATFORM builds the aggregator rank list is
 *   regenerated whenever defaults were installed or (BGQ only) the
 *   bg_nodes_pset hint was supplied.
 *
 * \param fd          file whose info object and hints are updated
 * \param users_info  hints supplied by the caller, or MPI_INFO_NULL
 * \param error_code  always set to MPI_SUCCESS on return; any code stored by
 *                    the ADIOI_Info_check_and_install_* helpers along the way
 *                    is overwritten
 */
void ADIOI_GPFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
{
    MPI_Info info;
    char *value;
    int flag, intval, nprocs = 0;
    static char myname[] = "ADIOI_GPFS_SETINFO";

    /* becomes 1 when something happened that requires the aggregator rank
     * list to be (re)generated */
    int did_anything = 0;

    if (fd->info == MPI_INFO_NULL) MPI_Info_create(&(fd->info));
    info = fd->info;

    /* Note that fd->hints is allocated at file open time; thus it is
     * not necessary to allocate it, or check for allocation, here.
     */

    /* scratch buffer for reading/formatting info values; freed on exit */
    value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
    ADIOI_Assert ((value != NULL));

    /* initialize info and hints to default values if they haven't been
     * previously initialized
     */
    if (!fd->hints->initialized) {

        ad_gpfs_get_env_vars();
        did_anything = 1;

        /* buffer size for collective I/O */
        ADIOI_Info_set(info, "cb_buffer_size", ADIOI_GPFS_CB_BUFFER_SIZE_DFLT);
        fd->hints->cb_buffer_size = atoi(ADIOI_GPFS_CB_BUFFER_SIZE_DFLT);

        /* GPFS default: collective buffering is enabled (not "automatic")
         * for both reads and writes
         */
        ADIOI_Info_set(info, "romio_cb_read", "enable");
        fd->hints->cb_read = ADIOI_HINT_ENABLE;
        ADIOI_Info_set(info, "romio_cb_write", "enable");
        fd->hints->cb_write = ADIOI_HINT_ENABLE;

        /* any previously stored cb_config_list is dropped */
        if ( fd->hints->cb_config_list != NULL ) ADIOI_Free (fd->hints->cb_config_list);
        fd->hints->cb_config_list = NULL;

        /* number of processes that perform I/O in collective I/O: the info
         * object advertises the communicator size, while the cached hint is
         * set to -1 */
        MPI_Comm_size(fd->comm, &nprocs);
        ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", nprocs);
        ADIOI_Info_set(info, "cb_nodes", value);
        fd->hints->cb_nodes = -1;

        /* hint indicating that no indep. I/O will be performed on this file */
        ADIOI_Info_set(info, "romio_no_indep_rw", "false");
        fd->hints->no_indep_rw = 0;

        /* gpfs is not implementing file realms (ADIOI_IOStridedColl), so
         * initialize the persistent-file-realm hint to disabled. */
        ADIOI_Info_set(info, "romio_cb_pfr", "disable");
        fd->hints->cb_pfr = ADIOI_HINT_DISABLE;

        /* hint guiding the assignment of persistent file realms */
        ADIOI_Info_set(info, "romio_cb_fr_types", "aar");
        fd->hints->cb_fr_type = ADIOI_FR_AAR;

        /* hint to align file realms with a certain byte value */
        ADIOI_Info_set(info, "romio_cb_fr_alignment", "1");
        fd->hints->cb_fr_alignment = 1;

        /* hint to set a threshold percentage for a datatype's size/extent at
         * which data sieving should be done in collective I/O */
        ADIOI_Info_set(info, "romio_cb_ds_threshold", "0");
        fd->hints->cb_ds_threshold = 0;

        /* hint to switch between point-to-point or all-to-all for two-phase */
        ADIOI_Info_set(info, "romio_cb_alltoall", "automatic");
        fd->hints->cb_alltoall = ADIOI_HINT_AUTO;

        /* deferred_open derived from no_indep_rw and cb_{read,write} */
        fd->hints->deferred_open = 0;

        /* buffer size for data sieving in independent reads */
        ADIOI_Info_set(info, "ind_rd_buffer_size", ADIOI_GPFS_IND_RD_BUFFER_SIZE_DFLT);
        fd->hints->ind_rd_buffer_size = atoi(ADIOI_GPFS_IND_RD_BUFFER_SIZE_DFLT);

        /* buffer size for data sieving in independent writes */
        ADIOI_Info_set(info, "ind_wr_buffer_size", ADIOI_GPFS_IND_WR_BUFFER_SIZE_DFLT);
        fd->hints->ind_wr_buffer_size = atoi(ADIOI_GPFS_IND_WR_BUFFER_SIZE_DFLT);

        ADIOI_Info_set(info, "romio_ds_read", "automatic");
        fd->hints->ds_read = ADIOI_HINT_AUTO;
        ADIOI_Info_set(info, "romio_ds_write", "automatic");
        fd->hints->ds_write = ADIOI_HINT_AUTO;

        /* still to do: tune this a bit for a variety of file systems. there's
         * no good default value so just leave it unset */
        fd->hints->min_fdomain_size = 0;
        fd->hints->striping_unit = 0;

        fd->hints->initialized = 1;
    }

    /* add in user's info if supplied */
    if (users_info != MPI_INFO_NULL) {
        ADIOI_Info_check_and_install_int(fd, users_info, "cb_buffer_size",
                                         &(fd->hints->cb_buffer_size), myname, error_code);

        /* hints for enabling/disabling coll. buffering on reads/writes */
        ADIOI_Info_check_and_install_enabled(fd, users_info, "romio_cb_read",
                                             &(fd->hints->cb_read), myname, error_code);
        if (fd->hints->cb_read == ADIOI_HINT_DISABLE) {
            /* romio_cb_read overrides no_indep_rw.  no_indep_rw is handled
             * as a 0/1 flag throughout this routine, so store 0 to match
             * the "false" string rather than a tri-state hint constant. */
            ADIOI_Info_set(info, "romio_no_indep_rw", "false");
            fd->hints->no_indep_rw = 0;
        }
        ADIOI_Info_check_and_install_enabled(fd, users_info, "romio_cb_write",
                                             &(fd->hints->cb_write), myname, error_code);
        if (fd->hints->cb_write == ADIOI_HINT_DISABLE) {
            /* romio_cb_write overrides no_indep_rw (0/1 flag, see above) */
            ADIOI_Info_set(info, "romio_no_indep_rw", "false");
            fd->hints->no_indep_rw = 0;
        }

        /* Has the user indicated all I/O will be done collectively? */
        ADIOI_Info_check_and_install_true(fd, users_info, "romio_no_indep_rw",
                                          &(fd->hints->no_indep_rw), myname, error_code);
        if (fd->hints->no_indep_rw == 1) {
            /* if 'no_indep_rw' set, also hint that we will do
             * collective buffering: if we aren't doing independent io,
             * then we have to do collective */
            ADIOI_Info_set(info, "romio_cb_write", "enable");
            ADIOI_Info_set(info, "romio_cb_read", "enable");
            fd->hints->cb_read = 1;
            fd->hints->cb_write = 1;
        }

        /* hints for enabling/disabling data sieving on reads/writes */
        ADIOI_Info_check_and_install_enabled(fd, users_info, "romio_ds_read",
                                             &(fd->hints->ds_read), myname, error_code);
        ADIOI_Info_check_and_install_enabled(fd, users_info, "romio_ds_write",
                                             &(fd->hints->ds_write), myname, error_code);

        ADIOI_Info_check_and_install_int(fd, users_info, "ind_wr_buffer_size",
                                         &(fd->hints->ind_wr_buffer_size), myname, error_code);
        ADIOI_Info_check_and_install_int(fd, users_info, "ind_rd_buffer_size",
                                         &(fd->hints->ind_rd_buffer_size), myname, error_code);

        /* romio_min_fdomain_size is only accepted when it parses to a
         * positive integer */
        memset( value, 0, MPI_MAX_INFO_VAL+1 );
        ADIOI_Info_get(users_info, "romio_min_fdomain_size", MPI_MAX_INFO_VAL,
                       value, &flag);
        if ( flag && ((intval = atoi(value)) > 0) ) {
            ADIOI_Info_set(info, "romio_min_fdomain_size", value);
            fd->hints->min_fdomain_size = intval;
        }

        /* Now we use striping unit in common code so we should
         * process hints for it. */
        ADIOI_Info_check_and_install_int(fd, users_info, "striping_unit",
                                         &(fd->hints->striping_unit), myname, error_code);

#ifdef BGQPLATFORM
        /* a positive bg_nodes_pset value replaces cb_nodes and forces the
         * aggregator rank list to be regenerated below */
        memset( value, 0, MPI_MAX_INFO_VAL+1 );
        ADIOI_Info_get(users_info, ADIOI_BG_NAGG_IN_PSET_HINT_NAME, MPI_MAX_INFO_VAL,
                       value, &flag);
        if (flag && ((intval = atoi(value)) > 0)) {
            did_anything = 1;
            ADIOI_Info_set(info, ADIOI_BG_NAGG_IN_PSET_HINT_NAME, value);
            fd->hints->cb_nodes = intval;
        }
#endif
    }

    /* special CB aggregator assignment */
    if (did_anything) {
#ifdef BGQPLATFORM
        ADIOI_BG_gen_agg_ranklist(fd, fd->hints->cb_nodes);
#elif PEPLATFORM
        ADIOI_PE_gen_agg_ranklist(fd);
#endif
    }

    /* deferred_open won't be set by callers, but if the user doesn't
     * explicitly disable collective buffering (two-phase) and does hint that
     * io w/o independent io is going on, we'll set this internal hint as a
     * convenience */
    if ( (fd->hints->cb_read != ADIOI_HINT_DISABLE)
         && (fd->hints->cb_write != ADIOI_HINT_DISABLE)
         && fd->hints->no_indep_rw ) {
        fd->hints->deferred_open = 1;
    } else {
        /* setting romio_no_indep_rw enable and romio_cb_{read,write}
         * disable at the same time doesn't make sense. honor
         * romio_cb_{read,write} and force the no_indep_rw hint to
         * 'disable' */
        ADIOI_Info_set(info, "romio_no_indep_rw", "false");
        fd->hints->no_indep_rw = 0;
        fd->hints->deferred_open = 0;
    }

    /* BobC commented this out, but since hint processing runs on both bg and
     * bglockless, we need to keep DS writes enabled on gpfs and disabled on
     * PVFS */
    if (ADIO_Feature(fd, ADIO_DATA_SIEVING_WRITES) == 0) {
        /* disable data sieving for fs that do not
         * support file locking */
        ADIOI_Info_get(info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL,
                       value, &flag);
        if (flag) {
            /* remove the key from the info object if it is set */
            ADIOI_Info_delete(info, "ind_wr_buffer_size");
        }
        /* note: only the info key is removed; the cached
         * fd->hints->ind_wr_buffer_size is left alone because it is used
         * for other cases as well. -- Rob Ross, 04/22/2003
         */
        ADIOI_Info_set(info, "romio_ds_write", "disable");
        fd->hints->ds_write = ADIOI_HINT_DISABLE;
    }

    ADIOI_Free(value);

    *error_code = MPI_SUCCESS;
}
|
156
ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_open.c
Обычный файл
156
ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_open.c
Обычный файл
@ -0,0 +1,156 @@
|
||||
/* ---------------------------------------------------------------- */
|
||||
/* (C)Copyright IBM Corp. 2007, 2008 */
|
||||
/* ---------------------------------------------------------------- */
|
||||
/**
|
||||
* \file ad_gpfs_open.c
|
||||
* \brief GPFS-specific file open routine (ADIOI_GPFS_Open) for the ad_gpfs ADIO driver
|
||||
*/
|
||||
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_gpfs.h"
|
||||
#include "ad_gpfs_tuning.h"
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
|
||||
#ifdef HAVE_GPFS_H
|
||||
#include <gpfs.h>
|
||||
#endif
|
||||
#ifdef HAVE_GPFS_FCNTL_H
|
||||
#include <gpfs_fcntl.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_GPFS_FCNTL_H
|
||||
static void gpfs_free_all_locks(int fd)
|
||||
{
|
||||
int rc;
|
||||
struct {
|
||||
gpfsFcntlHeader_t header;
|
||||
gpfsFreeRange_t release;
|
||||
} release_all;
|
||||
|
||||
release_all.header.totalLength = sizeof(release_all);
|
||||
release_all.header.fcntlVersion = GPFS_FCNTL_CURRENT_VERSION;
|
||||
release_all.header.fcntlReserved = 0;
|
||||
|
||||
release_all.release.structLen = sizeof(release_all.release);
|
||||
release_all.release.structType = GPFS_FREE_RANGE;
|
||||
release_all.release.start = 0;
|
||||
release_all.release.length = 0;
|
||||
|
||||
rc = gpfs_fcntl(fd, &release_all);
|
||||
if (rc != 0) {
|
||||
DBGV_FPRINTF(stderr,"GPFS fcntl release failed with rc=%d, errno=%d\n",
|
||||
rc,errno);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/**
 * Open fd->filename with POSIX open() on behalf of the ad_gpfs driver.
 *
 * Translates fd->access_mode into open(2) flags, derives the creation mode
 * from fd->perm (or from the process umask when fd->perm is ADIO_PERM_NULL),
 * and fills in fd->fd_sys, fd->fd_direct, fd->null_fd and fd->blksize.  With
 * ADIO_APPEND the file pointers are positioned at end of file.
 *
 * \param fd          file object; fd->filename, fd->access_mode, fd->perm,
 *                    fd->comm and fd->hints->ranklist are read
 * \param error_code  MPI_SUCCESS when open() succeeded, otherwise the code
 *                    built by ADIOI_Err_create_code() from open()'s errno
 */
void ADIOI_GPFS_Open(ADIO_File fd, int *error_code)
{
    int perm, old_mask, amode, rank, rc;
    int open_errno;   /* errno captured right after open(); later calls may clobber errno */
    static char myname[] = "ADIOI_GPFS_OPEN";

    /* set internal variables for tuning environment variables */
    ad_gpfs_get_env_vars();

    if (fd->perm == ADIO_PERM_NULL) {
        /* the umask can only be read by setting it: set a throwaway value
         * and restore the original immediately */
        old_mask = umask(022);
        umask(old_mask);
        /* NOTE(review): XOR rather than (0666 & ~old_mask).  open() applies
         * the umask again, so any bits set in the mask are cleared anyway. */
        perm = old_mask ^ 0666;
    }
    else perm = fd->perm;

    /* map ADIO access-mode bits onto open(2) flags */
    amode = 0;
    if (fd->access_mode & ADIO_CREATE)
        amode = amode | O_CREAT;
    if (fd->access_mode & ADIO_RDONLY)
        amode = amode | O_RDONLY;
    if (fd->access_mode & ADIO_WRONLY)
        amode = amode | O_WRONLY;
    if (fd->access_mode & ADIO_RDWR)
        amode = amode | O_RDWR;
    if (fd->access_mode & ADIO_EXCL)
        amode = amode | O_EXCL;

#ifdef ADIOI_MPE_LOGGING
    MPE_Log_event(ADIOI_MPE_open_a, 0, NULL);
#endif
    fd->fd_sys = open(fd->filename, amode, perm);
    /* Save errno now: logging, debug output and the optional /dev/null open
     * below may all overwrite it before the error code is built. */
    open_errno = errno;
#ifdef ADIOI_MPE_LOGGING
    MPE_Log_event(ADIOI_MPE_open_b, 0, NULL);
#endif
    DBG_FPRINTF(stderr,"open('%s',%#X,%#X) rc=%d, errno=%d\n",fd->filename,amode,perm,fd->fd_sys,open_errno);
    fd->fd_direct = -1;

    /* with GPFSMPIO_DEVNULLIO enabled a /dev/null descriptor is kept in
     * fd->null_fd; otherwise it is marked unused */
    if (gpfsmpio_devnullio == 1) {
        fd->null_fd = open("/dev/null", O_RDWR);
    } else {
        fd->null_fd = -1;
    }

    if ((fd->fd_sys != -1) && (fd->access_mode & ADIO_APPEND))
        fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END);

    if (fd->fd_sys != -1)
    {
        fd->blksize = 1048576; /* default to 1M */

#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event(ADIOI_MPE_stat_a, 0, NULL);
#endif
        /* in this fs-specific routine, we might not be called over entire
         * communicator (deferred open).  Collect statistics on one process.
         * ADIOI_GEN_Opencoll (common-code caller) will take care of the
         * broadcast */

        MPI_Comm_rank(fd->comm, &rank);
        if ((rank == fd->hints->ranklist[0]) || (fd->comm == MPI_COMM_SELF)) {
            struct stat64 gpfs_statbuf;
            /* Get the (real) underlying file system block size */
            rc = stat64(fd->filename, &gpfs_statbuf);
            if (rc >= 0)
            {
                fd->blksize = gpfs_statbuf.st_blksize;
                /* cast so the argument matches the %ld conversion */
                DBGV_FPRINTF(stderr,"Successful stat '%s'.  Blocksize=%ld\n",
                             fd->filename,(long)gpfs_statbuf.st_blksize);
            }
            else
            {
                DBGV_FPRINTF(stderr,"Stat '%s' failed with rc=%d, errno=%d\n",
                             fd->filename,rc,errno);
            }
        }
        /* all other ranks still hold the 1M default in fd->blksize, but
         * ADIOI_GEN_Opencoll will take care of that in both standard and
         * deferred-open case */

#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event(ADIOI_MPE_stat_b, 0, NULL);
#endif

#ifdef HAVE_GPFS_FCNTL_H
        /* in parallel workload, might be helpful to immediately release block
         * tokens.  Or, system call overhead will outweigh any benefits... */
        if (getenv("ROMIO_GPFS_FREE_LOCKS")!=NULL)
            gpfs_free_all_locks(fd->fd_sys);

#endif
    }

    if (fd->fd_sys == -1) {
        *error_code = ADIOI_Err_create_code(myname, fd->filename, open_errno);
    }
    else *error_code = MPI_SUCCESS;
}
|
||||
/*
|
||||
*vim: ts=8 sts=4 sw=4 noexpandtab
|
||||
*/
|
1171
ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_rdcoll.c
Обычный файл
1171
ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_rdcoll.c
Обычный файл
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
277
ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_tuning.c
Обычный файл
277
ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_tuning.c
Обычный файл
@ -0,0 +1,277 @@
|
||||
/* ---------------------------------------------------------------- */
|
||||
/* (C)Copyright IBM Corp. 2007, 2008 */
|
||||
/* ---------------------------------------------------------------- */
|
||||
/**
|
||||
* \file ad_gpfs_tuning.c
|
||||
* \brief Defines ad_gpfs performance tuning
|
||||
*/
|
||||
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
* Copyright (C) 2008 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
/*---------------------------------------------------------------------
|
||||
* ad_gpfs_tuning.c
|
||||
*
|
||||
* defines global variables and functions for performance tuning and
|
||||
* functional debugging.
|
||||
*---------------------------------------------------------------------*/
|
||||
|
||||
#include "ad_gpfs_tuning.h"
|
||||
#include "mpi.h"
|
||||
|
||||
#if !defined(PVFS2_SUPER_MAGIC)
|
||||
#define PVFS2_SUPER_MAGIC (0x20030528)
|
||||
#endif
|
||||
|
||||
|
||||
int gpfsmpio_timing;
|
||||
int gpfsmpio_timing2;
|
||||
int gpfsmpio_timing_cw_level;
|
||||
int gpfsmpio_comm;
|
||||
int gpfsmpio_tunegather;
|
||||
int gpfsmpio_tuneblocking;
|
||||
long bglocklessmpio_f_type;
|
||||
int gpfsmpio_bg_nagg_pset;
|
||||
int gpfsmpio_pthreadio;
|
||||
int gpfsmpio_p2pcontig;
|
||||
int gpfsmpio_balancecontig;
|
||||
int gpfsmpio_devnullio;
|
||||
int gpfsmpio_bridgeringagg;
|
||||
|
||||
double gpfsmpio_prof_cw [GPFSMPIO_CIO_LAST+1];
|
||||
double gpfsmpio_prof_cr [GPFSMPIO_CIO_LAST+1];
|
||||
|
||||
/* set internal variables for tuning environment variables */
|
||||
/** \page mpiio_vars MPIIO Configuration
|
||||
\section env_sec Environment Variables
|
||||
* - GPFSMPIO_COMM - Define how data is exchanged on collective
|
||||
* reads and writes. Possible values:
|
||||
* - 0 - Use MPI_Alltoallv.
|
||||
* - 1 - Use MPI_Isend/MPI_Irecv.
|
||||
* - Default is 0.
|
||||
*
|
||||
* - GPFSMPIO_TIMING - collect timing breakdown for MPI I/O collective calls.
|
||||
* Possible values:
|
||||
* - 0 - Do not collect/report timing.
|
||||
* - 1 - Collect/report timing.
|
||||
* - Default is 0.
|
||||
*
|
||||
* - GPFSMPIO_TUNEGATHER - Tune how starting and ending offsets are communicated
|
||||
* for aggregator collective i/o. Possible values:
|
||||
* - 0 - Use two MPI_Allgather's to collect starting and ending offsets.
|
||||
* - 1 - Use MPI_Allreduce(MPI_MAX) to collect starting and ending offsets.
|
||||
* - Default is 1.
|
||||
*
|
||||
* - GPFSMPIO_TUNEBLOCKING - Tune how aggregate file domains are
|
||||
* calculated (block size). Possible values:
|
||||
* - 0 - Evenly calculate file domains across aggregators. Also use
|
||||
* MPI_Isend/MPI_Irecv to exchange domain information.
|
||||
* - 1 - Align file domains with the underlying file system's block size. Also use
|
||||
* MPI_Alltoallv to exchange domain information.
|
||||
* - Default is 1.
|
||||
*
|
||||
* - BGLOCKLESSMPIO_F_TYPE - Specify a filesystem type that should run
|
||||
* the ad_bglockless driver. NOTE: Using romio prefixes (such as
|
||||
* "bg:" or "bglockless:") on a file name will override this environment
|
||||
* variable. Possible values:
|
||||
* - 0xnnnnnnnn - Any valid file system type (or "magic number") from
|
||||
* statfs() field f_type.
|
||||
* - The default is 0x20030528 (PVFS2_SUPER_MAGIC)
|
||||
*
|
||||
* - GPFSMPIO_NAGG_PSET - Specify a ratio of "I/O aggregators" to use for each
|
||||
* compute group (compute nodes + i/o nodes). Possible values:
|
||||
* - any integer
|
||||
* - Default is 16 (ADIOI_BG_NAGG_PSET_DFLT)
|
||||
*
|
||||
* - GPFSMPIO_PTHREADIO - Enables a very simple form of asynchronous io where a
|
||||
* pthread is spawned to do the posix writes while the main thread does the
|
||||
* data aggregation - useful for large files where multiple rounds are
|
||||
* required (more than the cb_buffer_size of data per aggregator). User
|
||||
* must ensure there is hw resource available for the thread to run. I
|
||||
* am sure there is a better way to do this involving comm threads - this is
|
||||
* just a start. NOTE: For some reason the stats collected when this is
|
||||
* enabled misses some of the data so the data sizes are off a bit - this is
|
||||
* a statistical issue only, the data is still accurately written out
|
||||
*
|
||||
* - GPFSMPIO_P2PCONTIG - Does simple point-to-point communication between the
|
||||
* aggregator and the procs that feed it. Performance could be enhanced by a
|
||||
* one-sided put algorithm. Current implementation allows only 1 round of
|
||||
* data. Useful/allowed only when:
|
||||
* 1.) The datatype is contiguous.
|
||||
* 2.) The offsets are increasing in rank-order.
|
||||
* 3.) There are no gaps between the offsets.
|
||||
* 4.) No single rank has a data size which spans multiple file domains.
|
||||
*
|
||||
* - GPFSMPIO_BALANCECONTIG - Relevant only to BGQ. File domain blocks are assigned
|
||||
* to aggregators in a breadth-first fashion relative to the ions - additionally,
|
||||
* file domains on the aggregators sharing the same bridgeset and ion have contiguous
|
||||
* offsets. The breadth-first assignment improves performance in the case of
|
||||
* a relatively small file of size less than the gpfs block size multiplied
|
||||
* by the number of ions. Files: ad_gpfs_aggrs.c ad_bg_aggrs.c. Possible Values
|
||||
* - 0 - assign file domain blocks in the traditional manner
|
||||
* - 1 - if there are variable sized file domain blocks, spread them out
|
||||
* (balance) across bridge nodes
|
||||
*
|
||||
* - GPFSMPIO_DEVNULLIO - do everything *except* write to / read from the file
|
||||
* system. When experimenting with different two-phase I/O strategies, it's
|
||||
* helpful to remove the highly variable file system from the experiment.
|
||||
* - 0 (disabled) or 1 (enabled)
|
||||
* - Default is 0
|
||||
*
|
||||
* - GPFSMPIO_BRIDGERINGAGG - Relevant only to BGQ. Aggregator placement
|
||||
* optimization which forms a 5-d ring around the bridge node starting at
|
||||
* GPFSMPIO_BRIDGERINGAGG hops away. Experimental performance results
|
||||
* suggest best value is 1 and only in conjunction with GPFSMPIO_P2PCONTIG
|
||||
* and GPFSMPIO_BALANCECONTIG. The number of aggregators selected is still
|
||||
* GPFSMPIO_NAGG_PSET however the bridge node itself is NOT selected.
|
||||
*
|
||||
*/
|
||||
|
||||
void ad_gpfs_get_env_vars() {
|
||||
char *x, *dummy;
|
||||
|
||||
gpfsmpio_comm = 0;
|
||||
x = getenv( "GPFSMPIO_COMM" );
|
||||
if (x) gpfsmpio_comm = atoi(x);
|
||||
gpfsmpio_timing = 0;
|
||||
x = getenv( "GPFSMPIO_TIMING" );
|
||||
if (x) gpfsmpio_timing = atoi(x);
|
||||
gpfsmpio_tunegather = 1;
|
||||
x = getenv( "GPFSMPIO_TUNEGATHER" );
|
||||
if (x) gpfsmpio_tunegather = atoi(x);
|
||||
gpfsmpio_tuneblocking = 1;
|
||||
x = getenv( "GPFSMPIO_TUNEBLOCKING" );
|
||||
if (x) gpfsmpio_tuneblocking = atoi(x);
|
||||
bglocklessmpio_f_type = PVFS2_SUPER_MAGIC;
|
||||
x = getenv( "BGLOCKLESSMPIO_F_TYPE" );
|
||||
if (x) bglocklessmpio_f_type = strtol(x,&dummy,0);
|
||||
DBG_FPRINTF(stderr,"BGLOCKLESSMPIO_F_TYPE=%ld/%#lX\n",
|
||||
bglocklessmpio_f_type,bglocklessmpio_f_type);
|
||||
/* note: this value will be 'sanity checked' in ADIOI_BG_persInfo_init(),
|
||||
* when we know a bit more about what "largest possible value" and
|
||||
* "smallest possible value" should be */
|
||||
gpfsmpio_bg_nagg_pset = ADIOI_BG_NAGG_PSET_DFLT;
|
||||
x = getenv("GPFSMPIO_NAGG_PSET");
|
||||
if (x) gpfsmpio_bg_nagg_pset = atoi(x);
|
||||
|
||||
gpfsmpio_pthreadio = 0;
|
||||
x = getenv( "GPFSMPIO_PTHREADIO" );
|
||||
if (x) gpfsmpio_pthreadio = atoi(x);
|
||||
|
||||
gpfsmpio_p2pcontig = 0;
|
||||
x = getenv( "GPFSMPIO_P2PCONTIG" );
|
||||
if (x) gpfsmpio_p2pcontig = atoi(x);
|
||||
|
||||
gpfsmpio_balancecontig = 0;
|
||||
x = getenv( "GPFSMPIO_BALANCECONTIG" );
|
||||
if (x) gpfsmpio_balancecontig = atoi(x);
|
||||
|
||||
gpfsmpio_devnullio = 0;
|
||||
x = getenv( "GPFSMPIO_DEVNULLIO" );
|
||||
if (x) gpfsmpio_devnullio = atoi(x);
|
||||
|
||||
gpfsmpio_bridgeringagg = 0;
|
||||
x = getenv( "GPFSMPIO_BRIDGERINGAGG" );
|
||||
if (x) gpfsmpio_bridgeringagg = atoi(x);
|
||||
}
|
||||
|
||||
/* report timing breakdown for MPI I/O collective call */
|
||||
/* Report the timing breakdown of a collective MPI I/O call on stderr.
 *
 * Does nothing unless gpfsmpio_timing is non-zero.  Only aggregators
 * (fd->is_agg) take part: they are split off into their own communicator,
 * the per-process counters are reduced (sum and max) onto its rank 0, and
 * that rank prints a single line.
 *
 *   rw      non-zero: report the collective-write counters, zero: the
 *           collective-read counters
 *   fd      file whose communicator and is_agg flag are used
 *   myrank  not used by this routine
 *   nprocs  not used by this routine
 */
void ad_gpfs_timing_crw_report( int rw, ADIO_File fd, int myrank, int nprocs )
{
    int i;
    int nr_aggs, agg_rank;
    MPI_Comm agg_comm;
    double *local;
    double avg[ GPFSMPIO_CIO_LAST ];
    double max[ GPFSMPIO_CIO_LAST ];

    if (!gpfsmpio_timing) {
        return;
    }

    /* Timing across the whole communicator is a little bit interesting,
     * but what is *more* interesting is if we single out the aggregators
     * themselves.  non-aggregators spend a lot of time in "exchange" not
     * exchanging data, but blocked because they are waiting for
     * aggregators to finish writing.  If we focus on just the aggregator
     * processes we will get a more clear picture about the data exchange
     * vs. i/o time breakdown */

    /* if deferred open enabled, we could use the aggregator communicator */
    MPI_Comm_split(fd->comm, (fd->is_agg ? 1 : MPI_UNDEFINED), 0, &agg_comm);
    if (agg_comm == MPI_COMM_NULL) {
        /* not an aggregator: nothing to contribute, nothing to free */
        return;
    }

    MPI_Comm_size(agg_comm, &nr_aggs);
    MPI_Comm_rank(agg_comm, &agg_rank);

    local = rw ? gpfsmpio_prof_cw : gpfsmpio_prof_cr;

    /* `avg` first receives the sum; it is divided by nr_aggs on rank 0 */
    MPI_Reduce( local, avg, GPFSMPIO_CIO_LAST, MPI_DOUBLE, MPI_SUM, 0, agg_comm);
    MPI_Reduce( local, max, GPFSMPIO_CIO_LAST, MPI_DOUBLE, MPI_MAX, 0, agg_comm);

    if (agg_rank == 0) {

        for (i=0; i<GPFSMPIO_CIO_LAST; i++) avg[i] /= nr_aggs;

        /* bandwidth slots: total data size divided by the slowest
         * aggregator's time in the matching T_* slot */
        avg[ GPFSMPIO_CIO_B_POSI_RW ] =
            avg[ GPFSMPIO_CIO_DATA_SIZE ] * nr_aggs /
            max[ GPFSMPIO_CIO_T_POSI_RW ];
        avg[ GPFSMPIO_CIO_B_MPIO_RW ] =
            avg[ GPFSMPIO_CIO_DATA_SIZE ] * nr_aggs /
            max[ GPFSMPIO_CIO_T_MPIO_RW ];
        avg[ GPFSMPIO_CIO_B_MPIO_CRW ] =
            avg[ GPFSMPIO_CIO_DATA_SIZE ] * nr_aggs /
            max[ GPFSMPIO_CIO_T_MPIO_CRW ];

        fprintf(stderr,"TIMING-%1s,", (rw ? "W" : "R") );
        fprintf(stderr,"SIZE: %12.4lld , ", (long long int)(avg[ GPFSMPIO_CIO_DATA_SIZE ] * nr_aggs));
        fprintf(stderr,"SEEK-avg: %10.3f , ",
                avg[ GPFSMPIO_CIO_T_SEEK ] );
        fprintf(stderr,"SEEK-max: %10.3f , ",
                max[ GPFSMPIO_CIO_T_SEEK ] );
        fprintf(stderr,"LOCAL-avg: %10.3f , ",
                avg[ GPFSMPIO_CIO_T_LCOMP ] );
        fprintf(stderr,"GATHER-max: %10.3f , ",
                max[ GPFSMPIO_CIO_T_GATHER ] );
        fprintf(stderr,"PATTERN-avg: %10.3f , ",
                avg[ GPFSMPIO_CIO_T_PATANA ] );
        fprintf(stderr,"FILEDOMAIN-avg: %10.3f , ",
                avg[ GPFSMPIO_CIO_T_FD_PART ] );
        fprintf(stderr,"MYREQ-avg: %10.3f , ",
                avg[ GPFSMPIO_CIO_T_MYREQ ] );
        fprintf(stderr,"OTHERREQ-max: %10.3f , ",
                max[ GPFSMPIO_CIO_T_OTHREQ ] );
        fprintf(stderr,"EXCHANGE-max: %10.3f , ",
                max[ GPFSMPIO_CIO_T_DEXCH ] );
        fprintf(stderr, "EXCHANGE-RECV_EXCH-max: %10.3f , ",
                max[ GPFSMPIO_CIO_T_DEXCH_RECV_EXCH] );
        fprintf(stderr, "EXCHANGE-SETUP-max: %10.3f , ",
                max[ GPFSMPIO_CIO_T_DEXCH_SETUP] );
        fprintf(stderr, "EXCHANGE-NET-max: %10.3f , ",
                max[ GPFSMPIO_CIO_T_DEXCH_NET] );
        fprintf(stderr, "EXCHANGE-SORT-max: %10.3f , ",
                max[ GPFSMPIO_CIO_T_DEXCH_SORT] );
        fprintf(stderr, "EXCHANGE-SIEVE-max: %10.3f , ",
                max[ GPFSMPIO_CIO_T_DEXCH_SIEVE] );
        fprintf(stderr,"POSIX-TIME-avg: %10.3f , ",
                avg[ GPFSMPIO_CIO_T_POSI_RW ] );
        fprintf(stderr,"POSIX-TIME-max: %10.3f , ",
                max[ GPFSMPIO_CIO_T_POSI_RW ] );
        fprintf(stderr,"MPIIO-CONTIG-TIME-avg: %10.3f , ",
                avg[ GPFSMPIO_CIO_T_MPIO_RW ] );
        fprintf(stderr,"MPIIO-STRIDED-TIME-avg: %10.3f , ",
                avg[ GPFSMPIO_CIO_T_MPIO_CRW ] );
        fprintf(stderr,"POSIX-BW-avg: %10.3f , ",
                avg[ GPFSMPIO_CIO_B_POSI_RW ] );
        fprintf(stderr,"MPI-BW-avg: %10.3f , ",
                avg[ GPFSMPIO_CIO_B_MPIO_RW ] );
        fprintf(stderr,"MPI-BW-collective-avg: %10.3f\n ",
                avg[ GPFSMPIO_CIO_B_MPIO_CRW ] );
    }

    MPI_Comm_free(&agg_comm);
}
|
114
ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_tuning.h
Обычный файл
114
ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_tuning.h
Обычный файл
@ -0,0 +1,114 @@
|
||||
/* ---------------------------------------------------------------- */
|
||||
/* (C)Copyright IBM Corp. 2007, 2008 */
|
||||
/* ---------------------------------------------------------------- */
|
||||
/**
|
||||
* \file ad_gpfs_tuning.h
|
||||
* \brief Declares global variables and macros for ad_gpfs performance tuning and functional debugging
|
||||
*/
|
||||
|
||||
/*---------------------------------------------------------------------
|
||||
* ad_gpfs_tuning.h
|
||||
*
|
||||
* declares global variables and macros for performance tuning and
|
||||
* functional debugging.
|
||||
*---------------------------------------------------------------------*/
|
||||
|
||||
#ifndef AD_GPFS_TUNING_H_
|
||||
#define AD_GPFS_TUNING_H_
|
||||
|
||||
#include "adio.h"
|
||||
|
||||
|
||||
/*-----------------------------------------
|
||||
* Global variables for the control of
|
||||
* 1. timing
|
||||
* 2. select specific optimizations
|
||||
*-----------------------------------------*/
|
||||
|
||||
/* timing fields */
|
||||
/* Indices into gpfsmpio_prof_cw[] / gpfsmpio_prof_cr[].
 * T_* slots hold times, B_* slots hold bandwidths; GPFSMPIO_CIO_LAST is the
 * number of real slots. */
enum {
    /* data size; numerator of the B_* bandwidth figures */
    GPFSMPIO_CIO_DATA_SIZE = 0,

    GPFSMPIO_CIO_T_SEEK,

    /* time for ADIOI_Calc_my_off_len(), local */
    GPFSMPIO_CIO_T_LCOMP,

    /* time for previous MPI_Allgather, now Allreduce */
    GPFSMPIO_CIO_T_GATHER,

    /* time for a quick test if access is contiguous or not, local */
    GPFSMPIO_CIO_T_PATANA,

    /* time for file domain partitioning, local */
    GPFSMPIO_CIO_T_FD_PART,

    /* time for ADIOI_Calc_my_req(), local */
    GPFSMPIO_CIO_T_MYREQ,

    /* time for ADIOI_Calc_others_req(), short Alltoall */
    GPFSMPIO_CIO_T_OTHREQ,

    /* time for I/O data exchange */
    GPFSMPIO_CIO_T_DEXCH,

    /* the next DEXCH_* timers capture finer-grained portions of T_DEXCH */

    /* time for each process to exchange receive size info with everyone else */
    GPFSMPIO_CIO_T_DEXCH_RECV_EXCH,

    /* time for setup portion of I/O data exchange */
    GPFSMPIO_CIO_T_DEXCH_SETUP,

    /* time for network portion of I/O data exchange */
    GPFSMPIO_CIO_T_DEXCH_NET,

    /* time to sort requests in I/O data exchange */
    GPFSMPIO_CIO_T_DEXCH_SORT,

    /* time for read portion of RMW in two phase */
    GPFSMPIO_CIO_T_DEXCH_SIEVE,

    GPFSMPIO_CIO_T_POSI_RW,
    GPFSMPIO_CIO_B_POSI_RW,

    /* time for ADIOI_WriteContig() */
    GPFSMPIO_CIO_T_MPIO_RW,
    GPFSMPIO_CIO_B_MPIO_RW,

    /* time for ADIOI_GPFS_WriteStridedColl() */
    GPFSMPIO_CIO_T_MPIO_CRW,
    GPFSMPIO_CIO_B_MPIO_CRW,

    GPFSMPIO_CIO_LAST
};
|
||||
|
||||
/* +1 because GPFSMPIO_CIO_LAST is actually used to say "zero this counter" */
|
||||
extern double gpfsmpio_prof_cw [GPFSMPIO_CIO_LAST+1];
|
||||
extern double gpfsmpio_prof_cr [GPFSMPIO_CIO_LAST+1];
|
||||
|
||||
|
||||
/* corresponds to environment variables to select optimizations and timing level */
|
||||
extern int gpfsmpio_timing;
|
||||
extern int gpfsmpio_timing_cw_level;
|
||||
extern int gpfsmpio_comm;
|
||||
extern int gpfsmpio_tunegather;
|
||||
extern int gpfsmpio_tuneblocking;
|
||||
extern long bglocklessmpio_f_type;
|
||||
extern int gpfsmpio_pthreadio;
|
||||
extern int gpfsmpio_p2pcontig;
|
||||
extern int gpfsmpio_balancecontig;
|
||||
extern int gpfsmpio_devnullio;
|
||||
extern int gpfsmpio_bridgeringagg;
|
||||
|
||||
/* Default is, well, kind of complicated. Blue Gene /L and /P had "psets": one
|
||||
* i/o node and all compute nodes wired to it. On Blue Gene /Q that
|
||||
* relationship is a lot more fluid. There are still I/O nodes, and compute
|
||||
* nodes are assigned to an i/o node, but there are two routes to the i/o node,
|
||||
* via compute nodes designated as "bridge nodes". In this code, what we used
|
||||
* to call a "pset" is actually "compute nodes associated with and including a
|
||||
* bridge node". So, "nAgg" is roughly "number of aggregators per bridge", but
|
||||
* look closely at ADIOI_BG_persInfo_init() for the details */
|
||||
|
||||
#define ADIOI_BG_NAGG_PSET_DFLT 16
|
||||
|
||||
extern int gpfsmpio_bg_nagg_pset;
|
||||
|
||||
|
||||
/* set internal variables for tuning environment variables */
|
||||
void ad_gpfs_get_env_vars(void);
|
||||
|
||||
/* report timing breakdown for MPI I/O collective call */
|
||||
void ad_gpfs_timing_crw_report( int rw, ADIO_File fd, int myrank, int nprocs );
|
||||
|
||||
/* note:
|
||||
* T := timing;
|
||||
* CIO := collective I/O
|
||||
*/
|
||||
/* Zero the collective-I/O profiling slots gpfsmpio_prof_c<RW>[0 .. GPFSMPIO_CIO_LAST-1].
 * RW is token-pasted onto "gpfsmpio_prof_c" (the arrays declared in this header
 * are gpfsmpio_prof_cw and gpfsmpio_prof_cr). The extra slot at index
 * GPFSMPIO_CIO_LAST is not written by this loop. Expands to a bare brace block. */
#define GPFSMPIO_T_CIO_RESET( RW ) \
|
||||
{ \
|
||||
int _i; \
|
||||
for ( _i = 0; _i < GPFSMPIO_CIO_LAST; _i ++ ) \
|
||||
gpfsmpio_prof_c##RW [ _i ] = 0; \
|
||||
}
|
||||
|
||||
/* Forward all four arguments unchanged to ad_gpfs_timing_crw_report().
 * The expansion already ends in ';' and the definition ends with a line
 * continuation, so the line following this macro must stay blank. */
#define GPFSMPIO_T_CIO_REPORT( RW, FD, MYRANK, NPROCS ) \
|
||||
ad_gpfs_timing_crw_report ( RW, FD, MYRANK, NPROCS ); \
|
||||
|
||||
/* Sample MPI_Wtime() once, then:
 *   - if ISSET is true, store the timestamp in gpfsmpio_prof_c<RW>[VAR1];
 *   - if ISGET is true, replace gpfsmpio_prof_c<RW>[VAR2] with
 *     (timestamp - its previous content), i.e. the elapsed time when that
 *     slot previously held a start timestamp.
 * RW is token-pasted onto "gpfsmpio_prof_c". Expands to a bare brace block. */
#define GPFSMPIO_T_CIO_SET_GET(RW, ISSET, ISGET, VAR1, VAR2 ) \
|
||||
{\
|
||||
double temp = MPI_Wtime(); \
|
||||
if ( ISSET ) gpfsmpio_prof_c##RW [ VAR1 ] = temp; \
|
||||
if ( ISGET ) gpfsmpio_prof_c##RW [ VAR2 ] = temp - gpfsmpio_prof_c##RW [ VAR2 ] ;\
|
||||
}
|
||||
|
||||
#endif /* AD_GPFS_TUNING_H_ */
|
1696
ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_wrcoll.c
Обычный файл
1696
ompi/mca/io/romio314/romio/adio/ad_gpfs/ad_gpfs_wrcoll.c
Обычный файл
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
18
ompi/mca/io/romio314/romio/adio/ad_gpfs/bg/Makefile.mk
Обычный файл
18
ompi/mca/io/romio314/romio/adio/ad_gpfs/bg/Makefile.mk
Обычный файл
@ -0,0 +1,18 @@
|
||||
## -*- Mode: Makefile; -*-
|
||||
## vim: set ft=automake :
|
||||
##
|
||||
## (C) 2012 by Argonne National Laboratory.
|
||||
## See COPYRIGHT in top-level directory.
|
||||
##
|
||||
|
||||
## Blue Gene specific aggregator/pset headers and sources; they are appended
## to the build lists only when the BUILD_AD_BG conditional is true.
if BUILD_AD_BG
|
||||
|
||||
noinst_HEADERS += \
|
||||
adio/ad_gpfs/bg/ad_bg_aggrs.h \
|
||||
adio/ad_gpfs/bg/ad_bg_pset.h
|
||||
|
||||
romio_other_sources += \
|
||||
adio/ad_gpfs/bg/ad_bg_aggrs.c \
|
||||
adio/ad_gpfs/bg/ad_bg_pset.c
|
||||
|
||||
endif BUILD_AD_BG
|
675
ompi/mca/io/romio314/romio/adio/ad_gpfs/bg/ad_bg_aggrs.c
Обычный файл
675
ompi/mca/io/romio314/romio/adio/ad_gpfs/bg/ad_bg_aggrs.c
Обычный файл
@ -0,0 +1,675 @@
|
||||
/* ---------------------------------------------------------------- */
|
||||
/* (C)Copyright IBM Corp. 2007, 2008 */
|
||||
/* ---------------------------------------------------------------- */
|
||||
/**
|
||||
* \file ad_bg_aggrs.c
|
||||
* \brief The externally used function from this file is declared in ad_bg_aggrs.h
|
||||
*/
|
||||
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
* Copyright (C) 1997-2001 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
/*#define TRACE_ON */
|
||||
|
||||
// Uncomment this line to turn tracing on for the gpfsmpio_balancecontig aggr selection optimization
|
||||
// #define balancecontigtrace 1
|
||||
// #define bridgeringaggtrace 1
|
||||
|
||||
#include "adio.h"
|
||||
#include "adio_cb_config_list.h"
|
||||
#include "../ad_gpfs.h"
|
||||
#include "ad_bg_pset.h"
|
||||
#include "ad_bg_aggrs.h"
|
||||
#ifdef AGGREGATION_PROFILE
|
||||
#include "mpe.h"
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef USE_DBG_LOGGING
|
||||
#define AGG_DEBUG 1
|
||||
#endif
|
||||
|
||||
#ifndef TRACE_ERR
|
||||
# define TRACE_ERR(format...)
|
||||
#endif
|
||||
|
||||
/* Comments copied from common:
|
||||
* This file contains four functions:
|
||||
*
|
||||
* ADIOI_Calc_aggregator()
|
||||
* ADIOI_Calc_file_domains()
|
||||
* ADIOI_Calc_my_req()
|
||||
* ADIOI_Calc_others_req()
|
||||
*
|
||||
* The last three of these were originally in ad_read_coll.c, but they are
|
||||
* also shared with ad_write_coll.c. I felt that they were better kept with
|
||||
* the rest of the shared aggregation code.
|
||||
*/
|
||||
|
||||
/* Discussion of values available from above:
|
||||
*
|
||||
* ADIO_Offset st_offsets[0..nprocs-1]
|
||||
* ADIO_Offset end_offsets[0..nprocs-1]
|
||||
* These contain a list of start and end offsets for each process in
|
||||
* the communicator. For example, an access at loc 10, size 10 would
|
||||
* have a start offset of 10 and end offset of 19.
|
||||
* int nprocs
|
||||
* number of processors in the collective I/O communicator
|
||||
* ADIO_Offset min_st_offset
|
||||
* ADIO_Offset fd_start[0..nprocs_for_coll-1]
|
||||
* starting location of "file domain"; region that a given process will
|
||||
* perform aggregation for (i.e. actually do I/O)
|
||||
* ADIO_Offset fd_end[0..nprocs_for_coll-1]
|
||||
* start + size - 1 roughly, but it can be less, or 0, in the case of
|
||||
* uneven distributions
|
||||
*/
|
||||
|
||||
/* forward declaration */
|
||||
static void
|
||||
ADIOI_BG_compute_agg_ranklist_serial ( ADIO_File fd,
|
||||
const ADIOI_BG_ConfInfo_t *confInfo,
|
||||
ADIOI_BG_ProcInfo_t *all_procInfo);
|
||||
|
||||
/*
|
||||
* Compute the aggregator-related parameters that are required in 2-phase collective IO of ADIO.
|
||||
* The parameters are
|
||||
* . the number of aggregators (proxies) : fd->hints->cb_nodes
|
||||
* . the ranks of the aggregators : fd->hints->ranklist
|
||||
* By computing these two parameters in a BG-PSET-aware way, the default 2-phase collective IO of
|
||||
* ADIO can work more efficiently.
|
||||
*/
|
||||
int
|
||||
ADIOI_BG_gen_agg_ranklist(ADIO_File fd, int n_aggrs_per_pset)
|
||||
{
|
||||
int r, s;
|
||||
ADIOI_BG_ProcInfo_t *procInfo, *all_procInfo;
|
||||
ADIOI_BG_ConfInfo_t *confInfo;
|
||||
TRACE_ERR("Entering ADIOI_BG_gen_agg_ranklist\n");
|
||||
|
||||
MPI_Comm_size( fd->comm, &s );
|
||||
MPI_Comm_rank( fd->comm, &r );
|
||||
|
||||
/* Collect individual BG personality information */
|
||||
confInfo = ADIOI_BG_ConfInfo_new ();
|
||||
procInfo = ADIOI_BG_ProcInfo_new ();
|
||||
ADIOI_BG_persInfo_init( confInfo, procInfo, s, r, n_aggrs_per_pset, fd->comm);
|
||||
|
||||
/* Gather BG personality infomation onto process 0 */
|
||||
/* if (r == 0) */
|
||||
all_procInfo = ADIOI_BG_ProcInfo_new_n (s);
|
||||
|
||||
MPI_Gather( (void *)procInfo, sizeof(ADIOI_BG_ProcInfo_t), MPI_BYTE,
|
||||
(void *)all_procInfo, sizeof(ADIOI_BG_ProcInfo_t), MPI_BYTE,
|
||||
0,
|
||||
fd->comm );
|
||||
|
||||
/* Compute a list of the ranks of chosen IO proxy CN on process 0 */
|
||||
if (r == 0) {
|
||||
ADIOI_BG_compute_agg_ranklist_serial (fd, confInfo, all_procInfo);
|
||||
/* ADIOI_BG_ProcInfo_free (all_procInfo);*/
|
||||
}
|
||||
ADIOI_BG_ProcInfo_free (all_procInfo);
|
||||
|
||||
/* Send the info of IO proxy CN to all processes and keep the info in fd->hints struct.
|
||||
Declared in adio_cb_config_list.h */
|
||||
ADIOI_cb_bcast_rank_map(fd);
|
||||
if (gpfsmpio_balancecontig == 1) { /* additionally need to send bridgelist,
|
||||
bridgelistnum and numbridges to all
|
||||
ranks */
|
||||
if (r != 0) {
|
||||
fd->hints->fs_hints.bg.bridgelist =
|
||||
ADIOI_Malloc(fd->hints->cb_nodes*sizeof(int));
|
||||
if (fd->hints->fs_hints.bg.bridgelist == NULL) {
|
||||
/* NEED TO HANDLE ENOMEM */
|
||||
}
|
||||
}
|
||||
MPI_Bcast(fd->hints->fs_hints.bg.bridgelist, fd->hints->cb_nodes, MPI_INT, 0,
|
||||
fd->comm);
|
||||
|
||||
if (r != 0) {
|
||||
fd->hints->fs_hints.bg.bridgelistnum =
|
||||
ADIOI_Malloc(fd->hints->cb_nodes*sizeof(int));
|
||||
if (fd->hints->fs_hints.bg.bridgelistnum == NULL) {
|
||||
/* NEED TO HANDLE ENOMEM */
|
||||
}
|
||||
}
|
||||
MPI_Bcast(fd->hints->fs_hints.bg.bridgelistnum, fd->hints->cb_nodes,
|
||||
MPI_INT, 0, fd->comm);
|
||||
|
||||
MPI_Bcast(&fd->hints->fs_hints.bg.numbridges, 1, MPI_INT, 0,
|
||||
fd->comm);
|
||||
|
||||
}
|
||||
|
||||
|
||||
ADIOI_BG_persInfo_free( confInfo, procInfo );
|
||||
TRACE_ERR("Leaving ADIOI_BG_gen_agg_ranklist\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* There are some number of bridge nodes (randomly) distributed through the job
|
||||
* We need to split the nodes among the bridge nodes */
|
||||
/* Maybe find which bridge node is closer (manhattan distance) and try to
|
||||
* distribute evenly.
|
||||
*/
|
||||
/*
|
||||
* Picks IO aggregators based on the underlying PSET organization and stores the ranks of the proxy CNs in tmp_ranklist.
|
||||
* The first order of tmp_ranklist is : PSET number
|
||||
* The secondary order of the list is determined in ADIOI_BG_select_agg_in_pset() and thus adjustable.
|
||||
*/
|
||||
/* Pairs an MPI rank with the rank of the bridge node it is associated with;
 * arrays of these are sorted by bridge with intsort(). */
typedef struct
{
    int rank;
    int bridge;
} sortstruct;

/* Bookkeeping for one bridge: which bridge rank it is and how many
 * aggregators have been assigned to it so far. */
typedef struct
{
    int bridgeRank;
    int numAggsAssigned;
} bridgeAggAssignment;

/*
 * qsort() comparator ordering sortstruct entries by ascending bridge value.
 *
 * Returns a negative, zero or positive value when p1's bridge is less than,
 * equal to or greater than p2's.  The result is built from comparisons
 * rather than a subtraction, so it cannot overflow for widely separated
 * values, and the inputs are only read through const pointers.
 */
static int intsort(const void *p1, const void *p2)
{
    const sortstruct *i1 = (const sortstruct *) p1;
    const sortstruct *i2 = (const sortstruct *) p2;

    return (i1->bridge > i2->bridge) - (i1->bridge < i2->bridge);
}
|
||||
|
||||
static int
|
||||
ADIOI_BG_compute_agg_ranklist_serial_do (const ADIOI_BG_ConfInfo_t *confInfo,
|
||||
ADIOI_BG_ProcInfo_t *all_procInfo,
|
||||
int *tmp_ranklist)
|
||||
{
|
||||
TRACE_ERR("Entering ADIOI_BG_compute_agg_ranklist_serial_do\n");
|
||||
/* BES: This should be done in the init routines probably. */
|
||||
int i, j;
|
||||
int aggTotal;
|
||||
int *aggList;
|
||||
|
||||
if (gpfsmpio_bridgeringagg > 0) {
|
||||
|
||||
int numAggs = confInfo->aggRatio * confInfo->ioMinSize /*virtualPsetSize*/;
|
||||
/* the number of aggregators is (numAggs per bridgenode) */
|
||||
if(numAggs == 1)
|
||||
aggTotal = 1;
|
||||
else
|
||||
aggTotal = confInfo->numBridgeRanks * numAggs;
|
||||
|
||||
aggList = (int *)ADIOI_Malloc(aggTotal * sizeof(int));
|
||||
if(aggTotal == 1) { /* special case when we only have one bridge node */
|
||||
|
||||
sortstruct *bridgelist = (sortstruct *)ADIOI_Malloc(confInfo->nProcs * sizeof(sortstruct));
|
||||
for(i=0; i < confInfo->nProcs; i++)
|
||||
{
|
||||
bridgelist[i].bridge = all_procInfo[i].bridgeRank;
|
||||
bridgelist[i].rank = i;
|
||||
TRACE_ERR("bridgelist[%d].bridge: %d .rank: %d\n", i, bridgelist[i].bridge, i);
|
||||
}
|
||||
|
||||
/* This list contains rank->bridge info. Now, we need to sort this list. */
|
||||
qsort(bridgelist, confInfo->nProcs, sizeof(sortstruct), intsort);
|
||||
|
||||
aggList[0] = bridgelist[0].bridge;
|
||||
ADIOI_Free(bridgelist);
|
||||
|
||||
}
|
||||
else { // aggTotal > 1
|
||||
|
||||
int currentAggListSize = 0;
|
||||
int numBridgesWithAggAssignments = 0;
|
||||
bridgeAggAssignment *aggAssignments = (bridgeAggAssignment *)ADIOI_Malloc(confInfo->numBridgeRanks * sizeof(bridgeAggAssignment));
|
||||
|
||||
int partitionSize = all_procInfo[0].numNodesInPartition;
|
||||
int *nodesAssigned = (int *)ADIOI_Malloc(partitionSize * sizeof(int));
|
||||
for (i=0;i<partitionSize;i++)
|
||||
nodesAssigned[i] = 0;
|
||||
|
||||
int currentNumHops = gpfsmpio_bridgeringagg;
|
||||
int allAggsAssigned = 0;
|
||||
|
||||
/* Iterate thru the process infos and select aggregators starting at currentNumHops
|
||||
away. Increase the currentNumHops until all bridges have numAggs assigned to them.
|
||||
*/
|
||||
while (!allAggsAssigned) {
|
||||
/* track whether any aggs are selected durng this round */
|
||||
int startingCurrentAggListSize = currentAggListSize;
|
||||
int numIterForHopsWithNoAggs = 0;
|
||||
for (i=0;i<confInfo->nProcs;i++) {
|
||||
if (all_procInfo[i].manhattanDistanceToBridge == currentNumHops) {
|
||||
if (nodesAssigned[all_procInfo[i].nodeRank] == 0) { // node is not assigned as an agg yet
|
||||
int foundBridge = 0;
|
||||
for (j=0;(j<numBridgesWithAggAssignments && !foundBridge);j++) {
|
||||
if (aggAssignments[j].bridgeRank == all_procInfo[i].bridgeRank) {
|
||||
foundBridge = 1;
|
||||
if (aggAssignments[j].numAggsAssigned < numAggs) {
|
||||
aggAssignments[j].numAggsAssigned++;
|
||||
nodesAssigned[all_procInfo[i].nodeRank] = 1;
|
||||
aggList[currentAggListSize] = all_procInfo[i].rank;
|
||||
currentAggListSize++;
|
||||
#ifdef bridgeringaggtrace
|
||||
printf("Assigned agg rank %d at nodeRank %d to bridge rank %d at a distance of %d hops\n",all_procInfo[i].rank,all_procInfo[i].nodeRank,all_procInfo[i].bridgeRank,currentNumHops);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!foundBridge) {
|
||||
aggAssignments[numBridgesWithAggAssignments].bridgeRank = all_procInfo[i].bridgeRank;
|
||||
aggAssignments[numBridgesWithAggAssignments].numAggsAssigned = 1;
|
||||
numBridgesWithAggAssignments++;
|
||||
nodesAssigned[all_procInfo[i].nodeRank] = 1;
|
||||
aggList[currentAggListSize] = all_procInfo[i].rank;
|
||||
currentAggListSize++;
|
||||
#ifdef bridgeringaggtrace
|
||||
printf("Assigned agg rank %d at nodeRank %d to bridge rank %d at a distance of %d hops\n",all_procInfo[i].rank,all_procInfo[i].nodeRank,all_procInfo[i].bridgeRank,currentNumHops);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (numBridgesWithAggAssignments == confInfo->numBridgeRanks) {
|
||||
allAggsAssigned = 1;
|
||||
for (i=0;(i<numBridgesWithAggAssignments && allAggsAssigned);i++) {
|
||||
if (aggAssignments[i].numAggsAssigned < numAggs)
|
||||
allAggsAssigned = 0;
|
||||
}
|
||||
}
|
||||
currentNumHops++;
|
||||
/* If 3 rounds go by without selecting an agg abort to avoid
|
||||
infinite loop.
|
||||
*/
|
||||
if (startingCurrentAggListSize == currentAggListSize)
|
||||
numIterForHopsWithNoAggs++;
|
||||
else
|
||||
numIterForHopsWithNoAggs = 0;
|
||||
ADIOI_Assert(numIterForHopsWithNoAggs <= 3);
|
||||
}
|
||||
|
||||
ADIOI_Free(aggAssignments);
|
||||
ADIOI_Free(nodesAssigned);
|
||||
|
||||
} // else aggTotal > 1
|
||||
|
||||
memcpy(tmp_ranklist, aggList, aggTotal*sizeof(int));
|
||||
} // gpfsmpio_bridgeringagg > 0
|
||||
|
||||
else { // gpfsmpio_bridgeringagg unset - default code
|
||||
|
||||
int distance, numAggs;
|
||||
|
||||
/* Aggregators will be midpoints between sorted MPI rank lists of who shares a given
|
||||
* bridge node */
|
||||
|
||||
sortstruct *bridgelist = (sortstruct *)ADIOI_Malloc(confInfo->nProcs * sizeof(sortstruct));
|
||||
for(i=0; i < confInfo->nProcs; i++)
|
||||
{
|
||||
bridgelist[i].bridge = all_procInfo[i].bridgeRank;
|
||||
bridgelist[i].rank = i;
|
||||
TRACE_ERR("bridgelist[%d].bridge: %d .rank: %d\n", i, bridgelist[i].bridge, i);
|
||||
}
|
||||
|
||||
/* This list contains rank->bridge info. Now, we need to sort this list. */
|
||||
qsort(bridgelist, confInfo->nProcs, sizeof(sortstruct), intsort);
|
||||
|
||||
/* In this array, we can pick an appropriate number of midpoints based on
|
||||
* our bridgenode index and the number of aggregators */
|
||||
|
||||
numAggs = confInfo->aggRatio * confInfo->ioMinSize /*virtualPsetSize*/;
|
||||
if(numAggs == 1)
|
||||
aggTotal = 1;
|
||||
else
|
||||
/* the number of aggregators is (numAggs per bridgenode) plus each
|
||||
* bridge node is an aggregator */
|
||||
aggTotal = confInfo->numBridgeRanks * (numAggs+1);
|
||||
|
||||
if(aggTotal>confInfo->nProcs) aggTotal=confInfo->nProcs;
|
||||
|
||||
TRACE_ERR("numBridgeRanks: %d, aggRatio: %f numBridge: %d pset size: %d/%d numAggs: %d, aggTotal: %d\n", confInfo->numBridgeRanks, confInfo->aggRatio, confInfo->numBridgeRanks, confInfo->ioMinSize, confInfo->ioMaxSize /*virtualPsetSize*/, numAggs, aggTotal);
|
||||
aggList = (int *)ADIOI_Malloc(aggTotal * sizeof(int));
|
||||
|
||||
|
||||
/* For each bridge node, determine who the aggregators will be */
|
||||
/* basically, the n*distance and bridge node */
|
||||
if(aggTotal == 1) /* special case when we only have one bridge node */
|
||||
aggList[0] = bridgelist[0].bridge;
|
||||
else
|
||||
{
|
||||
int lastBridge = bridgelist[confInfo->nProcs-1].bridge;
|
||||
int nextBridge = 0, nextAggr = confInfo->numBridgeRanks;
|
||||
int psetSize = 0;
|
||||
int procIndex;
|
||||
for(procIndex=confInfo->nProcs-1; procIndex>=0; procIndex--)
|
||||
{
|
||||
TRACE_ERR("bridgelist[%d].bridge %u/rank %u\n",procIndex, bridgelist[procIndex].bridge, bridgelist[procIndex].rank);
|
||||
if(lastBridge == bridgelist[procIndex].bridge)
|
||||
{
|
||||
psetSize++;
|
||||
if(procIndex) continue;
|
||||
else procIndex--;/* procIndex == 0 */
|
||||
}
|
||||
/* Sets up a list of nodes which will act as aggregators. numAggs
|
||||
* per bridge node total. The list of aggregators is
|
||||
* bridgeNode 0
|
||||
* bridgeNode 1
|
||||
* bridgeNode ...
|
||||
* bridgeNode N
|
||||
* bridgeNode[0]aggr[0]
|
||||
* bridgeNode[0]aggr[1]...
|
||||
* bridgeNode[0]aggr[N]...
|
||||
* ...
|
||||
* bridgeNode[N]aggr[0]..
|
||||
* bridgeNode[N]aggr[N]
|
||||
*/
|
||||
aggList[nextBridge]=lastBridge;
|
||||
distance = psetSize/numAggs;
|
||||
TRACE_ERR("nextBridge %u is bridge %u, distance %u, size %u\n",nextBridge, aggList[nextBridge],distance,psetSize);
|
||||
if(numAggs>1)
|
||||
{
|
||||
for(j = 0; j < numAggs; j++)
|
||||
{
|
||||
ADIOI_Assert(nextAggr<aggTotal);
|
||||
aggList[nextAggr] = bridgelist[procIndex+j*distance+1].rank;
|
||||
TRACE_ERR("agglist[%d] -> bridgelist[%d] = %d\n", nextAggr, procIndex+j*distance+1,aggList[nextAggr]);
|
||||
if(aggList[nextAggr]==lastBridge) /* can't have bridge in the list twice */
|
||||
{
|
||||
aggList[nextAggr] = bridgelist[procIndex+psetSize].rank; /* take the last one in the pset */
|
||||
TRACE_ERR("replacement agglist[%d] -> bridgelist[%d] = %d\n", nextAggr, procIndex+psetSize,aggList[nextAggr]);
|
||||
}
|
||||
nextAggr++;
|
||||
}
|
||||
}
|
||||
if(procIndex<0) break;
|
||||
lastBridge = bridgelist[procIndex].bridge;
|
||||
psetSize = 1;
|
||||
nextBridge++;
|
||||
}
|
||||
}
|
||||
|
||||
TRACE_ERR("memcpy(tmp_ranklist, aggList, (numAggs(%u)*confInfo->numBridgeRanks(%u)+numAggs(%u)) (%u) %u*sizeof(int))\n",numAggs,confInfo->numBridgeRanks,numAggs,(numAggs*confInfo->numBridgeRanks+numAggs),aggTotal);
|
||||
memcpy(tmp_ranklist, aggList, aggTotal*sizeof(int));
|
||||
for(i=0;i<aggTotal;i++)
|
||||
{
|
||||
TRACE_ERR("tmp_ranklist[%d]: %d\n", i, tmp_ranklist[i]);
|
||||
}
|
||||
|
||||
|
||||
ADIOI_Free (bridgelist);
|
||||
|
||||
TRACE_ERR("Leaving ADIOI_BG_compute_agg_ranklist_serial_do\n");
|
||||
}
|
||||
|
||||
ADIOI_Free (aggList);
|
||||
return aggTotal;
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* compute aggregators ranklist and put it into fd->hints struct
|
||||
*/
|
||||
static void
|
||||
ADIOI_BG_compute_agg_ranklist_serial ( ADIO_File fd,
|
||||
const ADIOI_BG_ConfInfo_t *confInfo,
|
||||
ADIOI_BG_ProcInfo_t *all_procInfo)
|
||||
{
|
||||
TRACE_ERR("Entering ADIOI_BG_compute_agg_ranklist_serial\n");
|
||||
int i;
|
||||
int naggs;
|
||||
int size;
|
||||
int *tmp_ranklist;
|
||||
|
||||
/* compute the ranklist of IO aggregators and put into tmp_ranklist */
|
||||
tmp_ranklist = (int *) ADIOI_Malloc (confInfo->nProcs * sizeof(int));
|
||||
|
||||
# if AGG_DEBUG
|
||||
for (i=0; i<confInfo->nProcs; i++) {
|
||||
DBG_FPRINTF(stderr, "\tcpuid %1d, rank = %6d\n", all_procInfo[i].coreID, all_procInfo[i].rank );
|
||||
}
|
||||
# endif
|
||||
|
||||
naggs=
|
||||
ADIOI_BG_compute_agg_ranklist_serial_do (confInfo, all_procInfo, tmp_ranklist);
|
||||
|
||||
# define VERIFY 1
|
||||
# if VERIFY
|
||||
DBG_FPRINTF(stderr, "\tconfInfo = min: %3d, max: %3d, naggrs: %3d, bridge: %3d, nprocs: %3d, vpset: %3d, tsize: %3d, ratio: %.4f; naggs = %d\n",
|
||||
confInfo->ioMinSize ,
|
||||
confInfo->ioMaxSize ,
|
||||
confInfo->nAggrs ,
|
||||
confInfo->numBridgeRanks ,
|
||||
confInfo->nProcs ,
|
||||
confInfo->ioMaxSize /*virtualPsetSize*/ ,
|
||||
confInfo->cpuIDsize,
|
||||
confInfo->aggRatio ,
|
||||
naggs );
|
||||
# endif
|
||||
MPI_Comm_size( fd->comm, &size );
|
||||
/* This fix is for when the bridgenode rnk is not part of the particular
|
||||
* subcomm associated with this MPI File operation. I don't know if
|
||||
* this is the best/right answer but it passes the test cases at least.
|
||||
* I don't know how common file IO in subcomms is anyway... */
|
||||
for(i=0;i<naggs;i++)
|
||||
{
|
||||
if(tmp_ranklist[i] > size)
|
||||
{
|
||||
TRACE_ERR("Using 0 as tmp_ranklist[%d] instead of %d for comm %x\n",
|
||||
i, tmp_ranklist[i], fd->comm);
|
||||
tmp_ranklist[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
# if AGG_DEBUG
|
||||
for (i=0; i<naggs; i++) {
|
||||
DBG_FPRINTF(stderr, "\taggr %-4d = %6d\n", i, tmp_ranklist[i] );
|
||||
}
|
||||
# endif
|
||||
if (gpfsmpio_balancecontig == 1) {
|
||||
/* what comes out of this code block is the agg ranklist sorted by
|
||||
* bridge set and ion id with associated bridge info stored in the
|
||||
* hints structure for later access during file domain assignment */
|
||||
|
||||
// sort the agg ranklist by ions and bridges
|
||||
|
||||
int *interleavedbridgeranklist = (int *) ADIOI_Malloc (naggs * sizeof(int)); // resorted agg rank list
|
||||
/* list of all bridge ranks */
|
||||
int *bridgelist = (int *) ADIOI_Malloc (naggs * sizeof(int));
|
||||
|
||||
/* each entry here is the number of aggregators associated with the
|
||||
* bridge rank of the same index in bridgelist */
|
||||
int *bridgelistnum = (int *) ADIOI_Malloc (naggs * sizeof(int));
|
||||
/* list of all ion IDs corresponding with bridgelist entries of same index */
|
||||
int *ionlist = (int *) ADIOI_Malloc (naggs * sizeof(int));
|
||||
|
||||
int numbridges = 0;
|
||||
|
||||
for (i=0;i<naggs;i++)
|
||||
bridgelistnum[i] = 0;
|
||||
|
||||
/* Each entry in this list corresponds with the bridgelist and will contain the lowest bridge
|
||||
* agg rank on that ion. */
|
||||
int *summarybridgeminionaggrank = (int *) ADIOI_Malloc (naggs * sizeof(int));
|
||||
for (i=0;i<naggs;i++)
|
||||
summarybridgeminionaggrank[i] = -1;
|
||||
|
||||
/* build the bridgelist, ionlist and bridgelistnum data by going thru each agg
|
||||
* entry and find the associated bridge list index - at the end we will
|
||||
* know how many aggs belong to each bridge in each ion */
|
||||
for (i=0;i<naggs;i++) {
|
||||
int aggbridgerank = all_procInfo[tmp_ranklist[i]].bridgeRank;
|
||||
int aggionid = all_procInfo[tmp_ranklist[i]].ionID;
|
||||
int foundrank = 0;
|
||||
int summaryranklistbridgeindex = 0;
|
||||
int j;
|
||||
for (j=0;(j<numbridges && !foundrank);j++) {
|
||||
if (bridgelist[j] == aggbridgerank) {
|
||||
foundrank = 1;
|
||||
summaryranklistbridgeindex = j;
|
||||
}
|
||||
else
|
||||
summaryranklistbridgeindex++;
|
||||
}
|
||||
if (!foundrank) {
|
||||
bridgelist[summaryranklistbridgeindex] = aggbridgerank;
|
||||
ionlist[summaryranklistbridgeindex] = aggionid;
|
||||
|
||||
if (summarybridgeminionaggrank[summaryranklistbridgeindex] == -1)
|
||||
summarybridgeminionaggrank[summaryranklistbridgeindex] = aggbridgerank;
|
||||
else if (summarybridgeminionaggrank[summaryranklistbridgeindex] > aggbridgerank)
|
||||
summarybridgeminionaggrank[summaryranklistbridgeindex] = aggbridgerank;
|
||||
numbridges++;
|
||||
}
|
||||
|
||||
bridgelistnum[summaryranklistbridgeindex]++;
|
||||
}
|
||||
|
||||
/* at this point summarybridgeminionaggrank has the agg rank of the bridge for entries,
|
||||
* need to make each entry the minimum bridge rank for the entire ion. */
|
||||
for (i=0;i<numbridges;i++) {
|
||||
int aggIonId = ionlist[i];
|
||||
int j;
|
||||
for (j=0;j<numbridges;j++) {
|
||||
if (ionlist[j] == aggIonId) {
|
||||
if (summarybridgeminionaggrank[j] < summarybridgeminionaggrank[i])
|
||||
summarybridgeminionaggrank[i] = summarybridgeminionaggrank[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// resort by io node minimum bridge rank
|
||||
int x;
|
||||
for (x=0;x<numbridges;x++) {
|
||||
for (i=0;i<(numbridges-1);i++) {
|
||||
if (summarybridgeminionaggrank[i] > summarybridgeminionaggrank[i+1]) {
|
||||
int tmpminionaggrank = summarybridgeminionaggrank[i];
|
||||
summarybridgeminionaggrank[i] = summarybridgeminionaggrank[i+1];
|
||||
summarybridgeminionaggrank[i+1] = tmpminionaggrank;
|
||||
int tmpionid = ionlist[i];
|
||||
ionlist[i] = ionlist[i+1];
|
||||
ionlist[i+1] = tmpionid;
|
||||
int tmpbridgerank = bridgelist[i];
|
||||
bridgelist[i] = bridgelist[i+1];
|
||||
bridgelist[i+1] = tmpbridgerank;
|
||||
int tmpbridgeranknum = bridgelistnum[i];
|
||||
bridgelistnum[i] = bridgelistnum[i+1];
|
||||
bridgelistnum[i+1] = tmpbridgeranknum;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// for each io node make sure bridgelist is in rank order
|
||||
int startSortIndex = -1;
|
||||
int endSortIndex = -1;
|
||||
int currentBridgeIndex = 0;
|
||||
|
||||
while (currentBridgeIndex < numbridges) {
|
||||
int currentIonId = ionlist[currentBridgeIndex];
|
||||
startSortIndex = currentBridgeIndex;
|
||||
while (ionlist[currentBridgeIndex] == currentIonId)
|
||||
currentBridgeIndex++;
|
||||
endSortIndex = currentBridgeIndex-1;
|
||||
for (x=startSortIndex;x<=endSortIndex;x++) {
|
||||
for (i=startSortIndex;i<endSortIndex;i++) {
|
||||
if (bridgelist[i] > bridgelist[i+1]) {
|
||||
int tmpbridgerank = bridgelist[i];
|
||||
bridgelist[i] = bridgelist[i+1];
|
||||
bridgelist[i+1] = tmpbridgerank;
|
||||
int tmpbridgeranknum = bridgelistnum[i];
|
||||
bridgelistnum[i] = bridgelistnum[i+1];
|
||||
bridgelistnum[i+1] = tmpbridgeranknum;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* populate interleavedbridgeranklist - essentially the agg rank list
|
||||
* is now sorted by the ion minimum bridge rank and bridge node */
|
||||
int currentrankoffset = 0;
|
||||
for (i=0;i<numbridges;i++) {
|
||||
int *thisBridgeAggList = (int *) ADIOI_Malloc (naggs * sizeof(int));
|
||||
int numAggsForThisBridge = 0;
|
||||
|
||||
int k;
|
||||
for (k=0;k<naggs;k++) {
|
||||
int aggbridgerank = all_procInfo[tmp_ranklist[k]].bridgeRank;
|
||||
if (aggbridgerank == bridgelist[i]) {
|
||||
thisBridgeAggList[numAggsForThisBridge] = tmp_ranklist[k];
|
||||
numAggsForThisBridge++;
|
||||
}
|
||||
}
|
||||
|
||||
// sort thisBridgeAggList
|
||||
for (x=0;x<numAggsForThisBridge;x++) {
|
||||
int n;
|
||||
for (n=0;n<(numAggsForThisBridge-1);n++) {
|
||||
if (thisBridgeAggList[n] > thisBridgeAggList[n+1]) {
|
||||
int tmpthisBridgeAggList = thisBridgeAggList[n];
|
||||
thisBridgeAggList[n] = thisBridgeAggList[n+1];
|
||||
thisBridgeAggList[n+1] = tmpthisBridgeAggList;
|
||||
}
|
||||
}
|
||||
}
|
||||
int n;
|
||||
for (n=0;n<numAggsForThisBridge;n++) {
|
||||
interleavedbridgeranklist[currentrankoffset] = thisBridgeAggList[n];
|
||||
currentrankoffset++;
|
||||
}
|
||||
ADIOI_Free(thisBridgeAggList);
|
||||
}
|
||||
|
||||
#ifdef balancecontigtrace
|
||||
fprintf(stderr,"Interleaved aggregator list:\n");
|
||||
for (i=0;i<naggs;i++) {
|
||||
fprintf(stderr,"Agg: %d Agg rank: %d with bridge rank %d and ion ID %d\n",i,interleavedbridgeranklist[i],all_procInfo[interleavedbridgeranklist[i]].bridgeRank,all_procInfo[interleavedbridgeranklist[i]].ionID);
|
||||
}
|
||||
fprintf(stderr,"Bridges list:\n");
|
||||
for (i=0;i<numbridges;i++) {
|
||||
fprintf(stderr,"bridge %d ion min rank %d rank %d number of aggs %d ion id %d\n",i,summarybridgeminionaggrank[i],bridgelist[i],bridgelistnum[i],ionlist[i]);
|
||||
}
|
||||
|
||||
#endif
|
||||
/* copy the ranklist of IO aggregators to fd->hints */
|
||||
if(fd->hints->ranklist != NULL)
|
||||
ADIOI_Free (fd->hints->ranklist);
|
||||
if(fd->hints->fs_hints.bg.bridgelist != NULL)
|
||||
ADIOI_Free (fd->hints->fs_hints.bg.bridgelist);
|
||||
if(fd->hints->fs_hints.bg.bridgelistnum != NULL)
|
||||
ADIOI_Free (fd->hints->fs_hints.bg.bridgelistnum);
|
||||
|
||||
fd->hints->cb_nodes = naggs;
|
||||
fd->hints->fs_hints.bg.numbridges = numbridges;
|
||||
fd->hints->ranklist = (int *) ADIOI_Malloc (naggs * sizeof(int));
|
||||
memcpy( fd->hints->ranklist, interleavedbridgeranklist, naggs*sizeof(int) );
|
||||
|
||||
fd->hints->fs_hints.bg.bridgelist = (int *) ADIOI_Malloc (naggs * sizeof(int));
|
||||
memcpy( fd->hints->fs_hints.bg.bridgelist, bridgelist, naggs*sizeof(int) );
|
||||
|
||||
fd->hints->fs_hints.bg.bridgelistnum = (int *) ADIOI_Malloc (naggs * sizeof(int));
|
||||
memcpy( fd->hints->fs_hints.bg.bridgelistnum, bridgelistnum, naggs*sizeof(int) );
|
||||
|
||||
ADIOI_Free(summarybridgeminionaggrank);
|
||||
ADIOI_Free( tmp_ranklist );
|
||||
ADIOI_Free( bridgelistnum );
|
||||
ADIOI_Free( bridgelist );
|
||||
ADIOI_Free( interleavedbridgeranklist );
|
||||
ADIOI_Free(ionlist);
|
||||
|
||||
} else {
|
||||
/* classic topology-agnostic copy of the ranklist of IO aggregators to
|
||||
* fd->hints */
|
||||
if(fd->hints->ranklist != NULL) ADIOI_Free (fd->hints->ranklist);
|
||||
|
||||
fd->hints->cb_nodes = naggs;
|
||||
fd->hints->ranklist = (int *) ADIOI_Malloc (naggs * sizeof(int));
|
||||
memcpy( fd->hints->ranklist, tmp_ranklist, naggs*sizeof(int) );
|
||||
|
||||
ADIOI_Free( tmp_ranklist );
|
||||
}
|
||||
TRACE_ERR("Leaving ADIOI_BG_compute_agg_ranklist_serial\n");
|
||||
return;
|
||||
}
|
33
ompi/mca/io/romio314/romio/adio/ad_gpfs/bg/ad_bg_aggrs.h
Обычный файл
33
ompi/mca/io/romio314/romio/adio/ad_gpfs/bg/ad_bg_aggrs.h
Обычный файл
@ -0,0 +1,33 @@
|
||||
/* ---------------------------------------------------------------- */
|
||||
/* (C)Copyright IBM Corp. 2007, 2008 */
|
||||
/* ---------------------------------------------------------------- */
|
||||
/**
|
||||
* \file ad_bg_aggrs.h
|
||||
* \brief Declares ADIOI_BG_gen_agg_ranklist(), which generates the list of I/O aggregators on Blue Gene.
|
||||
*/
|
||||
|
||||
/*
|
||||
*
|
||||
* Declares functions specific for the BlueGene platform within the GPFS
|
||||
* parallel I/O solution. Implements aligned file-domain partitioning
|
||||
* (7/28/2005); persistent file domain work not implemented
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef AD_BG_AGGRS_H_
|
||||
#define AD_BG_AGGRS_H_
|
||||
|
||||
#include "adio.h"
|
||||
#include <sys/stat.h>
|
||||
|
||||
#ifdef HAVE_GPFS_H
|
||||
#include <gpfs.h>
|
||||
#endif
|
||||
#if !defined(GPFS_SUPER_MAGIC)
|
||||
#define GPFS_SUPER_MAGIC (0x47504653)
|
||||
#endif
|
||||
|
||||
/* generate a list of I/O aggregators that utilizes BG-PSET organization. */
|
||||
int ADIOI_BG_gen_agg_ranklist(ADIO_File fd, int n_aggrs_per_pset);
|
||||
|
||||
#endif /* AD_BG_AGGRS_H_ */
|
377
ompi/mca/io/romio314/romio/adio/ad_gpfs/bg/ad_bg_pset.c
Обычный файл
377
ompi/mca/io/romio314/romio/adio/ad_gpfs/bg/ad_bg_pset.c
Обычный файл
@ -0,0 +1,377 @@
|
||||
/* ---------------------------------------------------------------- */
|
||||
/* (C)Copyright IBM Corp. 2007, 2008 */
|
||||
/* ---------------------------------------------------------------- */
|
||||
/**
|
||||
* \file ad_bg_pset.c
|
||||
* \brief Definition of functions associated to structs ADIOI_BG_ProcInfo_t and ADIOI_BG_ConfInfo_t
|
||||
*/
|
||||
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
/* #define TRACE_ON */
|
||||
// #define bridgeringaggtrace 1
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "../ad_gpfs.h"
|
||||
#include "ad_bg_pset.h"
|
||||
#include <spi/include/kernel/process.h>
|
||||
#include <firmware/include/personality.h>
|
||||
|
||||
#ifdef HAVE_MPIX_H
|
||||
#include <mpix.h>
|
||||
#endif
|
||||
|
||||
#ifndef TRACE_ERR
|
||||
# define TRACE_ERR(fmt...)
|
||||
#endif
|
||||
|
||||
ADIOI_BG_ProcInfo_t *
|
||||
ADIOI_BG_ProcInfo_new()
|
||||
{
|
||||
ADIOI_BG_ProcInfo_t *p = (ADIOI_BG_ProcInfo_t *) ADIOI_Malloc (sizeof(ADIOI_BG_ProcInfo_t));
|
||||
ADIOI_Assert ((p != NULL));
|
||||
return p;
|
||||
}
|
||||
|
||||
ADIOI_BG_ProcInfo_t *
|
||||
ADIOI_BG_ProcInfo_new_n( int n )
|
||||
{
|
||||
ADIOI_BG_ProcInfo_t *p = (ADIOI_BG_ProcInfo_t *) ADIOI_Malloc (n * sizeof(ADIOI_BG_ProcInfo_t));
|
||||
ADIOI_Assert ((p != NULL));
|
||||
return p;
|
||||
}
|
||||
|
||||
void
|
||||
ADIOI_BG_ProcInfo_free( ADIOI_BG_ProcInfo_t *info )
|
||||
{
|
||||
if (info != NULL) ADIOI_Free (info);
|
||||
}
|
||||
|
||||
ADIOI_BG_ConfInfo_t *
|
||||
ADIOI_BG_ConfInfo_new ()
|
||||
{
|
||||
ADIOI_BG_ConfInfo_t *p = (ADIOI_BG_ConfInfo_t *) ADIOI_Malloc (sizeof(ADIOI_BG_ConfInfo_t));
|
||||
ADIOI_Assert ((p != NULL));
|
||||
return p;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
ADIOI_BG_ConfInfo_free( ADIOI_BG_ConfInfo_t *info )
|
||||
{
|
||||
if (info != NULL) ADIOI_Free (info);
|
||||
}
|
||||
|
||||
|
||||
/* Pairs an MPI rank with a bridge coordinate value; arrays of these are
 * sorted by bridgeCoord with intsort(). */
typedef struct
{
    int rank;
    int bridgeCoord;
} sortstruct;

/*
 * qsort() comparator ordering sortstruct entries by ascending bridgeCoord.
 *
 * Returns a negative, zero or positive value when p1's bridgeCoord is less
 * than, equal to or greater than p2's.  The result is built from comparisons
 * rather than a subtraction, so it cannot overflow for widely separated
 * values, and the inputs are only read through const pointers.
 */
static int intsort(const void *p1, const void *p2)
{
    const sortstruct *i1 = (const sortstruct *) p1;
    const sortstruct *i2 = (const sortstruct *) p2;

    return (i1->bridgeCoord > i2->bridgeCoord) - (i1->bridgeCoord < i2->bridgeCoord);
}
|
||||
|
||||
unsigned torusSize[MPIX_TORUS_MAX_DIMS];
|
||||
unsigned dimTorus[MPIX_TORUS_MAX_DIMS];
|
||||
|
||||
/* This function computes the number of hops between the torus coordinates of the
|
||||
* aggCoords and bridgeCoords parameters.
|
||||
*/
|
||||
static unsigned procManhattanDistance(unsigned *aggCoords, unsigned *bridgeCoords) {
|
||||
|
||||
unsigned totalDistance = 0;
|
||||
int i;
|
||||
for (i=0;i<MPIX_TORUS_MAX_DIMS;i++) {
|
||||
unsigned dimDistance = abs((int)aggCoords[i] - (int)bridgeCoords[i]);
|
||||
if (dimDistance > 0) { // could torus make it closer?
|
||||
if (dimTorus[i]) {
|
||||
if (aggCoords[i] == torusSize[i]) { // is wrap-around closer
|
||||
if ((bridgeCoords[i]+1) < dimDistance) // assume will use torus link
|
||||
dimDistance = bridgeCoords[i]+1;
|
||||
}
|
||||
else if (bridgeCoords[i] == torusSize[i]) { // is wrap-around closer
|
||||
if ((aggCoords[i]+1) < dimDistance) // assume will use torus link
|
||||
dimDistance = aggCoords[i]+1;
|
||||
}
|
||||
}
|
||||
} /* else: dimDistance == 0, meaning aggCoords[i] and bridgeCoords[i] are
|
||||
the same and there's no closer point to pick */
|
||||
totalDistance += dimDistance;
|
||||
}
|
||||
return totalDistance;
|
||||
}
|
||||
|
||||
|
||||
/*
 * ADIOI_BG_persInfo_init - fill in the per-process (proc) and the
 * per-communicator (conf) bridge information.
 *
 *   conf     written when size == 1, otherwise only when rank == 0
 *   proc     written on every caller
 *   size     number of {rank, bridgeCoord} entries gathered and sorted
 *            (the bridges array is allocated with this many elements)
 *   rank     index of the caller's own entry in that array
 *   n_aggrs  requested aggregator count; a value <= 0 selects
 *            gpfsmpio_bg_nagg_pset
 *   comm     communicator used for MPI_Comm_size and MPI_Allgather
 *
 * When size > 1 the function calls MPI_Allgather on comm, so every rank of
 * comm has to call it.
 */
void
|
||||
ADIOI_BG_persInfo_init(ADIOI_BG_ConfInfo_t *conf,
|
||||
ADIOI_BG_ProcInfo_t *proc,
|
||||
int size, int rank, int n_aggrs, MPI_Comm comm)
|
||||
{
|
||||
int i, iambridge=0, bridgerank = -1, bridgeIndex;
|
||||
int countPset;
|
||||
sortstruct *bridges;
|
||||
int commsize;
|
||||
|
||||
TRACE_ERR("Entering BG_persInfo_init, size: %d, rank: %d, n_aggrs: %d, comm: %d\n", size, rank, n_aggrs, (int)comm);
|
||||
|
||||
Personality_t pers;
|
||||
MPIX_Hardware_t hw;
|
||||
/* hw supplies the torus coordinates, sizes, isTorus flags, ppn and coreID used below */
MPIX_Hardware(&hw);
|
||||
TRACE_ERR("BG_persInfo_init, my coords{%u,%u,%u,%u,%u} rankInPset %u,sizeOfPset %u,idOfPset %u\n",hw.Coords[0],hw.Coords[1],hw.Coords[2],hw.Coords[3],hw.Coords[4],hw.rankInPset,hw.sizeOfPset,hw.idOfPset);
|
||||
|
||||
|
||||
/* pers supplies the coordinates of this node's bridge (Network_Config.cnBridge_A..E) */
Kernel_GetPersonality(&pers, sizeof(pers));
|
||||
|
||||
proc->rank = rank;
|
||||
proc->coreID = hw.coreID;
|
||||
|
||||
/* Only when gpfsmpio_bridgeringagg is enabled: record the torus geometry in
   the file-scope torusSize/dimTorus arrays and derive nodeRank and
   numNodesInPartition for this process. */
if (gpfsmpio_bridgeringagg > 0) {
|
||||
#ifdef bridgeringaggtrace
|
||||
if (rank == 0)
|
||||
fprintf(stderr,"Block dimensions:\n");
|
||||
#endif
|
||||
|
||||
/* Set the numNodesInPartition and nodeRank for this proc
|
||||
*/
|
||||
proc->numNodesInPartition = 1;
|
||||
proc->nodeRank = 0;
|
||||
for (i=0;i<MPIX_TORUS_MAX_DIMS;i++) {
|
||||
torusSize[i] = hw.Size[i];
|
||||
dimTorus[i] = hw.isTorus[i];
|
||||
proc->numNodesInPartition *= hw.Size[i];
|
||||
/* baseNum is the product of the sizes of all dimensions below i, i.e. the
   weight of coordinate i in the linearized nodeRank */
int baseNum = 1, j;
|
||||
for (j=0;j<i;j++)
|
||||
baseNum *= hw.Size[j];
|
||||
proc->nodeRank += (hw.Coords[i] * baseNum);
|
||||
#ifdef bridgeringaggtrace
|
||||
if (rank == 0)
|
||||
fprintf(stderr,"Dimension %d has %d elements wrap-around value is %d\n",i,torusSize[i],dimTorus[i]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
/* NOTE(review): commsize is not read again anywhere in this function */
MPI_Comm_size(comm, &commsize);
|
||||
|
||||
proc->ionID = MPIX_IO_node_id ();
|
||||
|
||||
/* Single-entry case: this rank is its own bridge; fill in proc and conf
   directly and return without any communication. */
if(size == 1)
|
||||
{
|
||||
proc->iamBridge = 1;
|
||||
proc->bridgeRank = rank;
|
||||
if (gpfsmpio_bridgeringagg > 0) {
|
||||
proc->manhattanDistanceToBridge = 0;
|
||||
}
|
||||
|
||||
/* Set up the other parameters */
|
||||
proc->myIOSize = size;
|
||||
proc->ioNodeIndex = 0;
|
||||
conf->ioMinSize = size;
|
||||
conf->ioMaxSize = size;
|
||||
conf->numBridgeRanks = 1;
|
||||
conf->nProcs = size;
|
||||
conf->cpuIDsize = hw.ppn;
|
||||
/*conf->virtualPsetSize = conf->ioMaxSize * conf->cpuIDsize;*/
|
||||
conf->nAggrs = 1;
|
||||
conf->aggRatio = 1. * conf->nAggrs / conf->ioMinSize /*virtualPsetSize*/;
|
||||
if(conf->aggRatio > 1) conf->aggRatio = 1.;
|
||||
TRACE_ERR("I am (single) Bridge rank\n");
|
||||
return;
|
||||
}
|
||||
|
||||
/* Find the nearest bridge node coords. We don't know the
|
||||
rank in our comm so we will collective find/pick a bridge
|
||||
rank later.
|
||||
*/
|
||||
/* The five bridge coordinates are packed into one integer so that an integer
   sort groups ranks by bridge.  The shift amounts leave bit 0 clear; it is
   used further down as the "not the bridge itself" flag. */
int32_t bridgeCoords;
|
||||
bridgeCoords = pers.Network_Config.cnBridge_A << 24 |
|
||||
pers.Network_Config.cnBridge_B << 18 |
|
||||
pers.Network_Config.cnBridge_C << 12 |
|
||||
pers.Network_Config.cnBridge_D << 6 |
|
||||
pers.Network_Config.cnBridge_E << 2;
|
||||
ADIOI_Assert((bridgeCoords >= 0)); /* A dim is < 6 bits or sorting won't work */
|
||||
|
||||
/* A process whose own coordinates equal the bridge coordinates runs on the bridge node */
if((hw.Coords[0] == pers.Network_Config.cnBridge_A) &&
|
||||
(hw.Coords[1] == pers.Network_Config.cnBridge_B) &&
|
||||
(hw.Coords[2] == pers.Network_Config.cnBridge_C) &&
|
||||
(hw.Coords[3] == pers.Network_Config.cnBridge_D) &&
|
||||
(hw.Coords[4] == pers.Network_Config.cnBridge_E)) {
|
||||
iambridge = 1; /* I am bridge */
|
||||
if (gpfsmpio_bridgeringagg > 0) {
|
||||
proc->manhattanDistanceToBridge = 0;
|
||||
}
|
||||
}
|
||||
else { // calculate manhattan distance to bridge if gpfsmpio_bridgeringagg is set
|
||||
if (gpfsmpio_bridgeringagg > 0) {
|
||||
unsigned aggCoords[MPIX_TORUS_MAX_DIMS],manhattanBridgeCoords[MPIX_TORUS_MAX_DIMS];
|
||||
aggCoords[0] = hw.Coords[0];
|
||||
manhattanBridgeCoords[0] = pers.Network_Config.cnBridge_A;
|
||||
aggCoords[1] = hw.Coords[1];
|
||||
manhattanBridgeCoords[1] = pers.Network_Config.cnBridge_B;
|
||||
aggCoords[2] = hw.Coords[2];
|
||||
manhattanBridgeCoords[2] = pers.Network_Config.cnBridge_C;
|
||||
aggCoords[3] = hw.Coords[3];
|
||||
manhattanBridgeCoords[3] = pers.Network_Config.cnBridge_D;
|
||||
aggCoords[4] = hw.Coords[4];
|
||||
manhattanBridgeCoords[4] = pers.Network_Config.cnBridge_E;
|
||||
|
||||
proc->manhattanDistanceToBridge= procManhattanDistance(aggCoords, manhattanBridgeCoords);
|
||||
#ifdef bridgeringaggtrace
|
||||
fprintf(stderr,"agg coords are %u %u %u %u %u bridge coords are %u %u %u %u %u distance is %u\n",aggCoords[0],aggCoords[1],aggCoords[2],aggCoords[3],aggCoords[4],manhattanBridgeCoords[0],manhattanBridgeCoords[1],manhattanBridgeCoords[2],manhattanBridgeCoords[3],manhattanBridgeCoords[4], proc->manhattanDistanceToBridge);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
TRACE_ERR("Bridge coords(%8.8X): %d %d %d %d %d, %d. iambridge %d\n",bridgeCoords, pers.Network_Config.cnBridge_A,pers.Network_Config.cnBridge_B,pers.Network_Config.cnBridge_C,pers.Network_Config.cnBridge_D,pers.Network_Config.cnBridge_E,0, iambridge);
|
||||
|
||||
/* Allgather the ranks and bridgeCoords to determine the bridge
|
||||
rank and how many ranks belong to each bridge rank*/
|
||||
bridges = (sortstruct *) ADIOI_Malloc(sizeof(sortstruct) * size);
|
||||
|
||||
/* We're going to sort this structure by bridgeCoord:
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int rank;
|
||||
int bridgeCoord;
|
||||
} sortstruct;
|
||||
|
||||
and I want the rank that IS the bridge to sort first, so
|
||||
OR in '1' on non-bridge ranks that use a bridge coord.
|
||||
*/
|
||||
|
||||
/* My input to the collective */
|
||||
bridges[rank].rank = rank;
|
||||
bridges[rank].bridgeCoord = bridgeCoords;
|
||||
if(!iambridge)
|
||||
bridges[rank].bridgeCoord |= 1; /* I am not bridge, turn on bit */
|
||||
|
||||
|
||||
/* in-place gather: each rank contributes the two ints of its own bridges[rank] entry */
MPI_Allgather(MPI_IN_PLACE, 2, MPI_INT, bridges, 2, MPI_INT, comm);
|
||||
|
||||
/* after sorting, entries that share a bridge are adjacent */
qsort(bridges, size, sizeof(sortstruct), intsort);
|
||||
|
||||
/* Once the list is sorted walk through it to setup bridge
|
||||
info and find bridge ranks, etc. */
|
||||
|
||||
/* tempCoords/tempRank describe the bridge set currently being counted; the
   first entry of a set supplies the rank recorded as its bridge rank */
int tempCoords, tempRank, mincompute, maxcompute;
|
||||
tempCoords = bridges[0].bridgeCoord & ~1;
|
||||
tempRank = bridges[0].rank;
|
||||
|
||||
countPset=1;
|
||||
bridgeIndex = 0;
|
||||
/* start above any possible count so the first finished set lowers it */
mincompute = size+1;
|
||||
maxcompute = 1;
|
||||
|
||||
for(i=1; i<size; i++)
|
||||
{
|
||||
if((bridges[i].bridgeCoord & ~1) == tempCoords)
|
||||
countPset++; /* same bridge (pset), count it */
|
||||
else /* new bridge found */
|
||||
{
|
||||
#ifdef TRACE_ON
|
||||
if(rank == 0)
|
||||
TRACE_ERR("Bridge set %u, bridge rank %d (%#8.8X) has %d ranks\n",
|
||||
bridgeIndex, tempRank, tempCoords, countPset);
|
||||
#endif
|
||||
if(countPset > maxcompute)
|
||||
maxcompute = countPset;
|
||||
if(countPset < mincompute)
|
||||
mincompute = countPset;
|
||||
|
||||
/* Was this my bridge we finished? */
|
||||
if(tempCoords == bridgeCoords)
|
||||
{
|
||||
/* Am I the bridge rank? */
|
||||
if(tempRank == rank)
|
||||
iambridge = 1;
|
||||
else
|
||||
iambridge = 0; /* Another rank on my node may have taken over */
|
||||
TRACE_ERR("Rank %u, bridge set %u, bridge rank %d (%#8.8X) has %d ranks, iambridge %u\n",
|
||||
rank, bridgeIndex, tempRank, tempCoords, countPset,iambridge);
|
||||
bridgerank = tempRank;
|
||||
proc->myIOSize = countPset;
|
||||
proc->ioNodeIndex = bridgeIndex;
|
||||
}
|
||||
/* Setup next bridge */
|
||||
tempCoords = bridges[i].bridgeCoord & ~1;
|
||||
tempRank = bridges[i].rank;
|
||||
bridgeIndex++;
|
||||
countPset = 1;
|
||||
}
|
||||
}
|
||||
/* Process last bridge */
|
||||
|
||||
#ifdef TRACE_ON
|
||||
if(rank == 0)
|
||||
TRACE_ERR("Bridge set %u, bridge rank %d (%#8.8X) has %d ranks\n",
|
||||
bridgeIndex, tempRank, tempCoords, countPset);
|
||||
#endif
|
||||
if(countPset > maxcompute)
|
||||
maxcompute = countPset;
|
||||
if(countPset < mincompute)
|
||||
mincompute = countPset;
|
||||
|
||||
/* Was this my bridge? */
|
||||
if(tempCoords == bridgeCoords)
|
||||
{
|
||||
/* Am I the bridge rank? */
|
||||
if(tempRank == rank)
|
||||
iambridge = 1;
|
||||
else
|
||||
iambridge = 0; /* Another rank on my node may have taken over */
|
||||
bridgerank = tempRank;
|
||||
proc->myIOSize = countPset;
|
||||
proc->ioNodeIndex = bridgeIndex;
|
||||
}
|
||||
|
||||
|
||||
if(rank == 0)
|
||||
{
|
||||
/* Only rank 0 has a conf structure, fill in stuff as appropriate */
|
||||
conf->ioMinSize = mincompute;
|
||||
conf->ioMaxSize = maxcompute; /* equivalent to pset size */
|
||||
conf->numBridgeRanks = bridgeIndex+1;
|
||||
conf->nProcs = size;
|
||||
conf->cpuIDsize = hw.ppn;
|
||||
/*conf->virtualPsetSize = maxcompute * conf->cpuIDsize;*/
|
||||
|
||||
conf->nAggrs = n_aggrs;
|
||||
/* First pass gets nAggrs = -1 */
|
||||
if(conf->nAggrs <=0)
|
||||
conf->nAggrs = gpfsmpio_bg_nagg_pset;
|
||||
/* when the smallest bridge set has no more ranks than nAggrs, fall back to
   ioMinSize-1 aggregators, but never fewer than one */
if(conf->ioMinSize <= conf->nAggrs)
|
||||
conf->nAggrs = ADIOI_MAX(1,conf->ioMinSize-1); /* not including bridge itself */
|
||||
/* if(conf->nAggrs > conf->numBridgeRanks)
|
||||
conf->nAggrs = conf->numBridgeRanks;
|
||||
*/
|
||||
conf->aggRatio = 1. * conf->nAggrs / conf->ioMinSize /*virtualPsetSize*/;
|
||||
/* if(conf->aggRatio > 1) conf->aggRatio = 1.; */
|
||||
TRACE_ERR("n_aggrs %zd, conf->nProcs %zu, conf->ioMaxSize %zu, ADIOI_BG_NAGG_PSET_DFLT %zu,conf->numBridgeRanks %zu,conf->nAggrs %zu\n",(size_t)n_aggrs, (size_t)conf->nProcs, (size_t)conf->ioMaxSize, (size_t)ADIOI_BG_NAGG_PSET_DFLT,(size_t)conf->numBridgeRanks,(size_t)conf->nAggrs);
|
||||
TRACE_ERR("Maximum ranks under a bridge rank: %d, minimum: %d, nAggrs: %d, numBridgeRanks: %d pset dflt: %d naggrs: %d ratio: %f\n", maxcompute, mincompute, conf->nAggrs, conf->numBridgeRanks, ADIOI_BG_NAGG_PSET_DFLT, conf->nAggrs, conf->aggRatio);
|
||||
}
|
||||
|
||||
/* bridgerank is still -1 if this rank's bridge set was never matched in the sorted list */
ADIOI_Assert((bridgerank != -1));
|
||||
proc->bridgeRank = bridgerank;
|
||||
proc->iamBridge = iambridge;
|
||||
TRACE_ERR("Rank %d has bridge set index %d (bridge rank: %d) with %d other ranks, ioNodeIndex: %d\n", rank, proc->ioNodeIndex, bridgerank, proc->myIOSize, proc->ioNodeIndex);
|
||||
|
||||
ADIOI_Free(bridges);
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
ADIOI_BG_persInfo_free( ADIOI_BG_ConfInfo_t *conf, ADIOI_BG_ProcInfo_t *proc )
|
||||
{
|
||||
ADIOI_BG_ConfInfo_free( conf );
|
||||
ADIOI_BG_ProcInfo_free( proc );
|
||||
}
|
83
ompi/mca/io/romio314/romio/adio/ad_gpfs/bg/ad_bg_pset.h
Обычный файл
83
ompi/mca/io/romio314/romio/adio/ad_gpfs/bg/ad_bg_pset.h
Обычный файл
@ -0,0 +1,83 @@
|
||||
/* ---------------------------------------------------------------- */
|
||||
/* (C)Copyright IBM Corp. 2007, 2008 */
|
||||
/* ---------------------------------------------------------------- */
|
||||
/**
|
||||
* \file ad_bg_pset.h
|
||||
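* \brief Blue Gene pset information structures (ADIOI_BG_ProcInfo_t, ADIOI_BG_ConfInfo_t) and their public interface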
|
||||
*/
|
||||
|
||||
/* File: ad_bg_pset.h
|
||||
*
|
||||
* Defines two structures that keep BlueGene PSET specific information and their public interfaces:
|
||||
* . ADIOI_BG_ProcInfo_t object keeps specific information to each process
|
||||
* . ADIOI_BG_ConfInfo_t object keeps general information for the whole communicator, only kept
|
||||
* on process 0.
|
||||
*/
|
||||
|
||||
#ifndef AD_BG_PSET_H_
|
||||
#define AD_BG_PSET_H_
|
||||
|
||||
#ifdef HAVE_MPIX_H
|
||||
#include <mpix.h>
|
||||
#endif
|
||||
|
||||
/* Per-process information; the processes exchange these records among
 * themselves. */
typedef struct {
    int ioNodeIndex;            /* comparable to psetNum on BGL/BGP */
    int rank;                   /* rank of this process */
    int ionID;                  /* id of the ion this cn is using */
/*  int myCoords[5]; */
    int bridgeRank;             /* rank of my bridge node (or proxy) */
    unsigned char coreID;
    unsigned char threadID;     /* probably not useful, but better than plain padding */
    unsigned char __cpad[2];
    int myIOSize;               /* how many ranks share my bridge/IO node,
                                   i.e. the psetsize */
    int iamBridge;              /* is this rank itself the bridge rank? */
    int __ipad[2];
    unsigned nodeRank;          /* torus coords flattened to one integer, for
                                   use with gpfsmpio_bridgeringagg */
    unsigned numNodesInPartition;       /* physical node count of the job partition */
    unsigned manhattanDistanceToBridge; /* hops from this rank to the bridge node */
} ADIOI_BG_ProcInfo_t __attribute__((aligned(16)));
|
||||
|
||||
/* Communicator-wide information; only process 0 keeps one of these. */
typedef struct {
    int ioMinSize;              /* fewest ranks sharing one bridge node */
    int ioMaxSize;              /* most ranks sharing one bridge node;
                                   this value serves as the "psetsize" */
    int nAggrs;
    int numBridgeRanks;
/*  int virtualPsetSize;           ppn * pset size */
    int nProcs;
    int cpuIDsize;              /* num ppn */
    float aggRatio;

} ADIOI_BG_ConfInfo_t __attribute__((aligned(16)));
|
||||
|
||||
|
||||
/* Replace any earlier MIN definition with this one.  The macro yields the
 * smaller of its two arguments; each argument appears twice in the expansion,
 * so an argument with side effects is evaluated more than once. */
#undef MIN
|
||||
#define MIN(a,b) (((a)<(b) ? (a) : (b)))
|
||||
|
||||
|
||||
|
||||
|
||||
/* public funcs for ADIOI_BG_ProcInfo_t objects */
|
||||
ADIOI_BG_ProcInfo_t * ADIOI_BG_ProcInfo_new();
|
||||
ADIOI_BG_ProcInfo_t * ADIOI_BG_ProcInfo_new_n( int n );
|
||||
void ADIOI_BG_ProcInfo_free( ADIOI_BG_ProcInfo_t *info );
|
||||
|
||||
|
||||
/* public funcs for ADIOI_BG_ConfInfo_t objects */
|
||||
ADIOI_BG_ConfInfo_t * ADIOI_BG_ConfInfo_new ();
|
||||
void ADIOI_BG_ConfInfo_free( ADIOI_BG_ConfInfo_t *info );
|
||||
|
||||
|
||||
/* public funcs for a pair of ADIOI_BG_ConfInfo_t and ADIOI_BG_ProcInfo_t objects */
|
||||
void ADIOI_BG_persInfo_init( ADIOI_BG_ConfInfo_t *conf,
|
||||
ADIOI_BG_ProcInfo_t *proc,
|
||||
int s, int r, int n_aggrs, MPI_Comm comm);
|
||||
void ADIOI_BG_persInfo_free( ADIOI_BG_ConfInfo_t *conf,
|
||||
ADIOI_BG_ProcInfo_t *proc );
|
||||
|
||||
|
||||
#endif /* AD_BG_PSET_H_ */
|
16
ompi/mca/io/romio314/romio/adio/ad_gpfs/pe/Makefile.mk
Обычный файл
16
ompi/mca/io/romio314/romio/adio/ad_gpfs/pe/Makefile.mk
Обычный файл
@ -0,0 +1,16 @@
|
||||
## -*- Mode: Makefile; -*-
|
||||
## vim: set ft=automake :
|
||||
##
|
||||
## (C) 2012 by Argonne National Laboratory.
|
||||
## See COPYRIGHT in top-level directory.
|
||||
##
|
||||
|
||||
if BUILD_AD_PE
|
||||
|
||||
noinst_HEADERS += \
|
||||
adio/ad_gpfs/pe/ad_pe_aggrs.h
|
||||
|
||||
romio_other_sources += \
|
||||
adio/ad_gpfs/pe/ad_pe_aggrs.c
|
||||
|
||||
endif BUILD_AD_PE
|
276
ompi/mca/io/romio314/romio/adio/ad_gpfs/pe/ad_pe_aggrs.c
Обычный файл
276
ompi/mca/io/romio314/romio/adio/ad_gpfs/pe/ad_pe_aggrs.c
Обычный файл
@ -0,0 +1,276 @@
|
||||
/* ---------------------------------------------------------------- */
|
||||
/* (C)Copyright IBM Corp. 2007, 2008 */
|
||||
/* ---------------------------------------------------------------- */
|
||||
/**
|
||||
* \file ad_pe_aggrs.c
|
||||
* \brief The externally used function from this file is declared in ad_pe_aggrs.h
|
||||
*/
|
||||
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
* Copyright (C) 1997-2001 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
/*#define TRACE_ON */
|
||||
|
||||
#include "adio.h"
|
||||
#include "adio_cb_config_list.h"
|
||||
#include "../ad_gpfs.h"
|
||||
#include "ad_pe_aggrs.h"
|
||||
#include "mpiimpl.h"
|
||||
|
||||
#ifdef AGGREGATION_PROFILE
|
||||
#include "mpe.h"
|
||||
#endif
|
||||
|
||||
#ifdef USE_DBG_LOGGING
|
||||
#define AGG_DEBUG 1
|
||||
#endif
|
||||
|
||||
#ifndef TRACE_ERR
|
||||
# define TRACE_ERR(format...)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Compute the aggregator-related parameters that are required in 2-phase
|
||||
* collective IO of ADIO.
|
||||
* The parameters are
|
||||
* . the number of aggregators (proxies) : fd->hints->cb_nodes
|
||||
* . the ranks of the aggregators : fd->hints->ranklist
|
||||
* If MP_IONODEFILE is defined, POE determines all tasks on every node listed
|
||||
* in the node file and defines MP_IOTASKLIST with them, making them all
|
||||
* aggregators. Alternatively, the user can explictly set MP_IOTASKLIST
|
||||
* themselves. The format of the MP_IOTASKLIST is a colon-delimited list of
|
||||
* task ids, the first entry being the total number of aggregators, for example
|
||||
* to specify 4 aggregators on task ids 0,8,16,24 the value would be:
|
||||
* 4:0:8:16:24. If there is no MP_IONODEFILE, or MP_IOTASKLIST, then the
|
||||
* default aggregator selection is 1 task per node for every node of the job -
|
||||
* additionally, an environment variable MP_IOAGGR_CNT can be specified, which
|
||||
* defines the total number of aggregators, spread evenly across all the nodes.
|
||||
* The romio_cb_nodes and romio_cb_config_list hint user settings are ignored.
|
||||
*/
|
||||
int
|
||||
ADIOI_PE_gen_agg_ranklist(ADIO_File fd)
|
||||
{
|
||||
|
||||
int numAggs = 0;
|
||||
char *ioTaskList = getenv( "MP_IOTASKLIST" );
|
||||
char *ioAggrCount = getenv("MP_IOAGGR_CNT");
|
||||
int i,j;
|
||||
int inTERcommFlag = 0;
|
||||
|
||||
int myRank,commSize;
|
||||
MPI_Comm_rank(fd->comm, &myRank);
|
||||
MPI_Comm_size(fd->comm, &commSize);
|
||||
|
||||
MPI_Comm_test_inter(fd->comm, &inTERcommFlag);
|
||||
if (inTERcommFlag) {
|
||||
FPRINTF(stderr,"ERROR: ATTENTION: inTERcomms are not supported in MPI-IO - aborting....\n");
|
||||
perror("ADIOI_PE_gen_agg_ranklist:");
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
|
||||
if (ioTaskList) {
|
||||
int ioTaskListLen = strlen(ioTaskList);
|
||||
int ioTaskListPos = 0;
|
||||
char tmpBuf[8]; /* Big enough for 1M tasks (7 digits task ID). */
|
||||
tmpBuf[7] = '\0';
|
||||
for (i=0; i<7; i++) {
|
||||
tmpBuf[i] = *ioTaskList++; /* Maximum is 7 digits for 1 million. */
|
||||
ioTaskListPos++;
|
||||
if (*ioTaskList == ':') { /* If the next char is a ':' ends it. */
|
||||
tmpBuf[i+1] = '\0';
|
||||
break;
|
||||
}
|
||||
}
|
||||
numAggs = atoi(tmpBuf);
|
||||
if (numAggs == 0)
|
||||
FPRINTF(stderr,"ERROR: ATTENTION: Number of aggregators specified in MP_IOTASKLIST set at 0 - default aggregator selection will be used.\n");
|
||||
else if (!((numAggs > 0 ) && (numAggs <= commSize))) {
|
||||
FPRINTF(stderr,"ERROR: ATTENTION: The number of aggregators (%s) specified in MP_IOTASKLIST is outside the communicator task range of %d.\n",tmpBuf,commSize);
|
||||
numAggs = commSize;
|
||||
}
|
||||
fd->hints->ranklist = (int *) ADIOI_Malloc (numAggs * sizeof(int));
|
||||
|
||||
int aggIndex = 0;
|
||||
while (aggIndex < numAggs) {
|
||||
ioTaskList++; /* Advance past the ':' */
|
||||
ioTaskListPos++;
|
||||
int allDigits=1;
|
||||
for (i=0; i<7; i++) {
|
||||
if (*ioTaskList < '0' || *ioTaskList > '9')
|
||||
allDigits=0;
|
||||
tmpBuf[i] = *ioTaskList++;
|
||||
ioTaskListPos++;
|
||||
if ( (*ioTaskList == ':') || (*ioTaskList == '\0') ) {
|
||||
tmpBuf[i+1] = '\0';
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (allDigits) {
|
||||
int newAggRank = atoi(tmpBuf);
|
||||
if (!((newAggRank >= 0 ) && (newAggRank < commSize))) {
|
||||
FPRINTF(stderr,"ERROR: ATTENTION: The aggregator '%s' specified in MP_IOTASKLIST is not within the communicator task range of 0 to %d - it will be ignored.\n",tmpBuf,commSize-1);
|
||||
}
|
||||
else {
|
||||
int aggAlreadyAdded = 0;
|
||||
for (i=0;i<aggIndex;i++)
|
||||
if (fd->hints->ranklist[i] == newAggRank) {
|
||||
aggAlreadyAdded = 1;
|
||||
break;
|
||||
}
|
||||
if (!aggAlreadyAdded)
|
||||
fd->hints->ranklist[aggIndex++] = newAggRank;
|
||||
else
|
||||
FPRINTF(stderr,"ERROR: ATTENTION: The aggregator '%d' is specified multiple times in MP_IOTASKLIST - duplicates are ignored.\n",newAggRank);
|
||||
}
|
||||
}
|
||||
else {
|
||||
FPRINTF(stderr,"ERROR: ATTENTION: The aggregator '%s' specified in MP_IOTASKLIST is not a valid integer task id - it will be ignored.\n",tmpBuf);
|
||||
}
|
||||
|
||||
/* At the end check whether the list is shorter than specified. */
|
||||
if (ioTaskListPos == ioTaskListLen) {
|
||||
if (aggIndex == 0) {
|
||||
FPRINTF(stderr,"ERROR: ATTENTION: No aggregators were correctly specified in MP_IOTASKLIST - default aggregator selection will be used.\n");
|
||||
ADIOI_Free(fd->hints->ranklist);
|
||||
}
|
||||
else if (aggIndex < numAggs)
|
||||
FPRINTF(stderr,"ERROR: ATTENTION: %d aggregators were specified in MP_IOTASKLIST but only %d were correctly specified - setting the number of aggregators to %d.\n",numAggs, aggIndex,aggIndex);
|
||||
numAggs = aggIndex;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (numAggs == 0) {
|
||||
MPID_Comm *mpidCommData;
|
||||
|
||||
MPID_Comm_get_ptr(fd->comm,mpidCommData);
|
||||
int localSize = mpidCommData->local_size;
|
||||
|
||||
// get my node rank
|
||||
int myNodeRank = mpidCommData->intranode_table[mpidCommData->rank];
|
||||
|
||||
int *allNodeRanks = (int *) ADIOI_Malloc (localSize * sizeof(int));
|
||||
|
||||
allNodeRanks[myRank] = myNodeRank;
|
||||
MPI_Allgather(MPI_IN_PLACE, 1, MPI_INT, allNodeRanks, 1, MPI_INT, fd->comm);
|
||||
|
||||
#ifdef AGG_DEBUG
|
||||
printf("MPID_Comm data: local_size is %d\nintranode_table entries:\n",mpidCommData->local_size);
|
||||
for (i=0;i<localSize;i++) {
|
||||
printf("%d ",mpidCommData->intranode_table[i]);
|
||||
}
|
||||
printf("\ninternode_table entries:\n");
|
||||
for (i=0;i<localSize;i++) {
|
||||
printf("%d ",mpidCommData->internode_table[i]);
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
printf("\nallNodeRanks entries:\n");
|
||||
for (i=0;i<localSize;i++) {
|
||||
printf("%d ",allNodeRanks[i]);
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
#endif
|
||||
|
||||
if (ioAggrCount) {
|
||||
int cntType = -1;
|
||||
|
||||
if ( strcasecmp(ioAggrCount, "ALL") ) {
|
||||
if ( (cntType = atoi(ioAggrCount)) <= 0 ) {
|
||||
/* Input is other non-digit or less than 1 the assume */
|
||||
/* 1 aggregator per node. Note: atoi(-1) reutns -1. */
|
||||
/* No warning message given here -- done earlier. */
|
||||
cntType = -1;
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* ALL is specified set aggr count to localSize */
|
||||
cntType = -2;
|
||||
}
|
||||
switch(cntType) {
|
||||
case -1:
|
||||
/* 1 aggr/node case */
|
||||
{
|
||||
int rankListIndex = 0;
|
||||
fd->hints->ranklist = (int *) ADIOI_Malloc (localSize * sizeof(int));
|
||||
for (i=0;i<localSize;i++) {
|
||||
if (allNodeRanks[i] == 0) {
|
||||
fd->hints->ranklist[rankListIndex++] = i;
|
||||
numAggs++;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
case -2:
|
||||
/* ALL tasks case */
|
||||
fd->hints->ranklist = (int *) ADIOI_Malloc (localSize * sizeof(int));
|
||||
for (i=0;i<localSize;i++) {
|
||||
fd->hints->ranklist[i] = i;
|
||||
numAggs++;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
/* Specific aggr count case -- MUST be less than localSize, otherwise set to localSize */
|
||||
if (cntType > localSize)
|
||||
cntType = localSize;
|
||||
|
||||
numAggs = cntType;
|
||||
// Round-robin thru allNodeRanks - pick the 0's, then the 1's, etc
|
||||
int currentNodeRank = 0; // node rank currently being selected as aggregator
|
||||
int rankListIndex = 0;
|
||||
int currentAllNodeIndex = 0;
|
||||
|
||||
fd->hints->ranklist = (int *) ADIOI_Malloc (numAggs * sizeof(int));
|
||||
|
||||
while (rankListIndex < numAggs) {
|
||||
int foundEntry = 0;
|
||||
while (!foundEntry && (currentAllNodeIndex < localSize)) {
|
||||
if (allNodeRanks[currentAllNodeIndex] == currentNodeRank) {
|
||||
fd->hints->ranklist[rankListIndex++] = currentAllNodeIndex;
|
||||
foundEntry = 1;
|
||||
}
|
||||
currentAllNodeIndex++;
|
||||
}
|
||||
if (!foundEntry) {
|
||||
currentNodeRank++;
|
||||
currentAllNodeIndex = 0;
|
||||
}
|
||||
} // while
|
||||
break;
|
||||
} // switch(cntType)
|
||||
} // if (ioAggrCount)
|
||||
|
||||
else { // default is 1 aggregator per node
|
||||
// take the 0 entries from allNodeRanks
|
||||
int rankListIndex = 0;
|
||||
fd->hints->ranklist = (int *) ADIOI_Malloc (localSize * sizeof(int));
|
||||
for (i=0;i<localSize;i++) {
|
||||
if (allNodeRanks[i] == 0) {
|
||||
fd->hints->ranklist[rankListIndex++] = i;
|
||||
numAggs++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ADIOI_Free(allNodeRanks);
|
||||
|
||||
}
|
||||
|
||||
if ( getenv("MP_I_SHOW_AGGRS") ) {
|
||||
if (myRank == 0) {
|
||||
printf("Agg rank list of %d generated:\n", numAggs);
|
||||
for (i=0;i<numAggs;i++) {
|
||||
printf("%d ",fd->hints->ranklist[i]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
fd->hints->cb_nodes = numAggs;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
30
ompi/mca/io/romio314/romio/adio/ad_gpfs/pe/ad_pe_aggrs.h
Обычный файл
30
ompi/mca/io/romio314/romio/adio/ad_gpfs/pe/ad_pe_aggrs.h
Обычный файл
@ -0,0 +1,30 @@
|
||||
/* ---------------------------------------------------------------- */
|
||||
/* (C)Copyright IBM Corp. 2007, 2008 */
|
||||
/* ---------------------------------------------------------------- */
|
||||
/**
|
||||
* \file ad_pe_aggrs.h
|
||||
* \brief Declares the PE-specific I/O aggregator rank list generation for the GPFS driver
|
||||
*/
|
||||
|
||||
/*
|
||||
*
|
||||
* Declares functions specific for the PE platform within the GPFS
|
||||
* parallel I/O solution. For now simply processes the MP_IOTASKLIST
|
||||
* env var.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef AD_PE_AGGRS_H_
|
||||
#define AD_PE_AGGRS_H_
|
||||
|
||||
#include "adio.h"
|
||||
#include <sys/stat.h>
|
||||
|
||||
/* Fallback definition used only when GPFS_SUPER_MAGIC has not been defined
 * already.  The bytes of 0x47504653 are the ASCII characters 'G','P','F','S'. */
#if !defined(GPFS_SUPER_MAGIC)
|
||||
#define GPFS_SUPER_MAGIC (0x47504653)
|
||||
#endif
|
||||
|
||||
/* generate a list of I/O aggregators following a methodology specific for PE */
|
||||
int ADIOI_PE_gen_agg_ranklist(ADIO_File fd);
|
||||
|
||||
#endif /* AD_PE_AGGRS_H_ */
|
27
ompi/mca/io/romio314/romio/adio/ad_gridftp/Makefile.mk
Обычный файл
27
ompi/mca/io/romio314/romio/adio/ad_gridftp/Makefile.mk
Обычный файл
@ -0,0 +1,27 @@
|
||||
## -*- Mode: Makefile; -*-
|
||||
## vim: set ft=automake :
|
||||
##
|
||||
## (C) 2011 by Argonne National Laboratory.
|
||||
## See COPYRIGHT in top-level directory.
|
||||
##
|
||||
|
||||
if BUILD_AD_GRIDFTP
|
||||
|
||||
noinst_HEADERS += adio/ad_gridftp/ad_gridftp.h
|
||||
|
||||
romio_other_sources += \
|
||||
adio/ad_gridftp/ad_gridftp_close.c \
|
||||
adio/ad_gridftp/ad_gridftp_open.c \
|
||||
adio/ad_gridftp/ad_gridftp_read.c \
|
||||
adio/ad_gridftp/ad_gridftp_write.c \
|
||||
adio/ad_gridftp/ad_gridftp_fcntl.c \
|
||||
adio/ad_gridftp/ad_gridftp_flush.c \
|
||||
adio/ad_gridftp/ad_gridftp_resize.c \
|
||||
adio/ad_gridftp/ad_gridftp_hints.c \
|
||||
adio/ad_gridftp/ad_gridftp_delete.c \
|
||||
adio/ad_gridftp/ad_gridftp.c \
|
||||
adio/ad_gridftp/globus_routines.c \
|
||||
adio/ad_gridftp/ad_gridftp_features.c
|
||||
|
||||
endif BUILD_AD_GRIDFTP
|
||||
|
37
ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp.c
Обычный файл
37
ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp.c
Обычный файл
@ -0,0 +1,37 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_gridftp.h"
|
||||
|
||||
/* adioi.h has the ADIOI_Fns_struct define */
|
||||
#include "adioi.h"
|
||||
|
||||
struct ADIOI_Fns_struct ADIO_GRIDFTP_operations = {
|
||||
ADIOI_GRIDFTP_Open, /* Open */
|
||||
ADIOI_GRIDFTP_ReadContig, /* ReadContig */
|
||||
ADIOI_GRIDFTP_WriteContig, /* WriteContig */
|
||||
ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */
|
||||
ADIOI_GEN_WriteStridedColl, /* WriteStridedColl */
|
||||
ADIOI_GEN_SeekIndividual, /* SeekIndividual */
|
||||
ADIOI_GRIDFTP_Fcntl, /* Fcntl */
|
||||
ADIOI_GRIDFTP_SetInfo, /* SetInfo */
|
||||
ADIOI_GRIDFTP_ReadStrided, /* ReadStrided */
|
||||
ADIOI_GRIDFTP_WriteStrided, /* WriteStrided */
|
||||
ADIOI_GRIDFTP_Close, /* Close */
|
||||
ADIOI_FAKE_IreadContig, /* IreadContig */
|
||||
ADIOI_FAKE_IwriteContig, /* IwriteContig */
|
||||
ADIOI_FAKE_IODone, /* ReadDone */
|
||||
ADIOI_FAKE_IODone, /* WriteDone */
|
||||
ADIOI_FAKE_IOComplete, /* ReadComplete */
|
||||
ADIOI_FAKE_IOComplete, /* WriteComplete */
|
||||
ADIOI_FAKE_IreadStrided, /* IreadStrided */
|
||||
ADIOI_FAKE_IwriteStrided, /* IwriteStrided */
|
||||
ADIOI_GRIDFTP_Flush, /* Flush */
|
||||
ADIOI_GRIDFTP_Resize, /* Resize */
|
||||
ADIOI_GRIDFTP_Delete, /* Delete */
|
||||
ADIOI_GRIDFTP_Feature, /* Features */
|
||||
};
|
96
ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp.h
Обычный файл
96
ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp.h
Обычный файл
@ -0,0 +1,96 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
* Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#ifndef AD_GRIDFTP_INCLUDE
|
||||
#define AD_GRIDFTP_INCLUDE
|
||||
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/uio.h>
|
||||
#include <fcntl.h>
|
||||
#include "adio.h"
|
||||
#include <globus_ftp_client.h>
|
||||
|
||||
/* These live in globus_routines.c */
|
||||
extern int num_gridftp_handles;
|
||||
#ifndef ADIO_GRIDFTP_HANDLES_MAX
|
||||
#define ADIO_GRIDFTP_HANDLES_MAX 200
|
||||
#endif /* ! ADIO_GRIDFTP_HANDLES_MAX */
|
||||
extern globus_ftp_client_handle_t gridftp_fh[ADIO_GRIDFTP_HANDLES_MAX];
|
||||
extern globus_ftp_client_operationattr_t oattr[ADIO_GRIDFTP_HANDLES_MAX];
|
||||
|
||||
|
||||
/* TODO: weed out the now-unused prototypes */
|
||||
void ADIOI_GRIDFTP_Open(ADIO_File fd, int *error_code);
|
||||
void ADIOI_GRIDFTP_Close(ADIO_File fd, int *error_code);
|
||||
void ADIOI_GRIDFTP_ReadContig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int
|
||||
*error_code);
|
||||
void ADIOI_GRIDFTP_WriteContig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int
|
||||
*error_code);
|
||||
void ADIOI_GRIDFTP_IwriteContig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Request *request, int
|
||||
*error_code);
|
||||
void ADIOI_GRIDFTP_IreadContig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Request *request, int
|
||||
*error_code);
|
||||
int ADIOI_GRIDFTP_ReadDone(ADIO_Request *request, ADIO_Status *status, int
|
||||
*error_code);
|
||||
int ADIOI_GRIDFTP_WriteDone(ADIO_Request *request, ADIO_Status *status, int
|
||||
*error_code);
|
||||
void ADIOI_GRIDFTP_ReadComplete(ADIO_Request *request, ADIO_Status *status, int
|
||||
*error_code);
|
||||
void ADIOI_GRIDFTP_WriteComplete(ADIO_Request *request, ADIO_Status *status,
|
||||
int *error_code);
|
||||
void ADIOI_GRIDFTP_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct,
|
||||
int *error_code);
|
||||
void ADIOI_GRIDFTP_WriteStrided(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status,
|
||||
int *error_code);
|
||||
void ADIOI_GRIDFTP_ReadStrided(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int
|
||||
*error_code);
|
||||
void ADIOI_GRIDFTP_WriteStridedColl(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int
|
||||
*error_code);
|
||||
void ADIOI_GRIDFTP_ReadStridedColl(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int
|
||||
*error_code);
|
||||
void ADIOI_GRIDFTP_IreadStrided(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Request *request, int
|
||||
*error_code);
|
||||
void ADIOI_GRIDFTP_IwriteStrided(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Request *request, int
|
||||
*error_code);
|
||||
void ADIOI_GRIDFTP_Flush(ADIO_File fd, int *error_code);
|
||||
void ADIOI_GRIDFTP_Resize(ADIO_File fd, ADIO_Offset size, int *error_code);
|
||||
void ADIOI_GRIDFTP_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code);
|
||||
void ADIOI_GRIDFTP_Get_shared_fp(ADIO_File fd, int size,
|
||||
ADIO_Offset *shared_fp,
|
||||
int *error_code);
|
||||
void ADIOI_GRIDFTP_Set_shared_fp(ADIO_File fd, ADIO_Offset offset,
|
||||
int *error_code);
|
||||
void ADIOI_GRIDFTP_Delete(char *filename, int *error_code);
|
||||
|
||||
void globus_err_handler(const char *routine, const char *caller,
|
||||
globus_result_t result);
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
50
ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_close.c
Обычный файл
50
ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_close.c
Обычный файл
@ -0,0 +1,50 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_gridftp.h"
|
||||
#include "adioi.h"
|
||||
|
||||
/*
 * ADIOI_GRIDFTP_Close - release the GridFTP resources attached to a file.
 *
 * fd          open ADIO file; fd->fd_sys indexes the oattr[] / gridftp_fh[]
 *             tables
 * error_code  out: MPI_SUCCESS, or an MPI_ERR_IO-class code if either Globus
 *             destroy call fails
 *
 * All processes of fd->comm synchronize on a barrier before any teardown.
 * On success the file's fd_sys is invalidated, its file pointers are reset
 * and the handle counter is decremented.
 *
 * NOTE(review): when the operation attribute cannot be destroyed the function
 * returns before destroying the client handle and before decrementing
 * num_gridftp_handles -- confirm whether that is intended.
 */
void ADIOI_GRIDFTP_Close(ADIO_File fd, int *error_code)
{
    static char myname[]="ADIOI_GRIDFTP_Close";
    globus_result_t result;

    MPI_Barrier(fd->comm);

    /* Destroy the ftp handle and opattr */
    result = globus_ftp_client_operationattr_destroy(&(oattr[fd->fd_sys]));
    if (result != GLOBUS_SUCCESS )
    {
        globus_err_handler("globus_ftp_client_operationattr_destroy",
                           myname,result);
        *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                        myname, __LINE__, MPI_ERR_IO,
                        "**io",
                        "**io %s",globus_object_printable_to_string(globus_error_get(result)));
        return;
    }

    result=globus_ftp_client_handle_destroy(&(gridftp_fh[fd->fd_sys]));
    if (result != GLOBUS_SUCCESS )
    {
        globus_err_handler("globus_ftp_client_handle_destroy",
                           myname,result);
        *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                        myname, __LINE__, MPI_ERR_IO,
                        "**io",
                        "**io %s", globus_object_printable_to_string(globus_error_get(result)));
        return;
    }

    /* Mark the file as closed and reset its position bookkeeping. */
    fd->fd_sys = -1;
    fd->fp_ind=0;
    fd->fp_sys_posn=0;
    num_gridftp_handles--;

    *error_code = MPI_SUCCESS;
}
|
95
ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_delete.c
Обычный файл
95
ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_delete.c
Обычный файл
@ -0,0 +1,95 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_gridftp.h"
|
||||
#include "adioi.h"
|
||||
|
||||
static globus_mutex_t lock;
|
||||
static globus_cond_t cond;
|
||||
static globus_bool_t delete_done, delete_success;
|
||||
static void delete_cb(void *myarg, globus_ftp_client_handle_t *handle, globus_object_t *error)
|
||||
{
|
||||
|
||||
if (error)
|
||||
{
|
||||
FPRINTF(stderr, "%s\n", globus_object_printable_to_string(error));
|
||||
}
|
||||
else
|
||||
{
|
||||
delete_success=GLOBUS_TRUE;
|
||||
}
|
||||
delete_done=GLOBUS_TRUE;
|
||||
}
|
||||
|
||||
void ADIOI_GRIDFTP_Delete(char *filename, int *error_code)
|
||||
{
|
||||
char myname[]="ADIOI_GRIDFTP_Delete";
|
||||
int myrank, nprocs;
|
||||
globus_ftp_client_handle_t handle;
|
||||
globus_result_t result;
|
||||
|
||||
*error_code = MPI_SUCCESS;
|
||||
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
|
||||
|
||||
globus_module_activate(GLOBUS_FTP_CLIENT_MODULE);
|
||||
result=globus_ftp_client_handle_init(&handle,GLOBUS_NULL);
|
||||
|
||||
if (result != GLOBUS_SUCCESS )
|
||||
{
|
||||
globus_err_handler("globus_ftp_client_handle_init",myname,result);
|
||||
*error_code= MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
MPI_ERR_IO,
|
||||
"**io", "**io %s",
|
||||
globus_object_printable_to_string(globus_error_get(result)));
|
||||
return;
|
||||
}
|
||||
|
||||
delete_done=GLOBUS_FALSE;
|
||||
delete_success=GLOBUS_FALSE;
|
||||
result=globus_ftp_client_delete(&handle,filename,GLOBUS_NULL,delete_cb,GLOBUS_NULL);
|
||||
if (result != GLOBUS_SUCCESS )
|
||||
{
|
||||
globus_err_handler("globus_ftp_client_delete",myname,result);
|
||||
*error_code= MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
MPI_ERR_IO,
|
||||
"**io", "**io %s",
|
||||
globus_object_printable_to_string(globus_error_get(result)));
|
||||
return;
|
||||
}
|
||||
globus_mutex_lock(&lock);
|
||||
while ( delete_done!=GLOBUS_TRUE )
|
||||
globus_cond_wait(&cond,&lock);
|
||||
globus_mutex_unlock(&lock);
|
||||
result=globus_ftp_client_handle_destroy(&handle);
|
||||
if (result != GLOBUS_SUCCESS )
|
||||
{
|
||||
globus_err_handler("globus_ftp_client_handle_destroy",myname,result);
|
||||
*error_code= MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
MPI_ERR_IO,
|
||||
"**io", "**io %s",
|
||||
globus_object_printable_to_string(globus_error_get(result)));
|
||||
return;
|
||||
}
|
||||
|
||||
if ( delete_success!=GLOBUS_TRUE )
|
||||
{
|
||||
*error_code= MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
MPI_ERR_IO,
|
||||
"**io", "**io %s",
|
||||
globus_object_printable_to_string(globus_error_get(result)));
|
||||
}
|
||||
}
|
91
ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_fcntl.c
Обычный файл
91
ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_fcntl.c
Обычный файл
@ -0,0 +1,91 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_gridftp.h"
|
||||
#include "adioi.h"
|
||||
#include "adio_extern.h"
|
||||
|
||||
globus_mutex_t fcntl_size_lock;
|
||||
globus_cond_t fcntl_size_cond;
|
||||
globus_bool_t fcntl_size_done;
|
||||
|
||||
void fcntl_size_cb(void *myargs, globus_ftp_client_handle_t *handle,
|
||||
globus_object_t *error)
|
||||
{
|
||||
if (error)
|
||||
{
|
||||
FPRINTF(stderr, "%s\n", globus_object_printable_to_string(error));
|
||||
}
|
||||
globus_mutex_lock(&fcntl_size_lock);
|
||||
fcntl_size_done=GLOBUS_TRUE;
|
||||
globus_cond_signal(&fcntl_size_cond);
|
||||
globus_mutex_unlock(&fcntl_size_lock);
|
||||
}
|
||||
|
||||
/*
 * ADIOI_GRIDFTP_Fcntl - file-control operations for the GridFTP driver.
 *
 * fd            open ADIO file
 * flag          requested operation
 * fcntl_struct  in/out argument block; ->fsize is written for
 *               ADIO_FCNTL_GET_FSIZE, ->diskspace is read for
 *               ADIO_FCNTL_SET_DISKSPACE
 * error_code    out: MPI_SUCCESS or an MPI error code
 *
 * ADIO_FCNTL_GET_FSIZE      asks the server for the file size and blocks
 *                           until fcntl_size_cb() reports completion.
 * ADIO_FCNTL_SET_DISKSPACE  is delegated to ADIOI_GEN_Prealloc().
 * ADIO_FCNTL_SET_ATOMICITY  and any other flag yield an MPI_ERR_ARG-class
 *                           error.
 */
void ADIOI_GRIDFTP_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct,
                         int *error_code)
{
    char myname[]="ADIOI_GRIDFTP_Fcntl";

    *error_code = MPI_SUCCESS;

    switch(flag) {
    case ADIO_FCNTL_GET_FSIZE:
        {
            globus_result_t result;
            globus_off_t fsize=0;

            globus_mutex_init(&fcntl_size_lock,GLOBUS_NULL);
            globus_cond_init(&fcntl_size_cond,GLOBUS_NULL);
            fcntl_size_done=GLOBUS_FALSE;
            if ( (result=globus_ftp_client_size(&(gridftp_fh[fd->fd_sys]),
                                                fd->filename,
                                                &(oattr[fd->fd_sys]),
                                                &(fsize),
                                                fcntl_size_cb,
                                                GLOBUS_NULL))!=GLOBUS_SUCCESS )
            {
                globus_err_handler("globus_ftp_client_size",myname,result);
                *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                MPIR_ERR_RECOVERABLE,
                                myname, __LINE__, MPI_ERR_IO,
                                "**io", "**io %s",
                                globus_object_printable_to_string(globus_error_get(result)));
                /* No operation was started, so no callback will run:
                 * release the synchronization objects before leaving. */
                globus_cond_destroy(&fcntl_size_cond);
                globus_mutex_destroy(&fcntl_size_lock);
                return;
            }

            /* Wait for fcntl_size_cb().  globus_cond_wait() takes the
             * condition first and the mutex second. */
            globus_mutex_lock(&fcntl_size_lock);
            while ( fcntl_size_done!=GLOBUS_TRUE )
                globus_cond_wait(&fcntl_size_cond,&fcntl_size_lock);
            globus_mutex_unlock(&fcntl_size_lock);

            globus_mutex_destroy(&fcntl_size_lock);
            globus_cond_destroy(&fcntl_size_cond);
            fcntl_struct->fsize=fsize;
        }
        *error_code = MPI_SUCCESS;
        break;

    case ADIO_FCNTL_SET_DISKSPACE:
        ADIOI_GEN_Prealloc(fd, fcntl_struct->diskspace, error_code);
        break;

    case ADIO_FCNTL_SET_ATOMICITY:
        /* not supported by this driver: handled as an invalid flag */
    default:
        *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                        MPIR_ERR_RECOVERABLE,
                        myname, __LINE__,
                        MPI_ERR_ARG,
                        "**flag", "**flag %d", flag);
    }
}
|
18
ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_features.c
Обычный файл
18
ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_features.c
Обычный файл
@ -0,0 +1,18 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
*
|
||||
* (C) 2008 by Argonne National Laboratory.
|
||||
* See COPYRIGHT in top-level directory.
|
||||
*/
|
||||
/*
 * ADIOI_GRIDFTP_Feature - report whether an optional ADIO feature is
 * available.  Every flag, listed or not, is answered with 0.
 */
int ADIOI_GRIDFTP_Feature (ADIO_File fd, int flag)
{
    int supported = 0;

    switch (flag) {
    /* The flags are enumerated for reference; all share the same answer. */
    case ADIO_SCALABLE_OPEN:
    case ADIO_SHARED_FP:
    case ADIO_LOCKS:
    case ADIO_SEQUENTIAL:
    case ADIO_DATA_SIEVING_WRITES:
    default:
        supported = 0;
        break;
    }

    return supported;
}
|
19
ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_flush.c
Обычный файл
19
ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_flush.c
Обычный файл
@ -0,0 +1,19 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_gridftp.h"
|
||||
#include "adioi.h"
|
||||
|
||||
/* GridFTP doesn't give you a way to cache writes on the client side, so
|
||||
this is essentially a no-op */
|
||||
/* if there is a mechanism where we can ask the server to flush data to disk we
|
||||
* should do it here. I'll leave that up to Troy */
|
||||
|
||||
/*
 * ADIOI_GRIDFTP_Flush - flush entry point of the GridFTP driver.
 *
 * fd          open ADIO file (unused)
 * error_code  out: always MPI_SUCCESS
 *
 * No data is flushed here, but the status must still be written so callers
 * never read an indeterminate *error_code.
 */
void ADIOI_GRIDFTP_Flush(ADIO_File fd, int *error_code)
{
    *error_code = MPI_SUCCESS;
    return;
}
|
68
ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_hints.c
Обычный файл
68
ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_hints.c
Обычный файл
@ -0,0 +1,68 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
/*
|
||||
|
||||
Valid hints for ftp:// and gsiftp:// URLs (aside from the std. ones):
|
||||
|
||||
ftp_control_mode extended|block|stream|compressed
|
||||
(default extended for gsiftp:// URLs and stream for ftp:// URLs)
|
||||
|
||||
parallelism integer number of simultaneous threads connecting to
|
||||
ftp server (default 1)
|
||||
|
||||
striped_ftp true|false or enable|disable; enables gsiftp striped data transfer
|
||||
|
||||
tcp_buffer integer size of tcp stream buffers in bytes
|
||||
|
||||
transfer_type ascii or binary (default binary)
|
||||
|
||||
These *must* be specified at open time currently.
|
||||
*/
|
||||
|
||||
#include "ad_gridftp.h"
|
||||
#include "adioi.h"
|
||||
|
||||
/*
 * ADIOI_GRIDFTP_SetInfo - merge user hints into the file's info object.
 *
 * fd          ADIO file whose ->info is created or updated
 * users_info  hints supplied by the caller, or MPI_INFO_NULL
 * error_code  out: set by ADIOI_GEN_SetInfo()
 *
 * If the file has no info object yet, one is created (empty, or as a
 * duplicate of users_info).  Otherwise every key/value pair of users_info is
 * copied into fd->info.  The generic ROMIO hint processing runs afterwards.
 */
void ADIOI_GRIDFTP_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
{
    /* "No info object" is MPI_INFO_NULL, which need not be a zero handle;
     * ADIOI_GRIDFTP_Open() tests it the same way.  The zero test is kept so
     * a zero-initialized fd->info is still treated as unset. */
    if (!(fd->info) || fd->info==MPI_INFO_NULL)
    {
        if ( users_info==MPI_INFO_NULL )
        {
            /* This must be part of the open call. */
            MPI_Info_create(&(fd->info));
        }
        else
        {
            MPI_Info_dup(users_info,&(fd->info));
        }
    }
    else
    {
        int i,nkeys,valuelen,flag;
        /* One extra byte each: the lengths reported by MPI exclude the
         * terminating NUL, and valuelen may be as large as
         * MPI_MAX_INFO_VAL. */
        char key[MPI_MAX_INFO_KEY+1], value[MPI_MAX_INFO_VAL+1];

        if ( users_info!=MPI_INFO_NULL )
        {
            MPI_Info_get_nkeys(users_info,&nkeys);
            for (i=0;i<nkeys;i++)
            {
                MPI_Info_get_nthkey(users_info,i,key);
                MPI_Info_get_valuelen(users_info,key,&valuelen,&flag);
                if (flag)
                {
                    ADIOI_Info_get(users_info,key,valuelen,value,&flag);
                    if (flag) ADIOI_Info_set(fd->info,key,value);
                }
            }
        }
    }

    /* let the generic ROMIO and MPI-I/O stuff happen... */
    ADIOI_GEN_SetInfo(fd, users_info, error_code);
}
|
343
ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_open.c
Обычный файл
343
ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_open.c
Обычный файл
@ -0,0 +1,343 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
* Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_gridftp.h"
|
||||
#include "adioi.h"
|
||||
|
||||
static globus_mutex_t lock;
|
||||
static globus_cond_t cond;
|
||||
|
||||
static globus_bool_t file_exists,exists_done;
|
||||
static void exists_cb(void *myargs, globus_ftp_client_handle_t *handle, globus_object_t *error)
|
||||
{
|
||||
if (error)
|
||||
{
|
||||
FPRINTF(stderr, "%s\n", globus_object_printable_to_string(error));
|
||||
}
|
||||
else
|
||||
{
|
||||
file_exists=GLOBUS_TRUE;
|
||||
}
|
||||
exists_done=GLOBUS_TRUE;
|
||||
}
|
||||
|
||||
static globus_bool_t touch_ctl_done;
|
||||
static void touch_ctl_cb(void *myargs, globus_ftp_client_handle_t *handle, globus_object_t *error)
|
||||
{
|
||||
if (error)
|
||||
{
|
||||
FPRINTF(stderr, "%s\n", globus_object_printable_to_string(error));
|
||||
}
|
||||
globus_mutex_lock(&lock);
|
||||
touch_ctl_done=GLOBUS_TRUE;
|
||||
globus_cond_signal(&cond);
|
||||
globus_mutex_unlock(&lock);
|
||||
}
|
||||
|
||||
static void touch_data_cb(void *myargs, globus_ftp_client_handle_t *handle, globus_object_t *error,
|
||||
globus_byte_t *buffer, globus_size_t length, globus_off_t offset,
|
||||
globus_bool_t eof)
|
||||
{
|
||||
if (error)
|
||||
FPRINTF(stderr, "%s\n", globus_object_printable_to_string(error));
|
||||
globus_ftp_client_register_read(handle,buffer,length,touch_data_cb,myargs);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
 * ADIOI_GRIDFTP_Open - open a file addressed by an ftp:// or gsiftp:// URL.
 *
 * fd          ADIO file being opened; on success fd->fd_sys holds the index
 *             of its entries in gridftp_fh[] and oattr[], on failure it is
 *             set to -1 (except where noted below)
 * error_code  out: MPI_SUCCESS or an MPI_ERR_IO-class code
 *
 * Steps:
 *   1. initialize the handle and operation attributes;
 *   2. apply access-mode settings and the hints found in fd->info
 *      (ftp_control_mode, parallelism, striped_ftp/striping_factor,
 *      tcp_buffer, transfer_type);
 *   3. create the client handle;
 *   4. rank 0 probes for the file and the answer is broadcast;
 *   5. depending on the access mode, rank 0 creates the file, the open fails
 *      (ADIO_EXCL on an existing file), or a warning is printed (ADIO_RDONLY
 *      on a missing file).
 *
 * NOTE(review): several error paths return on rank 0 only, before the
 * MPI_Barrier/MPI_Bcast that the other ranks of fd->comm still enter --
 * confirm how callers recover from that.
 */
void ADIOI_GRIDFTP_Open(ADIO_File fd, int *error_code)
{
    static char myname[]="ADIOI_GRIDFTP_Open";
    int myrank, nprocs, keyfound;
    char hintval[MPI_MAX_INFO_VAL+1];
    globus_ftp_client_handleattr_t hattr;
    globus_result_t result;

    MPI_Comm_size(fd->comm, &nprocs);
    MPI_Comm_rank(fd->comm, &myrank);

    /* activate Globus ftp client module -- can be called multiple times, so
       it's safest to call once per file/connection */
    globus_module_activate(GLOBUS_FTP_CLIENT_MODULE);
    fd->fd_sys = num_gridftp_handles;
    /* No shared file pointers for now */
    fd->shared_fp_fname = NULL;
    *error_code = MPI_SUCCESS;

    /* Access modes mean something very different here than they
       would on a "real" filesystem...  As a result, the amode and hint
       processing here is intermingled and a little weird because many
       of them have to do with the connection rather than the file itself.
       The thing that sucks about this is that read and write ops will
       have to check themselves if the file is being accessed rdonly, rdwr,
       or wronly.
       */
    result=globus_ftp_client_handleattr_init(&hattr);
    if ( result != GLOBUS_SUCCESS )
    {
        globus_err_handler("globus_ftp_client_handleattr_init",
                           myname,result);
        fd->fd_sys = -1;
        *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                        myname, __LINE__, MPI_ERR_IO,
                        "**io",
                        "**io %s", globus_object_printable_to_string(globus_error_get(result)));
        return;
    }
    result = globus_ftp_client_operationattr_init(&(oattr[fd->fd_sys]));
    if ( result != GLOBUS_SUCCESS )
    {
        globus_err_handler("globus_ftp_client_operationattr_init",
                           myname,result);
        fd->fd_sys = -1;
        *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                        myname, __LINE__, MPI_ERR_IO,
                        "**io",
                        "**io %s", globus_object_printable_to_string(globus_error_get(result)));
        return;
    }

    /* Always use connection caching unless told otherwise */
    result=globus_ftp_client_handleattr_set_cache_all(&hattr,GLOBUS_TRUE);
    if ( result !=GLOBUS_SUCCESS )
        globus_err_handler("globus_ftp_client_handleattr_set_cache_all",myname,result);

    /* Assume that it's safe to cache a file if it's read-only */
    if ( (fd->access_mode&ADIO_RDONLY) &&
         (result=globus_ftp_client_handleattr_add_cached_url(&hattr,fd->filename))!=GLOBUS_SUCCESS )
        globus_err_handler("globus_ftp_client_handleattr_add_cached_url",myname,result);

    /* Since we're (almost by definition) doing things that FTP S (stream)
       control mode can't handle, default to E (extended block) control mode
       for gsiftp:// URLs.  ftp:// URLs use standard stream control mode
       by default.  This behavior can be overridden by the ftp_control_mode
       hint.  (The default selection below is currently disabled.) */

    /*
    if ( !strncmp(fd->filename,"gsiftp:",7) &&
         (result=globus_ftp_client_operationattr_set_mode(&(oattr[fd->fd_sys]),GLOBUS_FTP_CONTROL_MODE_EXTENDED_BLOCK))!=GLOBUS_SUCCESS )
        globus_err_handler("globus_ftp_client_operationattr_set_mode",myname,result);
    else if ( !strncmp(fd->filename,"ftp:",4) &&
              (result=globus_ftp_client_operationattr_set_mode(&(oattr[fd->fd_sys]),GLOBUS_FTP_CONTROL_MODE_STREAM))!=GLOBUS_SUCCESS )
        globus_err_handler("globus_ftp_client_operationattr_set_mode",myname,result);
    */

    /* Set append mode if necessary */
    if ( (fd->access_mode&ADIO_APPEND) &&
         ((result=globus_ftp_client_operationattr_set_append(&(oattr[fd->fd_sys]),GLOBUS_TRUE))!=GLOBUS_SUCCESS) )
        globus_err_handler("globus_ftp_client_operationattr_set_append",myname,result);

    /* Other hint and amode processing that would affect hattr and/or
       oattr[] (eg. parallelism, striping, etc.) goes here */
    if ( fd->info!=MPI_INFO_NULL )
    {
        ADIOI_Info_get(fd->info,"ftp_control_mode",MPI_MAX_INFO_VAL,hintval,&keyfound);
        if ( keyfound )
        {
            if ( ( !strcmp(hintval,"extended") || !strcmp(hintval,"extended_block") ) &&
                 (result=globus_ftp_client_operationattr_set_mode(&(oattr[fd->fd_sys]),GLOBUS_FTP_CONTROL_MODE_EXTENDED_BLOCK))!=GLOBUS_SUCCESS )
                globus_err_handler("globus_ftp_client_operationattr_set_mode",myname,result);
            else if ( !strcmp(hintval,"block") &&
                      (result=globus_ftp_client_operationattr_set_mode(&(oattr[fd->fd_sys]),GLOBUS_FTP_CONTROL_MODE_BLOCK))!=GLOBUS_SUCCESS )
                globus_err_handler("globus_ftp_client_operationattr_set_mode",myname,result);
            else if ( !strcmp(hintval,"compressed") &&
                      (result=globus_ftp_client_operationattr_set_mode(&(oattr[fd->fd_sys]),GLOBUS_FTP_CONTROL_MODE_COMPRESSED))!=GLOBUS_SUCCESS )
                globus_err_handler("globus_ftp_client_operationattr_set_mode",myname,result);
            else if ( !strcmp(hintval,"stream") &&
                      (result=globus_ftp_client_operationattr_set_mode(&(oattr[fd->fd_sys]),GLOBUS_FTP_CONTROL_MODE_STREAM))!=GLOBUS_SUCCESS )
                globus_err_handler("globus_ftp_client_operationattr_set_mode",myname,result);
        }

        ADIOI_Info_get(fd->info,"parallelism",MPI_MAX_INFO_VAL,hintval,&keyfound);
        if ( keyfound )
        {
            int nftpthreads;

            if ( sscanf(hintval,"%d",&nftpthreads)==1 )
            {
                globus_ftp_control_parallelism_t parallelism;

                parallelism.mode = GLOBUS_FTP_CONTROL_PARALLELISM_FIXED;
                parallelism.fixed.size = nftpthreads;
                if ( (result=globus_ftp_client_operationattr_set_parallelism(&(oattr[fd->fd_sys]),
                                                                             &parallelism))!=GLOBUS_SUCCESS )
                    globus_err_handler("globus_ftp_client_operationattr_set_parallelism",myname,result);
            }
        }

        ADIOI_Info_get(fd->info,"striped_ftp",MPI_MAX_INFO_VAL,hintval,&keyfound);
        if ( keyfound )
        {
            /* if set to "true" or "enable", set up round-robin block layout */
            if ( !strncmp("true",hintval,4) || !strncmp("TRUE",hintval,4) ||
                 !strncmp("enable",hintval,4) || !strncmp("ENABLE",hintval,4) )
            {
                /* hintval is reused: from here on it holds striping_factor */
                ADIOI_Info_get(fd->info,"striping_factor",MPI_MAX_INFO_VAL,hintval,&keyfound);
                if ( keyfound )
                {
                    int striping_factor;

                    if ( sscanf(hintval,"%d",&striping_factor)==1 )
                    {
                        globus_ftp_control_layout_t layout;

                        layout.mode = GLOBUS_FTP_CONTROL_STRIPING_BLOCKED_ROUND_ROBIN;
                        layout.round_robin.block_size = striping_factor;
                        if ( (result=globus_ftp_client_operationattr_set_layout(&(oattr[fd->fd_sys]),
                                                                                &layout))!=GLOBUS_SUCCESS )
                            globus_err_handler("globus_ftp_client_operationattr_set_layout",
                                               myname,result);
                    }
                }
            }
        }

        ADIOI_Info_get(fd->info,"tcp_buffer",MPI_MAX_INFO_VAL,hintval,&keyfound);
        if ( keyfound )
        {
            /* set tcp buffer size */
            int buffer_size;
            if ( sscanf(hintval,"%d",&buffer_size)==1 )
            {
                globus_ftp_control_tcpbuffer_t tcpbuf;

                tcpbuf.mode = GLOBUS_FTP_CONTROL_TCPBUFFER_FIXED;
                tcpbuf.fixed.size = buffer_size;
                if ( (result=globus_ftp_client_operationattr_set_tcp_buffer(&(oattr[fd->fd_sys]),
                                                                            &tcpbuf))!=GLOBUS_SUCCESS )
                    globus_err_handler("globus_ftp_client_operationattr_set_tcp_buffer",myname,result);
            }
        }

        ADIOI_Info_get(fd->info,"transfer_type",MPI_MAX_INFO_VAL,hintval,&keyfound);
        if ( keyfound )
        {
            globus_ftp_control_type_t filetype;
            /* set transfer type (i.e. ASCII or binary) */
            if ( !strcmp("ascii",hintval) || !strcmp("ASCII",hintval) )
            {
                filetype=GLOBUS_FTP_CONTROL_TYPE_ASCII;
            }
            else
            {
                filetype=GLOBUS_FTP_CONTROL_TYPE_IMAGE;
            }
            if ( (result=globus_ftp_client_operationattr_set_type(&(oattr[fd->fd_sys]),filetype))!=GLOBUS_SUCCESS )
                globus_err_handler("globus_ftp_client_operationattr_set_type",myname,result);
        }
    }
    else
        FPRINTF(stderr,"no MPI_Info object associated with %s\n",fd->filename);

    /* Create the ftp handle */
    result=globus_ftp_client_handle_init(&(gridftp_fh[fd->fd_sys]),&hattr);
    if ( result != GLOBUS_SUCCESS )
    {
        globus_err_handler("globus_ftp_client_handle_init",myname,result);
        fd->fd_sys = -1;
        *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                        myname, __LINE__, MPI_ERR_IO,
                        "**io",
                        "**io %s", globus_object_printable_to_string(globus_error_get(result)));
        return;
    }

    /* Check for existence of the file */
    globus_mutex_init(&lock, GLOBUS_NULL);
    globus_cond_init(&cond, GLOBUS_NULL);
    file_exists=GLOBUS_FALSE;
    exists_done=GLOBUS_FALSE;
    if ( myrank==0 )
    {
        if ( (result=globus_ftp_client_exists(&(gridftp_fh[fd->fd_sys]),
                                              fd->filename,
                                              &(oattr[fd->fd_sys]),
                                              exists_cb,
                                              GLOBUS_NULL))!=GLOBUS_SUCCESS )
        {
            globus_err_handler("globus_ftp_client_exists",myname,result);
            fd->fd_sys = -1;
            *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                            myname, __LINE__, MPI_ERR_IO,
                            "**io", "**io %s",
                            globus_object_printable_to_string(globus_error_get(result)));
            return;
        }
        /* wait till the callback completes */
        globus_mutex_lock(&lock);
        while ( exists_done!=GLOBUS_TRUE )
            globus_cond_wait(&cond,&lock);
        globus_mutex_unlock(&lock);
    }
    /* Share rank 0's answer with every process of the communicator. */
    MPI_Barrier(fd->comm);
    MPI_Bcast(&file_exists,1,MPI_INT,0,fd->comm);

    /* It turns out that this is handled by MPI_File_open() directly */
    if ( (file_exists!=GLOBUS_TRUE) && (fd->access_mode&ADIO_CREATE) &&
         !(fd->access_mode&ADIO_EXCL) && !(fd->access_mode&ADIO_RDONLY) )
    {
        if ( myrank==0 )
        {
            /* if the file doesn't exist, write a single NULL to it */
            globus_byte_t touchbuf=(globus_byte_t)'\0';
            touch_ctl_done=GLOBUS_FALSE;
            if ( (result=globus_ftp_client_put(&(gridftp_fh[fd->fd_sys]),
                                               fd->filename,
                                               &(oattr[fd->fd_sys]),
                                               GLOBUS_NULL,
                                               touch_ctl_cb,
                                               GLOBUS_NULL))!=GLOBUS_SUCCESS )
            {
                globus_err_handler("globus_ftp_client_put",myname,result);
                fd->fd_sys = -1;
                *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                MPIR_ERR_RECOVERABLE,
                                myname, __LINE__, MPI_ERR_IO,
                                "**io", "**io %s",
                                globus_object_printable_to_string(globus_error_get(result)));
                return;
            }
            result=globus_ftp_client_register_write(&(gridftp_fh[fd->fd_sys]),
                                                    (globus_byte_t *)&touchbuf, 0,
                                                    (globus_off_t)0, GLOBUS_TRUE,
                                                    touch_data_cb, GLOBUS_NULL);

            if ( result != GLOBUS_SUCCESS )
            {
                /* NOTE(review): unlike the other failure paths, fd->fd_sys
                 * is left unchanged here. */
                globus_err_handler("globus_ftp_client_register_write",myname,result);
                *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                MPIR_ERR_RECOVERABLE,
                                myname, __LINE__, MPI_ERR_IO,
                                "**io", "**io %s",
                                globus_object_printable_to_string(globus_error_get(result)));
                return;
            }
            globus_mutex_lock(&lock);
            while ( touch_ctl_done!=GLOBUS_TRUE )
                globus_cond_wait(&cond,&lock);
            globus_mutex_unlock(&lock);
        }
        MPI_Barrier(fd->comm);
    }
    else if ( (fd->access_mode&ADIO_EXCL) && (file_exists==GLOBUS_TRUE) )
    {
        fd->fd_sys = -1;
        *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                        myname, __LINE__, MPI_ERR_IO,
                        "**io", 0);
        return;
    }
    else if ( (fd->access_mode&ADIO_RDONLY) && (file_exists!=GLOBUS_TRUE) )
    {
        if ( myrank==0 )
        {
            FPRINTF(stderr,"WARNING: read-only file %s does not exist!\n",fd->filename);
        }
    }
    num_gridftp_handles++;
}
|
468
ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_read.c
Обычный файл
468
ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_read.c
Обычный файл
@ -0,0 +1,468 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_gridftp.h"
|
||||
#include "adioi.h"
|
||||
#include "adio_extern.h"
|
||||
|
||||
static globus_mutex_t readcontig_ctl_lock;
|
||||
static globus_cond_t readcontig_ctl_cond;
|
||||
static globus_bool_t readcontig_ctl_done;
|
||||
static void readcontig_ctl_cb(void *myargs, globus_ftp_client_handle_t *handle, globus_object_t *error)
|
||||
{
|
||||
if (error)
|
||||
{
|
||||
FPRINTF(stderr, "%s\n", globus_object_printable_to_string(error));
|
||||
}
|
||||
globus_mutex_lock(&readcontig_ctl_lock);
|
||||
if ( readcontig_ctl_done!=GLOBUS_TRUE )
|
||||
readcontig_ctl_done=GLOBUS_TRUE;
|
||||
globus_cond_signal(&readcontig_ctl_cond);
|
||||
globus_mutex_unlock(&readcontig_ctl_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
static void readcontig_data_cb(void *myargs, globus_ftp_client_handle_t *handle, globus_object_t *error,
|
||||
globus_byte_t *buffer, globus_size_t length, globus_off_t offset,
|
||||
globus_bool_t eof)
|
||||
{
|
||||
globus_size_t *bytes_read;
|
||||
|
||||
bytes_read=(globus_size_t *)myargs;
|
||||
if (error)
|
||||
{
|
||||
FPRINTF(stderr, "%s\n", globus_object_printable_to_string(error));
|
||||
}
|
||||
*bytes_read+=length;
|
||||
/* I don't understand why the data callback has to keep recalling register_read,
|
||||
but everything I've done and all the examples I've seen seem to require
|
||||
that behavior to work... */
|
||||
/*
|
||||
* Using buffer+length seems to work, but is probably not the correct
|
||||
* solution. A big read of 256kB chunks will have lines like this:
|
||||
readcontig_data_cb: buffer 0x404e0008 length 0 offset 31719424 eof 1
|
||||
readcontig_data_cb: buffer 0x404a0008 length 65536 offset 31981568 eof 0
|
||||
readcontig_data_cb: buffer 0x404b0008 length 65536 offset 32047104 eof 0
|
||||
readcontig_data_cb: buffer 0x404c0008 length 65536 offset 32112640 eof 0
|
||||
readcontig_data_cb: buffer 0x404d0008 length 65536 offset 32178176 eof 0
|
||||
*/
|
||||
if ( !eof )
|
||||
globus_ftp_client_register_read(handle,
|
||||
buffer+length,
|
||||
length,
|
||||
readcontig_data_cb,
|
||||
(void *)(bytes_read));
|
||||
return;
|
||||
}
|
||||
|
||||
static globus_mutex_t readdiscontig_ctl_lock;
|
||||
static globus_cond_t readdiscontig_ctl_cond;
|
||||
static globus_bool_t readdiscontig_ctl_done;
|
||||
static void readdiscontig_ctl_cb(void *myargs, globus_ftp_client_handle_t *handle, globus_object_t *error)
|
||||
{
|
||||
if (error)
|
||||
{
|
||||
FPRINTF(stderr, "%s\n", globus_object_printable_to_string(error));
|
||||
}
|
||||
globus_mutex_lock(&readdiscontig_ctl_lock);
|
||||
if ( readdiscontig_ctl_done!=GLOBUS_TRUE )
|
||||
readdiscontig_ctl_done=GLOBUS_TRUE;
|
||||
globus_cond_signal(&readdiscontig_ctl_cond);
|
||||
globus_mutex_unlock(&readdiscontig_ctl_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
static void readdiscontig_data_cb(void *myargs, globus_ftp_client_handle_t *handle, globus_object_t *error,
|
||||
globus_byte_t *buffer, globus_size_t length, globus_off_t offset,
|
||||
globus_bool_t eof)
|
||||
{
|
||||
globus_size_t *bytes_read;
|
||||
|
||||
bytes_read=(globus_size_t *)myargs;
|
||||
if (error)
|
||||
{
|
||||
FPRINTF(stderr, "%s\n", globus_object_printable_to_string(error));
|
||||
}
|
||||
*bytes_read+=length;
|
||||
/* I don't understand why the data callback has to keep recalling register_read,
|
||||
but everything I've done and all the examples I've seen seem to require
|
||||
that behavior to work... */
|
||||
if ( !eof )
|
||||
globus_ftp_client_register_read(handle,
|
||||
buffer,
|
||||
length,
|
||||
readdiscontig_data_cb,
|
||||
(void *)(bytes_read));
|
||||
return;
|
||||
}
|
||||
|
||||
void ADIOI_GRIDFTP_ReadContig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int
|
||||
*error_code)
|
||||
{
|
||||
static char myname[]="ADIOI_GRIDFTP_ReadContig";
|
||||
int myrank, nprocs;
|
||||
MPI_Count datatype_size;
|
||||
globus_size_t len,bytes_read=0;
|
||||
globus_off_t goff;
|
||||
globus_result_t result;
|
||||
|
||||
if ( fd->access_mode&ADIO_WRONLY )
|
||||
{
|
||||
*error_code=MPIR_ERR_MODE_WRONLY;
|
||||
return;
|
||||
}
|
||||
|
||||
*error_code = MPI_SUCCESS;
|
||||
|
||||
MPI_Comm_size(fd->comm, &nprocs);
|
||||
MPI_Comm_rank(fd->comm, &myrank);
|
||||
MPI_Type_size_x(datatype, &datatype_size);
|
||||
|
||||
if (file_ptr_type != ADIO_EXPLICIT_OFFSET)
|
||||
{
|
||||
offset = fd->fp_ind;
|
||||
}
|
||||
|
||||
/* Do the gridftp I/O transfer */
|
||||
goff = (globus_off_t)offset;
|
||||
len = ((globus_size_t)datatype_size)*((globus_size_t)count);
|
||||
|
||||
globus_mutex_init(&readcontig_ctl_lock, GLOBUS_NULL);
|
||||
globus_cond_init(&readcontig_ctl_cond, GLOBUS_NULL);
|
||||
readcontig_ctl_done=GLOBUS_FALSE;
|
||||
if ( (result=globus_ftp_client_partial_get(&(gridftp_fh[fd->fd_sys]),
|
||||
fd->filename,
|
||||
&(oattr[fd->fd_sys]),
|
||||
GLOBUS_NULL,
|
||||
goff,
|
||||
goff+(globus_off_t)len,
|
||||
readcontig_ctl_cb,
|
||||
GLOBUS_NULL))!=GLOBUS_SUCCESS )
|
||||
{
|
||||
globus_err_handler("globus_ftp_client_partial_get",myname,result);
|
||||
*error_code=MPI_ERR_IO;
|
||||
ADIOI_Error(fd,*error_code,myname);
|
||||
return;
|
||||
}
|
||||
result=globus_ftp_client_register_read(&(gridftp_fh[fd->fd_sys]),
|
||||
(globus_byte_t *)buf, len, readcontig_data_cb,
|
||||
(void *)(&bytes_read));
|
||||
if ( result != GLOBUS_SUCCESS )
|
||||
{
|
||||
globus_err_handler("globus_ftp_client_register_read",myname,result);
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE, myname, __LINE__,
|
||||
MPI_ERR_IO, "**io", "**io %s",
|
||||
globus_object_printable_to_string(globus_error_get(result)));
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
/* The ctl callback won't start till the data callbacks complete, so it's
|
||||
safe to wait on just the ctl callback */
|
||||
globus_mutex_lock(&readcontig_ctl_lock);
|
||||
while ( readcontig_ctl_done!=GLOBUS_TRUE )
|
||||
globus_cond_wait(&readcontig_ctl_cond,&readcontig_ctl_lock);
|
||||
globus_mutex_unlock(&readcontig_ctl_lock);
|
||||
|
||||
globus_mutex_destroy(&readcontig_ctl_lock);
|
||||
globus_cond_destroy(&readcontig_ctl_cond);
|
||||
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
MPIR_Status_set_bytes(status, datatype, bytes_read);
|
||||
#endif
|
||||
if (file_ptr_type != ADIO_EXPLICIT_OFFSET)
|
||||
{
|
||||
fd->fp_ind += bytes_read;
|
||||
fd->fp_sys_posn = fd->fp_ind;
|
||||
}
|
||||
else {
|
||||
fd->fp_sys_posn = offset + bytes_read;
|
||||
}
|
||||
}
|
||||
|
||||
void ADIOI_GRIDFTP_ReadDiscontig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int
|
||||
*error_code)
|
||||
{
|
||||
char myname[]="ADIOI_GRIDFTP_ReadDiscontig";
|
||||
int myrank,nprocs;
|
||||
/* size and extent of buffer in memory */
|
||||
MPI_Aint btype_size,btype_extent;
|
||||
/* size and extent of file record layout */
|
||||
MPI_Aint ftype_size,ftype_extent;
|
||||
/* size of file elemental type; seeks are done in units of this */
|
||||
MPI_Aint etype_size;
|
||||
MPI_Aint extent;
|
||||
ADIOI_Flatlist_node *flat_file;
|
||||
int i,buf_contig,boff,nblks;
|
||||
globus_off_t start,end,goff;
|
||||
globus_size_t bytes_read;
|
||||
globus_result_t result;
|
||||
globus_byte_t *tmp;
|
||||
|
||||
if ( fd->access_mode&ADIO_WRONLY )
|
||||
{
|
||||
*error_code=MPIR_ERR_MODE_WRONLY;
|
||||
return;
|
||||
}
|
||||
|
||||
*error_code=MPI_SUCCESS;
|
||||
|
||||
MPI_Comm_rank(fd->comm,&myrank);
|
||||
MPI_Comm_size(fd->comm,&nprocs);
|
||||
|
||||
etype_size=fd->etype_size;
|
||||
MPI_Type_size_x(fd->filetype,&ftype_size);
|
||||
MPI_Type_extent(fd->filetype,&ftype_extent);
|
||||
/* This is arguably unnecessary, as this routine assumes that the
|
||||
buffer in memory is contiguous */
|
||||
MPI_Type_size_x(datatype,&btype_size);
|
||||
MPI_Type_extent(datatype,&btype_extent);
|
||||
ADIOI_Datatype_iscontig(datatype,&buf_contig);
|
||||
|
||||
if ( ( btype_extent!=btype_size ) || ( ! buf_contig ) )
|
||||
{
|
||||
FPRINTF(stderr,"[%d/%d] %s called with discontigous memory buffer\n",
|
||||
myrank,nprocs,myname);
|
||||
fflush(stderr);
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE, myname, __LINE__,
|
||||
MPI_ERR_IO, "**io", 0 );
|
||||
return;
|
||||
}
|
||||
/* from here we can assume btype_extent==btype_size */
|
||||
|
||||
/* Flatten out fd->filetype so we know which blocks to skip */
|
||||
ADIOI_Flatten_datatype(fd->filetype);
|
||||
flat_file = ADIOI_Flatlist;
|
||||
while (flat_file->type != fd->filetype && flat_file->next!=NULL)
|
||||
flat_file = flat_file->next;
|
||||
|
||||
/* Figure out how big the area to read is */
|
||||
start=(globus_off_t)(offset*etype_size);
|
||||
goff=start;
|
||||
boff=0;
|
||||
extent=0;
|
||||
nblks=0;
|
||||
while ( boff < (count*btype_size) )
|
||||
{
|
||||
int blklen=0;
|
||||
|
||||
for (i=0;i<flat_file->count;i++)
|
||||
{
|
||||
/* find the length of the next block */
|
||||
if ( (boff+flat_file->blocklens[i]) < (count*btype_size) )
|
||||
blklen=flat_file->blocklens[i];
|
||||
else
|
||||
blklen=(count*btype_size)-boff;
|
||||
/* increment buffer size to be used */
|
||||
boff+=blklen;
|
||||
/* compute extent -- the nblks*ftype_extent bit is
|
||||
there so we remember how many ftypes we've already
|
||||
been through */
|
||||
extent=MAX(extent,nblks*ftype_extent+flat_file->indices[i]+blklen);
|
||||
if ( boff>=(count*btype_size) )
|
||||
break;
|
||||
}
|
||||
nblks++;
|
||||
}
|
||||
if ( extent < count*btype_size )
|
||||
{
|
||||
FPRINTF(stderr,"[%d/%d] %s error in computing extent -- extent %d is smaller than total bytes requested %d!\n",
|
||||
myrank,nprocs,myname,extent,count*btype_size);
|
||||
fflush(stderr);
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE, myname, __LINE__,
|
||||
MPI_ERR_IO, "**io", 0);
|
||||
return;
|
||||
}
|
||||
end=start+(globus_off_t)extent;
|
||||
tmp=(globus_byte_t *)ADIOI_Malloc((size_t)extent*sizeof(globus_byte_t));
|
||||
|
||||
/* start up the globus partial read */
|
||||
globus_mutex_init(&readdiscontig_ctl_lock, GLOBUS_NULL);
|
||||
globus_cond_init(&readdiscontig_ctl_cond, GLOBUS_NULL);
|
||||
readdiscontig_ctl_done=GLOBUS_FALSE;
|
||||
if ( (result=globus_ftp_client_partial_get(&(gridftp_fh[fd->fd_sys]),
|
||||
fd->filename,
|
||||
&(oattr[fd->fd_sys]),
|
||||
GLOBUS_NULL,
|
||||
start,
|
||||
end,
|
||||
readdiscontig_ctl_cb,
|
||||
GLOBUS_NULL))!=GLOBUS_SUCCESS )
|
||||
{
|
||||
globus_err_handler("globus_ftp_client_partial_get",myname,result);
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE, myname, __LINE__,
|
||||
MPI_ERR_IO, "**io", "**io %s",
|
||||
globus_object_printable_to_string(globus_error_get(result)));
|
||||
return;
|
||||
}
|
||||
|
||||
/* Do all the actual I/Os */
|
||||
/* Since globus_ftp_client_register_read() is brain-dead and doesn't
|
||||
let you specify an offset, we have to slurp the entire extent into
|
||||
memory and then parse out the pieces we want... Sucks, doesn't it?
|
||||
|
||||
This should probably be done in chunks (preferably of a size
|
||||
set using a file hint), but that'll have to come later.
|
||||
--TB */
|
||||
if ( (result=globus_ftp_client_register_read(&(gridftp_fh[fd->fd_sys]),
|
||||
tmp,
|
||||
(globus_size_t)extent,
|
||||
readdiscontig_data_cb,
|
||||
(void *)(&bytes_read)))!=GLOBUS_SUCCESS )
|
||||
{
|
||||
globus_err_handler("globus_ftp_client_register_read",myname,result);
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__, MPI_ERR_IO,
|
||||
"**io",
|
||||
"**io %s", globus_object_printable_to_string(globus_error_get(result)));
|
||||
return;
|
||||
}
|
||||
/* The ctl callback won't start till the data callbacks complete, so it's
|
||||
safe to wait on just the ctl callback */
|
||||
globus_mutex_lock(&readdiscontig_ctl_lock);
|
||||
while ( readdiscontig_ctl_done!=GLOBUS_TRUE )
|
||||
globus_cond_wait(&readdiscontig_ctl_cond,&readdiscontig_ctl_lock);
|
||||
globus_mutex_unlock(&readdiscontig_ctl_lock);
|
||||
|
||||
globus_mutex_destroy(&readdiscontig_ctl_lock);
|
||||
globus_cond_destroy(&readdiscontig_ctl_cond);
|
||||
|
||||
boff=0;
|
||||
nblks=0;
|
||||
goff=0;
|
||||
while ( boff < (count*btype_size) )
|
||||
{
|
||||
int i,blklen;
|
||||
|
||||
for (i=0;i<flat_file->count;i++)
|
||||
{
|
||||
if ( (boff+flat_file->blocklens[i]) < (count*btype_size) )
|
||||
blklen=flat_file->blocklens[i];
|
||||
else
|
||||
blklen=(count*btype_size)-boff;
|
||||
if ( blklen > 0 )
|
||||
{
|
||||
goff=nblks*ftype_extent+flat_file->indices[i];
|
||||
memcpy((globus_byte_t *)buf+boff,tmp+goff,(size_t)blklen);
|
||||
boff+=blklen;
|
||||
if ( boff>=(count*btype_size) )
|
||||
break;
|
||||
}
|
||||
}
|
||||
nblks++;
|
||||
}
|
||||
ADIOI_Free(tmp);
|
||||
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
MPIR_Status_set_bytes(status, datatype, bytes_read);
|
||||
#endif
|
||||
if (file_ptr_type != ADIO_EXPLICIT_OFFSET)
|
||||
{
|
||||
fd->fp_ind += extent;
|
||||
fd->fp_sys_posn = fd->fp_ind;
|
||||
}
|
||||
else {
|
||||
fd->fp_sys_posn = offset + extent;
|
||||
}
|
||||
}
|
||||
|
||||
void ADIOI_GRIDFTP_ReadStrided(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int
|
||||
*error_code)
|
||||
{
|
||||
/*
|
||||
int myrank, nprocs;
|
||||
|
||||
*error_code = MPI_SUCCESS;
|
||||
|
||||
MPI_Comm_size(fd->comm, &nprocs);
|
||||
MPI_Comm_rank(fd->comm, &myrank);
|
||||
#ifdef PRINT_ERR_MSG
|
||||
FPRINTF(stdout, "[%d/%d] ADIOI_GRIDFTP_ReadStrided called on %s\n", myrank,
|
||||
nprocs, fd->filename);
|
||||
FPRINTF(stdout, "[%d/%d] calling ADIOI_GEN_ReadStrided\n", myrank,
|
||||
nprocs);
|
||||
#endif
|
||||
|
||||
ADIOI_GEN_ReadStrided(fd, buf, count, datatype, file_ptr_type, offset,
|
||||
status, error_code);
|
||||
|
||||
*/
|
||||
|
||||
char myname[]="ADIOI_GRIDFTP_ReadStrided";
|
||||
int myrank, nprocs;
|
||||
int i,j;
|
||||
int buf_contig,file_contig;
|
||||
MPI_Aint btype_size,bufsize;
|
||||
globus_off_t start,disp;
|
||||
globus_size_t bytes_read;
|
||||
globus_byte_t *intermediate;
|
||||
|
||||
*error_code = MPI_SUCCESS;
|
||||
|
||||
MPI_Comm_size(fd->comm, &nprocs);
|
||||
MPI_Comm_rank(fd->comm, &myrank);
|
||||
|
||||
MPI_Type_size_x(datatype,&btype_size);
|
||||
bufsize=count*btype_size;
|
||||
ADIOI_Datatype_iscontig(fd->filetype,&file_contig);
|
||||
ADIOI_Datatype_iscontig(datatype,&buf_contig);
|
||||
if ( buf_contig && !file_contig )
|
||||
{
|
||||
/* Contiguous in memory, discontig in file */
|
||||
ADIOI_GRIDFTP_ReadDiscontig(fd, buf, count, datatype,
|
||||
file_ptr_type, offset, status, error_code);
|
||||
}
|
||||
else if ( !buf_contig && file_contig )
|
||||
{
|
||||
/* Discontiguous in mem, contig in file -- comparatively easy */
|
||||
int posn=0;
|
||||
|
||||
/* read contiguous data into intermediate buffer */
|
||||
intermediate=(globus_byte_t *)ADIOI_Malloc((size_t)bufsize);
|
||||
ADIOI_GRIDFTP_ReadContig(fd, intermediate, bufsize, MPI_BYTE,
|
||||
file_ptr_type, offset, status, error_code);
|
||||
|
||||
/* explode contents of intermediate buffer into main buffer */
|
||||
MPI_Unpack(intermediate,bufsize,&posn,buf,count,datatype,fd->comm);
|
||||
|
||||
ADIOI_Free(intermediate);
|
||||
}
|
||||
else if ( !buf_contig && !file_contig )
|
||||
{
|
||||
/* Discontig in both mem and file -- the hardest case */
|
||||
int posn=0;
|
||||
|
||||
/* Read discontiguous data into intermediate buffer */
|
||||
intermediate=(globus_byte_t *)ADIOI_Malloc((size_t)bufsize);
|
||||
ADIOI_GRIDFTP_ReadDiscontig(fd, intermediate, bufsize, MPI_BYTE,
|
||||
file_ptr_type, offset, status, error_code);
|
||||
|
||||
/* explode contents of intermediate buffer into main buffer */
|
||||
posn=0;
|
||||
MPI_Unpack(intermediate,bufsize,&posn,buf,count,datatype,fd->comm);
|
||||
|
||||
ADIOI_Free(intermediate);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Why did you bother calling ReadStrided?!?!?! */
|
||||
ADIOI_GRIDFTP_ReadContig(fd, buf, count, datatype,
|
||||
file_ptr_type, offset, status, error_code);
|
||||
}
|
||||
|
||||
}
|
||||
|
241
ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_resize.c
Обычный файл
241
ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_resize.c
Обычный файл
@ -0,0 +1,241 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_gridftp.h"
|
||||
#include "adioi.h"
|
||||
|
||||
static globus_mutex_t resize_lock;
|
||||
static globus_cond_t resize_cond;
|
||||
static globus_bool_t resize_done;
|
||||
static globus_bool_t resize_success;
|
||||
|
||||
void resize_cb(void *myargs, globus_ftp_client_handle_t *handle,
|
||||
globus_object_t *error)
|
||||
{
|
||||
if (error)
|
||||
{
|
||||
FPRINTF(stderr, "%s\n", globus_object_printable_to_string(error));
|
||||
globus_mutex_lock(&resize_lock);
|
||||
resize_success=GLOBUS_FALSE;
|
||||
globus_mutex_unlock(&resize_lock);
|
||||
}
|
||||
else
|
||||
{
|
||||
globus_mutex_lock(&resize_lock);
|
||||
resize_success=GLOBUS_TRUE;
|
||||
globus_mutex_unlock(&resize_lock);
|
||||
}
|
||||
globus_mutex_lock(&resize_lock);
|
||||
resize_done=GLOBUS_TRUE;
|
||||
globus_cond_signal(&resize_cond);
|
||||
globus_mutex_unlock(&resize_lock);
|
||||
}
|
||||
|
||||
|
||||
static void resize_wrdata_cb(void *myargs, globus_ftp_client_handle_t *handle, globus_object_t *error,
|
||||
globus_byte_t *buffer, globus_size_t length, globus_off_t offset,
|
||||
globus_bool_t eof)
|
||||
{
|
||||
if (error)
|
||||
FPRINTF(stderr, "%s\n", globus_object_printable_to_string(error));
|
||||
if (!eof)
|
||||
globus_ftp_client_register_read(handle,
|
||||
buffer,
|
||||
length,
|
||||
resize_wrdata_cb,
|
||||
myargs);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
/*
 * ADIOI_GRIDFTP_Resize - make the remote file exactly `size` bytes long.
 *
 * The current size is queried first.  If the file is too short, a
 * zero-length write is issued at offset `size`.  If it is too long, the
 * file is renamed to "<filename>.old", the first `size` bytes are copied
 * back under the original name with a partial third-party transfer, and
 * the ".old" copy is deleted.  Each Globus operation is asynchronous; this
 * routine waits for resize_cb to signal completion after each one.
 *
 * error_code receives MPI_SUCCESS or an I/O error code.  Resizing a file
 * opened read-only is rejected.
 *
 * Fixes relative to the previous revision:
 *   - the first globus_cond_wait had its (cond, mutex) arguments swapped;
 *   - resize_lock was never released after the first wait, so the next
 *     globus_mutex_lock on it would block on a lock this thread held;
 *   - the result of the size query was used even if the query failed;
 *   - the temporary ".old" URL string leaked on every error return.
 */
void ADIOI_GRIDFTP_Resize(ADIO_File fd, ADIO_Offset size, int *error_code)
{
    int myrank, nprocs;
    char myname[]="ADIOI_GRIDFTP_Resize";
    globus_off_t fsize=0;
    globus_result_t result;

    *error_code = MPI_SUCCESS;

    MPI_Comm_size(fd->comm, &nprocs);
    MPI_Comm_rank(fd->comm, &myrank);

    /* Sanity check */
    if ( fd->access_mode&ADIO_RDONLY )
    {
        FPRINTF(stderr,"%s: attempt to resize read-only file %s!\n",
                myname,fd->filename);
        *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                          myname, __LINE__, MPI_ERR_IO,
                          "**io", 0);
        return;
    }

    /* This routine is supposed to do the moral equivalent of truncate(),
       but there's not an equivalent operation in the globus_ftp_client API. */
    globus_mutex_init(&resize_lock,GLOBUS_NULL);
    globus_cond_init(&resize_cond,GLOBUS_NULL);
    resize_done=GLOBUS_FALSE;
    resize_success=GLOBUS_FALSE;
    if ( (result=globus_ftp_client_size(&(gridftp_fh[fd->fd_sys]),
                                        fd->filename,
                                        &(oattr[fd->fd_sys]),
                                        &(fsize),
                                        resize_cb,
                                        GLOBUS_NULL))!=GLOBUS_SUCCESS )
    {
        globus_err_handler("globus_ftp_client_size",myname,result);
        *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                          myname, __LINE__, MPI_ERR_IO,
                          "**io",
                          "**io %s", globus_object_printable_to_string(globus_error_get(result)));
        return;
    }
    globus_mutex_lock(&resize_lock);
    while ( resize_done!=GLOBUS_TRUE )
        globus_cond_wait(&resize_cond,&resize_lock);
    globus_mutex_unlock(&resize_lock);
    if ( resize_success!=GLOBUS_TRUE )
    {
        /* the size query failed, so fsize holds nothing meaningful */
        globus_mutex_destroy(&resize_lock);
        globus_cond_destroy(&resize_cond);
        *error_code = MPI_ERR_IO;
        ADIOI_Error(fd,*error_code,myname);
        return;
    }

    if ( fsize < (globus_off_t)size )
    {
        /* The file is smaller than the requested size, so
           do a zero-byte write to where the new EOF should be. */
        globus_byte_t touchbuf=(globus_byte_t)'\0';
        resize_done=GLOBUS_FALSE;
        if ( (result=globus_ftp_client_partial_put(&(gridftp_fh[fd->fd_sys]),
                                                   fd->filename,
                                                   &(oattr[fd->fd_sys]),
                                                   GLOBUS_NULL,
                                                   (globus_off_t)size,
                                                   (globus_off_t)size,
                                                   resize_cb,
                                                   GLOBUS_NULL))!=GLOBUS_SUCCESS )
        {
            globus_err_handler("globus_ftp_client_partial_put",myname,result);
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                              MPIR_ERR_RECOVERABLE, myname, __LINE__,
                              MPI_ERR_IO, "**io", "**io %s",
                              globus_object_printable_to_string(globus_error_get(result)));
            return;
        }

        if ( (result=globus_ftp_client_register_write(&(gridftp_fh[fd->fd_sys]),
                                                      (globus_byte_t *)&touchbuf,
                                                      0,
                                                      (globus_off_t)0,
                                                      GLOBUS_TRUE,
                                                      resize_wrdata_cb,
                                                      GLOBUS_NULL))!=GLOBUS_SUCCESS )
        {
            globus_err_handler("globus_ftp_client_register_write",myname,result);
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                              MPIR_ERR_RECOVERABLE, myname, __LINE__,
                              MPI_ERR_IO, "**io", "**io %s",
                              globus_object_printable_to_string(globus_error_get(result)));
            return;
        }
        globus_mutex_lock(&resize_lock);
        while ( resize_done!=GLOBUS_TRUE )
            globus_cond_wait(&resize_cond,&resize_lock);
        globus_mutex_unlock(&resize_lock);
    }
    else if ( fsize > (globus_off_t)size )
    {
        /* The file is bigger than the requested size, so
           we'll abuse globus_ftp_client_partial_third_party_transfer()
           into truncating it for us. */
        char *urlold;
        size_t urllen;

        /* room for the name, the ".old" suffix and the terminating NUL */
        urllen=strlen(fd->filename);
        urlold=(char *)ADIOI_Malloc(urllen+5);
        ADIOI_Snprintf(urlold,urllen+5,"%s.old",fd->filename);

        /* step 1: move the file out of the way */
        resize_done=GLOBUS_FALSE;
        resize_success=GLOBUS_FALSE;
        if ( (result=globus_ftp_client_move(&(gridftp_fh[fd->fd_sys]),
                                            fd->filename,
                                            urlold,
                                            &(oattr[fd->fd_sys]),
                                            resize_cb,
                                            GLOBUS_NULL))!=GLOBUS_SUCCESS )
        {
            globus_err_handler("globus_ftp_client_move",myname,result);
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                              MPIR_ERR_RECOVERABLE, myname, __LINE__,
                              MPI_ERR_IO, "**io", "**io %s",
                              globus_object_printable_to_string(globus_error_get(result)));
            ADIOI_Free(urlold);
            return;
        }
        globus_mutex_lock(&resize_lock);
        while ( resize_done!=GLOBUS_TRUE )
            globus_cond_wait(&resize_cond,&resize_lock);
        globus_mutex_unlock(&resize_lock);
        if ( resize_success!=GLOBUS_TRUE )
        {
            *error_code = MPI_ERR_IO;
            ADIOI_Free(urlold);
            return;
        }

        /* step 2: copy the first `size` bytes back under the old name */
        resize_done=GLOBUS_FALSE;
        if ( (result=globus_ftp_client_partial_third_party_transfer(&(gridftp_fh[fd->fd_sys]),
                                                                    urlold,
                                                                    &(oattr[fd->fd_sys]),
                                                                    fd->filename,
                                                                    &(oattr[fd->fd_sys]),
                                                                    GLOBUS_NULL,
                                                                    0,
                                                                    (globus_off_t)size,
                                                                    resize_cb,
                                                                    GLOBUS_NULL))!=GLOBUS_SUCCESS )
        {
            globus_err_handler("globus_ftp_client_partial_third_party_transfer",myname,result);
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                              MPIR_ERR_RECOVERABLE, myname, __LINE__,
                              MPI_ERR_IO, "**io", "**io %s",
                              globus_object_printable_to_string(globus_error_get(result)));
            ADIOI_Free(urlold);
            return;
        }
        globus_mutex_lock(&resize_lock);
        while ( resize_done!=GLOBUS_TRUE )
            globus_cond_wait(&resize_cond,&resize_lock);
        globus_mutex_unlock(&resize_lock);
        if ( resize_success!=GLOBUS_TRUE )
        {
            *error_code = MPI_ERR_IO;
            ADIOI_Error(fd,*error_code,myname);
            ADIOI_Free(urlold);
            return;
        }

        /* step 3: remove the renamed original */
        resize_done=GLOBUS_FALSE;
        if ( (result=globus_ftp_client_delete(&(gridftp_fh[fd->fd_sys]),
                                              urlold,
                                              &(oattr[fd->fd_sys]),
                                              resize_cb,
                                              GLOBUS_NULL))!=GLOBUS_SUCCESS )
        {
            globus_err_handler("globus_ftp_client_delete",myname,result);
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                              MPIR_ERR_RECOVERABLE, myname, __LINE__,
                              MPI_ERR_IO, "**io", "**io %s",
                              globus_object_printable_to_string(globus_error_get(result)));
            ADIOI_Free(urlold);
            return;
        }
        globus_mutex_lock(&resize_lock);
        while ( resize_done!=GLOBUS_TRUE )
            globus_cond_wait(&resize_cond,&resize_lock);
        globus_mutex_unlock(&resize_lock);
        if ( resize_success!=GLOBUS_TRUE )
        {
            *error_code = MPI_ERR_IO;
            ADIOI_Error(fd,*error_code,myname);
            ADIOI_Free(urlold);
            return;
        }
        ADIOI_Free(urlold);
    }
    globus_mutex_destroy(&resize_lock);
    globus_cond_destroy(&resize_cond);
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
473
ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_write.c
Обычный файл
473
ompi/mca/io/romio314/romio/adio/ad_gridftp/ad_gridftp_write.c
Обычный файл
@ -0,0 +1,473 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_gridftp.h"
|
||||
#include "adioi.h"
|
||||
#include "adio_extern.h"
|
||||
|
||||
static globus_mutex_t writecontig_ctl_lock;
|
||||
static globus_cond_t writecontig_ctl_cond;
|
||||
static globus_bool_t writecontig_ctl_done;
|
||||
static void writecontig_ctl_cb(void *myargs, globus_ftp_client_handle_t *handle, globus_object_t *error)
|
||||
{
|
||||
if (error)
|
||||
{
|
||||
FPRINTF(stderr, "%s\n", globus_object_printable_to_string(error));
|
||||
}
|
||||
globus_mutex_lock(&writecontig_ctl_lock);
|
||||
if ( writecontig_ctl_done!=GLOBUS_TRUE )
|
||||
writecontig_ctl_done=GLOBUS_TRUE;
|
||||
globus_cond_signal(&writecontig_ctl_cond);
|
||||
globus_mutex_unlock(&writecontig_ctl_lock);
|
||||
#ifdef PRINT_ERR_MSG
|
||||
FPRINTF(stderr,"finished with contig write transaction\n");
|
||||
#endif /* PRINT_ERR_MSG */
|
||||
return;
|
||||
}
|
||||
|
||||
static void writecontig_data_cb(void *myargs, globus_ftp_client_handle_t *handle, globus_object_t *error,
|
||||
globus_byte_t *buffer, globus_size_t length, globus_off_t offset,
|
||||
globus_bool_t eof)
|
||||
{
|
||||
globus_size_t *bytes_written;
|
||||
|
||||
bytes_written=(globus_size_t *)myargs;
|
||||
if (error)
|
||||
{
|
||||
FPRINTF(stderr, "%s\n", globus_object_printable_to_string(error));
|
||||
}
|
||||
*bytes_written+=length;
|
||||
/* I don't understand why the data callback has to keep recalling register_write,
|
||||
but everything I've done and all the examples I've seen seem to require
|
||||
that behavior to work... */
|
||||
if ( !eof )
|
||||
{
|
||||
globus_ftp_client_register_write(handle,
|
||||
buffer,
|
||||
length,
|
||||
offset,
|
||||
GLOBUS_TRUE,
|
||||
writecontig_data_cb,
|
||||
(void *)(bytes_written));
|
||||
}
|
||||
#ifdef PRINT_ERR_MSG
|
||||
FPRINTF(stderr,"wrote %Ld bytes...",(long long)length);
|
||||
#endif /* PRINT_ERR_MSG */
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
static globus_mutex_t writediscontig_ctl_lock;
|
||||
static globus_cond_t writediscontig_ctl_cond;
|
||||
static globus_bool_t writediscontig_ctl_done;
|
||||
static void writediscontig_ctl_cb(void *myargs, globus_ftp_client_handle_t *handle, globus_object_t *error)
|
||||
{
|
||||
if (error)
|
||||
{
|
||||
FPRINTF(stderr, "%s\n", globus_object_printable_to_string(error));
|
||||
}
|
||||
globus_mutex_lock(&writediscontig_ctl_lock);
|
||||
if ( writediscontig_ctl_done!=GLOBUS_TRUE )
|
||||
writediscontig_ctl_done=GLOBUS_TRUE;
|
||||
globus_cond_signal(&writediscontig_ctl_cond);
|
||||
globus_mutex_unlock(&writediscontig_ctl_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
static void writediscontig_data_cb(void *myargs, globus_ftp_client_handle_t *handle, globus_object_t *error,
|
||||
globus_byte_t *buffer, globus_size_t length, globus_off_t offset,
|
||||
globus_bool_t eof)
|
||||
{
|
||||
globus_size_t *bytes_written;
|
||||
|
||||
bytes_written=(globus_size_t *)myargs;
|
||||
if (error)
|
||||
{
|
||||
FPRINTF(stderr, "%s\n", globus_object_printable_to_string(error));
|
||||
}
|
||||
*bytes_written+=length;
|
||||
/* I don't understand why the data callback has to keep recalling register_read,
|
||||
but everything I've done and all the examples I've seen seem to require
|
||||
that behavior to work... */
|
||||
if ( !eof )
|
||||
globus_ftp_client_register_write(handle,
|
||||
buffer,
|
||||
length,
|
||||
offset,
|
||||
eof,
|
||||
writediscontig_data_cb,
|
||||
(void *)(bytes_written));
|
||||
FPRINTF(stderr,"wrote %Ld bytes...",(long long)length);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
void ADIOI_GRIDFTP_WriteContig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int
|
||||
*error_code)
|
||||
{
|
||||
char myname[]="ADIOI_GRIDFTP_WriteContig";
|
||||
int myrank, nprocs;
|
||||
MPI_Count datatype_size;
|
||||
globus_size_t len,bytes_written=0;
|
||||
globus_off_t goff;
|
||||
globus_result_t result;
|
||||
|
||||
if ( fd->access_mode&ADIO_RDONLY )
|
||||
{
|
||||
*error_code=MPI_ERR_AMODE;
|
||||
return;
|
||||
}
|
||||
|
||||
*error_code = MPI_SUCCESS;
|
||||
|
||||
MPI_Comm_size(fd->comm, &nprocs);
|
||||
MPI_Comm_rank(fd->comm, &myrank);
|
||||
MPI_Type_size_x(datatype, &datatype_size);
|
||||
|
||||
if (file_ptr_type != ADIO_EXPLICIT_OFFSET)
|
||||
{
|
||||
offset = fd->fp_ind;
|
||||
}
|
||||
|
||||
/* Do the gridftp I/O transfer */
|
||||
goff = (globus_off_t)offset;
|
||||
len = ((globus_size_t)datatype_size)*((globus_size_t)count);
|
||||
|
||||
globus_mutex_init(&writecontig_ctl_lock, GLOBUS_NULL);
|
||||
globus_cond_init(&writecontig_ctl_cond, GLOBUS_NULL);
|
||||
writecontig_ctl_done=GLOBUS_FALSE;
|
||||
if ( (result=globus_ftp_client_partial_put(&(gridftp_fh[fd->fd_sys]),
|
||||
fd->filename,
|
||||
&(oattr[fd->fd_sys]),
|
||||
GLOBUS_NULL,
|
||||
goff,
|
||||
goff+(globus_off_t)len,
|
||||
writecontig_ctl_cb,
|
||||
GLOBUS_NULL))!=GLOBUS_SUCCESS )
|
||||
{
|
||||
globus_err_handler("globus_ftp_client_partial_put",myname,result);
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__, MPI_ERR_IO,
|
||||
"**io",
|
||||
"**io %s", globus_object_printable_to_string(globus_error_get(result)));
|
||||
return;
|
||||
}
|
||||
if ( (result=globus_ftp_client_register_write(&(gridftp_fh[fd->fd_sys]),
|
||||
(globus_byte_t *)buf,
|
||||
len,
|
||||
goff,
|
||||
GLOBUS_TRUE,
|
||||
writecontig_data_cb,
|
||||
(void *)(&bytes_written)))!=GLOBUS_SUCCESS )
|
||||
{
|
||||
globus_err_handler("globus_ftp_client_register_write",myname,result);
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__, MPI_ERR_IO,
|
||||
"**io",
|
||||
"**io %s", globus_object_printable_to_string(globus_error_get(result)));
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
/* The ctl callback won't start till the data callbacks complete, so it's
|
||||
safe to wait on just the ctl callback */
|
||||
globus_mutex_lock(&writecontig_ctl_lock);
|
||||
while ( writecontig_ctl_done!=GLOBUS_TRUE )
|
||||
globus_cond_wait(&writecontig_ctl_cond,&writecontig_ctl_lock);
|
||||
globus_mutex_unlock(&writecontig_ctl_lock);
|
||||
|
||||
globus_mutex_destroy(&writecontig_ctl_lock);
|
||||
globus_cond_destroy(&writecontig_ctl_cond);
|
||||
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
MPIR_Status_set_bytes(status, datatype, bytes_written);
|
||||
#endif
|
||||
if (file_ptr_type != ADIO_EXPLICIT_OFFSET)
|
||||
{
|
||||
offset = fd->fp_ind;
|
||||
fd->fp_ind += bytes_written;
|
||||
fd->fp_sys_posn = fd->fp_ind;
|
||||
}
|
||||
else {
|
||||
fd->fp_sys_posn = offset + bytes_written;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void ADIOI_GRIDFTP_WriteDiscontig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int
|
||||
*error_code)
|
||||
{
|
||||
char myname[]="ADIOI_GRIDFTP_WriteDiscontig";
|
||||
int myrank,nprocs;
|
||||
MPI_Aint btype_size,btype_extent;
|
||||
MPI_Aint ftype_size,ftype_extent;
|
||||
MPI_Aint etype_size;
|
||||
MPI_Aint extent;
|
||||
ADIOI_Flatlist_node *flat_file;
|
||||
int buf_contig,boff,i,nblks;
|
||||
globus_off_t start,end,goff;
|
||||
globus_size_t bytes_written;
|
||||
globus_result_t result;
|
||||
|
||||
MPI_Comm_rank(fd->comm,&myrank);
|
||||
MPI_Comm_size(fd->comm,&nprocs);
|
||||
etype_size=fd->etype_size;
|
||||
MPI_Type_size_x(fd->filetype,&ftype_size);
|
||||
MPI_Type_extent(fd->filetype,&ftype_extent);
|
||||
/* This is arguably unnecessary, as this routine assumes that the
|
||||
buffer in memory is contiguous */
|
||||
MPI_Type_size_x(datatype,&btype_size);
|
||||
MPI_Type_extent(datatype,&btype_extent);
|
||||
ADIOI_Datatype_iscontig(datatype,&buf_contig);
|
||||
|
||||
if ( ( btype_extent!=btype_size ) || ( ! buf_contig ) )
|
||||
{
|
||||
FPRINTF(stderr,"[%d/%d] %s called with discontigous memory buffer\n",
|
||||
myrank,nprocs,myname);
|
||||
fflush(stderr);
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__, MPI_ERR_IO,
|
||||
"**io",
|
||||
"**io %s", globus_object_printable_to_string(globus_error_get(result)));
|
||||
return;
|
||||
}
|
||||
/* from here we can assume btype_extent==btype_size */
|
||||
|
||||
/* Flatten out fd->filetype so we know which blocks to skip */
|
||||
ADIOI_Flatten_datatype(fd->filetype);
|
||||
flat_file = ADIOI_Flatlist;
|
||||
while (flat_file->type != fd->filetype && flat_file->next!=NULL)
|
||||
flat_file = flat_file->next;
|
||||
|
||||
/* Figure out how big the area to write is */
|
||||
/* ASSUMPTION: ftype_size is an integer multiple of btype_size or vice versa. */
|
||||
start=(globus_off_t)(offset*etype_size);
|
||||
goff=start;
|
||||
boff=0;
|
||||
extent=0;
|
||||
nblks=0;
|
||||
while ( boff < (count*btype_size) )
|
||||
{
|
||||
int blklen;
|
||||
|
||||
for (i=0;i<flat_file->count;i++)
|
||||
{
|
||||
if ( (boff+flat_file->blocklens[i]) < (count*btype_size) )
|
||||
blklen=flat_file->blocklens[i];
|
||||
else
|
||||
blklen=(count*btype_size)-boff;
|
||||
boff+=blklen;
|
||||
extent=MAX(extent,nblks*ftype_extent+flat_file->indices[i]+blklen);
|
||||
if ( boff>=(count*btype_size) )
|
||||
break;
|
||||
}
|
||||
nblks++;
|
||||
}
|
||||
if ( extent < count*btype_size )
|
||||
{
|
||||
FPRINTF(stderr,"[%d/%d] %s error in computing extent -- extent %d is smaller than total bytes requested %d!\n",
|
||||
myrank,nprocs,myname,extent,count*btype_size);
|
||||
fflush(stderr);
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__, MPI_ERR_IO,
|
||||
"**io",
|
||||
"**io %s", globus_object_printable_to_string(globus_error_get(result)));
|
||||
return;
|
||||
}
|
||||
end=start+(globus_off_t)extent;
|
||||
FPRINTF(stderr,"[%d/%d] %s writing %d bytes into extent of %d bytes starting at offset %Ld\n",
|
||||
myrank,nprocs,myname,count*btype_size,extent,(long long)start);
|
||||
fflush(stderr);
|
||||
|
||||
/* start up the globus partial write */
|
||||
globus_mutex_init(&writediscontig_ctl_lock, GLOBUS_NULL);
|
||||
globus_cond_init(&writediscontig_ctl_cond, GLOBUS_NULL);
|
||||
writediscontig_ctl_done=GLOBUS_FALSE;
|
||||
if ( (result=globus_ftp_client_partial_put(&(gridftp_fh[fd->fd_sys]),
|
||||
fd->filename,
|
||||
&(oattr[fd->fd_sys]),
|
||||
GLOBUS_NULL,
|
||||
start,
|
||||
end,
|
||||
writediscontig_ctl_cb,
|
||||
GLOBUS_NULL))!=GLOBUS_SUCCESS )
|
||||
{
|
||||
globus_err_handler("globus_ftp_client_partial_get",myname,result);
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__, MPI_ERR_IO,
|
||||
"**io",
|
||||
"**io %s", globus_object_printable_to_string(globus_error_get(result)));
|
||||
return;
|
||||
}
|
||||
|
||||
/* Do all the actual I/Os */
|
||||
boff=0;
|
||||
nblks=0;
|
||||
while ( boff < (count*btype_size) )
|
||||
{
|
||||
int i,blklen;
|
||||
|
||||
for (i=0;i<flat_file->count;i++)
|
||||
{
|
||||
if ( (boff+flat_file->blocklens[i]) < (count*btype_size) )
|
||||
blklen=flat_file->blocklens[i];
|
||||
else
|
||||
blklen=(count*btype_size)-boff;
|
||||
if ( blklen > 0 )
|
||||
{
|
||||
goff=start+nblks*ftype_extent+((globus_off_t)flat_file->indices[i]);
|
||||
/*
|
||||
FPRINTF(stderr,"[%d/%d] %s writing %d bytes from boff=%d at goff=%Ld\n",myrank,nprocs,myname,blklen,boff,goff);
|
||||
*/
|
||||
if ( (result=globus_ftp_client_register_write(&(gridftp_fh[fd->fd_sys]),
|
||||
((globus_byte_t *)buf)+boff,
|
||||
(globus_size_t)blklen,
|
||||
goff,
|
||||
GLOBUS_TRUE,
|
||||
writediscontig_data_cb,
|
||||
(void *)(&bytes_written)))!=GLOBUS_SUCCESS )
|
||||
{
|
||||
globus_err_handler("globus_ftp_client_register_write",myname,result);
|
||||
*error_code=MPI_ERR_IO;
|
||||
ADIOI_Error(fd,*error_code,myname);
|
||||
return;
|
||||
}
|
||||
boff+=blklen;
|
||||
if ( boff>=(count*btype_size) )
|
||||
break;
|
||||
}
|
||||
}
|
||||
nblks++;
|
||||
}
|
||||
|
||||
|
||||
/* The ctl callback won't start till the data callbacks complete, so it's
|
||||
safe to wait on just the ctl callback */
|
||||
globus_mutex_lock(&writediscontig_ctl_lock);
|
||||
while ( writediscontig_ctl_done!=GLOBUS_TRUE )
|
||||
globus_cond_wait(&writediscontig_ctl_cond,&writediscontig_ctl_lock);
|
||||
globus_mutex_unlock(&writediscontig_ctl_lock);
|
||||
globus_mutex_destroy(&writediscontig_ctl_lock);
|
||||
globus_cond_destroy(&writediscontig_ctl_cond);
|
||||
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
MPIR_Status_set_bytes(status, datatype, bytes_written);
|
||||
#endif
|
||||
if (file_ptr_type != ADIO_EXPLICIT_OFFSET)
|
||||
{
|
||||
fd->fp_ind += extent;
|
||||
fd->fp_sys_posn = fd->fp_ind;
|
||||
}
|
||||
else {
|
||||
fd->fp_sys_posn = offset + extent;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#define GRIDFTP_USE_GENERIC_STRIDED
|
||||
void ADIOI_GRIDFTP_WriteStrided(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status,
|
||||
int *error_code)
|
||||
{
|
||||
#ifdef GRIDFTP_USE_GENERIC_STRIDED
|
||||
int myrank, nprocs;
|
||||
|
||||
if ( fd->access_mode&ADIO_RDONLY )
|
||||
{
|
||||
*error_code=MPI_ERR_AMODE;
|
||||
return;
|
||||
}
|
||||
|
||||
*error_code = MPI_SUCCESS;
|
||||
|
||||
MPI_Comm_size(fd->comm, &nprocs);
|
||||
MPI_Comm_rank(fd->comm, &myrank);
|
||||
|
||||
ADIOI_GEN_WriteStrided(fd, buf, count, datatype, file_ptr_type, offset,
|
||||
status, error_code);
|
||||
return;
|
||||
#else
|
||||
char myname[]="ADIOI_GRIDFTP_WriteStrided";
|
||||
int myrank, nprocs;
|
||||
int buf_contig,file_contig;
|
||||
MPI_Aint btype_size,bufsize;
|
||||
globus_byte_t *intermediate;
|
||||
|
||||
*error_code = MPI_SUCCESS;
|
||||
|
||||
MPI_Comm_size(fd->comm, &nprocs);
|
||||
MPI_Comm_rank(fd->comm, &myrank);
|
||||
|
||||
MPI_Type_size_x(datatype,&btype_size);
|
||||
bufsize=count*btype_size;
|
||||
ADIOI_Datatype_iscontig(fd->filetype,&file_contig);
|
||||
ADIOI_Datatype_iscontig(datatype,&buf_contig);
|
||||
if ( buf_contig && !file_contig )
|
||||
{
|
||||
/* Contiguous in memory, discontig in file */
|
||||
FPRINTF(stderr,"[%d/%d] %s called w/ contig mem, discontig file\n",
|
||||
myrank,nprocs,myname);
|
||||
fflush(stderr);
|
||||
|
||||
ADIOI_GRIDFTP_WriteDiscontig(fd, buf, count, datatype,
|
||||
file_ptr_type, offset, status, error_code);
|
||||
}
|
||||
else if ( !buf_contig && file_contig )
|
||||
{
|
||||
/* Discontiguous in mem, contig in file -- comparatively easy */
|
||||
int posn=0;
|
||||
|
||||
FPRINTF(stderr,"[%d/%d] %s called w/ discontig mem, contig file\n",
|
||||
myrank,nprocs,myname);
|
||||
fflush(stderr);
|
||||
|
||||
|
||||
/* squeeze contents of main buffer into intermediate buffer*/
|
||||
intermediate=(globus_byte_t *)ADIOI_Malloc((size_t)bufsize);
|
||||
MPI_Pack(buf,count,datatype,intermediate,bufsize,&posn,fd->comm);
|
||||
|
||||
/* write contiguous data from intermediate buffer */
|
||||
ADIOI_GRIDFTP_WriteContig(fd, intermediate, bufsize, MPI_BYTE,
|
||||
file_ptr_type, offset, status, error_code);
|
||||
|
||||
ADIOI_Free(intermediate);
|
||||
}
|
||||
else if ( !buf_contig && !file_contig )
|
||||
{
|
||||
/* Discontig in both mem and file -- the hardest case */
|
||||
int posn=0;
|
||||
|
||||
FPRINTF(stderr,"[%d/%d] %s called w/ discontig mem, discontig file\n",
|
||||
myrank,nprocs,myname);
|
||||
fflush(stderr);
|
||||
|
||||
/* squeeze contents of main buffer into intermediate buffer*/
|
||||
intermediate=(globus_byte_t *)ADIOI_Malloc((size_t)bufsize);
|
||||
MPI_Pack(buf,count,datatype,intermediate,bufsize,&posn,fd->comm);
|
||||
|
||||
/* write contiguous data from intermediate buffer */
|
||||
ADIOI_GRIDFTP_WriteDiscontig(fd, intermediate, bufsize, MPI_BYTE,
|
||||
file_ptr_type, offset, status, error_code);
|
||||
|
||||
ADIOI_Free(intermediate);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Why did you bother calling WriteStrided?!?!?! */
|
||||
FPRINTF(stderr,"[%d/%d] Why the heck did you call %s with contiguous buffer *and* file types?\n",
|
||||
myrank,nprocs,myname);
|
||||
ADIOI_GRIDFTP_WriteContig(fd, buf, count, datatype,
|
||||
file_ptr_type, offset, status, error_code);
|
||||
}
|
||||
#endif /* ! GRIDFTP_USE_GENERIC_STRIDED */
|
||||
}
|
||||
|
36
ompi/mca/io/romio314/romio/adio/ad_gridftp/globus_routines.c
Обычный файл
36
ompi/mca/io/romio314/romio/adio/ad_gridftp/globus_routines.c
Обычный файл
@ -0,0 +1,36 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/uio.h>
|
||||
#include <fcntl.h>
|
||||
#include "adio.h"
|
||||
#include <globus_ftp_client.h>
|
||||
|
||||
/* Here are the canonical definitions of the extern's referenced by
|
||||
ad_gridftp.h */
|
||||
int num_gridftp_handles=0;
|
||||
#ifndef ADIO_GRIDFTP_HANDLES_MAX
|
||||
#define ADIO_GRIDFTP_HANDLES_MAX 200
|
||||
#endif /* ! ADIO_GRIDFTP_HANDLES_MAX */
|
||||
/* having to keep not one but two big global tables sucks... */
|
||||
globus_ftp_client_handle_t gridftp_fh[ADIO_GRIDFTP_HANDLES_MAX];
|
||||
globus_ftp_client_operationattr_t oattr[ADIO_GRIDFTP_HANDLES_MAX];
|
||||
|
||||
void globus_err_handler(const char *routine, const char *caller,
|
||||
globus_result_t result)
|
||||
{
|
||||
int myrank,nprocs;
|
||||
globus_object_t *err;
|
||||
|
||||
MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
|
||||
MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
|
||||
err = globus_error_get(result);
|
||||
FPRINTF(stderr, "[%d/%d] %s error \"%s\", called from %s\n",
|
||||
myrank,nprocs,routine,globus_object_printable_to_string(err),caller);
|
||||
}
|
21
ompi/mca/io/romio314/romio/adio/ad_hfs/Makefile.mk
Обычный файл
21
ompi/mca/io/romio314/romio/adio/ad_hfs/Makefile.mk
Обычный файл
@ -0,0 +1,21 @@
|
||||
## -*- Mode: Makefile; -*-
|
||||
## vim: set ft=automake :
|
||||
##
|
||||
## (C) 2011 by Argonne National Laboratory.
|
||||
## See COPYRIGHT in top-level directory.
|
||||
##
|
||||
|
||||
if BUILD_AD_HFS
|
||||
|
||||
noinst_HEADERS += adio/ad_hfs/ad_hfs.h
|
||||
|
||||
romio_other_sources += \
|
||||
adio/ad_hfs/ad_hfs_read.c \
|
||||
adio/ad_hfs/ad_hfs_open.c \
|
||||
adio/ad_hfs/ad_hfs_write.c \
|
||||
adio/ad_hfs/ad_hfs_fcntl.c \
|
||||
adio/ad_hfs/ad_hfs_resize.c \
|
||||
adio/ad_hfs/ad_hfs.c
|
||||
|
||||
endif BUILD_AD_HFS
|
||||
|
1
ompi/mca/io/romio314/romio/adio/ad_hfs/README
Обычный файл
1
ompi/mca/io/romio314/romio/adio/ad_hfs/README
Обычный файл
@ -0,0 +1 @@
|
||||
This code is no longer supported.
|
36
ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs.c
Обычный файл
36
ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs.c
Обычный файл
@ -0,0 +1,36 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2001 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_hfs.h"
|
||||
|
||||
/* adioi.h has the ADIOI_Fns_struct define */
|
||||
#include "adioi.h"
|
||||
|
||||
struct ADIOI_Fns_struct ADIO_HFS_operations = {
|
||||
ADIOI_HFS_Open, /* Open */
|
||||
ADIOI_HFS_ReadContig, /* ReadContig */
|
||||
ADIOI_HFS_WriteContig, /* WriteContig */
|
||||
ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */
|
||||
ADIOI_GEN_WriteStridedColl, /* WriteStridedColl */
|
||||
ADIOI_GEN_SeekIndividual, /* SeekIndividual */
|
||||
ADIOI_HFS_Fcntl, /* Fcntl */
|
||||
ADIOI_GEN_SetInfo, /* SetInfo */
|
||||
ADIOI_GEN_ReadStrided, /* ReadStrided */
|
||||
ADIOI_GEN_WriteStrided, /* WriteStrided */
|
||||
ADIOI_GEN_Close, /* Close */
|
||||
ADIOI_FAKE_IreadContig, /* IreadContig */
|
||||
ADIOI_FAKE_IwriteContig, /* IwriteContig */
|
||||
ADIOI_FAKE_IODone, /* ReadDone */
|
||||
ADIOI_FAKE_IODone, /* WriteDone */
|
||||
ADIOI_FAKE_IOComplete, /* ReadComplete */
|
||||
ADIOI_FAKE_IOComplete, /* WriteComplete */
|
||||
ADIOI_FAKE_IreadStrided, /* IreadStrided */
|
||||
ADIOI_FAKE_IwriteStrided, /* IwriteStrided */
|
||||
ADIOI_GEN_Flush, /* Flush */
|
||||
ADIOI_HFS_Resize, /* Resize */
|
||||
ADIOI_GEN_Delete, /* Delete */
|
||||
};
|
34
ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs.h
Обычный файл
34
ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs.h
Обычный файл
@ -0,0 +1,34 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#ifndef AD_HFS_INCLUDE
|
||||
#define AD_HFS_INCLUDE
|
||||
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <fcntl.h>
|
||||
#include "adio.h"
|
||||
#ifdef SPPUX
|
||||
# include <sys/cnx_types.h>
|
||||
# include <sys/puio.h>
|
||||
# include <cnx_unistd.h>
|
||||
#endif
|
||||
|
||||
void ADIOI_HFS_Open(ADIO_File fd, int *error_code);
|
||||
void ADIOI_HFS_ReadContig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int
|
||||
*error_code);
|
||||
void ADIOI_HFS_WriteContig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int
|
||||
*error_code);
|
||||
void ADIOI_HFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int
|
||||
*error_code);
|
||||
void ADIOI_HFS_Resize(ADIO_File fd, ADIO_Offset size, int *error_code);
|
||||
|
||||
#endif
|
113
ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs_fcntl.c
Обычный файл
113
ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs_fcntl.c
Обычный файл
@ -0,0 +1,113 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_hfs.h"
|
||||
#include "adio_extern.h"
|
||||
|
||||
#ifndef HAVE_LSEEK64
|
||||
#define lseek64 lseek
|
||||
#endif
|
||||
/*
 * File-control operations for the HFS driver.
 *
 * flag selects the operation:
 *   ADIO_FCNTL_GET_FSIZE      - store the file size in fcntl_struct->fsize
 *   ADIO_FCNTL_SET_DISKSPACE  - preallocate fcntl_struct->diskspace bytes
 *   ADIO_FCNTL_SET_ATOMICITY  - record the atomicity flag (normalised to 0/1)
 * Any other flag prints a message and aborts MPI_COMM_WORLD.
 *
 * *error_code is MPI_SUCCESS on success, otherwise an I/O error code.
 */
void ADIOI_HFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *error_code)
{
#ifndef PRINT_ERR_MSG
    static char myname[] = "ADIOI_HFS_FCNTL";
#endif

    switch(flag) {
    case ADIO_FCNTL_GET_FSIZE:
        fcntl_struct->fsize = lseek64(fd->fd_sys, 0, SEEK_END);
#ifdef HPUX
        /* put the system file pointer back where the driver believes it is */
        if (fd->fp_sys_posn != -1)
            lseek64(fd->fd_sys, fd->fp_sys_posn, SEEK_SET);
        /* not required in SPPUX since there we use pread/pwrite */
#endif
        if (fcntl_struct->fsize == -1) {
#ifdef MPICH
            *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io",
                                               "**io %s", strerror(errno));
#elif defined(PRINT_ERR_MSG)
            *error_code = MPI_ERR_UNKNOWN;
#else /* MPICH-1 */
            *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR,
                                          myname, "I/O Error", "%s", strerror(errno));
            ADIOI_Error(fd, *error_code, myname);
#endif
        }
        else *error_code = MPI_SUCCESS;
        break;

    case ADIO_FCNTL_SET_DISKSPACE:
        {
            /* will be called by one process only */

            /* Nonzero when the native preallocation call could not be used
               and ADIOI_GEN_Prealloc() has to do the work instead. */
            int use_generic = 1;
#if defined(HPUX) || defined(SPPUX)
            int err;
#endif

            /* NOTE(review): HPUX and SPPUX are treated as mutually
               exclusive here -- confirm against the build configuration. */
#if defined(HPUX)
            err = prealloc64(fd->fd_sys, fcntl_struct->diskspace);
            /* prealloc64 works only if file is of zero length */
            if (err && (errno != ENOTEMPTY)) {
#ifdef MPICH
                *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io",
                                                   "**io %s", strerror(errno));
#elif defined(PRINT_ERR_MSG)
                *error_code = MPI_ERR_UNKNOWN;
#else
                *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR,
                                              myname, "I/O Error", "%s", strerror(errno));
                ADIOI_Error(fd, *error_code, myname);
#endif
                return;
            }
            /* err != 0 here means ENOTEMPTY: fall back to the generic path */
            use_generic = (err != 0);
#elif defined(SPPUX)
            /* SPPUX has no prealloc64. therefore, use prealloc
               if size < (2GB - 1), otherwise use long method. */
            if (fcntl_struct->diskspace <= 2147483647) {
                err = prealloc(fd->fd_sys, (off_t) fcntl_struct->diskspace);
                if (err && (errno != ENOTEMPTY)) {
#ifdef MPICH
                    *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io",
                                                       "**io %s", strerror(errno));
#elif defined(PRINT_ERR_MSG)
                    *error_code = MPI_ERR_UNKNOWN;
#else
                    *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR,
                                                  myname, "I/O Error", "%s", strerror(errno));
                    ADIOI_Error(fd, *error_code, myname);
#endif
                    return;
                }
                use_generic = (err != 0);
            }
#endif

            if (!use_generic) {
                *error_code = MPI_SUCCESS;
                break;
            }

            /* The status reported by the generic routine is passed through
               to the caller instead of being overwritten. */
            ADIOI_GEN_Prealloc(fd, fcntl_struct->diskspace, error_code);
#ifdef HPUX
            if (fd->fp_sys_posn != -1)
                lseek64(fd->fd_sys, fd->fp_sys_posn, SEEK_SET);
            /* not required in SPPUX since there we use pread/pwrite */
#endif
            break;
        }

    case ADIO_FCNTL_SET_ATOMICITY:
        fd->atomicity = (fcntl_struct->atomicity == 0) ? 0 : 1;
        *error_code = MPI_SUCCESS;
        break;

    default:
        FPRINTF(stderr, "Unknown flag passed to ADIOI_HFS_Fcntl\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
}
|
67
ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs_open.c
Обычный файл
67
ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs_open.c
Обычный файл
@ -0,0 +1,67 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_hfs.h"
|
||||
|
||||
#ifndef HAVE_LSEEK64
|
||||
#define lseek64 lseek
|
||||
#endif
|
||||
|
||||
/*
 * Open fd->filename with open64() and store the descriptor in fd->fd_sys.
 *
 * The ADIO access-mode bits are translated to the matching O_* flags.  When
 * no permissions were supplied (ADIO_PERM_NULL) they are derived from the
 * process umask.  For ADIO_APPEND the individual file pointer is placed at
 * end of file.  *error_code is MPI_SUCCESS when the open succeeded and an
 * I/O error code otherwise.
 */
void ADIOI_HFS_Open(ADIO_File fd, int *error_code)
{
    int create_perm, saved_umask, open_flags = 0;
#ifndef PRINT_ERR_MSG
    static char myname[] = "ADIOI_HFS_OPEN";
#endif

    if (fd->perm != ADIO_PERM_NULL) {
        create_perm = fd->perm;
    }
    else {
        /* umask() can only be read by setting it, so restore it at once */
        saved_umask = umask(022);
        umask(saved_umask);
        create_perm = saved_umask ^ 0666;
    }

    if (fd->access_mode & ADIO_CREATE) open_flags |= O_CREAT;
    if (fd->access_mode & ADIO_RDONLY) open_flags |= O_RDONLY;
    if (fd->access_mode & ADIO_WRONLY) open_flags |= O_WRONLY;
    if (fd->access_mode & ADIO_RDWR)   open_flags |= O_RDWR;
    if (fd->access_mode & ADIO_EXCL)   open_flags |= O_EXCL;

    fd->fd_sys = open64(fd->filename, open_flags, create_perm);
    fd->fd_direct = -1;

    if ((fd->access_mode & ADIO_APPEND) && (fd->fd_sys != -1)) {
        fd->fp_ind = lseek64(fd->fd_sys, 0, SEEK_END);
#ifdef HPUX
        fd->fp_sys_posn = fd->fp_ind;
#endif
    }

#ifdef SPPUX
    fd->fp_sys_posn = -1; /* set it to null bec. we use pread, pwrite*/
#endif

    if (fd->fd_sys != -1) {
        *error_code = MPI_SUCCESS;
        return;
    }

    /* open64() failed */
#ifdef MPICH
    *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io",
                                       "**io %s", strerror(errno));
#elif defined(PRINT_ERR_MSG)
    *error_code = MPI_ERR_UNKNOWN;
#else /* MPICH-1 */
    *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR,
                                  myname, "I/O Error", "%s", strerror(errno));
    ADIOI_Error(ADIO_FILE_NULL, *error_code, myname);
#endif
}
|
71
ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs_read.c
Обычный файл
71
ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs_read.c
Обычный файл
@ -0,0 +1,71 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_hfs.h"
|
||||
|
||||
#ifndef HAVE_LSEEK64
|
||||
#define lseek64 lseek
|
||||
#endif
|
||||
|
||||
void ADIOI_HFS_ReadContig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int *error_code)
|
||||
{
|
||||
MPI_Count err=-1, datatype_size, len;
|
||||
#ifndef PRINT_ERR_MSG
|
||||
static char myname[] = "ADIOI_HFS_READCONTIG";
|
||||
#endif
|
||||
|
||||
MPI_Type_size_x(datatype, &datatype_size);
|
||||
len = datatype_size * count;
|
||||
|
||||
#ifdef SPPUX
|
||||
fd->fp_sys_posn = -1; /* set it to null, since we are using pread */
|
||||
|
||||
if (file_ptr_type == ADIO_EXPLICIT_OFFSET)
|
||||
err = pread64(fd->fd_sys, buf, len, offset);
|
||||
else { /* read from curr. location of ind. file pointer */
|
||||
err = pread64(fd->fd_sys, buf, len, fd->fp_ind);
|
||||
fd->fp_ind += err;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef HPUX
|
||||
if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
|
||||
if (fd->fp_sys_posn != offset)
|
||||
lseek64(fd->fd_sys, offset, SEEK_SET);
|
||||
err = read(fd->fd_sys, buf, len);
|
||||
fd->fp_sys_posn = offset + err;
|
||||
/* individual file pointer not updated */
|
||||
}
|
||||
else { /* read from curr. location of ind. file pointer */
|
||||
if (fd->fp_sys_posn != fd->fp_ind)
|
||||
lseek64(fd->fd_sys, fd->fp_ind, SEEK_SET);
|
||||
err = read(fd->fd_sys, buf, len);
|
||||
fd->fp_ind += err;
|
||||
fd->fp_sys_posn = fd->fp_ind;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
if (err != -1) MPIR_Status_set_bytes(status, datatype, err);
|
||||
#endif
|
||||
|
||||
if (err == -1 ) {
|
||||
#ifdef MPICH
|
||||
*error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io",
|
||||
"**io %s", strerror(errno));
|
||||
#elif defined(PRINT_ERR_MSG)
|
||||
*error_code = (err == -1) ? MPI_ERR_UNKNOWN : MPI_SUCCESS;
|
||||
#else /* MPICH-1 */
|
||||
*error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR,
|
||||
myname, "I/O Error", "%s", strerror(errno));
|
||||
ADIOI_Error(fd, *error_code, myname);
|
||||
#endif
|
||||
}
|
||||
else *error_code = MPI_SUCCESS;
|
||||
}
|
31
ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs_resize.c
Обычный файл
31
ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs_resize.c
Обычный файл
@ -0,0 +1,31 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_hfs.h"
|
||||
|
||||
/*
 * Set the length of the file behind fd to `size` bytes via ftruncate64().
 * *error_code is MPI_SUCCESS on success and an I/O error code otherwise.
 */
void ADIOI_HFS_Resize(ADIO_File fd, ADIO_Offset size, int *error_code)
{
    int rc;
#ifndef PRINT_ERR_MSG
    static char myname[] = "ADIOI_HFS_RESIZE";
#endif

    rc = ftruncate64(fd->fd_sys, size);
    if (rc != -1) {
        *error_code = MPI_SUCCESS;
        return;
    }

    /* ftruncate64() failed */
#ifdef MPICH
    *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io",
                                       "**io %s", strerror(errno));
#elif defined(PRINT_ERR_MSG)
    *error_code = MPI_ERR_UNKNOWN;
#else /* MPICH-1 */
    *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR,
                                  myname, "I/O Error", "%s", strerror(errno));
    ADIOI_Error(fd, *error_code, myname);
#endif
}
|
70
ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs_write.c
Обычный файл
70
ompi/mca/io/romio314/romio/adio/ad_hfs/ad_hfs_write.c
Обычный файл
@ -0,0 +1,70 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_hfs.h"
|
||||
|
||||
#ifndef HAVE_LSEEK64
|
||||
#define lseek64 lseek
|
||||
#endif
|
||||
|
||||
void ADIOI_HFS_WriteContig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int *error_code)
|
||||
{
|
||||
MPI_Count err=-1, datatype_size, len;
|
||||
#ifndef PRINT_ERR_MSG
|
||||
static char myname[] = "ADIOI_HFS_WRITECONTIG";
|
||||
#endif
|
||||
|
||||
MPI_Type_size_x(datatype, &datatype_size);
|
||||
len = datatype_size * count;
|
||||
|
||||
#ifdef SPPUX
|
||||
fd->fp_sys_posn = -1; /* set it to null, since we are using pwrite */
|
||||
if (file_ptr_type == ADIO_EXPLICIT_OFFSET)
|
||||
err = pwrite64(fd->fd_sys, buf, len, offset);
|
||||
else { /* write from curr. location of ind. file pointer */
|
||||
err = pwrite64(fd->fd_sys, buf, len, fd->fp_ind);
|
||||
fd->fp_ind += err;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef HPUX
|
||||
if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
|
||||
if (fd->fp_sys_posn != offset)
|
||||
lseek64(fd->fd_sys, offset, SEEK_SET);
|
||||
err = write(fd->fd_sys, buf, len);
|
||||
fd->fp_sys_posn = offset + err;
|
||||
/* individual file pointer not updated */
|
||||
}
|
||||
else { /* write from curr. location of ind. file pointer */
|
||||
if (fd->fp_sys_posn != fd->fp_ind)
|
||||
lseek64(fd->fd_sys, fd->fp_ind, SEEK_SET);
|
||||
err = write(fd->fd_sys, buf, len);
|
||||
fd->fp_ind += err;
|
||||
fd->fp_sys_posn = fd->fp_ind;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
if (err != -1) MPIR_Status_set_bytes(status, datatype, err);
|
||||
#endif
|
||||
|
||||
if (err == -1) {
|
||||
#ifdef MPICH
|
||||
*error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io",
|
||||
"**io %s", strerror(errno));
|
||||
#elif defined(PRINT_ERR_MSG)
|
||||
*error_code = MPI_SUCCESS;
|
||||
#else /* MPICH-1 */
|
||||
*error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR,
|
||||
myname, "I/O Error", "%s", strerror(errno));
|
||||
ADIOI_Error(fd, *error_code, myname);
|
||||
#endif
|
||||
}
|
||||
else *error_code = MPI_SUCCESS;
|
||||
}
|
22
ompi/mca/io/romio314/romio/adio/ad_lustre/Makefile.mk
Обычный файл
22
ompi/mca/io/romio314/romio/adio/ad_lustre/Makefile.mk
Обычный файл
@ -0,0 +1,22 @@
|
||||
## -*- Mode: Makefile; -*-
|
||||
## vim: set ft=automake :
|
||||
##
|
||||
## (C) 2011 by Argonne National Laboratory.
|
||||
## See COPYRIGHT in top-level directory.
|
||||
##
|
||||
|
||||
if BUILD_AD_LUSTRE
|
||||
|
||||
noinst_HEADERS += adio/ad_lustre/ad_lustre.h
|
||||
|
||||
romio_other_sources += \
|
||||
adio/ad_lustre/ad_lustre.c \
|
||||
adio/ad_lustre/ad_lustre_open.c \
|
||||
adio/ad_lustre/ad_lustre_rwcontig.c \
|
||||
adio/ad_lustre/ad_lustre_wrcoll.c \
|
||||
adio/ad_lustre/ad_lustre_wrstr.c \
|
||||
adio/ad_lustre/ad_lustre_hints.c \
|
||||
adio/ad_lustre/ad_lustre_aggregate.c
|
||||
|
||||
endif BUILD_AD_LUSTRE
|
||||
|
55
ompi/mca/io/romio314/romio/adio/ad_lustre/README
Обычный файл
55
ompi/mca/io/romio314/romio/adio/ad_lustre/README
Обычный файл
@ -0,0 +1,55 @@
|
||||
Coming soon:
|
||||
o Hierarchical striping as described in the paper from CCGrid2007
|
||||
http://ft.ornl.gov/projects/io/pubs/CCGrid-2007-file-joining.pdf
|
||||
Further out:
|
||||
o Post the code for ParColl (Partitioned collective IO)
|
||||
|
||||
-----------------------------------------------------
|
||||
V05:
|
||||
-----------------------------------------------------
|
||||
Improved data redistribution
|
||||
o Improve I/O pattern identification. Besides checking interleaving,
|
||||
if request I/O size is small, collective I/O will be performed.
|
||||
The hint bigsize can be used to define the req size value.
|
||||
o Provide hint CO for load balancing to control the number of
|
||||
IO clients for each OST
|
||||
o Produce stripe-contiguous I/O pattern that Lustre prefers
|
||||
o Control read-modify-write in data sieving in collective IO
|
||||
by hint ds_in_coll.
|
||||
o Reduce extent lock conflicts by making each OST accessed by one or
|
||||
more constant clients.
|
||||
|
||||
-----------------------------------------------------
|
||||
V04:
|
||||
-----------------------------------------------------
|
||||
o Direct IO and Lockless IO support
|
||||
|
||||
-----------------------------------------------------
|
||||
V03:
|
||||
-----------------------------------------------------
|
||||
o Correct detection of fs_type when lustre: prefix is not given
|
||||
o Further fix on stripe alignment
|
||||
o Tested/Enabled striping hints over Cray XT (Catamount and CNL)
|
||||
|
||||
-----------------------------------------------------
|
||||
V02:
|
||||
-----------------------------------------------------
|
||||
The Lustre ADIO driver has been cleaned up quite a lot. Compared
|
||||
to the initial posting, here are the changes:
|
||||
o Removal of dead/redundant code
|
||||
o Removal of asynchronous IO piece as it appears outdated
|
||||
o Bug fixes for setting Lustre Hints
|
||||
o Bug fixes for data sieving
|
||||
o Improved Setsize operation with one process calling ftruncate
|
||||
o Improved collective IO with domain partitioning on
|
||||
Lustre stripe boundary
|
||||
|
||||
Contributing:
|
||||
o You may contribute via many different ways, such as
|
||||
testing results, bug reports, and new feature patches.
|
||||
o We appreciate any courtesy reference of this work.
|
||||
o Disclaimer: you are welcome to try the code, but at your own risk.
|
||||
|
||||
Contact info:
|
||||
For more info, visit http://ft.ornl.gov/projects/io/
|
||||
|
44
ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre.c
Обычный файл
44
ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre.c
Обычный файл
@ -0,0 +1,44 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
* Copyright (C) 2001 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*
|
||||
* Copyright (C) 2007 Oak Ridge National Laboratory
|
||||
*
|
||||
* Copyright (C) 2008 Sun Microsystems, Lustre group
|
||||
*/
|
||||
|
||||
#include "ad_lustre.h"
|
||||
|
||||
struct ADIOI_Fns_struct ADIO_LUSTRE_operations = {
|
||||
ADIOI_LUSTRE_Open, /* Open */
|
||||
ADIOI_GEN_OpenColl, /* OpenColl */
|
||||
ADIOI_LUSTRE_ReadContig, /* ReadContig */
|
||||
ADIOI_LUSTRE_WriteContig, /* WriteContig */
|
||||
ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */
|
||||
ADIOI_LUSTRE_WriteStridedColl, /* WriteStridedColl */
|
||||
ADIOI_GEN_SeekIndividual, /* SeekIndividual */
|
||||
ADIOI_GEN_Fcntl, /* Fcntl */
|
||||
ADIOI_LUSTRE_SetInfo, /* SetInfo */
|
||||
ADIOI_GEN_ReadStrided, /* ReadStrided */
|
||||
ADIOI_LUSTRE_WriteStrided, /* WriteStrided */
|
||||
ADIOI_GEN_Close, /* Close */
|
||||
#if defined(ROMIO_HAVE_WORKING_AIO) && !defined(CRAY_XT_LUSTRE)
|
||||
ADIOI_GEN_IreadContig, /* IreadContig */
|
||||
ADIOI_GEN_IwriteContig, /* IwriteContig */
|
||||
#else
|
||||
ADIOI_FAKE_IreadContig, /* IreadContig */
|
||||
ADIOI_FAKE_IwriteContig, /* IwriteContig */
|
||||
#endif
|
||||
ADIOI_GEN_IODone, /* ReadDone */
|
||||
ADIOI_GEN_IODone, /* WriteDone */
|
||||
ADIOI_GEN_IOComplete, /* ReadComplete */
|
||||
ADIOI_GEN_IOComplete, /* WriteComplete */
|
||||
ADIOI_GEN_IreadStrided, /* IreadStrided */
|
||||
ADIOI_GEN_IwriteStrided, /* IwriteStrided */
|
||||
ADIOI_GEN_Flush, /* Flush */
|
||||
ADIOI_GEN_Resize, /* Resize */
|
||||
ADIOI_GEN_Delete, /* Delete */
|
||||
ADIOI_GEN_Feature, /* Features */
|
||||
"LUSTRE:",
|
||||
};
|
91
ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre.h
Обычный файл
91
ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre.h
Обычный файл
@ -0,0 +1,91 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*
|
||||
* Copyright (C) 2007 Oak Ridge National Laboratory
|
||||
*
|
||||
* Copyright (C) 2008 Sun Microsystems, Lustre group
|
||||
*/
|
||||
|
||||
/*
 * Declarations for the Lustre ADIO driver.
 *
 * The include guard is specific to this header so that it cannot be
 * suppressed by (or suppress) another driver header that uses the
 * AD_UNIX_INCLUDE guard name.
 */
#ifndef AD_LUSTRE_INCLUDE
#define AD_LUSTRE_INCLUDE

/* temp*/
#define HAVE_ASM_TYPES_H 1

#include <unistd.h>
#include <linux/types.h>

#ifdef __linux__
#  include <sys/ioctl.h>                            /* necessary for: */
#  include <time.h>
#  define __USE_GNU                                 /* O_DIRECT and */
#  include <fcntl.h>                                /* IO operations */
#  undef __USE_GNU
#endif /* __linux__ */

/*#include <fcntl.h>*/
#include <sys/ioctl.h>
#include <lustre/lustre_user.h>
#include "adio.h"
/*#include "adioi.h"*/

#ifdef HAVE_SIGNAL_H
#include <signal.h>
#endif

#ifdef HAVE_AIO_H
#include <aio.h>
#ifdef HAVE_SYS_AIO_H
#include <sys/aio.h>
#endif /* HAVE_SYS_AIO_H */
#endif /* HAVE_AIO_H */

/* driver entry points */
void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code);
void ADIOI_LUSTRE_Close(ADIO_File fd, int *error_code);
void ADIOI_LUSTRE_ReadContig(ADIO_File fd, void *buf, int count,
                             MPI_Datatype datatype, int file_ptr_type,
                             ADIO_Offset offset, ADIO_Status *status,
                             int *error_code);
void ADIOI_LUSTRE_WriteContig(ADIO_File fd, const void *buf, int count,
                              MPI_Datatype datatype, int file_ptr_type,
                              ADIO_Offset offset, ADIO_Status *status,
                              int *error_code);
void ADIOI_LUSTRE_WriteStrided(ADIO_File fd, const void *buf, int count,
                               MPI_Datatype datatype, int file_ptr_type,
                               ADIO_Offset offset, ADIO_Status *status,
                               int *error_code);
void ADIOI_LUSTRE_WriteStridedColl(ADIO_File fd, const void *buf, int count,
                                   MPI_Datatype datatype, int file_ptr_type,
                                   ADIO_Offset offset, ADIO_Status *status,
                                   int *error_code);
void ADIOI_LUSTRE_ReadStridedColl(ADIO_File fd, void *buf, int count,
                                  MPI_Datatype datatype, int file_ptr_type,
                                  ADIO_Offset offset, ADIO_Status *status,
                                  int *error_code);
void ADIOI_LUSTRE_ReadStrided(ADIO_File fd, void *buf, int count,
                              MPI_Datatype datatype, int file_ptr_type,
                              ADIO_Offset offset, ADIO_Status *status,
                              int *error_code);
void ADIOI_LUSTRE_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct,
                        int *error_code);
void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code);

/* the lustre utilities: */
int ADIOI_LUSTRE_Docollect(ADIO_File fd, int contig_access_count,
                           ADIO_Offset *len_list, int nprocs);

void ADIOI_LUSTRE_Get_striping_info(ADIO_File fd, int **striping_info_ptr,
                                    int mode);
void ADIOI_LUSTRE_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list,
                              ADIO_Offset *len_list, int contig_access_count,
                              int *striping_info, int nprocs,
                              int *count_my_req_procs_ptr,
                              int **count_my_req_per_proc_ptr,
                              ADIOI_Access **my_req_ptr,
                              int ***buf_idx_ptr);

int ADIOI_LUSTRE_Calc_aggregator(ADIO_File fd, ADIO_Offset off,
                                 ADIO_Offset *len, int *striping_info);
#endif /* End of AD_LUSTRE_INCLUDE */
|
322
ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_aggregate.c
Обычный файл
322
ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_aggregate.c
Обычный файл
@ -0,0 +1,322 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*
|
||||
* Copyright (C) 2007 Oak Ridge National Laboratory
|
||||
*
|
||||
* Copyright (C) 2008 Sun Microsystems, Lustre group
|
||||
*/
|
||||
|
||||
#include "ad_lustre.h"
|
||||
#include "adio_extern.h"
|
||||
|
||||
#undef AGG_DEBUG
|
||||
|
||||
/*
 * ADIOI_LUSTRE_Get_striping_info - build the 3-entry striping table used by
 * the Lustre collective I/O code.
 *
 * On return *striping_info_ptr points at a freshly ADIOI_Malloc'ed array:
 *   [0] stripe size   (fd->hints->striping_unit)
 *   [1] stripe count  (fd->hints->striping_factor)
 *   [2] number of aggregators that will actually be used
 *
 * mode == 0 limits every OST to a single client (the original code uses this
 * for collective reads, to avoid extra seeks on the OST); any other value
 * takes the clients-per-OST ratio from the lustre co_ratio hint.
 *
 * The code divides by the stripe count; the original comments state that the
 * stripe size, stripe count and co_ratio were validated in
 * ADIOI_LUSTRE_Open().
 */
void ADIOI_LUSTRE_Get_striping_info(ADIO_File fd, int **striping_info_ptr,
                                    int mode)
{
    const int unit = fd->hints->striping_unit;
    const int n_osts = fd->hints->striping_factor;
    const int cb_nodes = fd->hints->cb_nodes;
    const int clients_per_ost = mode ? fd->hints->fs_hints.lustre.co_ratio : 1;
    int usable;
    int *info;

    /* Algorithm courtesy Pascal Deveze (pascal.deveze@bull.net).
     * To avoid extent lock conflicts the aggregator count must either be a
     * multiple of the stripe count or divide it exactly, so that each OST is
     * served by a constant number of clients (at most clients_per_ost). */
    if (cb_nodes >= n_osts) {
        /* A multiple of the stripe count, capped by the per-OST ratio.
         * e.g. stripe count 20, 42 aggregators, ratio 3 -> 40 */
        int per_ost = cb_nodes / n_osts;

        if (clients_per_ost < per_ost)
            per_ost = clients_per_ost;
        usable = n_osts * per_ost;
    } else {
        /* Fewer aggregators than stripes: pick the largest divisor of the
         * stripe count that does not exceed the aggregator count.
         * e.g. stripe count 60, 8 aggregators -> 6.
         * Only divisors up to sqrt(stripe count) are scanned; each one also
         * yields its cofactor as a candidate. */
        int d;

        usable = 1;
        for (d = 2; n_osts >= d * d; d++) {
            if ((n_osts % d) != 0)
                continue;
            if (n_osts / d <= cb_nodes) {
                /* the cofactor fits, and no later candidate can be larger */
                usable = n_osts / d;
                break;
            }
            /* the small divisor itself is a valid, possibly improvable, answer */
            if (d <= cb_nodes)
                usable = d;
        }
    }

    info = (int *) ADIOI_Malloc(3 * sizeof(int));
    info[0] = unit;
    info[1] = n_osts;
    info[2] = usable;
    *striping_info_ptr = info;
}
|
||||
|
||||
/*
 * ADIOI_LUSTRE_Calc_aggregator - map a file offset to the rank of the
 * aggregator responsible for the stripe containing it.
 *
 * striping_info[0] is the stripe size and striping_info[2] the number of
 * aggregators in use.  Stripes are dealt to aggregators round-robin.
 *
 * On entry *len is the length of the access starting at off; on return it is
 * clipped to the number of bytes from off to the end of that stripe.
 *
 * Returns the rank looked up in fd->hints->ranklist.
 */
int ADIOI_LUSTRE_Calc_aggregator(ADIO_File fd, ADIO_Offset off,
                                 ADIO_Offset *len, int *striping_info)
{
    const ADIO_Offset stripe = (ADIO_Offset) striping_info[0];
    const ADIO_Offset stripe_no = off / stripe;
    /* stripe-contiguous pattern: stripe k belongs to aggregator k mod N */
    const int agg_idx = (int) (stripe_no % striping_info[2]);
    ADIO_Offset to_stripe_end;

    /* ranklist is indexed with agg_idx and holds no more than
     * fd->hints->cb_nodes entries; going past that would overrun the array.
     * This abort should never be reached. */
    if (agg_idx >= fd->hints->cb_nodes)
        MPI_Abort(MPI_COMM_WORLD, 1);

    to_stripe_end = (stripe_no + 1) * stripe - off;
    if (*len > to_stripe_end) {
        /* this aggregator only owns part of the requested contiguous region */
        *len = to_stripe_end;
    }

    /* translate the aggregator index into an MPI rank */
    return fd->hints->ranklist[agg_idx];
}
|
||||
|
||||
/* ADIOI_LUSTRE_Calc_my_req() - calculate what portions of the access requests
|
||||
* of this process are located in the file domains of various processes
|
||||
* (including this one)
|
||||
*/
|
||||
|
||||
|
||||
void ADIOI_LUSTRE_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list,
|
||||
ADIO_Offset *len_list, int contig_access_count,
|
||||
int *striping_info, int nprocs,
|
||||
int *count_my_req_procs_ptr,
|
||||
int **count_my_req_per_proc_ptr,
|
||||
ADIOI_Access **my_req_ptr,
|
||||
int ***buf_idx_ptr)
|
||||
{
|
||||
/* Nothing different from ADIOI_Calc_my_req(), except calling
|
||||
* ADIOI_Lustre_Calc_aggregator() instead of the old one */
|
||||
int *count_my_req_per_proc, count_my_req_procs, **buf_idx;
|
||||
int i, l, proc;
|
||||
ADIO_Offset avail_len, rem_len, curr_idx, off;
|
||||
ADIOI_Access *my_req;
|
||||
|
||||
*count_my_req_per_proc_ptr = (int *) ADIOI_Calloc(nprocs, sizeof(int));
|
||||
count_my_req_per_proc = *count_my_req_per_proc_ptr;
|
||||
/* count_my_req_per_proc[i] gives the no. of contig. requests of this
|
||||
* process in process i's file domain. calloc initializes to zero.
|
||||
* I'm allocating memory of size nprocs, so that I can do an
|
||||
* MPI_Alltoall later on.
|
||||
*/
|
||||
|
||||
buf_idx = (int **) ADIOI_Malloc(nprocs * sizeof(int*));
|
||||
|
||||
/* one pass just to calculate how much space to allocate for my_req;
|
||||
* contig_access_count was calculated way back in ADIOI_Calc_my_off_len()
|
||||
*/
|
||||
for (i = 0; i < contig_access_count; i++) {
|
||||
/* short circuit offset/len processing if len == 0
|
||||
* (zero-byte read/write
|
||||
*/
|
||||
if (len_list[i] == 0)
|
||||
continue;
|
||||
off = offset_list[i];
|
||||
avail_len = len_list[i];
|
||||
/* note: we set avail_len to be the total size of the access.
|
||||
* then ADIOI_LUSTRE_Calc_aggregator() will modify the value to return
|
||||
* the amount that was available.
|
||||
*/
|
||||
proc = ADIOI_LUSTRE_Calc_aggregator(fd, off, &avail_len, striping_info);
|
||||
count_my_req_per_proc[proc]++;
|
||||
|
||||
/* figure out how many data is remaining in the access
|
||||
* we'll take care of this data (if there is any)
|
||||
* in the while loop below.
|
||||
*/
|
||||
rem_len = len_list[i] - avail_len;
|
||||
|
||||
while (rem_len != 0) {
|
||||
off += avail_len; /* point to first remaining byte */
|
||||
avail_len = rem_len; /* save remaining size, pass to calc */
|
||||
proc = ADIOI_LUSTRE_Calc_aggregator(fd, off, &avail_len, striping_info);
|
||||
count_my_req_per_proc[proc]++;
|
||||
rem_len -= avail_len; /* reduce remaining length by amount from fd */
|
||||
}
|
||||
}
|
||||
|
||||
/* buf_idx is relevant only if buftype_is_contig.
|
||||
* buf_idx[i] gives the index into user_buf where data received
|
||||
* from proc 'i' should be placed. This allows receives to be done
|
||||
* without extra buffer. This can't be done if buftype is not contig.
|
||||
*/
|
||||
|
||||
/* initialize buf_idx vectors */
|
||||
for (i = 0; i < nprocs; i++) {
|
||||
/* add one to count_my_req_per_proc[i] to avoid zero size malloc */
|
||||
buf_idx[i] = (int *) ADIOI_Malloc((count_my_req_per_proc[i] + 1)
|
||||
* sizeof(int));
|
||||
}
|
||||
|
||||
/* now allocate space for my_req, offset, and len */
|
||||
*my_req_ptr = (ADIOI_Access *) ADIOI_Malloc(nprocs * sizeof(ADIOI_Access));
|
||||
my_req = *my_req_ptr;
|
||||
|
||||
count_my_req_procs = 0;
|
||||
for (i = 0; i < nprocs; i++) {
|
||||
if (count_my_req_per_proc[i]) {
|
||||
my_req[i].offsets = (ADIO_Offset *)
|
||||
ADIOI_Malloc(count_my_req_per_proc[i] *
|
||||
sizeof(ADIO_Offset));
|
||||
my_req[i].lens = ADIOI_Malloc(count_my_req_per_proc[i] *
|
||||
sizeof(ADIO_Offset));
|
||||
count_my_req_procs++;
|
||||
}
|
||||
my_req[i].count = 0; /* will be incremented where needed later */
|
||||
}
|
||||
|
||||
/* now fill in my_req */
|
||||
curr_idx = 0;
|
||||
for (i = 0; i < contig_access_count; i++) {
|
||||
/* short circuit offset/len processing if len == 0
|
||||
* (zero-byte read/write */
|
||||
if (len_list[i] == 0)
|
||||
continue;
|
||||
off = offset_list[i];
|
||||
avail_len = len_list[i];
|
||||
proc = ADIOI_LUSTRE_Calc_aggregator(fd, off, &avail_len, striping_info);
|
||||
|
||||
l = my_req[proc].count;
|
||||
|
||||
ADIOI_Assert(curr_idx == (int) curr_idx);
|
||||
ADIOI_Assert(l < count_my_req_per_proc[proc]);
|
||||
buf_idx[proc][l] = (int) curr_idx;
|
||||
curr_idx += avail_len;
|
||||
|
||||
rem_len = len_list[i] - avail_len;
|
||||
|
||||
/* store the proc, offset, and len information in an array
|
||||
* of structures, my_req. Each structure contains the
|
||||
* offsets and lengths located in that process's FD,
|
||||
* and the associated count.
|
||||
*/
|
||||
my_req[proc].offsets[l] = off;
|
||||
ADIOI_Assert(avail_len == (int) avail_len);
|
||||
my_req[proc].lens[l] = (int) avail_len;
|
||||
my_req[proc].count++;
|
||||
|
||||
while (rem_len != 0) {
|
||||
off += avail_len;
|
||||
avail_len = rem_len;
|
||||
proc = ADIOI_LUSTRE_Calc_aggregator(fd, off, &avail_len,
|
||||
striping_info);
|
||||
|
||||
l = my_req[proc].count;
|
||||
ADIOI_Assert(curr_idx == (int) curr_idx);
|
||||
ADIOI_Assert(l < count_my_req_per_proc[proc]);
|
||||
buf_idx[proc][l] = (int) curr_idx;
|
||||
|
||||
curr_idx += avail_len;
|
||||
rem_len -= avail_len;
|
||||
|
||||
my_req[proc].offsets[l] = off;
|
||||
ADIOI_Assert(avail_len == (int) avail_len);
|
||||
my_req[proc].lens[l] = (int) avail_len;
|
||||
my_req[proc].count++;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef AGG_DEBUG
|
||||
for (i = 0; i < nprocs; i++) {
|
||||
if (count_my_req_per_proc[i] > 0) {
|
||||
FPRINTF(stdout, "data needed from %d (count = %d):\n",
|
||||
i, my_req[i].count);
|
||||
for (l = 0; l < my_req[i].count; l++) {
|
||||
FPRINTF(stdout, " off[%d] = %lld, len[%d] = %d\n",
|
||||
l, my_req[i].offsets[l], l, my_req[i].lens[l]);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
*count_my_req_procs_ptr = count_my_req_procs;
|
||||
*buf_idx_ptr = buf_idx;
|
||||
}
|
||||
|
||||
int ADIOI_LUSTRE_Docollect(ADIO_File fd, int contig_access_count,
|
||||
ADIO_Offset *len_list, int nprocs)
|
||||
{
|
||||
/* If the processes are non-interleaved, we will check the req_size.
|
||||
* if (avg_req_size > big_req_size) {
|
||||
* docollect = 0;
|
||||
* }
|
||||
*/
|
||||
|
||||
int i, docollect = 1, big_req_size = 0;
|
||||
ADIO_Offset req_size = 0, total_req_size;
|
||||
int avg_req_size, total_access_count;
|
||||
|
||||
/* calculate total_req_size and total_access_count */
|
||||
for (i = 0; i < contig_access_count; i++)
|
||||
req_size += len_list[i];
|
||||
MPI_Allreduce(&req_size, &total_req_size, 1, MPI_LONG_LONG_INT, MPI_SUM,
|
||||
fd->comm);
|
||||
MPI_Allreduce(&contig_access_count, &total_access_count, 1, MPI_INT, MPI_SUM,
|
||||
fd->comm);
|
||||
/* estimate average req_size */
|
||||
avg_req_size = (int)(total_req_size / total_access_count);
|
||||
/* get hint of big_req_size */
|
||||
big_req_size = fd->hints->fs_hints.lustre.coll_threshold;
|
||||
/* Don't perform collective I/O if there are big requests */
|
||||
if ((big_req_size > 0) && (avg_req_size > big_req_size))
|
||||
docollect = 0;
|
||||
|
||||
return docollect;
|
||||
}
|
97
ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_fcntl.c
Обычный файл
97
ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_fcntl.c
Обычный файл
@ -0,0 +1,97 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*
|
||||
* Copyright (C) 2007 Oak Ridge National Laboratory
|
||||
*/
|
||||
|
||||
#include "ad_lustre.h"
|
||||
#include "adio_extern.h"
|
||||
|
||||
/*
 * ADIOI_LUSTRE_Fcntl - file-control operations for the Lustre driver.
 *
 * flag selects the operation:
 *   ADIO_FCNTL_GET_FSIZE      store the file size in fcntl_struct->fsize
 *   ADIO_FCNTL_SET_DISKSPACE  make sure fcntl_struct->diskspace bytes are
 *                             allocated by rewriting existing data and
 *                             appending zeros
 *   ADIO_FCNTL_SET_ATOMICITY  record the atomicity flag in fd->atomicity
 * Any other flag prints a message and calls MPI_Abort.
 *
 * *error_code is MPI_SUCCESS on success, otherwise an MPI error code.
 */
void ADIOI_LUSTRE_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *error_code)
{
    int i, ntimes;
    ADIO_Offset curr_fsize, alloc_size, size, len, done;
    ADIO_Status status;
    char *buf;
#if defined(MPICH) || !defined(PRINT_ERR_MSG)
    static char myname[] = "ADIOI_LUSTRE_FCNTL";
#endif

    switch(flag) {
    case ADIO_FCNTL_GET_FSIZE:
        fcntl_struct->fsize = lseek(fd->fd_sys, 0, SEEK_END);
        /* put the system file pointer back where ADIO believes it is */
        if (fd->fp_sys_posn != -1)
            lseek(fd->fd_sys, fd->fp_sys_posn, SEEK_SET);
        if (fcntl_struct->fsize == -1) {
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                              MPIR_ERR_RECOVERABLE, myname, __LINE__,
                              MPI_ERR_IO, "**io", "**io %s", strerror(errno));
        }
        else *error_code = MPI_SUCCESS;
        break;

    case ADIO_FCNTL_SET_DISKSPACE:
        /* will be called by one process only */
        /* On file systems with no preallocation function, space has to be
           allocated by explicitly writing.  Since there could be holes in
           the file, read up to the current file size, write it back, and
           then write beyond that depending on how much preallocation is
           needed.  Read/write in sizes of no more than
           ADIOI_PREALLOC_BUFSZ. */

        curr_fsize = lseek(fd->fd_sys, 0, SEEK_END);
        alloc_size = fcntl_struct->diskspace;

        size = ADIOI_MIN(curr_fsize, alloc_size);

        ntimes = (size + ADIOI_PREALLOC_BUFSZ - 1)/ADIOI_PREALLOC_BUFSZ;
        buf = (char *) ADIOI_Malloc(ADIOI_PREALLOC_BUFSZ);
        done = 0;

        /* rewrite the part of the request that overlaps existing data */
        for (i=0; i<ntimes; i++) {
            len = ADIOI_MIN(size-done, ADIOI_PREALLOC_BUFSZ);
            ADIO_ReadContig(fd, buf, len, MPI_BYTE, ADIO_EXPLICIT_OFFSET, done,
                            &status, error_code);
            if (*error_code != MPI_SUCCESS) {
                *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                  MPIR_ERR_RECOVERABLE, myname, __LINE__,
                                  MPI_ERR_IO, "**io", "**io %s", strerror(errno));
                /* release the scratch buffer on every exit path */
                ADIOI_Free(buf);
                return;
            }
            ADIO_WriteContig(fd, buf, len, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
                             done, &status, error_code);
            if (*error_code != MPI_SUCCESS) {
                ADIOI_Free(buf);
                return;
            }
            done += len;
        }

        /* extend the file with zeros up to the requested size */
        if (alloc_size > curr_fsize) {
            memset(buf, 0, ADIOI_PREALLOC_BUFSZ);
            size = alloc_size - curr_fsize;
            ntimes = (size + ADIOI_PREALLOC_BUFSZ - 1)/ADIOI_PREALLOC_BUFSZ;
            for (i=0; i<ntimes; i++) {
                len = ADIOI_MIN(alloc_size-done, ADIOI_PREALLOC_BUFSZ);
                ADIO_WriteContig(fd, buf, len, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
                                 done, &status, error_code);
                if (*error_code != MPI_SUCCESS) {
                    ADIOI_Free(buf);
                    return;
                }
                done += len;
            }
        }
        ADIOI_Free(buf);
        /* put the system file pointer back where ADIO believes it is */
        if (fd->fp_sys_posn != -1)
            lseek(fd->fd_sys, fd->fp_sys_posn, SEEK_SET);
        *error_code = MPI_SUCCESS;
        break;

    case ADIO_FCNTL_SET_ATOMICITY:
        /* normalise to 0/1 */
        fd->atomicity = (fcntl_struct->atomicity == 0) ? 0 : 1;
        *error_code = MPI_SUCCESS;
        break;

    default:
        FPRINTF(stderr, "Unknown flag passed to ADIOI_LUSTRE_Fcntl\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
}
|
180
ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_hints.c
Обычный файл
180
ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_hints.c
Обычный файл
@ -0,0 +1,180 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*
|
||||
* Copyright (C) 2007 Oak Ridge National Laboratory
|
||||
*
|
||||
* Copyright (C) 2008 Sun Microsystems, Lustre group
|
||||
*/
|
||||
|
||||
#include "ad_lustre.h"
|
||||
#include "adio_extern.h"
|
||||
#include "hint_fns.h"
|
||||
#ifdef HAVE_LIMITS_H
|
||||
#include <limits.h>
|
||||
#endif
|
||||
|
||||
/*
 * ADIOI_LUSTRE_SetInfo - install Lustre-specific hints on a file.
 *
 * When fd->info has not been created yet (the original comment notes this is
 * the open path) the routine creates it, installs defaults for the
 * direct I/O and lustre hints, reads the striping hints from users_info,
 * checks that all processes of fd->comm passed the same striping values
 * (aborting otherwise) and lets rank 0 try to apply them to the file with
 * the LL_IOC_LOV_SETSTRIPE ioctl.  That path contains collective calls
 * (MPI_Bcast, MPI_Barrier).
 *
 * Afterwards the remaining lustre hints from users_info are installed and
 * the generic hint handling (ADIOI_GEN_SetInfo) runs.
 *
 * *error_code is set to MPI_SUCCESS unconditionally at the end.
 */
void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
{
    static char myname[] = "ADIOI_LUSTRE_SETINFO";
    struct lov_user_md lum = { 0 };
    ADIO_Offset root_vals[3];
    ADIO_Offset str_factor = -1, str_unit = 0, start_iodev = -1;
    int found, rc, rank, tmp_fd, mode_bits, oflags, prev_umask;
    char *val;

    val = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));

    if (fd->info == MPI_INFO_NULL) {
        /* This must be part of the open call; striping parameters can be
           set if necessary. */
        MPI_Info_create(&(fd->info));

        ADIOI_Info_set(fd->info, "direct_read", "false");
        ADIOI_Info_set(fd->info, "direct_write", "false");
        fd->direct_write = 0;
        fd->direct_read = 0;

        /* defaults for the lustre hints */
        ADIOI_Info_set(fd->info, "romio_lustre_co_ratio", "1");
        fd->hints->fs_hints.lustre.co_ratio = 1;
        ADIOI_Info_set(fd->info, "romio_lustre_coll_threshold", "0");
        fd->hints->fs_hints.lustre.coll_threshold = 0;
        ADIOI_Info_set(fd->info, "romio_lustre_ds_in_coll", "enable");
        fd->hints->fs_hints.lustre.ds_in_coll = ADIOI_HINT_ENABLE;

        /* did the user ask for striping or direct I/O? */
        if (users_info != MPI_INFO_NULL) {
            /* striping information */
            ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
                           val, &found);
            if (found)
                str_unit = atoll(val);

            ADIOI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL,
                           val, &found);
            if (found)
                str_factor = atoll(val);

            ADIOI_Info_get(users_info, "romio_lustre_start_iodevice",
                           MPI_MAX_INFO_VAL, val, &found);
            if (found)
                start_iodev = atoll(val);

            /* direct read and write */
            ADIOI_Info_get(users_info, "direct_read", MPI_MAX_INFO_VAL,
                           val, &found);
            if (found && (strcmp(val, "true") == 0 || strcmp(val, "TRUE") == 0)) {
                ADIOI_Info_set(fd->info, "direct_read", "true");
                fd->direct_read = 1;
            }
            ADIOI_Info_get(users_info, "direct_write", MPI_MAX_INFO_VAL,
                           val, &found);
            if (found && (strcmp(val, "true") == 0 || strcmp(val, "TRUE") == 0)) {
                ADIOI_Info_set(fd->info, "direct_write", "true");
                fd->direct_write = 1;
            }
        }

        /* every process compares its own values with rank 0's */
        MPI_Comm_rank(fd->comm, &rank);
        if (rank == 0) {
            root_vals[0] = str_factor;
            root_vals[1] = str_unit;
            root_vals[2] = start_iodev;
        }
        MPI_Bcast(root_vals, 3, MPI_OFFSET, 0, fd->comm);

        if (root_vals[0] != str_factor
            || root_vals[1] != str_unit
            || root_vals[2] != start_iodev) {
            FPRINTF(stderr, "ADIOI_LUSTRE_SetInfo: All keys"
                    "-striping_factor:striping_unit:start_iodevice "
                    "need to be identical across all processes\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        } else if ((str_factor > 0) || (str_unit > 0) || (start_iodev >= 0)) {
            /* the user specified striping info: rank 0 tries to apply it */
            if (rank == 0) {
                if (fd->perm == ADIO_PERM_NULL) {
                    /* no explicit permissions: derive them from the umask */
                    prev_umask = umask(022);
                    umask(prev_umask);
                    mode_bits = prev_umask ^ 0666;
                } else {
                    mode_bits = fd->perm;
                }

                /* the file has to be created here, with object allocation
                 * delayed so the layout can still be chosen */
                oflags = O_LOV_DELAY_CREATE | O_CREAT;
                if (fd->access_mode & ADIO_CREATE)
                    oflags |= O_CREAT;
                if (fd->access_mode & ADIO_RDONLY)
                    oflags |= O_RDONLY;
                if (fd->access_mode & ADIO_WRONLY)
                    oflags |= O_WRONLY;
                if (fd->access_mode & ADIO_RDWR)
                    oflags |= O_RDWR;
                if (fd->access_mode & ADIO_EXCL)
                    oflags |= O_EXCL;

                tmp_fd = open(fd->filename, oflags, mode_bits);
                if (tmp_fd != -1) {
                    lum.lmm_magic = LOV_USER_MAGIC;
                    lum.lmm_pattern = 0;

                    /* crude check for overflow of lustre internal datatypes:
                     * a value that does not survive the assignment is
                     * silently capped to a large value */
                    lum.lmm_stripe_size = str_unit;
                    if (lum.lmm_stripe_size != str_unit) {
                        lum.lmm_stripe_size = UINT_MAX;
                    }
                    lum.lmm_stripe_count = str_factor;
                    if (lum.lmm_stripe_count != str_factor) {
                        lum.lmm_stripe_count = USHRT_MAX;
                    }
                    lum.lmm_stripe_offset = start_iodev;
                    if (lum.lmm_stripe_offset != start_iodev) {
                        lum.lmm_stripe_offset = USHRT_MAX;
                    }

                    rc = ioctl(tmp_fd, LL_IOC_LOV_SETSTRIPE, &lum);
                    if (rc == -1 && errno != EEXIST) {
                        fprintf(stderr, "Failure to set stripe info %s \n", strerror(errno));
                    }
                    close(tmp_fd);
                } else if (errno != EEXIST) {
                    fprintf(stderr,
                            "Failure to open file %s %d %d\n",strerror(errno), oflags, mode_bits);
                }
            }
        }
        MPI_Barrier(fd->comm);
    }

    /* remaining lustre hints */
    if (users_info != MPI_INFO_NULL) {
        /* CO: I/O clients per OST, to balance load between clients and OSTs */
        ADIOI_Info_check_and_install_int(fd, users_info, "romio_lustre_co_ratio",
                &(fd->hints->fs_hints.lustre.co_ratio), myname, error_code );

        /* coll_threshold: if the request size is bigger than this,
         * collective I/O may not be performed */
        ADIOI_Info_check_and_install_int(fd, users_info, "romio_lustre_coll_threshold",
                &(fd->hints->fs_hints.lustre.coll_threshold), myname, error_code );

        /* ds_in_coll: data sieving in collective I/O */
        ADIOI_Info_check_and_install_enabled(fd, users_info, "romio_lustre_ds_in_coll",
                &(fd->hints->fs_hints.lustre.ds_in_coll), myname, error_code );
    }

    /* collective I/O and data sieving parameters */
    ADIOI_GEN_SetInfo(fd, users_info, error_code);

    /* global overrides force direct I/O on */
    if (ADIOI_Direct_read) fd->direct_read = 1;
    if (ADIOI_Direct_write) fd->direct_write = 1;

    ADIOI_Free(val);

    *error_code = MPI_SUCCESS;
}
|
113
ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_open.c
Обычный файл
113
ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_open.c
Обычный файл
@ -0,0 +1,113 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*
|
||||
* Copyright (C) 2007 Oak Ridge National Laboratory
|
||||
*
|
||||
* Copyright (C) 2008 Sun Microsystems, Lustre group
|
||||
*/
|
||||
|
||||
#include "ad_lustre.h"
|
||||
|
||||
/* what is the basis for this define?
|
||||
* what happens if there are more than 1k UUIDs? */
|
||||
|
||||
#define MAX_LOV_UUID_COUNT 1000
|
||||
|
||||
/*
 * ADIOI_LUSTRE_Open - open a file through the Lustre driver.
 *
 * Opens fd->filename (O_CREAT is always added to the requested access mode),
 * queries the striping layout with LL_IOC_LOV_GETSTRIPE and, when that
 * succeeds, records it in fd->hints and fd->info.  If direct I/O was
 * requested a second descriptor is opened with O_DIRECT; if that fails a
 * message is printed and direct I/O is switched off for this file.
 *
 * *error_code is MPI_SUCCESS on success, otherwise an error code built from
 * the errno of the open() that failed.
 */
void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code)
{
    int perm, old_mask, amode, amode_direct;
    int lumlen;
    /* errno of the primary open(); saved at once because the O_DIRECT open
     * and perror() below may overwrite errno before the error is reported */
    int open_errno = 0;
    struct lov_user_md *lum = NULL;
    char *value;

#if defined(MPICH) || !defined(PRINT_ERR_MSG)
    static char myname[] = "ADIOI_LUSTRE_OPEN";
#endif

    if (fd->perm == ADIO_PERM_NULL) {
        /* no explicit permissions: derive them from the umask */
        old_mask = umask(022);
        umask(old_mask);
        perm = old_mask ^ 0666;
    }
    else perm = fd->perm;

    amode = 0;
    if (fd->access_mode & ADIO_CREATE)
        amode = amode | O_CREAT;
    if (fd->access_mode & ADIO_RDONLY)
        amode = amode | O_RDONLY;
    if (fd->access_mode & ADIO_WRONLY)
        amode = amode | O_WRONLY;
    if (fd->access_mode & ADIO_RDWR)
        amode = amode | O_RDWR;
    if (fd->access_mode & ADIO_EXCL)
        amode = amode | O_EXCL;

    amode_direct = amode | O_DIRECT;

    fd->fd_sys = open(fd->filename, amode|O_CREAT, perm);
    if (fd->fd_sys == -1)
        open_errno = errno;

    if (fd->fd_sys != -1) {
        int err;

        /* get file striping information and set it in info */
        /* odd malloc here because lov_user_md contains some fixed data and
         * then a list of 'lmm_objects' representing stripe */
        lumlen = sizeof(struct lov_user_md) +
                 MAX_LOV_UUID_COUNT * sizeof(struct lov_user_ost_data);
        /* furthermore, Pascal Deveze reports that, even though we pass a
         * "GETSTRIPE" (read) flag to the ioctl, if some of the values of this
         * struct are uninitialized, the call can give an error.  calloc in
         * case there are other members that must be initialized and in case
         * the lov_user_md struct changes in future */
        lum = (struct lov_user_md *)ADIOI_Calloc(1,lumlen);
        lum->lmm_magic = LOV_USER_MAGIC;
        err = ioctl(fd->fd_sys, LL_IOC_LOV_GETSTRIPE, (void *)lum);
        if (!err) {
            value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));

            fd->hints->striping_unit = lum->lmm_stripe_size;
            sprintf(value, "%d", lum->lmm_stripe_size);
            ADIOI_Info_set(fd->info, "striping_unit", value);

            fd->hints->striping_factor = lum->lmm_stripe_count;
            sprintf(value, "%d", lum->lmm_stripe_count);
            ADIOI_Info_set(fd->info, "striping_factor", value);

            fd->hints->fs_hints.lustre.start_iodevice = lum->lmm_stripe_offset;
            sprintf(value, "%d", lum->lmm_stripe_offset);
            ADIOI_Info_set(fd->info, "romio_lustre_start_iodevice", value);

            ADIOI_Free(value);
        }
        ADIOI_Free(lum);
    }

    /* append mode: start both file pointers at end of file (done once) */
    if ((fd->fd_sys != -1) && (fd->access_mode & ADIO_APPEND))
        fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END);

    fd->fd_direct = -1;
    if (fd->direct_write || fd->direct_read) {
        fd->fd_direct = open(fd->filename, amode_direct, perm);
        if (fd->fd_direct != -1) {
            fd->d_mem = fd->d_miniosz = (1<<12);
        } else {
            /* fall back to buffered I/O */
            perror("cannot open file with O_Direct");
            fd->direct_write = fd->direct_read = 0;
        }
    }

    /* --BEGIN ERROR HANDLING-- */
    if (fd->fd_sys == -1 || ((fd->fd_direct == -1) &&
                             (fd->direct_write || fd->direct_read))) {
        *error_code = ADIOI_Err_create_code(myname, fd->filename,
                                            (fd->fd_sys == -1) ? open_errno : errno);
    }
    /* --END ERROR HANDLING-- */
    else *error_code = MPI_SUCCESS;

}
|
208
ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_rwcontig.c
Обычный файл
208
ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_rwcontig.c
Обычный файл
@ -0,0 +1,208 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*
|
||||
* Copyright (C) 2007 Oak Ridge National Laboratory
|
||||
*
|
||||
* Copyright (C) 2008 Sun Microsystems, Lustre group
|
||||
*/
|
||||
|
||||
#ifdef _STDC_C99
|
||||
#define _XOPEN_SOURCE 600
|
||||
#else
|
||||
#define _XOPEN_SOURCE 500
|
||||
#endif
|
||||
#include <unistd.h>
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <malloc.h>
|
||||
#include "ad_lustre.h"
|
||||
|
||||
#define LUSTRE_MEMALIGN (1<<12) /* to use page_shift */
|
||||
|
||||
static void ADIOI_LUSTRE_Aligned_Mem_File_Write(ADIO_File fd, const void *buf, int len,
|
||||
ADIO_Offset offset, int *err);
|
||||
static void ADIOI_LUSTRE_Aligned_Mem_File_Write(ADIO_File fd, const void *buf, int len,
|
||||
ADIO_Offset offset, int *err)
|
||||
{
|
||||
int rem, size, nbytes;
|
||||
if (!(len % fd->d_miniosz) && (len >= fd->d_miniosz)) {
|
||||
*err = pwrite(fd->fd_direct, buf, len, offset);
|
||||
} else if (len < fd->d_miniosz) {
|
||||
*err = pwrite(fd->fd_sys, buf, len, offset);
|
||||
} else {
|
||||
rem = len % fd->d_miniosz;
|
||||
size = len - rem;
|
||||
nbytes = pwrite(fd->fd_direct, buf, size, offset);
|
||||
nbytes += pwrite(fd->fd_sys, ((char *)buf) + size, rem, offset+size);
|
||||
*err = nbytes;
|
||||
}
|
||||
}
|
||||
|
||||
static void ADIOI_LUSTRE_Aligned_Mem_File_Read(ADIO_File fd, const void *buf, int len,
|
||||
ADIO_Offset offset, int *err);
|
||||
static void ADIOI_LUSTRE_Aligned_Mem_File_Read(ADIO_File fd, const void *buf, int len,
|
||||
ADIO_Offset offset, int *err)
|
||||
{
|
||||
int rem, size, nbytes;
|
||||
if (!(len % fd->d_miniosz) && (len >= fd->d_miniosz))
|
||||
*err = pread(fd->fd_direct, (void *)buf, len, offset);
|
||||
else if (len < fd->d_miniosz)
|
||||
*err = pread(fd->fd_sys, (void *)buf, len, offset);
|
||||
else {
|
||||
rem = len % fd->d_miniosz;
|
||||
size = len - rem;
|
||||
nbytes = pread(fd->fd_direct, (void *)buf, size, offset);
|
||||
nbytes += pread(fd->fd_sys, ((char *)buf) + size, rem, offset+size);
|
||||
*err = nbytes;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static int ADIOI_LUSTRE_Directio(ADIO_File fd, const void *buf, int len,
|
||||
off_t offset, int rw);
|
||||
static int ADIOI_LUSTRE_Directio(ADIO_File fd, const void *buf, int len,
|
||||
off_t offset, int rw)
|
||||
{
|
||||
int err=-1, diff, size=len, nbytes = 0;
|
||||
void *newbuf;
|
||||
|
||||
if (offset % fd->d_miniosz) {
|
||||
diff = fd->d_miniosz - (offset % fd->d_miniosz);
|
||||
diff = ADIOI_MIN(diff, len);
|
||||
if (rw)
|
||||
nbytes = pwrite(fd->fd_sys, (void *)buf, diff, offset);
|
||||
else
|
||||
nbytes = pread(fd->fd_sys, (void *)buf, diff, offset);
|
||||
buf = ((char *) buf) + diff;
|
||||
offset += diff;
|
||||
size = len - diff;
|
||||
}
|
||||
|
||||
if (!size) {
|
||||
return diff;
|
||||
}
|
||||
|
||||
if (rw) { /* direct I/O enabled */
|
||||
if (!(((long) buf) % fd->d_mem)) {
|
||||
ADIOI_LUSTRE_Aligned_Mem_File_Write(fd, buf, size, offset, &err);
|
||||
nbytes += err;
|
||||
} else {
|
||||
newbuf = (void *) memalign(LUSTRE_MEMALIGN, size);
|
||||
if (newbuf) {
|
||||
memcpy(newbuf, buf, size);
|
||||
ADIOI_LUSTRE_Aligned_Mem_File_Write(fd, newbuf, size, offset, &err);
|
||||
nbytes += err;
|
||||
ADIOI_Free(newbuf);
|
||||
}
|
||||
else nbytes += pwrite(fd->fd_sys, buf, size, offset);
|
||||
}
|
||||
err = nbytes;
|
||||
} else {
|
||||
if (!(((long) buf) % fd->d_mem)) {
|
||||
ADIOI_LUSTRE_Aligned_Mem_File_Read(fd, buf, size, offset, &err);
|
||||
nbytes += err;
|
||||
} else {
|
||||
newbuf = (void *) memalign(LUSTRE_MEMALIGN, size);
|
||||
if (newbuf) {
|
||||
ADIOI_LUSTRE_Aligned_Mem_File_Read(fd, newbuf, size, offset, &err);
|
||||
if (err > 0) memcpy((void *)buf, newbuf, err);
|
||||
nbytes += err;
|
||||
ADIOI_Free(newbuf);
|
||||
}
|
||||
else nbytes += pread(fd->fd_sys, (void *)buf, size, offset);
|
||||
}
|
||||
err = nbytes;
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static void ADIOI_LUSTRE_IOContig(ADIO_File fd, const void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status,
|
||||
int io_mode, int *error_code);
|
||||
static void ADIOI_LUSTRE_IOContig(ADIO_File fd, const void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status,
|
||||
int io_mode, int *error_code)
|
||||
{
|
||||
int err=-1;
|
||||
MPI_Count datatype_size, len;
|
||||
static char myname[] = "ADIOI_LUSTRE_IOCONTIG";
|
||||
|
||||
MPI_Type_size_x(datatype, &datatype_size);
|
||||
len = datatype_size * count;
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) {
|
||||
offset = fd->fp_ind;
|
||||
}
|
||||
|
||||
if (!(fd->direct_read || fd->direct_write)) {
|
||||
if (fd->fp_sys_posn != offset) {
|
||||
err = lseek(fd->fd_sys, offset, SEEK_SET);
|
||||
if (err == -1) goto ioerr;
|
||||
}
|
||||
|
||||
if (io_mode) {
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event(ADIOI_MPE_write_a, 0, NULL);
|
||||
#endif
|
||||
err = write(fd->fd_sys, buf, len);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event(ADIOI_MPE_write_b, 0, NULL);
|
||||
#endif
|
||||
} else {
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event(ADIOI_MPE_read_a, 0, NULL);
|
||||
#endif
|
||||
err = read(fd->fd_sys, (void *)buf, len);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event(ADIOI_MPE_read_b, 0, NULL);
|
||||
#endif
|
||||
}
|
||||
} else {
|
||||
err = ADIOI_LUSTRE_Directio(fd, buf, len, offset, io_mode);
|
||||
}
|
||||
|
||||
if (err == -1) goto ioerr;
|
||||
fd->fp_sys_posn = offset + err;
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) {
|
||||
fd->fp_ind += err;
|
||||
}
|
||||
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
if (status) MPIR_Status_set_bytes(status, datatype, err);
|
||||
#endif
|
||||
*error_code = MPI_SUCCESS;
|
||||
|
||||
ioerr:
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err == -1) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
MPI_ERR_IO, "**io",
|
||||
"**io %s", strerror(errno));
|
||||
fd->fp_sys_posn = -1;
|
||||
return;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
}
|
||||
|
||||
void ADIOI_LUSTRE_WriteContig(ADIO_File fd, const void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int *error_code)
|
||||
{
|
||||
ADIOI_LUSTRE_IOContig(fd, buf, count, datatype, file_ptr_type,
|
||||
offset, status, 1, error_code);
|
||||
}
|
||||
|
||||
void ADIOI_LUSTRE_ReadContig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int *error_code)
|
||||
{
|
||||
ADIOI_LUSTRE_IOContig(fd, buf, count, datatype, file_ptr_type,
|
||||
offset, status, 0, error_code);
|
||||
}
|
989
ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_wrcoll.c
Обычный файл
989
ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_wrcoll.c
Обычный файл
@ -0,0 +1,989 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*
|
||||
* Copyright (C) 2007 Oak Ridge National Laboratory
|
||||
*
|
||||
* Copyright (C) 2008 Sun Microsystems, Lustre group
|
||||
*/
|
||||
|
||||
#include "ad_lustre.h"
|
||||
#include "adio_extern.h"
|
||||
|
||||
/* prototypes of functions used for collective writes only. */
|
||||
static void ADIOI_LUSTRE_Exch_and_write(ADIO_File fd, const void *buf,
|
||||
MPI_Datatype datatype, int nprocs,
|
||||
int myrank,
|
||||
ADIOI_Access *others_req,
|
||||
ADIOI_Access *my_req,
|
||||
ADIO_Offset *offset_list,
|
||||
ADIO_Offset *len_list,
|
||||
int contig_access_count,
|
||||
int *striping_info,
|
||||
int **buf_idx, int *error_code);
|
||||
static void ADIOI_LUSTRE_Fill_send_buffer(ADIO_File fd, const void *buf,
|
||||
ADIOI_Flatlist_node *flat_buf,
|
||||
char **send_buf,
|
||||
ADIO_Offset *offset_list,
|
||||
ADIO_Offset *len_list, int *send_size,
|
||||
MPI_Request *requests,
|
||||
int *sent_to_proc, int nprocs,
|
||||
int myrank, int contig_access_count,
|
||||
int *striping_info,
|
||||
int *send_buf_idx,
|
||||
int *curr_to_proc,
|
||||
int *done_to_proc, int iter,
|
||||
MPI_Aint buftype_extent);
|
||||
static void ADIOI_LUSTRE_W_Exchange_data(ADIO_File fd, const void *buf,
|
||||
char *write_buf,
|
||||
ADIOI_Flatlist_node *flat_buf,
|
||||
ADIO_Offset *offset_list,
|
||||
ADIO_Offset *len_list, int *send_size,
|
||||
int *recv_size, ADIO_Offset off,
|
||||
int size, int *count,
|
||||
int *start_pos,
|
||||
int *sent_to_proc, int nprocs,
|
||||
int myrank, int buftype_is_contig,
|
||||
int contig_access_count,
|
||||
int *striping_info,
|
||||
ADIOI_Access *others_req,
|
||||
int *send_buf_idx,
|
||||
int *curr_to_proc,
|
||||
int *done_to_proc, int *hole,
|
||||
int iter, MPI_Aint buftype_extent,
|
||||
int *buf_idx,
|
||||
ADIO_Offset **srt_off, int **srt_len, int *srt_num,
|
||||
int *error_code);
|
||||
void ADIOI_Heap_merge(ADIOI_Access *others_req, int *count,
|
||||
ADIO_Offset *srt_off, int *srt_len, int *start_pos,
|
||||
int nprocs, int nprocs_recv, int total_elements);
|
||||
|
||||
void ADIOI_LUSTRE_WriteStridedColl(ADIO_File fd, const void *buf, int count,
|
||||
MPI_Datatype datatype,
|
||||
int file_ptr_type, ADIO_Offset offset,
|
||||
ADIO_Status *status, int *error_code)
|
||||
{
|
||||
/* Uses a generalized version of the extended two-phase method described
|
||||
* in "An Extended Two-Phase Method for Accessing Sections of
|
||||
* Out-of-Core Arrays", Rajeev Thakur and Alok Choudhary,
|
||||
* Scientific Programming, (5)4:301--317, Winter 1996.
|
||||
* http://www.mcs.anl.gov/home/thakur/ext2ph.ps
|
||||
*/
|
||||
|
||||
ADIOI_Access *my_req;
|
||||
/* array of nprocs access structures, one for each other process has
|
||||
this process's request */
|
||||
|
||||
ADIOI_Access *others_req;
|
||||
/* array of nprocs access structures, one for each other process
|
||||
whose request is written by this process. */
|
||||
|
||||
int i, filetype_is_contig, nprocs, myrank, do_collect = 0;
|
||||
int contig_access_count = 0, buftype_is_contig, interleave_count = 0;
|
||||
int *count_my_req_per_proc, count_my_req_procs, count_others_req_procs;
|
||||
ADIO_Offset orig_fp, start_offset, end_offset, off;
|
||||
ADIO_Offset *offset_list = NULL, *st_offsets = NULL, *end_offsets = NULL;
|
||||
ADIO_Offset *len_list = NULL;
|
||||
int **buf_idx = NULL, *striping_info = NULL;
|
||||
int old_error, tmp_error;
|
||||
|
||||
MPI_Comm_size(fd->comm, &nprocs);
|
||||
MPI_Comm_rank(fd->comm, &myrank);
|
||||
|
||||
orig_fp = fd->fp_ind;
|
||||
|
||||
/* IO patten identification if cb_write isn't disabled */
|
||||
if (fd->hints->cb_write != ADIOI_HINT_DISABLE) {
|
||||
/* For this process's request, calculate the list of offsets and
|
||||
lengths in the file and determine the start and end offsets. */
|
||||
|
||||
/* Note: end_offset points to the last byte-offset that will be accessed.
|
||||
* e.g., if start_offset=0 and 100 bytes to be read, end_offset=99
|
||||
*/
|
||||
|
||||
ADIOI_Calc_my_off_len(fd, count, datatype, file_ptr_type, offset,
|
||||
&offset_list, &len_list, &start_offset,
|
||||
&end_offset, &contig_access_count);
|
||||
|
||||
/* each process communicates its start and end offsets to other
|
||||
* processes. The result is an array each of start and end offsets
|
||||
* stored in order of process rank.
|
||||
*/
|
||||
st_offsets = (ADIO_Offset *) ADIOI_Malloc(nprocs * sizeof(ADIO_Offset));
|
||||
end_offsets = (ADIO_Offset *) ADIOI_Malloc(nprocs * sizeof(ADIO_Offset));
|
||||
MPI_Allgather(&start_offset, 1, ADIO_OFFSET, st_offsets, 1,
|
||||
ADIO_OFFSET, fd->comm);
|
||||
MPI_Allgather(&end_offset, 1, ADIO_OFFSET, end_offsets, 1,
|
||||
ADIO_OFFSET, fd->comm);
|
||||
/* are the accesses of different processes interleaved? */
|
||||
for (i = 1; i < nprocs; i++)
|
||||
if ((st_offsets[i] < end_offsets[i-1]) &&
|
||||
(st_offsets[i] <= end_offsets[i]))
|
||||
interleave_count++;
|
||||
/* This is a rudimentary check for interleaving, but should suffice
|
||||
for the moment. */
|
||||
|
||||
/* Two typical access patterns can benefit from collective write.
|
||||
* 1) the processes are interleaved, and
|
||||
* 2) the req size is small.
|
||||
*/
|
||||
if (interleave_count > 0) {
|
||||
do_collect = 1;
|
||||
} else {
|
||||
do_collect = ADIOI_LUSTRE_Docollect(fd, contig_access_count,
|
||||
len_list, nprocs);
|
||||
}
|
||||
}
|
||||
ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
|
||||
|
||||
/* Decide if collective I/O should be done */
|
||||
if ((!do_collect && fd->hints->cb_write == ADIOI_HINT_AUTO) ||
|
||||
fd->hints->cb_write == ADIOI_HINT_DISABLE) {
|
||||
|
||||
/* use independent accesses */
|
||||
if (fd->hints->cb_write != ADIOI_HINT_DISABLE) {
|
||||
ADIOI_Free(offset_list);
|
||||
ADIOI_Free(len_list);
|
||||
ADIOI_Free(st_offsets);
|
||||
ADIOI_Free(end_offsets);
|
||||
}
|
||||
|
||||
fd->fp_ind = orig_fp;
|
||||
ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
|
||||
if (buftype_is_contig && filetype_is_contig) {
|
||||
if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
|
||||
off = fd->disp + (ADIO_Offset)(fd->etype_size) * offset;
|
||||
ADIO_WriteContig(fd, buf, count, datatype,
|
||||
ADIO_EXPLICIT_OFFSET,
|
||||
off, status, error_code);
|
||||
} else
|
||||
ADIO_WriteContig(fd, buf, count, datatype, ADIO_INDIVIDUAL,
|
||||
0, status, error_code);
|
||||
} else {
|
||||
ADIO_WriteStrided(fd, buf, count, datatype, file_ptr_type,
|
||||
offset, status, error_code);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* Get Lustre hints information */
|
||||
ADIOI_LUSTRE_Get_striping_info(fd, &striping_info, 1);
|
||||
|
||||
/* calculate what portions of the access requests of this process are
|
||||
* located in which process
|
||||
*/
|
||||
ADIOI_LUSTRE_Calc_my_req(fd, offset_list, len_list, contig_access_count,
|
||||
striping_info, nprocs, &count_my_req_procs,
|
||||
&count_my_req_per_proc, &my_req,
|
||||
&buf_idx);
|
||||
|
||||
/* based on everyone's my_req, calculate what requests of other processes
|
||||
* will be accessed by this process.
|
||||
* count_others_req_procs = number of processes whose requests (including
|
||||
* this process itself) will be accessed by this process
|
||||
* count_others_req_per_proc[i] indicates how many separate contiguous
|
||||
* requests of proc. i will be accessed by this process.
|
||||
*/
|
||||
|
||||
ADIOI_Calc_others_req(fd, count_my_req_procs, count_my_req_per_proc,
|
||||
my_req, nprocs, myrank, &count_others_req_procs,
|
||||
&others_req);
|
||||
ADIOI_Free(count_my_req_per_proc);
|
||||
|
||||
/* exchange data and write in sizes of no more than stripe_size. */
|
||||
ADIOI_LUSTRE_Exch_and_write(fd, buf, datatype, nprocs, myrank,
|
||||
others_req, my_req, offset_list, len_list,
|
||||
contig_access_count, striping_info,
|
||||
buf_idx, error_code);
|
||||
|
||||
/* If this collective write is followed by an independent write,
|
||||
* it's possible to have those subsequent writes on other processes
|
||||
* race ahead and sneak in before the read-modify-write completes.
|
||||
* We carry out a collective communication at the end here so no one
|
||||
* can start independent i/o before collective I/O completes.
|
||||
*
|
||||
* need to do some gymnastics with the error codes so that if something
|
||||
* went wrong, all processes report error, but if a process has a more
|
||||
* specific error code, we can still have that process report the
|
||||
* additional information */
|
||||
|
||||
old_error = *error_code;
|
||||
if (*error_code != MPI_SUCCESS)
|
||||
*error_code = MPI_ERR_IO;
|
||||
|
||||
/* optimization: if only one process performing i/o, we can perform
|
||||
* a less-expensive Bcast */
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event(ADIOI_MPE_postwrite_a, 0, NULL);
|
||||
#endif
|
||||
if (fd->hints->cb_nodes == 1)
|
||||
MPI_Bcast(error_code, 1, MPI_INT,
|
||||
fd->hints->ranklist[0], fd->comm);
|
||||
else {
|
||||
tmp_error = *error_code;
|
||||
MPI_Allreduce(&tmp_error, error_code, 1, MPI_INT,
|
||||
MPI_MAX, fd->comm);
|
||||
}
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event(ADIOI_MPE_postwrite_b, 0, NULL);
|
||||
#endif
|
||||
|
||||
if ((old_error != MPI_SUCCESS) && (old_error != MPI_ERR_IO))
|
||||
*error_code = old_error;
|
||||
|
||||
|
||||
if (!buftype_is_contig)
|
||||
ADIOI_Delete_flattened(datatype);
|
||||
|
||||
/* free all memory allocated for collective I/O */
|
||||
/* free others_req */
|
||||
for (i = 0; i < nprocs; i++) {
|
||||
if (others_req[i].count) {
|
||||
ADIOI_Free(others_req[i].offsets);
|
||||
ADIOI_Free(others_req[i].lens);
|
||||
ADIOI_Free(others_req[i].mem_ptrs);
|
||||
}
|
||||
}
|
||||
ADIOI_Free(others_req);
|
||||
/* free my_req here */
|
||||
for (i = 0; i < nprocs; i++) {
|
||||
if (my_req[i].count) {
|
||||
ADIOI_Free(my_req[i].offsets);
|
||||
ADIOI_Free(my_req[i].lens);
|
||||
}
|
||||
}
|
||||
ADIOI_Free(my_req);
|
||||
for (i = 0; i < nprocs; i++) {
|
||||
ADIOI_Free(buf_idx[i]);
|
||||
}
|
||||
ADIOI_Free(buf_idx);
|
||||
ADIOI_Free(offset_list);
|
||||
ADIOI_Free(len_list);
|
||||
ADIOI_Free(st_offsets);
|
||||
ADIOI_Free(end_offsets);
|
||||
ADIOI_Free(striping_info);
|
||||
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
if (status) {
|
||||
MPI_Count bufsize, size;
|
||||
/* Don't set status if it isn't needed */
|
||||
MPI_Type_size_x(datatype, &size);
|
||||
bufsize = size * count;
|
||||
MPIR_Status_set_bytes(status, datatype, bufsize);
|
||||
}
|
||||
/* This is a temporary way of filling in status. The right way is to
|
||||
* keep track of how much data was actually written during collective I/O.
|
||||
*/
|
||||
#endif
|
||||
|
||||
fd->fp_sys_posn = -1; /* set it to null. */
|
||||
}
|
||||
|
||||
/* If successful, error_code is set to MPI_SUCCESS. Otherwise an error
|
||||
* code is created and returned in error_code.
|
||||
*/
|
||||
static void ADIOI_LUSTRE_Exch_and_write(ADIO_File fd, const void *buf,
|
||||
MPI_Datatype datatype, int nprocs,
|
||||
int myrank, ADIOI_Access *others_req,
|
||||
ADIOI_Access *my_req,
|
||||
ADIO_Offset *offset_list,
|
||||
ADIO_Offset *len_list,
|
||||
int contig_access_count,
|
||||
int *striping_info, int **buf_idx,
|
||||
int *error_code)
|
||||
{
|
||||
/* Send data to appropriate processes and write in sizes of no more
|
||||
* than lustre stripe_size.
|
||||
* The idea is to reduce the amount of extra memory required for
|
||||
* collective I/O. If all data were written all at once, which is much
|
||||
* easier, it would require temp space more than the size of user_buf,
|
||||
* which is often unacceptable. For example, to write a distributed
|
||||
* array to a file, where each local array is 8Mbytes, requiring
|
||||
* at least another 8Mbytes of temp space is unacceptable.
|
||||
*/
|
||||
|
||||
int hole, i, j, m, flag, ntimes = 1 , max_ntimes, buftype_is_contig;
|
||||
ADIO_Offset st_loc = -1, end_loc = -1, min_st_loc, max_end_loc;
|
||||
ADIO_Offset off, req_off, send_off, iter_st_off, *off_list;
|
||||
ADIO_Offset max_size, step_size = 0;
|
||||
int real_size, req_len, send_len;
|
||||
int *recv_curr_offlen_ptr, *recv_count, *recv_size;
|
||||
int *send_curr_offlen_ptr, *send_size;
|
||||
int *sent_to_proc, *recv_start_pos;
|
||||
int *send_buf_idx, *curr_to_proc, *done_to_proc;
|
||||
int *this_buf_idx;
|
||||
char *write_buf = NULL;
|
||||
MPI_Status status;
|
||||
ADIOI_Flatlist_node *flat_buf = NULL;
|
||||
MPI_Aint buftype_extent;
|
||||
int stripe_size = striping_info[0], avail_cb_nodes = striping_info[2];
|
||||
int data_sieving = 0;
|
||||
ADIO_Offset *srt_off = NULL;
|
||||
int *srt_len = NULL;
|
||||
int srt_num = 0;
|
||||
ADIO_Offset block_offset;
|
||||
int block_len;
|
||||
|
||||
*error_code = MPI_SUCCESS; /* changed below if error */
|
||||
/* only I/O errors are currently reported */
|
||||
|
||||
/* calculate the number of writes of stripe size to be done.
|
||||
* That gives the no. of communication phases as well.
|
||||
* Note:
|
||||
* Because we redistribute data in stripe-contiguous pattern for Lustre,
|
||||
* each process has the same no. of communication phases.
|
||||
*/
|
||||
|
||||
for (i = 0; i < nprocs; i++) {
|
||||
if (others_req[i].count) {
|
||||
st_loc = others_req[i].offsets[0];
|
||||
end_loc = others_req[i].offsets[0];
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (i = 0; i < nprocs; i++) {
|
||||
for (j = 0; j < others_req[i].count; j++) {
|
||||
st_loc = ADIOI_MIN(st_loc, others_req[i].offsets[j]);
|
||||
end_loc = ADIOI_MAX(end_loc, (others_req[i].offsets[j] +
|
||||
others_req[i].lens[j] - 1));
|
||||
}
|
||||
}
|
||||
/* this process does no writing. */
|
||||
if ((st_loc == -1) && (end_loc == -1))
|
||||
ntimes = 0;
|
||||
MPI_Allreduce(&end_loc, &max_end_loc, 1, MPI_LONG_LONG_INT, MPI_MAX, fd->comm);
|
||||
/* avoid min_st_loc be -1 */
|
||||
if (st_loc == -1)
|
||||
st_loc = max_end_loc;
|
||||
MPI_Allreduce(&st_loc, &min_st_loc, 1, MPI_LONG_LONG_INT, MPI_MIN, fd->comm);
|
||||
/* align downward */
|
||||
min_st_loc -= min_st_loc % (ADIO_Offset)stripe_size;
|
||||
|
||||
/* Each time, only avail_cb_nodes number of IO clients perform IO,
|
||||
* so, step_size=avail_cb_nodes*stripe_size IO will be performed at most,
|
||||
* and ntimes=whole_file_portion/step_size
|
||||
*/
|
||||
step_size = (ADIO_Offset) avail_cb_nodes * stripe_size;
|
||||
max_ntimes = (max_end_loc - min_st_loc + 1) / step_size
|
||||
+ (((max_end_loc - min_st_loc + 1) % step_size) ? 1 : 0);
|
||||
/* max_ntimes = (int)((max_end_loc - min_st_loc) / step_size + 1); */
|
||||
if (ntimes)
|
||||
write_buf = (char *) ADIOI_Malloc(stripe_size);
|
||||
|
||||
/* calculate the start offset for each iteration */
|
||||
off_list = (ADIO_Offset *) ADIOI_Malloc(max_ntimes * sizeof(ADIO_Offset));
|
||||
for (m = 0; m < max_ntimes; m ++)
|
||||
off_list[m] = max_end_loc;
|
||||
for (i = 0; i < nprocs; i++) {
|
||||
for (j = 0; j < others_req[i].count; j ++) {
|
||||
req_off = others_req[i].offsets[j];
|
||||
m = (int)((req_off - min_st_loc) / step_size);
|
||||
off_list[m] = ADIOI_MIN(off_list[m], req_off);
|
||||
}
|
||||
}
|
||||
|
||||
recv_curr_offlen_ptr = (int *) ADIOI_Calloc(nprocs, sizeof(int));
|
||||
send_curr_offlen_ptr = (int *) ADIOI_Calloc(nprocs, sizeof(int));
|
||||
/* their use is explained below. calloc initializes to 0. */
|
||||
|
||||
recv_count = (int *) ADIOI_Malloc(nprocs * sizeof(int));
|
||||
/* to store count of how many off-len pairs per proc are satisfied
|
||||
in an iteration. */
|
||||
|
||||
send_size = (int *) ADIOI_Malloc(nprocs * sizeof(int));
|
||||
/* total size of data to be sent to each proc. in an iteration.
|
||||
Of size nprocs so that I can use MPI_Alltoall later. */
|
||||
|
||||
recv_size = (int *) ADIOI_Malloc(nprocs * sizeof(int));
|
||||
/* total size of data to be recd. from each proc. in an iteration. */
|
||||
|
||||
sent_to_proc = (int *) ADIOI_Calloc(nprocs, sizeof(int));
|
||||
/* amount of data sent to each proc so far. Used in
|
||||
ADIOI_Fill_send_buffer. initialized to 0 here. */
|
||||
|
||||
send_buf_idx = (int *) ADIOI_Malloc(nprocs * sizeof(int));
|
||||
curr_to_proc = (int *) ADIOI_Malloc(nprocs * sizeof(int));
|
||||
done_to_proc = (int *) ADIOI_Malloc(nprocs * sizeof(int));
|
||||
/* Above three are used in ADIOI_Fill_send_buffer */
|
||||
|
||||
this_buf_idx = (int *) ADIOI_Malloc(nprocs * sizeof(int));
|
||||
|
||||
recv_start_pos = (int *) ADIOI_Malloc(nprocs * sizeof(int));
|
||||
/* used to store the starting value of recv_curr_offlen_ptr[i] in
|
||||
this iteration */
|
||||
|
||||
ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
|
||||
if (!buftype_is_contig) {
|
||||
ADIOI_Flatten_datatype(datatype);
|
||||
flat_buf = ADIOI_Flatlist;
|
||||
while (flat_buf->type != datatype)
|
||||
flat_buf = flat_buf->next;
|
||||
}
|
||||
MPI_Type_extent(datatype, &buftype_extent);
|
||||
/* I need to check if there are any outstanding nonblocking writes to
|
||||
* the file, which could potentially interfere with the writes taking
|
||||
* place in this collective write call. Since this is not likely to be
|
||||
* common, let me do the simplest thing possible here: Each process
|
||||
* completes all pending nonblocking operations before completing.
|
||||
*/
|
||||
/*ADIOI_Complete_async(error_code);
|
||||
if (*error_code != MPI_SUCCESS) return;
|
||||
MPI_Barrier(fd->comm);
|
||||
*/
|
||||
|
||||
iter_st_off = min_st_loc;
|
||||
|
||||
/* Although we have recognized the data according to OST index,
|
||||
* a read-modify-write will be done if there is a hole between the data.
|
||||
* For example: if blocksize=60, xfersize=30 and stripe_size=100,
|
||||
* then rank0 will collect data [0, 30] and [60, 90] then write. There
|
||||
* is a hole in [30, 60], which will cause a read-modify-write in [0, 90].
|
||||
*
|
||||
* To reduce its impact on the performance, we can disable data sieving
|
||||
* by hint "ds_in_coll".
|
||||
*/
|
||||
/* check the hint for data sieving */
|
||||
data_sieving = fd->hints->fs_hints.lustre.ds_in_coll;
|
||||
|
||||
for (m = 0; m < max_ntimes; m++) {
|
||||
/* go through all others_req and my_req to check which will be received
|
||||
* and sent in this iteration.
|
||||
*/
|
||||
|
||||
/* Note that MPI guarantees that displacements in filetypes are in
|
||||
monotonically nondecreasing order and that, for writes, the
|
||||
filetypes cannot specify overlapping regions in the file. This
|
||||
simplifies implementation a bit compared to reads. */
|
||||
|
||||
/*
|
||||
off = start offset in the file for the data to be written in
|
||||
this iteration
|
||||
iter_st_off = start offset of this iteration
|
||||
real_size = size of data written (bytes) corresponding to off
|
||||
max_size = possible maximum size of data written in this iteration
|
||||
req_off = offset in the file for a particular contiguous request minus
|
||||
what was satisfied in previous iteration
|
||||
send_off = offset the request needed by other processes in this iteration
|
||||
req_len = size corresponding to req_off
|
||||
send_len = size corresponding to send_off
|
||||
*/
|
||||
|
||||
/* first calculate what should be communicated */
|
||||
for (i = 0; i < nprocs; i++)
|
||||
recv_count[i] = recv_size[i] = send_size[i] = 0;
|
||||
|
||||
off = off_list[m];
|
||||
max_size = ADIOI_MIN(step_size, max_end_loc - iter_st_off + 1);
|
||||
real_size = (int) ADIOI_MIN((off / stripe_size + 1) * stripe_size -
|
||||
off,
|
||||
end_loc - off + 1);
|
||||
|
||||
for (i = 0; i < nprocs; i++) {
|
||||
if (my_req[i].count) {
|
||||
this_buf_idx[i] = buf_idx[i][send_curr_offlen_ptr[i]];
|
||||
for (j = send_curr_offlen_ptr[i]; j < my_req[i].count; j++) {
|
||||
send_off = my_req[i].offsets[j];
|
||||
send_len = my_req[i].lens[j];
|
||||
if (send_off < iter_st_off + max_size) {
|
||||
send_size[i] += send_len;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
send_curr_offlen_ptr[i] = j;
|
||||
}
|
||||
if (others_req[i].count) {
|
||||
recv_start_pos[i] = recv_curr_offlen_ptr[i];
|
||||
for (j = recv_curr_offlen_ptr[i]; j < others_req[i].count; j++) {
|
||||
req_off = others_req[i].offsets[j];
|
||||
req_len = others_req[i].lens[j];
|
||||
if (req_off < iter_st_off + max_size) {
|
||||
recv_count[i]++;
|
||||
ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)write_buf)+req_off-off) == (ADIO_Offset)(MPIR_Upint)(write_buf+req_off-off));
|
||||
MPI_Address(write_buf + req_off - off,
|
||||
&(others_req[i].mem_ptrs[j]));
|
||||
recv_size[i] += req_len;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
recv_curr_offlen_ptr[i] = j;
|
||||
}
|
||||
}
|
||||
/* use variable "hole" to pass data_sieving flag into W_Exchange_data */
|
||||
hole = data_sieving;
|
||||
ADIOI_LUSTRE_W_Exchange_data(fd, buf, write_buf, flat_buf, offset_list,
|
||||
len_list, send_size, recv_size, off, real_size,
|
||||
recv_count, recv_start_pos,
|
||||
sent_to_proc, nprocs, myrank,
|
||||
buftype_is_contig, contig_access_count,
|
||||
striping_info, others_req, send_buf_idx,
|
||||
curr_to_proc, done_to_proc, &hole, m,
|
||||
buftype_extent, this_buf_idx,
|
||||
&srt_off, &srt_len, &srt_num, error_code);
|
||||
|
||||
if (*error_code != MPI_SUCCESS)
|
||||
goto over;
|
||||
|
||||
flag = 0;
|
||||
for (i = 0; i < nprocs; i++)
|
||||
if (recv_count[i]) {
|
||||
flag = 1;
|
||||
break;
|
||||
}
|
||||
if (flag) {
|
||||
/* check whether to do data sieving */
|
||||
if(data_sieving == ADIOI_HINT_ENABLE) {
|
||||
ADIO_WriteContig(fd, write_buf, real_size, MPI_BYTE,
|
||||
ADIO_EXPLICIT_OFFSET, off, &status,
|
||||
error_code);
|
||||
} else {
|
||||
/* if there is no hole, write data in one time;
|
||||
* otherwise, write data in several times */
|
||||
if (!hole) {
|
||||
ADIO_WriteContig(fd, write_buf, real_size, MPI_BYTE,
|
||||
ADIO_EXPLICIT_OFFSET, off, &status,
|
||||
error_code);
|
||||
} else {
|
||||
block_offset = -1;
|
||||
block_len = 0;
|
||||
for (i = 0; i < srt_num; ++i) {
|
||||
if (srt_off[i] < off + real_size &&
|
||||
srt_off[i] >= off) {
|
||||
if (block_offset == -1) {
|
||||
block_offset = srt_off[i];
|
||||
block_len = srt_len[i];
|
||||
} else {
|
||||
if (srt_off[i] == block_offset + block_len) {
|
||||
block_len += srt_len[i];
|
||||
} else {
|
||||
ADIO_WriteContig(fd,
|
||||
write_buf + block_offset - off,
|
||||
block_len,
|
||||
MPI_BYTE, ADIO_EXPLICIT_OFFSET,
|
||||
block_offset, &status,
|
||||
error_code);
|
||||
if (*error_code != MPI_SUCCESS)
|
||||
goto over;
|
||||
block_offset = srt_off[i];
|
||||
block_len = srt_len[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (block_offset != -1) {
|
||||
ADIO_WriteContig(fd,
|
||||
write_buf + block_offset - off,
|
||||
block_len,
|
||||
MPI_BYTE, ADIO_EXPLICIT_OFFSET,
|
||||
block_offset, &status,
|
||||
error_code);
|
||||
if (*error_code != MPI_SUCCESS)
|
||||
goto over;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (*error_code != MPI_SUCCESS)
|
||||
goto over;
|
||||
}
|
||||
iter_st_off += max_size;
|
||||
}
|
||||
over:
|
||||
if (srt_off)
|
||||
ADIOI_Free(srt_off);
|
||||
if (srt_len)
|
||||
ADIOI_Free(srt_len);
|
||||
if (ntimes)
|
||||
ADIOI_Free(write_buf);
|
||||
ADIOI_Free(recv_curr_offlen_ptr);
|
||||
ADIOI_Free(send_curr_offlen_ptr);
|
||||
ADIOI_Free(recv_count);
|
||||
ADIOI_Free(send_size);
|
||||
ADIOI_Free(recv_size);
|
||||
ADIOI_Free(sent_to_proc);
|
||||
ADIOI_Free(recv_start_pos);
|
||||
ADIOI_Free(send_buf_idx);
|
||||
ADIOI_Free(curr_to_proc);
|
||||
ADIOI_Free(done_to_proc);
|
||||
ADIOI_Free(this_buf_idx);
|
||||
ADIOI_Free(off_list);
|
||||
}
|
||||
|
||||
/* Sets error_code to MPI_SUCCESS if successful, or creates an error code
|
||||
* in the case of error.
|
||||
*/
|
||||
static void ADIOI_LUSTRE_W_Exchange_data(ADIO_File fd, const void *buf,
|
||||
char *write_buf,
|
||||
ADIOI_Flatlist_node *flat_buf,
|
||||
ADIO_Offset *offset_list,
|
||||
ADIO_Offset *len_list, int *send_size,
|
||||
int *recv_size, ADIO_Offset off,
|
||||
int size, int *count,
|
||||
int *start_pos,
|
||||
int *sent_to_proc, int nprocs,
|
||||
int myrank, int buftype_is_contig,
|
||||
int contig_access_count,
|
||||
int *striping_info,
|
||||
ADIOI_Access *others_req,
|
||||
int *send_buf_idx,
|
||||
int *curr_to_proc, int *done_to_proc,
|
||||
int *hole, int iter,
|
||||
MPI_Aint buftype_extent,
|
||||
int *buf_idx,
|
||||
ADIO_Offset **srt_off, int **srt_len, int *srt_num,
|
||||
int *error_code)
|
||||
{
|
||||
int i, j, nprocs_recv, nprocs_send, err;
|
||||
char **send_buf = NULL;
|
||||
MPI_Request *requests, *send_req;
|
||||
MPI_Datatype *recv_types;
|
||||
MPI_Status *statuses, status;
|
||||
int sum_recv;
|
||||
int data_sieving = *hole;
|
||||
static char myname[] = "ADIOI_W_EXCHANGE_DATA";
|
||||
|
||||
/* create derived datatypes for recv */
|
||||
nprocs_recv = 0;
|
||||
for (i = 0; i < nprocs; i++)
|
||||
if (recv_size[i])
|
||||
nprocs_recv++;
|
||||
|
||||
recv_types = (MPI_Datatype *) ADIOI_Malloc((nprocs_recv + 1) *
|
||||
sizeof(MPI_Datatype));
|
||||
/* +1 to avoid a 0-size malloc */
|
||||
|
||||
j = 0;
|
||||
for (i = 0; i < nprocs; i++) {
|
||||
if (recv_size[i]) {
|
||||
ADIOI_Type_create_hindexed_x(count[i],
|
||||
&(others_req[i].lens[start_pos[i]]),
|
||||
&(others_req[i].mem_ptrs[start_pos[i]]),
|
||||
MPI_BYTE, recv_types + j);
|
||||
/* absolute displacements; use MPI_BOTTOM in recv */
|
||||
MPI_Type_commit(recv_types + j);
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
/* To avoid a read-modify-write,
|
||||
* check if there are holes in the data to be written.
|
||||
* For this, merge the (sorted) offset lists others_req using a heap-merge.
|
||||
*/
|
||||
|
||||
*srt_num = 0;
|
||||
for (i = 0; i < nprocs; i++)
|
||||
*srt_num += count[i];
|
||||
if (*srt_off)
|
||||
*srt_off = (ADIO_Offset *) ADIOI_Realloc(*srt_off, (*srt_num + 1) * sizeof(ADIO_Offset));
|
||||
else
|
||||
*srt_off = (ADIO_Offset *) ADIOI_Malloc((*srt_num + 1) * sizeof(ADIO_Offset));
|
||||
if (*srt_len)
|
||||
*srt_len = (int *) ADIOI_Realloc(*srt_len, (*srt_num + 1) * sizeof(int));
|
||||
else
|
||||
*srt_len = (int *) ADIOI_Malloc((*srt_num + 1) * sizeof(int));
|
||||
/* +1 to avoid a 0-size malloc */
|
||||
|
||||
ADIOI_Heap_merge(others_req, count, *srt_off, *srt_len, start_pos,
|
||||
nprocs, nprocs_recv, *srt_num);
|
||||
|
||||
/* check if there are any holes */
|
||||
*hole = 0;
|
||||
for (i = 0; i < *srt_num - 1; i++) {
|
||||
if ((*srt_off)[i] + (*srt_len)[i] < (*srt_off)[i + 1]) {
|
||||
*hole = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* In some cases (see John Bent ROMIO REQ # 835), an odd interaction
|
||||
* between aggregation, nominally contiguous regions, and cb_buffer_size
|
||||
* should be handled with a read-modify-write (otherwise we will write out
|
||||
* more data than we receive from everyone else (inclusive), so override
|
||||
* hole detection
|
||||
*/
|
||||
if (*hole == 0) {
|
||||
sum_recv = 0;
|
||||
for (i = 0; i < nprocs; i++)
|
||||
sum_recv += recv_size[i];
|
||||
if (size > sum_recv)
|
||||
*hole = 1;
|
||||
}
|
||||
/* check the hint for data sieving */
|
||||
if (data_sieving == ADIOI_HINT_ENABLE && nprocs_recv && *hole) {
|
||||
ADIO_ReadContig(fd, write_buf, size, MPI_BYTE,
|
||||
ADIO_EXPLICIT_OFFSET, off, &status, &err);
|
||||
// --BEGIN ERROR HANDLING--
|
||||
if (err != MPI_SUCCESS) {
|
||||
*error_code = MPIO_Err_create_code(err,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
MPI_ERR_IO,
|
||||
"**ioRMWrdwr", 0);
|
||||
ADIOI_Free(recv_types);
|
||||
return;
|
||||
}
|
||||
// --END ERROR HANDLING--
|
||||
}
|
||||
|
||||
nprocs_send = 0;
|
||||
for (i = 0; i < nprocs; i++)
|
||||
if (send_size[i])
|
||||
nprocs_send++;
|
||||
|
||||
if (fd->atomicity) {
|
||||
/* bug fix from Wei-keng Liao and Kenin Coloma */
|
||||
requests = (MPI_Request *) ADIOI_Malloc((nprocs_send + 1) *
|
||||
sizeof(MPI_Request));
|
||||
send_req = requests;
|
||||
} else {
|
||||
requests = (MPI_Request *) ADIOI_Malloc((nprocs_send + nprocs_recv + 1)*
|
||||
sizeof(MPI_Request));
|
||||
/* +1 to avoid a 0-size malloc */
|
||||
|
||||
/* post receives */
|
||||
j = 0;
|
||||
for (i = 0; i < nprocs; i++) {
|
||||
if (recv_size[i]) {
|
||||
MPI_Irecv(MPI_BOTTOM, 1, recv_types[j], i,
|
||||
myrank + i + 100 * iter, fd->comm, requests + j);
|
||||
j++;
|
||||
}
|
||||
}
|
||||
send_req = requests + nprocs_recv;
|
||||
}
|
||||
|
||||
/* post sends.
|
||||
* if buftype_is_contig, data can be directly sent from
|
||||
* user buf at location given by buf_idx. else use send_buf.
|
||||
*/
|
||||
if (buftype_is_contig) {
|
||||
j = 0;
|
||||
for (i = 0; i < nprocs; i++)
|
||||
if (send_size[i]) {
|
||||
ADIOI_Assert(buf_idx[i] != -1);
|
||||
MPI_Isend(((char *) buf) + buf_idx[i], send_size[i],
|
||||
MPI_BYTE, i, myrank + i + 100 * iter, fd->comm,
|
||||
send_req + j);
|
||||
j++;
|
||||
}
|
||||
} else
|
||||
if (nprocs_send) {
|
||||
/* buftype is not contig */
|
||||
send_buf = (char **) ADIOI_Malloc(nprocs * sizeof(char *));
|
||||
for (i = 0; i < nprocs; i++)
|
||||
if (send_size[i])
|
||||
send_buf[i] = (char *) ADIOI_Malloc(send_size[i]);
|
||||
|
||||
ADIOI_LUSTRE_Fill_send_buffer(fd, buf, flat_buf, send_buf, offset_list,
|
||||
len_list, send_size, send_req,
|
||||
sent_to_proc, nprocs, myrank,
|
||||
contig_access_count, striping_info,
|
||||
send_buf_idx, curr_to_proc, done_to_proc,
|
||||
iter, buftype_extent);
|
||||
/* the send is done in ADIOI_Fill_send_buffer */
|
||||
}
|
||||
|
||||
/* bug fix from Wei-keng Liao and Kenin Coloma */
|
||||
if (fd->atomicity) {
|
||||
j = 0;
|
||||
for (i = 0; i < nprocs; i++) {
|
||||
MPI_Status wkl_status;
|
||||
if (recv_size[i]) {
|
||||
MPI_Recv(MPI_BOTTOM, 1, recv_types[j], i,
|
||||
myrank + i + 100 * iter, fd->comm, &wkl_status);
|
||||
j++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < nprocs_recv; i++)
|
||||
MPI_Type_free(recv_types + i);
|
||||
ADIOI_Free(recv_types);
|
||||
|
||||
/* bug fix from Wei-keng Liao and Kenin Coloma */
|
||||
/* +1 to avoid a 0-size malloc */
|
||||
if (fd->atomicity) {
|
||||
statuses = (MPI_Status *) ADIOI_Malloc((nprocs_send + 1) *
|
||||
sizeof(MPI_Status));
|
||||
} else {
|
||||
statuses = (MPI_Status *) ADIOI_Malloc((nprocs_send + nprocs_recv + 1) *
|
||||
sizeof(MPI_Status));
|
||||
}
|
||||
|
||||
#ifdef NEEDS_MPI_TEST
|
||||
i = 0;
|
||||
if (fd->atomicity) {
|
||||
/* bug fix from Wei-keng Liao and Kenin Coloma */
|
||||
while (!i)
|
||||
MPI_Testall(nprocs_send, send_req, &i, statuses);
|
||||
} else {
|
||||
while (!i)
|
||||
MPI_Testall(nprocs_send + nprocs_recv, requests, &i, statuses);
|
||||
}
|
||||
#else
|
||||
/* bug fix from Wei-keng Liao and Kenin Coloma */
|
||||
if (fd->atomicity)
|
||||
MPI_Waitall(nprocs_send, send_req, statuses);
|
||||
else
|
||||
MPI_Waitall(nprocs_send + nprocs_recv, requests, statuses);
|
||||
#endif
|
||||
ADIOI_Free(statuses);
|
||||
ADIOI_Free(requests);
|
||||
if (!buftype_is_contig && nprocs_send) {
|
||||
for (i = 0; i < nprocs; i++)
|
||||
if (send_size[i])
|
||||
ADIOI_Free(send_buf[i]);
|
||||
ADIOI_Free(send_buf);
|
||||
}
|
||||
}
|
||||
|
||||
/* ADIOI_BUF_INCR: move the user-buffer cursor forward by buf_incr bytes
 * without copying any data.
 *
 * The macro expands in the caller's scope.  It reads flat_buf and
 * buftype_extent and updates the caller locals buf_incr, size_in_buf,
 * flat_buf_sz, flat_buf_idx, n_buftypes and user_buf_idx.  Whenever the
 * current contiguous piece of the flattened buftype is used up, the cursor
 * moves to the next piece; after the last piece it wraps to piece 0 and
 * n_buftypes is incremented.  buf_incr is 0 when the macro finishes.
 *
 * The expansion is a bare brace block; call sites in this file invoke it
 * without a trailing semicolon. */
#define ADIOI_BUF_INCR \
{ \
    for (; buf_incr != 0; buf_incr -= size_in_buf) { \
        size_in_buf = ADIOI_MIN(buf_incr, flat_buf_sz); \
        user_buf_idx += size_in_buf; \
        flat_buf_sz -= size_in_buf; \
        if (flat_buf_sz != 0) \
            continue; \
        /* current piece exhausted: step to the next one */ \
        if (flat_buf_idx < (flat_buf->count - 1)) { \
            flat_buf_idx++; \
        } else { \
            flat_buf_idx = 0; \
            n_buftypes++; \
        } \
        user_buf_idx = flat_buf->indices[flat_buf_idx] + \
            (ADIO_Offset)n_buftypes*(ADIO_Offset)buftype_extent; \
        flat_buf_sz = flat_buf->blocklens[flat_buf_idx]; \
    } \
}
|
||||
|
||||
|
||||
/* ADIOI_BUF_COPY: copy `size` bytes from the (noncontiguous) user buffer
 * into send_buf[p], then skip whatever is left of buf_incr.
 *
 * Expands in the caller's scope and uses the caller locals size, buf_incr,
 * size_in_buf, flat_buf_sz, flat_buf_idx, n_buftypes, user_buf_idx and p,
 * plus buf, send_buf, send_buf_idx, flat_buf and buftype_extent.  Each pass
 * copies at most the remainder of the current contiguous piece of the
 * flattened buftype, advances send_buf_idx[p] and the user-buffer cursor,
 * and decrements both size and buf_incr by the amount copied.  Once size
 * reaches 0 the trailing ADIOI_BUF_INCR consumes the rest of buf_incr
 * without copying.
 *
 * The expansion is a bare brace block; call sites invoke it without a
 * trailing semicolon. */
#define ADIOI_BUF_COPY \
{ \
    for (; size != 0; size -= size_in_buf, buf_incr -= size_in_buf) { \
        size_in_buf = ADIOI_MIN(size, flat_buf_sz); \
        ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + user_buf_idx) == (ADIO_Offset)(MPIR_Upint)((MPIR_Upint)buf + user_buf_idx)); \
        ADIOI_Assert(size_in_buf == (size_t)size_in_buf); \
        memcpy(send_buf[p] + send_buf_idx[p], \
               ((char *) buf) + user_buf_idx, size_in_buf); \
        send_buf_idx[p] += size_in_buf; \
        user_buf_idx += size_in_buf; \
        flat_buf_sz -= size_in_buf; \
        if (flat_buf_sz != 0) \
            continue; \
        /* current piece exhausted: step to the next one */ \
        if (flat_buf_idx < (flat_buf->count - 1)) { \
            flat_buf_idx++; \
        } else { \
            flat_buf_idx = 0; \
            n_buftypes++; \
        } \
        user_buf_idx = flat_buf->indices[flat_buf_idx] + \
            (ADIO_Offset)n_buftypes*(ADIO_Offset)buftype_extent; \
        flat_buf_sz = flat_buf->blocklens[flat_buf_idx]; \
    } \
    ADIOI_BUF_INCR \
}
|
||||
|
||||
static void ADIOI_LUSTRE_Fill_send_buffer(ADIO_File fd, const void *buf,
|
||||
ADIOI_Flatlist_node *flat_buf,
|
||||
char **send_buf,
|
||||
ADIO_Offset *offset_list,
|
||||
ADIO_Offset *len_list, int *send_size,
|
||||
MPI_Request *requests,
|
||||
int *sent_to_proc, int nprocs,
|
||||
int myrank,
|
||||
int contig_access_count,
|
||||
int *striping_info,
|
||||
int *send_buf_idx,
|
||||
int *curr_to_proc,
|
||||
int *done_to_proc, int iter,
|
||||
MPI_Aint buftype_extent)
|
||||
{
|
||||
/* this function is only called if buftype is not contig */
|
||||
int i, p, flat_buf_idx, size;
|
||||
int flat_buf_sz, buf_incr, size_in_buf, jj, n_buftypes;
|
||||
ADIO_Offset off, len, rem_len, user_buf_idx;
|
||||
|
||||
/* curr_to_proc[p] = amount of data sent to proc. p that has already
|
||||
* been accounted for so far
|
||||
* done_to_proc[p] = amount of data already sent to proc. p in
|
||||
* previous iterations
|
||||
* user_buf_idx = current location in user buffer
|
||||
* send_buf_idx[p] = current location in send_buf of proc. p
|
||||
*/
|
||||
|
||||
for (i = 0; i < nprocs; i++) {
|
||||
send_buf_idx[i] = curr_to_proc[i] = 0;
|
||||
done_to_proc[i] = sent_to_proc[i];
|
||||
}
|
||||
jj = 0;
|
||||
|
||||
user_buf_idx = flat_buf->indices[0];
|
||||
flat_buf_idx = 0;
|
||||
n_buftypes = 0;
|
||||
flat_buf_sz = flat_buf->blocklens[0];
|
||||
|
||||
/* flat_buf_idx = current index into flattened buftype
|
||||
* flat_buf_sz = size of current contiguous component in flattened buf
|
||||
*/
|
||||
for (i = 0; i < contig_access_count; i++) {
|
||||
off = offset_list[i];
|
||||
rem_len = (ADIO_Offset) len_list[i];
|
||||
|
||||
/*this request may span to more than one process */
|
||||
while (rem_len != 0) {
|
||||
len = rem_len;
|
||||
/* NOTE: len value is modified by ADIOI_Calc_aggregator() to be no
|
||||
* longer than the single region that processor "p" is responsible
|
||||
* for.
|
||||
*/
|
||||
p = ADIOI_LUSTRE_Calc_aggregator(fd, off, &len, striping_info);
|
||||
|
||||
if (send_buf_idx[p] < send_size[p]) {
|
||||
if (curr_to_proc[p] + len > done_to_proc[p]) {
|
||||
if (done_to_proc[p] > curr_to_proc[p]) {
|
||||
size = (int) ADIOI_MIN(curr_to_proc[p] + len -
|
||||
done_to_proc[p],
|
||||
send_size[p] -
|
||||
send_buf_idx[p]);
|
||||
buf_incr = done_to_proc[p] - curr_to_proc[p];
|
||||
ADIOI_BUF_INCR
|
||||
ADIOI_Assert((curr_to_proc[p] + len - done_to_proc[p]) == (unsigned)(curr_to_proc[p] + len - done_to_proc[p]));
|
||||
buf_incr = (int) (curr_to_proc[p] + len -
|
||||
done_to_proc[p]);
|
||||
ADIOI_Assert((done_to_proc[p] + size) == (unsigned)(done_to_proc[p] + size));
|
||||
curr_to_proc[p] = done_to_proc[p] + size;
|
||||
ADIOI_BUF_COPY
|
||||
} else {
|
||||
size = (int) ADIOI_MIN(len, send_size[p] -
|
||||
send_buf_idx[p]);
|
||||
buf_incr = (int) len;
|
||||
ADIOI_Assert((curr_to_proc[p] + size) == (unsigned)((ADIO_Offset)curr_to_proc[p] + size));
|
||||
curr_to_proc[p] += size;
|
||||
ADIOI_BUF_COPY
|
||||
}
|
||||
if (send_buf_idx[p] == send_size[p]) {
|
||||
MPI_Isend(send_buf[p], send_size[p], MPI_BYTE, p,
|
||||
myrank + p + 100 * iter, fd->comm,
|
||||
requests + jj);
|
||||
jj++;
|
||||
}
|
||||
} else {
|
||||
ADIOI_Assert((curr_to_proc[p] + len) == (unsigned)((ADIO_Offset)curr_to_proc[p] + len));
|
||||
curr_to_proc[p] += (int) len;
|
||||
buf_incr = (int) len;
|
||||
ADIOI_BUF_INCR
|
||||
}
|
||||
} else {
|
||||
buf_incr = (int) len;
|
||||
ADIOI_BUF_INCR
|
||||
}
|
||||
off += len;
|
||||
rem_len -= len;
|
||||
}
|
||||
}
|
||||
for (i = 0; i < nprocs; i++)
|
||||
if (send_size[i])
|
||||
sent_to_proc[i] = curr_to_proc[i];
|
||||
}
|
533
ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_wrstr.c
Обычный файл
533
ompi/mca/io/romio314/romio/adio/ad_lustre/ad_lustre_wrstr.c
Обычный файл
@ -0,0 +1,533 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*
|
||||
* Copyright (C) 2007 Oak Ridge National Laboratory
|
||||
*
|
||||
* Copyright (C) 2008 Sun Microsystems, Lustre group
|
||||
*/
|
||||
|
||||
#include "ad_lustre.h"
|
||||
#include "adio_extern.h"
|
||||
|
||||
/* ADIOI_BUFFERED_WRITE: read-modify-write of one request
 * [req_off, req_off + req_len) through the staging buffer writebuf.
 *
 * Expands in the caller's scope and uses fd, buf, userbuf_off, req_off,
 * req_len, writebuf, writebuf_off, writebuf_len, write_sz, end_offset,
 * stripe_size, status1, error_code and myname.
 *
 * writebuf mirrors the file window [writebuf_off, writebuf_off +
 * writebuf_len).  A window never extends past end_offset nor past the next
 * multiple of stripe_size.  When the request starts at or beyond the end of
 * the current window, the window (if non-empty) is written back and a new
 * one starting at req_off is read in.  If the request does not fit in the
 * window, the window is written, advanced and re-read until all of req_len
 * has been copied.  When fd->atomicity is false each window is locked
 * before it is read and unlocked after it is written.
 *
 * On any I/O error the error code is wrapped with MPIO_Err_create_code,
 * writebuf is freed and the ENCLOSING FUNCTION RETURNS.
 *
 * The expansion is a bare brace block; call sites invoke it without a
 * trailing semicolon. */
#define ADIOI_BUFFERED_WRITE \
{ \
    if (req_off >= writebuf_off + writebuf_len) { \
        if (writebuf_len) { \
            /* flush the window filled so far */ \
            ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, \
                             ADIO_EXPLICIT_OFFSET, writebuf_off, \
                             &status1, error_code); \
            if (!(fd->atomicity)) { \
                ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
            } \
            if (*error_code != MPI_SUCCESS) { \
                *error_code = MPIO_Err_create_code(*error_code, \
                                                   MPIR_ERR_RECOVERABLE, \
                                                   myname, __LINE__, \
                                                   MPI_ERR_IO, \
                                                   "**iowswc", 0); \
                ADIOI_Free(writebuf); \
                return; \
            } \
        } \
        writebuf_off = req_off; \
        /* stripe_size alignment */ \
        writebuf_len = (unsigned) ADIOI_MIN(end_offset - writebuf_off + 1, \
                                            (writebuf_off / stripe_size + 1) * \
                                            stripe_size - writebuf_off); \
        if (!(fd->atomicity)) { \
            ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
        } \
        /* load the new window so untouched bytes are preserved */ \
        ADIO_ReadContig(fd, writebuf, writebuf_len, MPI_BYTE, \
                        ADIO_EXPLICIT_OFFSET, \
                        writebuf_off, &status1, error_code); \
        if (*error_code != MPI_SUCCESS) { \
            *error_code = MPIO_Err_create_code(*error_code, \
                                               MPIR_ERR_RECOVERABLE, \
                                               myname, __LINE__, \
                                               MPI_ERR_IO, \
                                               "**iowsrc", 0); \
            ADIOI_Free(writebuf); \
            return; \
        } \
    } \
    /* copy the part of the request that fits in the current window */ \
    write_sz = (unsigned) (ADIOI_MIN(req_len, \
                                     writebuf_off + writebuf_len - req_off)); \
    ADIOI_Assert((ADIO_Offset)write_sz == \
                 ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \
    memcpy(writebuf + (req_off - writebuf_off), \
           (char *)buf + userbuf_off, write_sz); \
    while (write_sz != req_len) { \
        /* request continues past the window: flush, advance, reload */ \
        ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, \
                         ADIO_EXPLICIT_OFFSET, writebuf_off, \
                         &status1, error_code); \
        if (!(fd->atomicity)) { \
            ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
        } \
        if (*error_code != MPI_SUCCESS) { \
            *error_code = MPIO_Err_create_code(*error_code, \
                                               MPIR_ERR_RECOVERABLE, \
                                               myname, __LINE__, \
                                               MPI_ERR_IO, \
                                               "**iowswc", 0); \
            ADIOI_Free(writebuf); \
            return; \
        } \
        req_len -= write_sz; \
        userbuf_off += write_sz; \
        writebuf_off += writebuf_len; \
        /* stripe_size alignment */ \
        writebuf_len = (unsigned) ADIOI_MIN(end_offset - writebuf_off + 1, \
                                            (writebuf_off / stripe_size + 1) * \
                                            stripe_size - writebuf_off); \
        if (!(fd->atomicity)) { \
            ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
        } \
        ADIO_ReadContig(fd, writebuf, writebuf_len, MPI_BYTE, \
                        ADIO_EXPLICIT_OFFSET, \
                        writebuf_off, &status1, error_code); \
        if (*error_code != MPI_SUCCESS) { \
            *error_code = MPIO_Err_create_code(*error_code, \
                                               MPIR_ERR_RECOVERABLE, \
                                               myname, __LINE__, \
                                               MPI_ERR_IO, \
                                               "**iowsrc", 0); \
            ADIOI_Free(writebuf); \
            return; \
        } \
        write_sz = ADIOI_MIN(req_len, writebuf_len); \
        memcpy(writebuf, (char *)buf + userbuf_off, write_sz); \
    } \
}
|
||||
|
||||
|
||||
/* ADIOI_BUFFERED_WRITE_WITHOUT_READ: buffered write of one request
 * [req_off, req_off + req_len) through writebuf, with no read-modify-write
 * and no locking.
 *
 * In this file it is used when the filetype is contiguous and the buftype
 * is not, and for a contiguous buffer that fits in a single contiguous
 * block of the filetype.
 *
 * Expands in the caller's scope and uses fd, buf, userbuf_off, req_off,
 * req_len, writebuf, writebuf_off, writebuf_len, write_sz, end_offset,
 * stripe_size, status1, error_code and myname.  A window never extends past
 * end_offset nor past the next multiple of stripe_size.  Note that the
 * first branch calls ADIO_WriteContig unconditionally, i.e. also while
 * writebuf_len is still 0.
 *
 * On a write error the error code is wrapped with MPIO_Err_create_code,
 * writebuf is freed and the ENCLOSING FUNCTION RETURNS.
 *
 * The expansion is a bare brace block; call sites invoke it without a
 * trailing semicolon. */
#define ADIOI_BUFFERED_WRITE_WITHOUT_READ \
{ \
    if (req_off >= writebuf_off + writebuf_len) { \
        /* request starts beyond the window: flush and start a new one */ \
        ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, \
                         ADIO_EXPLICIT_OFFSET, writebuf_off, \
                         &status1, error_code); \
        if (*error_code != MPI_SUCCESS) { \
            *error_code = MPIO_Err_create_code(*error_code, \
                                               MPIR_ERR_RECOVERABLE, \
                                               myname, __LINE__, \
                                               MPI_ERR_IO, \
                                               "**iowswc", 0); \
            ADIOI_Free(writebuf); \
            return; \
        } \
        writebuf_off = req_off; \
        /* stripe_size alignment */ \
        writebuf_len = (unsigned) ADIOI_MIN(end_offset - writebuf_off + 1, \
                                            (writebuf_off / stripe_size + 1) * \
                                            stripe_size - writebuf_off); \
    } \
    /* copy the part of the request that fits in the current window */ \
    write_sz = (unsigned) ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off); \
    ADIOI_Assert((ADIO_Offset)write_sz == ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \
    memcpy(writebuf + (req_off - writebuf_off), \
           (char *)buf + userbuf_off, write_sz); \
    while (write_sz != req_len) { \
        /* request continues past the window: flush and advance */ \
        ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, \
                         ADIO_EXPLICIT_OFFSET, writebuf_off, \
                         &status1, error_code); \
        if (*error_code != MPI_SUCCESS) { \
            *error_code = MPIO_Err_create_code(*error_code, \
                                               MPIR_ERR_RECOVERABLE, \
                                               myname, __LINE__, \
                                               MPI_ERR_IO, \
                                               "**iowswc", 0); \
            ADIOI_Free(writebuf); \
            return; \
        } \
        req_len -= write_sz; \
        userbuf_off += write_sz; \
        writebuf_off += writebuf_len; \
        /* stripe_size alignment */ \
        writebuf_len = (unsigned) ADIOI_MIN(end_offset - writebuf_off + 1, \
                                            (writebuf_off / stripe_size + 1) * \
                                            stripe_size - writebuf_off); \
        write_sz = ADIOI_MIN(req_len, writebuf_len); \
        memcpy(writebuf, (char *)buf + userbuf_off, write_sz); \
    } \
}
|
||||
|
||||
void ADIOI_LUSTRE_WriteStrided(ADIO_File fd, const void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status * status,
|
||||
int *error_code)
|
||||
{
|
||||
/* offset is in units of etype relative to the filetype. */
|
||||
ADIOI_Flatlist_node *flat_buf, *flat_file;
|
||||
ADIO_Offset i_offset, sum, size_in_filetype;
|
||||
int i, j, k, st_index=0;
|
||||
int n_etypes_in_filetype;
|
||||
ADIO_Offset num, size, n_filetypes, etype_in_filetype, st_n_filetypes;
|
||||
ADIO_Offset abs_off_in_filetype=0;
|
||||
MPI_Count filetype_size, etype_size, buftype_size;
|
||||
MPI_Aint filetype_extent, buftype_extent;
|
||||
int buf_count, buftype_is_contig, filetype_is_contig;
|
||||
ADIO_Offset userbuf_off;
|
||||
ADIO_Offset off, req_off, disp, end_offset=0, writebuf_off, start_off;
|
||||
char *writebuf;
|
||||
unsigned bufsize, writebuf_len, write_sz;
|
||||
ADIO_Status status1;
|
||||
ADIO_Offset new_bwr_size, new_fwr_size, st_fwr_size, fwr_size=0, bwr_size, req_len;
|
||||
int stripe_size;
|
||||
static char myname[] = "ADIOI_LUSTRE_WriteStrided";
|
||||
|
||||
if (fd->hints->ds_write == ADIOI_HINT_DISABLE) {
|
||||
/* if user has disabled data sieving on writes, use naive
|
||||
* approach instead.
|
||||
*/
|
||||
ADIOI_GEN_WriteStrided_naive(fd,
|
||||
buf,
|
||||
count,
|
||||
datatype,
|
||||
file_ptr_type,
|
||||
offset, status, error_code);
|
||||
return;
|
||||
}
|
||||
|
||||
*error_code = MPI_SUCCESS; /* changed below if error */
|
||||
|
||||
ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
|
||||
ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
|
||||
|
||||
MPI_Type_size_x(fd->filetype, &filetype_size);
|
||||
if (!filetype_size) {
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
MPIR_Status_set_bytes(status, datatype, 0);
|
||||
#endif
|
||||
*error_code = MPI_SUCCESS;
|
||||
return;
|
||||
}
|
||||
|
||||
MPI_Type_extent(fd->filetype, &filetype_extent);
|
||||
MPI_Type_size_x(datatype, &buftype_size);
|
||||
MPI_Type_extent(datatype, &buftype_extent);
|
||||
etype_size = fd->etype_size;
|
||||
|
||||
ADIOI_Assert((buftype_size * count) == ((ADIO_Offset)(unsigned)buftype_size * (ADIO_Offset)count));
|
||||
bufsize = buftype_size * count;
|
||||
|
||||
/* get striping info */
|
||||
stripe_size = fd->hints->striping_unit;
|
||||
|
||||
/* Different buftype to different filetype */
|
||||
if (!buftype_is_contig && filetype_is_contig) {
|
||||
/* noncontiguous in memory, contiguous in file. */
|
||||
ADIOI_Flatten_datatype(datatype);
|
||||
flat_buf = ADIOI_Flatlist;
|
||||
while (flat_buf->type != datatype)
|
||||
flat_buf = flat_buf->next;
|
||||
|
||||
off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind :
|
||||
fd->disp + (ADIO_Offset)etype_size * offset;
|
||||
|
||||
start_off = off;
|
||||
end_offset = start_off + bufsize - 1;
|
||||
/* write stripe size buffer each time */
|
||||
writebuf = (char *) ADIOI_Malloc(ADIOI_MIN(bufsize, stripe_size));
|
||||
writebuf_off = 0;
|
||||
writebuf_len = 0;
|
||||
|
||||
/* if atomicity is true, lock the region to be accessed */
|
||||
if (fd->atomicity)
|
||||
ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, bufsize);
|
||||
|
||||
for (j = 0; j < count; j++) {
|
||||
for (i = 0; i < flat_buf->count; i++) {
|
||||
userbuf_off = (ADIO_Offset)j * (ADIO_Offset)buftype_extent +
|
||||
flat_buf->indices[i];
|
||||
req_off = off;
|
||||
req_len = flat_buf->blocklens[i];
|
||||
ADIOI_BUFFERED_WRITE_WITHOUT_READ
|
||||
off += flat_buf->blocklens[i];
|
||||
}
|
||||
}
|
||||
|
||||
/* write the buffer out finally */
|
||||
ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE,
|
||||
ADIO_EXPLICIT_OFFSET, writebuf_off, &status1,
|
||||
error_code);
|
||||
|
||||
if (fd->atomicity)
|
||||
ADIOI_UNLOCK(fd, start_off, SEEK_SET, bufsize);
|
||||
if (*error_code != MPI_SUCCESS) {
|
||||
ADIOI_Free(writebuf);
|
||||
return;
|
||||
}
|
||||
ADIOI_Free(writebuf);
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL)
|
||||
fd->fp_ind = off;
|
||||
} else {
|
||||
/* noncontiguous in file */
|
||||
/* filetype already flattened in ADIO_Open */
|
||||
flat_file = ADIOI_Flatlist;
|
||||
while (flat_file->type != fd->filetype)
|
||||
flat_file = flat_file->next;
|
||||
disp = fd->disp;
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) {
|
||||
/* Wei-keng reworked type processing to be a bit more efficient */
|
||||
offset = fd->fp_ind - disp;
|
||||
n_filetypes = (offset - flat_file->indices[0]) / filetype_extent;
|
||||
offset -= (ADIO_Offset)n_filetypes * filetype_extent;
|
||||
/* now offset is local to this extent */
|
||||
|
||||
/* find the block where offset is located, skip blocklens[i]==0 */
|
||||
for (i=0; i<flat_file->count; i++) {
|
||||
ADIO_Offset dist;
|
||||
if (flat_file->blocklens[i] == 0) continue;
|
||||
dist = flat_file->indices[i] + flat_file->blocklens[i] - offset;
|
||||
/* fwr_size is from offset to the end of block i */
|
||||
if (dist == 0) {
|
||||
i++;
|
||||
offset = flat_file->indices[i];
|
||||
fwr_size = flat_file->blocklens[i];
|
||||
break;
|
||||
}
|
||||
if (dist > 0) {
|
||||
fwr_size = dist;
|
||||
break;
|
||||
}
|
||||
}
|
||||
st_index = i; /* starting index in flat_file->indices[] */
|
||||
offset += disp + (ADIO_Offset)n_filetypes*filetype_extent;
|
||||
}
|
||||
else {
|
||||
n_etypes_in_filetype = filetype_size/etype_size;
|
||||
n_filetypes = offset / n_etypes_in_filetype;
|
||||
etype_in_filetype = offset % n_etypes_in_filetype;
|
||||
size_in_filetype = etype_in_filetype * etype_size;
|
||||
|
||||
sum = 0;
|
||||
for (i = 0; i < flat_file->count; i++) {
|
||||
sum += flat_file->blocklens[i];
|
||||
if (sum > size_in_filetype) {
|
||||
st_index = i;
|
||||
fwr_size = sum - size_in_filetype;
|
||||
abs_off_in_filetype = flat_file->indices[i] +
|
||||
size_in_filetype - (sum - flat_file->blocklens[i]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* abs. offset in bytes in the file */
|
||||
offset = disp + (ADIO_Offset) n_filetypes *filetype_extent +
|
||||
abs_off_in_filetype;
|
||||
}
|
||||
|
||||
start_off = offset;
|
||||
|
||||
/* Wei-keng Liao:write request is within single flat_file
|
||||
* contig block*/
|
||||
/* this could happen, for example, with subarray types that are
|
||||
* actually fairly contiguous */
|
||||
if (buftype_is_contig && bufsize <= fwr_size) {
|
||||
req_off = start_off;
|
||||
req_len = bufsize;
|
||||
end_offset = start_off + bufsize - 1;
|
||||
writebuf = (char *) ADIOI_Malloc(ADIOI_MIN(bufsize, stripe_size));
|
||||
memset(writebuf, -1, ADIOI_MIN(bufsize, stripe_size));
|
||||
writebuf_off = 0;
|
||||
writebuf_len = 0;
|
||||
userbuf_off = 0;
|
||||
ADIOI_BUFFERED_WRITE_WITHOUT_READ
|
||||
/* write the buffer out finally */
|
||||
ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE,
|
||||
ADIO_EXPLICIT_OFFSET, writebuf_off, &status1,
|
||||
error_code);
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) {
|
||||
/* update MPI-IO file pointer to point to the first byte
|
||||
* that can be accessed in the fileview. */
|
||||
fd->fp_ind = offset + bufsize;
|
||||
if (bufsize == fwr_size) {
|
||||
do {
|
||||
st_index++;
|
||||
if (st_index == flat_file->count) {
|
||||
st_index = 0;
|
||||
n_filetypes++;
|
||||
}
|
||||
} while (flat_file->blocklens[st_index] == 0);
|
||||
fd->fp_ind = disp + flat_file->indices[st_index]
|
||||
+ (ADIO_Offset)n_filetypes*filetype_extent;
|
||||
}
|
||||
}
|
||||
fd->fp_sys_posn = -1; /* set it to null. */
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
MPIR_Status_set_bytes(status, datatype, bufsize);
|
||||
#endif
|
||||
ADIOI_Free(writebuf);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Calculate end_offset, the last byte-offset that will be accessed.
|
||||
e.g., if start_offset=0 and 100 bytes to be write, end_offset=99*/
|
||||
|
||||
st_fwr_size = fwr_size;
|
||||
st_n_filetypes = n_filetypes;
|
||||
i_offset = 0;
|
||||
j = st_index;
|
||||
off = offset;
|
||||
fwr_size = ADIOI_MIN(st_fwr_size, bufsize);
|
||||
while (i_offset < bufsize) {
|
||||
i_offset += fwr_size;
|
||||
end_offset = off + fwr_size - 1;
|
||||
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
while (flat_file->blocklens[j]==0) {
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
}
|
||||
|
||||
off = disp + flat_file->indices[j] +
|
||||
n_filetypes*(ADIO_Offset)filetype_extent;
|
||||
fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset);
|
||||
}
|
||||
|
||||
/* if atomicity is true, lock the region to be accessed */
|
||||
if (fd->atomicity)
|
||||
ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
|
||||
|
||||
writebuf_off = 0;
|
||||
writebuf_len = 0;
|
||||
writebuf = (char *) ADIOI_Malloc(stripe_size);
|
||||
memset(writebuf, -1, stripe_size);
|
||||
|
||||
if (buftype_is_contig && !filetype_is_contig) {
|
||||
|
||||
/* contiguous in memory, noncontiguous in file. should be the most
|
||||
common case. */
|
||||
|
||||
i_offset = 0;
|
||||
j = st_index;
|
||||
off = offset;
|
||||
n_filetypes = st_n_filetypes;
|
||||
fwr_size = ADIOI_MIN(st_fwr_size, bufsize);
|
||||
while (i_offset < bufsize) {
|
||||
if (fwr_size) {
|
||||
/* TYPE_UB and TYPE_LB can result in
|
||||
fwr_size = 0. save system call in such cases */
|
||||
/* lseek(fd->fd_sys, off, SEEK_SET);
|
||||
err = write(fd->fd_sys, ((char *) buf) + i_offset, fwr_size);*/
|
||||
|
||||
req_off = off;
|
||||
req_len = fwr_size;
|
||||
userbuf_off = i_offset;
|
||||
ADIOI_BUFFERED_WRITE
|
||||
}
|
||||
i_offset += fwr_size;
|
||||
|
||||
if (off + fwr_size < disp + flat_file->indices[j] +
|
||||
flat_file->blocklens[j] +
|
||||
n_filetypes*(ADIO_Offset)filetype_extent)
|
||||
off += fwr_size;
|
||||
/* did not reach end of contiguous block in filetype.
|
||||
no more I/O needed. off is incremented by fwr_size. */
|
||||
else {
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
while (flat_file->blocklens[j]==0) {
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
}
|
||||
off = disp + flat_file->indices[j] +
|
||||
n_filetypes*(ADIO_Offset)filetype_extent;
|
||||
fwr_size = ADIOI_MIN(flat_file->blocklens[j],
|
||||
bufsize-i_offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* noncontiguous in memory as well as in file */
|
||||
|
||||
ADIOI_Flatten_datatype(datatype);
|
||||
flat_buf = ADIOI_Flatlist;
|
||||
while (flat_buf->type != datatype) flat_buf = flat_buf->next;
|
||||
|
||||
k = num = buf_count = 0;
|
||||
i_offset = flat_buf->indices[0];
|
||||
j = st_index;
|
||||
off = offset;
|
||||
n_filetypes = st_n_filetypes;
|
||||
fwr_size = st_fwr_size;
|
||||
bwr_size = flat_buf->blocklens[0];
|
||||
|
||||
while (num < bufsize) {
|
||||
size = ADIOI_MIN(fwr_size, bwr_size);
|
||||
if (size) {
|
||||
/* lseek(fd->fd_sys, off, SEEK_SET);
|
||||
err = write(fd->fd_sys, ((char *) buf) + i_offset, size); */
|
||||
|
||||
req_off = off;
|
||||
req_len = size;
|
||||
userbuf_off = i_offset;
|
||||
ADIOI_BUFFERED_WRITE
|
||||
}
|
||||
|
||||
new_fwr_size = fwr_size;
|
||||
new_bwr_size = bwr_size;
|
||||
|
||||
if (size == fwr_size) {
|
||||
/* reached end of contiguous block in file */
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
while (flat_file->blocklens[j]==0) {
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
}
|
||||
|
||||
off = disp + flat_file->indices[j] +
|
||||
n_filetypes*(ADIO_Offset)filetype_extent;
|
||||
|
||||
new_fwr_size = flat_file->blocklens[j];
|
||||
if (size != bwr_size) {
|
||||
i_offset += size;
|
||||
new_bwr_size -= size;
|
||||
}
|
||||
}
|
||||
|
||||
if (size == bwr_size) {
|
||||
/* reached end of contiguous block in memory */
|
||||
|
||||
k = (k + 1)%flat_buf->count;
|
||||
buf_count++;
|
||||
i_offset = (ADIO_Offset)buftype_extent *
|
||||
(ADIO_Offset)(buf_count/flat_buf->count) +
|
||||
flat_buf->indices[k];
|
||||
new_bwr_size = flat_buf->blocklens[k];
|
||||
if (size != fwr_size) {
|
||||
off += size;
|
||||
new_fwr_size -= size;
|
||||
}
|
||||
}
|
||||
num += size;
|
||||
fwr_size = new_fwr_size;
|
||||
bwr_size = new_bwr_size;
|
||||
}
|
||||
}
|
||||
|
||||
/* write the buffer out finally */
|
||||
if (writebuf_len) {
|
||||
ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE,
|
||||
ADIO_EXPLICIT_OFFSET,
|
||||
writebuf_off, &status1, error_code);
|
||||
if (!(fd->atomicity))
|
||||
ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
|
||||
if (*error_code != MPI_SUCCESS) return;
|
||||
}
|
||||
if (fd->atomicity)
|
||||
ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
|
||||
|
||||
ADIOI_Free(writebuf);
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;
|
||||
}
|
||||
|
||||
fd->fp_sys_posn = -1; /* set it to null. */
|
||||
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
MPIR_Status_set_bytes(status, datatype, bufsize);
|
||||
/* This is a temporary way of filling in status. The right way is to
|
||||
keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */
|
||||
#endif
|
||||
|
||||
if (!buftype_is_contig)
|
||||
ADIOI_Delete_flattened(datatype);
|
||||
}
|
28
ompi/mca/io/romio314/romio/adio/ad_nfs/Makefile.mk
Обычный файл
28
ompi/mca/io/romio314/romio/adio/ad_nfs/Makefile.mk
Обычный файл
@ -0,0 +1,28 @@
|
||||
## -*- Mode: Makefile; -*-
|
||||
## vim: set ft=automake :
|
||||
##
|
||||
## (C) 2011 by Argonne National Laboratory.
|
||||
## See COPYRIGHT in top-level directory.
|
||||
##
|
||||
|
||||
if BUILD_AD_NFS
|
||||
|
||||
noinst_HEADERS += adio/ad_nfs/ad_nfs.h
|
||||
|
||||
romio_other_sources += \
|
||||
adio/ad_nfs/ad_nfs_read.c \
|
||||
adio/ad_nfs/ad_nfs_open.c \
|
||||
adio/ad_nfs/ad_nfs_write.c \
|
||||
adio/ad_nfs/ad_nfs_done.c \
|
||||
adio/ad_nfs/ad_nfs_fcntl.c \
|
||||
adio/ad_nfs/ad_nfs_iread.c \
|
||||
adio/ad_nfs/ad_nfs_iwrite.c \
|
||||
adio/ad_nfs/ad_nfs_wait.c \
|
||||
adio/ad_nfs/ad_nfs_setsh.c \
|
||||
adio/ad_nfs/ad_nfs_getsh.c \
|
||||
adio/ad_nfs/ad_nfs.c \
|
||||
adio/ad_nfs/ad_nfs_resize.c \
|
||||
adio/ad_nfs/ad_nfs_features.c
|
||||
|
||||
endif BUILD_AD_NFS
|
||||
|
41
ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs.c
Обычный файл
41
ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs.c
Обычный файл
@ -0,0 +1,41 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2001 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_nfs.h"
|
||||
|
||||
/* adioi.h has the ADIOI_Fns_struct define */
|
||||
#include "adioi.h"
|
||||
|
||||
/* Function table for the NFS ADIO driver.  The initializer is positional:
 * each entry's trailing comment names the slot it fills.  NFS-specific
 * routines are used where this table lists an ADIOI_NFS_* function; the
 * remaining slots use the ADIOI_GEN_*, ADIOI_FAILSAFE_* or ADIOI_FAKE_*
 * routines. */
struct ADIOI_Fns_struct ADIO_NFS_operations = {
|
||||
ADIOI_NFS_Open, /* Open */
|
||||
ADIOI_FAILSAFE_OpenColl, /* OpenColl */
|
||||
ADIOI_NFS_ReadContig, /* ReadContig */
|
||||
ADIOI_NFS_WriteContig, /* WriteContig */
|
||||
ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */
|
||||
ADIOI_GEN_WriteStridedColl, /* WriteStridedColl */
|
||||
ADIOI_GEN_SeekIndividual, /* SeekIndividual */
|
||||
ADIOI_NFS_Fcntl, /* Fcntl */
|
||||
ADIOI_GEN_SetInfo, /* SetInfo */
|
||||
ADIOI_NFS_ReadStrided, /* ReadStrided */
|
||||
ADIOI_NFS_WriteStrided, /* WriteStrided */
|
||||
ADIOI_GEN_Close, /* Close */
|
||||
/* Even with lockd running and NFS mounted 'noac', we have been unable to
 * guarantee correct behavior over NFS with asynchronous I/O operations */
|
||||
ADIOI_FAKE_IreadContig, /* IreadContig */
|
||||
ADIOI_FAKE_IwriteContig, /* IwriteContig */
|
||||
ADIOI_NFS_ReadDone, /* ReadDone */
|
||||
ADIOI_NFS_WriteDone, /* WriteDone */
|
||||
ADIOI_NFS_ReadComplete, /* ReadComplete */
|
||||
ADIOI_NFS_WriteComplete, /* WriteComplete */
|
||||
ADIOI_GEN_IreadStrided, /* IreadStrided */
|
||||
ADIOI_GEN_IwriteStrided, /* IwriteStrided */
|
||||
ADIOI_GEN_Flush, /* Flush */
|
||||
ADIOI_NFS_Resize, /* Resize */
|
||||
ADIOI_GEN_Delete, /* Delete */
|
||||
ADIOI_NFS_Feature, /* Features */
|
||||
"NFS:" /* fsname: just a string */
|
||||
};
|
83
ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs.h
Обычный файл
83
ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs.h
Обычный файл
@ -0,0 +1,83 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#ifndef AD_NFS_INCLUDE
|
||||
#define AD_NFS_INCLUDE
|
||||
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <fcntl.h>
|
||||
#include "adio.h"
|
||||
|
||||
#ifdef HAVE_SIGNAL_H
|
||||
#include <signal.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#ifdef HAVE_AIO_H
|
||||
#include <aio.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_AIO_H
|
||||
#include <sys/aio.h>
|
||||
#endif
|
||||
|
||||
/* Workaround for incomplete set of definitions if __REDIRECT is not
|
||||
defined and large file support is used in aio.h */
|
||||
#if !defined(__REDIRECT) && defined(__USE_FILE_OFFSET64)
|
||||
#define aiocb aiocb64
|
||||
#endif
|
||||
|
||||
int ADIOI_NFS_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset,
|
||||
int wr, MPI_Request *request);
|
||||
|
||||
#ifdef SX4
|
||||
#define lseek llseek
|
||||
#endif
|
||||
|
||||
void ADIOI_NFS_Open(ADIO_File fd, int *error_code);
|
||||
void ADIOI_NFS_ReadContig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int
|
||||
*error_code);
|
||||
void ADIOI_NFS_WriteContig(ADIO_File fd, const void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int
|
||||
*error_code);
|
||||
void ADIOI_NFS_IwriteContig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Request *request, int
|
||||
*error_code);
|
||||
void ADIOI_NFS_IreadContig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Request *request, int
|
||||
*error_code);
|
||||
int ADIOI_NFS_ReadDone(ADIO_Request *request, ADIO_Status *status, int
|
||||
*error_code);
|
||||
int ADIOI_NFS_WriteDone(ADIO_Request *request, ADIO_Status *status, int
|
||||
*error_code);
|
||||
void ADIOI_NFS_ReadComplete(ADIO_Request *request, ADIO_Status *status, int
|
||||
*error_code);
|
||||
void ADIOI_NFS_WriteComplete(ADIO_Request *request, ADIO_Status *status,
|
||||
int *error_code);
|
||||
void ADIOI_NFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int
|
||||
*error_code);
|
||||
void ADIOI_NFS_WriteStrided(ADIO_File fd, const void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int
|
||||
*error_code);
|
||||
void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int
|
||||
*error_code);
|
||||
void ADIOI_NFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code);
|
||||
void ADIOI_NFS_Get_shared_fp(ADIO_File fd, ADIO_Offset size, ADIO_Offset *shared_fp,
|
||||
int *error_code);
|
||||
void ADIOI_NFS_Set_shared_fp(ADIO_File fd, ADIO_Offset offset, int *error_code);
|
||||
void ADIOI_NFS_Resize(ADIO_File fd, ADIO_Offset size, int *error_code);
|
||||
int ADIOI_NFS_Feature(ADIO_File fd, int feature_flag);
|
||||
|
||||
#endif
|
19
ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_done.c
Обычный файл
19
ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_done.c
Обычный файл
@ -0,0 +1,19 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_nfs.h"
|
||||
|
||||
/* ADIOI_NFS_ReadDone - completion test for a read request.
 *
 * Always reports the request as complete: stores MPI_SUCCESS in
 * *error_code and returns 1.  request and status are not examined. */
int ADIOI_NFS_ReadDone(ADIO_Request *request, ADIO_Status *status,
                       int *error_code)
{
    (void) request;             /* unused */
    (void) status;              /* unused */

    *error_code = MPI_SUCCESS;
    return 1;
}
|
||||
/* ADIOI_NFS_WriteDone - completion test for a write request.
 *
 * Forwards to ADIOI_NFS_ReadDone() and returns its result. */
int ADIOI_NFS_WriteDone(ADIO_Request *request, ADIO_Status *status,
                        int *error_code)
{
    int done = ADIOI_NFS_ReadDone(request, status, error_code);

    return done;
}
|
65
ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_fcntl.c
Обычный файл
65
ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_fcntl.c
Обычный файл
@ -0,0 +1,65 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_nfs.h"
|
||||
#include "adio_extern.h"
|
||||
/* #ifdef MPISGI
|
||||
#include "mpisgi2.h"
|
||||
#endif */
|
||||
|
||||
/*
 * ADIOI_NFS_Fcntl - file-control operations for the NFS driver.
 *
 *   fd            file to operate on
 *   flag          one of ADIO_FCNTL_GET_FSIZE, ADIO_FCNTL_SET_DISKSPACE,
 *                 ADIO_FCNTL_SET_ATOMICITY
 *   fcntl_struct  in/out argument block; which member is used depends on flag
 *   error_code    out: MPI_SUCCESS or an MPI error code
 *
 * GET_FSIZE     takes a read lock on byte 0, seeks to the end of the file to
 *               obtain its size, unlocks, and then seeks back to
 *               fd->fp_sys_posn when that position is known (!= -1).
 * SET_DISKSPACE delegates to ADIOI_GEN_Prealloc.
 * SET_ATOMICITY normalises the requested value to 0/1 and stores it.
 * Any other flag yields an MPI_ERR_ARG error code.
 */
void ADIOI_NFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *error_code)
{
    static char myname[] = "ADIOI_NFS_FCNTL";
    int saved_errno = 0;

    switch(flag) {
    case ADIO_FCNTL_GET_FSIZE:
        ADIOI_READ_LOCK(fd, 0, SEEK_SET, 1);
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
        fcntl_struct->fsize = lseek(fd->fd_sys, 0, SEEK_END);
        /* Capture errno right away: the logging call, the unlock and the
         * restoring lseek below may all overwrite it. */
        if (fcntl_struct->fsize == -1) saved_errno = errno;
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
        ADIOI_UNLOCK(fd, 0, SEEK_SET, 1);

        /* Put the system file pointer back where the library believes it is. */
        if (fd->fp_sys_posn != -1) {
#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
            lseek(fd->fd_sys, fd->fp_sys_posn, SEEK_SET);
#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
        }

        if (fcntl_struct->fsize == -1) {
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE, myname,
                                               __LINE__, MPI_ERR_IO, "**io",
                                               "**io %s", strerror(saved_errno));
        }
        else *error_code = MPI_SUCCESS;
        break;

    case ADIO_FCNTL_SET_DISKSPACE:
        ADIOI_GEN_Prealloc(fd, fcntl_struct->diskspace, error_code);
        break;

    case ADIO_FCNTL_SET_ATOMICITY:
        fd->atomicity = (fcntl_struct->atomicity == 0) ? 0 : 1;
        *error_code = MPI_SUCCESS;
        break;

    default:
        /* --BEGIN ERROR HANDLING-- */
        *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__,
                                           MPI_ERR_ARG,
                                           "**flag", "**flag %d", flag);
        return;
        /* --END ERROR HANDLING-- */
    }
}
|
24
ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_features.c
Обычный файл
24
ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_features.c
Обычный файл
@ -0,0 +1,24 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
*
|
||||
* (C) 2008 by Argonne National Laboratory.
|
||||
* See COPYRIGHT in top-level directory.
|
||||
*/
|
||||
#include "adio.h"
|
||||
#include "ad_nfs.h"
|
||||
|
||||
/*
 * ADIOI_NFS_Feature - report whether the NFS driver supports a feature.
 *
 * Returns 1 for ADIO_SHARED_FP, ADIO_LOCKS, ADIO_SEQUENTIAL and
 * ADIO_DATA_SIEVING_WRITES.  Returns 0 for everything else, which
 * includes ADIO_SCALABLE_OPEN, ADIO_UNLINK_AFTER_CLOSE,
 * ADIO_SCALABLE_RESIZE and any unrecognised flag.  fd is not consulted.
 */
int ADIOI_NFS_Feature(ADIO_File fd, int flag)
{
    const int supported = (flag == ADIO_SHARED_FP)
                       || (flag == ADIO_LOCKS)
                       || (flag == ADIO_SEQUENTIAL)
                       || (flag == ADIO_DATA_SIEVING_WRITES);

    (void) fd;                  /* answer does not depend on the file */

    return supported ? 1 : 0;
}
|
105
ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_getsh.c
Обычный файл
105
ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_getsh.c
Обычный файл
@ -0,0 +1,105 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_nfs.h"
|
||||
|
||||
/* returns the current location of the shared_fp in terms of the
|
||||
no. of etypes relative to the current view, and also increments the
|
||||
shared_fp by the number of etypes to be accessed (incr) in the read
|
||||
or write following this function. */
|
||||
|
||||
void ADIOI_NFS_Get_shared_fp(ADIO_File fd, ADIO_Offset incr, ADIO_Offset *shared_fp,
|
||||
int *error_code)
|
||||
{
|
||||
ADIO_Offset new_fp;
|
||||
ssize_t err;
|
||||
MPI_Comm dupcommself;
|
||||
static char myname[] = "ADIOI_NFS_GET_SHARED_FP";
|
||||
|
||||
if (fd->shared_fp_fd == ADIO_FILE_NULL) {
|
||||
MPI_Comm_dup(MPI_COMM_SELF, &dupcommself);
|
||||
fd->shared_fp_fd = ADIO_Open(MPI_COMM_SELF, dupcommself,
|
||||
fd->shared_fp_fname,
|
||||
fd->file_system,
|
||||
fd->fns,
|
||||
ADIO_CREATE | ADIO_RDWR | ADIO_DELETE_ON_CLOSE,
|
||||
0, MPI_BYTE, MPI_BYTE, MPI_INFO_NULL,
|
||||
ADIO_PERM_NULL, error_code);
|
||||
if (*error_code != MPI_SUCCESS) return;
|
||||
*shared_fp = 0;
|
||||
ADIOI_WRITE_LOCK(fd->shared_fp_fd, 0, SEEK_SET, sizeof(ADIO_Offset));
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
|
||||
#endif
|
||||
err = read(fd->shared_fp_fd->fd_sys, shared_fp, sizeof(ADIO_Offset));
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
|
||||
#endif
|
||||
/* if the file is empty, the above read may return error
|
||||
(reading beyond end of file). In that case, shared_fp = 0,
|
||||
set above, is the correct value. */
|
||||
}
|
||||
else {
|
||||
ADIOI_WRITE_LOCK(fd->shared_fp_fd, 0, SEEK_SET, sizeof(ADIO_Offset));
|
||||
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
|
||||
#endif
|
||||
err = lseek(fd->shared_fp_fd->fd_sys, 0, SEEK_SET);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
|
||||
#endif
|
||||
if (err == 0) {
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
|
||||
#endif
|
||||
err = read(fd->shared_fp_fd->fd_sys, shared_fp,
|
||||
sizeof(ADIO_Offset));
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
|
||||
#endif
|
||||
}
|
||||
if (err == -1) {
|
||||
ADIOI_UNLOCK(fd->shared_fp_fd, 0, SEEK_SET, sizeof(ADIO_Offset));
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE, myname,
|
||||
__LINE__, MPI_ERR_IO, "**io",
|
||||
"**io %s", strerror(errno));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (incr == 0) {goto done;}
|
||||
|
||||
new_fp = *shared_fp + incr;
|
||||
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
|
||||
#endif
|
||||
err = lseek(fd->shared_fp_fd->fd_sys, 0, SEEK_SET);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
|
||||
#endif
|
||||
if (err == 0) {
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
|
||||
#endif
|
||||
err = write(fd->shared_fp_fd->fd_sys, &new_fp, sizeof(ADIO_Offset));
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
|
||||
#endif
|
||||
}
|
||||
done:
|
||||
ADIOI_UNLOCK(fd->shared_fp_fd, 0, SEEK_SET, sizeof(ADIO_Offset));
|
||||
if (err == -1) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__, MPI_ERR_IO,
|
||||
"**io",
|
||||
"**io %s", strerror(errno));
|
||||
}
|
||||
else *error_code = MPI_SUCCESS;
|
||||
}
|
13
ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_hints.c
Обычный файл
13
ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_hints.c
Обычный файл
@ -0,0 +1,13 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_nfs.h"
|
||||
|
||||
/*
 * ADIOI_NFS_SetInfo - apply user hints to an NFS file.
 *
 * The NFS driver adds no hint processing of its own here; the call is
 * forwarded unchanged to the generic ADIOI_GEN_SetInfo, which also
 * fills in *error_code.
 */
void ADIOI_NFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
{
    ADIOI_GEN_SetInfo(fd, users_info, error_code);
    return;
}
|
37
ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_iread.c
Обычный файл
37
ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_iread.c
Обычный файл
@ -0,0 +1,37 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_nfs.h"
|
||||
|
||||
#ifdef ROMIO_HAVE_WORKING_AIO
|
||||
/* nearly identical to ADIOI_GEN_IreadContig, except we lock around I/O */
|
||||
void ADIOI_NFS_IreadContig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Request *request,
|
||||
int *error_code)
|
||||
{
|
||||
MPI_Count len, typesize;
|
||||
int aio_errno = 0;
|
||||
static char myname[] = "ADIOI_NFS_IREADCONTIG";
|
||||
|
||||
MPI_Type_size_x(datatype, &typesize);
|
||||
len = count * typesize;
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) offset = fd->fp_ind;
|
||||
aio_errno = ADIOI_NFS_aio(fd, buf, len, offset, 0, request);
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += len;
|
||||
|
||||
fd->fp_sys_posn = -1;
|
||||
|
||||
if (aio_errno != 0) {
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
MPIO_ERR_CREATE_CODE_ERRNO(myname, aio_errno, error_code);
|
||||
return;
|
||||
/* --END ERROR HANDLING-- */
|
||||
}
|
||||
else *error_code = MPI_SUCCESS;
|
||||
}
|
||||
#endif
|
130
ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_iwrite.c
Обычный файл
130
ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_iwrite.c
Обычный файл
@ -0,0 +1,130 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_nfs.h"
|
||||
|
||||
#include "../../mpi-io/mpioimpl.h"
|
||||
#include "../../mpi-io/mpioprof.h"
|
||||
#include "mpiu_greq.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#ifdef ROMIO_HAVE_WORKING_AIO
|
||||
static MPIX_Grequest_class ADIOI_GEN_greq_class = 0;
|
||||
/* this routine is nearly identical to ADIOI_GEN_IwriteContig, except we lock
|
||||
* around I/O */
|
||||
void ADIOI_NFS_IwriteContig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Request *request, int *error_code)
|
||||
{
|
||||
MPI_Count len, typesize;
|
||||
int aio_errno = 0;
|
||||
static char myname[] = "ADIOI_NFS_IWRITECONTIG";
|
||||
|
||||
MPI_Type_size_x(datatype, &typesize);
|
||||
len = count * typesize;
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) offset = fd->fp_ind;
|
||||
aio_errno = ADIOI_NFS_aio(fd, buf, len, offset, 1, request);
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += len;
|
||||
|
||||
fd->fp_sys_posn = -1;
|
||||
|
||||
if (aio_errno != 0) {
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
MPIO_ERR_CREATE_CODE_ERRNO(myname, aio_errno, error_code);
|
||||
return;
|
||||
/* --END ERROR HANDLING-- */
|
||||
}
|
||||
else *error_code = MPI_SUCCESS;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* This function is for implementation convenience. It is not user-visible.
 * It takes care of the differences in the interface for nonblocking I/O
 * on various Unix machines! If wr==1 write, wr==0 read.
 *
 *   fd       file to operate on
 *   buf      data buffer (source for a write, destination for a read)
 *   len      number of bytes to transfer
 *   offset   absolute file offset of the transfer
 *   wr       nonzero for write, zero for read
 *   request  out: request handle representing the operation
 *
 * The byte range is locked (write lock for writes, read lock for reads)
 * only while the asynchronous request is being submitted.
 *
 * If submission fails with EAGAIN the transfer is carried out with the
 * matching blocking call instead and an already-completed request is
 * returned.  On every submission failure the bookkeeping structures
 * allocated here are released, since no request ever refers to them.
 *
 * Returns 0 on success, -errno on failure.
 */
#ifdef ROMIO_HAVE_WORKING_AIO
int ADIOI_NFS_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset,
                  int wr, MPI_Request *request)
{
    int err=-1, fd_sys;
    int error_code, this_errno;

    struct aiocb *aiocbp;
    ADIOI_AIO_Request *aio_req;
    MPI_Status status;

    fd_sys = fd->fd_sys;

    aio_req = (ADIOI_AIO_Request*)ADIOI_Calloc(sizeof(ADIOI_AIO_Request), 1);
    aiocbp = (struct aiocb *) ADIOI_Calloc(sizeof(struct aiocb), 1);
    aiocbp->aio_offset = offset;
    aiocbp->aio_buf    = buf;
    aiocbp->aio_nbytes = len;

#ifdef ROMIO_HAVE_STRUCT_AIOCB_WITH_AIO_WHENCE
    aiocbp->aio_whence = SEEK_SET;
#endif
#ifdef ROMIO_HAVE_STRUCT_AIOCB_WITH_AIO_FILDES
    aiocbp->aio_fildes = fd_sys;
#endif
#ifdef ROMIO_HAVE_STRUCT_AIOCB_WITH_AIO_SIGEVENT
# ifdef AIO_SIGNOTIFY_NONE
    aiocbp->aio_sigevent.sigev_notify = SIGEV_NONE;
# endif
    aiocbp->aio_sigevent.sigev_signo = 0;
#endif
#ifdef ROMIO_HAVE_STRUCT_AIOCB_WITH_AIO_REQPRIO
# ifdef AIO_PRIO_DFL
    aiocbp->aio_reqprio = AIO_PRIO_DFL;   /* not needed in DEC Unix 4.0 */
# else
    aiocbp->aio_reqprio = 0;
# endif
#endif

    if (wr) {
        ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
    }
    else {
        ADIOI_READ_LOCK(fd, offset, SEEK_SET, len);
    }

#ifndef ROMIO_HAVE_AIO_CALLS_NEED_FILEDES
    if (wr) err = aio_write(aiocbp);
    else err = aio_read(aiocbp);
#else
    /* Broken IBM interface */
    if (wr) err = aio_write(fd_sys, aiocbp);
    else err = aio_read(fd_sys, aiocbp);
#endif

    /* save errno before the unlock has a chance to overwrite it */
    this_errno = errno;
    ADIOI_UNLOCK(fd, offset, SEEK_SET, len);

    if (err == -1) {
        /* The request was never queued, so nothing will ever free these. */
        ADIOI_Free(aiocbp);
        ADIOI_Free(aio_req);

        if (this_errno == EAGAIN) {
            /* exceeded the max. no. of outstanding requests.
               treat this as a blocking request, in the direction that
               was actually asked for, and return a completed request. */
            if (wr) {
                ADIO_WriteContig(fd, buf, len, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
                                 offset, &status, &error_code);
            }
            else {
                ADIO_ReadContig(fd, buf, len, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
                                offset, &status, &error_code);
            }
            MPIO_Completed_request_create(&fd, len, &error_code, request);
            return 0;
        } else {
            return -this_errno;
        }
    }

    aio_req->aiocbp = aiocbp;
    /* the generalized-request class is created once, on first use */
    if (ADIOI_GEN_greq_class == 0) {
        MPIX_Grequest_class_create(ADIOI_GEN_aio_query_fn,
                                   ADIOI_GEN_aio_free_fn, MPIU_Greq_cancel_fn,
                                   ADIOI_GEN_aio_poll_fn, ADIOI_GEN_aio_wait_fn,
                                   &ADIOI_GEN_greq_class);
    }
    MPIX_Grequest_class_allocate(ADIOI_GEN_greq_class, aio_req, request);
    memcpy(&(aio_req->req), request, sizeof(MPI_Request));
    return 0;
}
#endif
|
58
ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_open.c
Обычный файл
58
ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_open.c
Обычный файл
@ -0,0 +1,58 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_nfs.h"
|
||||
|
||||
/*
 * ADIOI_NFS_Open - open fd->filename with open(2) for the NFS driver.
 *
 * The ADIO access-mode bits in fd->access_mode are translated to the
 * corresponding O_* flags.  When fd->perm is ADIO_PERM_NULL the creation
 * mode is derived from the process umask, otherwise fd->perm is used.
 *
 * On return fd->fd_sys holds the descriptor (or -1), fd->fd_direct is -1,
 * and for ADIO_APPEND both fd->fp_ind and fd->fp_sys_posn are set to the
 * result of seeking to the end of the file.
 *
 *   error_code  out: MPI_SUCCESS, or a code built from errno when open fails
 */
void ADIOI_NFS_Open(ADIO_File fd, int *error_code)
{
    static char myname[] = "ADIOI_NFS_OPEN";
    int open_flags = 0;
    int create_mode;

    if (fd->perm != ADIO_PERM_NULL) {
        create_mode = fd->perm;
    }
    else {
        /* umask() can only be read by setting it; restore it right away */
        mode_t cur_mask = umask(022);
        umask(cur_mask);
        create_mode = cur_mask ^ 0666;
    }

    if (fd->access_mode & ADIO_CREATE) open_flags |= O_CREAT;
    if (fd->access_mode & ADIO_RDONLY) open_flags |= O_RDONLY;
    if (fd->access_mode & ADIO_WRONLY) open_flags |= O_WRONLY;
    if (fd->access_mode & ADIO_RDWR)   open_flags |= O_RDWR;
    if (fd->access_mode & ADIO_EXCL)   open_flags |= O_EXCL;

#ifdef ADIOI_MPE_LOGGING
    MPE_Log_event( ADIOI_MPE_open_a, 0, NULL );
#endif
    fd->fd_sys = open(fd->filename, open_flags, create_mode);
#ifdef ADIOI_MPE_LOGGING
    MPE_Log_event( ADIOI_MPE_open_b, 0, NULL );
#endif
    fd->fd_direct = -1;

    if (fd->fd_sys == -1) {
        *error_code = ADIOI_Err_create_code(myname, fd->filename, errno);
        return;
    }

    if (fd->access_mode & ADIO_APPEND) {
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
        fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END);
        fd->fp_ind = fd->fp_sys_posn;
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
    }

    *error_code = MPI_SUCCESS;
}
|
553
ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_read.c
Обычный файл
553
ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_read.c
Обычный файл
@ -0,0 +1,553 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_nfs.h"
|
||||
#include "adio_extern.h"
|
||||
|
||||
void ADIOI_NFS_ReadContig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int *error_code)
|
||||
{
|
||||
int err=-1;
|
||||
MPI_Count datatype_size, len;
|
||||
static char myname[] = "ADIOI_NFS_READCONTIG";
|
||||
|
||||
MPI_Type_size_x(datatype, &datatype_size);
|
||||
len = datatype_size * count;
|
||||
|
||||
if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
|
||||
if (fd->fp_sys_posn != offset) {
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
|
||||
#endif
|
||||
lseek(fd->fd_sys, offset, SEEK_SET);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
|
||||
#endif
|
||||
}
|
||||
if (fd->atomicity)
|
||||
ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
|
||||
else ADIOI_READ_LOCK(fd, offset, SEEK_SET, len);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
|
||||
#endif
|
||||
err = read(fd->fd_sys, buf, len);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
|
||||
#endif
|
||||
ADIOI_UNLOCK(fd, offset, SEEK_SET, len);
|
||||
fd->fp_sys_posn = offset + err;
|
||||
/* individual file pointer not updated */
|
||||
}
|
||||
else { /* read from curr. location of ind. file pointer */
|
||||
offset = fd->fp_ind;
|
||||
if (fd->fp_sys_posn != fd->fp_ind) {
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
|
||||
#endif
|
||||
lseek(fd->fd_sys, fd->fp_ind, SEEK_SET);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
|
||||
#endif
|
||||
}
|
||||
if (fd->atomicity)
|
||||
ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
|
||||
else ADIOI_READ_LOCK(fd, offset, SEEK_SET, len);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
|
||||
#endif
|
||||
err = read(fd->fd_sys, buf, len);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
|
||||
#endif
|
||||
ADIOI_UNLOCK(fd, offset, SEEK_SET, len);
|
||||
fd->fp_ind += err;
|
||||
fd->fp_sys_posn = fd->fp_ind;
|
||||
}
|
||||
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err == -1) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__, MPI_ERR_IO,
|
||||
"**io", "**io %s", strerror(errno));
|
||||
return;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
MPIR_Status_set_bytes(status, datatype, err);
|
||||
#endif
|
||||
|
||||
*error_code = MPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Logging hook used by ADIOI_BUFFERED_READ: records an MPE event when
 * ADIOI_MPE_LOGGING is enabled and expands to nothing otherwise. */
#ifdef ADIOI_MPE_LOGGING
#define ADIOI_NFS_READ_LOG_EVENT(ev) MPE_Log_event( (ev), 0, NULL )
#else
#define ADIOI_NFS_READ_LOG_EVENT(ev)
#endif

/*
 * ADIOI_BUFFERED_READ - serve one request out of the staging buffer.
 *
 * This is a statement block, not a function: it reads and writes these
 * variables of the enclosing scope:
 *   fd, buf, readbuf, tmp_buf, readbuf_off, readbuf_len, req_off, req_len,
 *   userbuf_off, partial_read, max_bufsize, end_offset, err, err_flag
 *
 * Step 1: if the request starts at or beyond the end of the buffered
 *         window, restart the window at req_off and fill it from the file.
 * Step 2: while the request extends past the window, keep the tail that
 *         is still needed (partial_read bytes), reallocate the buffer and
 *         read further data in behind that tail.
 * Step 3: copy req_len bytes from the window to buf + userbuf_off.
 *
 * When fd->atomicity is not set, each read is bracketed by a read lock on
 * exactly the bytes being read.  A failing read sets err_flag.
 */
#define ADIOI_BUFFERED_READ \
{ \
    if (req_off >= readbuf_off + readbuf_len) { \
        readbuf_off = req_off; \
        readbuf_len = (int) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1)); \
        ADIOI_NFS_READ_LOG_EVENT(ADIOI_MPE_lseek_a); \
        lseek(fd->fd_sys, readbuf_off, SEEK_SET); \
        ADIOI_NFS_READ_LOG_EVENT(ADIOI_MPE_lseek_b); \
        if (!(fd->atomicity)) { \
            ADIOI_READ_LOCK(fd, readbuf_off, SEEK_SET, readbuf_len); \
        } \
        ADIOI_NFS_READ_LOG_EVENT(ADIOI_MPE_read_a); \
        err = read(fd->fd_sys, readbuf, readbuf_len); \
        ADIOI_NFS_READ_LOG_EVENT(ADIOI_MPE_read_b); \
        if (!(fd->atomicity)) { \
            ADIOI_UNLOCK(fd, readbuf_off, SEEK_SET, readbuf_len); \
        } \
        if (err == -1) err_flag = 1; \
    } \
    while (req_len > readbuf_off + readbuf_len - req_off) { \
        partial_read = (int) (readbuf_off + readbuf_len - req_off); \
        tmp_buf = (char *) ADIOI_Malloc(partial_read); \
        memcpy(tmp_buf, readbuf+readbuf_len-partial_read, partial_read); \
        ADIOI_Free(readbuf); \
        readbuf = (char *) ADIOI_Malloc(partial_read + max_bufsize); \
        memcpy(readbuf, tmp_buf, partial_read); \
        ADIOI_Free(tmp_buf); \
        readbuf_off += readbuf_len-partial_read; \
        readbuf_len = (int) (partial_read + ADIOI_MIN(max_bufsize, \
                                       end_offset-readbuf_off+1)); \
        ADIOI_NFS_READ_LOG_EVENT(ADIOI_MPE_lseek_a); \
        lseek(fd->fd_sys, readbuf_off+partial_read, SEEK_SET); \
        ADIOI_NFS_READ_LOG_EVENT(ADIOI_MPE_lseek_b); \
        if (!(fd->atomicity)) { \
            ADIOI_READ_LOCK(fd, readbuf_off+partial_read, SEEK_SET, readbuf_len-partial_read); \
        } \
        ADIOI_NFS_READ_LOG_EVENT(ADIOI_MPE_read_a); \
        err = read(fd->fd_sys, readbuf+partial_read, readbuf_len-partial_read); \
        ADIOI_NFS_READ_LOG_EVENT(ADIOI_MPE_read_b); \
        if (!(fd->atomicity)) { \
            ADIOI_UNLOCK(fd, readbuf_off+partial_read, SEEK_SET, readbuf_len-partial_read); \
        } \
        if (err == -1) err_flag = 1; \
    } \
    memcpy((char *)buf + userbuf_off, readbuf+req_off-readbuf_off, req_len); \
}
|
||||
|
||||
|
||||
void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int
|
||||
*error_code)
|
||||
{
|
||||
/* offset is in units of etype relative to the filetype. */
|
||||
|
||||
ADIOI_Flatlist_node *flat_buf, *flat_file;
|
||||
int i, j, k, err=-1, brd_size, frd_size=0, st_index=0;
|
||||
int bufsize, num, size, sum, n_etypes_in_filetype, size_in_filetype;
|
||||
int n_filetypes, etype_in_filetype;
|
||||
ADIO_Offset abs_off_in_filetype=0;
|
||||
int req_len, partial_read;
|
||||
MPI_Count filetype_size, etype_size, buftype_size;
|
||||
MPI_Aint filetype_extent, buftype_extent;
|
||||
int buf_count, buftype_is_contig, filetype_is_contig;
|
||||
ADIO_Offset userbuf_off;
|
||||
ADIO_Offset off, req_off, disp, end_offset=0, readbuf_off, start_off;
|
||||
char *readbuf, *tmp_buf, *value;
|
||||
int st_frd_size, st_n_filetypes, readbuf_len;
|
||||
int new_brd_size, new_frd_size, err_flag=0, info_flag, max_bufsize;
|
||||
|
||||
static char myname[] = "ADIOI_NFS_READSTRIDED";
|
||||
|
||||
ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
|
||||
ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
|
||||
|
||||
MPI_Type_size_x(fd->filetype, &filetype_size);
|
||||
if ( ! filetype_size ) {
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
MPIR_Status_set_bytes(status, datatype, 0);
|
||||
#endif
|
||||
*error_code = MPI_SUCCESS;
|
||||
return;
|
||||
}
|
||||
|
||||
MPI_Type_extent(fd->filetype, &filetype_extent);
|
||||
MPI_Type_size_x(datatype, &buftype_size);
|
||||
MPI_Type_extent(datatype, &buftype_extent);
|
||||
etype_size = fd->etype_size;
|
||||
|
||||
bufsize = buftype_size * count;
|
||||
|
||||
/* get max_bufsize from the info object. */
|
||||
|
||||
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
|
||||
ADIOI_Info_get(fd->info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, value,
|
||||
&info_flag);
|
||||
max_bufsize = atoi(value);
|
||||
ADIOI_Free(value);
|
||||
|
||||
if (!buftype_is_contig && filetype_is_contig) {
|
||||
|
||||
/* noncontiguous in memory, contiguous in file. */
|
||||
|
||||
ADIOI_Flatten_datatype(datatype);
|
||||
flat_buf = ADIOI_Flatlist;
|
||||
while (flat_buf->type != datatype) flat_buf = flat_buf->next;
|
||||
|
||||
off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind :
|
||||
fd->disp + etype_size * offset;
|
||||
|
||||
start_off = off;
|
||||
end_offset = off + bufsize - 1;
|
||||
readbuf_off = off;
|
||||
readbuf = (char *) ADIOI_Malloc(max_bufsize);
|
||||
readbuf_len = (int) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));
|
||||
|
||||
/* if atomicity is true, lock (exclusive) the region to be accessed */
|
||||
if (fd->atomicity)
|
||||
ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
|
||||
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
|
||||
#endif
|
||||
lseek(fd->fd_sys, readbuf_off, SEEK_SET);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
|
||||
#endif
|
||||
if (!(fd->atomicity)) ADIOI_READ_LOCK(fd, readbuf_off, SEEK_SET, readbuf_len);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
|
||||
#endif
|
||||
err = read(fd->fd_sys, readbuf, readbuf_len);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
|
||||
#endif
|
||||
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, readbuf_off, SEEK_SET, readbuf_len);
|
||||
if (err == -1) err_flag = 1;
|
||||
|
||||
for (j=0; j<count; j++)
|
||||
for (i=0; i<flat_buf->count; i++) {
|
||||
userbuf_off = j*buftype_extent + flat_buf->indices[i];
|
||||
req_off = off;
|
||||
req_len = flat_buf->blocklens[i];
|
||||
ADIOI_BUFFERED_READ
|
||||
off += flat_buf->blocklens[i];
|
||||
}
|
||||
|
||||
if (fd->atomicity)
|
||||
ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;
|
||||
|
||||
ADIOI_Free(readbuf); /* malloced in the buffered_read macro */
|
||||
|
||||
if (err_flag) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE, myname,
|
||||
__LINE__, MPI_ERR_IO, "**io",
|
||||
"**io %s", strerror(errno));
|
||||
}
|
||||
else *error_code = MPI_SUCCESS;
|
||||
}
|
||||
|
||||
else { /* noncontiguous in file */
|
||||
|
||||
/* filetype already flattened in ADIO_Open */
|
||||
flat_file = ADIOI_Flatlist;
|
||||
while (flat_file->type != fd->filetype) flat_file = flat_file->next;
|
||||
disp = fd->disp;
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) {
|
||||
/* Wei-keng reworked type processing to be a bit more efficient */
|
||||
offset = fd->fp_ind - disp;
|
||||
n_filetypes = (offset - flat_file->indices[0]) / filetype_extent;
|
||||
offset -= (ADIO_Offset)n_filetypes * filetype_extent;
|
||||
/* now offset is local to this extent */
|
||||
|
||||
/* find the block where offset is located, skip blocklens[i]==0 */
|
||||
for (i=0; i<flat_file->count; i++) {
|
||||
ADIO_Offset dist;
|
||||
if (flat_file->blocklens[i] == 0) continue;
|
||||
dist = flat_file->indices[i] + flat_file->blocklens[i] - offset;
|
||||
/* frd_size is from offset to the end of block i */
|
||||
if (dist == 0) {
|
||||
i++;
|
||||
offset = flat_file->indices[i];
|
||||
frd_size = flat_file->blocklens[i];
|
||||
break;
|
||||
}
|
||||
if (dist > 0 ) {
|
||||
frd_size = dist;
|
||||
break;
|
||||
}
|
||||
}
|
||||
st_index = i; /* starting index in flat_file->indices[] */
|
||||
offset += disp + (ADIO_Offset)n_filetypes*filetype_extent;
|
||||
}
|
||||
else {
|
||||
n_etypes_in_filetype = filetype_size/etype_size;
|
||||
n_filetypes = (int) (offset / n_etypes_in_filetype);
|
||||
etype_in_filetype = (int) (offset % n_etypes_in_filetype);
|
||||
size_in_filetype = etype_in_filetype * etype_size;
|
||||
|
||||
sum = 0;
|
||||
for (i=0; i<flat_file->count; i++) {
|
||||
sum += flat_file->blocklens[i];
|
||||
if (sum > size_in_filetype) {
|
||||
st_index = i;
|
||||
frd_size = sum - size_in_filetype;
|
||||
abs_off_in_filetype = flat_file->indices[i] +
|
||||
size_in_filetype - (sum - flat_file->blocklens[i]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* abs. offset in bytes in the file */
|
||||
offset = disp + (ADIO_Offset) n_filetypes*filetype_extent +
|
||||
abs_off_in_filetype;
|
||||
}
|
||||
|
||||
start_off = offset;
|
||||
|
||||
/* Wei-keng Liao: read request is within a single flat_file contig
|
||||
* block e.g. with subarray types that actually describe the whole
|
||||
* array */
|
||||
if (buftype_is_contig && bufsize <= frd_size) {
|
||||
ADIO_ReadContig(fd, buf, bufsize, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
|
||||
offset, status, error_code);
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) {
|
||||
/* update MPI-IO file pointer to point to the first byte that
|
||||
* can be accessed in the fileview. */
|
||||
fd->fp_ind = offset + bufsize;
|
||||
if (bufsize == frd_size) {
|
||||
do {
|
||||
st_index++;
|
||||
if (st_index == flat_file->count) {
|
||||
st_index = 0;
|
||||
n_filetypes++;
|
||||
}
|
||||
} while (flat_file->blocklens[st_index] == 0);
|
||||
fd->fp_ind = disp + flat_file->indices[st_index]
|
||||
+ n_filetypes*filetype_extent;
|
||||
}
|
||||
}
|
||||
fd->fp_sys_posn = -1; /* set it to null. */
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
MPIR_Status_set_bytes(status, datatype, bufsize);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
/* Calculate end_offset, the last byte-offset that will be accessed.
|
||||
e.g., if start_offset=0 and 100 bytes to be read, end_offset=99*/
|
||||
|
||||
st_frd_size = frd_size;
|
||||
st_n_filetypes = n_filetypes;
|
||||
i = 0;
|
||||
j = st_index;
|
||||
off = offset;
|
||||
frd_size = ADIOI_MIN(st_frd_size, bufsize);
|
||||
while (i < bufsize) {
|
||||
i += frd_size;
|
||||
end_offset = off + frd_size - 1;
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
while (flat_file->blocklens[j]==0) {
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
}
|
||||
|
||||
off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent;
|
||||
frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
|
||||
}
|
||||
|
||||
/* if atomicity is true, lock (exclusive) the region to be accessed */
|
||||
if (fd->atomicity)
|
||||
ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
|
||||
|
||||
/* initial read into readbuf */
|
||||
readbuf_off = offset;
|
||||
readbuf = (char *) ADIOI_Malloc(max_bufsize);
|
||||
readbuf_len = (int) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));
|
||||
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
|
||||
#endif
|
||||
lseek(fd->fd_sys, offset, SEEK_SET);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
|
||||
#endif
|
||||
if (!(fd->atomicity)) ADIOI_READ_LOCK(fd, offset, SEEK_SET, readbuf_len);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
|
||||
#endif
|
||||
err = read(fd->fd_sys, readbuf, readbuf_len);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
|
||||
#endif
|
||||
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, offset, SEEK_SET, readbuf_len);
|
||||
|
||||
if (err == -1) err_flag = 1;
|
||||
|
||||
if (buftype_is_contig && !filetype_is_contig) {
|
||||
|
||||
/* contiguous in memory, noncontiguous in file. should be the most
|
||||
common case. */
|
||||
|
||||
i = 0;
|
||||
j = st_index;
|
||||
off = offset;
|
||||
n_filetypes = st_n_filetypes;
|
||||
frd_size = ADIOI_MIN(st_frd_size, bufsize);
|
||||
while (i < bufsize) {
|
||||
if (frd_size) {
|
||||
/* TYPE_UB and TYPE_LB can result in
|
||||
frd_size = 0. save system call in such cases */
|
||||
/* lseek(fd->fd_sys, off, SEEK_SET);
|
||||
err = read(fd->fd_sys, ((char *) buf) + i, frd_size);*/
|
||||
|
||||
req_off = off;
|
||||
req_len = frd_size;
|
||||
userbuf_off = i;
|
||||
ADIOI_BUFFERED_READ
|
||||
}
|
||||
i += frd_size;
|
||||
|
||||
if (off + frd_size < disp + flat_file->indices[j] +
|
||||
flat_file->blocklens[j] + (ADIO_Offset) n_filetypes*filetype_extent)
|
||||
off += frd_size;
|
||||
/* did not reach end of contiguous block in filetype.
|
||||
no more I/O needed. off is incremented by frd_size. */
|
||||
else {
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
while (flat_file->blocklens[j]==0) {
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
}
|
||||
off = disp + flat_file->indices[j] +
|
||||
(ADIO_Offset) n_filetypes*filetype_extent;
|
||||
frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* noncontiguous in memory as well as in file */
|
||||
|
||||
ADIOI_Flatten_datatype(datatype);
|
||||
flat_buf = ADIOI_Flatlist;
|
||||
while (flat_buf->type != datatype) flat_buf = flat_buf->next;
|
||||
|
||||
k = num = buf_count = 0;
|
||||
i = (int) (flat_buf->indices[0]);
|
||||
j = st_index;
|
||||
off = offset;
|
||||
n_filetypes = st_n_filetypes;
|
||||
frd_size = st_frd_size;
|
||||
brd_size = flat_buf->blocklens[0];
|
||||
|
||||
while (num < bufsize) {
|
||||
size = ADIOI_MIN(frd_size, brd_size);
|
||||
if (size) {
|
||||
/* lseek(fd->fd_sys, off, SEEK_SET);
|
||||
err = read(fd->fd_sys, ((char *) buf) + i, size); */
|
||||
|
||||
req_off = off;
|
||||
req_len = size;
|
||||
userbuf_off = i;
|
||||
ADIOI_BUFFERED_READ
|
||||
}
|
||||
|
||||
new_frd_size = frd_size;
|
||||
new_brd_size = brd_size;
|
||||
|
||||
if (size == frd_size) {
|
||||
/* reached end of contiguous block in file */
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
while (flat_file->blocklens[j]==0) {
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
}
|
||||
off = disp + flat_file->indices[j] +
|
||||
(ADIO_Offset) n_filetypes*filetype_extent;
|
||||
|
||||
new_frd_size = flat_file->blocklens[j];
|
||||
if (size != brd_size) {
|
||||
i += size;
|
||||
new_brd_size -= size;
|
||||
}
|
||||
}
|
||||
|
||||
if (size == brd_size) {
|
||||
/* reached end of contiguous block in memory */
|
||||
|
||||
k = (k + 1)%flat_buf->count;
|
||||
buf_count++;
|
||||
i = (int) (buftype_extent*(buf_count/flat_buf->count) +
|
||||
flat_buf->indices[k]);
|
||||
new_brd_size = flat_buf->blocklens[k];
|
||||
if (size != frd_size) {
|
||||
off += size;
|
||||
new_frd_size -= size;
|
||||
}
|
||||
}
|
||||
num += size;
|
||||
frd_size = new_frd_size;
|
||||
brd_size = new_brd_size;
|
||||
}
|
||||
}
|
||||
|
||||
if (fd->atomicity)
|
||||
ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;
|
||||
|
||||
ADIOI_Free(readbuf); /* malloced in the buffered_read macro */
|
||||
|
||||
if (err_flag) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE, myname,
|
||||
__LINE__, MPI_ERR_IO, "**io",
|
||||
"**io %s", strerror(errno));
|
||||
}
|
||||
else *error_code = MPI_SUCCESS;
|
||||
}
|
||||
|
||||
fd->fp_sys_posn = -1; /* set it to null. */
|
||||
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
MPIR_Status_set_bytes(status, datatype, bufsize);
|
||||
/* This is a temporary way of filling in status. The right way is to
|
||||
keep track of how much data was actually read and placed in buf
|
||||
by ADIOI_BUFFERED_READ. */
|
||||
#endif
|
||||
|
||||
if (!buftype_is_contig) ADIOI_Delete_flattened(datatype);
|
||||
}
|
35
ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_resize.c
Обычный файл
35
ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_resize.c
Обычный файл
@ -0,0 +1,35 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2004 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_nfs.h"
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
/* NFS resize
|
||||
*
|
||||
* Note: we resize on all processors to guarantee that all processors
|
||||
* will have updated cache values. This used to be the generic
|
||||
* implementation used by the majority of the ADIO implementations.
|
||||
*/
|
||||
void ADIOI_NFS_Resize(ADIO_File fd, ADIO_Offset size, int *error_code)
|
||||
{
|
||||
int err;
|
||||
static char myname[] = "ADIOI_NFS_RESIZE";
|
||||
|
||||
err = ftruncate(fd->fd_sys, size);
|
||||
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err == -1) {
|
||||
*error_code = ADIOI_Err_create_code(myname, fd->filename, errno);
|
||||
return;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
|
||||
*error_code = MPI_SUCCESS;
|
||||
}
|
74
ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_setsh.c
Обычный файл
74
ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_setsh.c
Обычный файл
@ -0,0 +1,74 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_nfs.h"
|
||||
|
||||
/* set the shared file pointer to "offset" etypes relative to the current
|
||||
view */
|
||||
|
||||
/*
|
||||
This looks very similar to ADIOI_GEN_Set_shared_fp, except this
|
||||
function avoids locking the file twice. The generic version does
|
||||
|
||||
Write lock
|
||||
ADIO_WriteContig
|
||||
Unlock
|
||||
|
||||
For NFS, ADIOI_NFS_WriteContig does a lock before writing to disable
|
||||
caching. To avoid the lock being called twice, this version for NFS does
|
||||
|
||||
Write lock
|
||||
Lseek
|
||||
Write
|
||||
Unlock
|
||||
|
||||
*/
|
||||
|
||||
void ADIOI_NFS_Set_shared_fp(ADIO_File fd, ADIO_Offset offset, int *error_code)
|
||||
{
|
||||
ssize_t err;
|
||||
MPI_Comm dupcommself;
|
||||
static char myname[] = "ADIOI_NFS_SET_SHARED_FP";
|
||||
|
||||
if (fd->shared_fp_fd == ADIO_FILE_NULL) {
|
||||
MPI_Comm_dup(MPI_COMM_SELF, &dupcommself);
|
||||
fd->shared_fp_fd = ADIO_Open(MPI_COMM_SELF, dupcommself,
|
||||
fd->shared_fp_fname,
|
||||
fd->file_system, fd->fns,
|
||||
ADIO_CREATE | ADIO_RDWR | ADIO_DELETE_ON_CLOSE,
|
||||
0, MPI_BYTE, MPI_BYTE, MPI_INFO_NULL,
|
||||
ADIO_PERM_NULL, error_code);
|
||||
}
|
||||
|
||||
if (*error_code != MPI_SUCCESS) return;
|
||||
|
||||
ADIOI_WRITE_LOCK(fd->shared_fp_fd, 0, SEEK_SET, sizeof(ADIO_Offset));
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
|
||||
#endif
|
||||
lseek(fd->shared_fp_fd->fd_sys, 0, SEEK_SET);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
|
||||
#endif
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
|
||||
#endif
|
||||
err = write(fd->shared_fp_fd->fd_sys, &offset, sizeof(ADIO_Offset));
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
|
||||
#endif
|
||||
ADIOI_UNLOCK(fd->shared_fp_fd, 0, SEEK_SET, sizeof(ADIO_Offset));
|
||||
|
||||
if (err == -1) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__, MPI_ERR_IO,
|
||||
"**io",
|
||||
"**io %s", strerror(errno));
|
||||
}
|
||||
else *error_code = MPI_SUCCESS;
|
||||
}
|
||||
|
20
ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_wait.c
Обычный файл
20
ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_wait.c
Обычный файл
@ -0,0 +1,20 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_nfs.h"
|
||||
|
||||
/*
 * ADIOI_NFS_ReadComplete - completion hook for a read request.
 *
 * The body is empty: none of request, status or error_code is read or
 * written here.
 */
void ADIOI_NFS_ReadComplete(ADIO_Request *request, ADIO_Status *status,
                            int *error_code)
{
    /* nothing to do */
}
|
||||
|
||||
|
||||
/*
 * ADIOI_NFS_WriteComplete - completion hook for a write request.
 *
 * Forwards all three arguments, unchanged, to ADIOI_NFS_ReadComplete and
 * does nothing else itself.
 */
void ADIOI_NFS_WriteComplete(ADIO_Request *request, ADIO_Status *status,
                             int *error_code)
{
    /* read and write completion share a single implementation */
    ADIOI_NFS_ReadComplete(request, status, error_code);
}
|
679
ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_write.c
Обычный файл
679
ompi/mca/io/romio314/romio/adio/ad_nfs/ad_nfs_write.c
Обычный файл
@ -0,0 +1,679 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_nfs.h"
|
||||
#include "adio_extern.h"
|
||||
|
||||
void ADIOI_NFS_WriteContig(ADIO_File fd, const void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int *error_code)
|
||||
{
|
||||
int err=-1;
|
||||
MPI_Count datatype_size, len;
|
||||
static char myname[] = "ADIOI_NFS_WRITECONTIG";
|
||||
|
||||
MPI_Type_size_x(datatype, &datatype_size);
|
||||
len = datatype_size * count;
|
||||
|
||||
if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
|
||||
if (fd->fp_sys_posn != offset) {
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
|
||||
#endif
|
||||
lseek(fd->fd_sys, offset, SEEK_SET);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
|
||||
#endif
|
||||
}
|
||||
ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
|
||||
#endif
|
||||
err = write(fd->fd_sys, buf, len);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
|
||||
#endif
|
||||
ADIOI_UNLOCK(fd, offset, SEEK_SET, len);
|
||||
fd->fp_sys_posn = offset + err;
|
||||
/* individual file pointer not updated */
|
||||
}
|
||||
else { /* write from curr. location of ind. file pointer */
|
||||
offset = fd->fp_ind;
|
||||
if (fd->fp_sys_posn != fd->fp_ind) {
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
|
||||
#endif
|
||||
lseek(fd->fd_sys, fd->fp_ind, SEEK_SET);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
|
||||
#endif
|
||||
}
|
||||
ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
|
||||
#endif
|
||||
err = write(fd->fd_sys, buf, len);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
|
||||
#endif
|
||||
ADIOI_UNLOCK(fd, offset, SEEK_SET, len);
|
||||
fd->fp_ind += err;
|
||||
fd->fp_sys_posn = fd->fp_ind;
|
||||
}
|
||||
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err == -1) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__, MPI_ERR_IO,
|
||||
"**io",
|
||||
"**io %s", strerror(errno));
|
||||
return;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
MPIR_Status_set_bytes(status, datatype, err);
|
||||
#endif
|
||||
|
||||
*error_code = MPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/* ADIOI_BUFFERED_WRITE: copy req_len bytes from ((char *)buf + userbuf_off)
   to file offset req_off by way of the staging buffer writebuf, doing a
   read-modify-write of the file.
   The macro expands to a bare { ... } block and uses these names from the
   expansion site: fd, buf, req_off, req_len, userbuf_off, writebuf,
   writebuf_off, writebuf_len, write_sz, max_bufsize, end_offset, err,
   err_flag, error_code, myname and the label fn_exit.
   Steps:
   1. If req_off is at or beyond the end of the current window
      [writebuf_off, writebuf_off + writebuf_len), the window is written
      out, unlocked (when atomicity is off), moved to start at req_off,
      locked again (when atomicity is off) and refilled with read().
   2. As much of the request as fits in the window is memcpy'd into it.
   3. While part of the request remains, the window is written out, advanced
      by writebuf_len, refilled with read(), and the next piece is copied to
      its start.
   A failed write() only sets err_flag; a failed read() stores an error in
   *error_code and jumps to fn_exit.
   NOTE(review): on the read-failure path the ADIOI_WRITE_LOCK taken just
   before the read is not released inside this macro -- confirm that the code
   at fn_exit (or the caller) releases it.
   The two definitions below differ only in the MPE_Log_event calls. */
#ifdef ADIOI_MPE_LOGGING
|
||||
#define ADIOI_BUFFERED_WRITE \
|
||||
{ \
|
||||
if (req_off >= writebuf_off + writebuf_len) { \
|
||||
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL ); \
|
||||
lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
|
||||
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL ); \
|
||||
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL ); \
|
||||
err = write(fd->fd_sys, writebuf, writebuf_len); \
|
||||
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL ); \
|
||||
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
|
||||
if (err == -1) err_flag = 1; \
|
||||
writebuf_off = req_off; \
|
||||
writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
|
||||
if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
|
||||
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL ); \
|
||||
lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
|
||||
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL ); \
|
||||
MPE_Log_event( ADIOI_MPE_read_a, 0, NULL ); \
|
||||
err = read(fd->fd_sys, writebuf, writebuf_len); \
|
||||
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL ); \
|
||||
if (err == -1) { \
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS, \
|
||||
MPIR_ERR_RECOVERABLE, myname, \
|
||||
__LINE__, MPI_ERR_IO, \
|
||||
"**ioRMWrdwr", 0); \
|
||||
goto fn_exit; \
|
||||
} \
|
||||
} \
|
||||
write_sz = (int) (ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \
|
||||
memcpy(writebuf+req_off-writebuf_off, (char *)buf +userbuf_off, write_sz);\
|
||||
while (write_sz != req_len) { \
|
||||
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL ); \
|
||||
lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
|
||||
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL ); \
|
||||
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL ); \
|
||||
err = write(fd->fd_sys, writebuf, writebuf_len); \
|
||||
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL ); \
|
||||
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
|
||||
if (err == -1) err_flag = 1; \
|
||||
req_len -= write_sz; \
|
||||
userbuf_off += write_sz; \
|
||||
writebuf_off += writebuf_len; \
|
||||
writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
|
||||
if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
|
||||
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL ); \
|
||||
lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
|
||||
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL ); \
|
||||
MPE_Log_event( ADIOI_MPE_read_a, 0, NULL ); \
|
||||
err = read(fd->fd_sys, writebuf, writebuf_len); \
|
||||
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL ); \
|
||||
if (err == -1) { \
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS, \
|
||||
MPIR_ERR_RECOVERABLE, myname, \
|
||||
__LINE__, MPI_ERR_IO, \
|
||||
"**ioRMWrdwr", 0); \
|
||||
goto fn_exit; \
|
||||
} \
|
||||
write_sz = ADIOI_MIN(req_len, writebuf_len); \
|
||||
memcpy(writebuf, (char *)buf + userbuf_off, write_sz);\
|
||||
} \
|
||||
}
|
||||
#else
|
||||
#define ADIOI_BUFFERED_WRITE \
|
||||
{ \
|
||||
if (req_off >= writebuf_off + writebuf_len) { \
|
||||
lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
|
||||
err = write(fd->fd_sys, writebuf, writebuf_len); \
|
||||
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
|
||||
if (err == -1) err_flag = 1; \
|
||||
writebuf_off = req_off; \
|
||||
writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
|
||||
if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
|
||||
lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
|
||||
err = read(fd->fd_sys, writebuf, writebuf_len); \
|
||||
if (err == -1) { \
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS, \
|
||||
MPIR_ERR_RECOVERABLE, myname, \
|
||||
__LINE__, MPI_ERR_IO, \
|
||||
"**ioRMWrdwr", 0); \
|
||||
goto fn_exit; \
|
||||
} \
|
||||
} \
|
||||
write_sz = (int) (ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \
|
||||
memcpy(writebuf+req_off-writebuf_off, (char *)buf +userbuf_off, write_sz);\
|
||||
while (write_sz != req_len) { \
|
||||
lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
|
||||
err = write(fd->fd_sys, writebuf, writebuf_len); \
|
||||
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
|
||||
if (err == -1) err_flag = 1; \
|
||||
req_len -= write_sz; \
|
||||
userbuf_off += write_sz; \
|
||||
writebuf_off += writebuf_len; \
|
||||
writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
|
||||
if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
|
||||
lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
|
||||
err = read(fd->fd_sys, writebuf, writebuf_len); \
|
||||
if (err == -1) { \
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS, \
|
||||
MPIR_ERR_RECOVERABLE, myname, \
|
||||
__LINE__, MPI_ERR_IO, \
|
||||
"**ioRMWrdwr", 0); \
|
||||
goto fn_exit; \
|
||||
} \
|
||||
write_sz = ADIOI_MIN(req_len, writebuf_len); \
|
||||
memcpy(writebuf, (char *)buf + userbuf_off, write_sz);\
|
||||
} \
|
||||
}
|
||||
#endif
|
||||
|
||||
/* ADIOI_BUFFERED_WRITE_WITHOUT_READ: used when the filetype is contiguous
   and the buftype is not.  It copies req_len bytes from
   ((char *)buf + userbuf_off) to file offset req_off through the staging
   buffer writebuf, and does not do a read-modify-write: the window is never
   refilled from the file.
   Whenever the window [writebuf_off, writebuf_off + writebuf_len) has to be
   written out, the write is bracketed by ADIOI_WRITE_LOCK / ADIOI_UNLOCK on
   that range if fd->atomicity is off; no lock call is made here if it is on.
   A failed write() only sets err_flag.
   The macro expands to a bare { ... } block and uses these names from the
   expansion site: fd, buf, req_off, req_len, userbuf_off, writebuf,
   writebuf_off, writebuf_len, write_sz, max_bufsize, end_offset, err and
   err_flag.
   The two definitions below differ only in the MPE_Log_event calls. */
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
#define ADIOI_BUFFERED_WRITE_WITHOUT_READ \
|
||||
{ \
|
||||
if (req_off >= writebuf_off + writebuf_len) { \
|
||||
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL ); \
|
||||
lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
|
||||
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL ); \
|
||||
if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
|
||||
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL ); \
|
||||
err = write(fd->fd_sys, writebuf, writebuf_len); \
|
||||
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL ); \
|
||||
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
|
||||
if (err == -1) err_flag = 1; \
|
||||
writebuf_off = req_off; \
|
||||
writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
|
||||
} \
|
||||
write_sz = (int) (ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \
|
||||
memcpy(writebuf+req_off-writebuf_off, (char *)buf +userbuf_off, write_sz);\
|
||||
while (write_sz != req_len) { \
|
||||
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL ); \
|
||||
lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
|
||||
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL ); \
|
||||
if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
|
||||
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL ); \
|
||||
err = write(fd->fd_sys, writebuf, writebuf_len); \
|
||||
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL ); \
|
||||
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
|
||||
if (err == -1) err_flag = 1; \
|
||||
req_len -= write_sz; \
|
||||
userbuf_off += write_sz; \
|
||||
writebuf_off += writebuf_len; \
|
||||
writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
|
||||
write_sz = ADIOI_MIN(req_len, writebuf_len); \
|
||||
memcpy(writebuf, (char *)buf + userbuf_off, write_sz);\
|
||||
} \
|
||||
}
|
||||
#else
|
||||
#define ADIOI_BUFFERED_WRITE_WITHOUT_READ \
|
||||
{ \
|
||||
if (req_off >= writebuf_off + writebuf_len) { \
|
||||
lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
|
||||
if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
|
||||
err = write(fd->fd_sys, writebuf, writebuf_len); \
|
||||
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
|
||||
if (err == -1) err_flag = 1; \
|
||||
writebuf_off = req_off; \
|
||||
writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
|
||||
} \
|
||||
write_sz = (int) (ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \
|
||||
memcpy(writebuf+req_off-writebuf_off, (char *)buf +userbuf_off, write_sz);\
|
||||
while (write_sz != req_len) { \
|
||||
lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
|
||||
if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
|
||||
err = write(fd->fd_sys, writebuf, writebuf_len); \
|
||||
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
|
||||
if (err == -1) err_flag = 1; \
|
||||
req_len -= write_sz; \
|
||||
userbuf_off += write_sz; \
|
||||
writebuf_off += writebuf_len; \
|
||||
writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
|
||||
write_sz = ADIOI_MIN(req_len, writebuf_len); \
|
||||
memcpy(writebuf, (char *)buf + userbuf_off, write_sz);\
|
||||
} \
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/* ADIOI_NFS_WriteStrided: write count items of datatype from buf to the
   file when the memory layout and/or the file view (fd->filetype) is
   noncontiguous.  Data is staged through writebuf, a buffer of max_bufsize
   bytes, where max_bufsize is read from the "ind_wr_buffer_size" info key.

   Three cases are handled:
   - noncontiguous memory, contiguous file: pieces are packed into writebuf
     with ADIOI_BUFFERED_WRITE_WITHOUT_READ;
   - contiguous memory, request fits inside one contiguous file block: the
     call is handed to ADIO_WriteContig;
   - noncontiguous file (memory contiguous or not): read-modify-write
     through writebuf with ADIOI_BUFFERED_WRITE.

   fd            - open ADIO file
   buf           - user data
   count         - number of datatype items in buf
   datatype      - memory datatype
   file_ptr_type - ADIO_INDIVIDUAL: start at fd->fp_ind and update it on
                   completion; otherwise "offset" is used
   offset        - in units of etype relative to the filetype
   status        - out: filled with bufsize bytes when HAVE_STATUS_SET_BYTES
                   is defined
   error_code    - out: MPI_SUCCESS or an MPI_ERR_IO code

   fd->fp_sys_posn is set to -1 on the normal completion paths. */
void ADIOI_NFS_WriteStrided(ADIO_File fd, const void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status, int
|
||||
*error_code)
|
||||
{
|
||||
/* offset is in units of etype relative to the filetype. */
|
||||
|
||||
ADIOI_Flatlist_node *flat_buf, *flat_file;
|
||||
int i, j, k, err=-1, bwr_size, fwr_size=0, st_index=0;
|
||||
int bufsize, num, size, sum, n_etypes_in_filetype, size_in_filetype;
|
||||
int n_filetypes, etype_in_filetype;
|
||||
ADIO_Offset abs_off_in_filetype=0;
|
||||
int req_len;
|
||||
MPI_Count filetype_size, etype_size, buftype_size;
|
||||
MPI_Aint filetype_extent, buftype_extent;
|
||||
int buf_count, buftype_is_contig, filetype_is_contig;
|
||||
ADIO_Offset userbuf_off;
|
||||
ADIO_Offset off, req_off, disp, end_offset=0, writebuf_off, start_off;
|
||||
char *writebuf=NULL, *value;
|
||||
int st_fwr_size, st_n_filetypes, writebuf_len, write_sz;
|
||||
int new_bwr_size, new_fwr_size, err_flag=0, info_flag, max_bufsize;
|
||||
static char myname[] = "ADIOI_NFS_WRITESTRIDED";
|
||||
|
||||
ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
|
||||
ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
|
||||
|
||||
MPI_Type_size_x(fd->filetype, &filetype_size);
|
||||
if ( ! filetype_size ) {
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
MPIR_Status_set_bytes(status, datatype, 0);
|
||||
#endif
|
||||
*error_code = MPI_SUCCESS;
|
||||
return;
|
||||
}
|
||||
|
||||
MPI_Type_extent(fd->filetype, &filetype_extent);
|
||||
MPI_Type_size_x(datatype, &buftype_size);
|
||||
MPI_Type_extent(datatype, &buftype_extent);
|
||||
etype_size = fd->etype_size;
|
||||
|
||||
/* NOTE(review): buftype_size is an MPI_Count but bufsize is an int, so the
   product is narrowed here -- confirm callers never exceed INT_MAX bytes. */
bufsize = buftype_size * count;
|
||||
|
||||
/* get max_bufsize from the info object. */
|
||||
|
||||
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
|
||||
ADIOI_Info_get(fd->info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, value,
|
||||
&info_flag);
|
||||
/* NOTE(review): info_flag is not examined before value is parsed --
   presumably the key is always set on fd->info; confirm. */
max_bufsize = atoi(value);
|
||||
ADIOI_Free(value);
|
||||
|
||||
if (!buftype_is_contig && filetype_is_contig) {
|
||||
|
||||
/* noncontiguous in memory, contiguous in file. */
|
||||
|
||||
ADIOI_Flatten_datatype(datatype);
|
||||
flat_buf = ADIOI_Flatlist;
|
||||
while (flat_buf->type != datatype) flat_buf = flat_buf->next;
|
||||
|
||||
off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind :
|
||||
fd->disp + etype_size * offset;
|
||||
|
||||
start_off = off;
|
||||
end_offset = off + bufsize - 1;
|
||||
writebuf_off = off;
|
||||
writebuf = (char *) ADIOI_Malloc(max_bufsize);
|
||||
writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));
|
||||
|
||||
/* if atomicity is true, lock the region to be accessed */
|
||||
if (fd->atomicity)
|
||||
ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
|
||||
|
||||
for (j=0; j<count; j++)
|
||||
for (i=0; i<flat_buf->count; i++) {
|
||||
userbuf_off = j*buftype_extent + flat_buf->indices[i];
|
||||
req_off = off;
|
||||
req_len = flat_buf->blocklens[i];
|
||||
ADIOI_BUFFERED_WRITE_WITHOUT_READ
|
||||
off += flat_buf->blocklens[i];
|
||||
}
|
||||
|
||||
/* write the buffer out finally */
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
|
||||
#endif
|
||||
lseek(fd->fd_sys, writebuf_off, SEEK_SET);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
|
||||
#endif
|
||||
if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
|
||||
#endif
|
||||
err = write(fd->fd_sys, writebuf, writebuf_len);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
|
||||
#endif
|
||||
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
|
||||
if (err == -1) err_flag = 1;
|
||||
|
||||
if (fd->atomicity)
|
||||
ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;
|
||||
if (err_flag) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE, myname,
|
||||
__LINE__, MPI_ERR_IO, "**io",
|
||||
"**io %s", strerror(errno));
|
||||
}
|
||||
else *error_code = MPI_SUCCESS;
|
||||
}
|
||||
|
||||
else { /* noncontiguous in file */
|
||||
|
||||
/* filetype already flattened in ADIO_Open */
|
||||
flat_file = ADIOI_Flatlist;
|
||||
while (flat_file->type != fd->filetype) flat_file = flat_file->next;
|
||||
disp = fd->disp;
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) {
|
||||
/* Wei-keng reworked type processing to be a bit more efficient */
|
||||
offset = fd->fp_ind - disp;
|
||||
n_filetypes = (offset - flat_file->indices[0]) / filetype_extent;
|
||||
offset -= (ADIO_Offset)n_filetypes * filetype_extent;
|
||||
/* now offset is local to this extent */
|
||||
|
||||
/* find the block where offset is located, skip blocklens[i]==0 */
|
||||
for (i=0; i<flat_file->count; i++) {
|
||||
ADIO_Offset dist;
|
||||
if (flat_file->blocklens[i] == 0) continue;
|
||||
dist = flat_file->indices[i] + flat_file->blocklens[i] - offset;
|
||||
/* fwr_size is from offset to the end of block i */
|
||||
if (dist == 0) {
|
||||
/* NOTE(review): if block i is the last entry, i becomes flat_file->count
   here and the two reads below index one past it -- confirm this case
   cannot occur or that the arrays are valid at that index. */
i++;
|
||||
offset = flat_file->indices[i];
|
||||
fwr_size = flat_file->blocklens[i];
|
||||
break;
|
||||
}
|
||||
if (dist > 0) {
|
||||
fwr_size = dist;
|
||||
break;
|
||||
}
|
||||
}
|
||||
st_index = i; /* starting index in flat_file->indices[] */
|
||||
offset += disp + (ADIO_Offset)n_filetypes*filetype_extent;
|
||||
}
|
||||
else {
|
||||
n_etypes_in_filetype = filetype_size/etype_size;
|
||||
n_filetypes = (int) (offset / n_etypes_in_filetype);
|
||||
etype_in_filetype = (int) (offset % n_etypes_in_filetype);
|
||||
size_in_filetype = etype_in_filetype * etype_size;
|
||||
|
||||
sum = 0;
|
||||
for (i=0; i<flat_file->count; i++) {
|
||||
sum += flat_file->blocklens[i];
|
||||
if (sum > size_in_filetype) {
|
||||
st_index = i;
|
||||
fwr_size = sum - size_in_filetype;
|
||||
abs_off_in_filetype = flat_file->indices[i] +
|
||||
size_in_filetype - (sum - flat_file->blocklens[i]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* abs. offset in bytes in the file */
|
||||
offset = disp + (ADIO_Offset) n_filetypes*filetype_extent +
|
||||
abs_off_in_filetype;
|
||||
}
|
||||
|
||||
start_off = offset;
|
||||
/* Wei-keng Liao:write request is within single flat_file contig block*/
|
||||
/* this could happen, for example, with subarray types that are
|
||||
* actually fairly contiguous */
|
||||
if (buftype_is_contig && bufsize <= fwr_size) {
|
||||
ADIO_WriteContig(fd, buf, bufsize, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
|
||||
offset, status, error_code);
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) {
|
||||
/* update MPI-IO file pointer to point to the first byte
|
||||
* that can be accessed in the fileview. */
|
||||
fd->fp_ind = offset + bufsize;
|
||||
if (bufsize == fwr_size) {
|
||||
do {
|
||||
st_index++;
|
||||
if (st_index == flat_file->count) {
|
||||
st_index = 0;
|
||||
n_filetypes++;
|
||||
}
|
||||
} while (flat_file->blocklens[st_index] == 0);
|
||||
fd->fp_ind = disp + flat_file->indices[st_index]
|
||||
+ (ADIO_Offset)n_filetypes*filetype_extent;
|
||||
}
|
||||
}
|
||||
fd->fp_sys_posn = -1; /* set it to null. */
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
MPIR_Status_set_bytes(status, datatype, bufsize);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
/* Calculate end_offset, the last byte-offset that will be accessed.
|
||||
e.g., if start_offset=0 and 100 bytes to be written, end_offset=99*/
|
||||
|
||||
st_fwr_size = fwr_size;
|
||||
st_n_filetypes = n_filetypes;
|
||||
i = 0;
|
||||
j = st_index;
|
||||
off = offset;
|
||||
fwr_size = ADIOI_MIN(st_fwr_size, bufsize);
|
||||
while (i < bufsize) {
|
||||
i += fwr_size;
|
||||
end_offset = off + fwr_size - 1;
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
while (flat_file->blocklens[j]==0) {
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
}
|
||||
|
||||
off = disp + flat_file->indices[j] +
|
||||
(ADIO_Offset) n_filetypes*filetype_extent;
|
||||
fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
|
||||
}
|
||||
|
||||
/* if atomicity is true, lock the region to be accessed */
|
||||
if (fd->atomicity)
|
||||
ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
|
||||
|
||||
/* initial read for the read-modify-write */
|
||||
writebuf_off = offset;
|
||||
writebuf = (char *) ADIOI_Malloc(max_bufsize);
|
||||
writebuf_len = (int)(ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));
|
||||
if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
|
||||
#endif
|
||||
lseek(fd->fd_sys, writebuf_off, SEEK_SET);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
|
||||
#endif
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
|
||||
#endif
|
||||
err = read(fd->fd_sys, writebuf, writebuf_len);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
|
||||
#endif
|
||||
if (err == -1) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
MPI_ERR_IO,
|
||||
"ADIOI_NFS_WriteStrided: ROMIO tries to optimize this access by doing a read-modify-write, but is unable to read the file. Please give the file read permission and open it with MPI_MODE_RDWR.", 0);
|
||||
/* NOTE(review): this jump skips every ADIOI_UNLOCK below, so the lock
   taken above (whole region if atomic, first window otherwise) appears to
   stay held on this path -- confirm whether that is intended. */
goto fn_exit;
|
||||
}
|
||||
|
||||
if (buftype_is_contig && !filetype_is_contig) {
|
||||
|
||||
/* contiguous in memory, noncontiguous in file. should be the most
|
||||
common case. */
|
||||
|
||||
i = 0;
|
||||
j = st_index;
|
||||
off = offset;
|
||||
n_filetypes = st_n_filetypes;
|
||||
fwr_size = ADIOI_MIN(st_fwr_size, bufsize);
|
||||
while (i < bufsize) {
|
||||
if (fwr_size) {
|
||||
/* TYPE_UB and TYPE_LB can result in
|
||||
fwr_size = 0. save system call in such cases */
|
||||
/* lseek(fd->fd_sys, off, SEEK_SET);
|
||||
err = write(fd->fd_sys, ((char *) buf) + i, fwr_size);*/
|
||||
|
||||
req_off = off;
|
||||
req_len = fwr_size;
|
||||
userbuf_off = i;
|
||||
ADIOI_BUFFERED_WRITE
|
||||
}
|
||||
i += fwr_size;
|
||||
|
||||
if (off + fwr_size < disp + flat_file->indices[j] +
|
||||
flat_file->blocklens[j] + (ADIO_Offset) n_filetypes*filetype_extent)
|
||||
off += fwr_size;
|
||||
/* did not reach end of contiguous block in filetype.
|
||||
no more I/O needed. off is incremented by fwr_size. */
|
||||
else {
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
while (flat_file->blocklens[j]==0) {
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
}
|
||||
off = disp + flat_file->indices[j] +
|
||||
(ADIO_Offset) n_filetypes*filetype_extent;
|
||||
fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* noncontiguous in memory as well as in file */
|
||||
|
||||
ADIOI_Flatten_datatype(datatype);
|
||||
flat_buf = ADIOI_Flatlist;
|
||||
while (flat_buf->type != datatype) flat_buf = flat_buf->next;
|
||||
|
||||
k = num = buf_count = 0;
|
||||
i = (int) (flat_buf->indices[0]);
|
||||
j = st_index;
|
||||
off = offset;
|
||||
n_filetypes = st_n_filetypes;
|
||||
fwr_size = st_fwr_size;
|
||||
bwr_size = flat_buf->blocklens[0];
|
||||
|
||||
while (num < bufsize) {
|
||||
size = ADIOI_MIN(fwr_size, bwr_size);
|
||||
if (size) {
|
||||
/* lseek(fd->fd_sys, off, SEEK_SET);
|
||||
err = write(fd->fd_sys, ((char *) buf) + i, size); */
|
||||
|
||||
req_off = off;
|
||||
req_len = size;
|
||||
userbuf_off = i;
|
||||
ADIOI_BUFFERED_WRITE
|
||||
}
|
||||
|
||||
new_fwr_size = fwr_size;
|
||||
new_bwr_size = bwr_size;
|
||||
|
||||
if (size == fwr_size) {
|
||||
/* reached end of contiguous block in file */
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
while (flat_file->blocklens[j]==0) {
|
||||
j = (j+1) % flat_file->count;
|
||||
n_filetypes += (j == 0) ? 1 : 0;
|
||||
}
|
||||
|
||||
off = disp + flat_file->indices[j] +
|
||||
(ADIO_Offset) n_filetypes*filetype_extent;
|
||||
|
||||
new_fwr_size = flat_file->blocklens[j];
|
||||
if (size != bwr_size) {
|
||||
i += size;
|
||||
new_bwr_size -= size;
|
||||
}
|
||||
}
|
||||
|
||||
if (size == bwr_size) {
|
||||
/* reached end of contiguous block in memory */
|
||||
|
||||
k = (k + 1)%flat_buf->count;
|
||||
buf_count++;
|
||||
i = (int) (buftype_extent*(buf_count/flat_buf->count) +
|
||||
flat_buf->indices[k]);
|
||||
new_bwr_size = flat_buf->blocklens[k];
|
||||
if (size != fwr_size) {
|
||||
off += size;
|
||||
new_fwr_size -= size;
|
||||
}
|
||||
}
|
||||
num += size;
|
||||
fwr_size = new_fwr_size;
|
||||
bwr_size = new_bwr_size;
|
||||
}
|
||||
}
|
||||
|
||||
/* write the buffer out finally */
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
|
||||
#endif
|
||||
lseek(fd->fd_sys, writebuf_off, SEEK_SET);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
|
||||
#endif
|
||||
if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
|
||||
#endif
|
||||
err = write(fd->fd_sys, writebuf, writebuf_len);
|
||||
#ifdef ADIOI_MPE_LOGGING
|
||||
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
|
||||
#endif
|
||||
|
||||
if (!(fd->atomicity))
|
||||
ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
|
||||
else ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
|
||||
|
||||
if (err == -1) err_flag = 1;
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;
|
||||
if (err_flag) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE, myname,
|
||||
__LINE__, MPI_ERR_IO, "**io",
|
||||
"**io %s", strerror(errno));
|
||||
}
|
||||
else *error_code = MPI_SUCCESS;
|
||||
}
|
||||
|
||||
fd->fp_sys_posn = -1; /* set it to null. */
|
||||
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
MPIR_Status_set_bytes(status, datatype, bufsize);
|
||||
/* This is a temporary way of filling in status. The right way is to
|
||||
keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */
|
||||
#endif
|
||||
|
||||
if (!buftype_is_contig) ADIOI_Delete_flattened(datatype);
|
||||
/* common exit: reached by fall-through and by the goto fn_exit jumps (the
   one above and those inside ADIOI_BUFFERED_WRITE); only writebuf is
   released here. */
fn_exit:
|
||||
if (writebuf != NULL) ADIOI_Free(writebuf);
|
||||
|
||||
return;
|
||||
}
|
38
ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs.c
Обычный файл
38
ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs.c
Обычный файл
@ -0,0 +1,38 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2001 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_ntfs.h"
|
||||
|
||||
/* adioi.h has the ADIOI_Fns_struct define */
|
||||
#include "adioi.h"
|
||||
|
||||
struct ADIOI_Fns_struct ADIO_NTFS_operations = {
|
||||
ADIOI_NTFS_Open, /* Open */
|
||||
ADIOI_FAILSAFE_OpenColl, /* OpenColl */
|
||||
ADIOI_NTFS_ReadContig, /* ReadContig */
|
||||
ADIOI_NTFS_WriteContig, /* WriteContig */
|
||||
ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */
|
||||
ADIOI_GEN_WriteStridedColl, /* WriteStridedColl */
|
||||
ADIOI_GEN_SeekIndividual, /* SeekIndividual */
|
||||
ADIOI_NTFS_Fcntl, /* Fcntl */
|
||||
ADIOI_GEN_SetInfo, /* SetInfo */
|
||||
ADIOI_GEN_ReadStrided, /* ReadStrided */
|
||||
ADIOI_GEN_WriteStrided, /* WriteStrided */
|
||||
ADIOI_NTFS_Close, /* Close */
|
||||
ADIOI_NTFS_IreadContig, /* IreadContig */
|
||||
ADIOI_NTFS_IwriteContig, /* IwriteContig */
|
||||
ADIOI_NTFS_ReadDone, /* ReadDone */
|
||||
ADIOI_NTFS_WriteDone, /* WriteDone */
|
||||
ADIOI_NTFS_ReadComplete, /* ReadComplete */
|
||||
ADIOI_NTFS_WriteComplete, /* WriteComplete */
|
||||
ADIOI_FAKE_IreadStrided, /* IreadStrided */
|
||||
ADIOI_FAKE_IwriteStrided, /* IwriteStrided */
|
||||
ADIOI_NTFS_Flush, /* Flush */
|
||||
ADIOI_NTFS_Resize, /* Resize */
|
||||
ADIOI_GEN_Delete, /* Delete */
|
||||
ADIOI_NTFS_Feature /* Features */
|
||||
};
|
68
ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs.h
Обычный файл
68
ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs.h
Обычный файл
@ -0,0 +1,68 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
*
|
||||
*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#ifndef AD_NTFS_INCLUDE
#define AD_NTFS_INCLUDE

#include <sys/types.h>
#include <fcntl.h>
#include "adio.h"

/*
 * Helpers for moving between a 64-bit file offset and the pair of 32-bit
 * words that the Win32 file APIs take.
 *
 *   DWORDLOW(x)        low 32 bits of x
 *   DWORDHIGH(x)       bits 32..63 of x
 *   DWORDTOINT64(x,y)  64-bit value with x as the high word and y as the
 *                      low word
 *
 * Every macro argument is parenthesized so that compound expressions such
 * as DWORDLOW(a + b) expand as intended.
 */
#ifdef HAVE_INT64
#define DWORDLOW(x)        ( (DWORD) ( (x) & (__int64) 0xFFFFFFFF ) )
#define DWORDHIGH(x)       ( (DWORD) ( ((x) >> 32) & (__int64) 0xFFFFFFFF ) )
#define DWORDTOINT64(x,y)  ( (__int64) ( ( ((__int64) (x)) << 32 ) + (__int64) (y) ) )
#else
/* Without a 64-bit integer type the offset is a single word: the "high"
 * part is always 0 and DWORDTOINT64 passes its first argument through. */
#define DWORDLOW(x)        (x)
#define DWORDHIGH(x)       0
#define DWORDTOINT64(x,y)  (x)
#endif

/* Start an overlapped read (wr == 0) or write (wr == 1) of len bytes at
 * offset.  Returns MPI_SUCCESS or an MPI error code.
 * NOTE(review): the definition in ad_ntfs_iwrite.c declares the last
 * parameter as "MPI_Request *request"; confirm and align the two. */
int ADIOI_NTFS_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset,
                   int wr, void *handle);

/* Open / close */
void ADIOI_NTFS_Open(ADIO_File fd, int *error_code);
void ADIOI_NTFS_Close(ADIO_File fd, int *error_code);

/* Blocking contiguous I/O */
void ADIOI_NTFS_ReadContig(ADIO_File fd, void *buf, int count,
                           MPI_Datatype datatype, int file_ptr_type,
                           ADIO_Offset offset, ADIO_Status *status, int
                           *error_code);
void ADIOI_NTFS_WriteContig(ADIO_File fd, void *buf, int count,
                            MPI_Datatype datatype, int file_ptr_type,
                            ADIO_Offset offset, ADIO_Status *status, int
                            *error_code);

/* Nonblocking contiguous I/O */
void ADIOI_NTFS_IwriteContig(ADIO_File fd, void *buf, int count,
                             MPI_Datatype datatype, int file_ptr_type,
                             ADIO_Offset offset, ADIO_Request *request, int
                             *error_code);
void ADIOI_NTFS_IreadContig(ADIO_File fd, void *buf, int count,
                            MPI_Datatype datatype, int file_ptr_type,
                            ADIO_Offset offset, ADIO_Request *request, int
                            *error_code);

/* Completion hooks for nonblocking requests */
int ADIOI_NTFS_ReadDone(ADIO_Request *request, ADIO_Status *status, int
                        *error_code);
int ADIOI_NTFS_WriteDone(ADIO_Request *request, ADIO_Status *status, int
                         *error_code);
void ADIOI_NTFS_ReadComplete(ADIO_Request *request, ADIO_Status *status, int
                             *error_code);
void ADIOI_NTFS_WriteComplete(ADIO_Request *request, ADIO_Status *status,
                              int *error_code);

/* File control, strided nonblocking write, flush and resize */
void ADIOI_NTFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int
                      *error_code);
void ADIOI_NTFS_IwriteStrided(ADIO_File fd, void *buf, int count,
                              MPI_Datatype datatype, int file_ptr_type,
                              ADIO_Offset offset, ADIO_Request *request, int
                              *error_code);
void ADIOI_NTFS_Flush(ADIO_File fd, int *error_code);
void ADIOI_NTFS_Resize(ADIO_File fd, ADIO_Offset size, int *error_code);

/* Size of the buffers that receive text from ADIOI_NTFS_Strerror; the same
 * value is passed to FormatMessage as its minimum allocation size. */
#define FORMAT_MESSAGE_MIN_SIZE 100
#define ADIOI_NTFS_ERR_MSG_MAX FORMAT_MESSAGE_MIN_SIZE

/* Fill errMsg (capacity errMsgLen bytes) with the system text for the
 * Win32 error code "error". */
void ADIOI_NTFS_Strerror(int error, char *errMsg, int errMsgLen);

#endif
|
30
ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_close.c
Обычный файл
30
ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_close.c
Обычный файл
@ -0,0 +1,30 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_ntfs.h"
|
||||
|
||||
/*
 * Close the Win32 handle stored in fd->fd_sys.
 *
 * On success *error_code is MPI_SUCCESS.  If CloseHandle() fails,
 * *error_code is an MPI_ERR_IO-class code carrying the system error text.
 */
void ADIOI_NTFS_Close(ADIO_File fd, int *error_code)
{
    static char myname[] = "ADIOI_NTFS_Close";
    int err;

    err = CloseHandle(fd->fd_sys);
    if (err != FALSE)
    {
        *error_code = MPI_SUCCESS;
        return;
    }

    /* --BEGIN ERROR HANDLING-- */
    {
        char errMsg[ADIOI_NTFS_ERR_MSG_MAX];

        err = GetLastError();
        ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
        *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__, MPI_ERR_IO,
                                           "**io",
                                           "**io %s", errMsg);
    }
    /* --END ERROR HANDLING-- */
}
|
20
ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_done.c
Обычный файл
20
ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_done.c
Обычный файл
@ -0,0 +1,20 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_ntfs.h"
|
||||
|
||||
/*
 * ReadDone slot of the NTFS function table.
 *
 * Stub: always returns 0 and neither reads nor writes request, status or
 * error_code.
 */
int ADIOI_NTFS_ReadDone(ADIO_Request *request, ADIO_Status *status,
                        int *error_code)
{
    /* none of the arguments are used */
    (void) request;
    (void) status;
    (void) error_code;

    return 0;
}
|
||||
|
||||
|
||||
/*
 * WriteDone slot of the NTFS function table.
 *
 * Stub: always returns 0 and neither reads nor writes request, status or
 * error_code.
 */
int ADIOI_NTFS_WriteDone(ADIO_Request *request, ADIO_Status *status,
                         int *error_code)
{
    /* none of the arguments are used */
    (void) request;
    (void) status;
    (void) error_code;

    return 0;
}
|
76
ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_fcntl.c
Обычный файл
76
ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_fcntl.c
Обычный файл
@ -0,0 +1,76 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_ntfs.h"
|
||||
#include "adio_extern.h"
|
||||
|
||||
/*
 * File-control entry point of the NTFS driver.
 *
 *   ADIO_FCNTL_GET_FSIZE      store the file size in fcntl_struct->fsize
 *   ADIO_FCNTL_SET_DISKSPACE  preallocate via ADIOI_GEN_Prealloc
 *   ADIO_FCNTL_SET_ATOMICITY  set fd->atomicity to 0 or 1
 *
 * Any other flag produces an MPI_ERR_ARG-class error.  *error_code is
 * MPI_SUCCESS on success (for SET_DISKSPACE it is whatever
 * ADIOI_GEN_Prealloc stores).
 */
void ADIOI_NTFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *error_code)
{
    DWORD err;
    LONG dwTemp;
    static char myname[] = "ADIOI_NTFS_FCNTL";

    switch(flag)
    {
    case ADIO_FCNTL_GET_FSIZE:
    {
        DWORD sizeLow;
        DWORD sizeErr = NO_ERROR;
#ifdef HAVE_INT64
        /* Ask for the high word too: with a NULL high-word pointer
         * SetFilePointer fails once the position needs more than 32 bits. */
        LONG sizeHigh = 0;
        PLONG pSizeHigh = &sizeHigh;
#else
        PLONG pSizeHigh = NULL;
#endif

        /* The size is found by seeking to the end of the file. */
        sizeLow = SetFilePointer(fd->fd_sys, 0, pSizeHigh, FILE_END);
        if (sizeLow == INVALID_SET_FILE_POINTER)
        {
            /* 0xFFFFFFFF can be a legitimate low word; only a non-zero last
             * error means failure.  Read it now, before any other Win32
             * call can overwrite it. */
            sizeErr = GetLastError();
        }

        if (sizeErr == NO_ERROR)
        {
#ifdef HAVE_INT64
            fcntl_struct->fsize = ((ADIO_Offset) sizeHigh << 32) | (ADIO_Offset) sizeLow;
#else
            fcntl_struct->fsize = sizeLow;
#endif
        }
        else
        {
            fcntl_struct->fsize = INVALID_SET_FILE_POINTER;
        }

        /* Put the system file pointer back where ADIO believes it is. */
        if (fd->fp_sys_posn != -1)
        {
            dwTemp = DWORDHIGH(fd->fp_sys_posn);
            if (SetFilePointer(fd->fd_sys, DWORDLOW(fd->fp_sys_posn), &dwTemp, FILE_BEGIN) == INVALID_SET_FILE_POINTER)
            {
                err = GetLastError();
                if (err != NO_ERROR)
                {
                    char errMsg[ADIOI_NTFS_ERR_MSG_MAX];

                    ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
                    *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                                       myname, __LINE__, MPI_ERR_IO,
                                                       "**io", "**io %s", errMsg);
                    return;
                }
            }
        }

        /* --BEGIN ERROR HANDLING-- */
        if (sizeErr != NO_ERROR)
        {
            char errMsg[ADIOI_NTFS_ERR_MSG_MAX];

            ADIOI_NTFS_Strerror(sizeErr, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE, myname,
                                               __LINE__, MPI_ERR_IO, "**io",
                                               "**io %s", errMsg);
            return;
        }
        /* --END ERROR HANDLING-- */
        *error_code = MPI_SUCCESS;
        break;
    }

    case ADIO_FCNTL_SET_DISKSPACE:
        ADIOI_GEN_Prealloc(fd, fcntl_struct->diskspace, error_code);
        break;

    case ADIO_FCNTL_SET_ATOMICITY:
        /* normalize any non-zero request to 1 */
        fd->atomicity = (fcntl_struct->atomicity == 0) ? 0 : 1;
        *error_code = MPI_SUCCESS;
        break;

    default:
        /* --BEGIN ERROR HANDLING-- */
        *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                           MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__,
                                           MPI_ERR_ARG,
                                           "**flag", "**flag %d", flag);
        return;
        /* --END ERROR HANDLING-- */
    }
}
|
26
ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_feature.c
Обычный файл
26
ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_feature.c
Обычный файл
@ -0,0 +1,26 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
*
|
||||
* (C) 2008 by Argonne National Laboratory.
|
||||
* See COPYRIGHT in top-level directory.
|
||||
*/
|
||||
#include "adio.h"
|
||||
|
||||
/*
 * Report whether the NTFS driver supports the feature named by flag.
 *
 * Returns 1 for ADIO_LOCKS, ADIO_SHARED_FP, ADIO_ATOMIC_MODE and
 * ADIO_DATA_SIEVING_WRITES, and 0 for every other value (including
 * ADIO_SCALABLE_OPEN and ADIO_UNLINK_AFTER_CLOSE).  fd is not used.
 */
int ADIOI_NTFS_Feature(ADIO_File fd, int flag)
{
    /* supported features */
    if (flag == ADIO_LOCKS ||
        flag == ADIO_SHARED_FP ||
        flag == ADIO_ATOMIC_MODE ||
        flag == ADIO_DATA_SIEVING_WRITES)
    {
        return 1;
    }

    /* unsupported or unknown feature */
    return 0;
}
|
32
ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_flush.c
Обычный файл
32
ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_flush.c
Обычный файл
@ -0,0 +1,32 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_ntfs.h"
|
||||
|
||||
/*
 * Flush buffered data of fd to disk with FlushFileBuffers().
 *
 * Files whose access mode has ADIO_RDONLY set are not flushed and count as
 * success.  *error_code is MPI_SUCCESS on success, otherwise an
 * MPI_ERR_IO-class code carrying the system error text.
 */
void ADIOI_NTFS_Flush(ADIO_File fd, int *error_code)
{
    static char myname[] = "ADIOI_NTFS_Flush";
    int err = TRUE;

    /* nothing to flush for a read-only file */
    if (!(fd->access_mode & ADIO_RDONLY))
    {
        err = FlushFileBuffers(fd->fd_sys);
    }

    if (err != FALSE)
    {
        *error_code = MPI_SUCCESS;
        return;
    }

    /* --BEGIN ERROR HANDLING-- */
    {
        char errMsg[ADIOI_NTFS_ERR_MSG_MAX];

        err = GetLastError();
        ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
        *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__, MPI_ERR_IO,
                                           "**io",
                                           "**io %s", errMsg);
    }
    /* --END ERROR HANDLING-- */
}
|
42
ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_iread.c
Обычный файл
42
ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_iread.c
Обычный файл
@ -0,0 +1,42 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_ntfs.h"
|
||||
|
||||
void ADIOI_NTFS_IreadContig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Request *request, int *error_code)
|
||||
{
|
||||
MPI_Count len, typesize;
|
||||
int err;
|
||||
static char myname[] = "ADIOI_NTFS_IreadContig";
|
||||
|
||||
MPI_Type_size_x(datatype, &typesize);
|
||||
len = count * typesize;
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL)
|
||||
{
|
||||
offset = fd->fp_ind;
|
||||
}
|
||||
err = ADIOI_NTFS_aio(fd, buf, len, offset, 0, request);
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL)
|
||||
{
|
||||
fd->fp_ind += len;
|
||||
}
|
||||
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err != MPI_SUCCESS)
|
||||
{
|
||||
*error_code = MPIO_Err_create_code(err, MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__, MPI_ERR_IO,
|
||||
"**io", 0);
|
||||
return;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
*error_code = MPI_SUCCESS;
|
||||
|
||||
fd->fp_sys_posn = -1; /* set it to null. */
|
||||
}
|
303
ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_iwrite.c
Обычный файл
303
ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_iwrite.c
Обычный файл
@ -0,0 +1,303 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_ntfs.h"
|
||||
|
||||
#include "../../mpi-io/mpioimpl.h"
|
||||
#include "../../mpi-io/mpioprof.h"
|
||||
#include "mpiu_greq.h"
|
||||
|
||||
static MPIX_Grequest_class ADIOI_NTFS_greq_class = 0;
|
||||
|
||||
/* Fills the input buffer, errMsg, with the error message
|
||||
corresponding to error code, error */
|
||||
void ADIOI_NTFS_Strerror(int error, char *errMsg, int errMsgLen)
|
||||
{
|
||||
LPTSTR str;
|
||||
int num_bytes;
|
||||
num_bytes = FormatMessage(
|
||||
FORMAT_MESSAGE_FROM_SYSTEM |
|
||||
FORMAT_MESSAGE_ALLOCATE_BUFFER,
|
||||
NULL,
|
||||
error,
|
||||
0,
|
||||
&str,
|
||||
FORMAT_MESSAGE_MIN_SIZE,
|
||||
0);
|
||||
if (num_bytes == 0)
|
||||
{
|
||||
strncpy(errMsg, "\0", errMsgLen);
|
||||
}
|
||||
else
|
||||
{
|
||||
strncpy(errMsg, str, errMsgLen);
|
||||
LocalFree(str);
|
||||
}
|
||||
}
|
||||
|
||||
/* poll for completion of a single outstanding AIO request */
|
||||
/*
 * Generalized-request poll callback: check, without blocking, whether the
 * overlapped operation described by extra_state has finished.
 *
 * When GetOverlappedResult() reports completion the byte count is stored in
 * the request record and the generalized request is marked complete.  A
 * still-pending operation and a failed operation are both left untouched
 * (see the TODOs).  status is not modified.
 *
 * Returns MPI_SUCCESS, or an MPI_ERR_IO-class code if
 * MPI_Grequest_complete() fails.
 */
int ADIOI_NTFS_aio_poll_fn(void *extra_state, MPI_Status *status)
{
    /* FIXME: Validate the args -- has it already been done by the
       caller ? */
    ADIOI_AIO_Request *aio_req = (ADIOI_AIO_Request *) extra_state;
    int mpi_errno = MPI_SUCCESS;
    BOOL finished;

    /* XXX: test for AIO completion here */
    finished = GetOverlappedResult(aio_req->fd, aio_req->lpOvl,
                                   &(aio_req->nbytes), FALSE);
    if (!finished)
    {
        /* Either the I/O is still in progress (ERROR_IO_INCOMPLETE) or it
         * failed. */
        /* TODO: need to diddle with status somehow */
        /* TODO: unsure how to handle the error case */
        return mpi_errno;
    }

    mpi_errno = MPI_Grequest_complete(aio_req->req);
    if (mpi_errno != MPI_SUCCESS)
    {
        mpi_errno = MPIO_Err_create_code(MPI_SUCCESS,
                                         MPIR_ERR_RECOVERABLE,
                                         "ADIOI_NTFS_aio_poll_fn", __LINE__,
                                         MPI_ERR_IO, "**mpi_grequest_complete",
                                         0);
    }
    return mpi_errno;
}
|
||||
|
||||
|
||||
/* Wait for completion of one of the outstanding AIO requests */
|
||||
/*
 * Generalized-request wait callback: block until one of the count
 * outstanding overlapped operations signals its event, or the timeout
 * expires.
 *
 *   count            number of entries in array_of_states
 *   array_of_states  ADIOI_AIO_Request pointers, one per request
 *   timeout          <= 0 waits forever; otherwise treated as seconds
 *                    (see the FIXME below)
 *   status           not modified
 *
 * The request whose event fired is marked complete once
 * GetOverlappedResult() confirms it.  A timeout, a failed wait and a failed
 * operation are not reported (see the TODOs) and yield MPI_SUCCESS.
 *
 * Returns MPI_SUCCESS, or an MPI_ERR_IO-class code if the handle array
 * cannot be allocated or MPI_Grequest_complete() fails.
 */
int ADIOI_NTFS_aio_wait_fn(int count, void **array_of_states,
                           double timeout, MPI_Status *status)
{
    int i, mpi_errno = MPI_SUCCESS;
    ADIOI_AIO_Request **aio_reqlist;
    LPHANDLE lpHandles;
    DWORD retObject = 0;
    DWORD waitMs;
    DWORD idx;

    /* FIXME: Validate the args -- has it already been done by the
       caller ? */
    aio_reqlist = (ADIOI_AIO_Request **)array_of_states;
    lpHandles = (LPHANDLE) ADIOI_Calloc(count, sizeof(HANDLE));
    if (lpHandles == NULL)
    {
        mpi_errno = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                         "ADIOI_NTFS_aio_wait_fn", __LINE__, MPI_ERR_IO,
                                         "**nomem", "**nomem %s", "Event handles");
        return mpi_errno;
    }
    /* XXX: set-up arrays of outstanding requests */
    for (i = 0; i < count; i++)
    {
        lpHandles[i] = (aio_reqlist[i])->lpOvl->hEvent;
    }

    /* XXX: wait for one request to complete */
    /* FIXME: Is the timeout in seconds ? */
    if (timeout <= 0)
    {
        waitMs = INFINITE;
    }
    else
    {
        /* Clamp so that the double -> DWORD conversion cannot overflow and
         * a long finite timeout is never mistaken for INFINITE. */
        double ms = timeout * 1000;
        waitMs = (ms >= (double) INFINITE) ? (DWORD) (INFINITE - 1) : (DWORD) ms;
    }

    retObject = WaitForMultipleObjects(count, lpHandles, FALSE, waitMs);
    idx = retObject - WAIT_OBJECT_0;

    if (retObject == WAIT_FAILED)
    {
        /* TODO: How to handle error while waiting ? */
    }
    else if (retObject == WAIT_TIMEOUT || idx >= (DWORD) count)
    {
        /* Nothing was signalled (or the return value does not name one of
         * our handles): it must not be used as an index into aio_reqlist. */
    }
    else if (GetOverlappedResult(aio_reqlist[idx]->fd,
                                 aio_reqlist[idx]->lpOvl,
                                 &(aio_reqlist[idx]->nbytes),
                                 FALSE))
    {
        /* XXX: mark completed requests as 'done'*/
        mpi_errno = MPI_Grequest_complete(aio_reqlist[idx]->req);
        if (mpi_errno != MPI_SUCCESS)
        {
            mpi_errno = MPIO_Err_create_code(MPI_SUCCESS,
                                             MPIR_ERR_RECOVERABLE,
                                             "ADIOI_NTFS_aio_wait_fn", __LINE__,
                                             MPI_ERR_IO, "**mpi_grequest_complete",
                                             0);
        }
    }
    else
    {
        if (GetLastError() == ERROR_IO_INCOMPLETE)
        {
            /* IO in progress */
            /* TODO: need to diddle with status somehow */
        }
        else
        {
            /* Error occurred */
            /* TODO: not sure how to handle this */
        }
    }
    ADIOI_Free(lpHandles);
    return mpi_errno;
}
|
||||
|
||||
/*
 * Generalized-request query callback: fill in status for the request
 * described by extra_state.
 *
 * The element count is the request's nbytes expressed in MPI_BYTE, the
 * cancelled flag is cleared, and MPI_SOURCE / MPI_TAG are set to
 * MPI_UNDEFINED.  Always returns MPI_SUCCESS.
 */
int ADIOI_NTFS_aio_query_fn(void *extra_state, MPI_Status *status)
{
    ADIOI_AIO_Request *req_info = (ADIOI_AIO_Request *) extra_state;

    /* number of bytes recorded for the request */
    MPI_Status_set_elements(status, MPI_BYTE, req_info->nbytes);

    /* requests are never cancelled, so the flag is always 0 */
    MPI_Status_set_cancelled(status, 0);

    /* source and tag carry no meaning for this generalized request */
    status->MPI_SOURCE = MPI_UNDEFINED;
    status->MPI_TAG = MPI_UNDEFINED;

    /* this generalized request never fails */
    return MPI_SUCCESS;
}
|
||||
|
||||
|
||||
int ADIOI_NTFS_aio_free_fn(void *extra_state)
|
||||
{
|
||||
ADIOI_AIO_Request *aio_req;
|
||||
/* FIXME: Validate the args -- has it already been done by the
|
||||
caller ? */
|
||||
aio_req = (ADIOI_AIO_Request*)extra_state;
|
||||
CloseHandle(aio_req->lpOvl->hEvent);
|
||||
ADIOI_Free(aio_req->lpOvl);
|
||||
ADIOI_Free(aio_req);
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
|
||||
void ADIOI_NTFS_IwriteContig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Request *request,
|
||||
int *error_code)
|
||||
{
|
||||
MPI_Count len, typesize;
|
||||
int err;
|
||||
static char myname[] = "ADIOI_NTFS_IwriteContig";
|
||||
|
||||
MPI_Type_size_x(datatype, &typesize);
|
||||
len = count * typesize;
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL)
|
||||
{
|
||||
offset = fd->fp_ind;
|
||||
}
|
||||
err = ADIOI_NTFS_aio(fd, buf, len, offset, 1, request);
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL)
|
||||
{
|
||||
fd->fp_ind += len;
|
||||
}
|
||||
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err != MPI_SUCCESS)
|
||||
{
|
||||
*error_code = MPIO_Err_create_code(err, MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__, MPI_ERR_IO,
|
||||
"**io", 0);
|
||||
return;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
*error_code = MPI_SUCCESS;
|
||||
|
||||
fd->fp_sys_posn = -1; /* set it to null. */
|
||||
}
|
||||
|
||||
|
||||
/* This function is for implementation convenience. It is not user-visible.
|
||||
* If wr==1 write, wr==0 read.
|
||||
*
|
||||
* Returns MPI_SUCCESS on success, mpi_errno on failure.
|
||||
*/
|
||||
/*
 * Start one overlapped transfer on fd and wrap it in a generalized request.
 *
 *   fd       file whose fd_sys handle is used
 *   buf      user buffer
 *   len      number of bytes to transfer
 *   offset   absolute file offset of the transfer
 *   wr       1 = write, 0 = read
 *   request  receives the generalized request handle
 *
 * A request record, an OVERLAPPED structure and a manual-reset event are
 * allocated for the transfer; once the request has been created they are
 * released by ADIOI_NTFS_aio_free_fn.  If the transfer cannot be started,
 * everything allocated here is released before returning.
 *
 * Returns MPI_SUCCESS on success, mpi_errno on failure.
 */
int ADIOI_NTFS_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset,
                   int wr, MPI_Request *request)
{
    static char myname[] = "ADIOI_NTFS_aio";

    ADIOI_AIO_Request *aio_req;
    /* static: the transfer may still be in flight after this function
     * returns, so the byte-count target must outlive the call */
    static DWORD dwNumWritten, dwNumRead;
    BOOL ret_val = FALSE;
    FDTYPE fd_sys;
    int mpi_errno = MPI_SUCCESS;
    DWORD err;

    fd_sys = fd->fd_sys;

    aio_req = (ADIOI_AIO_Request *)ADIOI_Calloc(sizeof(ADIOI_AIO_Request), 1);
    if (aio_req == NULL)
    {
        mpi_errno = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                         myname, __LINE__, MPI_ERR_IO,
                                         "**nomem", "**nomem %s", "AIO_REQ");
        return mpi_errno;
    }
    aio_req->lpOvl = (LPOVERLAPPED ) ADIOI_Calloc(sizeof(OVERLAPPED), 1);
    if (aio_req->lpOvl == NULL)
    {
        mpi_errno = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                         myname, __LINE__, MPI_ERR_IO,
                                         "**nomem", "**nomem %s", "OVERLAPPED");
        ADIOI_Free(aio_req);
        return mpi_errno;
    }
    /* manual-reset event, initially non-signalled */
    aio_req->lpOvl->hEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
    if (aio_req->lpOvl->hEvent == NULL)
    {
        char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
        err = GetLastError();
        ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
        mpi_errno = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                         myname, __LINE__, MPI_ERR_IO,
                                         "**io", "**io %s", errMsg);
        ADIOI_Free(aio_req->lpOvl);
        ADIOI_Free(aio_req);
        return mpi_errno;
    }
    aio_req->lpOvl->Offset = DWORDLOW(offset);
    aio_req->lpOvl->OffsetHigh = DWORDHIGH(offset);
    aio_req->fd = fd_sys;

    /* XXX: initiate async I/O */
    if (wr)
    {
        ret_val = WriteFile(fd_sys, buf, len, &dwNumWritten, aio_req->lpOvl);
    }
    else
    {
        ret_val = ReadFile(fd_sys, buf, len, &dwNumRead, aio_req->lpOvl);
    }

    /* --BEGIN ERROR HANDLING-- */
    if (ret_val == FALSE)
    {
        mpi_errno = GetLastError();
        if (mpi_errno != ERROR_IO_PENDING)
        {
            char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
            ADIOI_NTFS_Strerror(mpi_errno, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
            mpi_errno = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                             myname, __LINE__, MPI_ERR_IO,
                                             "**io",
                                             "**io %s", errMsg);
            /* No request will be created for this transfer, so the free
             * callback will never run: release the resources here. */
            CloseHandle(aio_req->lpOvl->hEvent);
            ADIOI_Free(aio_req->lpOvl);
            ADIOI_Free(aio_req);
            return mpi_errno;
        }
        /* ERROR_IO_PENDING only means the transfer was queued */
        mpi_errno = MPI_SUCCESS;
    }
    /* --END ERROR HANDLING-- */

    /* XXX: set up generalized request class and request */
    if (ADIOI_NTFS_greq_class == 0) {
        mpi_errno = MPIX_Grequest_class_create(ADIOI_NTFS_aio_query_fn,
                                               ADIOI_NTFS_aio_free_fn, MPIU_Greq_cancel_fn,
                                               ADIOI_NTFS_aio_poll_fn, ADIOI_NTFS_aio_wait_fn,
                                               &ADIOI_NTFS_greq_class);
        if (mpi_errno != MPI_SUCCESS) {
            /* FIXME: Pass appropriate error code to user */
        }
    }
    mpi_errno = MPIX_Grequest_class_allocate(ADIOI_NTFS_greq_class, aio_req, request);
    if (mpi_errno != MPI_SUCCESS) {
        /* FIXME: Pass appropriate error code to user */
    }
    /* keep a copy of the handle so the poll/wait callbacks can complete it */
    memcpy(&(aio_req->req), request, sizeof(MPI_Request));
    return mpi_errno;
}
|
101
ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_open.c
Обычный файл
101
ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_open.c
Обычный файл
@ -0,0 +1,101 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
*
|
||||
*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_ntfs.h"
|
||||
|
||||
/*
 * Open fd->filename with CreateFile() and store the handle in fd->fd_sys.
 *
 * fd->access_mode is translated as follows:
 *   ADIO_CREATE            -> OPEN_ALWAYS   (otherwise OPEN_EXISTING)
 *   ADIO_EXCL              -> CREATE_NEW
 *   ADIO_RDONLY / ADIO_WRONLY / ADIO_RDWR -> GENERIC_READ and/or GENERIC_WRITE
 *   ADIO_DELETE_ON_CLOSE   -> FILE_FLAG_DELETE_ON_CLOSE
 *   ADIO_SEQUENTIAL        -> FILE_FLAG_SEQUENTIAL_SCAN (otherwise
 *                             FILE_FLAG_RANDOM_ACCESS)
 * With ADIO_APPEND the file pointers fd->fp_ind and fd->fp_sys_posn are set
 * to the end of the file.
 *
 * *error_code is MPI_SUCCESS on success, otherwise an MPI_ERR_IO-class code
 * carrying the system error text.
 */
void ADIOI_NTFS_Open(ADIO_File fd, int *error_code)
{
    int err;
    int cmode, amode, attrib;
    static char myname[] = "ADIOI_NTFS_Open";

    amode = 0;
    cmode = OPEN_EXISTING;
#ifdef USE_WIN_THREADED_IO
    attrib = FILE_FLAG_OVERLAPPED;
#else
    attrib = FILE_ATTRIBUTE_NORMAL;
#endif

    /* creation disposition; EXCL is applied last and takes precedence */
    if (fd->access_mode & ADIO_CREATE)
    {
        cmode = OPEN_ALWAYS;
    }
    if (fd->access_mode & ADIO_EXCL)
    {
        cmode = CREATE_NEW;
    }

    /* desired access */
    if (fd->access_mode & ADIO_RDONLY)
    {
        amode = GENERIC_READ;
    }
    if (fd->access_mode & ADIO_WRONLY)
    {
        amode = GENERIC_WRITE;
    }
    if (fd->access_mode & ADIO_RDWR)
    {
        amode = GENERIC_READ | GENERIC_WRITE;
    }

    /* flags and attributes */
    if (fd->access_mode & ADIO_DELETE_ON_CLOSE)
    {
        attrib = attrib | FILE_FLAG_DELETE_ON_CLOSE;
    }
    if (fd->access_mode & ADIO_SEQUENTIAL)
    {
        attrib = attrib | FILE_FLAG_SEQUENTIAL_SCAN;
    }
    else
    {
        attrib = attrib | FILE_FLAG_RANDOM_ACCESS;
    }

    fd->fd_sys = CreateFile(fd->filename,
                            amode,
                            FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
                            NULL,
                            cmode,
                            attrib,
                            NULL);
    fd->fd_direct = -1;

    /* --BEGIN ERROR HANDLING-- */
    if (fd->fd_sys == INVALID_HANDLE_VALUE)
    {
        char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
        err = GetLastError();
        ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
        *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__, MPI_ERR_IO,
                                           "**io",
                                           "**io %s", errMsg);
        return;
    }
    /* --END ERROR HANDLING-- */

    if (fd->access_mode & ADIO_APPEND)
    {
        DWORD posLow;
#ifdef HAVE_INT64
        /* Ask for the high word too: with a NULL high-word pointer
         * SetFilePointer fails once the position needs more than 32 bits. */
        LONG posHigh = 0;
        PLONG pPosHigh = &posHigh;
#else
        PLONG pPosHigh = NULL;
#endif

        posLow = SetFilePointer(fd->fd_sys, 0, pPosHigh, FILE_END);
        if (posLow == INVALID_SET_FILE_POINTER)
        {
            /* 0xFFFFFFFF can be a legitimate low word; only a non-zero
             * last error means the seek failed. */
            err = GetLastError();
            if (err != NO_ERROR)
            {
                char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
                ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
                *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                                   myname, __LINE__, MPI_ERR_IO,
                                                   "**io", "**io %s", errMsg);
                return;
            }
        }
#ifdef HAVE_INT64
        fd->fp_ind = fd->fp_sys_posn =
            ((ADIO_Offset) posHigh << 32) | (ADIO_Offset) posLow;
#else
        fd->fp_ind = fd->fp_sys_posn = posLow;
#endif
    }

    *error_code = MPI_SUCCESS;
}
|
259
ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_read.c
Обычный файл
259
ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_read.c
Обычный файл
@ -0,0 +1,259 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_ntfs.h"
|
||||
|
||||
void ADIOI_NTFS_ReadContig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status,
|
||||
int *error_code)
|
||||
{
|
||||
LONG dwTemp;
|
||||
DWORD dwNumRead = 0;
|
||||
int err=-1;
|
||||
MPI_Count datatype_size, len;
|
||||
static char myname[] = "ADIOI_NTFS_ReadContig";
|
||||
OVERLAPPED *pOvl;
|
||||
|
||||
/* If file pointer is of type ADIO_INDIVIDUAL ignore the offset
|
||||
and use the current location of file pointer */
|
||||
if(file_ptr_type == ADIO_INDIVIDUAL){
|
||||
offset = fd->fp_ind;
|
||||
}
|
||||
|
||||
MPI_Type_size_x(datatype, &datatype_size);
|
||||
len = datatype_size * count;
|
||||
|
||||
pOvl = (OVERLAPPED *) ADIOI_Calloc(sizeof(OVERLAPPED), 1);
|
||||
if (pOvl == NULL)
|
||||
{
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__, MPI_ERR_IO,
|
||||
"**nomem", "**nomem %s", "OVERLAPPED");
|
||||
return;
|
||||
}
|
||||
pOvl->hEvent = CreateEvent(NULL, TRUE, TRUE, NULL);
|
||||
if (pOvl->hEvent == NULL)
|
||||
{
|
||||
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
|
||||
err = GetLastError();
|
||||
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__, MPI_ERR_IO,
|
||||
"**io", "**io %s", errMsg);
|
||||
ADIOI_Free(pOvl);
|
||||
return;
|
||||
}
|
||||
pOvl->Offset = DWORDLOW(offset);
|
||||
pOvl->OffsetHigh = DWORDHIGH(offset);
|
||||
|
||||
if (file_ptr_type == ADIO_EXPLICIT_OFFSET)
|
||||
{
|
||||
if (fd->fp_sys_posn != offset)
|
||||
{
|
||||
dwTemp = DWORDHIGH(offset);
|
||||
if (SetFilePointer(fd->fd_sys, DWORDLOW(offset), &dwTemp, FILE_BEGIN) == INVALID_SET_FILE_POINTER)
|
||||
{
|
||||
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
|
||||
err = GetLastError();
|
||||
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
|
||||
if (err != NO_ERROR)
|
||||
{
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__, MPI_ERR_IO,
|
||||
"**io", "**io %s", errMsg);
|
||||
CloseHandle(pOvl->hEvent);
|
||||
ADIOI_Free(pOvl);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
{
|
||||
ADIO_Fcntl_t fcntl_struct;
|
||||
int error_code;
|
||||
ADIO_Fcntl(fd, ADIO_FCNTL_GET_FSIZE, &fcntl_struct, &error_code);
|
||||
printf("File size b: %d\n", fcntl_struct.fsize);
|
||||
}
|
||||
printf("ReadFile(%d bytes)\n", len);fflush(stdout);
|
||||
*/
|
||||
err = ReadFile(fd->fd_sys, buf, len, &dwNumRead, pOvl);
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err == FALSE)
|
||||
{
|
||||
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
|
||||
err = GetLastError();
|
||||
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
|
||||
switch (err)
|
||||
{
|
||||
case ERROR_IO_PENDING:
|
||||
break;
|
||||
case ERROR_HANDLE_EOF:
|
||||
/*printf("EOF error\n");fflush(stdout);*/
|
||||
SetEvent(pOvl->hEvent);
|
||||
break;
|
||||
default:
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__, MPI_ERR_IO,
|
||||
"**io",
|
||||
"**io %s", errMsg);
|
||||
CloseHandle(pOvl->hEvent);
|
||||
ADIOI_Free(pOvl);
|
||||
return;
|
||||
}
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
err = GetOverlappedResult(fd->fd_sys, pOvl, &dwNumRead, TRUE);
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err == FALSE)
|
||||
{
|
||||
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
|
||||
err = GetLastError();
|
||||
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
|
||||
if (err != ERROR_HANDLE_EOF) /* Ignore EOF errors */
|
||||
{
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE, myname,
|
||||
__LINE__, MPI_ERR_IO, "**io",
|
||||
"**io %s", errMsg);
|
||||
CloseHandle(pOvl->hEvent);
|
||||
ADIOI_Free(pOvl);
|
||||
return;
|
||||
}
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
if (!CloseHandle(pOvl->hEvent))
|
||||
{
|
||||
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
|
||||
err = GetLastError();
|
||||
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__, MPI_ERR_IO,
|
||||
"**io", "**io %s", errMsg);
|
||||
CloseHandle(pOvl->hEvent);
|
||||
ADIOI_Free(pOvl);
|
||||
return;
|
||||
}
|
||||
ADIOI_Free(pOvl);
|
||||
|
||||
fd->fp_sys_posn = offset + (ADIO_Offset)dwNumRead;
|
||||
/* individual file pointer not updated */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* read from curr. location of ind. file pointer */
|
||||
if (fd->fp_sys_posn != fd->fp_ind)
|
||||
{
|
||||
dwTemp = DWORDHIGH(fd->fp_ind);
|
||||
if (SetFilePointer(fd->fd_sys, DWORDLOW(fd->fp_ind), &dwTemp, FILE_BEGIN) == INVALID_SET_FILE_POINTER)
|
||||
{
|
||||
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
|
||||
err = GetLastError();
|
||||
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
|
||||
if (err != NO_ERROR)
|
||||
{
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__, MPI_ERR_IO,
|
||||
"**io", "**io %s", errMsg);
|
||||
CloseHandle(pOvl->hEvent);
|
||||
ADIOI_Free(pOvl);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
{
|
||||
ADIO_Fcntl_t fcntl_struct;
|
||||
int error_code;
|
||||
ADIO_Fcntl(fd, ADIO_FCNTL_GET_FSIZE, &fcntl_struct, &error_code);
|
||||
printf("File size c: %d\n", fcntl_struct.fsize);
|
||||
}
|
||||
printf("ReadFile(%d bytes)\n", len);fflush(stdout);
|
||||
*/
|
||||
err = ReadFile(fd->fd_sys, buf, len, &dwNumRead, pOvl);
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err == FALSE)
|
||||
{
|
||||
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
|
||||
err = GetLastError();
|
||||
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
|
||||
switch (err)
|
||||
{
|
||||
case ERROR_IO_PENDING:
|
||||
break;
|
||||
case ERROR_HANDLE_EOF:
|
||||
/*printf("EOF error\n");fflush(stdout);*/
|
||||
SetEvent(pOvl->hEvent);
|
||||
break;
|
||||
default:
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__, MPI_ERR_IO,
|
||||
"**io",
|
||||
"**io %s", errMsg);
|
||||
CloseHandle(pOvl->hEvent);
|
||||
ADIOI_Free(pOvl);
|
||||
return;
|
||||
}
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
err = GetOverlappedResult(fd->fd_sys, pOvl, &dwNumRead, TRUE);
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err == FALSE)
|
||||
{
|
||||
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
|
||||
err = GetLastError();
|
||||
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
|
||||
if (err != ERROR_HANDLE_EOF) /* Ignore EOF errors */
|
||||
{
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE, myname,
|
||||
__LINE__, MPI_ERR_IO, "**io",
|
||||
"**io %s", errMsg);
|
||||
CloseHandle(pOvl->hEvent);
|
||||
ADIOI_Free(pOvl);
|
||||
return;
|
||||
}
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
if (!CloseHandle(pOvl->hEvent))
|
||||
{
|
||||
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
|
||||
err = GetLastError();
|
||||
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__, MPI_ERR_IO,
|
||||
"**io", "**io %s", errMsg);
|
||||
ADIOI_Free(pOvl);
|
||||
return;
|
||||
}
|
||||
ADIOI_Free(pOvl);
|
||||
|
||||
fd->fp_ind = fd->fp_ind + (ADIO_Offset)dwNumRead;
|
||||
fd->fp_sys_posn = fd->fp_ind;
|
||||
}
|
||||
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
if (err != FALSE)
|
||||
{
|
||||
MPIR_Status_set_bytes(status, datatype, dwNumRead);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err == FALSE)
|
||||
{
|
||||
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
|
||||
err = GetLastError();
|
||||
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__, MPI_ERR_IO,
|
||||
"**io",
|
||||
"**io %s", errMsg);
|
||||
return;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
*error_code = MPI_SUCCESS;
|
||||
}
|
51
ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_resize.c
Обычный файл
51
ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_resize.c
Обычный файл
@ -0,0 +1,51 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_ntfs.h"
|
||||
|
||||
/*
 * ADIOI_NTFS_Resize - set the length of an open NTFS file to `size` bytes.
 *
 * Moves the Win32 file pointer of fd->fd_sys to `size` (measured from the
 * beginning of the file) and then calls SetEndOfFile() on the handle.
 *
 * fd         - ADIO file whose fd_sys handle is resized
 * size       - new file length in bytes
 * error_code - out: MPI_SUCCESS, or an MPI_ERR_IO code built from the Win32
 *              error message if either call fails
 *
 * NOTE(review): the file pointer is left at `size` and fd->fp_sys_posn is
 * not refreshed here -- confirm that no caller relies on it afterwards.
 */
void ADIOI_NTFS_Resize(ADIO_File fd, ADIO_Offset size, int *error_code)
{
    static char myname[] = "ADIOI_NTFS_Resize";
    LONG high_part = DWORDHIGH(size);
    DWORD win_err;

    win_err = SetFilePointer(fd->fd_sys, DWORDLOW(size), &high_part, FILE_BEGIN);
    /* --BEGIN ERROR HANDLING-- */
    if (win_err == INVALID_SET_FILE_POINTER)
    {
        /* The sentinel alone is not treated as a failure: the call only
         * failed if the last-error code is set as well. */
        win_err = GetLastError();
        if (win_err != NO_ERROR)
        {
            char seek_msg[ADIOI_NTFS_ERR_MSG_MAX];

            ADIOI_NTFS_Strerror(win_err, seek_msg, ADIOI_NTFS_ERR_MSG_MAX);
            *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                               myname, __LINE__, MPI_ERR_IO,
                                               "**io",
                                               "**io %s", seek_msg);
            return;
        }
    }
    /* --END ERROR HANDLING-- */

    /* --BEGIN ERROR HANDLING-- */
    if (!SetEndOfFile(fd->fd_sys))
    {
        char eof_msg[ADIOI_NTFS_ERR_MSG_MAX];

        win_err = GetLastError();
        ADIOI_NTFS_Strerror(win_err, eof_msg, ADIOI_NTFS_ERR_MSG_MAX);
        *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__, MPI_ERR_IO,
                                           "**io",
                                           "**io %s", eof_msg);
        return;
    }
    /* --END ERROR HANDLING-- */

    *error_code = MPI_SUCCESS;
}
|
20
ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_wait.c
Обычный файл
20
ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_wait.c
Обычный файл
@ -0,0 +1,20 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_ntfs.h"
|
||||
|
||||
/*
 * ADIOI_NTFS_ReadComplete - empty stub.
 *
 * Does nothing with any of its arguments.
 * NOTE(review): *error_code is left untouched -- confirm that callers
 * initialise it before relying on its value.
 */
void ADIOI_NTFS_ReadComplete(ADIO_Request *request, ADIO_Status *status,
                             int *error_code)
{
    (void) request;
    (void) status;
    (void) error_code;
}
|
||||
|
||||
|
||||
/*
 * ADIOI_NTFS_WriteComplete - empty stub.
 *
 * Does nothing with any of its arguments.
 * NOTE(review): *error_code is left untouched -- confirm that callers
 * initialise it before relying on its value.
 */
void ADIOI_NTFS_WriteComplete(ADIO_Request *request, ADIO_Status *status,
                              int *error_code)
{
    (void) request;
    (void) status;
    (void) error_code;
}
|
222
ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_write.c
Обычный файл
222
ompi/mca/io/romio314/romio/adio/ad_ntfs/ad_ntfs_write.c
Обычный файл
@ -0,0 +1,222 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_ntfs.h"
|
||||
|
||||
void ADIOI_NTFS_WriteContig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status,
|
||||
int *error_code)
|
||||
{
|
||||
static char myname[] = "ADIOI_NTFS_WriteContig";
|
||||
LONG dwTemp;
|
||||
DWORD dwNumWritten = 0;
|
||||
MPI_Count err=-1, datatype_size, len;
|
||||
OVERLAPPED *pOvl;
|
||||
|
||||
/* If file pointer type in ADIO_INDIVIDUAL then offset should be
|
||||
ignored and the current location of file pointer should be used */
|
||||
if(file_ptr_type == ADIO_INDIVIDUAL){
|
||||
offset = fd->fp_ind;
|
||||
}
|
||||
|
||||
MPI_Type_size_x(datatype, &datatype_size);
|
||||
len = datatype_size * count;
|
||||
|
||||
pOvl = (OVERLAPPED *) ADIOI_Calloc(sizeof(OVERLAPPED), 1);
|
||||
if (pOvl == NULL)
|
||||
{
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__, MPI_ERR_IO,
|
||||
"**nomem", "**nomem %s", "OVERLAPPED");
|
||||
return;
|
||||
}
|
||||
pOvl->hEvent = CreateEvent(NULL, TRUE, TRUE, NULL);
|
||||
if (pOvl->hEvent == NULL)
|
||||
{
|
||||
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
|
||||
err = GetLastError();
|
||||
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__, MPI_ERR_IO,
|
||||
"**io", "**io %s", errMsg);
|
||||
ADIOI_Free(pOvl);
|
||||
return;
|
||||
}
|
||||
pOvl->Offset = DWORDLOW(offset);
|
||||
pOvl->OffsetHigh = DWORDHIGH(offset);
|
||||
|
||||
if (file_ptr_type == ADIO_EXPLICIT_OFFSET)
|
||||
{
|
||||
if (fd->fp_sys_posn != offset)
|
||||
{
|
||||
dwTemp = DWORDHIGH(offset);
|
||||
if (SetFilePointer(fd->fd_sys, DWORDLOW(offset), &dwTemp, FILE_BEGIN) == INVALID_SET_FILE_POINTER)
|
||||
{
|
||||
err = GetLastError();
|
||||
if (err != NO_ERROR)
|
||||
{
|
||||
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
|
||||
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__, MPI_ERR_IO,
|
||||
"**io", "**io %s", errMsg);
|
||||
CloseHandle(pOvl->hEvent);
|
||||
ADIOI_Free(pOvl);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
/*printf("WriteFile(%d bytes)\n", len);fflush(stdout);*/
|
||||
err = WriteFile(fd->fd_sys, buf, len, &dwNumWritten, pOvl);
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err == FALSE)
|
||||
{
|
||||
err = GetLastError();
|
||||
if (err != ERROR_IO_PENDING)
|
||||
{
|
||||
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
|
||||
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__, MPI_ERR_IO,
|
||||
"**io",
|
||||
"**io %s", errMsg);
|
||||
CloseHandle(pOvl->hEvent);
|
||||
ADIOI_Free(pOvl);
|
||||
return;
|
||||
}
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
err = GetOverlappedResult(fd->fd_sys, pOvl, &dwNumWritten, TRUE);
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err == FALSE)
|
||||
{
|
||||
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
|
||||
err = GetLastError();
|
||||
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE, myname,
|
||||
__LINE__, MPI_ERR_IO, "**io",
|
||||
"**io %s", errMsg);
|
||||
CloseHandle(pOvl->hEvent);
|
||||
ADIOI_Free(pOvl);
|
||||
return;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
if (!CloseHandle(pOvl->hEvent))
|
||||
{
|
||||
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
|
||||
err = GetLastError();
|
||||
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__, MPI_ERR_IO,
|
||||
"**io", "**io %s", errMsg);
|
||||
CloseHandle(pOvl->hEvent);
|
||||
ADIOI_Free(pOvl);
|
||||
return;
|
||||
}
|
||||
ADIOI_Free(pOvl);
|
||||
|
||||
fd->fp_sys_posn = offset + dwNumWritten;
|
||||
/* individual file pointer not updated */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* write from curr. location of ind. file pointer */
|
||||
if (fd->fp_sys_posn != fd->fp_ind)
|
||||
{
|
||||
dwTemp = DWORDHIGH(fd->fp_ind);
|
||||
if (SetFilePointer(fd->fd_sys, DWORDLOW(fd->fp_ind), &dwTemp, FILE_BEGIN) == INVALID_SET_FILE_POINTER)
|
||||
{
|
||||
err = GetLastError();
|
||||
if (err != NO_ERROR)
|
||||
{
|
||||
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
|
||||
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__, MPI_ERR_IO,
|
||||
"**io", "**io %s", errMsg);
|
||||
CloseHandle(pOvl->hEvent);
|
||||
ADIOI_Free(pOvl);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
/*printf("WriteFile(%d bytes)\n", len);fflush(stdout);*/
|
||||
err = WriteFile(fd->fd_sys, buf, len, &dwNumWritten, pOvl);
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err == FALSE)
|
||||
{
|
||||
err = GetLastError();
|
||||
if (err != ERROR_IO_PENDING)
|
||||
{
|
||||
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
|
||||
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__, MPI_ERR_IO,
|
||||
"**io",
|
||||
"**io %s", errMsg);
|
||||
CloseHandle(pOvl->hEvent);
|
||||
ADIOI_Free(pOvl);
|
||||
return;
|
||||
}
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
err = GetOverlappedResult(fd->fd_sys, pOvl, &dwNumWritten, TRUE);
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err == FALSE)
|
||||
{
|
||||
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
|
||||
err = GetLastError();
|
||||
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE, myname,
|
||||
__LINE__, MPI_ERR_IO, "**io",
|
||||
"**io %s", errMsg);
|
||||
CloseHandle(pOvl->hEvent);
|
||||
ADIOI_Free(pOvl);
|
||||
return;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
if (!CloseHandle(pOvl->hEvent))
|
||||
{
|
||||
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
|
||||
err = GetLastError();
|
||||
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__, MPI_ERR_IO,
|
||||
"**io", "**io %s", errMsg);
|
||||
ADIOI_Free(pOvl);
|
||||
return;
|
||||
}
|
||||
ADIOI_Free(pOvl);
|
||||
|
||||
fd->fp_ind = fd->fp_ind + dwNumWritten;
|
||||
fd->fp_sys_posn = fd->fp_ind;
|
||||
}
|
||||
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
if (err != FALSE)
|
||||
{
|
||||
MPIR_Status_set_bytes(status, datatype, dwNumWritten);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err == FALSE)
|
||||
{
|
||||
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
|
||||
err = GetLastError();
|
||||
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__, MPI_ERR_IO,
|
||||
"**io",
|
||||
"**io %s", errMsg);
|
||||
return;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
*error_code = MPI_SUCCESS;
|
||||
}
|
21
ompi/mca/io/romio314/romio/adio/ad_panfs/Makefile.mk
Обычный файл
21
ompi/mca/io/romio314/romio/adio/ad_panfs/Makefile.mk
Обычный файл
@ -0,0 +1,21 @@
|
||||
## -*- Mode: Makefile; -*-
|
||||
## vim: set ft=automake :
|
||||
##
|
||||
## (C) 2011 by Argonne National Laboratory.
|
||||
## See COPYRIGHT in top-level directory.
|
||||
##
|
||||
|
||||
## Everything below is added to the build only when the BUILD_AD_PANFS
## automake conditional is true.
if BUILD_AD_PANFS
|
||||
|
||||
## The PanFS driver header, listed under noinst_HEADERS.
noinst_HEADERS += adio/ad_panfs/ad_panfs.h
|
||||
|
||||
## The PanFS driver sources, appended to romio_other_sources.
romio_other_sources += \
|
||||
adio/ad_panfs/ad_panfs.c \
|
||||
adio/ad_panfs/ad_panfs_open.c \
|
||||
adio/ad_panfs/ad_panfs_hints.c \
|
||||
adio/ad_panfs/ad_panfs_read.c \
|
||||
adio/ad_panfs/ad_panfs_resize.c \
|
||||
adio/ad_panfs/ad_panfs_write.c
|
||||
|
||||
endif BUILD_AD_PANFS
|
||||
|
45
ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs.c
Обычный файл
45
ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs.c
Обычный файл
@ -0,0 +1,45 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
* ad_panfs.c
|
||||
*
|
||||
* Copyright (C) 2001 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_panfs.h"
|
||||
|
||||
/* adioi.h has the ADIOI_Fns_struct define */
|
||||
#include "adioi.h"
|
||||
|
||||
/*
 * Function table for the PanFS driver.
 *
 * The entries are positional, so their order must match the member order of
 * struct ADIOI_Fns_struct (declared in adioi.h, not shown here).  PanFS
 * supplies its own Open, ReadContig, WriteContig, SetInfo and Resize; every
 * other slot uses a generic ADIOI_GEN_* routine.  The two nonblocking
 * contiguous slots use the ADIOI_GEN_* versions when ROMIO_HAVE_WORKING_AIO
 * is defined and the ADIOI_FAKE_* versions otherwise.
 */
struct ADIOI_Fns_struct ADIO_PANFS_operations = {
|
||||
ADIOI_PANFS_Open, /* Open */
|
||||
ADIOI_GEN_OpenColl, /* presumably the collective-open slot -- confirm against adioi.h */
|
||||
ADIOI_PANFS_ReadContig, /* ReadContig */
|
||||
ADIOI_PANFS_WriteContig, /* WriteContig */
|
||||
ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */
|
||||
ADIOI_GEN_WriteStridedColl, /* WriteStridedColl */
|
||||
ADIOI_GEN_SeekIndividual, /* SeekIndividual */
|
||||
ADIOI_GEN_Fcntl, /* Fcntl */
|
||||
ADIOI_PANFS_SetInfo, /* SetInfo */
|
||||
ADIOI_GEN_ReadStrided, /* ReadStrided */
|
||||
ADIOI_GEN_WriteStrided, /* WriteStrided */
|
||||
ADIOI_GEN_Close, /* Close */
|
||||
#ifdef ROMIO_HAVE_WORKING_AIO
|
||||
ADIOI_GEN_IreadContig, /* IreadContig */
|
||||
ADIOI_GEN_IwriteContig, /* IwriteContig */
|
||||
#else
|
||||
ADIOI_FAKE_IreadContig, /* IreadContig */
|
||||
ADIOI_FAKE_IwriteContig, /* IwriteContig */
|
||||
#endif
|
||||
ADIOI_GEN_IODone, /* ReadDone */
|
||||
ADIOI_GEN_IODone, /* WriteDone */
|
||||
ADIOI_GEN_IOComplete, /* ReadComplete */
|
||||
ADIOI_GEN_IOComplete, /* WriteComplete */
|
||||
ADIOI_GEN_IreadStrided, /* IreadStrided */
|
||||
ADIOI_GEN_IwriteStrided, /* IwriteStrided */
|
||||
ADIOI_GEN_Flush, /* Flush */
|
||||
ADIOI_PANFS_Resize, /* Resize */
|
||||
ADIOI_GEN_Delete, /* Delete */
|
||||
ADIOI_GEN_Feature, /* presumably the feature-query slot -- confirm against adioi.h */
|
||||
"PANFS: Panasas PanFS" /* descriptive name string for this driver */
|
||||
};
|
62
ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs.h
Обычный файл
62
ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs.h
Обычный файл
@ -0,0 +1,62 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
* ad_panfs.h
|
||||
*
|
||||
* Copyright (C) 2001 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#ifndef AD_PANFS_INCLUDE
|
||||
#define AD_PANFS_INCLUDE
|
||||
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <fcntl.h>
|
||||
#include "adio.h"
|
||||
|
||||
#ifndef NO_AIO
|
||||
#ifdef AIO_SUN
|
||||
#include <sys/asynch.h>
|
||||
#else
|
||||
#include <aio.h>
|
||||
#ifdef NEEDS_ADIOCB_T
|
||||
typedef struct adiocb adiocb_t;
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* PanFS-specific routines installed in the ADIO_PANFS_operations table. */

/* Open entry of the PanFS function table; reports its outcome through *error_code. */
void ADIOI_PANFS_Open(ADIO_File fd, int *error_code);
|
||||
/* SetInfo entry: installs the panfs_* hints from users_info on fd->info. */
void ADIOI_PANFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code);
|
||||
/* ReadContig entry of the PanFS function table. */
void ADIOI_PANFS_ReadContig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status,
|
||||
int *error_code);
|
||||
/* Resize entry of the PanFS function table. */
void ADIOI_PANFS_Resize(ADIO_File fd, ADIO_Offset size, int *error_code);
|
||||
/* WriteContig entry of the PanFS function table; buf is const here. */
void ADIOI_PANFS_WriteContig(ADIO_File fd, const void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status,
|
||||
int *error_code);
|
||||
|
||||
/* TODO: move this to common code and have all routines retry. */
|
||||
/* TODO: also check for EWOULDBLOCK */
|
||||
#if defined(NEEDS_USLEEP_DECL)
|
||||
int usleep(useconds_t usec);
|
||||
#endif
|
||||
|
||||
/* Pause between two attempts; the value is handed to usleep(), i.e. 1 ms. */
#define AD_PANFS_RETRY_DELAY 1000

/*
 * AD_PANFS_RETRY(_op_, _rc_)
 *
 * Evaluates _op_ and stores its result in _rc_.  As long as the result is
 * -1 with errno == EAGAIN, sleeps for AD_PANFS_RETRY_DELAY and evaluates
 * _op_ again.  Retrying stops early if usleep() itself returns -1.
 *
 * _op_ is re-expanded for every attempt, so any side effects it has are
 * repeated.  The macro expands to a brace-enclosed block, not an expression.
 */
#define AD_PANFS_RETRY(_op_,_rc_)                       \
{                                                       \
    for (_rc_ = (_op_);                                 \
         _rc_ == -1 && errno == EAGAIN;                 \
         _rc_ = (_op_))                                 \
    {                                                   \
        if (usleep(AD_PANFS_RETRY_DELAY) == -1)         \
        {                                               \
            break;                                      \
        }                                               \
    }                                                   \
}
|
||||
|
||||
#endif
|
72
ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs_hints.c
Обычный файл
72
ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs_hints.c
Обычный файл
@ -0,0 +1,72 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
* ad_panfs_hints.c
|
||||
*
|
||||
* Copyright (C) 2001 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_panfs.h"
|
||||
#include <pan_fs_client_cw_mode.h>
|
||||
#include "hint_fns.h"
|
||||
|
||||
/*
 * ADIOI_PANFS_SetInfo - install the PanFS hints on a file, then apply the
 * generic hint handling.
 *
 * fd         - ADIO file whose info object (fd->info) receives the hints
 * users_info - hints supplied by the user, or MPI_INFO_NULL
 * error_code - out: the error reported by the PanFS hint handling if there
 *              was one, otherwise whatever ADIOI_GEN_SetInfo() reported
 *
 * The PanFS hints are only processed when fd->info is still MPI_INFO_NULL.
 * In that case the info object is created, "panfs_concurrent_write" is
 * defaulted to "1", and each panfs_* integer hint present in users_info is
 * passed through ADIOI_Info_check_and_install_int().
 */
void ADIOI_PANFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
{
    /* Declared unconditionally: it is passed to every
     * ADIOI_Info_check_and_install_int() call below, so hiding it behind
     * MPICH / PRINT_ERR_MSG would break the build whenever that guard is
     * false. */
    static char myname[] = "ADIOI_PANFS_SETINFO";

    /* Integer hints handed to ADIOI_Info_check_and_install_int(), in the
     * order they have always been processed.  Keep the order stable.
     *
     * Notes carried over from the earlier per-hint code: there used to be
     * checks against layout_type ==
     * PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE (parity stripe
     * width) and against ..._RAID1_5_PARITY_STRIPE or ..._RAID10 (visit
     * policy), but nothing ever updated layout_type, so no such filtering
     * is done here. */
    static const char *const panfs_int_hints[] = {
        "panfs_concurrent_write",
        "panfs_layout_type",
        "panfs_layout_stripe_unit",
        "panfs_layout_parity_stripe_width",
        "panfs_layout_parity_stripe_depth",
        "panfs_layout_total_num_comps",
        "panfs_layout_visit_policy"
    };
    const unsigned int num_hints =
        (unsigned int) (sizeof(panfs_int_hints) / sizeof(panfs_int_hints[0]));
    unsigned int i;
    int gen_error_code;

    *error_code = MPI_SUCCESS;

    if (fd->info == MPI_INFO_NULL) {
        /* This must be part of the open call. can set striping parameters
         * if necessary.
         */
        MPI_Info_create(&(fd->info));

        /* anticipate concurrent writes in an MPI-IO application */
        ADIOI_Info_set (fd->info, "panfs_concurrent_write", "1");

        /* has user specified striping parameters
           and do they have the same value on all processes? */
        if (users_info != MPI_INFO_NULL) {
            /* Every hint is processed even after an earlier one reported
             * an error, exactly as before. */
            for (i = 0; i < num_hints; i++) {
                ADIOI_Info_check_and_install_int(fd, users_info,
                                                 panfs_int_hints[i],
                                                 NULL, myname, error_code);
            }
        }
    }

    ADIOI_GEN_SetInfo(fd, users_info, &gen_error_code);
    /* If this function is successful, use the error code returned from ADIOI_GEN_SetInfo
     * otherwise use the error_code generated by this function
     */
    if (*error_code == MPI_SUCCESS)
    {
        *error_code = gen_error_code;
    }
}
|
348
ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs_open.c
Обычный файл
348
ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs_open.c
Обычный файл
@ -0,0 +1,348 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
* ad_panfs_open.c
|
||||
*
|
||||
* Copyright (C) 2001 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_panfs.h"
|
||||
#include <string.h>
|
||||
#include <pan_fs_client_cw_mode.h>
|
||||
#define TEMP_BUFFER_SIZE 64
|
||||
|
||||
void ADIOI_PANFS_Open(ADIO_File fd, int *error_code)
|
||||
{
|
||||
char* value;
|
||||
int perm, old_mask, amode, flag;
|
||||
static char myname[] = "ADIOI_PANFS_OPEN";
|
||||
|
||||
if (fd->perm == ADIO_PERM_NULL) {
|
||||
old_mask = umask(022);
|
||||
umask(old_mask);
|
||||
perm = ~old_mask & 0666;
|
||||
}
|
||||
else perm = fd->perm;
|
||||
|
||||
amode = 0;
|
||||
if (fd->access_mode & ADIO_CREATE)
|
||||
{
|
||||
pan_fs_client_layout_agg_type_t layout_type = PAN_FS_CLIENT_LAYOUT_TYPE__DEFAULT;
|
||||
unsigned long int layout_stripe_unit = 0;
|
||||
unsigned long int layout_parity_stripe_width = 0;
|
||||
unsigned long int layout_parity_stripe_depth = 0;
|
||||
unsigned long int layout_total_num_comps = 0;
|
||||
pan_fs_client_layout_visit_t layout_visit_policy = PAN_FS_CLIENT_LAYOUT_VISIT__ROUND_ROBIN;
|
||||
int myrank;
|
||||
|
||||
MPI_Comm_rank(fd->comm, &myrank);
|
||||
|
||||
*error_code = MPI_SUCCESS;
|
||||
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
|
||||
ADIOI_Info_get(fd->info, "panfs_layout_type", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag) {
|
||||
layout_type = strtoul(value,NULL,10);
|
||||
}
|
||||
ADIOI_Info_get(fd->info, "panfs_layout_stripe_unit", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag) {
|
||||
layout_stripe_unit = strtoul(value,NULL,10);
|
||||
}
|
||||
ADIOI_Info_get(fd->info, "panfs_layout_total_num_comps", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag) {
|
||||
layout_total_num_comps = strtoul(value,NULL,10);
|
||||
}
|
||||
ADIOI_Info_get(fd->info, "panfs_layout_parity_stripe_width", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag) {
|
||||
layout_parity_stripe_width = strtoul(value,NULL,10);
|
||||
}
|
||||
ADIOI_Info_get(fd->info, "panfs_layout_parity_stripe_depth", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag) {
|
||||
layout_parity_stripe_depth = strtoul(value,NULL,10);
|
||||
}
|
||||
ADIOI_Info_get(fd->info, "panfs_layout_visit_policy", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag) {
|
||||
layout_visit_policy = strtoul(value,NULL,10);
|
||||
}
|
||||
ADIOI_Free(value);
|
||||
|
||||
amode = amode | O_CREAT;
|
||||
/* Check for valid set of hints */
|
||||
if ((layout_type < PAN_FS_CLIENT_LAYOUT_TYPE__DEFAULT) ||
|
||||
(layout_type > PAN_FS_CLIENT_LAYOUT_TYPE__RAID10))
|
||||
{
|
||||
FPRINTF(stderr, "%s: panfs_layout_type is not a valid value: %u.\n", myname, layout_type);
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
if ((layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID0) &&
|
||||
((layout_stripe_unit == 0) || (layout_total_num_comps == 0)))
|
||||
{
|
||||
if(layout_stripe_unit == 0)
|
||||
{
|
||||
FPRINTF(stderr, "%s: MPI_Info does not contain the panfs_layout_stripe_unit hint which is necessary to specify a valid RAID0 layout to the PAN_FS_CLIENT_LAYOUT_CREATE_FILE ioctl.\n", myname);
|
||||
}
|
||||
if(layout_total_num_comps == 0)
|
||||
{
|
||||
FPRINTF(stderr, "%s: MPI_Info does not contain the panfs_layout_total_num_comps hint which is necessary to specify a valid RAID0 layout to the PAN_FS_CLIENT_LAYOUT_CREATE_FILE ioctl.\n", myname);
|
||||
}
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
if (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE)
|
||||
{
|
||||
if ((layout_stripe_unit == 0) ||
|
||||
(layout_parity_stripe_width == 0) ||
|
||||
(layout_parity_stripe_depth == 0) ||
|
||||
(layout_total_num_comps == 0))
|
||||
{
|
||||
if(layout_stripe_unit == 0)
|
||||
{
|
||||
FPRINTF(stderr, "%s: MPI_Info does not contain the panfs_layout_stripe_unit hint which is necessary to specify a valid RAID5 parity stripe layout to the PAN_FS_CLIENT_LAYOUT_CREATE_FILE ioctl.\n", myname);
|
||||
}
|
||||
if(layout_total_num_comps == 0)
|
||||
{
|
||||
FPRINTF(stderr, "%s: MPI_Info does not contain the panfs_layout_total_num_comps hint which is necessary to specify a valid RAID5 parity stripe layout to the PAN_FS_CLIENT_LAYOUT_CREATE_FILE ioctl.\n", myname);
|
||||
}
|
||||
if(layout_parity_stripe_width == 0)
|
||||
{
|
||||
FPRINTF(stderr, "%s: MPI_Info does not contain the panfs_layout_parity_stripe_width hint which is necessary to specify a valid RAID5 parity stripe layout to the PAN_FS_CLIENT_LAYOUT_CREATE_FILE ioctl.\n", myname);
|
||||
}
|
||||
if(layout_parity_stripe_depth == 0)
|
||||
{
|
||||
FPRINTF(stderr, "%s: MPI_Info does not contain the panfs_layout_parity_stripe_depth hint which is necessary to specify a valid RAID5 parity stripe layout to the PAN_FS_CLIENT_LAYOUT_CREATE_FILE ioctl.\n", myname);
|
||||
}
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
if ((layout_visit_policy < PAN_FS_CLIENT_LAYOUT_VISIT__ROUND_ROBIN) ||
|
||||
(layout_visit_policy > PAN_FS_CLIENT_LAYOUT_VISIT__ROUND_ROBIN_WITH_HASHED_OFFSET))
|
||||
{
|
||||
FPRINTF(stderr, "%s: panfs_layout_visit_policy is not a valid value: %u.\n", myname, layout_visit_policy);
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
}
|
||||
if (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID10)
|
||||
{
|
||||
if ((layout_stripe_unit == 0) || (layout_total_num_comps == 0))
|
||||
{
|
||||
if(layout_stripe_unit == 0)
|
||||
{
|
||||
FPRINTF(stderr, "%s: MPI_Info does not contain the panfs_layout_stripe_unit hint which is necessary to specify a valid RAID10 layout to the PAN_FS_CLIENT_LAYOUT_CREATE_FILE ioctl.\n", myname);
|
||||
}
|
||||
if(layout_total_num_comps == 0)
|
||||
{
|
||||
FPRINTF(stderr, "%s: MPI_Info does not contain the panfs_layout_total_num_comps hint which is necessary to specify a valid RAID10 layout to the PAN_FS_CLIENT_LAYOUT_CREATE_FILE ioctl.\n", myname);
|
||||
}
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
if ((layout_visit_policy < PAN_FS_CLIENT_LAYOUT_VISIT__ROUND_ROBIN) ||
|
||||
(layout_visit_policy > PAN_FS_CLIENT_LAYOUT_VISIT__ROUND_ROBIN_WITH_HASHED_OFFSET))
|
||||
{
|
||||
FPRINTF(stderr, "%s: panfs_layout_visit_policy is not a valid value: %u.\n", myname, layout_visit_policy);
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
}
|
||||
/* Create the file via ioctl() or open(). ADIOI_PANFS_Open's caller
|
||||
* already optimizes performance by only calling this function with
|
||||
* ADIO_CREATE on rank 0. Therefore, we don't need to worry about
|
||||
* implementing that optimization here. */
|
||||
if((layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID0) || (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE)
|
||||
|| (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID10)) {
|
||||
pan_fs_client_layout_create_args_t file_create_args;
|
||||
int fd_dir;
|
||||
char* slash;
|
||||
struct stat stat_buf;
|
||||
int err;
|
||||
char *path;
|
||||
|
||||
/* Check that the file does not exist before
|
||||
* trying to create it. The ioctl itself should
|
||||
* be able to handle this condition. Currently,
|
||||
* the ioctl will return successfully if the file
|
||||
* has been previously created. Filed bug 33862
|
||||
* to track the problem.
|
||||
*/
|
||||
err = stat(fd->filename,&stat_buf);
|
||||
if((err == -1) && (errno != ENOENT))
|
||||
{
|
||||
FPRINTF(stderr,"%s: Unexpected I/O Error calling stat() on PanFS file: %s.\n", myname, strerror(errno));
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
else if (err == 0)
|
||||
{
|
||||
FPRINTF(stderr,"%s: Cannot create PanFS file with ioctl when file already exists.\n", myname);
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* (err == -1) && (errno == ENOENT) */
|
||||
/* File does not exist */
|
||||
path = ADIOI_Strdup(fd->filename);
|
||||
slash = strrchr(path, '/');
|
||||
if (!slash)
|
||||
ADIOI_Strncpy(path, ".", 2);
|
||||
else {
|
||||
if (slash == path)
|
||||
*(path + 1) = '\0';
|
||||
else *slash = '\0';
|
||||
}
|
||||
|
||||
/* create PanFS object */
|
||||
memset(&file_create_args,0,sizeof(pan_fs_client_layout_create_args_t));
|
||||
/* open directory */
|
||||
fd_dir = open(path, O_RDONLY);
|
||||
if (fd_dir < 0) {
|
||||
FPRINTF(stderr, "%s: I/O Error opening parent directory to create PanFS file using ioctl: %s.\n", myname, strerror(errno));
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
char *file_name_ptr = fd->filename;
|
||||
slash = strrchr(fd->filename, '/');
|
||||
if (slash)
|
||||
{
|
||||
file_name_ptr = slash + 1;
|
||||
}
|
||||
/* create file in the directory */
|
||||
file_create_args.mode = perm;
|
||||
file_create_args.version = PAN_FS_CLIENT_LAYOUT_VERSION;
|
||||
file_create_args.flags = PAN_FS_CLIENT_LAYOUT_CREATE_F__NONE;
|
||||
ADIOI_Strncpy(file_create_args.filename, file_name_ptr, strlen(fd->filename)+1);
|
||||
file_create_args.layout.agg_type = layout_type;
|
||||
file_create_args.layout.layout_is_valid = 1;
|
||||
if(layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE)
|
||||
{
|
||||
file_create_args.layout.u.raid1_5_parity_stripe.total_num_comps = layout_total_num_comps;
|
||||
file_create_args.layout.u.raid1_5_parity_stripe.parity_stripe_width = layout_parity_stripe_width;
|
||||
file_create_args.layout.u.raid1_5_parity_stripe.parity_stripe_depth = layout_parity_stripe_depth;
|
||||
file_create_args.layout.u.raid1_5_parity_stripe.stripe_unit = layout_stripe_unit;
|
||||
file_create_args.layout.u.raid1_5_parity_stripe.layout_visit_policy = layout_visit_policy;
|
||||
}
|
||||
else if(layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID0)
|
||||
{
|
||||
file_create_args.layout.u.raid0.total_num_comps = layout_total_num_comps;
|
||||
file_create_args.layout.u.raid0.stripe_unit = layout_stripe_unit;
|
||||
}
|
||||
else if(layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID10)
|
||||
{
|
||||
file_create_args.layout.u.raid10.total_num_comps = layout_total_num_comps;
|
||||
file_create_args.layout.u.raid10.stripe_unit = layout_stripe_unit;
|
||||
file_create_args.layout.u.raid10.layout_visit_policy = layout_visit_policy;
|
||||
}
|
||||
err = ioctl(fd_dir, PAN_FS_CLIENT_LAYOUT_CREATE_FILE, &file_create_args);
|
||||
if (err < 0) {
|
||||
FPRINTF(stderr, "%s: I/O Error doing ioctl on parent directory to create PanFS file using ioctl: %s.\n", myname, strerror(errno));
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
err = close(fd_dir);
|
||||
}
|
||||
ADIOI_Free(path);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
int create_fd = open(fd->filename,amode,perm);
|
||||
if(create_fd != -1)
|
||||
{
|
||||
close(create_fd);
|
||||
}
|
||||
else
|
||||
{
|
||||
FPRINTF(stderr, "%s: I/O Error creating PanFS file using open: %s.\n", myname, strerror(errno));
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (fd->access_mode & ADIO_RDONLY)
|
||||
amode = amode | O_RDONLY;
|
||||
if (fd->access_mode & ADIO_WRONLY)
|
||||
amode = amode | O_WRONLY;
|
||||
if (fd->access_mode & ADIO_RDWR)
|
||||
amode = amode | O_RDWR;
|
||||
if (fd->access_mode & ADIO_EXCL)
|
||||
amode = amode | O_EXCL;
|
||||
|
||||
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
|
||||
ADIOI_Info_get(fd->info, "panfs_concurrent_write", MPI_MAX_INFO_VAL,
|
||||
value, &flag);
|
||||
if (flag) {
|
||||
unsigned long int concurrent_write = strtoul(value,NULL,10);
|
||||
if(concurrent_write == 1)
|
||||
{
|
||||
amode = amode | O_CONCURRENT_WRITE;
|
||||
}
|
||||
}
|
||||
ADIOI_Free(value);
|
||||
|
||||
fd->fd_sys = open(fd->filename, amode, perm);
|
||||
fd->fd_direct = -1;
|
||||
|
||||
if (fd->fd_sys != -1)
|
||||
{
|
||||
int rc;
|
||||
char temp_buffer[TEMP_BUFFER_SIZE];
|
||||
pan_fs_client_layout_query_args_t file_query_args;
|
||||
memset(&file_query_args,0,sizeof(pan_fs_client_layout_query_args_t));
|
||||
file_query_args.version = PAN_FS_CLIENT_LAYOUT_VERSION;
|
||||
rc = ioctl(fd->fd_sys, PAN_FS_CLIENT_LAYOUT_QUERY_FILE, &file_query_args);
|
||||
if (rc < 0)
|
||||
{
|
||||
/* Error - set layout type to unknown */
|
||||
ADIOI_Info_set(fd->info, "panfs_layout_type", "PAN_FS_CLIENT_LAYOUT_TYPE__INVALID");
|
||||
}
|
||||
else
|
||||
{
|
||||
ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.agg_type);
|
||||
ADIOI_Info_set(fd->info, "panfs_layout_type", temp_buffer);
|
||||
if (file_query_args.layout.layout_is_valid == 1)
|
||||
{
|
||||
switch (file_query_args.layout.agg_type)
|
||||
{
|
||||
case PAN_FS_CLIENT_LAYOUT_TYPE__RAID0:
|
||||
ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid0.stripe_unit);
|
||||
ADIOI_Info_set(fd->info, "panfs_layout_stripe_unit", temp_buffer);
|
||||
ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid0.total_num_comps);
|
||||
ADIOI_Info_set(fd->info, "panfs_layout_total_num_comps", temp_buffer);
|
||||
break;
|
||||
case PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE:
|
||||
ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid1_5_parity_stripe.stripe_unit);
|
||||
ADIOI_Info_set(fd->info, "panfs_layout_stripe_unit", temp_buffer);
|
||||
ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid1_5_parity_stripe.parity_stripe_width);
|
||||
ADIOI_Info_set(fd->info, "panfs_layout_parity_stripe_width", temp_buffer);
|
||||
ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid1_5_parity_stripe.parity_stripe_depth);
|
||||
ADIOI_Info_set(fd->info, "panfs_layout_parity_stripe_depth", temp_buffer);
|
||||
ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid1_5_parity_stripe.total_num_comps);
|
||||
ADIOI_Info_set(fd->info, "panfs_layout_total_num_comps", temp_buffer);
|
||||
ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid1_5_parity_stripe.layout_visit_policy);
|
||||
ADIOI_Info_set(fd->info, "panfs_layout_visit_policy", temp_buffer);
|
||||
break;
|
||||
case PAN_FS_CLIENT_LAYOUT_TYPE__RAID10:
|
||||
ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid10.stripe_unit);
|
||||
ADIOI_Info_set(fd->info, "panfs_layout_stripe_unit", temp_buffer);
|
||||
ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid10.total_num_comps);
|
||||
ADIOI_Info_set(fd->info, "panfs_layout_total_num_comps", temp_buffer);
|
||||
ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid10.layout_visit_policy);
|
||||
ADIOI_Info_set(fd->info, "panfs_layout_visit_policy", temp_buffer);
|
||||
break;
|
||||
case PAN_FS_CLIENT_LAYOUT_TYPE__INVALID:
|
||||
case PAN_FS_CLIENT_LAYOUT_TYPE__DEFAULT:
|
||||
MPI_Info_set(fd->info, "panfs_layout_type",
|
||||
"PAN_FS_CLIENT_LAYOUT_TYPE__INVALID");
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ((fd->fd_sys != -1) && (fd->access_mode & ADIO_APPEND))
|
||||
fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END);
|
||||
|
||||
if (fd->fd_sys == -1) {
|
||||
*error_code = ADIOI_Err_create_code(myname, fd->filename, errno);
|
||||
}
|
||||
else *error_code = MPI_SUCCESS;
|
||||
}
|
68
ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs_read.c
Обычный файл
68
ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs_read.c
Обычный файл
@ -0,0 +1,68 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 1997 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_panfs.h"
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
void ADIOI_PANFS_ReadContig(ADIO_File fd, void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status,
|
||||
int *error_code)
|
||||
{
|
||||
MPI_Count err = -1, datatype_size, len;
|
||||
static char myname[] = "ADIOI_PANFS_READCONTIG";
|
||||
|
||||
MPI_Type_size_x(datatype, &datatype_size);
|
||||
len = datatype_size * count;
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) {
|
||||
offset = fd->fp_ind;
|
||||
}
|
||||
|
||||
if (fd->fp_sys_posn != offset) {
|
||||
err = lseek(fd->fd_sys, offset, SEEK_SET);
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err == -1) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
MPI_ERR_IO, "**io",
|
||||
"**io %s", strerror(errno));
|
||||
fd->fp_sys_posn = -1;
|
||||
return;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
}
|
||||
|
||||
AD_PANFS_RETRY(read(fd->fd_sys, buf, len),err)
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err == -1) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
MPI_ERR_IO, "**io",
|
||||
"**io %s", strerror(errno));
|
||||
fd->fp_sys_posn = -1;
|
||||
return;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
|
||||
fd->fp_sys_posn = offset + err;
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) {
|
||||
fd->fp_ind += err;
|
||||
}
|
||||
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
if (err != -1) MPIR_Status_set_bytes(status, datatype, err);
|
||||
#endif
|
||||
|
||||
*error_code = MPI_SUCCESS;
|
||||
}
|
49
ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs_resize.c
Обычный файл
49
ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs_resize.c
Обычный файл
@ -0,0 +1,49 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2004 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_panfs.h"
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
/*
 * ADIOI_PANFS_Resize - set the size of a PanFS file to `size` bytes.
 *
 * Rank 0 of fd->comm truncates the file, then every rank meets at a
 * barrier.  After the barrier the other ranks fstat() the file and compare
 * the reported size with the requested one; a mismatch is treated as a
 * coherency failure and aborts the job via MPI_Abort.
 *
 * fd         - open ADIO file; fd_sys and comm are used.
 * size       - requested file size in bytes.
 * error_code - MPI_SUCCESS on success; an MPI_ERR_IO-class code built from
 *              strerror(errno) when this rank's ftruncate/fstat call failed.
 */
void ADIOI_PANFS_Resize(ADIO_File fd, ADIO_Offset size, int *error_code)
{
    int err;
    int myrank;
    struct stat stat_buf;
    static char myname[] = "ADIOI_PANFS_RESIZE";

    MPI_Comm_rank(fd->comm, &myrank);
    if (!myrank)
    {
        AD_PANFS_RETRY(ftruncate(fd->fd_sys,size),err);
        MPI_Barrier(fd->comm);
    }
    else
    {
        MPI_Barrier(fd->comm);
        AD_PANFS_RETRY(fstat(fd->fd_sys,&stat_buf),err);
        /* stat_buf is only meaningful when fstat succeeded; on failure fall
         * through to the error handling below instead of comparing (and
         * possibly aborting on) uninitialized data. */
        if((err != -1) && (((ADIO_Offset)stat_buf.st_size) != size))
        {
            /* This should never happen otherwise there is a coherency problem. */
            FPRINTF(stderr, "%s: Rank %d: Resize failed: requested=%llu actual=%llu.\n",myname,myrank,(unsigned long long)size,(unsigned long long)stat_buf.st_size);
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
    }

    /* --BEGIN ERROR HANDLING-- */
    if (err == -1) {
        *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__, MPI_ERR_IO,
                                           "**io", "**io %s", strerror(errno));
        return;
    }
    /* --END ERROR HANDLING-- */

    *error_code = MPI_SUCCESS;
}
|
68
ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs_write.c
Обычный файл
68
ompi/mca/io/romio314/romio/adio/ad_panfs/ad_panfs_write.c
Обычный файл
@ -0,0 +1,68 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2004 University of Chicago.
|
||||
* See COPYRIGHT notice in top-level directory.
|
||||
*/
|
||||
|
||||
#include "ad_panfs.h"
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
void ADIOI_PANFS_WriteContig(ADIO_File fd, const void *buf, int count,
|
||||
MPI_Datatype datatype, int file_ptr_type,
|
||||
ADIO_Offset offset, ADIO_Status *status,
|
||||
int *error_code)
|
||||
{
|
||||
MPI_Count err = -1, datatype_size, len;
|
||||
static char myname[] = "ADIOI_PANFS_WRITECONTIG";
|
||||
|
||||
MPI_Type_size_x(datatype, &datatype_size);
|
||||
len = datatype_size * count;
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) {
|
||||
offset = fd->fp_ind;
|
||||
}
|
||||
|
||||
if (fd->fp_sys_posn != offset) {
|
||||
err = lseek(fd->fd_sys, offset, SEEK_SET);
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err == -1) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
MPI_ERR_IO, "**io",
|
||||
"**io %s", strerror(errno));
|
||||
fd->fp_sys_posn = -1;
|
||||
return;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
}
|
||||
|
||||
AD_PANFS_RETRY(write(fd->fd_sys, buf, len),err)
|
||||
/* --BEGIN ERROR HANDLING-- */
|
||||
if (err == -1) {
|
||||
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
|
||||
MPIR_ERR_RECOVERABLE,
|
||||
myname, __LINE__,
|
||||
MPI_ERR_IO, "**io",
|
||||
"**io %s", strerror(errno));
|
||||
fd->fp_sys_posn = -1;
|
||||
return;
|
||||
}
|
||||
/* --END ERROR HANDLING-- */
|
||||
|
||||
fd->fp_sys_posn = offset + err;
|
||||
|
||||
if (file_ptr_type == ADIO_INDIVIDUAL) {
|
||||
fd->fp_ind += err;
|
||||
}
|
||||
|
||||
#ifdef HAVE_STATUS_SET_BYTES
|
||||
if (err != -1 && status) MPIR_Status_set_bytes(status, datatype, err);
|
||||
#endif
|
||||
|
||||
*error_code = MPI_SUCCESS;
|
||||
}
|
Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше
Загрузка…
x
Ссылка в новой задаче
Block a user