1
1
This commit was SVN r29695.
Этот коммит содержится в:
Nathan Hjelm 2013-11-13 21:02:55 +00:00
родитель f4e647538c
Коммит 6b3cf0c1ba
545 изменённых файлов: 13962 добавлений и 8220 удалений

Просмотреть файл

@ -78,10 +78,10 @@ AC_DEFUN([MCA_ompi_io_romio_CONFIG],[
[AS_IF([test ! -z $build], [io_romio_flags="$io_romio_flags --build=$build"])
AS_IF([test ! -z $host], [io_romio_flags="$io_romio_flags --host=$host"])
AS_IF([test ! -z $target], [io_romio_flags="$io_romio_flags --target=$target"])])
io_romio_flags_define="$io_romio_flags CFLAGS='$CFLAGS' CPPFLAGS='$CPPFLAGS' FFLAGS='$FFLAGS' LDFLAGS='$LDFLAGS' --$io_romio_shared-shared --$io_romio_static-static $io_romio_flags $io_romio_prefix_arg --with-mpi=open_mpi --disable-aio"
io_romio_flags_define="$io_romio_flags FROM_OMPI=yes CC='$CC' CFLAGS='$CFLAGS' CPPFLAGS='$CPPFLAGS' FFLAGS='$FFLAGS' LDFLAGS='$LDFLAGS' --$io_romio_shared-shared --$io_romio_static-static $io_romio_flags $io_romio_prefix_arg --with-mpi=$(top_srcdir)/ompi --disable-aio"
AC_DEFINE_UNQUOTED([MCA_io_romio_COMPLETE_CONFIGURE_FLAGS], ["$io_romio_flags_define"], [Complete set of command line arguments given to ROMIOs configure script])
io_romio_flags="$io_romio_flags CFLAGS="'"'"$CFLAGS"'"'" CPPFLAGS="'"'"$CPPFLAGS"'"'" FFLAGS="'"'"$FFLAGS"'"'" LDFLAGS="'"'"$LDFLAGS"'"'" --$io_romio_shared-shared --$io_romio_static-static $io_romio_flags $io_romio_prefix_arg --with-mpi=open_mpi --disable-aio"
io_romio_flags="$io_romio_flags FROM_OMPI=yes CC="'"'"$CC"'"'" CFLAGS="'"'"$CFLAGS"'"'" CPPFLAGS="'"'"$CPPFLAGS"'"'" FFLAGS="'"'"$FFLAGS"'"'" LDFLAGS="'"'"$LDFLAGS"'"'" --$io_romio_shared-shared --$io_romio_static-static $io_romio_flags $io_romio_prefix_arg --with-mpi=$(top_srcdir) --disable-aio"
ompi_show_subtitle "Configuring ROMIO distribution"
OMPI_CONFIG_SUBDIR([ompi/mca/io/romio/romio],

Просмотреть файл

@ -39,7 +39,7 @@
'PFS'=> romio,
'PIOFS'=> romio,
'MPICH'=> romio,
'MPICH2' => romio,
'MPICH' => romio,
'MPI_OFFSET_IS_INT'=> romio,
'MPI_COMBINER_NAMED'=> romio,
'_UNICOS'=> romio,

Просмотреть файл

@ -21,6 +21,7 @@ __netbsd_
__LINUX_
__LINUX_ALPHA_
__CRAY_
__Darwin_
__nfs_
__ufs_
__pfs_
@ -35,5 +36,4 @@ __sgi_mpi
__hp_mpi
__cray_mpi
__lam_mpi
__Darwin
__open_mpi

Просмотреть файл

@ -1,4 +1,4 @@
#
# -*- Mode: Makefile; -*-
# Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
@ -16,23 +16,164 @@
#
# $HEADER$
#
# (C) 2011 by Argonne National Laboratory.
# See COPYRIGHT in top-level directory.
#
# OMPI: include a top level makefile with some options
include $(top_srcdir)/Makefile.options
# Left out common/dataloop -- it's not enabled in MPICH2-1.0.7.
## TODO: need to write an automakefile that handles two primary cases:
## 1) that ROMIO is being embedded within the MPI library, as in MPICH or Open
## MPI
## 2) that ROMIO is being built standalone, old-school style. This case is
## basically unused in modern practice.
SUBDIRS = include adio mpi-io
DIST_SUBDIRS = $(SUBDIRS) doc util
# help autoreconf and friends realize where the macros live
ACLOCAL_AMFLAGS = -I confdb
EXTRA_DIST = README COPYRIGHT README_OMPI autogen.sh
# empty variable initializations so that later code can append (+=)
include_HEADERS =
nodist_include_HEADERS =
noinst_HEADERS =
EXTRA_DIST =
SUFFIXES =
doc1_src_txt =
# ------------------------------------------------------------------------
# variables to be populated by the included Makefile.mk fragments:
# These are files that contain MPI routines (e.g., MPI_File_open).
# In MPICH these will have an MPI_ and a PMPI_ version. Other implementations
# (like OMPI) only want these to be MPI_ routines, possibly with some
# name-shifting prefix.
romio_mpi_sources =
# regular old source files that implement ROMIO, such as ADIO code
romio_other_sources =
# code that may need to be "up" called from the MPI library and/or is
# MPI-implementation-specific in some way
glue_sources =
# ------------------------------------------------------------------------
# when building under MPICH we must be able to find mpi.h
AM_CPPFLAGS += $(MPI_H_INCLUDE)
# ------------------------------------------------------------------------
# handle the "include" directory here
AM_CPPFLAGS += -I$(top_builddir)/include -I$(top_srcdir)/include
# nodist_ b/c these are created by config.status and should not be distributed
nodist_include_HEADERS += include/mpio.h
noinst_HEADERS += include/io_romio_conv.h
# ------------------------------------------------------------------------
SUBDIRS =
DIST_SUBDIRS = test test-internal
# for the sake of parallel make and avoiding an excessive number of convenience
# libs, we use a subdir automake fragment strategy
include mpi-io/Makefile.mk
include adio/Makefile.mk
EXTRA_DIST += autogen.sh
if BUILD_ROMIO_EMBEDDED
# Build a libtool convenience library that the enclosing MPI implementation can
# use by adding it to the right _LIBADD variable.
noinst_LTLIBRARIES = libromio_dist.la
libromio_dist_la_SOURCES = $(romio_mpi_sources) $(romio_other_sources) $(glue_sources)
## NOTE: ROMIO's old build system builds a bunch of _foo.o objects that contain
## PMPI_ implementations as well as calls to only other PMPI routines. In
## MPICH, these are the objects that need to go into libmpich, while the foo.o
## objects should go into libpmpich. Furthermore, the -D option for ROMIO's
## source files is different and inverted (in the boolean sense) compared with
## MPICH's defintion. And ROMIO was dumping all of the symbols into the main
## libmpich library, regardless of the separate profiling library's existence.
##
## Annoying, right?
if BUILD_PROFILING_LIB
# The current best strategy for now is to build the PMPI symbols as a separate
# convenience lib to permit adding the special "-D..." argument for all objects.
# MPICH will then link in both convenience library into libmpich, since it
# won't work very well the other way around.
noinst_LTLIBRARIES += libpromio.la
libpromio_la_SOURCES = $(romio_mpi_sources)
libpromio_la_CPPFLAGS = $(AM_CPPFLAGS) -DMPIO_BUILD_PROFILING
endif BUILD_PROFILING_LIB
else !BUILD_ROMIO_EMBEDDED
## TODO build a libromio.la (non-convenience) and possibly a libglue.la or something?
endif
# --------------------------------------------------------------------------
.PHONY: coverage

# Sources handed to gcov by the "coverage" target.  This must name a source
# list that this makefile actually defines; the previous value
# ($(libmpl_la_SOURCES)) is not defined anywhere in this file, which left the
# per-file loop below with nothing to iterate over.
gcov_sources = $(libromio_dist_la_SOURCES)

# assumes that these sources were compiled appropriately ("-fprofile-arcs"
# and "-ftest-coverage")
#
# For every source file: run gcov on it, move the resulting <file>.gcov next
# to the source, then remove any other *.gcov files left in the current
# directory.  Afterwards recurse into each entry of $(SUBDIRS); the trailing
# "-" sentinel keeps the shell "for" list non-empty when SUBDIRS is empty.
coverage:
	@for file in $(gcov_sources) ; do \
	    dir=`dirname $$file` ; \
	    bname=`basename $$file` ; \
	    echo "( $(GCOV) -b -f -o $$file $$file && mv $${bname}.gcov $$dir )" ; \
	    ( $(GCOV) -b -f -o $$file $$file && mv $${bname}.gcov $$dir ) ; \
	    rm -f *.gcov ; \
	done
	for subdir in $(SUBDIRS) - ; do \
	    if test $$subdir = "-" ; then break ; fi ; \
	    ( cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) coverage ) ; \
	done
# --------------------------------------------------------------------------
.PHONY: mandoc mandoc-local htmldoc htmldoc-local
SUFFIXES += .man-phony .html-phony .man1-phony .html1-phony .txt
# "make V=1" support for our documentation recipes
doctextman_verbose = $(doctextman_verbose_$(V))
doctextman_verbose_ = $(doctextman_verbose_$(AM_DEFAULT_VERBOSITY))
doctextman_verbose_0 = @echo " DOCTEXTMAN " $@;
doctexthtml_verbose = $(doctexthtml_verbose_$(V))
doctexthtml_verbose_ = $(doctexthtml_verbose_$(AM_DEFAULT_VERBOSITY))
doctexthtml_verbose_0 = @echo " DOCTEXTHTML " $@;
# Build dir paths where the man pages will be created. Will usually be
# overridden by MPICH make.
mandoc_path1=$(abs_top_builddir)/man/man1
mandoc_path3=$(abs_top_builddir)/man/man3
htmldoc_path1=$(abs_top_builddir)/www/www1
htmldoc_path3=$(abs_top_builddir)/www/www3
doctext_docnotes=
.c.man-phony:
$(doctextman_verbose)$(DOCTEXT) -man -mpath $(mandoc_path3) -ext 3 \
-heading MPI -quotefmt $(doctext_docnotes) $<
.c.html-phony:
$(doctexthtml_verbose)$(DOCTEXT) -html -mpath $(htmldoc_path3) \
-heading MPI -quotefmt $(doctext_docnotes) $<
.txt.man1-phony:
$(doctextman_verbose)$(DOCTEXT) -man -mpath $(mandoc_path1) -ext 1 \
-heading MPI -quotefmt $(doctext_docnotes) $<
.txt.html1-phony:
$(doctexthtml_verbose)$(DOCTEXT) -html -mpath $(htmldoc_path1) \
-heading MPI -quotefmt $(doctext_docnotes) $<
# use mandoc-local target to force directory creation before running DOCTEXT
mandoc:
test -d $(mandoc_path1) || $(MKDIR_P) $(mandoc_path1)
test -d $(mandoc_path3) || $(MKDIR_P) $(mandoc_path3)
$(MAKE) $(AM_MAKEFLAGS) mandoc-local
mandoc-local: $(romio_mpi_sources:.c=.man-phony) $(doc1_src_txt:.txt=.man1-phony)
# use htmldoc-local target to force directory creation before running DOCTEXT
#
# The directories created here must be the ones the .html-phony/.html1-phony
# rules pass to DOCTEXT via -mpath, i.e. $(htmldoc_path1)/$(htmldoc_path3).
# Those variables may be overridden by an enclosing build, so they are used
# here instead of hard-coded $(top_builddir)/www/... paths (same approach as
# the mandoc target).
htmldoc:
	test -d $(htmldoc_path1) || $(MKDIR_P) $(htmldoc_path1)
	test -d $(htmldoc_path3) || $(MKDIR_P) $(htmldoc_path3)
	$(MAKE) $(AM_MAKEFLAGS) htmldoc-local
htmldoc-local: $(romio_mpi_sources:.c=.html-phony) $(doc1_src_txt:.txt=.html1-phony)
# --------------------------------------------------------------------------
noinst_LTLIBRARIES = libromio_dist.la
libromio_dist_la_SOURCES =
libromio_dist_la_LIBADD = \
adio/libadio.la \
mpi-io/libmpi-io.la \
mpi-io/glue/openmpi/libglue.la @ROMIO_LIBLIST@
libromio_dist_la_DEPENDENCIES = \
adio/libadio.la \
mpi-io/libmpi-io.la \
mpi-io/glue/openmpi/libglue.la

Просмотреть файл

@ -5,13 +5,13 @@
Major Changes in this version:
------------------------------
* Fixed performance problems with the darray and subarray datatypes
when using MPICH2.
when using MPICH.
* Better support for building against existing MPICH and MPICH2 versions.
* Better support for building against existing MPICH versions.
When building against an existing MPICH installation, use the
"--with-mpi=mpich" option to ROMIO configure. For MPICH2, use the
"--with-mpi=mpich2" option. These will allow ROMIO to take advantage
"--with-mpi=mpich" option to ROMIO configure. For MPICH, use the
"--with-mpi=mpich" option. These will allow ROMIO to take advantage
of internal features of these implementations.
* Deprecation of SFS, HFS, and PIOFS implementations.
@ -161,9 +161,9 @@ Major Changes in Version 1.0.3:
Major Changes Version 1.0.2:
---------------------------
* Implemented the shared file pointer functions (Section 9.4.4 of MPI-2) and
split collective I/O functions (Section 9.4.5). Therefore, the main
components of the MPI-2 I/O chapter not yet implemented are
* Implemented the shared file pointer functions and
split collective I/O functions. Therefore, the main
components of the MPI I/O chapter not yet implemented are
file interoperability and error handling.
* Added support for using "direct I/O" on SGI's XFS file system.
@ -298,13 +298,13 @@ General Information
-------------------
ROMIO is a high-performance, portable implementation of MPI-IO (the
I/O chapter in MPI-2). ROMIO's home page is at
http://www.mcs.anl.gov/romio . The MPI-2 standard is available at
I/O chapter in MPI). ROMIO's home page is at
http://www.mcs.anl.gov/romio . The MPI standard is available at
http://www.mpi-forum.org/docs/docs.html .
This version of ROMIO includes everything defined in the MPI-2 I/O
chapter except support for file interoperability (Sec. 9.5 of MPI-2) and
user-defined error handlers for files (Sec. 4.13.3). The subarray and
This version of ROMIO includes everything defined in the MPI I/O
chapter except support for file interoperability and
user-defined error handlers for files. The subarray and
distributed array datatype constructor functions from Chapter 4
(Sec. 4.14.4 & 4.14.5) have been implemented. They are useful for
accessing arrays stored in files. The functions MPI_File_f2c and
@ -587,7 +587,7 @@ Use a larger number if you still get the error message.
* If a Fortran program uses a file handle created using ROMIO's C
interface, or vice-versa, you must use the functions MPI_File_c2f
or MPI_File_f2c (see MPI-2 Section 4.12.4). Such a situation occurs,
or MPI_File_f2c. Such a situation occurs,
for example, if a Fortran program uses an I/O library written in C
with MPI-IO calls. Similar functions MPIO_Request_f2c and
MPIO_Request_c2f are also provided.

Просмотреть файл

@ -1,181 +0,0 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
include $(top_srcdir)/Makefile.options
# Conditionals whether to build each subdir or not
if BUILD_BGL
BGL_DIR = ad_bgl
BGL_LIB = ad_bgl/libadio_bgl.la
else
BGL_DIR =
BGL_LIB =
endif
if BUILD_BGLOCKLESS
BGLOCKLESS_DIR = ad_bglockless
BGLOCKLESS_LIB = ad_bglockless/libadio_bglockless.la
else
BGLOCKLESS_DIR =
BGLOCKLESS_LIB =
endif
if BUILD_GRIDFTP
GRIDFTP_DIR = ad_gridftp
GRIDFTP_LIB = ad_gridftp/libadio_gridftp.la
else
GRIDFTP_DIR =
GRIDFTP_LIB =
endif
# 8 July 2008: romio-maint@mcs.anl.gov says that this is deprecated
#if BUILD_HFS
#HFS_DIR = ad_hfs
#HFS_LIB = ad_hfs/libadio_hfs.la
#else
#HFS_DIR =
#HFS_LIB =
#endif
if BUILD_LUSTRE
LUSTRE_DIR = ad_lustre
LUSTRE_LIB = ad_lustre/libadio_lustre.la
else
LUSTRE_DIR =
LUSTRE_LIB =
endif
if BUILD_NFS
NFS_DIR = ad_nfs
NFS_LIB = ad_nfs/libadio_nfs.la
else
NFS_DIR =
NFS_LIB =
endif
# Currently disabled
#if BUILD_NTFS
#NTFS_DIR = ad_ntfs
#NTFS_LIB = ad_ntfs/libadio_ntfs.la
#else
#NTFS_DIR =
#NTFS_LIB =
#endif
if BUILD_PANFS
PANFS_DIR = ad_panfs
PANFS_LIB = ad_panfs/libadio_panfs.la
else
PANFS_DIR =
PANFS_LIB =
endif
if BUILD_PFS
PFS_DIR = ad_pfs
PFS_LIB = ad_pfs/libadio_pfs.la
else
PFS_DIR =
PFS_LIB =
endif
# 8 July 2008: romio-maint@mcs.anl.gov says that this is deprecated
#if BUILD_PIOFS
#PIOFS_DIR = ad_piofs
#PIOFS_LIB = ad_piofs/libadio_piofs.la
#else
#PIOFS_DIR =
#PIOFS_LIB =
#endif
if BUILD_PVFS
PVFS_DIR = ad_pvfs
PVFS_LIB = ad_pvfs/libadio_pvfs.la
else
PVFS_DIR =
PVFS_LIB =
endif
if BUILD_PVFS2
PVFS2_DIR = ad_pvfs2
PVFS2_LIB = ad_pvfs2/libadio_pvfs2.la
else
PVFS2_DIR =
PVFS2_LIB =
endif
if BUILD_SFS
SFS_DIR = ad_sfs
SFS_LIB = ad_sfs/libadio_sfs.la
else
SFS_DIR =
SFS_LIB =
endif
if BUILD_TESTFS
TESTFS_DIR = ad_testfs
TESTFS_LIB = ad_testfs/libadio_testfs.la
else
TESTFS_DIR =
TESTFS_LIB =
endif
if BUILD_UFS
UFS_DIR = ad_ufs
UFS_LIB = ad_ufs/libadio_ufs.la
else
UFS_DIR =
UFS_LIB =
endif
if BUILD_XFS
XFS_DIR = ad_xfs
XFS_LIB = ad_xfs/libadio_xfs.la
else
XFS_DIR =
XFS_LIB =
endif
if BUILD_ZOIDFS
ZOID_DIR = ad_zoidfs
ZOID_LIB = ad_zoidfs/libadio_zoidfs.la
else
ZOID_DIR =
ZOID_LIB =
endif
# Sub-directories to build: the always-present ones plus every file-system
# driver whose BUILD_* conditional is enabled.  The BGL entry must use the
# BGL_DIR variable set by the BUILD_BGL conditional; $(BG_DIR) is never
# defined in this file and silently expanded to nothing.
# NTFS_DIR is referenced but its definition is commented out above, so it
# expands to nothing.
SUBDIRS = common include \
    $(BGL_DIR) $(BGLOCKLESS_DIR) \
    $(GRIDFTP_DIR) $(LUSTRE_DIR) $(NFS_DIR) $(NTFS_DIR) $(PANFS_DIR) \
    $(PFS_DIR) $(PVFS_DIR) $(PVFS2_DIR) $(SFS_DIR) \
    $(TESTFS_DIR) $(UFS_DIR) $(XFS_DIR) $(ZOID_DIR)

# Everything that goes into a distribution tarball, regardless of which
# drivers are enabled for this build.
DIST_SUBDIRS = common include \
    ad_bgl ad_bglockless ad_gridftp ad_lustre ad_nfs ad_ntfs \
    ad_panfs ad_pfs ad_pvfs ad_pvfs2 ad_sfs ad_testfs ad_ufs \
    ad_xfs ad_zoidfs

# Library
# libadio.la has no sources of its own; it only aggregates the per-driver
# convenience libraries.  As with SUBDIRS, the BGL entry uses BGL_LIB (the
# variable actually defined by the BUILD_BGL conditional).
noinst_LTLIBRARIES = libadio.la
libadio_la_SOURCES =
libadio_la_LIBADD = \
    common/libadio_common.la \
    $(BGL_LIB) $(BGLOCKLESS_LIB) \
    $(GRIDFTP_LIB) $(LUSTRE_LIB) $(NFS_LIB) $(NTFS_LIB) $(PANFS_LIB) \
    $(PFS_LIB) $(PVFS_LIB) $(PVFS2_LIB) $(SFS_LIB) \
    $(TESTFS_LIB) $(UFS_LIB) $(XFS_LIB) $(ZOID_LIB)

46
ompi/mca/io/romio/romio/adio/Makefile.mk Обычный файл
Просмотреть файл

@ -0,0 +1,46 @@
## -*- Mode: Makefile; -*-
## vim: set ft=automake :
##
## (C) 2011 by Argonne National Laboratory.
## See COPYRIGHT in top-level directory.
##
AM_CPPFLAGS += -I$(top_builddir)/adio/include -I$(top_srcdir)/adio/include
noinst_HEADERS += \
adio/include/adio.h \
adio/include/adio_cb_config_list.h \
adio/include/adio_extern.h \
adio/include/adioi.h \
adio/include/adioi_errmsg.h \
adio/include/adioi_error.h \
adio/include/adioi_fs_proto.h \
adio/include/heap-sort.h \
adio/include/mpio_error.h \
adio/include/mpipr.h \
adio/include/mpiu_greq.h \
adio/include/nopackage.h \
adio/include/mpiu_external32.h \
adio/include/romioconf-undefs.h
include $(top_srcdir)/adio/ad_bg/Makefile.mk
include $(top_srcdir)/adio/ad_bgl/Makefile.mk
include $(top_srcdir)/adio/ad_bglockless/Makefile.mk
include $(top_srcdir)/adio/ad_gridftp/Makefile.mk
include $(top_srcdir)/adio/ad_hfs/Makefile.mk
include $(top_srcdir)/adio/ad_lustre/Makefile.mk
include $(top_srcdir)/adio/ad_nfs/Makefile.mk
## NTFS builds are handled entirely by the separate Windows build system
##include $(top_srcdir)/adio/ad_ntfs/Makefile.mk
include $(top_srcdir)/adio/ad_panfs/Makefile.mk
include $(top_srcdir)/adio/ad_pfs/Makefile.mk
include $(top_srcdir)/adio/ad_piofs/Makefile.mk
include $(top_srcdir)/adio/ad_pvfs/Makefile.mk
include $(top_srcdir)/adio/ad_pvfs2/Makefile.mk
include $(top_srcdir)/adio/ad_sfs/Makefile.mk
include $(top_srcdir)/adio/ad_testfs/Makefile.mk
include $(top_srcdir)/adio/ad_ufs/Makefile.mk
include $(top_srcdir)/adio/ad_xfs/Makefile.mk
include $(top_srcdir)/adio/ad_zoidfs/Makefile.mk
include $(top_srcdir)/adio/common/Makefile.mk

11
ompi/mca/io/romio/romio/adio/ad_bg/.gitignore поставляемый Обычный файл
Просмотреть файл

@ -0,0 +1,11 @@
/Makefile
/.deps
/*.bb
/*.bbg
/*.gcda
/*.gcno
/.libs
/.libstamp*
/*.lo
/.*-cache
/.state-cache

Просмотреть файл

@ -0,0 +1,35 @@
## -*- Mode: Makefile; -*-
## vim: set ft=automake :
##
## (C) 2011 by Argonne National Laboratory.
## See COPYRIGHT in top-level directory.
##
if BUILD_AD_BG
AM_CPPFLAGS += -DBGL_OPTIM_STEP1_2=1 -DBGL_OPTIM_STEP1_1=1
noinst_HEADERS += \
adio/ad_bg/ad_bg_aggrs.h \
adio/ad_bg/ad_bg.h \
adio/ad_bg/ad_bg_pset.h \
adio/ad_bg/ad_bg_tuning.h
romio_other_sources += \
adio/ad_bg/ad_bg_aggrs.c \
adio/ad_bg/ad_bg_close.c \
adio/ad_bg/ad_bg_flush.c \
adio/ad_bg/ad_bg_hints.c \
adio/ad_bg/ad_bg_pset.c \
adio/ad_bg/ad_bg_read.c \
adio/ad_bg/ad_bg_tuning.c \
adio/ad_bg/ad_bg_write.c \
adio/ad_bg/ad_bg.c \
adio/ad_bg/ad_bg_fcntl.c \
adio/ad_bg/ad_bg_getsh.c \
adio/ad_bg/ad_bg_open.c \
adio/ad_bg/ad_bg_rdcoll.c \
adio/ad_bg/ad_bg_setsh.c \
adio/ad_bg/ad_bg_wrcoll.c
endif BUILD_AD_BG

51
ompi/mca/io/romio/romio/adio/ad_bg/ad_bg.c Обычный файл
Просмотреть файл

@ -0,0 +1,51 @@
/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp. 2007, 2008 */
/* ---------------------------------------------------------------- */
/**
* \file ad_bg.c
* \brief Defines ADIO_BG_operations, the ADIO function table for the BG driver.
*/
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (C) 2001 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*/
#define BG_OPTIM_STEP1_1 1
#include "ad_bg.h"
/* adioi.h has the ADIOI_Fns_struct define */
#include "adioi.h"
/*
 * ADIO function table for the BG driver.
 *
 * The initializer is positional, so the order of the entries must match the
 * member order of struct ADIOI_Fns_struct; the trailing comment on each line
 * names the slot being filled.  Slots with a BG-specific implementation use
 * the ADIOI_BG_* routines; the remaining slots use ADIOI_GEN_* (or
 * ADIOI_FAKE_*) routines.
 */
struct ADIOI_Fns_struct ADIO_BG_operations = {
ADIOI_BG_Open, /* Open */
ADIOI_GEN_OpenColl, /* Collective open */
ADIOI_BG_ReadContig, /* ReadContig */
ADIOI_BG_WriteContig, /* WriteContig */
ADIOI_BG_ReadStridedColl, /* ReadStridedColl */
ADIOI_BG_WriteStridedColl, /* WriteStridedColl */
ADIOI_GEN_SeekIndividual, /* SeekIndividual */
ADIOI_BG_Fcntl, /* Fcntl */
ADIOI_BG_SetInfo, /* SetInfo */
ADIOI_BG_ReadStrided, /* ReadStrided */
ADIOI_BG_WriteStrided, /* WriteStrided */
ADIOI_BG_Close, /* Close */
/* Nonblocking contiguous I/O: the ADIOI_GEN_* versions are selected only when
 * ROMIO_HAVE_WORKING_AIO is defined (a compile-time warning is emitted in
 * that case); otherwise the ADIOI_FAKE_* versions fill these two slots. */
#ifdef ROMIO_HAVE_WORKING_AIO
#warning Consider BG support for NFS before enabling this.
ADIOI_GEN_IreadContig, /* IreadContig */
ADIOI_GEN_IwriteContig, /* IwriteContig */
#else
ADIOI_FAKE_IreadContig, /* IreadContig */
ADIOI_FAKE_IwriteContig, /* IwriteContig */
#endif
ADIOI_GEN_IODone, /* ReadDone */
ADIOI_GEN_IODone, /* WriteDone */
ADIOI_GEN_IOComplete, /* ReadComplete */
ADIOI_GEN_IOComplete, /* WriteComplete */
ADIOI_GEN_IreadStrided, /* IreadStrided */
ADIOI_GEN_IwriteStrided, /* IwriteStrided */
ADIOI_BG_Flush, /* Flush */
ADIOI_GEN_Resize, /* Resize */
ADIOI_GEN_Delete, /* Delete */
ADIOI_GEN_Feature, /* Features */
};

97
ompi/mca/io/romio/romio/adio/ad_bg/ad_bg.h Обычный файл
Просмотреть файл

@ -0,0 +1,97 @@
/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp. 2007, 2008 */
/* ---------------------------------------------------------------- */
/**
* \file ad_bg.h
* \brief Prototypes for the ADIOI_BG_* entry points of the BG ADIO driver.
*/
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*/
#ifndef AD_BG_INCLUDE
#define AD_BG_INCLUDE
#include <unistd.h>
#include <stdlib.h>
#include <sys/types.h>
#include <fcntl.h>
#include "adio.h"
#ifdef HAVE_SIGNAL_H
#include <signal.h>
#endif
#ifdef HAVE_AIO_H
#include <aio.h>
#endif
#if 0
int ADIOI_BG_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset,
int wr, void *handle);
#endif
void ADIOI_BG_Open(ADIO_File fd, int *error_code);
void ADIOI_BG_Close(ADIO_File fd, int *error_code);
void ADIOI_BG_ReadContig(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status, int
*error_code);
void ADIOI_BG_WriteContig(ADIO_File fd, const void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status, int
*error_code);
#if 0
void ADIOI_BG_IwriteContig(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Request *request, int
*error_code);
void ADIOI_BG_IreadContig(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Request *request, int
*error_code);
int ADIOI_BG_ReadDone(ADIO_Request *request, ADIO_Status *status, int
*error_code);
int ADIOI_BG_WriteDone(ADIO_Request *request, ADIO_Status *status, int
*error_code);
void ADIOI_BG_ReadComplete(ADIO_Request *request, ADIO_Status *status, int
*error_code);
void ADIOI_BG_WriteComplete(ADIO_Request *request, ADIO_Status *status,
int *error_code);
#endif
void ADIOI_BG_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int
*error_code);
void ADIOI_BG_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code);
void ADIOI_BG_WriteStrided(ADIO_File fd, const void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status, int
*error_code);
void ADIOI_BG_ReadStrided(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status, int
*error_code);
void ADIOI_BG_ReadStridedColl(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status, int
*error_code);
void ADIOI_BG_WriteStridedColl(ADIO_File fd, const void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status, int
*error_code);
void ADIOI_BG_Get_shared_fp(ADIO_File fd, int size, ADIO_Offset *shared_fp, int *error_code);
void ADIOI_BG_Set_shared_fp(ADIO_File fd, ADIO_Offset offset, int *error_code);
void ADIOI_BG_Flush(ADIO_File fd, int *error_code);
#include "ad_bg_tuning.h"
#endif

Просмотреть файл

@ -0,0 +1,983 @@
/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp. 2007, 2008 */
/* ---------------------------------------------------------------- */
/**
* \file ad_bg_aggrs.c
* \brief The externally used function from this file is declared in ad_bg_aggrs.h
*/
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (C) 1997-2001 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*/
/*#define TRACE_ON */
#include "adio.h"
#include "adio_cb_config_list.h"
#include "ad_bg.h"
#include "ad_bg_pset.h"
#include "ad_bg_aggrs.h"
#ifdef AGGREGATION_PROFILE
#include "mpe.h"
#endif
#include "mpidi_macros.h"
#ifdef USE_DBG_LOGGING
#define AGG_DEBUG 1
#endif
static int aggrsInPsetSize=0;
static int *aggrsInPset=NULL;
/* Comments copied from common:
* This file contains four functions:
*
* ADIOI_Calc_aggregator()
* ADIOI_Calc_file_domains()
* ADIOI_Calc_my_req()
* ADIOI_Calc_others_req()
*
* The last three of these were originally in ad_read_coll.c, but they are
* also shared with ad_write_coll.c. I felt that they were better kept with
* the rest of the shared aggregation code.
*/
/* Discussion of values available from above:
*
* ADIO_Offset st_offsets[0..nprocs-1]
* ADIO_Offset end_offsets[0..nprocs-1]
* These contain a list of start and end offsets for each process in
* the communicator. For example, an access at loc 10, size 10 would
* have a start offset of 10 and end offset of 19.
* int nprocs
* number of processors in the collective I/O communicator
* ADIO_Offset min_st_offset
* ADIO_Offset fd_start[0..nprocs_for_coll-1]
* starting location of "file domain"; region that a given process will
* perform aggregation for (i.e. actually do I/O)
* ADIO_Offset fd_end[0..nprocs_for_coll-1]
* start + size - 1 roughly, but it can be less, or 0, in the case of
* uneven distributions
*/
/* forward declaration */
static void
ADIOI_BG_compute_agg_ranklist_serial ( ADIO_File fd,
const ADIOI_BG_ConfInfo_t *confInfo,
ADIOI_BG_ProcInfo_t *all_procInfo,
int *aggrsInPset );
/*
 * Compute the aggregator-related parameters required by ADIO's 2-phase
 * collective I/O:
 *   . the number of aggregators (proxies) : fd->hints->cb_nodes
 *   . the ranks of the aggregators        : fd->hints->ranklist
 * Computing these two parameters in a BG-PSET-aware way lets the default
 * 2-phase collective I/O of ADIO work more efficiently.
 *
 * Every process of fd->comm takes part in the gather/broadcast calls below.
 * The selection itself is computed on rank 0 only.  Always returns 0.
 */
int
ADIOI_BG_gen_agg_ranklist(ADIO_File fd, int n_aggrs_per_pset)
{
    int my_rank, comm_size;
    ADIOI_BG_ConfInfo_t *conf;
    ADIOI_BG_ProcInfo_t *my_info, *gathered_info;

    TRACE_ERR("Entering ADIOI_BG_gen_agg_ranklist\n");

    MPI_Comm_size(fd->comm, &comm_size);
    MPI_Comm_rank(fd->comm, &my_rank);

    /* Collect this process's BG personality information */
    conf = ADIOI_BG_ConfInfo_new();
    my_info = ADIOI_BG_ProcInfo_new();
    ADIOI_BG_persInfo_init(conf, my_info, comm_size, my_rank,
                           n_aggrs_per_pset, fd->comm);

    /* Gather BG personality information onto process 0.  Note that the
     * receive array is allocated on every rank, not only on rank 0. */
    gathered_info = ADIOI_BG_ProcInfo_new_n(comm_size);

    /* The file-scope aggrsInPset buffer is reused between calls and is only
     * reallocated when this communicator is larger than any seen before. */
    if (aggrsInPsetSize < comm_size) {
        if (aggrsInPset != NULL) {
            ADIOI_Free(aggrsInPset);
        }
        aggrsInPset = (int *) ADIOI_Malloc(comm_size * sizeof(int));
        aggrsInPsetSize = comm_size;
    }

    MPI_Gather(my_info, sizeof(ADIOI_BG_ProcInfo_t), MPI_BYTE,
               gathered_info, sizeof(ADIOI_BG_ProcInfo_t), MPI_BYTE,
               0, fd->comm);

    /* Compute a list of the ranks of chosen IO proxy CN on process 0 */
    if (my_rank == 0) {
        ADIOI_BG_compute_agg_ranklist_serial(fd, conf, gathered_info,
                                             aggrsInPset);
    }
    ADIOI_BG_ProcInfo_free(gathered_info);

    /* Send the info of IO proxy CN to all processes and keep the info in
     * fd->hints struct.  Declared in adio_cb_config_list.h */
    ADIOI_cb_bcast_rank_map(fd);

    /* Broadcast the BG-GPFS related file domain info */
    MPI_Bcast(aggrsInPset, fd->hints->cb_nodes * sizeof(int), MPI_BYTE,
              0, fd->comm);

    ADIOI_BG_persInfo_free(conf, my_info);
    TRACE_ERR("Leaving ADIOI_BG_gen_agg_ranklist\n");
    return 0;
}
/* There are some number of bridge nodes (randomly) distributed through the job
* We need to split the nodes among the bridge nodes */
/* Maybe find which bridge node is closer (manhattan distance) and try to
* distribute evenly.
*/
/*
* Pick IO aggregators based on the under PSET organization and stores the ranks of the proxy CNs in tmp_ranklist.
* The first order of tmp_ranklist is : PSET number
* The secondary order of the list is determined in ADIOI_BG_select_agg_in_pset() and thus adjustable.
*/
/* Pairs a process rank with the bridge rank recorded for that process, so
 * that a list of ranks can be ordered by bridge. */
typedef struct
{
    int rank;
    int bridge;
} sortstruct;

/*
 * qsort() comparator: orders sortstruct entries by ascending 'bridge'.
 *
 * p1, p2 - pointers to the two sortstruct elements being compared.
 * Returns a negative value, zero, or a positive value when *p1 sorts before,
 * equal to, or after *p2.
 *
 * The result is built from two comparisons instead of a subtraction, so it
 * cannot overflow for any pair of int values, and the pointed-to elements
 * are accessed through const pointers.
 */
static int intsort(const void *p1, const void *p2)
{
    const sortstruct *lhs = (const sortstruct *) p1;
    const sortstruct *rhs = (const sortstruct *) p2;

    return (lhs->bridge > rhs->bridge) - (lhs->bridge < rhs->bridge);
}
/*
 * Build the list of aggregator ranks from the gathered per-process info.
 *
 * confInfo     - configuration; nProcs, aggRatio, ioMaxSize and
 *                numBridgeRanks are read.
 * all_procInfo - array with confInfo->nProcs entries; only bridgeRank is read.
 * aggrsInPset  - not used by this routine.
 * tmp_ranklist - output array; receives the selected ranks.  The caller must
 *                provide room for at least the returned number of ints.
 *
 * Returns the number of entries stored in tmp_ranklist (aggTotal).
 */
static int
ADIOI_BG_compute_agg_ranklist_serial_do (const ADIOI_BG_ConfInfo_t *confInfo,
                                         ADIOI_BG_ProcInfo_t *all_procInfo,
                                         int *aggrsInPset,
                                         int *tmp_ranklist)
{
    TRACE_ERR("Entering ADIOI_BG_compute_agg_ranklist_serial_do\n");
    /* BES: This should be done in the init routines probably. */
    int i, j;
    int aggTotal;
    int distance, numAggs;
    int *aggList;

    /* Aggregators will be midpoints between sorted MPI rank lists of who shares a given
     * bridge node */
    sortstruct *bridgelist = (sortstruct *)ADIOI_Malloc(confInfo->nProcs * sizeof(sortstruct));
    for(i=0; i < confInfo->nProcs; i++)
    {
        bridgelist[i].bridge = all_procInfo[i].bridgeRank;
        bridgelist[i].rank = i;
        TRACE_ERR("bridgelist[%d].bridge: %d .rank: %d\n", i, bridgelist[i].bridge, i);
    }

    /* This list contains rank->bridge info. Now, we need to sort this list. */
    qsort(bridgelist, confInfo->nProcs, sizeof(sortstruct), intsort);

    /* In this array, we can pick an appropriate number of midpoints based on
     * our bridgenode index and the number of aggregators */
    numAggs = confInfo->aggRatio * confInfo->ioMaxSize /*virtualPsetSize*/;
    /* The product can truncate to 0 (or be negative for bad input); numAggs is
     * used as a divisor below, so fall back to a single aggregator instead of
     * dividing by zero. */
    if(numAggs < 1)
        numAggs = 1;

    if(numAggs == 1)
        aggTotal = 1;
    else
        /* the number of aggregators is (numAggs per bridgenode) plus each
         * bridge node is an aggregator */
        aggTotal = confInfo->numBridgeRanks * (numAggs+1);

    distance = (confInfo->ioMaxSize /*virtualPsetSize*/ / numAggs);
    TRACE_ERR("numBridgeRanks: %d, aggRatio: %f numBridge: %d pset size: %d numAggs: %d distance: %d, aggTotal: %d\n", confInfo->numBridgeRanks, confInfo->aggRatio, confInfo->numBridgeRanks, confInfo->ioMaxSize /*virtualPsetSize*/, numAggs, distance, aggTotal);
    aggList = (int *)ADIOI_Malloc(aggTotal * sizeof(int));

    /* For each bridge node, determine who the aggregators will be */
    /* basically, the n*distance and bridge node */
    if(aggTotal == 1) /* single aggregator (numAggs == 1): use the first bridge */
        aggList[0] = bridgelist[0].bridge;
    else
    {
        for(i=0; i < confInfo->numBridgeRanks; i++)
        {
            aggList[i]=bridgelist[i*confInfo->ioMaxSize /*virtualPsetSize*/].bridge;
            TRACE_ERR("aggList[%d]: %d\n", i, aggList[i]);

            for(j = 0; j < numAggs; j++)
            {
                /* Sets up a list of nodes which will act as aggregators. numAggs
                 * per bridge node total. The list of aggregators is
                 * bridgeNodes
                 * bridgeNode[0]aggr[0]
                 * bridgeNode[0]aggr[1]...
                 * bridgeNode[0]aggr[N]...
                 * ...
                 * bridgeNode[N]aggr[0]..
                 * bridgeNode[N]aggr[N]
                 */
                aggList[i*numAggs+j+confInfo->numBridgeRanks] = bridgelist[i*confInfo->ioMaxSize /*virtualPsetSize*/ + j*distance+1].rank;
                TRACE_ERR("(post bridge) agglist[%d] -> %d\n", confInfo->numBridgeRanks +i*numAggs+j, aggList[i*numAggs+j+confInfo->numBridgeRanks]);
            }
        }
    }

    /* Copy exactly the aggTotal entries that were allocated and filled in
     * aggList.  The previous length, numAggs*numBridgeRanks+numAggs, differs
     * from aggTotal whenever numAggs != numBridgeRanks (and in the
     * aggTotal == 1 case) and could read past the end of aggList. */
    memcpy(tmp_ranklist, aggList, aggTotal*sizeof(int));
    for(i=0;i<aggTotal;i++)
    {
        TRACE_ERR("tmp_ranklist[%d]: %d\n", i, tmp_ranklist[i]);
    }

    ADIOI_Free (bridgelist);
    ADIOI_Free (aggList);

    TRACE_ERR("Leaving ADIOI_BG_compute_agg_ranklist_serial_do\n");
    return aggTotal;
}
/*
 * Compute the aggregator rank list and store it in the fd->hints struct.
 *
 * fd           - file whose hints (cb_nodes, ranklist) are overwritten.
 * confInfo     - configuration passed on to the selection routine.
 * all_procInfo - per-process info array with confInfo->nProcs entries.
 * aggrsInPset  - passed through to the selection routine.
 *
 * On return fd->hints->cb_nodes holds the number of aggregators and
 * fd->hints->ranklist is a newly allocated array of that many ranks (any
 * previous ranklist is freed).
 */
static void
ADIOI_BG_compute_agg_ranklist_serial ( ADIO_File fd,
                                       const ADIOI_BG_ConfInfo_t *confInfo,
                                       ADIOI_BG_ProcInfo_t *all_procInfo,
                                       int *aggrsInPset )
{
    TRACE_ERR("Entering ADIOI_BG_compute_agg_ranklist_serial\n");
    int i;
    int naggs;
    int size;
    int *tmp_ranklist;

    /* compute the ranklist of IO aggregators and put into tmp_ranklist */
    tmp_ranklist = (int *) ADIOI_Malloc (confInfo->nProcs * sizeof(int));

# if AGG_DEBUG
    for (i=0; i<confInfo->nProcs; i++) {
        DBG_FPRINTF(stderr, "\tcpuid %1d, rank = %6d\n", all_procInfo[i].coreID, all_procInfo[i].rank );
    }
# endif

    naggs=
        ADIOI_BG_compute_agg_ranklist_serial_do (confInfo, all_procInfo, aggrsInPset, tmp_ranklist);

# define VERIFY 1
# if VERIFY
    DBG_FPRINTF(stderr, "\tconfInfo = min: %3d, max: %3d, naggrs: %3d, bridge: %3d, nprocs: %3d, vpset: %3d, tsize: %3d, ratio: %.4f; naggs = %d\n",
                confInfo->ioMinSize ,
                confInfo->ioMaxSize ,
                confInfo->nAggrs ,
                confInfo->numBridgeRanks ,
                confInfo->nProcs ,
                confInfo->ioMaxSize /*virtualPsetSize*/ ,
                confInfo->cpuIDsize,
                confInfo->aggRatio ,
                naggs );
# endif

    MPI_Comm_size( fd->comm, &size );
    /* This fix is for when the bridgenode rnk is not part of the particular
     * subcomm associated with this MPI File operation. I don't know if
     * this is the best/right answer but it passes the test cases at least.
     * I don't know how common file IO in subcomms is anyway... */
    for(i=0;i<naggs;i++)
    {
        /* Valid ranks in fd->comm are 0 .. size-1, so a value equal to size is
         * out of range as well (the test used to be '> size'). */
        if(tmp_ranklist[i] >= size)
        {
            TRACE_ERR("Using 0 as tmp_ranklist[%d] instead of %d for comm %x\n",
                      i, tmp_ranklist[i], fd->comm);
            tmp_ranklist[i] = 0;
        }
    }

# if AGG_DEBUG
    for (i=0; i<naggs; i++) {
        DBG_FPRINTF(stderr, "\taggr %-4d = %6d\n", i, tmp_ranklist[i] );
    }
# endif

    /* copy the ranklist of IO aggregators to fd->hints */
    if(fd->hints->ranklist != NULL) ADIOI_Free (fd->hints->ranklist);

    fd->hints->cb_nodes = naggs;
    fd->hints->ranklist = (int *) ADIOI_Malloc (naggs * sizeof(int));
    memcpy( fd->hints->ranklist, tmp_ranklist, naggs*sizeof(int) );

    /* the scratch list is no longer needed once copied into the hints */
    ADIOI_Free( tmp_ranklist );
    TRACE_ERR("Leaving ADIOI_BG_compute_agg_ranklist_serial\n");
    return;
}
/* Description from common/ad_aggregate.c. (Does it completely apply to bg?)
* ADIOI_Calc_aggregator()
*
* The intention here is to implement a function which provides basically
* the same functionality as in Rajeev's original version of
* ADIOI_Calc_my_req(). He used a ceiling division approach to assign the
* file domains, and we use the same approach here when calculating the
* location of an offset/len in a specific file domain. Further we assume
* this same distribution when calculating the rank_index, which is later
* used to map to a specific process rank in charge of the file domain.
*
* A better (i.e. more general) approach would be to use the list of file
* domains only. This would be slower in the case where the
* original ceiling division was used, but it would allow for arbitrary
* distributions of regions to aggregators. We'd need to know the
* nprocs_for_coll in that case though, which we don't have now.
*
* Note a significant difference between this function and Rajeev's old code:
* this code doesn't necessarily return a rank in the range
* 0..nprocs_for_coll; instead you get something in 0..nprocs. This is a
* result of the rank mapping; any set of ranks in the communicator could be
* used now.
*
* Returns an integer representing a rank in the collective I/O communicator.
*
* The "len" parameter is also modified to indicate the amount of data
* actually available in this file domain.
*/
/*
 * A more general aggregator search that does not assume every aggregator
 * hosts a file domain of the same size: the domain containing `off` is
 * located by a binary search over fd_start[] / fd_end[].
 *
 * fd        file; fd->hints->cb_nodes is the number of file domains and
 *           fd->hints->ranklist maps a domain index to a rank
 * off       file offset to look up
 * min_off   lowest offset of the access range (only checked by the
 *           assertion below)
 * len       in: length of the request starting at off;
 *           out: reduced to the number of bytes between off and the end of
 *           the located domain when the request extends past that domain
 * fd_size   only used in the diagnostic message
 * fd_start  first offset of each file domain
 * fd_end    last offset (inclusive) of each file domain
 *
 * Returns the rank, taken from fd->hints->ranklist, of the aggregator that
 * owns the file domain containing off.
 */
int ADIOI_BG_Calc_aggregator(ADIO_File fd,
                             ADIO_Offset off,
                             ADIO_Offset min_off,
                             ADIO_Offset *len,
                             ADIO_Offset fd_size,
                             ADIO_Offset *fd_start,
                             ADIO_Offset *fd_end)
{
    int rank_index, rank;
    ADIO_Offset avail_bytes;
    TRACE_ERR("Entering ADIOI_BG_Calc_aggregator\n");

    ADIOI_BG_assert ( (off <= fd_end[fd->hints->cb_nodes-1] && off >= min_off && fd_start[0] >= min_off ) );

    /* binary search --> rank_index is returned */
    int ub = fd->hints->cb_nodes;
    int lb = 0;

    /* get an index into our array of aggregators */
    /* The common code derives the index arithmetically,
           rank_index = (int) ((off - min_off + fd_size)/ fd_size - 1);
       and, when fd->hints->striping_unit > 0, walks fd_end[] linearly
       because the domains were aligned to file lock boundaries.
       bg does its own search below instead.
     */
    /* NOTE(review): the loop has no explicit termination check; it
     * presumably relies on the domains being contiguous so that every
     * offset accepted by the assertion above falls inside one of them --
     * confirm against the code that builds fd_start[]/fd_end[]. */
    rank_index = fd->hints->cb_nodes / 2;
    while ( off < fd_start[rank_index] || off > fd_end[rank_index] ) {
        if ( off > fd_end [rank_index] ) {
            lb = rank_index;
            rank_index = (rank_index + ub) / 2;
        }
        else
        if ( off < fd_start[rank_index] ) {
            ub = rank_index;
            rank_index = (rank_index + lb) / 2;
        }
    }

    /* we index into fd_end with rank_index, and fd_end was allocated to be no
     * bigger than fd->hints->cb_nodes.  If we ever violate that, we're
     * overrunning arrays.  Obviously, we should never ever hit this abort */
    if (rank_index >= fd->hints->cb_nodes || rank_index < 0) {
        /* ADIO_Offset is a configured type; cast so the arguments really
         * match the %lld conversions */
        FPRINTF(stderr, "Error in ADIOI_Calc_aggregator(): rank_index(%d) >= fd->hints->cb_nodes (%d) fd_size=%lld off=%lld\n",
                rank_index,fd->hints->cb_nodes,(long long) fd_size,(long long) off);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    /* DBG_FPRINTF ("ADIOI_BG_Calc_aggregator: rank_index = %d\n",
       rank_index ); */

    /*
     * remember here that even in Rajeev's original code it was the case that
     * different aggregators could end up with different amounts of data to
     * aggregate.  here we use fd_end[] to make sure that we know how much
     * data this aggregator is working with.
     *
     * the +1 is to take into account the end vs. length issue.
     */
    avail_bytes = fd_end[rank_index] + 1 - off;
    if (avail_bytes < *len && avail_bytes > 0) {
        /* this file domain only has part of the requested contig. region */
        *len = avail_bytes;
    }

    /* map our index to a rank */
    /* NOTE: FOR NOW WE DON'T HAVE A MAPPING...JUST DO 0..NPROCS_FOR_COLL */
    rank = fd->hints->ranklist[rank_index];
    TRACE_ERR("Leaving ADIOI_BG_Calc_aggregator\n");

    return rank;
}
/*
 * Compute a dynamic, access-range based file domain partition among the
 * I/O aggregators, aligned to the GPFS block size.
 *
 * The range [min start offset, max end offset] over all processes is
 * widened to whole blocks, the blocks are dealt out to the
 * nprocs_for_coll aggregators (the last ones get one extra block when the
 * division is not even), and the widening is then taken back off the first
 * and the last domain.  Domain boundaries between aggregators therefore
 * fall on block boundaries, so there is no false sharing of GPFS file
 * blocks among multiple I/O nodes.
 *
 * The block size comes from ((ADIOI_BG_fs*)fs_ptr)->blksize; a NULL fs_ptr
 * or a zero blksize selects the 1M default.
 *
 * Outputs: *fd_start_ptr and *fd_end_ptr are newly allocated arrays of
 * nprocs_for_coll entries (inclusive start/end of each domain),
 * *fd_size_ptr is the size of the first domain and *min_st_offset_ptr the
 * lowest start offset.
 *
 * The common version of this now accepts a min_fd_size and striping_unit.
 * It doesn't seem necessary here (using GPFS block sizes) but keep it in
 * mind (e.g. we could pass striping unit instead of using fs_ptr->blksize).
 */
void ADIOI_BG_GPFS_Calc_file_domains(ADIO_Offset *st_offsets,
                                     ADIO_Offset *end_offsets,
                                     int nprocs,
                                     int nprocs_for_coll,
                                     ADIO_Offset *min_st_offset_ptr,
                                     ADIO_Offset **fd_start_ptr,
                                     ADIO_Offset **fd_end_ptr,
                                     ADIO_Offset *fd_size_ptr,
                                     void *fs_ptr)
{
    ADIO_Offset lowest_start, highest_end;
    ADIO_Offset *starts, *ends, *sizes;
    int p, agg;
    TRACE_ERR("Entering ADIOI_BG_GPFS_Calc_file_domains\n");

#ifdef AGGREGATION_PROFILE
    MPE_Log_event (5004, 0, NULL);
#endif

#   if AGG_DEBUG
    static char myname[] = "ADIOI_BG_GPFS_Calc_file_domains";
    DBG_FPRINTF(stderr, "%s(%d): %d aggregator(s)\n",
                myname,__LINE__,nprocs_for_coll);
#   endif

    /* default to 1M; a null fs_ptr or a 0 blksize keeps the default */
    __blksize_t blksize = 1048576;
    if (fs_ptr) {
        ADIOI_BG_fs *bgfs = (ADIOI_BG_fs*)fs_ptr;
        if (bgfs->blksize)
            blksize = bgfs->blksize;
    }
#   if AGG_DEBUG
    DBG_FPRINTF(stderr,"%s(%d): Blocksize=%ld\n",myname,__LINE__,blksize);
#   endif

    /* lowest start offset and highest end offset over all processes */
    lowest_start = st_offsets [0];
    highest_end  = end_offsets[0];
    for (p = 1; p < nprocs; p++) {
        if (st_offsets[p] < lowest_start)
            lowest_start = st_offsets[p];
        if (end_offsets[p] > highest_end)
            highest_end = end_offsets[p];
    }

    /* widen the access range to whole blocks; the *_rdoff values are the
     * number of bytes added at either end by the rounding */
    ADIO_Offset gpfs_ub       = (highest_end +blksize-1) / blksize * blksize - 1;
    ADIO_Offset gpfs_lb       = lowest_start / blksize * blksize;
    ADIO_Offset gpfs_ub_rdoff = (highest_end +blksize-1) / blksize * blksize - 1 - highest_end;
    ADIO_Offset gpfs_lb_rdoff = lowest_start - lowest_start / blksize * blksize;
    ADIO_Offset fd_gpfs_range = gpfs_ub - gpfs_lb + 1;

    int naggs = nprocs_for_coll;

    /* The common code additionally enforces a min_fd_size threshold here
     * (balancing efficiency against parallelism); that parameter is not
     * implemented in this version. */

    sizes         = (ADIO_Offset *) ADIOI_Malloc(nprocs_for_coll * sizeof(ADIO_Offset));
    *fd_start_ptr = (ADIO_Offset *) ADIOI_Malloc(nprocs_for_coll * sizeof(ADIO_Offset));
    *fd_end_ptr   = (ADIO_Offset *) ADIOI_Malloc(nprocs_for_coll * sizeof(ADIO_Offset));
    starts = *fd_start_ptr;
    ends   = *fd_end_ptr;

    /* deal out the blocks: the first naggs_small aggregators get
     * nb_cn_small blocks each, the remaining naggs_large one more */
    ADIO_Offset n_gpfs_blk  = fd_gpfs_range / blksize;
    ADIO_Offset nb_cn_small = n_gpfs_blk/naggs;
    ADIO_Offset naggs_large = n_gpfs_blk - naggs * (n_gpfs_blk/naggs);
    ADIO_Offset naggs_small = naggs - naggs_large;

    for (agg = 0; agg < naggs; agg++) {
        ADIO_Offset nblocks = (agg < naggs_small) ? nb_cn_small
                                                  : (nb_cn_small+1);
        sizes[agg] = nblocks * blksize;
    }

#   if AGG_DEBUG
    DBG_FPRINTF(stderr,"%s(%d): "
                "gpfs_ub %llu, "
                "gpfs_lb %llu, "
                "gpfs_ub_rdoff %llu, "
                "gpfs_lb_rdoff %llu, "
                "fd_gpfs_range %llu, "
                "n_gpfs_blk %llu, "
                "nb_cn_small %llu, "
                "naggs_large %llu, "
                "naggs_small %llu, "
                "\n",
                myname,__LINE__,
                gpfs_ub ,
                gpfs_lb ,
                gpfs_ub_rdoff,
                gpfs_lb_rdoff,
                fd_gpfs_range,
                n_gpfs_blk ,
                nb_cn_small ,
                naggs_large ,
                naggs_small
                );
#   endif

    /* take the rounding back off the two outermost domains */
    sizes[0]       -= gpfs_lb_rdoff;
    sizes[naggs-1] -= gpfs_ub_rdoff;

    /* lay the domains out back to back, starting at the lowest offset */
    ADIO_Offset next_start = lowest_start;
    for (agg = 0; agg < naggs; agg++) {
        starts[agg] = next_start;
        next_start += sizes[agg];
        ends[agg]   = next_start - 1;
    }

    *fd_size_ptr       = sizes[0];
    *min_st_offset_ptr = lowest_start;

#ifdef AGGREGATION_PROFILE
    MPE_Log_event (5005, 0, NULL);
#endif
    ADIOI_Free (sizes);
    TRACE_ERR("Leaving ADIOI_BG_GPFS_Calc_file_domains\n");
}
/*
 * Look up myrank in the aggregator ranklist of fd.
 *
 * Returns the index of the first entry of fd->hints->ranklist (among the
 * first fd->hints->cb_nodes entries) equal to myrank, or -1 when myrank is
 * not listed, i.e. the process is not an I/O aggregator.
 */
int ADIOI_BG_Aggrs_index( ADIO_File fd, int myrank )
{
    const int  naggs = fd->hints->cb_nodes;
    const int *ranks = fd->hints->ranklist;
    int idx = 0;

    while (idx < naggs) {
        if (ranks[idx] == myrank)
            return idx;
        idx++;
    }
    return -1;
}
/*
 * ADIOI_BG_Calc_my_req() overrides ADIOI_Calc_my_req() because the default
 * implementation is specific to static file domain partitioning.
 *
 * It calculates which portions of this process's access requests are
 * located in the file domains of the various processes (including this
 * one).  Each contiguous access (offset_list[i], len_list[i]) is cut into
 * pieces with ADIOI_BG_Calc_aggregator(), one piece per file domain it
 * touches; zero-length accesses are skipped.
 *
 * Outputs (all arrays have nprocs entries and are allocated here):
 *   *count_my_req_per_proc_ptr  number of pieces that fall into each
 *                               process's file domain
 *   *count_my_req_procs_ptr     number of processes with at least one piece
 *   *my_req_ptr                 per process: offsets[] and lens[] of the
 *                               pieces (allocated only where the count is
 *                               non-zero) and their count
 *   *buf_idx_ptr                per process: running byte position, within
 *                               this process's concatenated accesses, of
 *                               the first piece sent to it, or -1 if none
 *
 * Possibly reconsider if buf_idx's are ok as int's, or should they be
 * aints/offsets?  They are used as memory buffer indices so it seems like
 * the 2G limit is in effect.
 */
void ADIOI_BG_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list, ADIO_Offset *len_list,
                          int contig_access_count, ADIO_Offset
                          min_st_offset, ADIO_Offset *fd_start,
                          ADIO_Offset *fd_end, ADIO_Offset fd_size,
                          int nprocs,
                          int *count_my_req_procs_ptr,
                          int **count_my_req_per_proc_ptr,
                          ADIOI_Access **my_req_ptr,
                          int **buf_idx_ptr)
{
    int *req_counts, *buf_idx;
    int n_target_procs;
    int i, slot, proc;
    ADIO_Offset piece_len, left, buf_pos, off;
    ADIOI_Access *my_req;
    TRACE_ERR("Entering ADIOI_BG_Calc_my_req\n");

#ifdef AGGREGATION_PROFILE
    MPE_Log_event (5024, 0, NULL);
#endif

    /* req_counts[p] is the no. of contig. pieces of this process in process
       p's file domain.  calloc initializes to zero.  The array has nprocs
       entries so that it can be used in an MPI_Alltoall later on. */
    *count_my_req_per_proc_ptr = (int *) ADIOI_Calloc(nprocs,sizeof(int));
    req_counts = *count_my_req_per_proc_ptr;

    /* buf_idx is relevant only if buftype_is_contig.
       buf_idx[p] gives the index into user_buf where data received
       from proc. p should be placed.  This allows receives to be done
       without extra buffer.  This can't be done if buftype is not contig. */
    buf_idx = (int *) ADIOI_Malloc(nprocs*sizeof(int));
    for (i = 0; i < nprocs; i++)
        buf_idx[i] = -1;

    /* Pass 1: only count the pieces per target process, to know how much
     * space to allocate for my_req.  contig_access_count was calculated way
     * back in ADIOI_Calc_my_off_len(). */
    for (i = 0; i < contig_access_count; i++) {
        /* short circuit offset/len processing if len == 0
         * (zero-byte read/write) */
        if (len_list[i] == 0)
            continue;

        off  = offset_list[i];
        left = len_list[i];
        do {
            /* hand the whole remainder to the lookup; it trims piece_len
             * down to what the domain holding `off` can take */
            piece_len = left;
            proc = ADIOI_BG_Calc_aggregator(fd, off, min_st_offset, &piece_len,
                                            fd_size, fd_start, fd_end);
            req_counts[proc]++;

            off  += piece_len;      /* first byte not yet assigned */
            left -= piece_len;
        } while (left > 0);
    }

    /* now allocate space for my_req, offset, and len */
    *my_req_ptr = (ADIOI_Access *)
        ADIOI_Malloc(nprocs*sizeof(ADIOI_Access));
    my_req = *my_req_ptr;

    n_target_procs = 0;
    for (i = 0; i < nprocs; i++) {
        if (req_counts[i]) {
            my_req[i].offsets = (ADIO_Offset *)
                ADIOI_Malloc(req_counts[i] * sizeof(ADIO_Offset));
            my_req[i].lens = (int *)
                ADIOI_Malloc(req_counts[i] * sizeof(int));
            n_target_procs++;
        }
        /* refilled as a cursor by pass 2 */
        my_req[i].count = 0;
    }

    /* Pass 2: same walk as pass 1, this time recording every piece. */
    buf_pos = 0;
    for (i = 0; i < contig_access_count; i++) {
        /* short circuit offset/len processing if len == 0
         * (zero-byte read/write) */
        if (len_list[i] == 0)
            continue;

        off  = offset_list[i];
        left = len_list[i];
        do {
            piece_len = left;
            proc = ADIOI_BG_Calc_aggregator(fd, off, min_st_offset, &piece_len,
                                            fd_size, fd_start, fd_end);

            /* remember where the first piece for this process starts */
            if (buf_idx[proc] == -1)
            {
                ADIOI_Assert(buf_pos == (int) buf_pos);
                buf_idx[proc] = (int) buf_pos;
            }

            /* store the offset and len of the piece in the structure of
             * the process whose FD it is located in, and bump its count */
            slot = my_req[proc].count;
            buf_pos += piece_len;
            my_req[proc].offsets[slot] = off;
            ADIOI_Assert(piece_len == (int) piece_len);
            my_req[proc].lens[slot] = (int) piece_len;
            my_req[proc].count++;

            off  += piece_len;
            left -= piece_len;
        } while (left > 0);
    }

#ifdef AGG_DEBUG
    for (i=0; i<nprocs; i++) {
        if (req_counts[i] > 0) {
            DBG_FPRINTF(stderr, "data needed from %d (count = %d):\n", i,
                        my_req[i].count);
            for (slot=0; slot < my_req[i].count; slot++) {
                DBG_FPRINTF(stderr, " off[%d] = %lld, len[%d] = %d\n", slot,
                            my_req[i].offsets[slot], slot, my_req[i].lens[slot]);
            }
        }
        DBG_FPRINTF(stderr, "buf_idx[%d] = 0x%x\n", i, buf_idx[i]);
    }
#endif

    *count_my_req_procs_ptr = n_target_procs;
    *buf_idx_ptr = buf_idx;

#ifdef AGGREGATION_PROFILE
    MPE_Log_event (5025, 0, NULL);
#endif
    TRACE_ERR("Leaving ADIOI_BG_Calc_my_req\n");
}
/*
 * ADIOI_Calc_others_req (copied to bg and switched to all to all for performance)
 *
 * Determines which requests of other processes lie in this process's file
 * domain.  The per-process request counts are exchanged with one
 * MPI_Alltoall; the request offsets and lengths follow with one
 * MPI_Alltoallv each.
 *
 * param[in]  fd                         File; fd->comm is the communicator used
 * param[in]  count_my_req_procs         Number of processes whose file domain my
 *                                       request touches (not used here).
 * param[in]  count_my_req_per_proc      count_my_req_per_proc[i] gives the no. of
 *                                       contig. requests of this process in
 *                                       process i's file domain.
 * param[in]  my_req                     A structure defining my request
 * param[in]  nprocs                     Number of nodes in the block
 * param[in]  myrank                     Rank of this node (not used here)
 * param[out] count_others_req_procs_ptr Number of processes whose requests lie in
 *                                       my process's file domain (including my
 *                                       process itself)
 * param[out] others_req_ptr             Array of other process' requests that lie
 *                                       in my process's file domain
 */
void ADIOI_BG_Calc_others_req(ADIO_File fd, int count_my_req_procs,
                              int *count_my_req_per_proc,
                              ADIOI_Access *my_req,
                              int nprocs, int myrank,
                              int *count_others_req_procs_ptr,
                              ADIOI_Access **others_req_ptr)
{
    TRACE_ERR("Entering ADIOI_BG_Calc_others_req\n");
    /* determine what requests of other processes lie in this process's
       file domain */

    /* count_others_req_procs = number of processes whose requests lie in
       this process's file domain (including this process itself)
       count_others_req_per_proc[i] indicates how many separate contiguous
       requests of proc. i lie in this process's file domain. */

    int *count_others_req_per_proc, count_others_req_procs;
    int i;
    ADIOI_Access *others_req;

    /* Parameters for MPI_Alltoallv */
    int *scounts, *sdispls, *rcounts, *rdispls;

    /* Parameters for MPI_Alltoallv.  These are the buffers, which
     * are later computed to be the lowest address of all buffers
     * to be sent/received for offsets and lengths.  Initialize to
     * the highest possible address which is the current minimum.
     */
    void *sendBufForOffsets=(void*)0xFFFFFFFFFFFFFFFF,
         *sendBufForLens   =(void*)0xFFFFFFFFFFFFFFFF,
         *recvBufForOffsets=(void*)0xFFFFFFFFFFFFFFFF,
         *recvBufForLens   =(void*)0xFFFFFFFFFFFFFFFF;

    /* first find out how much to send/recv and from/to whom */
#ifdef AGGREGATION_PROFILE
    MPE_Log_event (5026, 0, NULL);
#endif
    /* Send 1 int to each process.  count_my_req_per_proc[i] is the number of
     * requests that my process will do to the file domain owned by process[i].
     * Receive 1 int from each process.  count_others_req_per_proc[i] is the number of
     * requests that process[i] will do to the file domain owned by my process.
     */
    count_others_req_per_proc = (int *) ADIOI_Malloc(nprocs*sizeof(int));
    /* cora2a1=timebase(); */
    /* A single all-to-all exchanges every count; repeating it nprocs times
     * would only redo the same exchange with the same buffers. */
    MPI_Alltoall(count_my_req_per_proc, 1, MPI_INT,
                 count_others_req_per_proc, 1, MPI_INT, fd->comm);
    /* total_cora2a+=timebase()-cora2a1; */

    /* Allocate storage for an array of other nodes' accesses of our
     * node's file domain.  Also allocate storage for the alltoallv
     * parameters.
     */
    *others_req_ptr = (ADIOI_Access *)
        ADIOI_Malloc(nprocs*sizeof(ADIOI_Access));
    others_req = *others_req_ptr;

    scounts = ADIOI_Malloc(nprocs*sizeof(int));
    sdispls = ADIOI_Malloc(nprocs*sizeof(int));
    rcounts = ADIOI_Malloc(nprocs*sizeof(int));
    rdispls = ADIOI_Malloc(nprocs*sizeof(int));

    /* If process[i] has any requests in my file domain,
     * initialize an ADIOI_Access structure that will describe each request
     * from process[i].  The offsets, lengths, and buffer pointers still need
     * to be obtained to complete the setting of this structure.
     */
    count_others_req_procs = 0;
    for (i=0; i<nprocs; i++) {
        if (count_others_req_per_proc[i])
        {
            others_req[i].count = count_others_req_per_proc[i];

            others_req[i].offsets = (ADIO_Offset *)
                ADIOI_Malloc(count_others_req_per_proc[i]*sizeof(ADIO_Offset));
            others_req[i].lens = (int *)
                ADIOI_Malloc(count_others_req_per_proc[i]*sizeof(int));

            /* track the lowest receive buffer addresses; they become the
             * base addresses of the MPI_Alltoallv calls below */
            if ( (MPIR_Upint)others_req[i].offsets < (MPIR_Upint)recvBufForOffsets )
                recvBufForOffsets = others_req[i].offsets;
            if ( (MPIR_Upint)others_req[i].lens < (MPIR_Upint)recvBufForLens )
                recvBufForLens = others_req[i].lens;

            others_req[i].mem_ptrs = (MPI_Aint *)
                ADIOI_Malloc(count_others_req_per_proc[i]*sizeof(MPI_Aint));

            count_others_req_procs++;
        }
        else
        {
            others_req[i].count = 0;
            others_req[i].offsets = NULL;
            others_req[i].lens = NULL;
            /* do not leave this pointer holding an indeterminate value */
            others_req[i].mem_ptrs = NULL;
        }
    }

    /* If no recv buffer was allocated in the loop above, make it NULL */
    if ( recvBufForOffsets == (void*)0xFFFFFFFFFFFFFFFF) recvBufForOffsets = NULL;
    if ( recvBufForLens    == (void*)0xFFFFFFFFFFFFFFFF) recvBufForLens    = NULL;

    /* Now send the calculated offsets and lengths to respective processes */

    /************************/
    /* Exchange the offsets */
    /************************/

    /* Determine the lowest sendBufForOffsets/Lens */
    for (i=0; i<nprocs; i++)
    {
        if ( (my_req[i].count) &&
             ((MPIR_Upint)my_req[i].offsets <= (MPIR_Upint)sendBufForOffsets) )
        {
            sendBufForOffsets = my_req[i].offsets;
        }

        if ( (my_req[i].count) &&
             ((MPIR_Upint)my_req[i].lens <= (MPIR_Upint)sendBufForLens) )
        {
            sendBufForLens = my_req[i].lens;
        }
    }

    /* If no send buffer was found in the loop above, make it NULL */
    if ( sendBufForOffsets == (void*)0xFFFFFFFFFFFFFFFF) sendBufForOffsets = NULL;
    if ( sendBufForLens    == (void*)0xFFFFFFFFFFFFFFFF) sendBufForLens    = NULL;

    /* Calculate the displacements from the sendBufForOffsets/Lens.  Each
     * displacement is the distance, in elements, of a separately allocated
     * array from the lowest-addressed one. */
    MPI_Barrier(fd->comm);
    for (i=0; i<nprocs; i++)
    {
        /* Send these offsets to process i.*/
        scounts[i] = count_my_req_per_proc[i];
        if ( scounts[i] == 0 )
            sdispls[i] = 0;
        else
            sdispls[i] = (int)
                ( ( (MPIR_Upint)my_req[i].offsets -
                    (MPIR_Upint)sendBufForOffsets ) /
                  (MPIR_Upint)sizeof(ADIO_Offset) );

        /* Receive these offsets from process i.*/
        rcounts[i] = count_others_req_per_proc[i];
        if ( rcounts[i] == 0 )
            rdispls[i] = 0;
        else
            rdispls[i] = (int)
                ( ( (MPIR_Upint)others_req[i].offsets -
                    (MPIR_Upint)recvBufForOffsets ) /
                  (MPIR_Upint)sizeof(ADIO_Offset) );
    }

    /* Exchange the offsets */
    MPI_Alltoallv(sendBufForOffsets,
                  scounts, sdispls, ADIO_OFFSET,
                  recvBufForOffsets,
                  rcounts, rdispls, ADIO_OFFSET,
                  fd->comm);

    /************************/
    /* Exchange the lengths */
    /************************/

    for (i=0; i<nprocs; i++)
    {
        /* Send these lengths to process i.*/
        scounts[i] = count_my_req_per_proc[i];
        if ( scounts[i] == 0 )
            sdispls[i] = 0;
        else
            sdispls[i] = (int)
                ( ( (MPIR_Upint)my_req[i].lens -
                    (MPIR_Upint)sendBufForLens ) /
                  (MPIR_Upint) sizeof(int) );

        /* Receive these lengths from process i. */
        rcounts[i] = count_others_req_per_proc[i];
        if ( rcounts[i] == 0 )
            rdispls[i] = 0;
        else
            rdispls[i] = (int)
                ( ( (MPIR_Upint)others_req[i].lens -
                    (MPIR_Upint)recvBufForLens ) /
                  (MPIR_Upint) sizeof(int) );
    }

    /* Exchange the lengths */
    MPI_Alltoallv(sendBufForLens,
                  scounts, sdispls, MPI_INT,
                  recvBufForLens,
                  rcounts, rdispls, MPI_INT,
                  fd->comm);

    /* Clean up */
    ADIOI_Free(count_others_req_per_proc);
    ADIOI_Free (scounts);
    ADIOI_Free (sdispls);
    ADIOI_Free (rcounts);
    ADIOI_Free (rdispls);

    *count_others_req_procs_ptr = count_others_req_procs;
#ifdef AGGREGATION_PROFILE
    MPE_Log_event (5027, 0, NULL);
#endif
    TRACE_ERR("Leaving ADIOI_BG_Calc_others_req\n");
}

Просмотреть файл

@ -0,0 +1,104 @@
/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp. 2007, 2008 */
/* ---------------------------------------------------------------- */
/**
* \file ad_bg_aggrs.h
* \brief ???
*/
/*
* File: ad_bg_aggrs.h
*
* Declares functions specific for BG/L - GPFS parallel I/O solution. The implemented optimizations are:
* . Aligned file-domain partitioning, integrated in 7/28/2005
*
* In addition, following optimizations are planned:
* . Integrating multiple file-domain partitioning schemes
* (corresponding to Alok Choudhary's persistent file domain work).
*/
#ifndef AD_BG_AGGRS_H_
#define AD_BG_AGGRS_H_
#include "adio.h"
#include <sys/stat.h>
#if !defined(GPFS_SUPER_MAGIC)
#define GPFS_SUPER_MAGIC (0x47504653)
#endif
/* File system (BG) specific information -
hung off of ADIOI_FileD file descriptor (fd->fs_ptr) at open.
ADIOI_BG_Close frees it and resets fd->fs_ptr to NULL. */
typedef struct ADIOI_BG_fs_s {
/* File system block size; ADIOI_BG_GPFS_Calc_file_domains aligns the file
domains to it and falls back to a 1M default when it is 0. */
__blksize_t blksize;
int fsync_aggr; /* "fsync aggregation" flags (below); tested bitwise by ADIOI_BG_Flush */
#define ADIOI_BG_FSYNC_AGGREGATION_DISABLED 0x00 /* no flag set */
#define ADIOI_BG_FSYNC_AGGREGATION_ENABLED 0x01 /* flush collectively; only aggregators call fsync */
#define ADIOI_BG_FSYNC_AGGREGATOR 0x10 /* This rank is an aggregator */
} ADIOI_BG_fs;
/* generate a list of I/O aggregators that utilizes BG-PSET organization. */
int ADIOI_BG_gen_agg_ranklist(ADIO_File fd, int n_aggrs_per_pset);
/* overriding ADIOI_Calc_file_domains() to apply 'aligned file domain partitioning'. */
void ADIOI_BG_GPFS_Calc_file_domains(ADIO_Offset *st_offsets,
ADIO_Offset *end_offsets,
int nprocs,
int nprocs_for_coll,
ADIO_Offset *min_st_offset_ptr,
ADIO_Offset **fd_start_ptr,
ADIO_Offset **fd_end_ptr,
ADIO_Offset *fd_size_ptr,
void *fs_ptr);
/* a utility function for debugging */
int ADIOI_BG_Aggrs_index(ADIO_File fd, int myrank );
/* overriding ADIOI_Calc_aggregator() for the default implementation is specific for
static file domain partitioning */
int ADIOI_BG_Calc_aggregator(ADIO_File fd,
ADIO_Offset off,
ADIO_Offset min_off,
ADIO_Offset *len,
ADIO_Offset fd_size,
ADIO_Offset *fd_start,
ADIO_Offset *fd_end);
/* overriding ADIOI_Calc_my_req for the default implementation is specific for
static file domain partitioning */
void ADIOI_BG_Calc_my_req ( ADIO_File fd, ADIO_Offset *offset_list, ADIO_Offset *len_list,
int contig_access_count, ADIO_Offset
min_st_offset, ADIO_Offset *fd_start,
ADIO_Offset *fd_end, ADIO_Offset fd_size,
int nprocs,
int *count_my_req_procs_ptr,
int **count_my_req_per_proc_ptr,
ADIOI_Access **my_req_ptr,
int **buf_idx_ptr);
/*
* ADIOI_Calc_others_req
*
* param[in] count_my_req_procs Number of processes whose file domain my
* request touches.
* param[in] count_my_req_per_proc count_my_req_per_proc[i] gives the no. of
* contig. requests of this process in
* process i's file domain.
* param[in] my_req A structure defining my request
* param[in] nprocs Number of nodes in the block
* param[in] myrank Rank of this node
* param[out] count_others_req_proc_ptr Number of processes whose requests lie in
* my process's file domain (including my
* process itself)
* param[out] others_req_ptr Array of other process' requests that lie
* in my process's file domain
*/
void ADIOI_BG_Calc_others_req(ADIO_File fd, int count_my_req_procs,
int *count_my_req_per_proc,
ADIOI_Access *my_req,
int nprocs, int myrank,
int *count_others_req_procs_ptr,
ADIOI_Access **others_req_ptr);
#endif /* AD_BG_AGGRS_H_ */

Просмотреть файл

@ -0,0 +1,53 @@
/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp. 2007, 2008 */
/* ---------------------------------------------------------------- */
/**
* \file ad_bg_close.c
* \brief ???
*/
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*/
#include "ad_bg.h"
#include "ad_bg_aggrs.h"
/*
 * Close the system file descriptor of fd and, when one is open
 * (fd->fd_direct >= 0), the direct-I/O descriptor as well, then release the
 * file-system specific data hung off fd->fs_ptr.
 *
 * Both descriptors are reset to -1 regardless of the outcome.  *error_code
 * is MPI_SUCCESS when every close() succeeded; otherwise it is an
 * MPI_ERR_IO code built from the errno of the first close() that failed.
 */
void ADIOI_BG_Close(ADIO_File fd, int *error_code)
{
    int err, derr=0;
    /* errno of the first failing close(); captured immediately because the
     * calls made further down may overwrite errno */
    int close_errno = 0;
    static char myname[] = "ADIOI_BG_CLOSE";

#ifdef PROFILE
    MPE_Log_event(9, 0, "start close");
#endif

    err = close(fd->fd_sys);
    if (err == -1)
        close_errno = errno;

    if (fd->fd_direct >= 0)
    {
        derr = close(fd->fd_direct);
        if (derr == -1 && err != -1)
            close_errno = errno;
    }

#ifdef PROFILE
    MPE_Log_event(10, 0, "end close");
#endif

    /* FPRINTF(stderr,"%s(%d):'%s'. Free %#X\n",myname,__LINE__,fd->filename,(int)fd->fs_ptr);*/
    if (fd->fs_ptr != NULL) {
        ADIOI_Free(fd->fs_ptr);
        fd->fs_ptr = NULL;
    }
    fd->fd_sys    = -1;
    fd->fd_direct = -1;

    if (err == -1 || derr == -1)
    {
        *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__, MPI_ERR_IO,
                                           "**io",
                                           "**io %s", strerror(close_errno));
    }
    else *error_code = MPI_SUCCESS;
}

Просмотреть файл

@ -0,0 +1,58 @@
/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp. 2007, 2008 */
/* ---------------------------------------------------------------- */
/**
* \file ad_bg_fcntl.c
* \brief ???
*/
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*/
#include "ad_bg.h"
#include "adio_extern.h"
/* #ifdef MPISGI
#include "mpisgi2.h"
#endif */
/*
 * Dispatch an ADIO fcntl request on fd.
 *
 *   ADIO_FCNTL_GET_FSIZE      stores the file size (lseek to SEEK_END) in
 *                             fcntl_struct->fsize, then seeks back to
 *                             fd->fp_sys_posn unless that is -1
 *   ADIO_FCNTL_SET_DISKSPACE  forwards fcntl_struct->diskspace to
 *                             ADIOI_GEN_Prealloc, which sets *error_code
 *   ADIO_FCNTL_SET_ATOMICITY  stores the request in fd->atomicity as 0 or 1
 *
 * Any other flag yields an MPI_ERR_ARG error code.
 */
void ADIOI_BG_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct,
                    int *error_code)
{
    static char myname[] = "ADIOI_BG_FCNTL";

    if (flag == ADIO_FCNTL_GET_FSIZE) {
        fcntl_struct->fsize = lseek(fd->fd_sys, 0, SEEK_END);

        /* put the system file pointer back where ADIO believes it is */
        if (fd->fp_sys_posn != -1)
            lseek(fd->fd_sys, fd->fp_sys_posn, SEEK_SET);

        if (fcntl_struct->fsize != -1) {
            *error_code = MPI_SUCCESS;
        }
        else {
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE, myname,
                                               __LINE__, MPI_ERR_IO, "**io",
                                               "**io %s", strerror(errno));
        }
    }
    else if (flag == ADIO_FCNTL_SET_DISKSPACE) {
        ADIOI_GEN_Prealloc(fd, fcntl_struct->diskspace, error_code);
    }
    else if (flag == ADIO_FCNTL_SET_ATOMICITY) {
        /* normalize any non-zero request to 1 */
        if (fcntl_struct->atomicity == 0)
            fd->atomicity = 0;
        else
            fd->atomicity = 1;
        *error_code = MPI_SUCCESS;
    }
    else {
        /* --BEGIN ERROR HANDLING-- */
        *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                           MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__,
                                           MPI_ERR_ARG,
                                           "**flag", "**flag %d", flag);
        /* --END ERROR HANDLING-- */
    }
}

Просмотреть файл

@ -0,0 +1,90 @@
/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp. 2007, 2008 */
/* ---------------------------------------------------------------- */
/**
* \file ad_bg_flush.c
* \brief Scalable flush based on underlying filesystem and psets
*/
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*/
#include "ad_bg.h"
#include "ad_bg_aggrs.h"
/*
 * Flush fd to stable storage with fsync().
 *
 * When "fsync aggregation" is enabled in the file-system data hung off
 * fd->fs_ptr, the flush is collective over fd->comm: all ranks synchronize
 * on a barrier, only the ranks flagged as fsync aggregators call fsync(),
 * and the resulting errno (0 on success) is combined with an
 * MPI_Allreduce(MPI_MAX) so that every rank reports the same outcome.
 * Otherwise, including when fd->fs_ptr is NULL, every rank calls fsync()
 * itself.
 *
 * *error_code is MPI_SUCCESS, or an MPI_ERR_IO code built from the errno of
 * the failed fsync().
 */
void ADIOI_BG_Flush(ADIO_File fd, int *error_code)
{
    int err=0;
    /* errno belonging to the fsync() result; captured right after the call
     * because the logging and MPI calls that follow may overwrite errno */
    int fsync_errno = 0;
    static char myname[] = "ADIOI_BG_FLUSH";
    ADIOI_BG_fs *bgfs = (ADIOI_BG_fs*)fd->fs_ptr;

    if (bgfs != NULL && (bgfs->fsync_aggr & ADIOI_BG_FSYNC_AGGREGATION_ENABLED))
    {
        int rank;

        /* Barrier so we can collectively do fewer fsync's */
        MPI_Barrier(fd->comm);

        MPI_Comm_rank(fd->comm, &rank);

        /* All ranks marked as "fsync aggregators" should fsync.
           (We currently only do one fsync on rank 0 but this is general
           enough to support >1 aggregator using allreduce to get the
           results instead of simply bcast'ing the results from rank 0.)*/
        if (bgfs->fsync_aggr & ADIOI_BG_FSYNC_AGGREGATOR)
        {
            err = fsync(fd->fd_sys);
            fsync_errno = errno;
            DBG_FPRINTF(stderr,"aggregation:fsync %s, err=%#X, errno=%#X\n",fd->filename, err, fsync_errno);
            /* We want errno, not the return code if it failed */
            if (err == -1) err = fsync_errno;
            else err = 0;
        }
        /* Just pick an errno (using unsigned MPI_MAX) from any failures */
        MPI_Allreduce( MPI_IN_PLACE, (unsigned*)&err, 1, MPI_UNSIGNED, MPI_MAX, fd->comm);
        DBGV_FPRINTF(stderr,"aggregation result:fsync %s, errno %#X,\n",fd->filename, err);

        if (err) /* if it's non-zero, it must be an errno */
        {
            fsync_errno = err;
            errno = err;
            err = -1;
        }
    }
    else /* Non-aggregated fsync */
    {
#ifdef USE_DBG_LOGGING
        int rank;
#endif
        err = fsync(fd->fd_sys);
        fsync_errno = errno;
#ifdef USE_DBG_LOGGING
        MPI_Comm_rank(fd->comm, &rank);

        if(rank == 0)
        {
            DBG_FPRINTF(stderr,"no aggregation:fsync %s, err=%#X, errno=%#X\n",fd->filename, err, fsync_errno);
        }
        else
        {
            DBGV_FPRINTF(stderr,"no aggregation:fsync %s, err=%#X, errno=%#X\n",fd->filename, err, fsync_errno);
        }
#endif
    }

    /* --BEGIN ERROR HANDLING-- */
    if (err == -1)
    {
        /* leave errno describing the fsync failure, as before */
        errno = fsync_errno;
        *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__, MPI_ERR_IO,
                                           "**io",
                                           "**io %s", strerror(fsync_errno));
        DBGT_FPRINTF(stderr,"fsync %s, err=%#X, errno=%#X\n",fd->filename, err, fsync_errno);
        return;
    }
    /* --END ERROR HANDLING-- */

    *error_code = MPI_SUCCESS;
}

Просмотреть файл

@ -0,0 +1,84 @@
/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp. 2007, 2008 */
/* ---------------------------------------------------------------- */
/**
* \file ad_bg_getsh.c
* \brief ???
*/
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*/
#include "ad_bg.h"
/* Returns, in *shared_fp, the current location of the shared file pointer
   in terms of the no. of etypes relative to the current view, and also
   advances the stored shared file pointer by incr, the number of etypes to
   be accessed in the read or write following this function.

   The pointer value is kept in a separate file, fd->shared_fp_fname, which
   is opened on first use.  The read-modify-write of that file is done
   under a write lock on its first sizeof(ADIO_Offset) bytes.

   *error_code is MPI_SUCCESS, the code returned by ADIO_Open if opening the
   shared-pointer file failed, or an MPI_ERR_IO code if accessing it
   failed. */
void ADIOI_BG_Get_shared_fp(ADIO_File fd, int incr, ADIO_Offset *shared_fp,
                            int *error_code)
{
    static char myname[] = "ADIOI_BG_GET_SHARED_FP";
    ADIO_Offset next_fp;
    MPI_Comm self_dup;
    int rc;

    if (fd->shared_fp_fd != ADIO_FILE_NULL) {
        /* shared-pointer file already open: rewind it and read the value */
        ADIOI_WRITE_LOCK(fd->shared_fp_fd, 0, SEEK_SET, sizeof(ADIO_Offset));

        rc = lseek(fd->shared_fp_fd->fd_sys, 0, SEEK_SET);
        if (rc == 0) {
            rc = read(fd->shared_fp_fd->fd_sys, shared_fp,
                      sizeof(ADIO_Offset));
        }
        if (rc == -1) {
            ADIOI_UNLOCK(fd->shared_fp_fd, 0, SEEK_SET, sizeof(ADIO_Offset));
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE, myname,
                                               __LINE__, MPI_ERR_IO, "**io",
                                               "**io %s", strerror(errno));
            return;
        }
    }
    else {
        /* first use: open the shared-pointer file on a dup of COMM_SELF */
        MPI_Comm_dup(MPI_COMM_SELF, &self_dup);
        fd->shared_fp_fd = ADIO_Open(MPI_COMM_SELF,
                                     self_dup,
                                     fd->shared_fp_fname,
                                     fd->file_system,
                                     fd->fns,
                                     ADIO_CREATE | ADIO_RDWR | ADIO_DELETE_ON_CLOSE,
                                     0,
                                     MPI_BYTE,
                                     MPI_BYTE,
                                     MPI_INFO_NULL,
                                     ADIO_PERM_NULL,
                                     error_code);
        if (*error_code != MPI_SUCCESS)
            return;

        *shared_fp = 0;
        ADIOI_WRITE_LOCK(fd->shared_fp_fd, 0, SEEK_SET, sizeof(ADIO_Offset));
        /* if the file is empty, this read may return error
           (reading beyond end of file).  In that case, shared_fp = 0,
           set above, is the correct value, so the result is deliberately
           not checked. */
        rc = read(fd->shared_fp_fd->fd_sys, shared_fp, sizeof(ADIO_Offset));
    }

    /* write the advanced pointer back at the start of the file */
    next_fp = *shared_fp + incr;

    rc = lseek(fd->shared_fp_fd->fd_sys, 0, SEEK_SET);
    if (rc == 0) {
        rc = write(fd->shared_fp_fd->fd_sys, &next_fp, sizeof(ADIO_Offset));
    }
    ADIOI_UNLOCK(fd->shared_fp_fd, 0, SEEK_SET, sizeof(ADIO_Offset));

    if (rc != -1) {
        *error_code = MPI_SUCCESS;
        return;
    }
    *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                       myname, __LINE__, MPI_ERR_IO,
                                       "**io",
                                       "**io %s", strerror(errno));
}

Просмотреть файл

@ -0,0 +1,542 @@
/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp. 2007, 2008 */
/* ---------------------------------------------------------------- */
/**
* \file ad_bg_hints.c
* \brief BlueGene hint processing
*/
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*/
#include "adio.h"
#include "adio_extern.h"
#include "ad_bg.h"
#include "ad_bg_pset.h"
#include "ad_bg_aggrs.h"
#define ADIOI_BG_CB_BUFFER_SIZE_DFLT "16777216"
#define ADIOI_BG_IND_RD_BUFFER_SIZE_DFLT "4194304"
#define ADIOI_BG_IND_WR_BUFFER_SIZE_DFLT "4194304"
#define ADIOI_BG_NAGG_IN_PSET_HINT_NAME "bg_nodes_pset"
/** \page mpiio_vars MPIIO Configuration
*
* BlueGene MPIIO configuration and performance tuning. Used by ad_bg and ad_bglockless ADIO's.
*
* \section hint_sec Hints
* - bg_nodes_pset - Specify how many aggregators to use per pset.
* This hint will override the cb_nodes hint based on BlueGene psets.
* - N - Use N nodes per pset as aggregators.
* - Default is based on partition configuration and cb_nodes.
*
* The following default key/value pairs may differ from other platform defaults.
*
* - key = cb_buffer_size value = 16777216
* - key = romio_cb_read value = enable
* - key = romio_cb_write value = enable
* - key = ind_rd_buffer_size value = 4194304
* - key = ind_wr_buffer_size value = 4194304
*/
/* Compute the aggregator-related parameters that are required in 2-phase collective IO of ADIO. */
extern int
ADIOI_BG_gen_agg_ranklist(ADIO_File fd, int n_proxy_per_pset);
/*
 * ADIOI_BG_SetInfo - install default hints and merge the user's hints.
 *
 * fd          file whose fd->info / fd->hints are updated
 * users_info  hints supplied by the user, or MPI_INFO_NULL
 * error_code  set to MPI_SUCCESS on normal completion, or to the code
 *             produced by MPIO_ERR_CREATE_CODE_INFO_NOT_SAME when a hint
 *             that is compared against rank 0's value (cb_buffer_size,
 *             romio_cb_read, romio_cb_write, romio_no_indep_rw) differs
 *
 * The scratch buffer `value` is heap allocated; every exit path, including
 * the early error returns, releases it.
 */
void ADIOI_BG_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
{
    /* if fd->info is null, create a new info object.
       Initialize fd->info to default values.
       Initialize fd->hints to default values.
       Examine the info object passed by the user. If it contains values that
       ROMIO understands, override the default. */

    MPI_Info info;
    char *value;
    int flag, intval, tmp_val, nprocs=0;
    static char myname[] = "ADIOI_BG_SETINFO";
    int did_anything = 0;

    if (fd->info == MPI_INFO_NULL) MPI_Info_create(&(fd->info));
    info = fd->info;

    /* Note that fd->hints is allocated at file open time; thus it is
     * not necessary to allocate it, or check for allocation, here.
     */

    value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
    ADIOI_BG_assert ((value != NULL));

    /* initialize info and hints to default values if they haven't been
     * previously initialized
     */
    if (!fd->hints->initialized) {

        did_anything = 1;

        /* buffer size for collective I/O */
        ADIOI_Info_set(info, "cb_buffer_size", ADIOI_BG_CB_BUFFER_SIZE_DFLT);
        fd->hints->cb_buffer_size = atoi(ADIOI_BG_CB_BUFFER_SIZE_DFLT);

        /* default is to enable collective buffering on reads and writes */
        ADIOI_Info_set(info, "romio_cb_read", "enable");
        fd->hints->cb_read = ADIOI_HINT_ENABLE;
        ADIOI_Info_set(info, "romio_cb_write", "enable");
        fd->hints->cb_write = ADIOI_HINT_ENABLE;

        if ( fd->hints->cb_config_list != NULL ) ADIOI_Free (fd->hints->cb_config_list);
        fd->hints->cb_config_list = NULL;

        /* number of processes that perform I/O in collective I/O */
        MPI_Comm_size(fd->comm, &nprocs);
        ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", nprocs);
        ADIOI_Info_set(info, "cb_nodes", value);
        /* the info object advertises nprocs, but the hint itself starts at
         * -1; it is only changed below by the bg_nodes_pset user hint */
        fd->hints->cb_nodes = -1;

        /* hint indicating that no indep. I/O will be performed on this file */
        ADIOI_Info_set(info, "romio_no_indep_rw", "false");
        fd->hints->no_indep_rw = 0;

        /* bg is not implementing file realms (ADIOI_IOStridedColl),
           so initialize the related hints to disabled. */

        /* hint instructing the use of persistent file realms */
        ADIOI_Info_set(info, "romio_cb_pfr", "disable");
        fd->hints->cb_pfr = ADIOI_HINT_DISABLE;

        /* hint guiding the assignment of persistent file realms */
        ADIOI_Info_set(info, "romio_cb_fr_types", "aar");
        fd->hints->cb_fr_type = ADIOI_FR_AAR;

        /* hint to align file realms with a certain byte value */
        ADIOI_Info_set(info, "romio_cb_fr_alignment", "1");
        fd->hints->cb_fr_alignment = 1;

        /* hint to set a threshold percentage for a datatype's size/extent at
         * which data sieving should be done in collective I/O */
        ADIOI_Info_set(info, "romio_cb_ds_threshold", "0");
        fd->hints->cb_ds_threshold = 0;

        /* hint to switch between point-to-point or all-to-all for two-phase */
        ADIOI_Info_set(info, "romio_cb_alltoall", "automatic");
        fd->hints->cb_alltoall = ADIOI_HINT_AUTO;

        /* deferred_open derived from no_indep_rw and cb_{read,write} */
        fd->hints->deferred_open = 0;

        /* buffer size for data sieving in independent reads */
        ADIOI_Info_set(info, "ind_rd_buffer_size", ADIOI_BG_IND_RD_BUFFER_SIZE_DFLT);
        fd->hints->ind_rd_buffer_size = atoi(ADIOI_BG_IND_RD_BUFFER_SIZE_DFLT);

        /* buffer size for data sieving in independent writes */
        ADIOI_Info_set(info, "ind_wr_buffer_size", ADIOI_BG_IND_WR_BUFFER_SIZE_DFLT);
        fd->hints->ind_wr_buffer_size = atoi(ADIOI_BG_IND_WR_BUFFER_SIZE_DFLT);

        if(fd->file_system == ADIO_UFS)
        {
            /* default for ufs/pvfs is to disable data sieving */
            ADIOI_Info_set(info, "romio_ds_read", "disable");
            fd->hints->ds_read = ADIOI_HINT_DISABLE;
            ADIOI_Info_set(info, "romio_ds_write", "disable");
            fd->hints->ds_write = ADIOI_HINT_DISABLE;
        }
        else
        {
            /* default is to let romio automatically decide when to use data
             * sieving
             */
            ADIOI_Info_set(info, "romio_ds_read", "automatic");
            fd->hints->ds_read = ADIOI_HINT_AUTO;
            ADIOI_Info_set(info, "romio_ds_write", "automatic");
            fd->hints->ds_write = ADIOI_HINT_AUTO;
        }

        /* still to do: tune this a bit for a variety of file systems. there's
         * no good default value so just leave it unset */
        fd->hints->min_fdomain_size = 0;
        fd->hints->striping_unit = 0;

        fd->hints->initialized = 1;
    }

    /* add in user's info if supplied */
    if (users_info != MPI_INFO_NULL) {
        ADIOI_Info_get(users_info, "cb_buffer_size", MPI_MAX_INFO_VAL,
                       value, &flag);
        if (flag && ((intval=atoi(value)) > 0)) {
            tmp_val = intval;
            MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
            /* --BEGIN ERROR HANDLING-- */
            if (tmp_val != intval) {
                MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
                                                   "cb_buffer_size",
                                                   error_code);
                ADIOI_Free(value); /* do not leak the scratch buffer */
                return;
            }
            /* --END ERROR HANDLING-- */

            ADIOI_Info_set(info, "cb_buffer_size", value);
            fd->hints->cb_buffer_size = intval;
        }
#if 0
        /* bg is not implementing file realms (ADIOI_IOStridedColl) ... */
        /* aligning file realms to certain sizes (e.g. stripe sizes)
         * may benefit I/O performance */
        ADIOI_Info_get(users_info, "romio_cb_fr_alignment", MPI_MAX_INFO_VAL,
                       value, &flag);
        if (flag && ((intval=atoi(value)) > 0)) {
            tmp_val = intval;
            MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
            /* --BEGIN ERROR HANDLING-- */
            if (tmp_val != intval) {
                MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
                                                   "romio_cb_fr_alignment",
                                                   error_code);
                ADIOI_Free(value);
                return;
            }
            /* --END ERROR HANDLING-- */

            ADIOI_Info_set(info, "romio_cb_fr_alignment", value);
            fd->hints->cb_fr_alignment = intval;
        }

        /* for collective I/O, try to be smarter about when to do data sieving
         * using a specific threshold for the datatype size/extent
         * (percentage 0-100%) */
        ADIOI_Info_get(users_info, "romio_cb_ds_threshold", MPI_MAX_INFO_VAL,
                       value, &flag);
        if (flag && ((intval=atoi(value)) > 0)) {
            tmp_val = intval;
            MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
            /* --BEGIN ERROR HANDLING-- */
            if (tmp_val != intval) {
                MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
                                                   "romio_cb_ds_threshold",
                                                   error_code);
                ADIOI_Free(value);
                return;
            }
            /* --END ERROR HANDLING-- */

            ADIOI_Info_set(info, "romio_cb_ds_threshold", value);
            fd->hints->cb_ds_threshold = intval;
        }
        ADIOI_Info_get(users_info, "romio_cb_alltoall", MPI_MAX_INFO_VAL, value,
                       &flag);
        if (flag) {
            /* NOTE(review): this disabled code stores into cb_read although
             * the hint being parsed is romio_cb_alltoall -- fix before
             * re-enabling. */
            if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
                ADIOI_Info_set(info, "romio_cb_alltoall", value);
                fd->hints->cb_read = ADIOI_HINT_ENABLE;
            }
            else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) {
                ADIOI_Info_set(info, "romio_cb_alltoall", value);
                fd->hints->cb_read = ADIOI_HINT_DISABLE;
            }
            else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
            {
                ADIOI_Info_set(info, "romio_cb_alltoall", value);
                fd->hints->cb_read = ADIOI_HINT_AUTO;
            }

            tmp_val = fd->hints->cb_alltoall;
            MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
            /* --BEGIN ERROR HANDLING-- */
            if (tmp_val != fd->hints->cb_alltoall) {
                MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
                                                   "romio_cb_alltoall",
                                                   error_code);
                ADIOI_Free(value);
                return;
            }
            /* --END ERROR HANDLING-- */
        }
#endif
        /* new hints for enabling/disabling coll. buffering on
         * reads/writes
         */
        ADIOI_Info_get(users_info, "romio_cb_read", MPI_MAX_INFO_VAL, value,
                       &flag);
        if (flag) {
            if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
                ADIOI_Info_set(info, "romio_cb_read", value);
                fd->hints->cb_read = ADIOI_HINT_ENABLE;
            }
            else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) {
                /* romio_cb_read overrides no_indep_rw */
                ADIOI_Info_set(info, "romio_cb_read", value);
                ADIOI_Info_set(info, "romio_no_indep_rw", "false");
                fd->hints->cb_read = ADIOI_HINT_DISABLE;
                fd->hints->no_indep_rw = ADIOI_HINT_DISABLE;
            }
            else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
            {
                ADIOI_Info_set(info, "romio_cb_read", value);
                fd->hints->cb_read = ADIOI_HINT_AUTO;
            }

            tmp_val = fd->hints->cb_read;
            MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
            /* --BEGIN ERROR HANDLING-- */
            if (tmp_val != fd->hints->cb_read) {
                MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
                                                   "romio_cb_read",
                                                   error_code);
                ADIOI_Free(value); /* do not leak the scratch buffer */
                return;
            }
            /* --END ERROR HANDLING-- */
        }
        ADIOI_Info_get(users_info, "romio_cb_write", MPI_MAX_INFO_VAL, value,
                       &flag);
        if (flag) {
            if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
                ADIOI_Info_set(info, "romio_cb_write", value);
                fd->hints->cb_write = ADIOI_HINT_ENABLE;
            }
            else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE"))
            {
                /* romio_cb_write overrides no_indep_rw, too */
                ADIOI_Info_set(info, "romio_cb_write", value);
                ADIOI_Info_set(info, "romio_no_indep_rw", "false");
                fd->hints->cb_write = ADIOI_HINT_DISABLE;
                fd->hints->no_indep_rw = ADIOI_HINT_DISABLE;
            }
            else if (!strcmp(value, "automatic") ||
                     !strcmp(value, "AUTOMATIC"))
            {
                ADIOI_Info_set(info, "romio_cb_write", value);
                fd->hints->cb_write = ADIOI_HINT_AUTO;
            }

            tmp_val = fd->hints->cb_write;
            MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
            /* --BEGIN ERROR HANDLING-- */
            if (tmp_val != fd->hints->cb_write) {
                MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
                                                   "romio_cb_write",
                                                   error_code);
                ADIOI_Free(value); /* do not leak the scratch buffer */
                return;
            }
            /* --END ERROR HANDLING-- */
        }
#if 0
        /* bg is not implementing file realms (ADIOI_IOStridedColl) ... */
        /* enable/disable persistent file realms for collective I/O */
        /* may want to check for no_indep_rdwr hint as well */
        ADIOI_Info_get(users_info, "romio_cb_pfr", MPI_MAX_INFO_VAL, value,
                       &flag);
        if (flag) {
            if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
                ADIOI_Info_set(info, "romio_cb_pfr", value);
                fd->hints->cb_pfr = ADIOI_HINT_ENABLE;
            }
            else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) {
                ADIOI_Info_set(info, "romio_cb_pfr", value);
                fd->hints->cb_pfr = ADIOI_HINT_DISABLE;
            }
            else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
            {
                ADIOI_Info_set(info, "romio_cb_pfr", value);
                fd->hints->cb_pfr = ADIOI_HINT_AUTO;
            }

            tmp_val = fd->hints->cb_pfr;
            MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
            /* --BEGIN ERROR HANDLING-- */
            if (tmp_val != fd->hints->cb_pfr) {
                MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
                                                   "romio_cb_pfr",
                                                   error_code);
                ADIOI_Free(value);
                return;
            }
            /* --END ERROR HANDLING-- */
        }

        /* file realm assignment types ADIOI_FR_AAR(0),
           ADIOI_FR_FSZ(-1), ADIOI_FR_USR_REALMS(-2), all others specify
           a regular fr size in bytes. probably not the best way... */
        ADIOI_Info_get(users_info, "romio_cb_fr_type", MPI_MAX_INFO_VAL,
                       value, &flag);
        if (flag && ((intval=atoi(value)) >= -2)) {
            tmp_val = intval;
            MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
            /* --BEGIN ERROR HANDLING-- */
            if (tmp_val != intval) {
                MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
                                                   "romio_cb_fr_type",
                                                   error_code);
                ADIOI_Free(value);
                return;
            }
            /* --END ERROR HANDLING-- */

            ADIOI_Info_set(info, "romio_cb_fr_type", value);
            fd->hints->cb_fr_type = intval;
        }
#endif
        /* new hint for specifying no indep. read/write will be performed */
        ADIOI_Info_get(users_info, "romio_no_indep_rw", MPI_MAX_INFO_VAL, value,
                       &flag);
        if (flag) {
            if (!strcmp(value, "true") || !strcmp(value, "TRUE")) {
                /* if 'no_indep_rw' set, also hint that we will do
                 * collective buffering: if we aren't doing independent io,
                 * then we have to do collective */
                ADIOI_Info_set(info, "romio_no_indep_rw", value);
                ADIOI_Info_set(info, "romio_cb_write", "enable");
                ADIOI_Info_set(info, "romio_cb_read", "enable");
                fd->hints->no_indep_rw = 1;
                fd->hints->cb_read = 1;
                fd->hints->cb_write = 1;
                tmp_val = 1;
            }
            else if (!strcmp(value, "false") || !strcmp(value, "FALSE")) {
                ADIOI_Info_set(info, "romio_no_indep_rw", value);
                fd->hints->no_indep_rw = 0;
                tmp_val = 0;
            }
            else {
                /* default is above */
                tmp_val = 0;
            }

            MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
            /* --BEGIN ERROR HANDLING-- */
            if (tmp_val != fd->hints->no_indep_rw) {
                MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
                                                   "romio_no_indep_rw",
                                                   error_code);
                ADIOI_Free(value); /* do not leak the scratch buffer */
                return;
            }
            /* --END ERROR HANDLING-- */
        }
        /* new hints for enabling/disabling data sieving on
         * reads/writes
         */
        ADIOI_Info_get(users_info, "romio_ds_read", MPI_MAX_INFO_VAL, value,
                       &flag);
        if (flag) {
            if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
                ADIOI_Info_set(info, "romio_ds_read", value);
                fd->hints->ds_read = ADIOI_HINT_ENABLE;
            }
            else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) {
                ADIOI_Info_set(info, "romio_ds_read", value);
                fd->hints->ds_read = ADIOI_HINT_DISABLE;
            }
            else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
            {
                ADIOI_Info_set(info, "romio_ds_read", value);
                fd->hints->ds_read = ADIOI_HINT_AUTO;
            }
            /* otherwise ignore */
        }
        ADIOI_Info_get(users_info, "romio_ds_write", MPI_MAX_INFO_VAL, value,
                       &flag);
        if (flag) {
            if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
                ADIOI_Info_set(info, "romio_ds_write", value);
                fd->hints->ds_write = ADIOI_HINT_ENABLE;
            }
            else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) {
                ADIOI_Info_set(info, "romio_ds_write", value);
                fd->hints->ds_write = ADIOI_HINT_DISABLE;
            }
            else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
            {
                ADIOI_Info_set(info, "romio_ds_write", value);
                fd->hints->ds_write = ADIOI_HINT_AUTO;
            }
            /* otherwise ignore */
        }

        ADIOI_Info_get(users_info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL,
                       value, &flag);
        if (flag && ((intval = atoi(value)) > 0)) {
            ADIOI_Info_set(info, "ind_wr_buffer_size", value);
            fd->hints->ind_wr_buffer_size = intval;
        }

        ADIOI_Info_get(users_info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL,
                       value, &flag);
        if (flag && ((intval = atoi(value)) > 0)) {
            ADIOI_Info_set(info, "ind_rd_buffer_size", value);
            fd->hints->ind_rd_buffer_size = intval;
        }

        memset( value, 0, MPI_MAX_INFO_VAL+1 );
        ADIOI_Info_get(users_info, "romio_min_fdomain_size", MPI_MAX_INFO_VAL,
                       value, &flag);
        if ( flag && ((intval = atoi(value)) > 0) ) {
            ADIOI_Info_set(info, "romio_min_fdomain_size", value);
            fd->hints->min_fdomain_size = intval;
        }
        /* Now we use striping unit in common code so we should
           process hints for it. */
        ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
                       value, &flag);
        if ( flag && ((intval = atoi(value)) > 0) ) {
            ADIOI_Info_set(info, "striping_unit", value);
            fd->hints->striping_unit = intval;
        }

        memset( value, 0, MPI_MAX_INFO_VAL+1 );
        ADIOI_Info_get(users_info, ADIOI_BG_NAGG_IN_PSET_HINT_NAME, MPI_MAX_INFO_VAL,
                       value, &flag);
        if (flag && ((intval = atoi(value)) > 0)) {

            did_anything = 1;
            ADIOI_Info_set(info, ADIOI_BG_NAGG_IN_PSET_HINT_NAME, value);
            fd->hints->cb_nodes = intval;
        }
    }

    /* associate CB aggregators to certain CNs in every involved PSET;
     * only needed when the defaults were just installed or the user changed
     * the number of aggregators per pset */
    if (did_anything) {
        ADIOI_BG_gen_agg_ranklist(fd, fd->hints->cb_nodes);
    }

    /* ignore deferred open hints and do not enable it for bluegene: need all
     * processors in the open path so we can stat-and-broadcast the blocksize
     */
    ADIOI_Info_set(info, "romio_no_indep_rw", "false");
    fd->hints->no_indep_rw = 0;
    fd->hints->deferred_open = 0;

    /* BobC commented this out, but since hint processing runs on both bg and
     * bglockless, we need to keep DS writes enabled on gpfs and disabled on
     * PVFS */
    if (ADIO_Feature(fd, ADIO_DATA_SIEVING_WRITES) == 0) {
        /* disable data sieving for fs that do not
           support file locking */
        ADIOI_Info_get(info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL,
                       value, &flag);
        if (flag) {
            /* get rid of this value if it is set */
            ADIOI_Info_delete(info, "ind_wr_buffer_size");
        }
        /* note: leave ind_wr_buffer_size alone; used for other cases
         * as well. -- Rob Ross, 04/22/2003
         * NOTE(review): only the info key is deleted above;
         * fd->hints->ind_wr_buffer_size is not modified here.
         */
        ADIOI_Info_set(info, "romio_ds_write", "disable");
        fd->hints->ds_write = ADIOI_HINT_DISABLE;
    }

    ADIOI_Free(value);

    *error_code = MPI_SUCCESS;
}

Просмотреть файл

@ -0,0 +1,307 @@
/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp. 2007, 2008 */
/* ---------------------------------------------------------------- */
/**
* \file ad_bg_open.c
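* \brief File open for the BlueGene ADIO driver (ADIOI_BG_Open), including the rank-0 stat/statfs that is broadcast to all ranks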
*/
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*/
#include "ad_bg.h"
#include "ad_bg_aggrs.h"
#include <sys/statfs.h>
#include <sys/vfs.h>
/* COPIED FROM ad_fstype.c since it is static in that file
ADIO_FileSysType_parentdir - determines a string pathname for the
parent directory of a given filename.
Input Parameters:
. filename - pointer to file name character array
Output Parameters:
. dirnamep - pointer to location in which to store a pointer to a string
Note that the caller should free the memory located at the pointer returned
after the string is no longer needed.
*/
#ifndef PATH_MAX
#define PATH_MAX 65535
#endif
/* In a strict ANSI environment, S_ISLNK may not be defined. Fix that
here. We assume that S_ISLNK is *always* defined as a macro. If
that is not universally true, then add a test to the romio
configure that tries to link a program that references S_ISLNK */
#if !defined(S_ISLNK)
# if defined(S_IFLNK)
/* Check for the link bit */
# define S_ISLNK(mode) ((mode) & S_IFLNK)
# else
/* no way to check if it is a link, so say false */
# define S_ISLNK(mode) 0
# endif
#endif /* !(S_ISLNK) */
/* ADIO_FileSysType_parentdir
 *
 * Computes the parent directory of `filename` and returns it through
 * *dirnamep.  If `filename` is a symbolic link whose target can be read,
 * the parent directory of the link target is returned instead.
 *
 * filename  - path to examine (not modified)
 * dirnamep  - receives a newly allocated string holding the directory:
 *             "." when the path contains no '/', "/" when the only '/' is
 *             the leading one, otherwise everything before the last '/'
 *
 * The string is allocated with ADIOI_Strdup and must be released by the
 * caller.
 * NOTE(review): a zero-length path would leave no room for "." in the
 * duplicated string; callers are presumed to pass a non-empty filename.
 */
static void ADIO_FileSysType_parentdir(char *filename, char **dirnamep)
{
    int err;
    char *dir = NULL, *slash;
    struct stat statbuf;

    err = lstat(filename, &statbuf);

    if (err || (!S_ISLNK(statbuf.st_mode))) {
        /* no such file, or file is not a link; these are the "normal"
         * cases where we can just return the parent directory.
         */
        dir = ADIOI_Strdup(filename);
    }
    else {
        /* filename is a symlink.  we've presumably already tried
         * to stat it and found it to be missing (dangling link),
         * but this code doesn't care if the target is really there
         * or not.
         */
        ssize_t namelen;
        char *linkbuf;

        linkbuf = ADIOI_Malloc(PATH_MAX+1);
        /* Ask for at most PATH_MAX bytes so that one byte always remains
         * for the terminator written below. */
        namelen = readlink(filename, linkbuf, PATH_MAX);
        if (namelen == -1) {
            /* something strange has happened between the time that
             * we determined that this was a link and the time that
             * we attempted to read it; punt and use the old name.
             */
            dir = ADIOI_Strdup(filename);
        }
        else {
            /* successfully read the link */
            linkbuf[namelen] = '\0'; /* readlink doesn't null terminate */
            dir = ADIOI_Strdup(linkbuf);
        }
        /* the scratch buffer is no longer needed on either path */
        ADIOI_Free(linkbuf);
    }

    slash = strrchr(dir, '/');
    if (!slash) ADIOI_Strncpy(dir, ".", 2);
    else {
        if (slash == dir) *(dir + 1) = '\0';
        else *slash = '\0';
    }

    *dirnamep = dir;
    return;
}
/* scaleable_stat
 *
 * Has rank 0 of fd->comm query the file system (stat64 for the block
 * size, statfs for the file system type), broadcasts both values to every
 * rank, and records them in the ADIOI_BG_fs structure behind fd->fs_ptr:
 *   - blksize    : block size reported by stat64, or the value already
 *                  stored in fd->fs_ptr when stat64 fails
 *   - fsync_aggr : fsync aggregation is enabled when the file system type
 *                  is GPFS_SUPER_MAGIC or bglocklessmpio_f_type; rank 0 is
 *                  additionally flagged as the fsync aggregator
 *
 * Collective over fd->comm (MPI_Bcast); fd->fs_ptr must already point at
 * an initialized ADIOI_BG_fs.
 */
static void scaleable_stat(ADIO_File fd)
{
    struct stat64 bg_stat;
    struct statfs bg_statfs;
    int rank, rc;
    char * dir;
    long buf[2];

    MPI_Comm_rank(fd->comm, &rank);

    if (rank == 0) {
        /* Start from the block size already recorded for this file so a
         * failed stat64 broadcasts that value rather than an
         * uninitialized one. */
        buf[0] = ((ADIOI_BG_fs*)fd->fs_ptr)->blksize;

        /* Get the (real) underlying file system block size */
        rc = stat64(fd->filename, &bg_stat);
        if (rc >= 0)
        {
            buf[0] = bg_stat.st_blksize;
            DBGV_FPRINTF(stderr,"Successful stat '%s'.  Blocksize=%ld\n",
                         fd->filename,bg_stat.st_blksize);
        }
        else
        {
            DBGV_FPRINTF(stderr,"Stat '%s' failed with rc=%d, errno=%d\n",
                         fd->filename,rc,errno);
        }

        /* Get the (real) underlying file system type so we can
         * plan our fsync scaling strategy */
        rc = statfs(fd->filename,&bg_statfs);
        if (rc >= 0)
        {
            DBGV_FPRINTF(stderr,"Successful statfs '%s'.  Magic number=%#lX\n",
                         fd->filename,bg_statfs.f_type);
            buf[1] = bg_statfs.f_type;
        }
        else
        {
            DBGV_FPRINTF(stderr,"Statfs '%s' failed with rc=%d, errno=%d\n",
                         fd->filename,rc,errno);
            /* the file itself could not be queried; retry on its parent
             * directory */
            ADIO_FileSysType_parentdir(fd->filename, &dir);
            rc = statfs(dir,&bg_statfs);
            if (rc >= 0)
            {
                DBGV_FPRINTF(stderr,"Successful statfs '%s'.  Magic number=%#lX\n",dir,bg_statfs.f_type);
                buf[1] = bg_statfs.f_type;
            }
            else
            {
                /* Hmm.  Guess we'll assume the worst-case, that it's not GPFS
                 * or BGLOCKLESSMPIO_F_TYPE (default PVFS2) below */
                buf[1] = -1; /* bogus magic number */
                DBGV_FPRINTF(stderr,"Statfs '%s' failed with rc=%d, errno=%d\n",dir,rc,errno);
            }
            free(dir);
        }
    }

    /* now we can broadcast the stat/statfs data to everyone else */
    MPI_Bcast(buf, 2, MPI_LONG, 0, fd->comm);
    bg_stat.st_blksize = buf[0];
    bg_statfs.f_type = buf[1];

    /* data from stat64 */
    /* store the blksize in the file system specific storage */
    ((ADIOI_BG_fs*)fd->fs_ptr)->blksize = bg_stat.st_blksize;

    /* data from statfs */
    if ((bg_statfs.f_type == GPFS_SUPER_MAGIC) ||
        (bg_statfs.f_type == bglocklessmpio_f_type))
    {
        ((ADIOI_BG_fs*)fd->fs_ptr)->fsync_aggr =
            ADIOI_BG_FSYNC_AGGREGATION_ENABLED;

        /* Only one rank is an "fsync aggregator" because only one
         * fsync is needed */
        if (rank == 0)
        {
            ((ADIOI_BG_fs*)fd->fs_ptr)->fsync_aggr |=
                ADIOI_BG_FSYNC_AGGREGATOR;
            DBG_FPRINTF(stderr,"fsync aggregator %d\n",rank);
        }
        /* else: aggregation enabled but this rank is not an aggregator */
    }
    /* else: other filesystems default to no fsync aggregation */
}
/* ADIOI_BG_Open
 *
 * Opens fd->filename with POSIX open(), translating the ADIO access mode
 * bits into O_* flags, and stores the descriptor in fd->fd_sys.
 *
 * fd          file being opened; on success fd->fs_ptr is allocated and
 *             filled in (default block size 1M, fsync aggregation
 *             disabled, then refined by scaleable_stat), and for
 *             ADIO_APPEND the file pointers are moved to end of file
 * error_code  MPI_SUCCESS when open() succeeds, otherwise an MPI error
 *             code chosen from the errno value reported by open()
 *
 * errno is captured immediately after open() so that the logging calls
 * between the open and the error classification cannot change which
 * error is reported.
 */
void ADIOI_BG_Open(ADIO_File fd, int *error_code)
{
    int perm, old_mask, amode;
    int open_errno;
    static char myname[] = "ADIOI_BG_OPEN";

    /* set internal variables for tuning environment variables */
    ad_bg_get_env_vars();

    if (fd->perm == ADIO_PERM_NULL) {
        /* umask() can only be read by setting it: set, then restore */
        old_mask = umask(022);
        umask(old_mask);
        perm = old_mask ^ 0666;
    }
    else perm = fd->perm;

    amode = 0;
    if (fd->access_mode & ADIO_CREATE)
        amode = amode | O_CREAT;
    if (fd->access_mode & ADIO_RDONLY)
        amode = amode | O_RDONLY;
    if (fd->access_mode & ADIO_WRONLY)
        amode = amode | O_WRONLY;
    if (fd->access_mode & ADIO_RDWR)
        amode = amode | O_RDWR;
    if (fd->access_mode & ADIO_EXCL)
        amode = amode | O_EXCL;
#ifdef ADIOI_MPE_LOGGING
    MPE_Log_event(ADIOI_MPE_open_a, 0, NULL);
#endif
    fd->fd_sys = open(fd->filename, amode, perm);
    open_errno = errno; /* remember open()'s error before anything else runs */
#ifdef ADIOI_MPE_LOGGING
    MPE_Log_event(ADIOI_MPE_open_b, 0, NULL);
#endif
    DBG_FPRINTF(stderr,"open('%s',%#X,%#X) rc=%d, errno=%d\n",fd->filename,amode,perm,fd->fd_sys,open_errno);
    fd->fd_direct = -1;

    if ((fd->fd_sys != -1) && (fd->access_mode & ADIO_APPEND))
        fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END);

    if(fd->fd_sys != -1)
    {
        /* Initialize the ad_bg file system specific information */
        ADIOI_BG_assert(fd->fs_ptr == NULL);
        fd->fs_ptr = (ADIOI_BG_fs*) ADIOI_Malloc(sizeof(ADIOI_BG_fs));

        ((ADIOI_BG_fs*)fd->fs_ptr)->blksize = 1048576; /* default to 1M */

        /* default is no fsync aggregation */
        ((ADIOI_BG_fs*)fd->fs_ptr)->fsync_aggr =
            ADIOI_BG_FSYNC_AGGREGATION_DISABLED;

#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event(ADIOI_MPE_stat_a, 0, NULL);
#endif
        scaleable_stat(fd);
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event(ADIOI_MPE_stat_b, 0, NULL);
#endif
    }

    if (fd->fd_sys == -1) {
        if (open_errno == ENAMETOOLONG)
            /* the format consumes an int for %d, so narrow the size_t
             * returned by strlen explicitly */
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE, myname,
                                               __LINE__, MPI_ERR_BAD_FILE,
                                               "**filenamelong",
                                               "**filenamelong %s %d",
                                               fd->filename,
                                               (int) strlen(fd->filename));
        else if (open_errno == ENOENT)
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE, myname,
                                               __LINE__, MPI_ERR_NO_SUCH_FILE,
                                               "**filenoexist",
                                               "**filenoexist %s",
                                               fd->filename);
        else if (open_errno == ENOTDIR || open_errno == ELOOP)
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE,
                                               myname, __LINE__,
                                               MPI_ERR_BAD_FILE,
                                               "**filenamedir",
                                               "**filenamedir %s",
                                               fd->filename);
        else if (open_errno == EACCES) {
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE, myname,
                                               __LINE__, MPI_ERR_ACCESS,
                                               "**fileaccess",
                                               "**fileaccess %s",
                                               fd->filename );
        }
        else if (open_errno == EROFS) {
            /* Read only file or file system and write access requested */
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE, myname,
                                               __LINE__, MPI_ERR_READ_ONLY,
                                               "**ioneedrd", 0 );
        }
        else {
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE, myname,
                                               __LINE__, MPI_ERR_IO, "**io",
                                               "**io %s", strerror(open_errno));
        }
    }
    else *error_code = MPI_SUCCESS;
}
/*
*vim: ts=8 sts=4 sw=4 noexpandtab
*/

Просмотреть файл

@ -0,0 +1,279 @@
/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp. 2007, 2008 */
/* ---------------------------------------------------------------- */
/**
* \file ad_bg_pset.c
* \brief Definition of functions associated to structs ADIOI_BG_ProcInfo_t and ADIOI_BG_ConfInfo_t
*/
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*/
/* #define TRACE_ON */
#include <stdlib.h>
#include "ad_bg.h"
#include "ad_bg_pset.h"
#include "mpidimpl.h"
#include <firmware/include/personality.h>
/* Allocate storage for a single ADIOI_BG_ProcInfo_t.  The contents are
 * not initialized; the allocation is asserted to have succeeded.
 * Release with ADIOI_BG_ProcInfo_free(). */
ADIOI_BG_ProcInfo_t *
ADIOI_BG_ProcInfo_new()
{
    ADIOI_BG_ProcInfo_t *proc_info;

    proc_info = (ADIOI_BG_ProcInfo_t *) ADIOI_Malloc (sizeof(ADIOI_BG_ProcInfo_t));
    ADIOI_BG_assert ((proc_info != NULL));

    return proc_info;
}
/* Allocate storage for an array of n ADIOI_BG_ProcInfo_t entries.  The
 * contents are not initialized; the allocation is asserted to have
 * succeeded.  Release with ADIOI_BG_ProcInfo_free(). */
ADIOI_BG_ProcInfo_t *
ADIOI_BG_ProcInfo_new_n( int n )
{
    ADIOI_BG_ProcInfo_t *proc_array;

    proc_array = (ADIOI_BG_ProcInfo_t *) ADIOI_Malloc (n * sizeof(ADIOI_BG_ProcInfo_t));
    ADIOI_BG_assert ((proc_array != NULL));

    return proc_array;
}
/* Release storage obtained from ADIOI_BG_ProcInfo_new() or
 * ADIOI_BG_ProcInfo_new_n().  A NULL pointer is accepted and ignored. */
void
ADIOI_BG_ProcInfo_free( ADIOI_BG_ProcInfo_t *info )
{
    if (info == NULL) {
        return;
    }
    ADIOI_Free (info);
}
/* Allocate storage for a single ADIOI_BG_ConfInfo_t.  The contents are
 * not initialized; the allocation is asserted to have succeeded.
 * Release with ADIOI_BG_ConfInfo_free(). */
ADIOI_BG_ConfInfo_t *
ADIOI_BG_ConfInfo_new ()
{
    ADIOI_BG_ConfInfo_t *conf_info;

    conf_info = (ADIOI_BG_ConfInfo_t *) ADIOI_Malloc (sizeof(ADIOI_BG_ConfInfo_t));
    ADIOI_BG_assert ((conf_info != NULL));

    return conf_info;
}
/* Release storage obtained from ADIOI_BG_ConfInfo_new().  A NULL pointer
 * is accepted and ignored. */
void
ADIOI_BG_ConfInfo_free( ADIOI_BG_ConfInfo_t *info )
{
    if (info == NULL) {
        return;
    }
    ADIOI_Free (info);
}
/* One entry per rank in the table that is gathered and sorted by
 * ADIOI_BG_persInfo_init(): the rank and its packed bridge coordinate key. */
typedef struct
{
    int rank;
    int bridgeCoord;
} sortstruct;

/* qsort() comparator ordering sortstruct entries by ascending bridgeCoord.
 *
 * Returns a negative, zero or positive value when p1's key is less than,
 * equal to, or greater than p2's key.  The result is formed from two
 * comparisons rather than a subtraction, so it stays correct for any pair
 * of int keys (a difference of keys with opposite signs can overflow). */
static int intsort(const void *p1, const void *p2)
{
    const sortstruct *lhs = (const sortstruct *)p1;
    const sortstruct *rhs = (const sortstruct *)p2;

    return (lhs->bridgeCoord > rhs->bridgeCoord) -
           (lhs->bridgeCoord < rhs->bridgeCoord);
}
/* ADIOI_BG_persInfo_init
 *
 * Fills in this rank's ADIOI_BG_ProcInfo_t (proc) and, on rank 0 only (or
 * when size == 1), the ADIOI_BG_ConfInfo_t (conf) from the hardware
 * description (MPIX_Hardware) and the node personality
 * (Kernel_GetPersonality).
 *
 * conf     configuration summary; written only when rank == 0 or size == 1
 * proc     per-rank information; always written
 * size     number of entries allocated for / sorted in the gathered table
 *          (presumably the size of comm -- TODO confirm against callers)
 * rank     this process's index into the gathered table (presumably its
 *          rank in comm -- TODO confirm against callers)
 * n_aggrs  requested number of aggregators; values <= 0, or larger than
 *          MIN(nProcs, ioMaxSize), fall back to ADIOI_BG_NAGG_PSET_DFLT
 * comm     communicator used for the MPI_Allgather below
 *
 * Collective over comm unless size == 1.
 */
void
ADIOI_BG_persInfo_init(ADIOI_BG_ConfInfo_t *conf,
ADIOI_BG_ProcInfo_t *proc,
int size, int rank, int n_aggrs, MPI_Comm comm)
{
int i, iambridge=0, bridgerank = -1, bridgeIndex;
int countPset;
sortstruct *bridges;
int commsize;
TRACE_ERR("Entering BG_persInfo_init, size: %d, rank: %d, n_aggrs: %d, comm: %d\n", size, rank, n_aggrs, (int)comm);
Personality_t pers;
MPIX_Hardware_t hw;
MPIX_Hardware(&hw);
TRACE_ERR("BG_persInfo_init, my coords{%u,%u,%u,%u,%u} rankInPset %u,sizeOfPset %u,idOfPset %u\n",hw.Coords[0],hw.Coords[1],hw.Coords[2],hw.Coords[3],hw.Coords[4],hw.rankInPset,hw.sizeOfPset,hw.idOfPset);
Kernel_GetPersonality(&pers, sizeof(pers));
proc->rank = rank;
proc->coreID = hw.coreID;
/* NOTE(review): commsize is queried here but not read again in this function. */
MPI_Comm_size(comm, &commsize);
/* Single-process case: this rank is its own bridge and the only
 * aggregator; no communication is needed. */
if(size == 1)
{
proc->iamBridge = 1;
/* Set up the other parameters */
proc->myIOSize = size;
proc->ioNodeIndex = 0;
conf->ioMinSize = size;
conf->ioMaxSize = size;
conf->numBridgeRanks = 1;
conf->nProcs = size;
conf->cpuIDsize = hw.ppn;
/*conf->virtualPsetSize = conf->ioMaxSize * conf->cpuIDsize;*/
conf->nAggrs = 1;
conf->aggRatio = 1. * conf->nAggrs / conf->ioMaxSize /*virtualPsetSize*/;
if(conf->aggRatio > 1) conf->aggRatio = 1.;
TRACE_ERR("I am (single) Bridge rank\n");
return;
}
/* Find the nearest bridge node coords. We don't know the
rank in our comm so we will collective find/pick a bridge
rank later.
*/
/* Pack the five bridge coordinates into one int used as the sort key.
 * Bits 0 and 1 are left clear by the shifts; bit 0 is used below as the
 * "not the bridge itself" flag. */
int32_t bridgeCoords;
bridgeCoords = pers.Network_Config.cnBridge_A << 24 |
pers.Network_Config.cnBridge_B << 18 |
pers.Network_Config.cnBridge_C << 12 |
pers.Network_Config.cnBridge_D << 6 |
pers.Network_Config.cnBridge_E << 2;
ADIOI_BG_assert((bridgeCoords >= 0)); /* A dim is < 6 bits or sorting won't work */
if((hw.Coords[0] == pers.Network_Config.cnBridge_A) &&
(hw.Coords[1] == pers.Network_Config.cnBridge_B) &&
(hw.Coords[2] == pers.Network_Config.cnBridge_C) &&
(hw.Coords[3] == pers.Network_Config.cnBridge_D) &&
(hw.Coords[4] == pers.Network_Config.cnBridge_E))
iambridge = 1; /* I am bridge */
TRACE_ERR("Bridge coords(%8.8X): %d %d %d %d %d, %d. iambridge %d\n",bridgeCoords, pers.Network_Config.cnBridge_A,pers.Network_Config.cnBridge_B,pers.Network_Config.cnBridge_C,pers.Network_Config.cnBridge_D,pers.Network_Config.cnBridge_E,0, iambridge);
/* Allgather the ranks and bridgeCoords to determine the bridge
rank and how many ranks belong to each bridge rank*/
bridges = (sortstruct *) ADIOI_Malloc(sizeof(sortstruct) * size);
/* We're going to sort this structure by bridgeCoord:
typedef struct
{
int rank;
int bridgeCoord;
} sortstruct;
and I want the rank that IS the bridge to sort first, so
OR in '1' on non-bridge ranks that use a bridge coord.
*/
/* My input to the collective */
bridges[rank].rank = rank;
bridges[rank].bridgeCoord = bridgeCoords;
if(!iambridge)
bridges[rank].bridgeCoord |= 1; /* I am not bridge, turn on bit */
/* In-place gather: each rank contributes the two ints it stored at
 * bridges[rank] above. */
MPI_Allgather(MPI_IN_PLACE, 2, MPI_INT, bridges, 2, MPI_INT, comm);
qsort(bridges, size, sizeof(sortstruct), intsort);
/* Once the list is sorted walk through it to setup bridge
info and find bridge ranks, etc. */
/* tempCoords/tempRank track the bridge set currently being counted: its
 * key with the flag bit masked off, and the rank of its first entry in
 * sorted order (taken as the bridge rank of that set).  mincompute and
 * maxcompute collect the smallest and largest set sizes seen. */
int tempCoords, tempRank, mincompute, maxcompute;
tempCoords = bridges[0].bridgeCoord & ~1;
tempRank = bridges[0].rank;
countPset=1;
bridgeIndex = 0;
mincompute = size+1;
maxcompute = 1;
for(i=1; i<size; i++)
{
if((bridges[i].bridgeCoord & ~1) == tempCoords)
countPset++; /* same bridge (pset), count it */
else /* new bridge found */
{
#ifdef TRACE_ON
if(rank == 0)
TRACE_ERR("Bridge set %u, bridge rank %d (%#8.8X) has %d ranks\n",
bridgeIndex, tempRank, tempCoords, countPset);
#endif
if(countPset > maxcompute)
maxcompute = countPset;
if(countPset < mincompute)
mincompute = countPset;
/* Is this my bridge? */
if(tempCoords == bridgeCoords)
{
/* Am I the bridge rank? */
if(tempRank == rank)
iambridge = 1;
else
iambridge = 0; /* Another rank on my node may have taken over */
TRACE_ERR("Rank %u, bridge set %u, bridge rank %d (%#8.8X) has %d ranks, iambridge %u\n",
rank, bridgeIndex, tempRank, tempCoords, countPset,iambridge);
bridgerank = tempRank;
proc->myIOSize = countPset;
proc->ioNodeIndex = bridgeIndex;
}
/* start counting the next bridge set */
tempCoords = bridges[i].bridgeCoord & ~1;
tempRank = bridges[i].rank;
bridgeIndex++;
countPset = 1;
}
}
/* Process last bridge */
/* (the loop above only closes out a set when the next one begins, so the
 * final set is handled here with the same steps) */
#ifdef TRACE_ON
if(rank == 0)
TRACE_ERR("Bridge set %u, bridge rank %d (%#8.8X) has %d ranks\n",
bridgeIndex, tempRank, tempCoords, countPset);
#endif
if(countPset > maxcompute)
maxcompute = countPset;
if(countPset < mincompute)
mincompute = countPset;
/* Is this my bridge? */
if(tempCoords == bridgeCoords)
{
/* Am I the bridge rank? */
if(tempRank == rank)
iambridge = 1;
else
iambridge = 0; /* Another rank on my node may have taken over */
bridgerank = tempRank;
proc->myIOSize = countPset;
proc->ioNodeIndex = bridgeIndex;
}
if(rank == 0)
{
/* Only rank 0 has a conf structure, fill in stuff as appropriate */
conf->ioMinSize = mincompute;
conf->ioMaxSize = maxcompute; /* equivalent to pset size */
conf->numBridgeRanks = bridgeIndex+1;
conf->nProcs = size;
conf->cpuIDsize = hw.ppn;
/*conf->virtualPsetSize = maxcompute * conf->cpuIDsize;*/
conf->nAggrs = n_aggrs;
/* First pass gets nAggrs = -1 */
if(conf->nAggrs <=0 ||
MIN(conf->nProcs, conf->ioMaxSize /*virtualPsetSize*/) < conf->nAggrs)
conf->nAggrs = ADIOI_BG_NAGG_PSET_DFLT;
/* never use more aggregators than there are bridge ranks */
if(conf->nAggrs > conf->numBridgeRanks) /* maybe? * conf->cpuIDsize) */
conf->nAggrs = conf->numBridgeRanks; /* * conf->cpuIDsize; */
conf->aggRatio = 1. * conf->nAggrs / conf->ioMaxSize /*virtualPsetSize*/;
if(conf->aggRatio > 1) conf->aggRatio = 1.;
TRACE_ERR("Maximum ranks under a bridge rank: %d, minimum: %d, nAggrs: %d, vps: %d, numBridgeRanks: %d pset dflt: %d naggrs: %d ratio: %f\n", maxcompute, mincompute, conf->nAggrs, conf->ioMaxSize /*virtualPsetSize*/, conf->numBridgeRanks, ADIOI_BG_NAGG_PSET_DFLT, conf->nAggrs, conf->aggRatio);
}
/* every rank must have found its own bridge set in the sorted table */
ADIOI_BG_assert((bridgerank != -1));
proc->bridgeRank = bridgerank;
proc->iamBridge = iambridge;
TRACE_ERR("Rank %d has bridge set index %d (bridge rank: %d) with %d other ranks, ioNodeIndex: %d\n", rank, proc->ioNodeIndex, bridgerank, proc->myIOSize, proc->ioNodeIndex);
ADIOI_Free(bridges);
}
/*
 * Release a configuration/process info pair by handing each object to its
 * own free routine: conf first, then proc.
 */
void ADIOI_BG_persInfo_free(ADIOI_BG_ConfInfo_t *conf, ADIOI_BG_ProcInfo_t *proc)
{
    ADIOI_BG_ConfInfo_free(conf);
    ADIOI_BG_ProcInfo_free(proc);
}

Просмотреть файл

@ -0,0 +1,81 @@
/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp. 2007, 2008 */
/* ---------------------------------------------------------------- */
/**
* \file ad_bg_pset.h
* \brief Per-process and per-communicator BG pset information structures and their public interfaces
*/
/* File: ad_bg_pset.h
*
* Defines two structures that keep BG/L PSET specific information and their public interfaces:
* . ADIOI_BG_ProcInfo_t object keeps specific information to each process
* . ADIOI_BG_ConfInfo_t object keeps general information for the whole communicator, only kept
* on process 0.
*/
#ifndef AD_BG_PSET_H_
#define AD_BG_PSET_H_
/* Keeps specific information to each process, will be exchanged among processes */
/* Per-process record. The type is declared with 16-byte alignment, and the
 * __cpad/__ipad members are explicit padding; do not reorder or resize
 * fields without checking every user of this layout. */
typedef struct {
int ioNodeIndex; /* similar to psetNum on BGL/BGP */
int rank; /* my rank */
/* int myCoords[5]; */
int bridgeRank; /* my bridge node (or proxy) rank */
unsigned char coreID;
unsigned char threadID; /* unlikely to be useful but better than just padding */
unsigned char __cpad[2]; /* explicit padding after the two one-byte IDs */
int myIOSize; /* number of ranks sharing my bridge/IO
node, i.e. psetsize*/
int iamBridge; /* am *I* the bridge rank? */
int __ipad[2]; /* explicit trailing padding */
} ADIOI_BG_ProcInfo_t __attribute__((aligned(16)));
/* Keeps general information for the whole communicator, only on process 0 */
typedef struct {
int ioMinSize; /* Smallest number of ranks shareing 1 bridge node */
int ioMaxSize; /* Largest number of ranks sharing 1 bridge node */
/* ioMaxSize will be the "psetsize" */
int nAggrs;
int numBridgeRanks;
/*int virtualPsetSize; ppn * pset size */
int nProcs;
int cpuIDsize; /* num ppn */
float aggRatio;
} ADIOI_BG_ConfInfo_t __attribute__((aligned(16)));
#undef MIN
/* Smaller of a and b. Every use of an argument is parenthesized so that
 * expression arguments (e.g. MIN(x & m, y)) keep their intended grouping.
 * Like any function-like macro, the selected argument is evaluated twice,
 * so do not pass expressions with side effects. */
#define MIN(a,b) (((a) < (b)) ? (a) : (b))
/* Default is to choose 8 aggregator nodes in each 32 CN pset.
This also defines the default ratio of aggregator nodes in each pset.
For Virtual Node Mode, the ratio is 8/64 */
#define ADIOI_BG_NAGG_PSET_MIN 1
#define ADIOI_BG_NAGG_PSET_DFLT 8
#define ADIOI_BG_PSET_SIZE_DFLT 32
/* public funcs for ADIOI_BG_ProcInfo_t objects */
ADIOI_BG_ProcInfo_t * ADIOI_BG_ProcInfo_new();
ADIOI_BG_ProcInfo_t * ADIOI_BG_ProcInfo_new_n( int n );
void ADIOI_BG_ProcInfo_free( ADIOI_BG_ProcInfo_t *info );
/* public funcs for ADIOI_BG_ConfInfo_t objects */
ADIOI_BG_ConfInfo_t * ADIOI_BG_ConfInfo_new ();
void ADIOI_BG_ConfInfo_free( ADIOI_BG_ConfInfo_t *info );
/* public funcs for a pair of ADIOI_BG_ConfInfo_t and ADIOI_BG_ProcInfo_t objects */
void ADIOI_BG_persInfo_init( ADIOI_BG_ConfInfo_t *conf,
ADIOI_BG_ProcInfo_t *proc,
int s, int r, int n_aggrs, MPI_Comm comm);
void ADIOI_BG_persInfo_free( ADIOI_BG_ConfInfo_t *conf,
ADIOI_BG_ProcInfo_t *proc );
#endif /* AD_BG_PSET_H_ */

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -0,0 +1,558 @@
/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp. 2007, 2008 */
/* ---------------------------------------------------------------- */
/**
* \file ad_bg_read.c
* \brief Contiguous and strided (data-sieving) read routines for the BG ADIO driver
*/
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*/
#include "ad_bg.h"
#include "adio_extern.h"
#include "ad_bg_tuning.h"
#ifdef AGGREGATION_PROFILE
#include "mpe.h"
#endif
/*
 * ADIOI_BG_ReadContig - read `count` items of `datatype` into `buf` with a
 * single read() call.
 *
 * fd            - open ADIO file
 * buf           - destination buffer
 * count         - number of datatype items to read
 * datatype      - MPI datatype of each item (its size gives the byte count)
 * file_ptr_type - ADIO_EXPLICIT_OFFSET reads at `offset` and leaves the
 *                 individual file pointer alone; any other value reads at
 *                 fd->fp_ind and advances it by the number of bytes read
 * offset        - byte offset, used only for ADIO_EXPLICIT_OFFSET
 * status        - filled with the byte count when HAVE_STATUS_SET_BYTES is set
 * error_code    - MPI_SUCCESS, or an MPI_ERR_IO code if read() returned -1
 *
 * The region is locked around the read: a write lock when fd->atomicity is
 * set, a read lock otherwise.  When built with BG_PROFILE the seek and read
 * times are accumulated into bgmpio_prof_cr[].
 */
void ADIOI_BG_ReadContig(ADIO_File fd, void *buf, int count,
                         MPI_Datatype datatype, int file_ptr_type,
                         ADIO_Offset offset, ADIO_Status *status, int *error_code)
{
    int err=-1, datatype_size, read_errno;
    ADIO_Offset len;
    static char myname[] = "ADIOI_BG_READCONTIG";
#if BG_PROFILE
    /* timing */
    double io_time = 0.0, io_time2 = 0.0;
#endif

#ifdef AGGREGATION_PROFILE
    MPE_Log_event (5034, 0, NULL);
#endif
#if BG_PROFILE
    if (bgmpio_timing) io_time = MPI_Wtime();
#endif

    MPI_Type_size(datatype, &datatype_size);
    len = (ADIO_Offset)datatype_size * (ADIO_Offset)count;
    ADIOI_Assert(len == (unsigned int) len); /* read takes an unsigned int parm */

#if BG_PROFILE
    /* Account for the request size only now that len has been computed;
     * doing it earlier would add an uninitialized value. */
    if (bgmpio_timing) bgmpio_prof_cr[ BGMPIO_CIO_DATA_SIZE ] += len;
#endif

    /* read from curr. location of ind. file pointer unless an explicit
     * offset was requested */
    if (file_ptr_type != ADIO_EXPLICIT_OFFSET)
        offset = fd->fp_ind;

#if BG_PROFILE
    if (bgmpio_timing2) io_time2 = MPI_Wtime();
#endif
    if (fd->fp_sys_posn != offset)
        lseek(fd->fd_sys, offset, SEEK_SET);
#if BG_PROFILE
    if (bgmpio_timing2) bgmpio_prof_cr[ BGMPIO_CIO_T_SEEK ] += (MPI_Wtime() - io_time2);
#endif

    if (fd->atomicity)
        ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
    else ADIOI_READ_LOCK(fd, offset, SEEK_SET, len);

#if BG_PROFILE
    if (bgmpio_timing2) io_time2 = MPI_Wtime();
#endif
    err = read(fd->fd_sys, buf, (unsigned int)len);
    /* capture errno before the unlock/timing calls below can overwrite it */
    read_errno = errno;
#if BG_PROFILE
    if (bgmpio_timing2) bgmpio_prof_cr[ BGMPIO_CIO_T_POSI_RW ] += (MPI_Wtime() - io_time2);
#endif
    ADIOI_UNLOCK(fd, offset, SEEK_SET, len);

#if BG_PROFILE
    if (bgmpio_timing) bgmpio_prof_cr[ BGMPIO_CIO_T_MPIO_RW ] += (MPI_Wtime() - io_time);
#endif

    /* --BEGIN ERROR HANDLING-- */
    if (err == -1) {
        /* Do not advance any file pointer by -1; mark the system position
         * as unknown so the next access seeks explicitly. */
        fd->fp_sys_posn = -1;
        *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__, MPI_ERR_IO,
                                           "**io", "**io %s", strerror(read_errno));
        return;
    }
    /* --END ERROR HANDLING-- */

    if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
        fd->fp_sys_posn = offset + err;
        /* individual file pointer not updated */
    }
    else {
        fd->fp_ind += err;
        fd->fp_sys_posn = fd->fp_ind;
    }

#ifdef HAVE_STATUS_SET_BYTES
    MPIR_Status_set_bytes(status, datatype, err);
#endif
    *error_code = MPI_SUCCESS;
#ifdef AGGREGATION_PROFILE
    MPE_Log_event (5035, 0, NULL);
#endif
}
/*
 * ADIOI_BUFFERED_READ - copy req_len bytes starting at file offset req_off
 * into (char *)buf + userbuf_off, going through the data-sieving buffer.
 *
 * The macro takes no arguments; it reads and writes these variables of the
 * enclosing scope: fd, buf, readbuf, readbuf_off, readbuf_len, max_bufsize,
 * end_offset, req_off, req_len, userbuf_off, partial_read, tmp_buf, err and
 * err_flag.
 *
 * 1. If req_off lies at or beyond the end of the buffered window, the window
 *    is restarted at req_off and refilled with up to max_bufsize bytes (never
 *    past end_offset).
 * 2. While the request extends past the window, the already-buffered tail
 *    (partial_read bytes) is preserved, readbuf is reallocated to
 *    partial_read + max_bufsize bytes, and more data is read in behind it.
 * 3. The requested bytes are then copied to the user buffer.
 *
 * Read locks are taken here only when fd->atomicity is off.  A read() that
 * returns -1 sets err_flag; the macro does not stop on such a failure.
 */
#define ADIOI_BUFFERED_READ \
{ \
if (req_off >= readbuf_off + readbuf_len) { \
readbuf_off = req_off; \
readbuf_len = (unsigned) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));\
lseek(fd->fd_sys, readbuf_off, SEEK_SET);\
if (!(fd->atomicity)) ADIOI_READ_LOCK(fd, readbuf_off, SEEK_SET, readbuf_len);\
err = read(fd->fd_sys, readbuf, readbuf_len);\
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, readbuf_off, SEEK_SET, readbuf_len);\
if (err == -1) err_flag = 1; \
} \
while (req_len > readbuf_off + readbuf_len - req_off) { \
ADIOI_Assert((readbuf_off + readbuf_len - req_off) == (int) (readbuf_off + readbuf_len - req_off));\
partial_read = (int) (readbuf_off + readbuf_len - req_off); \
tmp_buf = (char *) ADIOI_Malloc(partial_read); \
memcpy(tmp_buf, readbuf+readbuf_len-partial_read, partial_read); \
ADIOI_Free(readbuf); \
readbuf = (char *) ADIOI_Malloc(partial_read + max_bufsize); \
memcpy(readbuf, tmp_buf, partial_read); \
ADIOI_Free(tmp_buf); \
readbuf_off += readbuf_len-partial_read; \
readbuf_len = (unsigned) (partial_read + ADIOI_MIN(max_bufsize, \
end_offset-readbuf_off+1)); \
lseek(fd->fd_sys, readbuf_off+partial_read, SEEK_SET);\
if (!(fd->atomicity)) ADIOI_READ_LOCK(fd, readbuf_off+partial_read, SEEK_SET, readbuf_len-partial_read);\
err = read(fd->fd_sys, readbuf+partial_read, readbuf_len-partial_read);\
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, readbuf_off+partial_read, SEEK_SET, readbuf_len-partial_read);\
if (err == -1) err_flag = 1; \
} \
ADIOI_Assert(req_len == (size_t)req_len); \
memcpy((char *)buf + userbuf_off, readbuf+req_off-readbuf_off, req_len); \
}
/*
 * ADIOI_BG_ReadStrided - read `count` items of `datatype` into `buf` when the
 * memory layout and/or the file view is noncontiguous, using a data-sieving
 * buffer (see ADIOI_BUFFERED_READ).
 *
 * fd            - open ADIO file; fd->filetype, fd->disp and fd->etype_size
 *                 describe the file view
 * buf, count, datatype - user buffer description
 * file_ptr_type - ADIO_INDIVIDUAL starts at fd->fp_ind and updates it at the
 *                 end; otherwise `offset` is used
 * offset        - in units of etype relative to the filetype
 * status        - filled with the requested byte count when
 *                 HAVE_STATUS_SET_BYTES is defined
 * error_code    - MPI_SUCCESS, or an MPI_ERR_IO code if any read() failed
 *
 * Flow:
 *  - if the ds_read hint is disabled, defer to ADIOI_GEN_ReadStrided_naive;
 *  - a zero-size filetype returns success with 0 bytes;
 *  - the sieve buffer size comes from the "ind_rd_buffer_size" hint;
 *  - noncontiguous memory + contiguous file: walk the flattened buffer type;
 *  - noncontiguous file: locate the starting block in the flattened
 *    filetype, short-cut to ADIO_ReadContig when a contiguous buffer fits in
 *    that one block, otherwise compute end_offset and walk file (and, if
 *    needed, memory) blocks;
 *  - when fd->atomicity is set the whole accessed range is write-locked for
 *    the duration of the operation.
 *
 * fd->fp_sys_posn is set to -1 on return from the strided paths.
 */
void ADIOI_BG_ReadStrided(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status, int
*error_code)
{
/* offset is in units of etype relative to the filetype. */
ADIOI_Flatlist_node *flat_buf, *flat_file;
ADIO_Offset i_offset, new_brd_size, brd_size, size;
int i, j, k, err=-1, st_index=0;
ADIO_Offset frd_size=0, new_frd_size, st_frd_size;
unsigned num, bufsize;
int n_etypes_in_filetype;
ADIO_Offset n_filetypes, etype_in_filetype, st_n_filetypes, size_in_filetype;
ADIO_Offset abs_off_in_filetype=0;
int filetype_size, etype_size, buftype_size, partial_read;
MPI_Aint filetype_extent, buftype_extent;
int buf_count, buftype_is_contig, filetype_is_contig;
ADIO_Offset userbuf_off, req_len, sum;
ADIO_Offset off, req_off, disp, end_offset=0, readbuf_off, start_off;
char *readbuf, *tmp_buf, *value;
int err_flag=0, info_flag;
unsigned max_bufsize, readbuf_len;
static char myname[] = "ADIOI_BG_READSTRIDED";
if (fd->hints->ds_read == ADIOI_HINT_DISABLE) {
/* if user has disabled data sieving on reads, use naive
* approach instead.
*/
/*FPRINTF(stderr, "ADIOI_GEN_ReadStrided_naive(%d):\n", __LINE__);*/
ADIOI_GEN_ReadStrided_naive(fd,
buf,
count,
datatype,
file_ptr_type,
offset,
status,
error_code);
return;
}
/*FPRINTF(stderr, "%s(%d):\n",myname, __LINE__);*/
ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
MPI_Type_size(fd->filetype, &filetype_size);
/* nothing can be read through a zero-size filetype */
if ( ! filetype_size ) {
#ifdef HAVE_STATUS_SET_BYTES
MPIR_Status_set_bytes(status, datatype, 0);
#endif
*error_code = MPI_SUCCESS;
return;
}
MPI_Type_extent(fd->filetype, &filetype_extent);
MPI_Type_size(datatype, &buftype_size);
MPI_Type_extent(datatype, &buftype_extent);
etype_size = fd->etype_size;
ADIOI_Assert((buftype_size * count) == ((ADIO_Offset)(unsigned)buftype_size * (ADIO_Offset)count));
bufsize = buftype_size * count;
/* get max_bufsize from the info object. */
/* NOTE(review): info_flag is not checked before atoi(value); this presumably
* relies on the hint always being set when the file is opened - confirm. */
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
ADIOI_Info_get(fd->info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, value,
&info_flag);
max_bufsize = atoi(value);
ADIOI_Free(value);
if (!buftype_is_contig && filetype_is_contig) {
/* noncontiguous in memory, contiguous in file. */
ADIOI_Flatten_datatype(datatype);
flat_buf = ADIOI_Flatlist;
while (flat_buf->type != datatype) flat_buf = flat_buf->next;
off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind :
fd->disp + (ADIO_Offset)etype_size * offset;
start_off = off;
end_offset = off + bufsize - 1;
readbuf_off = off;
readbuf = (char *) ADIOI_Malloc(max_bufsize);
readbuf_len = (unsigned) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));
/* if atomicity is true, lock (exclusive) the region to be accessed */
if (fd->atomicity)
ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
lseek(fd->fd_sys, readbuf_off, SEEK_SET);
if (!(fd->atomicity)) ADIOI_READ_LOCK(fd, readbuf_off, SEEK_SET, readbuf_len);
err = read(fd->fd_sys, readbuf, readbuf_len);
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, readbuf_off, SEEK_SET, readbuf_len);
if (err == -1) err_flag = 1;
for (j=0; j<count; j++)
{
/* this i shadows the function-scope i declared above */
int i;
for (i=0; i<flat_buf->count; i++) {
userbuf_off = (ADIO_Offset)j*(ADIO_Offset)buftype_extent + flat_buf->indices[i];
req_off = off;
req_len = flat_buf->blocklens[i];
ADIOI_BUFFERED_READ
off += flat_buf->blocklens[i];
}
}
if (fd->atomicity)
ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;
ADIOI_Free(readbuf); /* malloced in the buffered_read macro */
if (err_flag) {
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
MPIR_ERR_RECOVERABLE, myname,
__LINE__, MPI_ERR_IO, "**io",
"**io %s", strerror(errno));
}
else *error_code = MPI_SUCCESS;
}
else { /* noncontiguous in file */
/* filetype already flattened in ADIO_Open */
flat_file = ADIOI_Flatlist;
while (flat_file->type != fd->filetype) flat_file = flat_file->next;
disp = fd->disp;
if (file_ptr_type == ADIO_INDIVIDUAL) {
/* Wei-keng reworked type processing to be a bit more efficient */
offset = fd->fp_ind - disp;
n_filetypes = (offset - flat_file->indices[0]) / filetype_extent;
offset -= (ADIO_Offset)n_filetypes * filetype_extent;
/* now offset is local to this extent */
/* find the block where offset is located, skip blocklens[i]==0 */
for (i=0; i<flat_file->count; i++) {
ADIO_Offset dist;
if (flat_file->blocklens[i] == 0) continue;
dist = flat_file->indices[i] + flat_file->blocklens[i] - offset;
/* frd_size is from offset to the end of block i */
if (dist == 0) {
i++;
offset = flat_file->indices[i];
frd_size = flat_file->blocklens[i];
break;
}
if (dist > 0) {
frd_size = dist;
break;
}
}
st_index = i; /* starting index in flat_file->indices[] */
offset += disp + (ADIO_Offset)n_filetypes*filetype_extent;
}
else {
n_etypes_in_filetype = filetype_size/etype_size;
n_filetypes = offset / n_etypes_in_filetype;
etype_in_filetype = offset % n_etypes_in_filetype;
size_in_filetype = etype_in_filetype * etype_size;
sum = 0;
for (i=0; i<flat_file->count; i++) {
sum += flat_file->blocklens[i];
if (sum > size_in_filetype) {
st_index = i;
frd_size = sum - size_in_filetype;
abs_off_in_filetype = flat_file->indices[i] +
size_in_filetype - (sum - flat_file->blocklens[i]);
break;
}
}
/* abs. offset in bytes in the file */
offset = disp + (ADIO_Offset) n_filetypes*filetype_extent +
abs_off_in_filetype;
}
start_off = offset;
/* Wei-keng Liao: read request is within a single flat_file contig
* block e.g. with subarray types that actually describe the whole
* array */
if (buftype_is_contig && bufsize <= frd_size) {
ADIO_ReadContig(fd, buf, bufsize, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
offset, status, error_code);
if (file_ptr_type == ADIO_INDIVIDUAL) {
/* update MPI-IO file pointer to point to the first byte that
* can be accessed in the fileview. */
fd->fp_ind = offset + bufsize;
if (bufsize == frd_size) {
do {
st_index++;
if (st_index == flat_file->count) {
st_index = 0;
n_filetypes++;
}
} while (flat_file->blocklens[st_index] == 0);
fd->fp_ind = disp + flat_file->indices[st_index]
+ n_filetypes*filetype_extent;
}
}
fd->fp_sys_posn = -1; /* set it to null. */
#ifdef HAVE_STATUS_SET_BYTES
MPIR_Status_set_bytes(status, datatype, bufsize);
#endif
return;
}
/* Calculate end_offset, the last byte-offset that will be accessed.
e.g., if start_offset=0 and 100 bytes to be read, end_offset=99*/
st_frd_size = frd_size;
st_n_filetypes = n_filetypes;
i_offset = 0;
j = st_index;
off = offset;
frd_size = ADIOI_MIN(st_frd_size, bufsize);
while (i_offset < bufsize) {
i_offset += frd_size;
end_offset = off + frd_size - 1;
j = (j+1) % flat_file->count;
n_filetypes += (j == 0) ? 1 : 0;
while (flat_file->blocklens[j]==0) {
j = (j+1) % flat_file->count;
n_filetypes += (j == 0) ? 1 : 0;
}
off = disp + flat_file->indices[j] + n_filetypes*(ADIO_Offset)filetype_extent;
frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset);
}
/* if atomicity is true, lock (exclusive) the region to be accessed */
if (fd->atomicity)
ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
/* initial read into readbuf */
readbuf_off = offset;
readbuf = (char *) ADIOI_Malloc(max_bufsize);
readbuf_len = (unsigned) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));
lseek(fd->fd_sys, offset, SEEK_SET);
if (!(fd->atomicity)) ADIOI_READ_LOCK(fd, offset, SEEK_SET, readbuf_len);
err = read(fd->fd_sys, readbuf, readbuf_len);
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, offset, SEEK_SET, readbuf_len);
if (err == -1) err_flag = 1;
if (buftype_is_contig && !filetype_is_contig) {
/* contiguous in memory, noncontiguous in file. should be the most
common case. */
i_offset = 0;
j = st_index;
off = offset;
n_filetypes = st_n_filetypes;
frd_size = ADIOI_MIN(st_frd_size, bufsize);
while (i_offset < bufsize) {
if (frd_size) {
/* TYPE_UB and TYPE_LB can result in
frd_size = 0. save system call in such cases */
/* lseek(fd->fd_sys, off, SEEK_SET);
err = read(fd->fd_sys, ((char *) buf) + i, frd_size);*/
req_off = off;
req_len = frd_size;
userbuf_off = i_offset;
ADIOI_BUFFERED_READ
}
i_offset += frd_size;
if (off + frd_size < disp + flat_file->indices[j] +
flat_file->blocklens[j] + n_filetypes*(ADIO_Offset)filetype_extent)
off += frd_size;
/* did not reach end of contiguous block in filetype.
no more I/O needed. off is incremented by frd_size. */
else {
j = (j+1) % flat_file->count;
n_filetypes += (j == 0) ? 1 : 0;
while (flat_file->blocklens[j]==0) {
j = (j+1) % flat_file->count;
n_filetypes += (j == 0) ? 1 : 0;
}
off = disp + flat_file->indices[j] +
n_filetypes*(ADIO_Offset)filetype_extent;
frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset);
}
}
}
else {
/* noncontiguous in memory as well as in file */
ADIOI_Flatten_datatype(datatype);
flat_buf = ADIOI_Flatlist;
while (flat_buf->type != datatype) flat_buf = flat_buf->next;
k = num = buf_count = 0;
i_offset = flat_buf->indices[0];
j = st_index;
off = offset;
n_filetypes = st_n_filetypes;
frd_size = st_frd_size;
brd_size = flat_buf->blocklens[0];
/* each pass transfers the overlap of the current file block (frd_size
bytes left) and the current memory block (brd_size bytes left) */
while (num < bufsize) {
size = ADIOI_MIN(frd_size, brd_size);
if (size) {
/* lseek(fd->fd_sys, off, SEEK_SET);
err = read(fd->fd_sys, ((char *) buf) + i, size); */
req_off = off;
req_len = size;
userbuf_off = i_offset;
ADIOI_BUFFERED_READ
}
new_frd_size = frd_size;
new_brd_size = brd_size;
if (size == frd_size) {
/* reached end of contiguous block in file */
j = (j+1) % flat_file->count;
n_filetypes += (j == 0) ? 1 : 0;
while (flat_file->blocklens[j]==0) {
j = (j+1) % flat_file->count;
n_filetypes += (j == 0) ? 1 : 0;
}
off = disp + flat_file->indices[j] +
n_filetypes*(ADIO_Offset)filetype_extent;
new_frd_size = flat_file->blocklens[j];
if (size != brd_size) {
i_offset += size;
new_brd_size -= size;
}
}
if (size == brd_size) {
/* reached end of contiguous block in memory */
k = (k + 1)%flat_buf->count;
buf_count++;
i_offset = ((ADIO_Offset)buftype_extent*(ADIO_Offset)(buf_count/flat_buf->count) +
flat_buf->indices[k]);
new_brd_size = flat_buf->blocklens[k];
if (size != frd_size) {
off += size;
new_frd_size -= size;
}
}
ADIOI_Assert(((ADIO_Offset)num + size) == (unsigned)(num + size));
num += size;
frd_size = new_frd_size;
brd_size = new_brd_size;
}
}
if (fd->atomicity)
ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;
ADIOI_Free(readbuf); /* malloced in the buffered_read macro */
if (err_flag) {
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
MPIR_ERR_RECOVERABLE, myname,
__LINE__, MPI_ERR_IO, "**io",
"**io %s", strerror(errno));
}
else *error_code = MPI_SUCCESS;
}
fd->fp_sys_posn = -1; /* set it to null. */
#ifdef HAVE_STATUS_SET_BYTES
MPIR_Status_set_bytes(status, datatype, bufsize);
/* This is a temporary way of filling in status. The right way is to
keep track of how much data was actually read and placed in buf
by ADIOI_BUFFERED_READ. */
#endif
if (!buftype_is_contig) ADIOI_Delete_flattened(datatype);
}

Просмотреть файл

@ -0,0 +1,68 @@
/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp. 2007, 2008 */
/* ---------------------------------------------------------------- */
/**
* \file ad_bg_setsh.c
* \brief Sets the shared file pointer for the BG ADIO driver without locking the file twice
*/
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*/
#include "ad_bg.h"
/* set the shared file pointer to "offset" etypes relative to the current
view */
/*
This looks very similar to ADIOI_GEN_Set_shared_fp, except this
function avoids locking the file twice. The generic version does
Write lock
ADIO_WriteContig
Unlock
For BG, ADIOI_BG_WriteContig does a lock before writing to disable
caching. To avoid the lock being called twice, this version for BG does
Write lock
Lseek
Write
Unlock
*/
/*
 * ADIOI_BG_Set_shared_fp - store `offset` as the new shared file pointer.
 *
 * fd         - open ADIO file; its shared-file-pointer file is opened on
 *              first use (on MPI_COMM_SELF, delete-on-close)
 * offset     - new shared file pointer value, written verbatim as an
 *              ADIO_Offset at byte 0 of the shared-file-pointer file
 * error_code - MPI_SUCCESS, the error from ADIO_Open if the open failed, or
 *              an MPI_ERR_IO code if write() returned -1
 *
 * The write is done directly under one write lock (lock, lseek, write,
 * unlock) rather than through the driver's WriteContig, which would lock
 * a second time.
 */
void ADIOI_BG_Set_shared_fp(ADIO_File fd, ADIO_Offset offset, int *error_code)
{
    int err, write_errno;
    MPI_Comm dupcommself;
    static char myname[] = "ADIOI_BG_SET_SHARED_FP";

    if (fd->shared_fp_fd == ADIO_FILE_NULL) {
        MPI_Comm_dup(MPI_COMM_SELF, &dupcommself);
        fd->shared_fp_fd = ADIO_Open(MPI_COMM_SELF, dupcommself,
                                     fd->shared_fp_fname,
                                     fd->file_system, fd->fns,
                                     ADIO_CREATE | ADIO_RDWR | ADIO_DELETE_ON_CLOSE,
                                     0, MPI_BYTE, MPI_BYTE, MPI_INFO_NULL,
                                     ADIO_PERM_NULL, error_code);
        /* *error_code is only meaningful right after ADIO_Open; when the
         * file was already open the caller's value may be stale or
         * uninitialized, so it must not be inspected in that case. */
        if (*error_code != MPI_SUCCESS) return;
    }

    ADIOI_WRITE_LOCK(fd->shared_fp_fd, 0, SEEK_SET, sizeof(ADIO_Offset));
    lseek(fd->shared_fp_fd->fd_sys, 0, SEEK_SET);
    err = write(fd->shared_fp_fd->fd_sys, &offset, sizeof(ADIO_Offset));
    /* capture errno before the unlock can overwrite it */
    write_errno = errno;
    ADIOI_UNLOCK(fd->shared_fp_fd, 0, SEEK_SET, sizeof(ADIO_Offset));

    if (err == -1) {
        *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__, MPI_ERR_IO,
                                           "**io",
                                           "**io %s", strerror(write_errno));
    }
    else *error_code = MPI_SUCCESS;
}

Просмотреть файл

@ -0,0 +1,164 @@
/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp. 2007, 2008 */
/* ---------------------------------------------------------------- */
/**
* \file ad_bg_tuning.c
* \brief Defines ad_bg performance tuning
*/
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (C) 2008 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*/
/*---------------------------------------------------------------------
* ad_bg_tuning.c
*
* defines global variables and functions for performance tuning and
* functional debugging.
*---------------------------------------------------------------------*/
#include "ad_bg_tuning.h"
#include "mpi.h"
#if !defined(PVFS2_SUPER_MAGIC)
#define PVFS2_SUPER_MAGIC (0x20030528)
#endif
int bgmpio_timing;
int bgmpio_timing2;
int bgmpio_comm;
int bgmpio_tunegather;
int bgmpio_tuneblocking;
long bglocklessmpio_f_type;
double bgmpio_prof_cw [BGMPIO_CIO_LAST];
double bgmpio_prof_cr [BGMPIO_CIO_LAST];
/* set internal variables for tuning environment variables */
/** \page mpiio_vars MPIIO Configuration
\section env_sec Environment Variables
* - BGMPIO_COMM - Define how data is exchanged on collective
* reads and writes. Possible values:
* - 0 - Use MPI_Alltoallv.
* - 1 - Use MPI_Isend/MPI_Irecv.
* - Default is 0.
*
* - BGMPIO_TIMING - collect timing breakdown for MPI I/O collective calls.
* Must also compile the library with BG_PROFILE defined. Possible values:
* - 0 - Do not collect/report timing.
* - 1 - Collect/report timing.
* - Default is 0.
*
* - BGMPIO_TIMING2 - collect additional averages for MPI I/O collective calls.
* Must also compile the library with BG_PROFILE defined. Possible values:
* - 0 - Do not collect/report averages.
* - 1 - Collect/report averages.
* - Default is 0.
*
* - BGMPIO_TUNEGATHER - Tune how starting and ending offsets are communicated
* for aggregator collective i/o. Possible values:
* - 0 - Use two MPI_Allgather's to collect starting and ending offsets.
* - 1 - Use MPI_Allreduce(MPI_MAX) to collect starting and ending offsets.
* - Default is 1.
*
* - BGMPIO_TUNEBLOCKING - Tune how aggregate file domains are
* calculated (block size). Possible values:
* - 0 - Evenly calculate file domains across aggregators. Also use
* MPI_Isend/MPI_Irecv to exchange domain information.
* - 1 - Align file domains with the underlying file system's block size. Also use
* MPI_Alltoallv to exchange domain information.
* - Default is 1.
*
* - BGLOCKLESSMPIO_F_TYPE - Specify a filesystem type that should run
* the ad_bglockless driver. NOTE: Using romio prefixes (such as
* "bg:" or "bglockless:") on a file name will override this environment
* variable. Possible values:
* - 0xnnnnnnnn - Any valid file system type (or "magic number") from
* statfs() field f_type.
* - The default is 0x20030528 (PVFS2_SUPER_MAGIC)
*
*/
/* Integer value of environment variable `name` (parsed with atoi), or
 * `fallback` when the variable is not set. */
static int ad_bg_env_int(const char *name, int fallback)
{
    const char *text = getenv(name);
    return text ? atoi(text) : fallback;
}

/*
 * Load the tuning globals from the environment.  Each BGMPIO_* variable is
 * parsed with atoi() and falls back to its documented default when unset;
 * BGLOCKLESSMPIO_F_TYPE is parsed with strtol() (base auto-detected) and
 * defaults to PVFS2_SUPER_MAGIC.
 */
void ad_bg_get_env_vars() {
    char *f_type_text, *parse_end;

    bgmpio_comm         = ad_bg_env_int( "BGMPIO_COMM", 0 );
    bgmpio_timing       = ad_bg_env_int( "BGMPIO_TIMING", 0 );
    bgmpio_timing2      = ad_bg_env_int( "BGMPIO_TIMING2", 0 );
    bgmpio_tunegather   = ad_bg_env_int( "BGMPIO_TUNEGATHER", 1 );
    bgmpio_tuneblocking = ad_bg_env_int( "BGMPIO_TUNEBLOCKING", 1 );

    f_type_text = getenv( "BGLOCKLESSMPIO_F_TYPE" );
    if (f_type_text)
        bglocklessmpio_f_type = strtol(f_type_text,&parse_end,0);
    else
        bglocklessmpio_f_type = PVFS2_SUPER_MAGIC;

    DBG_FPRINTF(stderr,"BGLOCKLESSMPIO_F_TYPE=%ld/%#lX\n",
                bglocklessmpio_f_type,bglocklessmpio_f_type);
}
/* report timing breakdown for MPI I/O collective call */
/*
 * Print the timing breakdown for a collective I/O call.
 *
 * rw     - nonzero reports the write counters (bgmpio_prof_cw), zero the
 *          read counters (bgmpio_prof_cr)
 * fd     - file whose communicator (fd->comm) is used for the reductions
 * myrank - caller's rank; only rank 0 prints
 * nprocs - number of processes, used to turn the summed counters into means
 *
 * Does nothing unless bgmpio_timing is set.  Otherwise every rank takes part
 * in two MPI_Reduce calls (sum and max) rooted at rank 0.
 */
void ad_bg_wr_timing_report( int rw, ADIO_File fd, int myrank, int nprocs )
{
    double *samples;
    double mean[ BGMPIO_CIO_LAST ];
    double peak[ BGMPIO_CIO_LAST ];
    const char *tag;
    int idx;

    if (!bgmpio_timing) return;

    samples = rw ? bgmpio_prof_cw : bgmpio_prof_cr;
    MPI_Reduce( samples, mean, BGMPIO_CIO_LAST, MPI_DOUBLE, MPI_SUM, 0, fd->comm );
    MPI_Reduce( samples, peak, BGMPIO_CIO_LAST, MPI_DOUBLE, MPI_MAX, 0, fd->comm );

    /* the reduced values are only valid on the root */
    if (myrank != 0) return;

    for (idx = 0; idx < BGMPIO_CIO_LAST; idx++)
        mean[idx] /= nprocs;

    /* bandwidth slots: total data size over the slowest rank's time */
    if (bgmpio_timing2) {
        mean[ BGMPIO_CIO_B_POSI_RW ] =
            mean[ BGMPIO_CIO_DATA_SIZE ] * nprocs / peak[ BGMPIO_CIO_T_POSI_RW ];
        mean[ BGMPIO_CIO_B_MPIO_RW ] =
            mean[ BGMPIO_CIO_DATA_SIZE ] * nprocs / peak[ BGMPIO_CIO_T_MPIO_RW ];
    } else {
        mean[ BGMPIO_CIO_B_POSI_RW ] = 0;
        mean[ BGMPIO_CIO_B_MPIO_RW ] = 0;
    }
    mean[ BGMPIO_CIO_B_MPIO_CRW ] =
        mean[ BGMPIO_CIO_DATA_SIZE ] * nprocs / peak[ BGMPIO_CIO_T_MPIO_CRW ];

    tag = rw ? "W" : "R";

    printf("\tTIMING-1 %1s , "
           "SZ: %12.4f , "
           "SK-a: %10.3f , "
           "SK-m: %10.3f , "
           "LC-a: %10.3f , "
           "GA-m: %10.3f , "
           "AN-a: %10.3f , "
           "FD-a: %10.3f , "
           "MY-a: %10.3f , "
           "OT-m: %10.3f , "
           "EX-m: %10.3f , ",
           tag,
           mean[ BGMPIO_CIO_DATA_SIZE ] * nprocs,
           mean[ BGMPIO_CIO_T_SEEK ],
           peak[ BGMPIO_CIO_T_SEEK ],
           mean[ BGMPIO_CIO_T_LCOMP ],
           peak[ BGMPIO_CIO_T_GATHER ],
           mean[ BGMPIO_CIO_T_PATANA ],
           mean[ BGMPIO_CIO_T_FD_PART ],
           mean[ BGMPIO_CIO_T_MYREQ ],
           peak[ BGMPIO_CIO_T_OTHREQ ],
           peak[ BGMPIO_CIO_T_DEXCH ]);

    printf("\tTIMING-2 %1s , "
           "PXT-m: %10.3f , "
           "MPT-m: %10.3f , "
           "MPTC-m: %10.3f , "
           "PXB: %10.3f , "
           "MPB: %10.3f , "
           "MPBC: %10.3f , ",
           tag,
           mean[ BGMPIO_CIO_T_POSI_RW ],
           mean[ BGMPIO_CIO_T_MPIO_RW ],
           mean[ BGMPIO_CIO_T_MPIO_CRW ],
           mean[ BGMPIO_CIO_B_POSI_RW ],
           mean[ BGMPIO_CIO_B_MPIO_RW ],
           mean[ BGMPIO_CIO_B_MPIO_CRW ]);
}

Просмотреть файл

@ -0,0 +1,96 @@
/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp. 2007, 2008 */
/* ---------------------------------------------------------------- */
/**
* \file ad_bg_tuning.h
* \brief Declares the ad_bg performance-tuning globals, timing fields and macros
*/
/*---------------------------------------------------------------------
* ad_bg_tuning.h
*
* declares global variables and macros for performance tuning and
* functional debugging.
*---------------------------------------------------------------------*/
#ifndef AD_BG_TUNING_H_
#define AD_BG_TUNING_H_
#include "adio.h"
/* If condition `a` is false, print the source file and line to stderr and
 * abort the job with MPI_Abort(MPI_COMM_WORLD, 1).
 * NOTE(review): the expansion is a bare `if (...) { ... }`, not wrapped in
 * do { } while (0); using it as the body of an if that has an else would
 * mis-associate the else.  Use it only as a stand-alone statement. */
#define ADIOI_BG_assert( a ) if (!(a)) { \
fprintf( stderr, "AD_BG_assert, file=%s, line=%d\n", __FILE__, __LINE__ ); \
MPI_Abort( MPI_COMM_WORLD, 1 ); \
}
/*-----------------------------------------
* Global variables for the control of
* 1. timing
* 2. select specific optimizations
*-----------------------------------------*/
/* timing fields */
/* Indices into the bgmpio_prof_cw[] / bgmpio_prof_cr[] profiling arrays.
 * T_ entries accumulate times; B_ entries are filled in by the timing report
 * as total data size divided by the maximum of the matching T_ entry. */
enum {
BGMPIO_CIO_DATA_SIZE=0, /* accumulated request size in bytes */
BGMPIO_CIO_T_SEEK, /* time spent around lseek() */
BGMPIO_CIO_T_LCOMP, /* time for ADIOI_Calc_my_off_len(), local */
BGMPIO_CIO_T_GATHER, /* time for previous MPI_Allgather, now Allreduce */
BGMPIO_CIO_T_PATANA, /* time for a quick test if access is contiguous or not, local */
BGMPIO_CIO_T_FD_PART, /* time for file domain partitioning, local */
BGMPIO_CIO_T_MYREQ, /* time for ADIOI_BG_Calc_my_req(), local */
BGMPIO_CIO_T_OTHREQ, /* time for ADIOI_Calc_others_req(), short Alltoall */
BGMPIO_CIO_T_DEXCH, /* time for I/O data exchange */
BGMPIO_CIO_T_POSI_RW, /* time spent in the POSIX read()/write() call */
BGMPIO_CIO_B_POSI_RW, /* derived from DATA_SIZE and T_POSI_RW */
BGMPIO_CIO_T_MPIO_RW, /* time for ADIOI_BG_WriteContig() */
BGMPIO_CIO_B_MPIO_RW, /* derived from DATA_SIZE and T_MPIO_RW */
BGMPIO_CIO_T_MPIO_CRW, /* time for ADIOI_BG_WriteStridedColl() */
BGMPIO_CIO_B_MPIO_CRW, /* derived from DATA_SIZE and T_MPIO_CRW */
BGMPIO_CIO_LAST /* number of entries; size of the profiling arrays */
};
extern double bgmpio_prof_cw [BGMPIO_CIO_LAST];
extern double bgmpio_prof_cr [BGMPIO_CIO_LAST];
/* corresponds to environment variables to select optimizations and timing level */
extern int bgmpio_timing;
extern int bgmpio_timing2;
extern int bgmpio_comm;
extern int bgmpio_tunegather;
extern int bgmpio_tuneblocking;
extern long bglocklessmpio_f_type;
/* set internal variables for tuning environment variables */
void ad_bg_get_env_vars();
/* report timing breakdown for MPI I/O collective call */
void ad_bg_timing_crw_report( int rw, ADIO_File fd, int myrank, int nprocs );
/* Same report under the name ad_bg_tuning.c actually defines; declared here
 * so that callers of that function see a prototype. */
void ad_bg_wr_timing_report( int rw, ADIO_File fd, int myrank, int nprocs );
/* note:
* T := timing;
* CIO := collective I/O
*/
/* Zero the profiling array bgmpio_prof_c<RW> (RW is the token w or r) when
 * the timing level is at least LEVEL.
 * NOTE(review): this uses bgmpio_timing_cw_level and BGMPIO_T_LAST, neither
 * of which is declared in the visible part of this header (the enum above
 * ends in BGMPIO_CIO_LAST) - confirm before using this macro. */
#define BGMPIO_T_CIO_RESET( LEVEL, RW ) \
if (bgmpio_timing_cw_level >= LEVEL) { \
int i; \
for ( i = 0; i < BGMPIO_T_LAST; i ++ ) \
bgmpio_prof_c##RW [ i ] = 0; \
}
/* Call ad_bg_timing_crw_report(RW, FD, MYRANK, NPROCS) when the timing level
 * is at least LEVEL.
 * NOTE(review): bgmpio_timing_cw_level is not declared in the visible part
 * of this header - confirm before using this macro. */
#define BGMPIO_T_CIO_REPORT( LEVEL, RW, FD, MYRANK, NPROCS ) \
if (bgmpio_timing_cw_level >= LEVEL) { \
ad_bg_timing_crw_report ( RW, FD, MYRANK, NPROCS ); \
}
/* Timestamp helper, active when the timing level is at least LEVEL.
 *   DOBAR - if nonzero, do an MPI_Barrier on fd->comm first (`fd` is taken
 *           from the enclosing scope, it is not a macro parameter)
 *   ISSET - if nonzero, store the current MPI_Wtime() in slot VAR1
 *   ISGET - if nonzero, replace slot VAR2 with (now - previous value of VAR2),
 *           i.e. the time elapsed since VAR2 was last set
 * RW is the token w or r and selects bgmpio_prof_cw / bgmpio_prof_cr.
 * NOTE(review): bgmpio_timing_cw_level is not declared in the visible part
 * of this header - confirm before using this macro. */
#define BGMPIO_T_CIO_SET_GET( LEVEL, RW, DOBAR, ISSET, ISGET, VAR1, VAR2 ) \
if (bgmpio_timing_cw_level >= LEVEL) { \
if ( DOBAR ) MPI_Barrier( fd->comm ); \
double temp = MPI_Wtime(); \
if ( ISSET ) bgmpio_prof_c##RW [ VAR1 ] = temp; \
if ( ISGET ) bgmpio_prof_c##RW [ VAR2 ] = temp - bgmpio_prof_c##RW [ VAR2 ] ; \
}
#endif /* AD_BG_TUNING_H_ */

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -0,0 +1,611 @@
/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp. 2007, 2008 */
/* ---------------------------------------------------------------- */
/**
* \file ad_bg_write.c
* \brief Contiguous and strided (data-sieving) write routines for the BlueGene ADIO driver
*/
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*/
#include "ad_bg.h"
#include "adio_extern.h"
#include "ad_bg_tuning.h"
#ifdef AGGREGATION_PROFILE
#include "mpe.h"
#endif
/*
 * ADIOI_BG_WriteContig - write one contiguous region with a single write().
 *
 * fd             - open ADIO file; fd_sys, fp_ind and fp_sys_posn are used.
 * buf            - user data to write.
 * count,datatype - request size: count * (size of datatype) bytes.  The
 *                  total must fit in an unsigned int (asserted below).
 * file_ptr_type  - ADIO_EXPLICIT_OFFSET writes at `offset` and leaves the
 *                  individual file pointer alone; any other value writes at
 *                  fd->fp_ind and advances it.
 * offset         - file offset handed to lseek() (explicit-offset case only).
 * status         - filled in through MPIR_Status_set_bytes() when
 *                  HAVE_STATUS_SET_BYTES is defined.
 * error_code     - MPI_SUCCESS, or an MPI_ERR_IO error code when write()
 *                  returned -1.
 *
 * When built with BG_PROFILE, the seek time, the write() time, the total
 * time and the request size are accumulated into bgmpio_prof_cw[].
 */
void ADIOI_BG_WriteContig(ADIO_File fd, const void *buf, int count,
                          MPI_Datatype datatype, int file_ptr_type,
                          ADIO_Offset offset, ADIO_Status *status, int *error_code)
{
    int err=-1, datatype_size;
    ADIO_Offset len;
    static char myname[] = "ADIOI_BG_WRITECONTIG";
#if BG_PROFILE
    /* timing */
    double io_time = 0.0, io_time2 = 0.0;
#endif

#ifdef AGGREGATION_PROFILE
    MPE_Log_event (5036, 0, NULL);
#endif
#if BG_PROFILE
    if (bgmpio_timing) io_time = MPI_Wtime();
#endif

    MPI_Type_size(datatype, &datatype_size);
    len = (ADIO_Offset)datatype_size * (ADIO_Offset)count;
    ADIOI_Assert(len == (unsigned int) len); /* write takes an unsigned int parm */

#if BG_PROFILE
    /* The request size can only be accounted for once len has been
     * computed; adding it before this point would read an uninitialized
     * variable. */
    if (bgmpio_timing) bgmpio_prof_cw[ BGMPIO_CIO_DATA_SIZE ] += len;
#endif

    /* NOTE(review): in all four branches below the return value of write()
     * is added to the file-position bookkeeping before it is checked for
     * -1; the error check only happens after the branches. */
#if BG_PROFILE
    if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
        if (bgmpio_timing2) io_time2 = MPI_Wtime();
        /* skip the seek when the system file position is already there */
        if (fd->fp_sys_posn != offset)
            lseek(fd->fd_sys, offset, SEEK_SET);
        if (bgmpio_timing2) bgmpio_prof_cw[ BGMPIO_CIO_T_SEEK ] += (MPI_Wtime() - io_time2);
        ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
        if (bgmpio_timing2) io_time2 = MPI_Wtime();
        err = write(fd->fd_sys, buf, (unsigned int)len);
        if (bgmpio_timing2) bgmpio_prof_cw[ BGMPIO_CIO_T_POSI_RW ] += (MPI_Wtime() - io_time2);
        ADIOI_UNLOCK(fd, offset, SEEK_SET, len);
        fd->fp_sys_posn = offset + err;
        /* individual file pointer not updated */
    }
    else { /* write from curr. location of ind. file pointer */
        offset = fd->fp_ind;
        if (bgmpio_timing2) io_time2 = MPI_Wtime();
        if (fd->fp_sys_posn != fd->fp_ind)
            lseek(fd->fd_sys, fd->fp_ind, SEEK_SET);
        if (bgmpio_timing2) bgmpio_prof_cw[ BGMPIO_CIO_T_SEEK ] += (MPI_Wtime() - io_time2);
        ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
        if (bgmpio_timing2) io_time2 = MPI_Wtime();
        err = write(fd->fd_sys, buf, (unsigned int)len);
        if (bgmpio_timing2) bgmpio_prof_cw[ BGMPIO_CIO_T_POSI_RW ] += (MPI_Wtime() - io_time2);
        ADIOI_UNLOCK(fd, offset, SEEK_SET, len);
        fd->fp_ind += err;
        fd->fp_sys_posn = fd->fp_ind;
    }
#else /* BG_PROFILE */
    if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
        /* skip the seek when the system file position is already there */
        if (fd->fp_sys_posn != offset)
            lseek(fd->fd_sys, offset, SEEK_SET);
        ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
        err = write(fd->fd_sys, buf, (unsigned int)len);
        ADIOI_UNLOCK(fd, offset, SEEK_SET, len);
        fd->fp_sys_posn = offset + err;
        /* individual file pointer not updated */
    }
    else { /* write from curr. location of ind. file pointer */
        offset = fd->fp_ind;
        if (fd->fp_sys_posn != fd->fp_ind)
            lseek(fd->fd_sys, fd->fp_ind, SEEK_SET);
        ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
        err = write(fd->fd_sys, buf, (unsigned int)len);
        ADIOI_UNLOCK(fd, offset, SEEK_SET, len);
        fd->fp_ind += err;
        fd->fp_sys_posn = fd->fp_ind;
    }
#endif /* BG_PROFILE */

#if BG_PROFILE
    if (bgmpio_timing) bgmpio_prof_cw[ BGMPIO_CIO_T_MPIO_RW ] += (MPI_Wtime() - io_time);
#endif

    /* --BEGIN ERROR HANDLING-- */
    if (err == -1) {
        *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__, MPI_ERR_IO,
                                           "**io",
                                           "**io %s", strerror(errno));
        return;
    }
    /* --END ERROR HANDLING-- */

#ifdef HAVE_STATUS_SET_BYTES
    MPIR_Status_set_bytes(status, datatype, err);
#endif

    *error_code = MPI_SUCCESS;
#ifdef AGGREGATION_PROFILE
    MPE_Log_event (5037, 0, NULL);
#endif
}
/* ADIOI_BUFFERED_WRITE: stage one request of req_len bytes, taken from
 * (char *)buf + userbuf_off and destined for file offset req_off, into
 * writebuf using read-modify-write.  writebuf holds the file range that
 * starts at writebuf_off and is writebuf_len bytes long.
 *
 * - If req_off lies at or beyond the end of the buffered range, the buffer
 *   is written out, the range is moved to start at req_off (length
 *   min(max_bufsize, end_offset-writebuf_off+1)) and the file contents of
 *   the new range are read into writebuf.
 * - The part of the request that fits is copied into writebuf.  While the
 *   request extends past the buffered range, the buffer is written out,
 *   the range is advanced by writebuf_len, re-read, and the next part of
 *   the request is copied to the start of writebuf.
 * - When fd->atomicity is off, each buffered range is write-locked before
 *   it is read and unlocked after it is written.
 * - A failed write() only sets err_flag.  A failed read() stores an
 *   MPI_ERR_IO code in *error_code and RETURNS from the enclosing function.
 *
 * Besides fd, buf and error_code, the macro uses these names from the
 * calling scope: writebuf, writebuf_off, writebuf_len, max_bufsize,
 * end_offset, req_off, req_len, userbuf_off, write_sz, err, err_flag and
 * myname.  It is invoked without a trailing semicolon.
 *
 * NOTE(review): the `return` on a read failure does not free writebuf and
 * does not release the lock taken just before the read. */
#define ADIOI_BUFFERED_WRITE \
{ \
if (req_off >= writebuf_off + writebuf_len) { \
lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
err = write(fd->fd_sys, writebuf, writebuf_len); \
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
if (err == -1) err_flag = 1; \
writebuf_off = req_off; \
writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
err = read(fd->fd_sys, writebuf, writebuf_len); \
if (err == -1) { \
*error_code = MPIO_Err_create_code(MPI_SUCCESS, \
MPIR_ERR_RECOVERABLE, myname, \
__LINE__, MPI_ERR_IO, \
"**ioRMWrdwr", 0); \
return; \
} \
} \
write_sz = (unsigned) (ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \
ADIOI_Assert((ADIO_Offset)write_sz == ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off));\
memcpy(writebuf+req_off-writebuf_off, (char *)buf +userbuf_off, write_sz);\
while (write_sz != req_len) { \
lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
err = write(fd->fd_sys, writebuf, writebuf_len); \
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
if (err == -1) err_flag = 1; \
req_len -= write_sz; \
userbuf_off += write_sz; \
writebuf_off += writebuf_len; \
writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
err = read(fd->fd_sys, writebuf, writebuf_len); \
if (err == -1) { \
*error_code = MPIO_Err_create_code(MPI_SUCCESS, \
MPIR_ERR_RECOVERABLE, myname, \
__LINE__, MPI_ERR_IO, \
"**ioRMWrdwr", 0); \
return; \
} \
write_sz = ADIOI_MIN(req_len, writebuf_len); \
memcpy(writebuf, (char *)buf + userbuf_off, write_sz);\
} \
}
/* ADIOI_BUFFERED_WRITE_WITHOUT_READ: this macro is used when filetype is
 * contig and buftype is not contig.  It stages one request of req_len
 * bytes, taken from (char *)buf + userbuf_off and destined for file offset
 * req_off, into writebuf (the file range starting at writebuf_off,
 * writebuf_len bytes long).
 *
 * It does not do a read-modify-write: the buffer is never filled from the
 * file.  When the request starts at or beyond the end of the buffered
 * range, or extends past it, the buffer is written out and the range is
 * moved or advanced (length min(max_bufsize, end_offset-writebuf_off+1)).
 * When fd->atomicity is off, each write() of the buffer is wrapped in its
 * own write lock / unlock pair; no lock is held between flushes.
 * A failed write() only sets err_flag; this macro never returns from the
 * enclosing function.
 *
 * Besides fd and buf, the macro uses these names from the calling scope:
 * writebuf, writebuf_off, writebuf_len, max_bufsize, end_offset, req_off,
 * req_len, userbuf_off, write_sz, err and err_flag.  It is invoked without
 * a trailing semicolon. */
#define ADIOI_BUFFERED_WRITE_WITHOUT_READ \
{ \
if (req_off >= writebuf_off + writebuf_len) { \
lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
err = write(fd->fd_sys, writebuf, writebuf_len); \
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
if (err == -1) err_flag = 1; \
writebuf_off = req_off; \
writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
} \
write_sz = (unsigned) (ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \
ADIOI_Assert((ADIO_Offset)write_sz == ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off));\
memcpy(writebuf+req_off-writebuf_off, (char *)buf +userbuf_off, write_sz);\
while (write_sz != req_len) { \
lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
err = write(fd->fd_sys, writebuf, writebuf_len); \
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
if (err == -1) err_flag = 1; \
req_len -= write_sz; \
userbuf_off += write_sz; \
writebuf_off += writebuf_len; \
writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
write_sz = ADIOI_MIN(req_len, writebuf_len); \
memcpy(writebuf, (char *)buf + userbuf_off, write_sz);\
} \
}
/*
 * ADIOI_BG_WriteStrided - write a request that is noncontiguous in memory
 * and/or in the file, going through an intermediate buffer (writebuf).
 *
 * fd            - open ADIO file (filetype, etype_size, disp, hints, info,
 *                 atomicity, fp_ind and fp_sys_posn are used).
 * buf           - user buffer.
 * count         - number of `datatype` elements in buf.
 * datatype      - memory datatype describing buf.
 * file_ptr_type - ADIO_INDIVIDUAL starts at fd->fp_ind and updates it;
 *                 otherwise `offset` is used.
 * offset        - in units of etype relative to the filetype.
 * status        - filled in through MPIR_Status_set_bytes() when
 *                 HAVE_STATUS_SET_BYTES is defined.
 * error_code    - MPI_SUCCESS or an MPI_ERR_IO error code.
 *
 * Paths taken:
 *   - ds_write hint disabled   -> ADIOI_GEN_WriteStrided_naive().
 *   - filetype has size 0      -> nothing to write, success.
 *   - noncontig memory, contig file -> pack into writebuf and write it out
 *                                 (ADIOI_BUFFERED_WRITE_WITHOUT_READ).
 *   - noncontig file           -> if the whole request fits in one
 *                                 contiguous block of the filetype, hand it
 *                                 to ADIO_WriteContig(); otherwise do a
 *                                 read-modify-write through writebuf
 *                                 (ADIOI_BUFFERED_WRITE).
 *
 * NOTE(review): ADIOI_BUFFERED_WRITE contains a `return` that leaves this
 * function when a read() fails; that path does not free writebuf or
 * release locks.
 */
void ADIOI_BG_WriteStrided(ADIO_File fd, const void *buf, int count,
                           MPI_Datatype datatype, int file_ptr_type,
                           ADIO_Offset offset, ADIO_Status *status, int
                           *error_code)
{
    /* offset is in units of etype relative to the filetype. */

    ADIOI_Flatlist_node *flat_buf, *flat_file;
    ADIO_Offset i_offset, sum, size_in_filetype;
    int i, j, k, err=-1, st_index=0;
    int n_etypes_in_filetype;
    ADIO_Offset num, size, n_filetypes, etype_in_filetype, st_n_filetypes;
    ADIO_Offset abs_off_in_filetype=0;
    int filetype_size, etype_size, buftype_size;
    MPI_Aint filetype_extent, buftype_extent;
    int buf_count, buftype_is_contig, filetype_is_contig;
    ADIO_Offset userbuf_off;
    ADIO_Offset off, req_off, disp, end_offset=0, writebuf_off, start_off;
    char *writebuf, *value;
    unsigned bufsize, writebuf_len, max_bufsize, write_sz;
    int err_flag=0, info_flag;
    ADIO_Offset new_bwr_size, new_fwr_size, st_fwr_size, fwr_size=0, bwr_size, req_len;
    static char myname[] = "ADIOI_BG_WRITESTRIDED";

    if (fd->hints->ds_write == ADIOI_HINT_DISABLE) {
        /* if user has disabled data sieving on writes, use naive
         * approach instead.
         */
        /*FPRINTF(stderr, "ADIOI_GEN_WriteStrided_naive(%d):\n", __LINE__);*/
        ADIOI_GEN_WriteStrided_naive(fd,
                                     buf,
                                     count,
                                     datatype,
                                     file_ptr_type,
                                     offset,
                                     status,
                                     error_code);
        return;
    }
    /*FPRINTF(stderr, "%s(%d):\n",myname, __LINE__);*/

    ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);

    MPI_Type_size(fd->filetype, &filetype_size);
    if ( ! filetype_size ) {
        /* an empty filetype means there is nothing to write */
#ifdef HAVE_STATUS_SET_BYTES
        MPIR_Status_set_bytes(status, datatype, 0);
#endif
        *error_code = MPI_SUCCESS;
        return;
    }

    MPI_Type_extent(fd->filetype, &filetype_extent);
    MPI_Type_size(datatype, &buftype_size);
    MPI_Type_extent(datatype, &buftype_extent);
    etype_size = fd->etype_size;

    ADIOI_Assert((buftype_size * count) == ((ADIO_Offset)(unsigned)buftype_size * (ADIO_Offset)count));
    bufsize = buftype_size * count;

    /* get max_bufsize from the info object. */
    value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
    ADIOI_Info_get(fd->info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, value,
                   &info_flag);
    max_bufsize = atoi(value);
    ADIOI_Free(value);

    if (!buftype_is_contig && filetype_is_contig) {

        /* noncontiguous in memory, contiguous in file. */

        ADIOI_Flatten_datatype(datatype);
        flat_buf = ADIOI_Flatlist;
        while (flat_buf->type != datatype) flat_buf = flat_buf->next;

        off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind :
            fd->disp + etype_size * offset;

        start_off = off;
        end_offset = off + bufsize - 1;
        writebuf_off = off;
        writebuf = (char *) ADIOI_Malloc(max_bufsize);
        writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));

        /* if atomicity is true, lock the region to be accessed */
        if (fd->atomicity)
            ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);

        for (j=0; j<count; j++)
        {
            int i;
            for (i=0; i<flat_buf->count; i++) {
                userbuf_off = (ADIO_Offset)j*(ADIO_Offset)buftype_extent + flat_buf->indices[i];
                req_off = off;
                req_len = flat_buf->blocklens[i];
                ADIOI_BUFFERED_WRITE_WITHOUT_READ
                off += flat_buf->blocklens[i];
            }
        }

        /* write the buffer out finally */
        lseek(fd->fd_sys, writebuf_off, SEEK_SET);
        if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
        err = write(fd->fd_sys, writebuf, writebuf_len);
        if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
        if (err == -1) err_flag = 1;

        if (fd->atomicity)
            ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);

        ADIOI_Free(writebuf); /* allocated above in this branch */

        if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;
        if (err_flag) {
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE, myname,
                                               __LINE__, MPI_ERR_IO, "**io",
                                               "**io %s", strerror(errno));
        }
        else *error_code = MPI_SUCCESS;
    }

    else { /* noncontiguous in file */

        /* filetype already flattened in ADIO_Open */
        flat_file = ADIOI_Flatlist;
        while (flat_file->type != fd->filetype) flat_file = flat_file->next;
        disp = fd->disp;

        if (file_ptr_type == ADIO_INDIVIDUAL) {
            /* Wei-keng reworked type processing to be a bit more efficient */
            offset = fd->fp_ind - disp;
            n_filetypes = (offset - flat_file->indices[0]) / filetype_extent;
            offset -= (ADIO_Offset)n_filetypes * filetype_extent;
            /* now offset is local to this extent */

            /* find the block where offset is located, skip blocklens[i]==0 */
            for (i=0; i<flat_file->count; i++) {
                ADIO_Offset dist;
                if (flat_file->blocklens[i] == 0) continue;
                dist = flat_file->indices[i] + flat_file->blocklens[i] - offset;
                /* fwr_size is from offset to the end of block i */
                if (dist == 0) {
                    i++;
                    offset = flat_file->indices[i];
                    fwr_size = flat_file->blocklens[i];
                    break;
                }
                if (dist > 0) {
                    fwr_size = dist;
                    break;
                }
            }
            st_index = i; /* starting index in flat_file->indices[] */
            offset += disp + (ADIO_Offset)n_filetypes*filetype_extent;
        }
        else {
            int i;
            n_etypes_in_filetype = filetype_size/etype_size;
            n_filetypes = offset / n_etypes_in_filetype;
            etype_in_filetype = offset % n_etypes_in_filetype;
            size_in_filetype = etype_in_filetype * etype_size;

            sum = 0;
            for (i=0; i<flat_file->count; i++) {
                sum += flat_file->blocklens[i];
                if (sum > size_in_filetype) {
                    st_index = i;
                    fwr_size = sum - size_in_filetype;
                    abs_off_in_filetype = flat_file->indices[i] +
                        size_in_filetype - (sum - flat_file->blocklens[i]);
                    break;
                }
            }

            /* abs. offset in bytes in the file */
            offset = disp + (ADIO_Offset) n_filetypes*filetype_extent +
                abs_off_in_filetype;
        }

        start_off = offset;

        /* Wei-keng Liao:write request is within single flat_file contig block*/
        /* this could happen, for example, with subarray types that are
         * actually fairly contiguous */
        if (buftype_is_contig && bufsize <= fwr_size) {
            ADIO_WriteContig(fd, buf, bufsize, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
                             offset, status, error_code);

            if (file_ptr_type == ADIO_INDIVIDUAL) {
                /* update MPI-IO file pointer to point to the first byte
                 * that can be accessed in the fileview. */
                fd->fp_ind = offset + bufsize;
                if (bufsize == fwr_size) {
                    do {
                        st_index++;
                        if (st_index == flat_file->count) {
                            st_index = 0;
                            n_filetypes++;
                        }
                    } while (flat_file->blocklens[st_index] == 0);
                    fd->fp_ind = disp + flat_file->indices[st_index]
                        + (ADIO_Offset)n_filetypes*filetype_extent;
                }
            }
            fd->fp_sys_posn = -1; /* set it to null. */
#ifdef HAVE_STATUS_SET_BYTES
            MPIR_Status_set_bytes(status, datatype, bufsize);
#endif
            return;
        }

        /* Calculate end_offset, the last byte-offset that will be accessed.
           e.g., if start_offset=0 and 100 bytes are to be written,
           end_offset=99 */

        st_fwr_size = fwr_size;
        st_n_filetypes = n_filetypes;
        i_offset = 0;
        j = st_index;
        off = offset;
        fwr_size = ADIOI_MIN(st_fwr_size, bufsize);
        while (i_offset < bufsize) {
            i_offset += fwr_size;
            end_offset = off + fwr_size - 1;

            j = (j+1) % flat_file->count;
            n_filetypes += (j == 0) ? 1 : 0;
            while (flat_file->blocklens[j]==0) {
                j = (j+1) % flat_file->count;
                n_filetypes += (j == 0) ? 1 : 0;
            }

            off = disp + flat_file->indices[j] +
                n_filetypes*(ADIO_Offset)filetype_extent;
            fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset);
        }

        /* if atomicity is true, lock the region to be accessed */
        if (fd->atomicity)
            ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);

        /* initial read for the read-modify-write */
        writebuf_off = offset;
        writebuf = (char *) ADIOI_Malloc(max_bufsize);
        writebuf_len = (unsigned)(ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));
        if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
        lseek(fd->fd_sys, writebuf_off, SEEK_SET);
        err = read(fd->fd_sys, writebuf, writebuf_len);
        if (err == -1) {
            /* Undo the setup above before bailing out: release whichever
             * byte-range lock was taken, free the staging buffer, and
             * invalidate the cached system file position because the
             * lseek() above has already moved it. */
            if (!(fd->atomicity)) {
                ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
            }
            else {
                ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
            }
            ADIOI_Free(writebuf);
            fd->fp_sys_posn = -1; /* set it to null. */
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE,
                                               myname, __LINE__,
                                               MPI_ERR_IO,
                                               "ADIOI_BG_WriteStrided: ROMIO tries to optimize this access by doing a read-modify-write, but is unable to read the file. Please give the file read permission and open it with MPI_MODE_RDWR.", 0);
            return;
        }

        if (buftype_is_contig && !filetype_is_contig) {

            /* contiguous in memory, noncontiguous in file. should be the most
               common case. */

            i_offset = 0;
            j = st_index;
            off = offset;
            n_filetypes = st_n_filetypes;
            fwr_size = ADIOI_MIN(st_fwr_size, bufsize);
            while (i_offset < bufsize) {
                if (fwr_size) {
                    /* TYPE_UB and TYPE_LB can result in
                       fwr_size = 0. save system call in such cases */
                    /* lseek(fd->fd_sys, off, SEEK_SET);
                       err = write(fd->fd_sys, ((char *) buf) + i_offset, fwr_size);*/

                    req_off = off;
                    req_len = fwr_size;
                    userbuf_off = i_offset;
                    ADIOI_BUFFERED_WRITE
                }
                i_offset += fwr_size;

                if (off + fwr_size < disp + flat_file->indices[j] +
                    flat_file->blocklens[j] + n_filetypes*(ADIO_Offset)filetype_extent)
                    off += fwr_size;
                /* did not reach end of contiguous block in filetype.
                   no more I/O needed. off is incremented by fwr_size. */
                else {
                    j = (j+1) % flat_file->count;
                    n_filetypes += (j == 0) ? 1 : 0;
                    while (flat_file->blocklens[j]==0) {
                        j = (j+1) % flat_file->count;
                        n_filetypes += (j == 0) ? 1 : 0;
                    }
                    off = disp + flat_file->indices[j] +
                        n_filetypes*(ADIO_Offset)filetype_extent;
                    fwr_size = ADIOI_MIN(flat_file->blocklens[j],
                                         bufsize-i_offset);
                }
            }
        }
        else {
            /* noncontiguous in memory as well as in file */

            ADIOI_Flatten_datatype(datatype);
            flat_buf = ADIOI_Flatlist;
            while (flat_buf->type != datatype) flat_buf = flat_buf->next;

            k = num = buf_count = 0;
            i_offset = flat_buf->indices[0];
            j = st_index;
            off = offset;
            n_filetypes = st_n_filetypes;
            fwr_size = st_fwr_size;
            bwr_size = flat_buf->blocklens[0];

            while (num < bufsize) {
                /* transfer the overlap of the current file block and the
                 * current memory block */
                size = ADIOI_MIN(fwr_size, bwr_size);
                if (size) {
                    /* lseek(fd->fd_sys, off, SEEK_SET);
                       err = write(fd->fd_sys, ((char *) buf) + i_offset, size); */

                    req_off = off;
                    req_len = size;
                    userbuf_off = i_offset;
                    ADIOI_BUFFERED_WRITE
                }

                new_fwr_size = fwr_size;
                new_bwr_size = bwr_size;

                if (size == fwr_size) {
                    /* reached end of contiguous block in file */
                    j = (j+1) % flat_file->count;
                    n_filetypes += (j == 0) ? 1 : 0;
                    while (flat_file->blocklens[j]==0) {
                        j = (j+1) % flat_file->count;
                        n_filetypes += (j == 0) ? 1 : 0;
                    }

                    off = disp + flat_file->indices[j] +
                        n_filetypes*(ADIO_Offset)filetype_extent;

                    new_fwr_size = flat_file->blocklens[j];
                    if (size != bwr_size) {
                        i_offset += size;
                        new_bwr_size -= size;
                    }
                }

                if (size == bwr_size) {
                    /* reached end of contiguous block in memory */

                    k = (k + 1)%flat_buf->count;
                    buf_count++;
                    i_offset = (ADIO_Offset)buftype_extent*(ADIO_Offset)(buf_count/flat_buf->count) +
                        flat_buf->indices[k];
                    new_bwr_size = flat_buf->blocklens[k];
                    if (size != fwr_size) {
                        off += size;
                        new_fwr_size -= size;
                    }
                }
                num += size;
                fwr_size = new_fwr_size;
                bwr_size = new_bwr_size;
            }
        }

        /* write the buffer out finally */
        lseek(fd->fd_sys, writebuf_off, SEEK_SET);
        if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
        err = write(fd->fd_sys, writebuf, writebuf_len);

        if (!(fd->atomicity))
            ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
        else ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);

        if (err == -1) err_flag = 1;

        ADIOI_Free(writebuf); /* allocated above in this branch */

        if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;
        if (err_flag) {
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE, myname,
                                               __LINE__, MPI_ERR_IO, "**io",
                                               "**io %s", strerror(errno));
        }
        else *error_code = MPI_SUCCESS;
    }

    fd->fp_sys_posn = -1; /* set it to null. */

#ifdef HAVE_STATUS_SET_BYTES
    MPIR_Status_set_bytes(status, datatype, bufsize);
    /* This is a temporary way of filling in status. The right way is to
       keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */
#endif

    if (!buftype_is_contig) ADIOI_Delete_flattened(datatype);
}

Просмотреть файл

@ -1,42 +0,0 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Pull in Makefile.options from the top of the source tree
# (presumably the build settings shared by all ADIO sub-directories).
include $(top_srcdir)/Makefile.options
# libadio_bgl.la is a libtool library that is built but never installed
# (noinst_ prefix).
noinst_LTLIBRARIES = libadio_bgl.la
# Every source and private header that goes into libadio_bgl.la.
libadio_bgl_la_SOURCES = \
ad_bgl_aggrs.c \
ad_bgl_aggrs.h \
ad_bgl.c \
ad_bgl_close.c \
ad_bgl_fcntl.c \
ad_bgl_flush.c \
ad_bgl_getsh.c \
ad_bgl.h \
ad_bgl_hints.c \
ad_bgl_open.c \
ad_bgl_pset.c \
ad_bgl_pset.h \
ad_bgl_rdcoll.c \
ad_bgl_read.c \
ad_bgl_setsh.c \
ad_bgl_tuning.c \
ad_bgl_tuning.h \
ad_bgl_wrcoll.c \
ad_bgl_write.c

Просмотреть файл

@ -0,0 +1,34 @@
## -*- Mode: Makefile; -*-
## vim: set ft=automake :
##
## (C) 2011 by Argonne National Laboratory.
## See COPYRIGHT in top-level directory.
##
## Everything below is only active when the BUILD_AD_BGL automake
## conditional is true.  Paths are relative to the directory of the
## Makefile.am that includes this fragment, and both variables are
## appended to (+=), so they must already be defined by the includer.
if BUILD_AD_BGL
## ad_bgl headers: listed so they are distributed, but not installed.
noinst_HEADERS += \
adio/ad_bgl/ad_bgl.h \
adio/ad_bgl/ad_bgl_aggrs.h \
adio/ad_bgl/ad_bgl_pset.h \
adio/ad_bgl/ad_bgl_tuning.h
## ad_bgl implementation files added to the romio_other_sources list.
romio_other_sources += \
adio/ad_bgl/ad_bgl_open.c \
adio/ad_bgl/ad_bgl_close.c \
adio/ad_bgl/ad_bgl_fcntl.c \
adio/ad_bgl/ad_bgl_flush.c \
adio/ad_bgl/ad_bgl_read.c \
adio/ad_bgl/ad_bgl_write.c \
adio/ad_bgl/ad_bgl_getsh.c \
adio/ad_bgl/ad_bgl_setsh.c \
adio/ad_bgl/ad_bgl.c \
adio/ad_bgl/ad_bgl_aggrs.c \
adio/ad_bgl/ad_bgl_pset.c \
adio/ad_bgl/ad_bgl_hints.c \
adio/ad_bgl/ad_bgl_rdcoll.c \
adio/ad_bgl/ad_bgl_wrcoll.c \
adio/ad_bgl/ad_bgl_tuning.c
endif BUILD_AD_BGL

Просмотреть файл

@ -484,11 +484,17 @@ void ADIOI_BGL_GPFS_Calc_file_domains(ADIO_Offset *st_offsets,
* 320k request is a (system-dependent) sweet spot
This is from the common code - the new min_fd_size parm that we didn't implement.
(And common code uses a different declaration of fd_size so beware)
(And common code uses a different declaration of fd_size so beware) */
/* this is not entirely sufficient on BlueGene: we must be mindful of
* imbalance over psets. the hint processing code has already picked, say,
* 8 processors per pset, so if we go increasing fd_size we'll end up with
* some psets with 8 processors and some psets with none. */
/*
if (fd_size < min_fd_size)
fd_size = min_fd_size;
*/
*/
fd_size = (ADIO_Offset *) ADIOI_Malloc(nprocs_for_coll * sizeof(ADIO_Offset));
*fd_start_ptr = (ADIO_Offset *) ADIOI_Malloc(nprocs_for_coll * sizeof(ADIO_Offset));
*fd_end_ptr = (ADIO_Offset *) ADIOI_Malloc(nprocs_for_coll * sizeof(ADIO_Offset));
@ -500,9 +506,16 @@ void ADIOI_BGL_GPFS_Calc_file_domains(ADIO_Offset *st_offsets,
ADIO_Offset naggs_large = n_gpfs_blk - naggs * (n_gpfs_blk/naggs);
ADIO_Offset naggs_small = naggs - naggs_large;
/* nb_cn_small * blksize: evenly split file domain among processors:
* equivalent to fd_gpfs_range/naggs
* (nb_cn_small+1) * blksize: keeps file domain at least 'blksize' big
*/
for (i=0; i<naggs; i++)
if (i < naggs_small) fd_size[i] = nb_cn_small * blksize;
else fd_size[i] = (nb_cn_small+1) * blksize;
else fd_size[i] = (nb_cn_small+1) * blksize;
/*potential optimization: if n_gpfs_blk smaller than
* naggs, slip in some zero-sized file
* domains to spread the work across all psets. */
# if AGG_DEBUG
DBG_FPRINTF(stderr,"%s(%d): "

Просмотреть файл

@ -3,7 +3,7 @@
/* ---------------------------------------------------------------- */
/**
* \file ad_bgl_hints.c
* \brief ???
* \brief BlueGene hint processing
*/
/* -*- Mode: C; c-basic-offset:4 ; -*- */
@ -23,6 +23,24 @@
#define ADIOI_BGL_IND_RD_BUFFER_SIZE_DFLT "4194304"
#define ADIOI_BGL_IND_WR_BUFFER_SIZE_DFLT "4194304"
#define ADIOI_BGL_NAGG_IN_PSET_HINT_NAME "bgl_nodes_pset"
/** \page mpiio_vars MPIIO Configuration
*
* BlueGene MPIIO configuration and performance tuning. Used by ad_bgl and ad_bglockless ADIO's.
*
* \section hint_sec Hints
* - bgl_nodes_pset - Specify how many aggregators to use per pset.
* This hint will override the cb_nodes hint based on BlueGene psets.
* - N - Use N nodes per pset as aggregators.
* - Default is based on partition configuration and cb_nodes.
*
* The following default key/value pairs may differ from other platform defaults.
*
* - key = cb_buffer_size value = 16777216
* - key = romio_cb_read value = enable
* - key = romio_cb_write value = enable
* - key = ind_rd_buffer_size value = 4194304
* - key = ind_wr_buffer_size value = 4194304
*/
/* Compute the aggregator-related parameters that are required in 2-phase collective IO of ADIO. */
extern int

Просмотреть файл

@ -153,7 +153,7 @@ static void scaleable_stat(ADIO_File fd)
else
{
/* Hmm. Guess we'll assume the worst-case, that it's not GPFS
* or PVFS2 below */
* or BGLOCKLESSMPIO_F_TYPE (default PVFS2) below */
buf[1] = -1; /* bogus magic number */
DBGV_FPRINTF(stderr,"Statfs '%s' failed with rc=%d, errno=%d\n",dir,rc,errno);
}
@ -171,7 +171,7 @@ static void scaleable_stat(ADIO_File fd)
/* data from statfs */
if ((bgl_statfs.f_type == GPFS_SUPER_MAGIC) ||
(bgl_statfs.f_type == PVFS2_SUPER_MAGIC))
(bgl_statfs.f_type == bglocklessmpio_f_type))
{
((ADIOI_BGL_fs*)fd->fs_ptr)->fsync_aggr =
ADIOI_BGL_FSYNC_AGGREGATION_ENABLED;
@ -231,11 +231,6 @@ void ADIOI_BGL_Open(ADIO_File fd, int *error_code)
if(fd->fd_sys != -1)
{
struct stat64 bgl_stat;
struct statfs bgl_statfs;
char* dir;
int rc;
/* Initialize the ad_bgl file system specific information */
AD_BGL_assert(fd->fs_ptr == NULL);
fd->fs_ptr = (ADIOI_BGL_fs*) ADIOI_Malloc(sizeof(ADIOI_BGL_fs));

Просмотреть файл

@ -28,6 +28,8 @@ typedef struct {
int cpuid; /* my CPU id -- for virtual node mode (t coord)*/
int rankInPset; /* my relative rank in my PSET */
int __pad; /* pad to 16 byte alignment */
} ADIOI_BGL_ProcInfo_t __attribute__((aligned(16)));
@ -47,7 +49,7 @@ typedef struct {
#undef MIN
#define MIN(a,b) ((a)<(b) ? (a) : (b))
#define MIN(a,b) ((a<b ? a : b))
/* Default is to choose 8 aggregator nodes in each 32 CN pset.

Просмотреть файл

@ -1140,5 +1140,8 @@ static void ADIOI_R_Exchange_data_alltoallv(
ADIOI_Free( all_send_buf );
ADIOI_Free( all_recv_buf );
ADIOI_Free( recv_buf );
ADIOI_Free( sdispls );
ADIOI_Free( rdispls );
return;
}

Просмотреть файл

@ -203,6 +203,9 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count,
MPI_Type_size(fd->filetype, &filetype_size);
if ( ! filetype_size ) {
#ifdef HAVE_STATUS_SET_BYTES
MPIR_Status_set_bytes(status, datatype, 0);
#endif
*error_code = MPI_SUCCESS;
return;
}

Просмотреть файл

@ -22,17 +22,23 @@
#include "ad_bgl_tuning.h"
#include "mpi.h"
#if !defined(PVFS2_SUPER_MAGIC)
#define PVFS2_SUPER_MAGIC (0x20030528)
#endif
int bglmpio_timing;
int bglmpio_timing2;
int bglmpio_comm;
int bglmpio_tunegather;
int bglmpio_tuneblocking;
long bglocklessmpio_f_type;
double bglmpio_prof_cw [BGLMPIO_CIO_LAST];
double bglmpio_prof_cr [BGLMPIO_CIO_LAST];
/* set internal variables for tuning environment variables */
/** \page env_vars Environment Variables
/** \page mpiio_vars MPIIO Configuration
\section env_sec Environment Variables
* - BGLMPIO_COMM - Define how data is exchanged on collective
* reads and writes. Possible values:
* - 0 - Use MPI_Alltoallv.
@ -65,9 +71,17 @@ double bglmpio_prof_cr [BGLMPIO_CIO_LAST];
* MPI_Alltoallv to exchange domain information.
* - Default is 1.
*
* - BGLOCKLESSMPIO_F_TYPE - Specify a filesystem type that should run
* the ad_bglockless driver. NOTE: Using romio prefixes (such as
* "bgl:" or "bglockless:") on a file name will override this environment
* variable. Possible values:
* - 0xnnnnnnnn - Any valid file system type (or "magic number") from
* statfs() field f_type.
* - The default is 0x20030528 (PVFS2_SUPER_MAGIC)
*
*/
void ad_bgl_get_env_vars() {
char *x;
char *x, *dummy;
bglmpio_comm = 0;
x = getenv( "BGLMPIO_COMM" );
@ -84,6 +98,11 @@ void ad_bgl_get_env_vars() {
bglmpio_tuneblocking = 1;
x = getenv( "BGLMPIO_TUNEBLOCKING" );
if (x) bglmpio_tuneblocking = atoi(x);
bglocklessmpio_f_type = PVFS2_SUPER_MAGIC;
x = getenv( "BGLOCKLESSMPIO_F_TYPE" );
if (x) bglocklessmpio_f_type = strtol(x,&dummy,0);
DBG_FPRINTF(stderr,"BGLOCKLESSMPIO_F_TYPE=%ld/%#lX\n",
bglocklessmpio_f_type,bglocklessmpio_f_type);
}
/* report timing breakdown for MPI I/O collective call */

Просмотреть файл

@ -59,6 +59,7 @@ extern int bglmpio_timing2;
extern int bglmpio_comm;
extern int bglmpio_tunegather;
extern int bglmpio_tuneblocking;
extern long bglocklessmpio_f_type;
/* set internal variables for tuning environment variables */

Просмотреть файл

@ -26,7 +26,7 @@
#endif
/* prototypes of functions used for collective writes only. */
static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype
static void ADIOI_Exch_and_write(ADIO_File fd, const void *buf, MPI_Datatype
datatype, int nprocs, int myrank, ADIOI_Access
*others_req, ADIO_Offset *offset_list,
ADIO_Offset *len_list, int contig_access_count, ADIO_Offset
@ -45,10 +45,10 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
ADIO_Offset *fd_start, ADIO_Offset *fd_end,
ADIOI_Access *others_req,
int *send_buf_idx, int *curr_to_proc,
int *done_to_proc, int *hole, int iter,
int *done_to_proc, int *hole, int iter,
MPI_Aint buftype_extent, int *buf_idx, int *error_code);
static void ADIOI_W_Exchange_data_alltoallv(
ADIO_File fd, void *buf,
ADIO_File fd, void *buf,
char *write_buf, /* 1 */
ADIOI_Flatlist_node *flat_buf,
ADIO_Offset *offset_list,
@ -421,7 +421,7 @@ void ADIOI_BGL_WriteStridedColl(ADIO_File fd, void *buf, int count,
/* If successful, error_code is set to MPI_SUCCESS. Otherwise an error
* code is created and returned in error_code.
*/
static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype
static void ADIOI_Exch_and_write(ADIO_File fd, const void *buf, MPI_Datatype
datatype, int nprocs,
int myrank,
ADIOI_Access
@ -732,7 +732,7 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype
/* Sets error_code to MPI_SUCCESS if successful, or creates an error code
* in the case of error.
*/
static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
static void ADIOI_W_Exchange_data(ADIO_File fd, const void *buf, char *write_buf,
ADIOI_Flatlist_node *flat_buf, ADIO_Offset
*offset_list, ADIO_Offset *len_list, int *send_size,
int *recv_size, ADIO_Offset off, int size,
@ -799,12 +799,15 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
sum = 0;
for (i=0; i<nprocs; i++) sum += count[i];
srt_off = (ADIO_Offset *) ADIOI_Malloc((sum+1)*sizeof(ADIO_Offset));
srt_len = (int *) ADIOI_Malloc((sum+1)*sizeof(int));
/* +1 to avoid a 0-size malloc */
/* valgrind-detected optimization: if there is no work on this process we do
* not need to search for holes */
if (sum) {
srt_off = (ADIO_Offset *) ADIOI_Malloc((sum)*sizeof(ADIO_Offset));
srt_len = (int *) ADIOI_Malloc((sum)*sizeof(int));
ADIOI_Heap_merge(others_req, count, srt_off, srt_len, start_pos,
nprocs, nprocs_recv, sum);
ADIOI_Heap_merge(others_req, count, srt_off, srt_len, start_pos,
nprocs, nprocs_recv, sum);
}
/* for partial recvs, restore original lengths */
for (i=0; i<nprocs; i++)
@ -821,23 +824,25 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
* #835). Missing these holes would result in us writing more data than
* received by everyone else. */
*hole = 0;
if (off != srt_off[0]) /* hole at the front */
*hole = 1;
else { /* coalesce the sorted offset-length pairs */
for (i=1; i<sum; i++) {
if (srt_off[i] <= srt_off[0] + srt_len[0]) {
int new_len = srt_off[i] + srt_len[i] - srt_off[0];
if (new_len > srt_len[0]) srt_len[0] = new_len;
}
else
break;
}
if (i < sum || size != srt_len[0]) /* hole in middle or end */
if (sum) {
if (off != srt_off[0]) /* hole at the front */
*hole = 1;
}
else { /* coalesce the sorted offset-length pairs */
for (i=1; i<sum; i++) {
if (srt_off[i] <= srt_off[0] + srt_len[0]) {
int new_len = srt_off[i] + srt_len[i] - srt_off[0];
if (new_len > srt_len[0]) srt_len[0] = new_len;
}
else
break;
}
if (i < sum || size != srt_len[0]) /* hole in middle or end */
*hole = 1;
}
ADIOI_Free(srt_off);
ADIOI_Free(srt_len);
}
if (nprocs_recv) {
if (*hole) {
@ -1251,7 +1256,7 @@ static void ADIOI_Heap_merge(ADIOI_Access *others_req, int *count,
static void ADIOI_W_Exchange_data_alltoallv(
ADIO_File fd, void *buf,
ADIO_File fd, void *buf,
char *write_buf, /* 1 */
ADIOI_Flatlist_node *flat_buf,
ADIO_Offset *offset_list,

Просмотреть файл

@ -253,6 +253,9 @@ void ADIOI_BGL_WriteStrided(ADIO_File fd, void *buf, int count,
MPI_Type_size(fd->filetype, &filetype_size);
if ( ! filetype_size ) {
#ifdef HAVE_STATUS_SET_BYTES
MPIR_Status_set_bytes(status, datatype, 0);
#endif
*error_code = MPI_SUCCESS;
return;
}

Просмотреть файл

@ -1,26 +0,0 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Pull in Makefile.options from the top of the source tree
# (presumably the build settings shared by all ADIO sub-directories).
include $(top_srcdir)/Makefile.options
# libadio_bglockless.la is a libtool library that is built but never
# installed (noinst_ prefix).
noinst_LTLIBRARIES = libadio_bglockless.la
# Every source and private header that goes into libadio_bglockless.la.
libadio_bglockless_la_SOURCES = \
ad_bglockless.c \
ad_bglockless.h \
ad_bglockless_features.c

Просмотреть файл

@ -0,0 +1,17 @@
## -*- Mode: Makefile; -*-
## vim: set ft=automake :
##
## (C) 2011 by Argonne National Laboratory.
## See COPYRIGHT in top-level directory.
##
## Everything below is included in the build only when the
## BUILD_AD_BGLOCKLESS Automake conditional is true.
if BUILD_AD_BGLOCKLESS

## Driver header: listed under noinst_HEADERS, so it is not installed.
noinst_HEADERS += adio/ad_bglockless/ad_bglockless.h

## Driver sources, appended to the romio_other_sources list.
romio_other_sources +=                          \
    adio/ad_bglockless/ad_bglockless.c          \
    adio/ad_bglockless/ad_bglockless_features.c

endif BUILD_AD_BGLOCKLESS

Просмотреть файл

@ -1,29 +1,29 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
*
* Copyright (C) 2001 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*/
#include "../ad_bgl/ad_bgl.h"
#include "../ad_bg/ad_bg.h"
#include "ad_bglockless.h"
/* adioi.h has the ADIOI_Fns_struct define */
#include "adioi.h"
struct ADIOI_Fns_struct ADIO_BGLOCKLESS_operations = {
ADIOI_BGL_Open, /* Open */
ADIOI_BG_Open, /* Open */
ADIOI_GEN_OpenColl, /* Collective open */
ADIOI_GEN_ReadContig, /* ReadContig */
ADIOI_GEN_WriteContig, /* WriteContig */
ADIOI_BGL_ReadStridedColl, /* ReadStridedColl */
ADIOI_BGL_WriteStridedColl, /* WriteStridedColl */
ADIOI_BG_ReadStridedColl, /* ReadStridedColl */
ADIOI_BG_WriteStridedColl, /* WriteStridedColl */
ADIOI_GEN_SeekIndividual, /* SeekIndividual */
ADIOI_GEN_Fcntl, /* Fcntl */
ADIOI_BGL_SetInfo, /* SetInfo */
ADIOI_BG_SetInfo, /* SetInfo */
ADIOI_GEN_ReadStrided, /* ReadStrided */
ADIOI_NOLOCK_WriteStrided, /* WriteStrided */
ADIOI_BGL_Close, /* Close */
ADIOI_BG_Close, /* Close */
#ifdef ROMIO_HAVE_WORKING_AIO
ADIOI_GEN_IreadContig, /* IreadContig */
ADIOI_GEN_IwriteContig, /* IwriteContig */
@ -37,7 +37,7 @@ struct ADIOI_Fns_struct ADIO_BGLOCKLESS_operations = {
ADIOI_GEN_IOComplete, /* WriteComplete */
ADIOI_GEN_IreadStrided, /* IreadStrided */
ADIOI_GEN_IwriteStrided, /* IwriteStrided */
ADIOI_BGL_Flush, /* Flush */
ADIOI_BG_Flush, /* Flush */
ADIOI_GEN_Resize, /* Resize */
ADIOI_GEN_Delete, /* Delete */
ADIOI_BGLOCKLESS_Feature /* Features */

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
*
* Copyright (C) 2008 Uchicago Argonne LLC

Просмотреть файл

@ -1,35 +0,0 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Pull in the top-level Makefile.options fragment.
include $(top_srcdir)/Makefile.options

# Libtool library for the gridftp ADIO driver; the noinst_ prefix means
# it is built but not installed.
noinst_LTLIBRARIES = libadio_gridftp.la

# Files that make up libadio_gridftp.la.
libadio_gridftp_la_SOURCES = \
    ad_gridftp.c             \
    ad_gridftp.h             \
    ad_gridftp_close.c       \
    ad_gridftp_delete.c      \
    ad_gridftp_fcntl.c       \
    ad_gridftp_features.c    \
    ad_gridftp_flush.c       \
    ad_gridftp_hints.c       \
    ad_gridftp_open.c        \
    ad_gridftp_read.c        \
    ad_gridftp_resize.c      \
    ad_gridftp_write.c       \
    globus_routines.c

Просмотреть файл

@ -0,0 +1,27 @@
## -*- Mode: Makefile; -*-
## vim: set ft=automake :
##
## (C) 2011 by Argonne National Laboratory.
## See COPYRIGHT in top-level directory.
##
## Everything below is included in the build only when the
## BUILD_AD_GRIDFTP Automake conditional is true.
if BUILD_AD_GRIDFTP

## Driver header: listed under noinst_HEADERS, so it is not installed.
noinst_HEADERS += adio/ad_gridftp/ad_gridftp.h

## Driver sources, appended to the romio_other_sources list.
romio_other_sources +=                      \
    adio/ad_gridftp/ad_gridftp_close.c      \
    adio/ad_gridftp/ad_gridftp_open.c       \
    adio/ad_gridftp/ad_gridftp_read.c       \
    adio/ad_gridftp/ad_gridftp_write.c      \
    adio/ad_gridftp/ad_gridftp_fcntl.c      \
    adio/ad_gridftp/ad_gridftp_flush.c      \
    adio/ad_gridftp/ad_gridftp_resize.c     \
    adio/ad_gridftp/ad_gridftp_hints.c      \
    adio/ad_gridftp/ad_gridftp_delete.c     \
    adio/ad_gridftp/ad_gridftp.c            \
    adio/ad_gridftp/globus_routines.c       \
    adio/ad_gridftp/ad_gridftp_features.c

endif BUILD_AD_GRIDFTP

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
*
* Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center.

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
* Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center.
* See COPYRIGHT notice in top-level directory.

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
*
* Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center.

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
*
* Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center.

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
*
* Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center.

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
*
* Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center.

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
*
* Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center.

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
* Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center.
* See COPYRIGHT notice in top-level directory.

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
*
* Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center.

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
*
* Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center.

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
*
* Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center.

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
*
* Copyright (C) 2003 University of Chicago, Ohio Supercomputer Center.

Просмотреть файл

@ -1,31 +0,0 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Pull in the top-level Makefile.options fragment.
include $(top_srcdir)/Makefile.options

# Ship the README in the distribution tarball even though nothing builds it.
EXTRA_DIST = README

# Libtool library for the hfs ADIO driver; the noinst_ prefix means
# it is built but not installed.
noinst_LTLIBRARIES = libadio_hfs.la

# Files that make up libadio_hfs.la.
libadio_hfs_la_SOURCES = \
    ad_hfs.h             \
    ad_hfs.c             \
    ad_hfs_fcntl.c       \
    ad_hfs_open.c        \
    ad_hfs_read.c        \
    ad_hfs_resize.c      \
    ad_hfs_write.c

Просмотреть файл

@ -0,0 +1,21 @@
## -*- Mode: Makefile; -*-
## vim: set ft=automake :
##
## (C) 2011 by Argonne National Laboratory.
## See COPYRIGHT in top-level directory.
##
## Everything below is included in the build only when the
## BUILD_AD_HFS Automake conditional is true.
if BUILD_AD_HFS

## Driver header: listed under noinst_HEADERS, so it is not installed.
noinst_HEADERS += adio/ad_hfs/ad_hfs.h

## Driver sources, appended to the romio_other_sources list.
romio_other_sources +=              \
    adio/ad_hfs/ad_hfs_read.c       \
    adio/ad_hfs/ad_hfs_open.c       \
    adio/ad_hfs/ad_hfs_write.c      \
    adio/ad_hfs/ad_hfs_fcntl.c      \
    adio/ad_hfs/ad_hfs_resize.c     \
    adio/ad_hfs/ad_hfs.c

endif BUILD_AD_HFS

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
*
* Copyright (C) 2001 University of Chicago.

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
*
* Copyright (C) 1997 University of Chicago.

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
*
* Copyright (C) 1997 University of Chicago.
@ -30,7 +30,7 @@ void ADIOI_HFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *er
/* not required in SPPUX since there we use pread/pwrite */
#endif
if (fcntl_struct->fsize == -1) {
#ifdef MPICH2
#ifdef MPICH
*error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io",
"**io %s", strerror(errno));
#elif defined(PRINT_ERR_MSG)
@ -51,7 +51,7 @@ void ADIOI_HFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *er
err = prealloc64(fd->fd_sys, fcntl_struct->diskspace);
/* prealloc64 works only if file is of zero length */
if (err && (errno != ENOTEMPTY)) {
#ifdef MPICH2
#ifdef MPICH
*error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io",
"**io %s", strerror(errno));
#elif defined(PRINT_ERR_MSG)
@ -72,7 +72,7 @@ void ADIOI_HFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *er
if (fcntl_struct->diskspace <= 2147483647) {
err = prealloc(fd->fd_sys, (off_t) fcntl_struct->diskspace);
if (err && (errno != ENOTEMPTY)) {
#ifdef MPICH2
#ifdef MPICH
*error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io",
"**io %s", strerror(errno));
#elif defined(PRINT_ERR_MSG)

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
*
* Copyright (C) 1997 University of Chicago.
@ -52,7 +52,7 @@ void ADIOI_HFS_Open(ADIO_File fd, int *error_code)
#endif
if (fd->fd_sys == -1 ) {
#ifdef MPICH2
#ifdef MPICH
*error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io",
"**io %s", strerror(errno));
#elif defined(PRINT_ERR_MSG)

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
*
* Copyright (C) 1997 University of Chicago.
@ -56,7 +56,7 @@ void ADIOI_HFS_ReadContig(ADIO_File fd, void *buf, int count,
#endif
if (err == -1 ) {
#ifdef MPICH2
#ifdef MPICH
*error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io",
"**io %s", strerror(errno));
#elif defined(PRINT_ERR_MSG)

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
*
* Copyright (C) 1997 University of Chicago.
@ -16,7 +16,7 @@ void ADIOI_HFS_Resize(ADIO_File fd, ADIO_Offset size, int *error_code)
err = ftruncate64(fd->fd_sys, size);
if (err == -1) {
#ifdef MPICH2
#ifdef MPICH
*error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io",
"**io %s", strerror(errno));
#elif defined(PRINT_ERR_MSG)

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
*
* Copyright (C) 1997 University of Chicago.
@ -55,7 +55,7 @@ void ADIOI_HFS_WriteContig(ADIO_File fd, void *buf, int count,
#endif
if (err == -1) {
#ifdef MPICH2
#ifdef MPICH
*error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io",
"**io %s", strerror(errno));
#elif defined(PRINT_ERR_MSG)

Просмотреть файл

@ -1,34 +0,0 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Pull in the top-level Makefile.options fragment.
include $(top_srcdir)/Makefile.options

# Ship the README in the distribution tarball even though nothing builds it.
EXTRA_DIST = README

# Libtool library for the lustre ADIO driver; the noinst_ prefix means
# it is built but not installed.
noinst_LTLIBRARIES = libadio_lustre.la

# Files that make up libadio_lustre.la.
libadio_lustre_la_SOURCES = \
    ad_lustre.c             \
    ad_lustre_aggregate.c   \
    ad_lustre_fcntl.c       \
    ad_lustre.h             \
    ad_lustre_hints.c       \
    ad_lustre_open.c        \
    ad_lustre_wrcoll.c      \
    ad_lustre_rwcontig.c    \
    ad_lustre_wrstr.c

Просмотреть файл

@ -0,0 +1,22 @@
## -*- Mode: Makefile; -*-
## vim: set ft=automake :
##
## (C) 2011 by Argonne National Laboratory.
## See COPYRIGHT in top-level directory.
##
## Everything below is included in the build only when the
## BUILD_AD_LUSTRE Automake conditional is true.
if BUILD_AD_LUSTRE

## Driver header: listed under noinst_HEADERS, so it is not installed.
noinst_HEADERS += adio/ad_lustre/ad_lustre.h

## Driver sources, appended to the romio_other_sources list.
romio_other_sources +=                      \
    adio/ad_lustre/ad_lustre.c              \
    adio/ad_lustre/ad_lustre_open.c         \
    adio/ad_lustre/ad_lustre_rwcontig.c     \
    adio/ad_lustre/ad_lustre_wrcoll.c       \
    adio/ad_lustre/ad_lustre_wrstr.c        \
    adio/ad_lustre/ad_lustre_hints.c        \
    adio/ad_lustre/ad_lustre_aggregate.c

endif BUILD_AD_LUSTRE

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
* Copyright (C) 2001 University of Chicago.
* See COPYRIGHT notice in top-level directory.

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
@ -15,7 +15,7 @@ void ADIOI_LUSTRE_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int
ADIO_Offset curr_fsize, alloc_size, size, len, done;
ADIO_Status status;
char *buf;
#if defined(MPICH2) || !defined(PRINT_ERR_MSG)
#if defined(MPICH) || !defined(PRINT_ERR_MSG)
static char myname[] = "ADIOI_LUSTRE_FCNTL";
#endif

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
@ -22,7 +22,7 @@ void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code)
struct lov_user_md *lum = NULL;
char *value;
#if defined(MPICH2) || !defined(PRINT_ERR_MSG)
#if defined(MPICH) || !defined(PRINT_ERR_MSG)
static char myname[] = "ADIOI_LUSTRE_OPEN";
#endif

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
@ -42,7 +42,7 @@ static void ADIOI_LUSTRE_W_Exchange_data(ADIO_File fd, void *buf,
ADIO_Offset *len_list, int *send_size,
int *recv_size, ADIO_Offset off,
int size, int *count,
int *start_pos, int *partial_recv,
int *start_pos,
int *sent_to_proc, int nprocs,
int myrank, int buftype_is_contig,
int contig_access_count,
@ -52,7 +52,9 @@ static void ADIOI_LUSTRE_W_Exchange_data(ADIO_File fd, void *buf,
int *curr_to_proc,
int *done_to_proc, int *hole,
int iter, MPI_Aint buftype_extent,
int *buf_idx, int *error_code);
int *buf_idx,
ADIO_Offset **srt_off, int **srt_len, int *srt_num,
int *error_code);
void ADIOI_Heap_merge(ADIOI_Access *others_req, int *count,
ADIO_Offset *srt_off, int *srt_len, int *start_pos,
int nprocs, int nprocs_recv, int total_elements);
@ -308,7 +310,7 @@ static void ADIOI_LUSTRE_Exch_and_write(ADIO_File fd, void *buf,
int real_size, req_len, send_len;
int *recv_curr_offlen_ptr, *recv_count, *recv_size;
int *send_curr_offlen_ptr, *send_size;
int *partial_recv, *sent_to_proc, *recv_start_pos;
int *sent_to_proc, *recv_start_pos;
int *send_buf_idx, *curr_to_proc, *done_to_proc;
int *this_buf_idx;
char *write_buf = NULL;
@ -317,6 +319,11 @@ static void ADIOI_LUSTRE_Exch_and_write(ADIO_File fd, void *buf,
MPI_Aint buftype_extent;
int stripe_size = striping_info[0], avail_cb_nodes = striping_info[2];
int data_sieving = 0;
ADIO_Offset *srt_off = NULL;
int *srt_len = NULL;
int srt_num = 0;
ADIO_Offset block_offset;
int block_len;
*error_code = MPI_SUCCESS; /* changed below if error */
/* only I/O errors are currently reported */
@ -508,12 +515,14 @@ static void ADIOI_LUSTRE_Exch_and_write(ADIO_File fd, void *buf,
hole = data_sieving;
ADIOI_LUSTRE_W_Exchange_data(fd, buf, write_buf, flat_buf, offset_list,
len_list, send_size, recv_size, off, real_size,
recv_count, recv_start_pos, partial_recv,
recv_count, recv_start_pos,
sent_to_proc, nprocs, myrank,
buftype_is_contig, contig_access_count,
striping_info, others_req, send_buf_idx,
curr_to_proc, done_to_proc, &hole, m,
buftype_extent, this_buf_idx, error_code);
buftype_extent, this_buf_idx,
&srt_off, &srt_len, &srt_num, error_code);
if (*error_code != MPI_SUCCESS)
goto over;
@ -537,23 +546,42 @@ static void ADIOI_LUSTRE_Exch_and_write(ADIO_File fd, void *buf,
ADIO_EXPLICIT_OFFSET, off, &status,
error_code);
} else {
for (i = 0; i < nprocs; i++) {
if (others_req[i].count) {
for (j = 0; j < others_req[i].count; j++) {
if (others_req[i].offsets[j] < off + real_size &&
others_req[i].offsets[j] >= off) {
block_offset = -1;
block_len = 0;
for (i = 0; i < srt_num; ++i) {
if (srt_off[i] < off + real_size &&
srt_off[i] >= off) {
if (block_offset == -1) {
block_offset = srt_off[i];
block_len = srt_len[i];
} else {
if (srt_off[i] == block_offset + block_len) {
block_len += srt_len[i];
} else {
ADIO_WriteContig(fd,
write_buf + others_req[i].offsets[j] - off,
others_req[i].lens[j],
write_buf + block_offset - off,
block_len,
MPI_BYTE, ADIO_EXPLICIT_OFFSET,
others_req[i].offsets[j], &status,
block_offset, &status,
error_code);
if (*error_code != MPI_SUCCESS)
goto over;
block_offset = srt_off[i];
block_len = srt_len[i];
}
}
}
}
if (block_offset != -1) {
ADIO_WriteContig(fd,
write_buf + block_offset - off,
block_len,
MPI_BYTE, ADIO_EXPLICIT_OFFSET,
block_offset, &status,
error_code);
if (*error_code != MPI_SUCCESS)
goto over;
}
}
}
if (*error_code != MPI_SUCCESS)
@ -562,6 +590,10 @@ static void ADIOI_LUSTRE_Exch_and_write(ADIO_File fd, void *buf,
iter_st_off += max_size;
}
over:
if (srt_off)
ADIOI_Free(srt_off);
if (srt_len)
ADIOI_Free(srt_len);
if (ntimes)
ADIOI_Free(write_buf);
ADIOI_Free(recv_curr_offlen_ptr);
@ -588,7 +620,7 @@ static void ADIOI_LUSTRE_W_Exchange_data(ADIO_File fd, void *buf,
ADIO_Offset *len_list, int *send_size,
int *recv_size, ADIO_Offset off,
int size, int *count,
int *start_pos, int *partial_recv,
int *start_pos,
int *sent_to_proc, int nprocs,
int myrank, int buftype_is_contig,
int contig_access_count,
@ -598,15 +630,16 @@ static void ADIOI_LUSTRE_W_Exchange_data(ADIO_File fd, void *buf,
int *curr_to_proc, int *done_to_proc,
int *hole, int iter,
MPI_Aint buftype_extent,
int *buf_idx, int *error_code)
int *buf_idx,
ADIO_Offset **srt_off, int **srt_len, int *srt_num,
int *error_code)
{
int i, j, nprocs_recv, nprocs_send, err;
char **send_buf = NULL;
MPI_Request *requests, *send_req;
MPI_Datatype *recv_types;
MPI_Status *statuses, status;
int *srt_len, sum, sum_recv;
ADIO_Offset *srt_off;
int sum_recv;
int data_sieving = *hole;
static char myname[] = "ADIOI_W_EXCHANGE_DATA";
@ -638,20 +671,26 @@ static void ADIOI_LUSTRE_W_Exchange_data(ADIO_File fd, void *buf,
* For this, merge the (sorted) offset lists others_req using a heap-merge.
*/
sum = 0;
*srt_num = 0;
for (i = 0; i < nprocs; i++)
sum += count[i];
srt_off = (ADIO_Offset *) ADIOI_Malloc((sum + 1) * sizeof(ADIO_Offset));
srt_len = (int *) ADIOI_Malloc((sum + 1) * sizeof(int));
*srt_num += count[i];
if (*srt_off)
*srt_off = (ADIO_Offset *) ADIOI_Realloc(*srt_off, (*srt_num + 1) * sizeof(ADIO_Offset));
else
*srt_off = (ADIO_Offset *) ADIOI_Malloc((*srt_num + 1) * sizeof(ADIO_Offset));
if (*srt_len)
*srt_len = (int *) ADIOI_Realloc(*srt_len, (*srt_num + 1) * sizeof(int));
else
*srt_len = (int *) ADIOI_Malloc((*srt_num + 1) * sizeof(int));
/* +1 to avoid a 0-size malloc */
ADIOI_Heap_merge(others_req, count, srt_off, srt_len, start_pos,
nprocs, nprocs_recv, sum);
ADIOI_Heap_merge(others_req, count, *srt_off, *srt_len, start_pos,
nprocs, nprocs_recv, *srt_num);
/* check if there are any holes */
*hole = 0;
for (i = 0; i < sum - 1; i++) {
if (srt_off[i] + srt_len[i] < srt_off[i + 1]) {
for (i = 0; i < *srt_num - 1; i++) {
if ((*srt_off)[i] + (*srt_len)[i] < (*srt_off)[i + 1]) {
*hole = 1;
break;
}
@ -681,14 +720,10 @@ static void ADIOI_LUSTRE_W_Exchange_data(ADIO_File fd, void *buf,
MPI_ERR_IO,
"**ioRMWrdwr", 0);
ADIOI_Free(recv_types);
ADIOI_Free(srt_off);
ADIOI_Free(srt_len);
return;
}
// --END ERROR HANDLING--
}
ADIOI_Free(srt_off);
ADIOI_Free(srt_len);
nprocs_send = 0;
for (i = 0; i < nprocs; i++)

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
@ -188,6 +188,9 @@ void ADIOI_LUSTRE_WriteStrided(ADIO_File fd, void *buf, int count,
MPI_Type_size(fd->filetype, &filetype_size);
if (!filetype_size) {
#ifdef HAVE_STATUS_SET_BYTES
MPIR_Status_set_bytes(status, datatype, 0);
#endif
*error_code = MPI_SUCCESS;
return;
}

Просмотреть файл

@ -1,37 +0,0 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Pull in the top-level Makefile.options fragment.
include $(top_srcdir)/Makefile.options

# Libtool library for the nfs ADIO driver; the noinst_ prefix means
# it is built but not installed.
noinst_LTLIBRARIES = libadio_nfs.la

# Files that make up libadio_nfs.la.
libadio_nfs_la_SOURCES = \
    ad_nfs.c             \
    ad_nfs.h             \
    ad_nfs_done.c        \
    ad_nfs_fcntl.c       \
    ad_nfs_features.c    \
    ad_nfs_getsh.c       \
    ad_nfs_hints.c       \
    ad_nfs_iread.c       \
    ad_nfs_iwrite.c      \
    ad_nfs_open.c        \
    ad_nfs_read.c        \
    ad_nfs_resize.c      \
    ad_nfs_setsh.c       \
    ad_nfs_wait.c        \
    ad_nfs_write.c

Просмотреть файл

@ -0,0 +1,28 @@
## -*- Mode: Makefile; -*-
## vim: set ft=automake :
##
## (C) 2011 by Argonne National Laboratory.
## See COPYRIGHT in top-level directory.
##
## Everything below is included in the build only when the
## BUILD_AD_NFS Automake conditional is true.
if BUILD_AD_NFS

## Driver header: listed under noinst_HEADERS, so it is not installed.
noinst_HEADERS += adio/ad_nfs/ad_nfs.h

## Driver sources, appended to the romio_other_sources list.
romio_other_sources +=              \
    adio/ad_nfs/ad_nfs_read.c       \
    adio/ad_nfs/ad_nfs_open.c       \
    adio/ad_nfs/ad_nfs_write.c      \
    adio/ad_nfs/ad_nfs_done.c       \
    adio/ad_nfs/ad_nfs_fcntl.c      \
    adio/ad_nfs/ad_nfs_iread.c      \
    adio/ad_nfs/ad_nfs_iwrite.c     \
    adio/ad_nfs/ad_nfs_wait.c       \
    adio/ad_nfs/ad_nfs_setsh.c      \
    adio/ad_nfs/ad_nfs_getsh.c      \
    adio/ad_nfs/ad_nfs.c            \
    adio/ad_nfs/ad_nfs_resize.c     \
    adio/ad_nfs/ad_nfs_features.c

endif BUILD_AD_NFS

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
*
* Copyright (C) 2001 University of Chicago.

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
@ -43,7 +43,7 @@ void ADIOI_NFS_ReadContig(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status, int
*error_code);
void ADIOI_NFS_WriteContig(ADIO_File fd, void *buf, int count,
void ADIOI_NFS_WriteContig(ADIO_File fd, const void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status, int
*error_code);
@ -65,7 +65,7 @@ void ADIOI_NFS_WriteComplete(ADIO_Request *request, ADIO_Status *status,
int *error_code);
void ADIOI_NFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int
*error_code);
void ADIOI_NFS_WriteStrided(ADIO_File fd, void *buf, int count,
void ADIOI_NFS_WriteStrided(ADIO_File fd, const void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status, int
*error_code);

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
*
* Copyright (C) 1997 University of Chicago.

Просмотреть файл

@ -10,6 +10,7 @@ int ADIOI_NFS_Feature(ADIO_File fd, int flag)
case ADIO_DATA_SIEVING_WRITES:
return 1;
case ADIO_SCALABLE_OPEN:
case ADIO_UNLINK_AFTER_CLOSE:
default:
return 0;
}

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
*
* Copyright (C) 1997 University of Chicago.
@ -73,6 +73,8 @@ void ADIOI_NFS_Get_shared_fp(ADIO_File fd, int incr, ADIO_Offset *shared_fp,
}
}
if (incr == 0) {goto done;}
new_fp = *shared_fp + incr;
#ifdef ADIOI_MPE_LOGGING
@ -91,6 +93,7 @@ void ADIOI_NFS_Get_shared_fp(ADIO_File fd, int incr, ADIO_Offset *shared_fp,
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
}
done:
ADIOI_UNLOCK(fd->shared_fp_fd, 0, SEEK_SET, sizeof(ADIO_Offset));
if (err == -1) {
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
*
* Copyright (C) 1997 University of Chicago.

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
*
* Copyright (C) 1997 University of Chicago.
@ -91,6 +91,19 @@ void ADIOI_NFS_Open(ADIO_File fd, int *error_code)
__LINE__, MPI_ERR_READ_ONLY,
"**ioneedrd", 0);
}
else if(errno == EISDIR) {
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
MPIR_ERR_RECOVERABLE, myname,
__LINE__, MPI_ERR_BAD_FILE,
"**filename", 0);
}
else if(errno == EEXIST) {
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
MPIR_ERR_RECOVERABLE, myname,
__LINE__, MPI_ERR_FILE_EXISTS,
"**fileexist", 0);
}
else {
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
MPIR_ERR_RECOVERABLE, myname,

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
*
* Copyright (C) 1997 University of Chicago.
@ -187,6 +187,9 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count,
MPI_Type_size(fd->filetype, &filetype_size);
if ( ! filetype_size ) {
#ifdef HAVE_STATUS_SET_BYTES
MPIR_Status_set_bytes(status, datatype, 0);
#endif
*error_code = MPI_SUCCESS;
return;
}

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
*
* Copyright (C) 2004 University of Chicago.
@ -20,15 +20,13 @@
void ADIOI_NFS_Resize(ADIO_File fd, ADIO_Offset size, int *error_code)
{
int err;
static char myname[] = "ADIOI_GEN_RESIZE";
static char myname[] = "ADIOI_NFS_RESIZE";
err = ftruncate(fd->fd_sys, size);
/* --BEGIN ERROR HANDLING-- */
if (err == -1) {
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
myname, __LINE__, MPI_ERR_IO,
"**io", "**io %s", strerror(errno));
*error_code = ADIOI_Err_create_code(myname, fd->filename, errno);
return;
}
/* --END ERROR HANDLING-- */

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
*
* Copyright (C) 1997 University of Chicago.

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
*
* Copyright (C) 1997 University of Chicago.
@ -8,7 +8,7 @@
#include "ad_nfs.h"
#include "adio_extern.h"
void ADIOI_NFS_WriteContig(ADIO_File fd, void *buf, int count,
void ADIOI_NFS_WriteContig(ADIO_File fd, const void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status, int *error_code)
{
@ -263,7 +263,7 @@ void ADIOI_NFS_WriteContig(ADIO_File fd, void *buf, int count,
#endif
void ADIOI_NFS_WriteStrided(ADIO_File fd, void *buf, int count,
void ADIOI_NFS_WriteStrided(ADIO_File fd, const void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status, int
*error_code)
@ -290,6 +290,9 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, void *buf, int count,
MPI_Type_size(fd->filetype, &filetype_size);
if ( ! filetype_size ) {
#ifdef HAVE_STATUS_SET_BYTES
MPIR_Status_set_bytes(status, datatype, 0);
#endif
*error_code = MPI_SUCCESS;
return;
}

Просмотреть файл

@ -1,35 +0,0 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Pull in the top-level Makefile.options fragment.
include $(top_srcdir)/Makefile.options

# Libtool library for the ntfs ADIO driver; the noinst_ prefix means
# it is built but not installed.
noinst_LTLIBRARIES = libadio_ntfs.la

# Files that make up libadio_ntfs.la.
libadio_ntfs_la_SOURCES = \
    ad_ntfs.c             \
    ad_ntfs.h             \
    ad_ntfs_close.c       \
    ad_ntfs_done.c        \
    ad_ntfs_fcntl.c       \
    ad_ntfs_flush.c       \
    ad_ntfs_iread.c       \
    ad_ntfs_iwrite.c      \
    ad_ntfs_open.c        \
    ad_ntfs_read.c        \
    ad_ntfs_resize.c      \
    ad_ntfs_wait.c        \
    ad_ntfs_write.c

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
*
* Copyright (C) 2001 University of Chicago.
@ -34,5 +34,5 @@ struct ADIOI_Fns_struct ADIO_NTFS_operations = {
ADIOI_NTFS_Flush, /* Flush */
ADIOI_NTFS_Resize, /* Resize */
ADIOI_GEN_Delete, /* Delete */
ADIOI_GEN_Feature /* Features */
ADIOI_NTFS_Feature /* Features */
};

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
*
*

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
*
* Copyright (C) 1997 University of Chicago.

Просмотреть файл

@ -1,4 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше