1
1
Update the version of ROMIO to that which was contained in
MPICH2-1.0.7, plus a few patches from the upstream ROMIO maintainers
(because OMPI uses a few code paths in ROMIO that MPICH2 does not;
there were a few compile bugs in the ROMIO from MPICH2-1.0.7).

Added an info MCA param to be able to tell which version of ROMIO is
contained in OMPI: io_romio_version.

Many, many thanks to romio-maint@mcs.anl.gov for all their help in
integrating this new version of ROMIO into Open MPI.

This commit was SVN r19045.

The following Trac tickets were found above:
  Ticket 1370 --> https://svn.open-mpi.org/trac/ompi/ticket/1370
Этот коммит содержится в:
Jeff Squyres 2008-07-26 12:23:30 +00:00
родитель 0735d6f1c2
Коммит 63b63d48c3
194 изменённых файлов: 19589 добавлений и 2571 удалений

2
NEWS
Просмотреть файл

@ -60,7 +60,7 @@ Trunk (not on release branches yet)
- Remove use of STL from C++ bindings.
- Added support for Platform/LSF job launchers. Must be Platform LSF
v7.0.2 or later.
- Updated ROMIO with the version from MPICH2 1.0.5p4
- Updated ROMIO with the version from MPICH2 1.0.7
- Added RDMA capable one-sided component (called rdma), which
can be used with BTL components that expose a full one-sided
interface.

Просмотреть файл

@ -220,8 +220,8 @@ int mca_io_base_request_alloc(ompi_file_t *file,
/*
* Free a module-specific IO MPI_Request
*/
void mca_io_base_request_free(ompi_file_t *file,
mca_io_base_request_t *req)
OMPI_DECLSPEC void mca_io_base_request_free(ompi_file_t *file,
mca_io_base_request_t *req)
{
/* Put the request back on the per-module freelist, since it's
been initialized for that module */
@ -295,7 +295,7 @@ mca_io_base_request_progress_init(void)
}
void
OMPI_DECLSPEC void
mca_io_base_request_progress_add(void)
{
#if OMPI_ENABLE_PROGRESS_THREADS
@ -319,7 +319,7 @@ mca_io_base_request_progress_add(void)
}
void
OMPI_DECLSPEC void
mca_io_base_request_progress_del(void)
{
OPAL_THREAD_ADD32(&mca_io_base_request_num_pending, -1);

Просмотреть файл

@ -114,8 +114,8 @@ extern "C" {
*
* For optimization reasons, \em no error checking is performed.
*/
void mca_io_base_request_free(ompi_file_t *file,
mca_io_base_request_t *req);
OMPI_DECLSPEC void mca_io_base_request_free(ompi_file_t *file,
mca_io_base_request_t *req);
/*
@ -135,12 +135,12 @@ extern "C" {
/**
*
*/
void mca_io_base_request_progress_add(void);
OMPI_DECLSPEC void mca_io_base_request_progress_add(void);
/**
*
*/
void mca_io_base_request_progress_del(void);
OMPI_DECLSPEC void mca_io_base_request_progress_del(void);
/**
* Finalize the request progress code

Просмотреть файл

@ -10,6 +10,7 @@
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@ -28,12 +29,14 @@ AC_DEFUN([MCA_io_romio_POST_CONFIG], [
# MCA_io_romio_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
AC_DEFUN([MCA_io_romio_CONFIG],[
OMPI_VAR_SCOPE_PUSH([io_romio_flags io_romio_flags_define io_romio_happy io_romio_save_LIBS])
AC_ARG_ENABLE([io-romio],
[AC_HELP_STRING([--disable-io-romio],
[Disable the ROMIO MPI-IO component])])
AC_ARG_WITH([io-romio-flags],
[AC_HELP_STRING([--with-io-romio-flags=FLAGS],
[Pass FLAGS to the ROMIO distribution configuration script])])
AC_DEFINE_UNQUOTED([MCA_io_romio_USER_CONFIGURE_FLAGS], ["$with_io_romio_flags"], [Set of user-defined configure flags given to ROMIOs configure script via --with-io-romio-flags])
AC_MSG_CHECKING([if want ROMIO component])
AS_IF([test "$enable_io_romio" = "no"],
[AC_MSG_RESULT([no])
@ -73,7 +76,10 @@ AC_DEFUN([MCA_io_romio_CONFIG],[
[AS_IF([test ! -z $build], [io_romio_flags="$io_romio_flags --build=$build"])
AS_IF([test ! -z $host], [io_romio_flags="$io_romio_flags --host=$host"])
AS_IF([test ! -z $target], [io_romio_flags="$io_romio_flags --target=$target"])])
io_romio_flags="$io_romio_flags CFLAGS="'"'"$CFLAGS"'"'" CPPFLAGS="'"'"$CPPFLAGS"'"'" FFLAGS="'"'"$FFLAGS"'"'" LDFLAGS="'"'"$LDFLAGS"'"'" --$io_romio_shared-shared --$io_romio_static-static $io_romio_flags $io_romio_prefix_arg --with-mpi=open_mpi"
io_romio_flags_define="$io_romio_flags CFLAGS='$CFLAGS' CPPFLAGS='$CPPFLAGS' FFLAGS='$FFLAGS' LDFLAGS='$LDFLAGS' --$io_romio_shared-shared --$io_romio_static-static $io_romio_flags $io_romio_prefix_arg --with-mpi=open_mpi --disable-aio"
AC_DEFINE_UNQUOTED([MCA_io_romio_COMPLETE_CONFIGURE_FLAGS], ["$io_romio_flags_define"], [Complete set of command line arguments given to ROMIOs configure script])
io_romio_flags="$io_romio_flags CFLAGS="'"'"$CFLAGS"'"'" CPPFLAGS="'"'"$CPPFLAGS"'"'" FFLAGS="'"'"$FFLAGS"'"'" LDFLAGS="'"'"$LDFLAGS"'"'" --$io_romio_shared-shared --$io_romio_static-static $io_romio_flags $io_romio_prefix_arg --with-mpi=open_mpi --disable-aio"
ompi_show_subtitle "Configuring ROMIO distribution"
OMPI_CONFIG_SUBDIR([ompi/mca/io/romio/romio],
@ -98,4 +104,5 @@ AC_DEFUN([MCA_io_romio_CONFIG],[
[AC_MSG_ERROR([ROMIO distribution did not configure successfully])],
[AC_MSG_WARN([ROMIO distribution did not configure successfully])])
$2])])])
OMPI_VAR_SCOPE_POP
])

Просмотреть файл

@ -9,6 +9,7 @@
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2008 Cisco Systems Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@ -18,7 +19,9 @@
include $(top_srcdir)/Makefile.options
SUBDIRS = include adio mpi-io
# Left out common/dataloop -- it's not enabled in MPICH2-1.0.7.
SUBDIRS = include adio mpi-io
DIST_SUBDIRS = $(SUBDIRS) doc util
EXTRA_DIST = README COPYRIGHT README_OMPI

Просмотреть файл

@ -1,6 +1,6 @@
ROMIO: A High-Performance, Portable MPI-IO Implementation
Version 2005-06-09
Version 2008-03-09
Major Changes in this version:
------------------------------

Просмотреть файл

@ -9,6 +9,7 @@
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@ -20,6 +21,22 @@ include $(top_srcdir)/Makefile.options
# Conditionals whether to build each subdir or not
if BUILD_BGL
BGL_DIR = ad_bgl
BGL_LIB = ad_bgl/libadio_bgl.la
else
BGL_DIR =
BGL_LIB =
endif
if BUILD_BGLOCKLESS
BGLOCKLESS_DIR = ad_bglockless
BGLOCKLESS_LIB = ad_bglockless/libadio_bglockless.la
else
BGLOCKLESS_DIR =
BGLOCKLESS_LIB =
endif
if BUILD_GRIDFTP
GRIDFTP_DIR = ad_gridftp
GRIDFTP_LIB = ad_gridftp/libadio_gridftp.la
@ -28,12 +45,21 @@ GRIDFTP_DIR =
GRIDFTP_LIB =
endif
if BUILD_HFS
HFS_DIR = ad_hfs
HFS_LIB = ad_hfs/libadio_hfs.la
# 8 July 2008: romio-maint@mcs.anl.gov says that this is deprecated
#if BUILD_HFS
#HFS_DIR = ad_hfs
#HFS_LIB = ad_hfs/libadio_hfs.la
#else
#HFS_DIR =
#HFS_LIB =
#endif
if BUILD_LUSTRE
LUSTRE_DIR = ad_lustre
LUSTRE_LIB = ad_lustre/libadio_lustre.la
else
HFS_DIR =
HFS_LIB =
LUSTRE_DIR =
LUSTRE_LIB =
endif
if BUILD_NFS
@ -69,13 +95,14 @@ PFS_DIR =
PFS_LIB =
endif
if BUILD_PIOFS
PIOFS_DIR = ad_piofs
PIOFS_LIB = ad_piofs/libadio_piofs.la
else
PIOFS_DIR =
PIOFS_LIB =
endif
# 8 July 2008: romio-maint@mcs.anl.gov says that this is deprecated
#if BUILD_PIOFS
#PIOFS_DIR = ad_piofs
#PIOFS_LIB = ad_piofs/libadio_piofs.la
#else
#PIOFS_DIR =
#PIOFS_LIB =
#endif
if BUILD_PVFS
PVFS_DIR = ad_pvfs
@ -126,21 +153,20 @@ XFS_LIB =
endif
SUBDIRS = common include \
$(GRIDFTP_DIR) $(HFS_DIR) $(NFS_DIR) $(NTFS_DIR) $(PANFS_DIR) \
$(PFS_DIR) $(PIOFS_DIR) $(PVFS_DIR) $(PVFS2_DIR) $(SFS_DIR) \
$(BG_DIR) $(BGLOCKLESS_DIR) \
$(GRIDFTP_DIR) $(LUSTRE_DIR) $(NFS_DIR) $(NTFS_DIR) $(PANFS_DIR) \
$(PFS_DIR) $(PVFS_DIR) $(PVFS2_DIR) $(SFS_DIR) \
$(TESTFS_DIR) $(UFS_DIR) $(XFS_DIR)
DIST_SUBDIRS = common include \
ad_gridftp ad_hfs ad_nfs ad_ntfs ad_panfs ad_pfs ad_piofs ad_pvfs \
ad_pvfs2 ad_sfs ad_testfs ad_ufs ad_xfs
ad_bgl ad_bglockless ad_gridftp ad_lustre ad_nfs ad_ntfs \
ad_panfs ad_pfs ad_pvfs ad_pvfs2 ad_sfs ad_testfs ad_ufs ad_xfs
# Library
noinst_LTLIBRARIES = libadio.la
libadio_la_SOURCES =
libadio_la_LIBADD = \
common/libadio_common.la \
$(GRIDFTP_LIB) $(HFS_LIB) $(NFS_LIB) $(NTFS_LIB) $(PANFS_LIB) \
$(PFS_LIB) $(PIOFS_LIB) $(PVFS_LIB) $(PVFS2_LIB) $(SFS_LIB) \
$(BG_LIB) $(BGLOCKLESS_LIB) \
$(GRIDFTP_LIB) $(LUSTRE_LIB) $(NFS_LIB) $(NTFS_LIB) $(PANFS_LIB) \
$(PFS_LIB) $(PVFS_LIB) $(PVFS2_LIB) $(SFS_LIB) \
$(TESTFS_LIB) $(UFS_LIB) $(XFS_LIB)
libadio_la_DEPENDENCIES = \
$(libadio_la_LIBADD)

Просмотреть файл

@ -0,0 +1,58 @@
<dir>
<file name="ad_bgl_getsh.c" info="1205188711"/>
<file name="ad_bgl_fcntl.c" info="1205188711"/>
<file name="ad_bgl_tuning.c" info="1205188711"/>
<file name="ad_bgl_pset.h" info="1205188711"/>
<file name="ad_bgl_aggrs.c" info="1205188711"/>
<file name="ad_bgl_wrcoll.c" info="1205188711"/>
<file name="ad_bgl_aggrs.h" info="1205188711"/>
<file name="ad_bgl_pset.c" info="1205188711"/>
<file name="ad_bgl_setsh.c" info="1205188711"/>
<file name="ad_bgl_close.c" info="1206398065"/>
<file name="ad_bgl.h" info="1205188711"/>
<file name="ad_bgl_read.c" info="1205188711"/>
<file name="ad_bgl_rdcoll.c" info="1205188711"/>
<file name="ad_bgl_open.c" info="1205188711"/>
<file name="ad_bgl_tuning.h" info="1205188711"/>
<file name="ad_bgl_write.c" info="1205188711"/>
<file name="ad_bgl_hints.c" info="1205188711"/>
<file name="ad_bgl.c" info="1205188711"/>
</dir>
<data>
<fileinfo name="ad_bgl_getsh.c">
</fileinfo>
<fileinfo name="ad_bgl_fcntl.c">
</fileinfo>
<fileinfo name="ad_bgl_tuning.c">
</fileinfo>
<fileinfo name="ad_bgl_pset.h">
</fileinfo>
<fileinfo name="ad_bgl_aggrs.c">
</fileinfo>
<fileinfo name="ad_bgl_wrcoll.c">
</fileinfo>
<fileinfo name="ad_bgl_aggrs.h">
</fileinfo>
<fileinfo name="ad_bgl_pset.c">
</fileinfo>
<fileinfo name="ad_bgl_setsh.c">
</fileinfo>
<fileinfo name="ad_bgl_close.c">
</fileinfo>
<fileinfo name="ad_bgl.h">
</fileinfo>
<fileinfo name="ad_bgl_read.c">
</fileinfo>
<fileinfo name="ad_bgl_rdcoll.c">
</fileinfo>
<fileinfo name="ad_bgl_open.c">
</fileinfo>
<fileinfo name="ad_bgl_tuning.h">
</fileinfo>
<fileinfo name="ad_bgl_write.c">
</fileinfo>
<fileinfo name="ad_bgl_hints.c">
</fileinfo>
<fileinfo name="ad_bgl.c">
</fileinfo>
</data>

Просмотреть файл

@ -0,0 +1,47 @@
CC = @CC@
AR = @AR@
LIBNAME = @LIBNAME@
srcdir = @srcdir@
CC_SHL = @CC_SHL@
SHLIBNAME = @SHLIBNAME@
INCLUDE_DIR = -I@MPI_INCLUDE_DIR@ -I${srcdir}/../include -I../include -I../../include -I${srcdir}/../../../../include -I../../../../include @CPPFLAGS@
CFLAGS = @CFLAGS@ $(INCLUDE_DIR) -DBGL_OPTIM_STEP1_2=1 -DBGL_OPTIM_STEP1_1=1
C_COMPILE_SHL = $(CC_SHL) @CFLAGS@ $(INCLUDE_DIR)
@VPATH@
AD_BGL_OBJECTS = \
ad_bgl_open.o ad_bgl_close.o \
ad_bgl_fcntl.o \
ad_bgl_read.o ad_bgl_write.o ad_bgl_getsh.o ad_bgl_setsh.o \
ad_bgl.o ad_bgl_aggrs.o ad_bgl_pset.o ad_bgl_hints.o \
ad_bgl_rdcoll.o ad_bgl_wrcoll.o ad_bgl_tuning.o
default: $(LIBNAME)
@if [ "@ENABLE_SHLIB@" != "none" ] ; then \
$(MAKE) $(SHLIBNAME).la ;\
fi
.SUFFIXES: $(SUFFIXES) .p .lo
.c.o:
$(CC) $(CFLAGS) -c $<
.c.lo:
$(C_COMPILE_SHL) -c $<
@mv -f $*.o $*.lo
$(LIBNAME): $(AD_BGL_OBJECTS)
$(AR) $(LIBNAME) $(AD_BGL_OBJECTS)
AD_BGL_LOOBJECTS=$(AD_BGL_OBJECTS:.o=.lo)
$(SHLIBNAME).la: $(AD_BGL_LOOBJECTS)
$(AR) $(SHLIBNAME).la $(AD_BGL_LOOBJECTS)
coverage:
-@for file in ${AD_BGL_OBJECTS:.o=.c} ; do \
gcov -b -f $$file ; done
clean:
@rm -f *.o *.lo

Просмотреть файл

@ -0,0 +1,57 @@
/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp. 2007, 2008 */
/**
* \file ad_bgl.c
* \brief ???
*/
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (C) 2001 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*/
#include "ad_bgl.h"
/* adioi.h has the ADIOI_Fns_struct define */
#include "adioi.h"
/*
 * ADIO operation table for the BGL driver.
 *
 * The initializer is positional: each entry must stay in the order of the
 * members of struct ADIOI_Fns_struct (defined in adioi.h); the trailing
 * comment on every line names the slot the entry is meant to fill.
 *
 * Compile-time selection:
 *   BGL_OPTIM_STEP1_2      - non-zero picks the BGL-specific collective
 *                            strided read/write, otherwise the generic ones.
 *   BGL_OPTIM_STEP1_1      - non-zero picks the BGL-specific SetInfo,
 *                            otherwise the generic one.
 *   ROMIO_HAVE_WORKING_AIO - when defined, the generic nonblocking contiguous
 *                            routines are used (and a compile-time warning is
 *                            emitted); otherwise the FAKE variants are used.
 */
struct ADIOI_Fns_struct ADIO_BGL_operations = {
ADIOI_BGL_Open, /* Open */
ADIOI_BGL_ReadContig, /* ReadContig */
ADIOI_BGL_WriteContig, /* WriteContig */
#if BGL_OPTIM_STEP1_2
ADIOI_BGL_ReadStridedColl, /* ReadStridedColl */
ADIOI_BGL_WriteStridedColl, /* WriteStridedColl */
#else
ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */
ADIOI_GEN_WriteStridedColl, /* WriteStridedColl */
#endif
ADIOI_GEN_SeekIndividual, /* SeekIndividual */
ADIOI_BGL_Fcntl, /* Fcntl */
#if BGL_OPTIM_STEP1_1
ADIOI_BGL_SetInfo, /* SetInfo */
#else
ADIOI_GEN_SetInfo, /* SetInfo */
#endif
ADIOI_BGL_ReadStrided, /* ReadStrided */
ADIOI_BGL_WriteStrided, /* WriteStrided */
ADIOI_BGL_Close, /* Close */
#ifdef ROMIO_HAVE_WORKING_AIO
#warning Consider BG support for NFS before enabling this.
ADIOI_GEN_IreadContig, /* IreadContig */
ADIOI_GEN_IwriteContig, /* IwriteContig */
#else
ADIOI_FAKE_IreadContig, /* IreadContig */
ADIOI_FAKE_IwriteContig, /* IwriteContig */
#endif
ADIOI_GEN_IODone, /* ReadDone */
ADIOI_GEN_IODone, /* WriteDone */
ADIOI_GEN_IOComplete, /* ReadComplete */
ADIOI_GEN_IOComplete, /* WriteComplete */
ADIOI_GEN_IreadStrided, /* IreadStrided */
ADIOI_GEN_IwriteStrided, /* IwriteStrided */
ADIOI_GEN_Flush, /* Flush */
ADIOI_GEN_Resize, /* Resize */
ADIOI_GEN_Delete, /* Delete */
};

Просмотреть файл

@ -0,0 +1,94 @@
/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp. 2007, 2008 */
/* ---------------------------------------------------------------- */
/**
* \file ad_bgl.h
* \brief ???
*/
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*/
#ifndef AD_BGL_INCLUDE
#define AD_BGL_INCLUDE
#include <unistd.h>
#include <stdlib.h>
#include <sys/types.h>
#include <fcntl.h>
#include "adio.h"
#ifdef HAVE_SIGNAL_H
#include <signal.h>
#endif
#ifdef HAVE_AIO_H
#include <aio.h>
#endif
int ADIOI_BGL_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset,
int wr, void *handle);
void ADIOI_BGL_Open(ADIO_File fd, int *error_code);
void ADIOI_BGL_Close(ADIO_File fd, int *error_code);
void ADIOI_BGL_ReadContig(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status, int
*error_code);
void ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status, int
*error_code);
#if 0
void ADIOI_BGL_IwriteContig(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Request *request, int
*error_code);
void ADIOI_BGL_IreadContig(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Request *request, int
*error_code);
int ADIOI_BGL_ReadDone(ADIO_Request *request, ADIO_Status *status, int
*error_code);
int ADIOI_BGL_WriteDone(ADIO_Request *request, ADIO_Status *status, int
*error_code);
void ADIOI_BGL_ReadComplete(ADIO_Request *request, ADIO_Status *status, int
*error_code);
void ADIOI_BGL_WriteComplete(ADIO_Request *request, ADIO_Status *status,
int *error_code);
#endif
void ADIOI_BGL_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int
*error_code);
void ADIOI_BGL_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code);
void ADIOI_BGL_WriteStrided(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status, int
*error_code);
void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status, int
*error_code);
void ADIOI_BGL_ReadStridedColl(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status, int
*error_code);
void ADIOI_BGL_WriteStridedColl(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status, int
*error_code);
void ADIOI_BGL_Get_shared_fp(ADIO_File fd, int size, ADIO_Offset *shared_fp, int *error_code);
void ADIOI_BGL_Set_shared_fp(ADIO_File fd, ADIO_Offset offset, int *error_code);
#include "ad_bgl_tuning.h"
#endif

Просмотреть файл

@ -0,0 +1,970 @@
/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp. 2007, 2008 */
/**
* \file ad_bgl_aggrs.c
* \brief The externally used functions from this file are declared in ad_bgl_aggrs.h
*/
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*/
#include "adio.h"
#include "adio_cb_config_list.h"
#include "ad_bgl.h"
#include "ad_bgl_pset.h"
#include "ad_bgl_aggrs.h"
int aggrsInPsetSize=0;
int *aggrsInPset=NULL;
/* forward declaration */
static void
ADIOI_BGL_compute_agg_ranklist_serial ( ADIO_File fd,
const ADIOI_BGL_ConfInfo_t *confInfo,
ADIOI_BGL_ProcInfo_t *all_procInfo,
int *aggrsInPset );
/*
 * Compute the aggregator-related parameters that are required by the
 * two-phase collective I/O of ADIO.  The parameters are
 *   . the number of aggregators (proxies) : fd->hints->cb_nodes
 *   . the ranks of the aggregators        : fd->hints->ranklist
 * Computing these two parameters in a BGL-PSET-aware way lets the default
 * two-phase collective I/O of ADIO work more efficiently.
 *
 * This routine is collective over fd->comm: every rank contributes its
 * personality record to rank 0, rank 0 selects the aggregators, and the
 * result is then distributed to all ranks.
 *
 * fd               - file handle; fd->comm and fd->hints are used.
 * n_aggrs_per_pset - forwarded unchanged to ADIOI_BGL_persInfo_init().
 *
 * Side effects: (re)allocates the file-scope aggrsInPset buffer and updates
 * aggrsInPsetSize.
 *
 * Returns 0 unconditionally.
 */
int
ADIOI_BGL_gen_agg_ranklist(ADIO_File fd, int n_aggrs_per_pset)
{
    int r, s;
    ADIOI_BGL_ProcInfo_t *procInfo, *all_procInfo;
    ADIOI_BGL_ConfInfo_t *confInfo;

    MPI_Comm_size( fd->comm, &s );
    MPI_Comm_rank( fd->comm, &r );

    /* Collect individual BGL personality information */
    confInfo = ADIOI_BGL_ConfInfo_new ();
    procInfo = ADIOI_BGL_ProcInfo_new ();
    ADIOI_BGL_persInfo_init( confInfo, procInfo, s, r, n_aggrs_per_pset );

    /* Gather BGL personality information onto process 0.  The receive
       buffer is allocated on every rank (not only on rank 0) so that it can
       be released unconditionally below. */
    all_procInfo = ADIOI_BGL_ProcInfo_new_n (s);

    /* Layout of aggrsInPset as filled in on rank 0:
         [0]            number of PSETs
         [1..n_psets]   number of aggregators chosen in each PSET
       There may be as many PSETs as there are processes, so s + 1 slots are
       required; allocating only s slots lets the last per-PSET entry land
       one element past the end of the buffer. */
    if (s > aggrsInPsetSize)
    {
        if (aggrsInPset) ADIOI_Free(aggrsInPset);
        aggrsInPset = (int *) ADIOI_Malloc ((s + 1) * sizeof(int));
        aggrsInPsetSize = s;
    }

    MPI_Gather( (void *)procInfo,     sizeof(ADIOI_BGL_ProcInfo_t), MPI_BYTE,
                (void *)all_procInfo, sizeof(ADIOI_BGL_ProcInfo_t), MPI_BYTE,
                0,
                fd->comm );

    /* Compute a list of the ranks of chosen IO proxy CN on process 0 */
    if (r == 0) {
        ADIOI_BGL_compute_agg_ranklist_serial (fd, confInfo, all_procInfo, aggrsInPset);
    }
    ADIOI_BGL_ProcInfo_free (all_procInfo);

    /* Send the info of IO proxy CN to all processes and keep the info in
       the fd->hints struct.  Declared in adio_cb_config_list.h */
    ADIOI_cb_bcast_rank_map(fd);

    /* Broadcast the BGL-GPFS related file domain info.
       NOTE(review): the length is derived from cb_nodes (the aggregator
       count) while the buffer holds n_psets + 1 entries; confirm that this
       always covers every entry a receiver needs. */
    MPI_Bcast( (void *)aggrsInPset,
               fd->hints->cb_nodes * sizeof(int), MPI_BYTE,
               0,
               fd->comm );

    ADIOI_BGL_persInfo_free( confInfo, procInfo );
    return 0;
}
/*
 * Select the aggregators of one PSET.  This is kept as a separate routine to
 * make it easy to try different proxy-selection criteria.
 *
 * confInfo      - job-wide configuration (aggRatio, isVNM, cpuidSize are read)
 * pset_procInfo - records of the nCN_in_pset processes that belong to the PSET
 * nCN_in_pset   - number of valid entries in pset_procInfo
 * tmp_ranklist  - output; receives the ranks of the selected aggregators
 *
 * Returns the number of ranks written to tmp_ranklist.  The result never
 * exceeds nCN_in_pset: asking for more aggregators than there are processes
 * would read records outside this PSET (non-VNM case) or report entries that
 * were never filled in (VNM case).
 */
static int
ADIOI_BGL_select_agg_in_pset (const ADIOI_BGL_ConfInfo_t *confInfo,
                              ADIOI_BGL_ProcInfo_t *pset_procInfo,
                              int nCN_in_pset,
                              int *tmp_ranklist)
{
    /* first implementation, based on their rank order. */
    int i, j, k;

    /* The number of aggregators in the PSET is proportional to the CNs in the PSET */
    int nAggrs = nCN_in_pset * confInfo->aggRatio;
    if (nAggrs < ADIOI_BGL_NAGG_PSET_MIN) nAggrs = ADIOI_BGL_NAGG_PSET_MIN;

    /* ... but there cannot be more aggregators than processes to pick from */
    if (nAggrs > nCN_in_pset) nAggrs = nCN_in_pset;

    /* for not virtual-node-mode, pick aggregators in this PSET based on the
       order of the global rank */
    if (!confInfo->isVNM)
    {
        for (i=0; i<nAggrs; i++) tmp_ranklist[i] = pset_procInfo[i].rank;
        return nAggrs;
    }

    /* for virtual-node-mode, try to pick from CPU-0 first, then CPU-1,
       then ... CPU-n, stopping as soon as enough aggregators were found */
    j = 0;
    for (k=0; k < confInfo->cpuidSize && j < nAggrs; k++) {
        for (i=0; i < nCN_in_pset && j < nAggrs; i++) {
            if (pset_procInfo[i].cpuid == k)
                tmp_ranklist[j++] = pset_procInfo[i].rank;
        }
    }

    /* report what was actually stored; this equals nAggrs whenever every
       process carries a cpuid below confInfo->cpuidSize */
    return j;
}
/*
 * Pick IO aggregators according to the underlying PSET organization and
 * store the ranks of the proxy CNs in tmp_ranklist.
 *   - The primary order of tmp_ranklist is the PSET (in order of first
 *     appearance in all_procInfo).
 *   - The secondary order is decided by ADIOI_BGL_select_agg_in_pset() and
 *     is therefore adjustable.
 *
 * On return aggrsInPset[0] holds the number of PSETs found and
 * aggrsInPset[k+1] the number of aggregators selected in the k-th PSET.
 *
 * Returns the total number of aggregators written to tmp_ranklist.
 */
static int
ADIOI_BGL_compute_agg_ranklist_serial_do (const ADIOI_BGL_ConfInfo_t *confInfo,
                                          ADIOI_BGL_ProcInfo_t *all_procInfo,
                                          int *aggrsInPset,
                                          int *tmp_ranklist)
{
    int proc, j;

    /* Step 1: collect the distinct PSET numbers.  MIN/MAX is not used
     * because the PSETs owned by the job need not be numbered contiguously. */
    int *known_psets = (int *) ADIOI_Malloc ( confInfo->nProcs * sizeof(int) );
    int n_psets = 0;

    for (proc = 0; proc < confInfo->nProcs; proc++) {
        /* look backwards through the PSETs seen so far; j ends at -1 when
           this PSET number is new */
        j = n_psets;
        while (--j >= 0 && all_procInfo[proc].psetNum != known_psets[j])
            ;
        if (j < 0) {
            known_psets[n_psets] = all_procInfo[proc].psetNum;
            n_psets++;
        }
    }

    /* Step 2: bucket sort.  Every PSET owns a bucket of virtualPsetSize
     * consecutive records; PsetIdx[k] is the next free slot of bucket k. */
    ADIOI_BGL_ProcInfo_t *buckets = ADIOI_BGL_ProcInfo_new_n ( n_psets * confInfo->virtualPsetSize );
    int *PsetIdx = (int *) ADIOI_Malloc ( n_psets * sizeof(int) );
    AD_BGL_assert ( (PsetIdx != NULL) );

    for (j = 0; j < n_psets; j++)
        PsetIdx[j] = j*confInfo->virtualPsetSize;

    for (proc = 0; proc < confInfo->nProcs; proc++) {
        int pset_id = all_procInfo[proc].psetNum;

        j = n_psets;
        while (--j >= 0 && pset_id != known_psets[j])
            ;
        AD_BGL_assert ( (j >= 0) );   /* got to find a PSET bucket */

        buckets[ PsetIdx[j] ] = all_procInfo[proc];
        PsetIdx[j] += 1;
    }

    ADIOI_Free(known_psets);

    /* Step 3: let every PSET contribute its aggregators, appended
     * contiguously to tmp_ranklist. */
    int naggs = 0;
    for (j = 0; j < n_psets; j++) {
        ADIOI_BGL_ProcInfo_t *bucket = buckets + j*confInfo->virtualPsetSize;

        /* number of CNs actually placed in this bucket -- the PSET may not
           be full */
        int filled = PsetIdx[j] - j*confInfo->virtualPsetSize;

        int chosen = ADIOI_BGL_select_agg_in_pset( confInfo,
                                                   bucket,
                                                   filled,
                                                   tmp_ranklist + naggs);
        aggrsInPset[j+1] = chosen;
        naggs += chosen;
    }
    aggrsInPset[0] = n_psets;

    /* release the scratch space */
    ADIOI_Free ( PsetIdx );
    ADIOI_BGL_ProcInfo_free ( buckets );

    return naggs;
}
/*
 * Compute the ranklist of the IO aggregators and install it in fd->hints:
 * fd->hints->cb_nodes receives the aggregator count and fd->hints->ranklist
 * a freshly allocated copy of their ranks (a previous ranklist is released).
 *
 * aggrsInPset is passed through to
 * ADIOI_BGL_compute_agg_ranklist_serial_do(), which fills it in.
 *
 * Note: the DEBUG and VERIFY macros are defined here and remain defined for
 * the rest of the translation unit.
 */
static void
ADIOI_BGL_compute_agg_ranklist_serial ( ADIO_File fd,
                                        const ADIOI_BGL_ConfInfo_t *confInfo,
                                        ADIOI_BGL_ProcInfo_t *all_procInfo,
                                        int *aggrsInPset )
{
#   define DEBUG 0
#   if DEBUG
    int i;
#   endif
    int  naggs;
    int *scratch;

    /* worst case: every process becomes an aggregator */
    scratch = (int *) ADIOI_Malloc (confInfo->nProcs * sizeof(int));

#   if DEBUG
    for (i=0; i<confInfo->nProcs; i++)
        printf( "\tcpuid %1d, rank = %6d\n", all_procInfo[i].cpuid, all_procInfo[i].rank );
#   endif

    naggs = ADIOI_BGL_compute_agg_ranklist_serial_do (confInfo,
                                                      all_procInfo,
                                                      aggrsInPset,
                                                      scratch);

#   define VERIFY 0
#   if VERIFY
    printf( "\tconfInfo = %3d,%3d,%3d,%3d,%3d,%3d,%.4f; naggs = %d\n",
            confInfo->PsetSize ,
            confInfo->numPsets ,
            confInfo->isVNM ,
            confInfo->virtualPsetSize ,
            confInfo->nProcs ,
            confInfo->nAggrs ,
            confInfo->aggRatio ,
            naggs );
#   endif

#   if DEBUG
    for (i=0; i<naggs; i++)
        printf( "\taggr %-4d = %6d\n", i, scratch[i] );
#   endif

    /* replace whatever ranklist the hints held before with the new one */
    if (fd->hints->ranklist != NULL)
        ADIOI_Free (fd->hints->ranklist);

    fd->hints->cb_nodes = naggs;
    fd->hints->ranklist = (int *) ADIOI_Malloc (naggs * sizeof(int));
    memcpy( fd->hints->ranklist, scratch, naggs*sizeof(int) );

    ADIOI_Free( scratch );
}
/*
 * Compute a file domain partition among the I/O aggregators that is based on
 * the dynamic access range and aligned to the GPFS block size.
 *
 * The I/O workload is divided among "nprocs_for_coll" processes by
 * (logically) dividing the file into file domains (FDs); each process may
 * directly access only its own file domain.  Every aggregator gets a file
 * domain that is aligned to the GPFS block size, so no GPFS file block is
 * falsely shared among multiple I/O nodes.
 *
 * st_offsets, end_offsets - first/last byte accessed by each of the nprocs
 *                           processes
 * nprocs_for_coll         - number of aggregators
 * min_st_offset_ptr       - out: smallest start offset over all processes
 * fd_start_ptr/fd_end_ptr - out: newly allocated arrays (nprocs_for_coll
 *                           entries each) with the first/last byte of every
 *                           file domain; ownership passes to the caller
 * fd_size_ptr             - out: size of the first file domain
 * fs_ptr                  - optional ADIOI_BGL_fs pointer; a non-zero
 *                           blksize found there replaces the 1 MiB default
 */
void ADIOI_BGL_GPFS_Calc_file_domains(ADIO_Offset *st_offsets,
                                      ADIO_Offset *end_offsets,
                                      int nprocs,
                                      int nprocs_for_coll,
                                      ADIO_Offset *min_st_offset_ptr,
                                      ADIO_Offset **fd_start_ptr,
                                      ADIO_Offset **fd_end_ptr,
                                      ADIO_Offset *fd_size_ptr,
                                      void *fs_ptr)
{
    ADIO_Offset min_st_offset, max_end_offset;
    ADIO_Offset *fd_start, *fd_end, *fd_size;
    int i;

    /* block size: 1M unless the file system description says otherwise
       (a null pointer or a zero blksize is ignored) */
    __blksize_t blksize = 1048576;
    if (fs_ptr && ((ADIOI_BGL_fs*)fs_ptr)->blksize)
        blksize = ((ADIOI_BGL_fs*)fs_ptr)->blksize;

    /* the byte range touched by all requests together */
    min_st_offset  = st_offsets [0];
    max_end_offset = end_offsets[0];
    for (i = 1; i < nprocs; i++) {
        min_st_offset  = ADIOI_MIN(min_st_offset,  st_offsets[i]);
        max_end_offset = ADIOI_MAX(max_end_offset, end_offsets[i]);
    }

    /* widen that range to whole GPFS blocks and remember by how much each
       end was moved (the round-off) */
    ADIO_Offset gpfs_ub       = (max_end_offset + blksize-1) / blksize * blksize - 1;
    ADIO_Offset gpfs_lb       = min_st_offset / blksize * blksize;
    ADIO_Offset gpfs_ub_rdoff = gpfs_ub - max_end_offset;
    ADIO_Offset gpfs_lb_rdoff = min_st_offset - gpfs_lb;
    ADIO_Offset fd_gpfs_range = gpfs_ub - gpfs_lb + 1;

    int naggs = nprocs_for_coll;

    fd_size       = (ADIO_Offset *) ADIOI_Malloc(nprocs_for_coll * sizeof(ADIO_Offset));
    *fd_start_ptr = (ADIO_Offset *) ADIOI_Malloc(nprocs_for_coll * sizeof(ADIO_Offset));
    *fd_end_ptr   = (ADIO_Offset *) ADIOI_Malloc(nprocs_for_coll * sizeof(ADIO_Offset));
    fd_start = *fd_start_ptr;
    fd_end   = *fd_end_ptr;

    /* hand out whole blocks: the first naggs_small aggregators get
       nb_cn_small blocks each, the remaining naggs_large get one more */
    ADIO_Offset n_gpfs_blk  = fd_gpfs_range / blksize;
    ADIO_Offset nb_cn_small = n_gpfs_blk / naggs;
    ADIO_Offset naggs_large = n_gpfs_blk % naggs;
    ADIO_Offset naggs_small = naggs - naggs_large;

    for (i = 0; i < naggs; i++) {
        ADIO_Offset nblocks = (i < naggs_small) ? nb_cn_small : nb_cn_small + 1;
        fd_size[i] = nblocks * blksize;
    }

    /* the first and the last domain give back the round-off */
    fd_size[0]       -= gpfs_lb_rdoff;
    fd_size[naggs-1] -= gpfs_ub_rdoff;

    /* lay the domains out back to back, starting at the smallest offset */
    ADIO_Offset next = min_st_offset;
    for (i = 0; i < naggs; i++) {
        fd_start[i] = next;
        next += fd_size[i];
        fd_end[i] = next - 1;
    }

    *fd_size_ptr       = fd_size[0];
    *min_st_offset_ptr = min_st_offset;

    ADIOI_Free (fd_size);
}
/*
* deprecated
*
void ADIOI_BGL_GPFS_Calc_file_domain0(ADIO_Offset *st_offsets,
ADIO_Offset *end_offsets,
int nprocs,
int nprocs_for_coll,
ADIO_Offset *min_st_offset_ptr,
ADIO_Offset **fd_start_ptr,
ADIO_Offset **fd_end_ptr,
ADIO_Offset *fd_size_ptr)
{
ADIO_Offset min_st_offset, max_end_offset, *fd_start, *fd_end, *fd_size;
int i;
static int GPFS_BSIZE=1048576;
* find the range of all the requests *
min_st_offset = st_offsets [0];
max_end_offset = end_offsets[0];
for (i=1; i<nprocs; i++) {
min_st_offset = ADIOI_MIN(min_st_offset, st_offsets[i]);
max_end_offset = ADIOI_MAX(max_end_offset, end_offsets[i]);
}
* determine the "file domain (FD)" of each process, i.e., the portion of
the file that will be "owned" by each process *
* GPFS specific, pseudo starting/end point has to round to GPFS_BSIZE *
ADIO_Offset gpfs_ub = (max_end_offset +GPFS_BSIZE-1) / GPFS_BSIZE * GPFS_BSIZE - 1;
ADIO_Offset gpfs_lb = min_st_offset / GPFS_BSIZE * GPFS_BSIZE;
ADIO_Offset gpfs_ub_rdoff = (max_end_offset +GPFS_BSIZE-1) / GPFS_BSIZE * GPFS_BSIZE - 1 - max_end_offset;
ADIO_Offset gpfs_lb_rdoff = min_st_offset - min_st_offset / GPFS_BSIZE * GPFS_BSIZE;
ADIO_Offset fd_gpfs_range = gpfs_ub - gpfs_lb + 1;
* all computation of partition is based on the rounded pseudo-range *
ADIO_Offset fds_ub = (fd_gpfs_range +nprocs_for_coll-1) / nprocs_for_coll;
ADIO_Offset fds_lb = fd_gpfs_range / nprocs_for_coll;
int naggs = nprocs_for_coll;
int npsets = aggrsInPset[0]; * special meaning for element 0 *
fd_size = (ADIO_Offset *) ADIOI_Malloc(naggs * sizeof(ADIO_Offset));
*fd_start_ptr = (ADIO_Offset *) ADIOI_Malloc(naggs * sizeof(ADIO_Offset));
*fd_end_ptr = (ADIO_Offset *) ADIOI_Malloc(naggs * sizeof(ADIO_Offset));
fd_start = *fd_start_ptr;
fd_end = *fd_end_ptr;
* some pre-computation to determine rough ratio of when to up-fit, when to low-fit *
* 1. get the estimated data per pset *
* 2. determine a factor between up and down *
int avg_aggrsInPset = (naggs +npsets-1)/npsets;
ADIO_Offset avg_bytes_perPset = fd_gpfs_range / npsets;
ADIO_Offset resid = avg_bytes_perPset % GPFS_BSIZE;
ADIO_Offset downr = GPFS_BSIZE - resid;
int small = (resid < downr);
int ratio = downr == 0 ? npsets + 2 : (resid +downr-1)/downr;
if (small) ratio = resid == 0 ? npsets + 2 : (downr +resid-1)/resid;
* go through aggrsInfo of all PSETs *
ADIO_Offset fd_range = fd_gpfs_range;
int aggr = 0, pset;
for (pset=0; pset<npsets; pset++) {
ADIO_Offset fds_try = fds_lb;
int my_naggs = aggrsInPset[pset+1];
ADIO_Offset fds_pset;
* Last pset will deal with the residuals *
if (pset == npsets-1)
fds_pset = fd_range;
else
{
int cond1 = ((pset+1) % ratio == 0);
int cond2 = ((pset+1) % ratio != 0);
if (small) {
int temp = cond1; cond1 = cond2; cond2 = temp;
}
if (cond1) {
fds_pset = fds_try * my_naggs;
if (fds_pset % GPFS_BSIZE) // align to GPFS_BSIZE
fds_pset = ((fds_pset +GPFS_BSIZE-1)/GPFS_BSIZE) * GPFS_BSIZE;
}
if (cond2)
{
fds_try = fds_ub;
fds_pset = fds_try * my_naggs;
if (fds_pset % GPFS_BSIZE) // align to GPFS_BSIZE
fds_pset = (fds_pset / GPFS_BSIZE) * GPFS_BSIZE;
}
}
* for aggrs in each PSET, divide evenly the data range *
#define CN_ALIGN 1
#if !CN_ALIGN
fd_range -= fds_pset;
if ( pset == 0 ) fds_pset -= gpfs_lb_rdoff;
if ( pset == npsets-1 ) fds_pset -= gpfs_ub_rdoff;
int p;
for (p=0; p<my_naggs; p++) {
fd_size[aggr] = (fds_pset +my_naggs-1) / my_naggs;
if (p== my_naggs-1)
fd_size[aggr] -= (fd_size[aggr]*my_naggs - fds_pset);
aggr++;
}
#else
ADIO_Offset avg_bytes_perP = fds_pset / my_naggs;
ADIO_Offset resid2 = avg_bytes_perP % GPFS_BSIZE;
ADIO_Offset downr2 = GPFS_BSIZE - resid2;
int small2 = (resid2 < downr2);
int ratio2 = downr2 == 0 ? my_naggs + 2 : (resid2 +downr2-1)/downr2;
if (small2) ratio2 = resid2 == 0 ? my_naggs + 2 : (downr2 +resid2-1)/resid2;
ADIO_Offset accu = 0;
int p;
for (p=0; p<my_naggs; p++) {
int cond1 = ((p+1) % ratio2 == 0);
int cond2 = ((p+1) % ratio2 != 0);
if (small2) {
int temp = cond1; cond1 = cond2; cond2 = temp;
}
fd_size[aggr] = avg_bytes_perP;
if (cond2) fd_size[aggr] = ((fd_size[aggr] +GPFS_BSIZE-1)/GPFS_BSIZE) * GPFS_BSIZE;
if (cond1) fd_size[aggr] = ((fd_size[aggr] )/GPFS_BSIZE) * GPFS_BSIZE;
if (p== my_naggs-1)
fd_size[aggr] = (fds_pset - accu);
accu += fd_size[aggr];
fd_range -= fd_size[aggr];
aggr++;
}
#endif
}
* after scheduling, the first and the last region has to remove the round-off effect *
#if CN_ALIGN
fd_size[0] -= gpfs_lb_rdoff;
fd_size[naggs-1] -= gpfs_ub_rdoff;
#endif
* compute the file domain for each aggr *
ADIO_Offset offset = min_st_offset;
for (aggr=0; aggr<naggs; aggr++) {
fd_start[aggr] = offset;
fd_end [aggr] = offset + fd_size[aggr] - 1;
offset += fd_size[aggr];
}
*
printf( "\t%6d : %12qd:%12qd, %12qd:%12qd:%12qd, %12qd:%12qd:%12qd\n",
naggs,
min_st_offset,
max_end_offset,
fd_start[0],
fd_end [0],
fd_size [0],
fd_start[naggs-1],
fd_end [naggs-1],
fd_size [naggs-1] );
*
*fd_size_ptr = fd_size[0];
*min_st_offset_ptr = min_st_offset;
ADIOI_Free (fd_size);
}
*/
/*
 * Look up myrank in the aggregator list fd->hints->ranklist.
 *
 * Returns the position of the first matching entry among the first
 * fd->hints->cb_nodes entries, i.e. the index of this process in the
 * aggregator list when it is an IO aggregator, and -1 otherwise.
 */
int ADIOI_BGL_Aggrs_index( ADIO_File fd, int myrank )
{
    int pos = 0;

    while (pos < fd->hints->cb_nodes) {
        if (fd->hints->ranklist[pos] == myrank)
            return pos;
        ++pos;
    }

    return -1;
}
/*
 * A more general aggregator lookup that does not assume every aggregator
 * hosts a file domain of the same size.
 *
 * Bisects over the cb_nodes file domains until it finds the index k with
 * fd_start[k] <= off <= fd_end[k], clips *len to the number of bytes that
 * domain still holds starting at `off`, and returns fd->hints->ranklist[k].
 *
 * fd_size is part of the interface but is not consulted here.
 */
int ADIOI_BGL_Calc_aggregator(ADIO_File fd,
                              ADIO_Offset off,
                              ADIO_Offset min_off,
                              ADIO_Offset *len,
                              ADIO_Offset fd_size,
                              ADIO_Offset *fd_start,
                              ADIO_Offset *fd_end)
{
    int lo, hi, idx;
    ADIO_Offset bytes_left;

    AD_BGL_assert ( (off <= fd_end[fd->hints->cb_nodes-1] && off >= min_off && fd_start[0] >= min_off ) );

    /* bisection: idx converges on the domain that contains off */
    lo  = 0;
    hi  = fd->hints->cb_nodes;
    idx = fd->hints->cb_nodes / 2;
    for (;;) {
        if (off > fd_end[idx]) {
            /* off lies in a later domain */
            lo  = idx;
            idx = (idx + hi) / 2;
        }
        else if (off < fd_start[idx]) {
            /* off lies in an earlier domain */
            hi  = idx;
            idx = (idx + lo) / 2;
        }
        else {
            break;
        }
    }

    /*
     * Aggregators may own different amounts of data, so fd_end[] is used to
     * learn how much this one holds from `off` onward.  The +1 converts the
     * inclusive end offset into a length.
     */
    bytes_left = fd_end[idx] + 1 - off;
    if (bytes_left > 0 && bytes_left < *len) {
        /* this file domain only has part of the requested contig. region */
        *len = bytes_left;
    }

    /* map the domain index to a rank through the aggregator rank list */
    return fd->hints->ranklist[idx];
}
/*
 * ADIOI_BGL_Calc_my_req() replaces ADIOI_Calc_my_req(), whose default
 * implementation is specific to static file domain partitioning.
 *
 * It works out which portions of this process's access requests are located
 * in the file domains of the various processes (including this one).
 *
 * Results:
 *   *count_my_req_procs_ptr     number of processes that receive at least
 *                               one piece from this process
 *   *count_my_req_per_proc_ptr  newly allocated array (nprocs entries): number
 *                               of contiguous pieces per process
 *   *my_req_ptr                 newly allocated array (nprocs entries) of
 *                               per-process offset/length lists
 *   *buf_idx_ptr                newly allocated array (nprocs entries): running
 *                               byte index at which the first piece for each
 *                               process starts, or -1 if it gets none
 */
void ADIOI_BGL_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list, int *len_list,
                           int contig_access_count, ADIO_Offset
                           min_st_offset, ADIO_Offset *fd_start,
                           ADIO_Offset *fd_end, ADIO_Offset fd_size,
                           int nprocs,
                           int *count_my_req_procs_ptr,
                           int **count_my_req_per_proc_ptr,
                           ADIOI_Access **my_req_ptr,
                           int **buf_idx_ptr)
{
    int *pieces_per_proc, *first_byte_idx;
    int n_target_procs = 0;
    int a, p, slot, owner;
    ADIO_Offset piece_len, left, bytes_seen, pos;
    ADIOI_Access *reqs;

    /* pieces_per_proc[p] = no. of contiguous pieces of this process that
       land in process p's file domain.  Calloc zero-fills it; it has nprocs
       entries so that it can be used in an MPI_Alltoall later on. */
    pieces_per_proc = (int *) ADIOI_Calloc(nprocs,sizeof(int));
    *count_my_req_per_proc_ptr = pieces_per_proc;

    /* first_byte_idx is relevant only if buftype_is_contig: entry p is the
       index into user_buf where data exchanged with process p should be
       placed, which allows receives without an extra buffer.  -1 = unset. */
    first_byte_idx = (int *) ADIOI_Malloc(nprocs*sizeof(int));
    for (p = 0; p < nprocs; p++) {
        first_byte_idx[p] = -1;
    }

    /* Pass 1: only count the pieces, to know how much space my_req needs.
     * contig_access_count was calculated way back in ADIOI_Calc_my_off_len().
     */
    for (a = 0; a < contig_access_count; a++) {
        /* nothing to do for an empty access */
        if (len_list[a] == 0) continue;

        pos  = offset_list[a];
        left = len_list[a];
        do {
            /* hand in everything that is left; the aggregator lookup trims
             * piece_len to what the file domain holding `pos` can supply */
            piece_len = left;
            owner = ADIOI_BGL_Calc_aggregator(fd, pos, min_st_offset, &piece_len,
                                              fd_size, fd_start, fd_end);
            pieces_per_proc[owner]++;
            pos  += piece_len;   /* first byte not yet assigned */
            left -= piece_len;   /* bytes still to be assigned */
        } while (left > 0);
    }

    /* allocate my_req together with its offset and length arrays */
    reqs = (ADIOI_Access *) ADIOI_Malloc(nprocs*sizeof(ADIOI_Access));
    *my_req_ptr = reqs;
    for (p = 0; p < nprocs; p++) {
        reqs[p].count = 0;   /* incremented while filling in below */
        if (!pieces_per_proc[p]) continue;

        reqs[p].offsets = (ADIO_Offset *)
            ADIOI_Malloc(pieces_per_proc[p] * sizeof(ADIO_Offset));
        reqs[p].lens = (int *)
            ADIOI_Malloc(pieces_per_proc[p] * sizeof(int));
        n_target_procs++;
    }

    /* Pass 2: walk the accesses again and record every piece */
    bytes_seen = 0;
    for (a = 0; a < contig_access_count; a++) {
        /* nothing to do for an empty access */
        if (len_list[a] == 0) continue;

        pos  = offset_list[a];
        left = len_list[a];
        do {
            piece_len = left;
            owner = ADIOI_BGL_Calc_aggregator(fd, pos, min_st_offset, &piece_len,
                                              fd_size, fd_start, fd_end);

            /* remember where the first piece for this process starts */
            if (first_byte_idx[owner] == -1) {
                first_byte_idx[owner] = (int) bytes_seen;
            }

            /* append (offset, length) to that process's list */
            slot = reqs[owner].count;
            reqs[owner].offsets[slot] = pos;
            reqs[owner].lens[slot]    = (int) piece_len;
            reqs[owner].count++;

            bytes_seen += piece_len;
            pos        += piece_len;
            left       -= piece_len;
        } while (left > 0);
    }

#ifdef AGG_DEBUG
    for (p=0; p<nprocs; p++) {
        if (pieces_per_proc[p] > 0) {
            FPRINTF(stdout, "data needed from %d (count = %d):\n", p,
                    reqs[p].count);
            for (slot=0; slot < reqs[p].count; slot++) {
                FPRINTF(stdout, " off[%d] = %Ld, len[%d] = %d\n", slot,
                        reqs[p].offsets[slot], slot, reqs[p].lens[slot]);
            }
        }
    }
#if 0
    for (p=0; p<nprocs; p++) {
        FPRINTF(stdout, "buf_idx[%d] = 0x%x\n", p, first_byte_idx[p]);
    }
#endif
#endif

    *count_my_req_procs_ptr = n_target_procs;
    *buf_idx_ptr = first_byte_idx;
}
/*
 * Distance from `base` up to `ptr`, expressed in elements of `elem_size`
 * bytes, as the int displacement that MPI_Alltoallv expects.
 *
 * The arithmetic is done in size_t so that addresses are not truncated on
 * targets whose pointers are wider than `unsigned`.  The assert catches a
 * distance that does not fit in an int instead of silently handing a bogus
 * displacement to MPI.
 */
static int ADIOI_BGL_displ_in_elems(const void *ptr, const void *base,
                                    size_t elem_size)
{
    size_t nelems = ((size_t) ptr - (size_t) base) / elem_size;
    int displ = (int) nelems;

    AD_BGL_assert((displ >= 0 && (size_t) displ == nelems));
    return displ;
}

/*
 * ADIOI_BGL_Calc_others_req
 *
 * Determines which requests of other processes lie in this process's file
 * domain by exchanging request counts (MPI_Alltoall) and then the offset and
 * length lists (two MPI_Alltoallv calls) over fd->comm.
 *
 * param[in]  count_my_req_procs         Number of processes whose file domain my
 *                                       request touches (not used here).
 * param[in]  count_my_req_per_proc      count_my_req_per_proc[i] gives the no. of
 *                                       contig. requests of this process in
 *                                       process i's file domain.
 * param[in]  my_req                     A structure defining my request
 * param[in]  nprocs                     Number of nodes in the block
 * param[in]  myrank                     Rank of this node (not used here)
 * param[out] count_others_req_procs_ptr Number of processes whose requests lie in
 *                                       my process's file domain (including my
 *                                       process itself)
 * param[out] others_req_ptr             Array of other process' requests that lie
 *                                       in my process's file domain
 */
void ADIOI_BGL_Calc_others_req(ADIO_File fd, int count_my_req_procs,
                               int *count_my_req_per_proc,
                               ADIOI_Access *my_req,
                               int nprocs, int myrank,
                               int *count_others_req_procs_ptr,
                               ADIOI_Access **others_req_ptr)
{
    /* count_others_req_procs = number of processes whose requests lie in
       this process's file domain (including this process itself)
       count_others_req_per_proc[i] indicates how many separate contiguous
       requests of proc. i lie in this process's file domain. */
    int *count_others_req_per_proc, count_others_req_procs;
    int i;
    ADIOI_Access *others_req;

    /* Parameters for MPI_Alltoallv */
    int *scounts, *sdispls, *rcounts, *rdispls;

    /* Parameters for MPI_Alltoallv.  These are the buffers, which
     * are later computed to be the lowest address of all buffers
     * to be sent/received for offsets and lengths.  Initialize to
     * the highest possible address, which is the current minimum.
     * ~(size_t)0 is used rather than a 32-bit literal so the sentinel
     * really is the highest address on every pointer width.
     */
    void *sendBufForOffsets = (void *) ~(size_t) 0,
         *sendBufForLens    = (void *) ~(size_t) 0,
         *recvBufForOffsets = (void *) ~(size_t) 0,
         *recvBufForLens    = (void *) ~(size_t) 0;

    /* first find out how much to send/recv and from/to whom */

    /* Send 1 int to each process.  count_my_req_per_proc[i] is the number of
     * requests that my process will do to the file domain owned by process[i].
     * Receive 1 int from each process.  count_others_req_per_proc[i] is the number of
     * requests that process[i] will do to the file domain owned by my process.
     */
    count_others_req_per_proc = (int *) ADIOI_Malloc(nprocs*sizeof(int));
    /* cora2a1=timebase(); */
    MPI_Alltoall(count_my_req_per_proc, 1, MPI_INT,
                 count_others_req_per_proc, 1, MPI_INT, fd->comm);
    /* total_cora2a+=timebase()-cora2a1; */

    /* Allocate storage for an array of other nodes' accesses of our
     * node's file domain.  Also allocate storage for the alltoallv
     * parameters.
     */
    *others_req_ptr = (ADIOI_Access *)
        ADIOI_Malloc(nprocs*sizeof(ADIOI_Access));
    others_req = *others_req_ptr;

    scounts = ADIOI_Malloc(nprocs*sizeof(int));
    sdispls = ADIOI_Malloc(nprocs*sizeof(int));
    rcounts = ADIOI_Malloc(nprocs*sizeof(int));
    rdispls = ADIOI_Malloc(nprocs*sizeof(int));

    /* If process[i] has any requests in my file domain,
     * initialize an ADIOI_Access structure that will describe each request
     * from process[i].  The offsets, lengths, and buffer pointers still need
     * to be obtained to complete the setting of this structure.
     */
    count_others_req_procs = 0;
    for (i=0; i<nprocs; i++) {
        if (count_others_req_per_proc[i]) {
            others_req[i].count = count_others_req_per_proc[i];

            others_req[i].offsets = (ADIO_Offset *)
                ADIOI_Malloc(count_others_req_per_proc[i]*sizeof(ADIO_Offset));
            others_req[i].lens = (int *)
                ADIOI_Malloc(count_others_req_per_proc[i]*sizeof(int));

            /* track the lowest-addressed receive buffers */
            if ( (size_t)others_req[i].offsets < (size_t)recvBufForOffsets )
                recvBufForOffsets = others_req[i].offsets;
            if ( (size_t)others_req[i].lens < (size_t)recvBufForLens )
                recvBufForLens = others_req[i].lens;

            others_req[i].mem_ptrs = (MPI_Aint *)
                ADIOI_Malloc(count_others_req_per_proc[i]*sizeof(MPI_Aint));

            count_others_req_procs++;
        }
        else
        {
            others_req[i].count = 0;
            others_req[i].offsets = NULL;
            others_req[i].lens = NULL;
        }
    }

    /* Now send the calculated offsets and lengths to respective processes */

    /************************/
    /* Exchange the offsets */
    /************************/

    /* Determine the lowest sendBufForOffsets/Lens */
    for (i=0; i<nprocs; i++)
    {
        if ( (my_req[i].count) &&
             ((size_t)my_req[i].offsets <= (size_t)sendBufForOffsets) )
            sendBufForOffsets = my_req[i].offsets;

        if ( (my_req[i].count) &&
             ((size_t)my_req[i].lens <= (size_t)sendBufForLens) )
            sendBufForLens = my_req[i].lens;
    }

    /* Calculate the displacements from the sendBufForOffsets/Lens */
    for (i=0; i<nprocs; i++)
    {
        /* Send these offsets to process i. */
        scounts[i] = count_my_req_per_proc[i];
        if ( scounts[i] == 0 )
            sdispls[i] = 0;
        else
            sdispls[i] = ADIOI_BGL_displ_in_elems(my_req[i].offsets,
                                                  sendBufForOffsets,
                                                  sizeof(ADIO_Offset));

        /* Receive these offsets from process i. */
        rcounts[i] = count_others_req_per_proc[i];
        if ( rcounts[i] == 0 )
            rdispls[i] = 0;
        else
            rdispls[i] = ADIOI_BGL_displ_in_elems(others_req[i].offsets,
                                                  recvBufForOffsets,
                                                  sizeof(ADIO_Offset));
    }

    /* Exchange the offsets */
    MPI_Alltoallv(sendBufForOffsets,
                  scounts, sdispls, ADIO_OFFSET,
                  recvBufForOffsets,
                  rcounts, rdispls, ADIO_OFFSET,
                  fd->comm);

    /************************/
    /* Exchange the lengths */
    /************************/

    for (i=0; i<nprocs; i++)
    {
        /* Send these lengths to process i. */
        scounts[i] = count_my_req_per_proc[i];
        if ( scounts[i] == 0 )
            sdispls[i] = 0;
        else
            sdispls[i] = ADIOI_BGL_displ_in_elems(my_req[i].lens,
                                                  sendBufForLens,
                                                  sizeof(int));

        /* Receive these lengths from process i. */
        rcounts[i] = count_others_req_per_proc[i];
        if ( rcounts[i] == 0 )
            rdispls[i] = 0;
        else
            rdispls[i] = ADIOI_BGL_displ_in_elems(others_req[i].lens,
                                                  recvBufForLens,
                                                  sizeof(int));
    }

    /* Exchange the lengths */
    MPI_Alltoallv(sendBufForLens,
                  scounts, sdispls, MPI_INT,
                  recvBufForLens,
                  rcounts, rdispls, MPI_INT,
                  fd->comm);

    /* Clean up */
    ADIOI_Free(count_others_req_per_proc);
    ADIOI_Free (scounts);
    ADIOI_Free (sdispls);
    ADIOI_Free (rcounts);
    ADIOI_Free (rdispls);

    *count_others_req_procs_ptr = count_others_req_procs;
}

Просмотреть файл

@ -0,0 +1,98 @@
/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp. 2007, 2008 */
/**
* \file ad_bgl_aggrs.h
* \brief ???
*/
/*
* File: ad_bgl_aggrs.h
*
* Declares functions specific for BG/L - GPFS parallel I/O solution. The implemented optimizations are:
* . Aligned file-domain partitioning, integrated in 7/28/2005
*
* In addition, following optimizations are planned:
* . Integrating multiple file-domain partitioning schemes
* (corresponding to Alok Chouhdary's persistent file domain work).
*/
#ifndef AD_BGL_AGGRS_H_
#define AD_BGL_AGGRS_H_
#include "adio.h"
#include <sys/stat.h>
extern int *aggrsInPset; /* defined in ad_bgl_aggrs.c */
/* File system (BGL) specific information -
hung off of ADIOI_FileD file descriptor (fd->fs_ptr) at open.
ADIOI_BGL_Open allocates one of these after a successful stat of the file
and ADIOI_BGL_Close frees it. */
typedef struct ADIOI_BGL_fs_s {
/* block size reported for the file: copied from st_blksize at open */
__blksize_t blksize;
} ADIOI_BGL_fs;
/* generate a list of I/O aggregators that utilizes BGL-PSET organization. */
int ADIOI_BGL_gen_agg_ranklist(ADIO_File fd, int n_aggrs_per_pset);
/* overriding ADIOI_Calc_file_domains() to apply 'aligned file domain partitioning'. */
void ADIOI_BGL_GPFS_Calc_file_domains(ADIO_Offset *st_offsets,
ADIO_Offset *end_offsets,
int nprocs,
int nprocs_for_coll,
ADIO_Offset *min_st_offset_ptr,
ADIO_Offset **fd_start_ptr,
ADIO_Offset **fd_end_ptr,
ADIO_Offset *fd_size_ptr,
void *fs_ptr);
/* a utility function for debugging */
int ADIOI_BGL_Aggrs_index(ADIO_File fd, int myrank );
/* overrides ADIOI_Calc_aggregator(), because the default implementation is
specific to static file domain partitioning */
int ADIOI_BGL_Calc_aggregator(ADIO_File fd,
ADIO_Offset off,
ADIO_Offset min_off,
ADIO_Offset *len,
ADIO_Offset fd_size,
ADIO_Offset *fd_start,
ADIO_Offset *fd_end);
/* overrides ADIOI_Calc_my_req(), because the default implementation is
specific to static file domain partitioning */
void ADIOI_BGL_Calc_my_req ( ADIO_File fd, ADIO_Offset *offset_list, int *len_list,
int contig_access_count, ADIO_Offset
min_st_offset, ADIO_Offset *fd_start,
ADIO_Offset *fd_end, ADIO_Offset fd_size,
int nprocs,
int *count_my_req_procs_ptr,
int **count_my_req_per_proc_ptr,
ADIOI_Access **my_req_ptr,
int **buf_idx_ptr);
/*
* ADIOI_Calc_others_req
*
* param[in] count_my_req_procs Number of processes whose file domain my
* request touches.
* param[in] count_my_req_per_proc count_my_req_per_proc[i] gives the no. of
* contig. requests of this process in
* process i's file domain.
* param[in] my_req A structure defining my request
* param[in] nprocs Number of nodes in the block
* param[in] myrank Rank of this node
* param[out] count_others_req_procs_ptr Number of processes whose requests lie in
* my process's file domain (including my
* process itself)
* param[out] others_req_ptr Array of other process' requests that lie
* in my process's file domain
*/
void ADIOI_BGL_Calc_others_req(ADIO_File fd, int count_my_req_procs,
int *count_my_req_per_proc,
ADIOI_Access *my_req,
int nprocs, int myrank,
int *count_others_req_procs_ptr,
ADIOI_Access **others_req_ptr);
#endif /* AD_BGL_AGGRS_H_ */

Просмотреть файл

@ -0,0 +1,52 @@
/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp. 2007, 2008 */
/**
* \file ad_bgl_open.c
* \brief ???
*/
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*/
#include "ad_bgl.h"
#include "ad_bgl_aggrs.h"
/*
 * Close the system file descriptor of `fd` and, when one is open
 * (fd->fd_direct >= 0), its direct-I/O descriptor; release the file-system
 * specific storage hung off fd->fs_ptr; and mark both descriptors as closed.
 *
 * *error_code is MPI_SUCCESS when every close() succeeded, otherwise an
 * MPI_ERR_IO code describing the first close() that failed.
 */
void ADIOI_BGL_Close(ADIO_File fd, int *error_code)
{
    int err, derr=0;
    /* errno of the first failing close(); captured immediately because the
       second close() and ADIOI_Free() below may overwrite errno */
    int saved_errno = 0;
    static char myname[] = "ADIOI_BGL_CLOSE";

#ifdef PROFILE
    MPE_Log_event(9, 0, "start close");
#endif

    err = close(fd->fd_sys);
    if (err == -1) {
        saved_errno = errno;
    }

    if (fd->fd_direct >= 0)
    {
        derr = close(fd->fd_direct);
        if (derr == -1 && err != -1) {
            saved_errno = errno;
        }
    }

#ifdef PROFILE
    MPE_Log_event(10, 0, "end close");
#endif

    /* FPRINTF(stderr,"%s(%d):'%s'. Free %#X\n",myname,__LINE__,fd->filename,(int)fd->fs_ptr);*/
    if (fd->fs_ptr != NULL) {
        ADIOI_Free(fd->fs_ptr);
        fd->fs_ptr = NULL;
    }

    fd->fd_sys    = -1;
    fd->fd_direct = -1;

    if (err == -1 || derr == -1)
    {
        *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__, MPI_ERR_IO,
                                           "**io",
                                           "**io %s", strerror(saved_errno));
    }
    else *error_code = MPI_SUCCESS;
}

Просмотреть файл

@ -0,0 +1,57 @@
/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp. 2007, 2008 */
/**
* \file ad_bgl_fcntl.c
* \brief ???
*/
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*/
#include "ad_bgl.h"
#include "adio_extern.h"
/* #ifdef MPISGI
#include "mpisgi2.h"
#endif */
/*
 * Dispatch an ADIO fcntl request on `fd`.
 *
 *   ADIO_FCNTL_GET_FSIZE      store the file size (lseek to SEEK_END) in
 *                             fcntl_struct->fsize, then seek back to
 *                             fd->fp_sys_posn unless that is -1
 *   ADIO_FCNTL_SET_DISKSPACE  preallocate fcntl_struct->diskspace bytes via
 *                             ADIOI_GEN_Prealloc
 *   ADIO_FCNTL_SET_ATOMICITY  set fd->atomicity to 0 or 1
 *
 * Any other flag yields an MPI_ERR_ARG error code in *error_code.
 */
void ADIOI_BGL_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct,
                     int *error_code)
{
    static char myname[] = "ADIOI_BGL_FCNTL";

    if (flag == ADIO_FCNTL_GET_FSIZE) {
        fcntl_struct->fsize = lseek(fd->fd_sys, 0, SEEK_END);

        /* put the system file pointer back where it was known to be */
        if (fd->fp_sys_posn != -1) {
            lseek(fd->fd_sys, fd->fp_sys_posn, SEEK_SET);
        }

        if (fcntl_struct->fsize != -1) {
            *error_code = MPI_SUCCESS;
        }
        else {
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE, myname,
                                               __LINE__, MPI_ERR_IO, "**io",
                                               "**io %s", strerror(errno));
        }
    }
    else if (flag == ADIO_FCNTL_SET_DISKSPACE) {
        ADIOI_GEN_Prealloc(fd, fcntl_struct->diskspace, error_code);
    }
    else if (flag == ADIO_FCNTL_SET_ATOMICITY) {
        /* normalize any nonzero request to 1 */
        if (fcntl_struct->atomicity == 0) {
            fd->atomicity = 0;
        }
        else {
            fd->atomicity = 1;
        }
        *error_code = MPI_SUCCESS;
    }
    else {
        /* --BEGIN ERROR HANDLING-- */
        *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                           MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__,
                                           MPI_ERR_ARG,
                                           "**flag", "**flag %d", flag);
        /* --END ERROR HANDLING-- */
    }
}

Просмотреть файл

@ -0,0 +1,84 @@
/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp. 2007, 2008 */
/* ---------------------------------------------------------------- */
/**
* \file ad_bgl_getsh.c
* \brief ???
*/
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*/
#include "ad_bgl.h"
/* Fetch-and-add on the shared file pointer.

   Stores in *shared_fp the current value of the shared file pointer (in
   etypes relative to the current view) and advances the stored pointer by
   `incr`, the number of etypes the following read or write will access.
   The value lives in the first sizeof(ADIO_Offset) bytes of the shared-fp
   file; that range is write-locked for the duration of the update. */
void ADIOI_BGL_Get_shared_fp(ADIO_File fd, int incr, ADIO_Offset *shared_fp,
                             int *error_code)
{
    static char myname[] = "ADIOI_BGL_GET_SHARED_FP";
    ADIO_Offset next_fp;
    MPI_Comm self_dup;
    int rc;

    if (fd->shared_fp_fd != ADIO_FILE_NULL) {
        /* shared-fp file already open: rewind and read the current value */
        ADIOI_WRITE_LOCK(fd->shared_fp_fd, 0, SEEK_SET, sizeof(ADIO_Offset));
        rc = lseek(fd->shared_fp_fd->fd_sys, 0, SEEK_SET);
        if (rc == 0) {
            rc = read(fd->shared_fp_fd->fd_sys, shared_fp,
                      sizeof(ADIO_Offset));
        }
        if (rc == -1) {
            ADIOI_UNLOCK(fd->shared_fp_fd, 0, SEEK_SET, sizeof(ADIO_Offset));
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE, myname,
                                               __LINE__, MPI_ERR_IO, "**io",
                                               "**io %s", strerror(errno));
            return;
        }
    }
    else {
        /* first use: open the shared-fp file on a dup of MPI_COMM_SELF */
        MPI_Comm_dup(MPI_COMM_SELF, &self_dup);
        fd->shared_fp_fd = ADIO_Open(MPI_COMM_SELF,
                                     self_dup,
                                     fd->shared_fp_fname,
                                     fd->file_system,
                                     fd->fns,
                                     ADIO_CREATE | ADIO_RDWR | ADIO_DELETE_ON_CLOSE,
                                     0,
                                     MPI_BYTE,
                                     MPI_BYTE,
                                     MPI_INFO_NULL,
                                     ADIO_PERM_NULL,
                                     error_code);
        if (*error_code != MPI_SUCCESS) {
            return;
        }

        *shared_fp = 0;
        ADIOI_WRITE_LOCK(fd->shared_fp_fd, 0, SEEK_SET, sizeof(ADIO_Offset));
        /* If the file is empty this read may fail (reading beyond end of
           file); the result is deliberately not checked because the 0
           stored above is then the correct value. */
        rc = read(fd->shared_fp_fd->fd_sys, shared_fp, sizeof(ADIO_Offset));
    }

    /* write the advanced pointer back, then release the lock */
    next_fp = *shared_fp + incr;

    rc = lseek(fd->shared_fp_fd->fd_sys, 0, SEEK_SET);
    if (rc == 0) {
        rc = write(fd->shared_fp_fd->fd_sys, &next_fp, sizeof(ADIO_Offset));
    }
    ADIOI_UNLOCK(fd->shared_fp_fd, 0, SEEK_SET, sizeof(ADIO_Offset));

    if (rc != -1) {
        *error_code = MPI_SUCCESS;
        return;
    }
    *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                       myname, __LINE__, MPI_ERR_IO,
                                       "**io",
                                       "**io %s", strerror(errno));
}

Просмотреть файл

@ -0,0 +1,338 @@
/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp. 2007, 2008 */
/* ---------------------------------------------------------------- */
/**
* \file ad_bgl_hints.c
* \brief ???
*/
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*/
#include "adio.h"
#include "adio_extern.h"
#include "ad_bgl.h"
#include "ad_bgl_pset.h"
#include "ad_bgl_aggrs.h"
#define ADIOI_BGL_CB_BUFFER_SIZE_DFLT "16777216"
#define ADIOI_BGL_IND_RD_BUFFER_SIZE_DFLT "4194304"
#define ADIOI_BGL_IND_WR_BUFFER_SIZE_DFLT "4194304"
#define ADIOI_BGL_NAGG_IN_PSET_HINT_NAME "bgl_nodes_pset"
/* Compute the aggregator-related parameters that are required in 2-phase collective IO of ADIO. */
extern int
ADIOI_BGL_gen_agg_ranklist(ADIO_File fd, int n_proxy_per_pset);
/*
 * Set up the info object and hints of `fd`.
 *
 * If fd->info is null, a new info object is created.  On the first call for
 * a file (fd->hints->initialized == 0) fd->info and fd->hints are set to
 * their defaults.  Then the info object passed by the user is examined; for
 * every key ROMIO understands, the value overrides the default.
 *
 * The hint named by ADIOI_BGL_NAGG_IN_PSET_HINT_NAME sets
 * fd->hints->cb_nodes.  Whenever defaults were just installed or that hint
 * was supplied, the aggregator rank list is regenerated.
 *
 * For cb_buffer_size, romio_cb_read, romio_cb_write and romio_no_indep_rw
 * the value is compared against the one broadcast from rank 0 of fd->comm;
 * on a mismatch *error_code receives an "info not same" error and the
 * function returns early.  Otherwise *error_code is MPI_SUCCESS.
 */
void ADIOI_BGL_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
{
    MPI_Info info;
    char *value;
    int flag, intval, tmp_val, nprocs;
    static char myname[] = "ADIOI_BGL_SETINFO";
    int did_anything = 0;

    if (fd->info == MPI_INFO_NULL) MPI_Info_create(&(fd->info));
    info = fd->info;

    /* Note that fd->hints is allocated at file open time; thus it is
     * not necessary to allocate it, or check for allocation, here.
     */

    /* scratch buffer for info values; freed on every exit path */
    value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
    AD_BGL_assert ((value != NULL));

    /* initialize info and hints to default values if they haven't been
     * previously initialized
     */
    if (!fd->hints->initialized) {

        did_anything = 1;

        /* buffer size for collective I/O */
        MPI_Info_set(info, "cb_buffer_size", ADIOI_BGL_CB_BUFFER_SIZE_DFLT);
        fd->hints->cb_buffer_size = atoi(ADIOI_BGL_CB_BUFFER_SIZE_DFLT);

        /* collective buffering is enabled by default for both reads and
         * writes
         */
        MPI_Info_set(info, "romio_cb_read", "enable");
        fd->hints->cb_read = ADIOI_HINT_ENABLE;
        MPI_Info_set(info, "romio_cb_write", "enable");
        fd->hints->cb_write = ADIOI_HINT_ENABLE;

        if ( fd->hints->cb_config_list != NULL ) ADIOI_Free (fd->hints->cb_config_list);
        fd->hints->cb_config_list = NULL;

        /* number of processes that perform I/O in collective I/O */
        MPI_Comm_size(fd->comm, &nprocs);
        sprintf(value, "%d", nprocs);
        MPI_Info_set(info, "cb_nodes", value);
        fd->hints->cb_nodes = -1;

        /* hint indicating that no indep. I/O will be performed on this file */
        MPI_Info_set(info, "romio_no_indep_rw", "false");
        fd->hints->no_indep_rw = 0;
        /* deferred_open derived from no_indep_rw and cb_{read,write} */
        fd->hints->deferred_open = 0;

        /* buffer size for data sieving in independent reads */
        MPI_Info_set(info, "ind_rd_buffer_size", ADIOI_BGL_IND_RD_BUFFER_SIZE_DFLT);
        fd->hints->ind_rd_buffer_size = atoi(ADIOI_BGL_IND_RD_BUFFER_SIZE_DFLT);

        /* buffer size for data sieving in independent writes */
        MPI_Info_set(info, "ind_wr_buffer_size", ADIOI_BGL_IND_WR_BUFFER_SIZE_DFLT);
        fd->hints->ind_wr_buffer_size = atoi(ADIOI_BGL_IND_WR_BUFFER_SIZE_DFLT);

        if(fd->file_system == ADIO_UFS)
        {
            /* default for ufs/pvfs is to disable data sieving */
            MPI_Info_set(info, "romio_ds_read", "disable");
            fd->hints->ds_read = ADIOI_HINT_DISABLE;
            MPI_Info_set(info, "romio_ds_write", "disable");
            fd->hints->ds_write = ADIOI_HINT_DISABLE;
        }
        else
        {
            /* default is to let romio automatically decide when to use data
             * sieving
             */
            MPI_Info_set(info, "romio_ds_read", "automatic");
            fd->hints->ds_read = ADIOI_HINT_AUTO;
            MPI_Info_set(info, "romio_ds_write", "automatic");
            fd->hints->ds_write = ADIOI_HINT_AUTO;
        }

        fd->hints->initialized = 1;
    }

    /* add in user's info if supplied */
    if (users_info != MPI_INFO_NULL) {
        MPI_Info_get(users_info, "cb_buffer_size", MPI_MAX_INFO_VAL,
                     value, &flag);
        if (flag && ((intval=atoi(value)) > 0)) {
            tmp_val = intval;

            MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
            /* --BEGIN ERROR HANDLING-- */
            if (tmp_val != intval) {
                MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
                                                   "cb_buffer_size",
                                                   error_code);
                ADIOI_Free(value);
                return;
            }
            /* --END ERROR HANDLING-- */

            MPI_Info_set(info, "cb_buffer_size", value);
            fd->hints->cb_buffer_size = intval;
        }

        /* new hints for enabling/disabling coll. buffering on
         * reads/writes
         */
        MPI_Info_get(users_info, "romio_cb_read", MPI_MAX_INFO_VAL, value, &flag);
        if (flag) {
            if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
                MPI_Info_set(info, "romio_cb_read", value);
                fd->hints->cb_read = ADIOI_HINT_ENABLE;
            }
            else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) {
                /* romio_cb_read overrides no_indep_rw */
                MPI_Info_set(info, "romio_cb_read", value);
                MPI_Info_set(info, "romio_no_indep_rw", "false");
                fd->hints->cb_read = ADIOI_HINT_DISABLE;
                fd->hints->no_indep_rw = ADIOI_HINT_DISABLE;
            }
            else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
            {
                MPI_Info_set(info, "romio_cb_read", value);
                fd->hints->cb_read = ADIOI_HINT_AUTO;
            }

            tmp_val = fd->hints->cb_read;

            MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
            /* --BEGIN ERROR HANDLING-- */
            if (tmp_val != fd->hints->cb_read) {
                MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
                                                   "romio_cb_read",
                                                   error_code);
                ADIOI_Free(value);
                return;
            }
            /* --END ERROR HANDLING-- */
        }
        MPI_Info_get(users_info, "romio_cb_write", MPI_MAX_INFO_VAL, value, &flag);
        if (flag) {
            if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
                MPI_Info_set(info, "romio_cb_write", value);
                fd->hints->cb_write = ADIOI_HINT_ENABLE;
            }
            else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE"))
            {
                /* romio_cb_write overrides no_indep_rw, too */
                MPI_Info_set(info, "romio_cb_write", value);
                MPI_Info_set(info, "romio_no_indep_rw", "false");
                fd->hints->cb_write = ADIOI_HINT_DISABLE;
                fd->hints->no_indep_rw = ADIOI_HINT_DISABLE;
            }
            else if (!strcmp(value, "automatic") ||
                     !strcmp(value, "AUTOMATIC"))
            {
                MPI_Info_set(info, "romio_cb_write", value);
                fd->hints->cb_write = ADIOI_HINT_AUTO;
            }

            tmp_val = fd->hints->cb_write;

            MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
            /* --BEGIN ERROR HANDLING-- */
            if (tmp_val != fd->hints->cb_write) {
                MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
                                                   "romio_cb_write",
                                                   error_code);
                ADIOI_Free(value);
                return;
            }
            /* --END ERROR HANDLING-- */
        }

        /* new hint for specifying no indep. read/write will be performed */
        MPI_Info_get(users_info, "romio_no_indep_rw", MPI_MAX_INFO_VAL, value, &flag);
        if (flag) {
            if (!strcmp(value, "true") || !strcmp(value, "TRUE")) {
                /* if 'no_indep_rw' set, also hint that we will do
                 * collective buffering: if we aren't doing independent io,
                 * then we have to do collective  */
                MPI_Info_set(info, "romio_no_indep_rw", value);
                MPI_Info_set(info, "romio_cb_write", "enable");
                MPI_Info_set(info, "romio_cb_read", "enable");
                fd->hints->no_indep_rw = 1;
                fd->hints->cb_read = 1;
                fd->hints->cb_write = 1;
                tmp_val = 1;
            }
            else if (!strcmp(value, "false") || !strcmp(value, "FALSE")) {
                MPI_Info_set(info, "romio_no_indep_rw", value);
                fd->hints->no_indep_rw = 0;
                tmp_val = 0;
            }
            else {
                /* default is above */
                tmp_val = 0;
            }

            MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
            /* --BEGIN ERROR HANDLING-- */
            if (tmp_val != fd->hints->no_indep_rw) {
                MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
                                                   "romio_no_indep_rw",
                                                   error_code);
                ADIOI_Free(value);
                return;
            }
            /* --END ERROR HANDLING-- */
        }

        /* new hints for enabling/disabling data sieving on
         * reads/writes
         */
        MPI_Info_get(users_info, "romio_ds_read", MPI_MAX_INFO_VAL, value,
                     &flag);
        if (flag) {
            if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
                MPI_Info_set(info, "romio_ds_read", value);
                fd->hints->ds_read = ADIOI_HINT_ENABLE;
            }
            else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) {
                MPI_Info_set(info, "romio_ds_read", value);
                fd->hints->ds_read = ADIOI_HINT_DISABLE;
            }
            else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
            {
                MPI_Info_set(info, "romio_ds_read", value);
                fd->hints->ds_read = ADIOI_HINT_AUTO;
            }
            /* otherwise ignore */
        }
        MPI_Info_get(users_info, "romio_ds_write", MPI_MAX_INFO_VAL, value,
                     &flag);
        if (flag) {
            if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
                MPI_Info_set(info, "romio_ds_write", value);
                fd->hints->ds_write = ADIOI_HINT_ENABLE;
            }
            else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) {
                MPI_Info_set(info, "romio_ds_write", value);
                fd->hints->ds_write = ADIOI_HINT_DISABLE;
            }
            else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
            {
                MPI_Info_set(info, "romio_ds_write", value);
                fd->hints->ds_write = ADIOI_HINT_AUTO;
            }
            /* otherwise ignore */
        }

        MPI_Info_get(users_info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL,
                     value, &flag);
        if (flag && ((intval = atoi(value)) > 0)) {
            MPI_Info_set(info, "ind_wr_buffer_size", value);
            fd->hints->ind_wr_buffer_size = intval;
        }

        MPI_Info_get(users_info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL,
                     value, &flag);
        if (flag && ((intval = atoi(value)) > 0)) {
            MPI_Info_set(info, "ind_rd_buffer_size", value);
            fd->hints->ind_rd_buffer_size = intval;
        }

        /* BG/L specific hint: its value is stored in fd->hints->cb_nodes */
        memset( value, 0, MPI_MAX_INFO_VAL+1 );
        MPI_Info_get(users_info, ADIOI_BGL_NAGG_IN_PSET_HINT_NAME, MPI_MAX_INFO_VAL,
                     value, &flag);
        if (flag && ((intval = atoi(value)) > 0)) {

            did_anything = 1;
            MPI_Info_set(info, ADIOI_BGL_NAGG_IN_PSET_HINT_NAME, value);
            fd->hints->cb_nodes = intval;
        }
    }

    /* associate CB aggregators to certain CNs in every involved PSET */
    if (did_anything) {
        ADIOI_BGL_gen_agg_ranklist(fd, fd->hints->cb_nodes);
    }

    /* deferred_open won't be set by callers, but if the user doesn't
     * explicitly disable collective buffering (two-phase) and does hint that
     * io w/o independent io is going on, we'll set this internal hint as a
     * convenience */
    if ( ( (fd->hints->cb_read != ADIOI_HINT_DISABLE)
           && (fd->hints->cb_write != ADIOI_HINT_DISABLE)
           && fd->hints->no_indep_rw ) )
    {
        fd->hints->deferred_open = 1;
    } else {
        /* setting romio_no_indep_rw enable and romio_cb_{read,write}
         * disable at the same time doesn't make sense. honor
         * romio_cb_{read,write} and force the no_indep_rw hint to
         * 'disable' */
        MPI_Info_set(info, "romio_no_indep_rw", "false");
        fd->hints->no_indep_rw = 0;
        fd->hints->deferred_open = 0;
    }

    ADIOI_Free(value);

    *error_code = MPI_SUCCESS;
}

Просмотреть файл

@ -0,0 +1,114 @@
/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp. 2007, 2008 */
/* ---------------------------------------------------------------- */
/**
* \file ad_bgl_open.c
* \brief ???
*/
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*/
#include "ad_bgl.h"
#include "ad_bgl_aggrs.h"
/*
 * Open fd->filename with the POSIX flags corresponding to fd->access_mode
 * and store the descriptor in fd->fd_sys (fd->fd_direct is set to -1).
 *
 * When fd->perm is ADIO_PERM_NULL the permissions are derived from the
 * process umask.  With ADIO_APPEND the individual and system file positions
 * are set to the end of the file.  After a successful open the file is
 * stat'ed and, if that works, its st_blksize is saved in a newly allocated
 * ADIOI_BGL_fs hung off fd->fs_ptr.
 *
 * *error_code is MPI_SUCCESS on success; otherwise an error code chosen from
 * the errno left by open().
 */
void ADIOI_BGL_Open(ADIO_File fd, int *error_code)
{
    int perm, old_mask, amode;
    static char myname[] = "ADIOI_BGL_OPEN";

    /* set internal variables for tuning environment variables */
    ad_bgl_get_env_vars();

    if (fd->perm == ADIO_PERM_NULL) {
        /* the umask can only be read by setting it, so set and restore */
        old_mask = umask(022);
        umask(old_mask);
        perm = old_mask ^ 0666;
    }
    else perm = fd->perm;

    amode = 0;
    if (fd->access_mode & ADIO_CREATE)
        amode = amode | O_CREAT;
    if (fd->access_mode & ADIO_RDONLY)
        amode = amode | O_RDONLY;
    if (fd->access_mode & ADIO_WRONLY)
        amode = amode | O_WRONLY;
    if (fd->access_mode & ADIO_RDWR)
        amode = amode | O_RDWR;
    if (fd->access_mode & ADIO_EXCL)
        amode = amode | O_EXCL;

    fd->fd_sys = open(fd->filename, amode, perm);
    fd->fd_direct = -1;

    if ((fd->fd_sys != -1) && (fd->access_mode & ADIO_APPEND))
        fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END);

    if(fd->fd_sys != -1)
    {
        struct stat64 bgl_stat;
        int rc = stat64(fd->filename,&bgl_stat);
        if (rc >= 0)
        {
            /* store the blksize in the file system specific storage */
            AD_BGL_assert(fd->fs_ptr == NULL);
            fd->fs_ptr = (ADIOI_BGL_fs*) ADIOI_Malloc(sizeof(ADIOI_BGL_fs));
            ((ADIOI_BGL_fs*)fd->fs_ptr)->blksize = bgl_stat.st_blksize;
            /* FPRINTF(stderr,"%s(%d):Successful stat '%s'. Blocksize=%ld\n",myname,__LINE__,fd->filename,bgl_stat.st_blksize);*/
        }
        /* else
           FPRINTF(stderr,"%s(%d):Stat '%s' failed with rc=%d, errno=%d\n",myname,__LINE__,fd->filename,rc,errno);*/
    }

    if (fd->fd_sys == -1) {
        if (errno == ENAMETOOLONG)
            /* strlen() yields a size_t but the message format consumes an
               int ("%d"); convert explicitly so the variadic argument has
               the type the format expects */
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE, myname,
                                               __LINE__, MPI_ERR_BAD_FILE,
                                               "**filenamelong",
                                               "**filenamelong %s %d",
                                               fd->filename,
                                               (int) strlen(fd->filename));
        else if (errno == ENOENT)
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE, myname,
                                               __LINE__, MPI_ERR_NO_SUCH_FILE,
                                               "**filenoexist",
                                               "**filenoexist %s",
                                               fd->filename);
        else if (errno == ENOTDIR || errno == ELOOP)
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE,
                                               myname, __LINE__,
                                               MPI_ERR_BAD_FILE,
                                               "**filenamedir",
                                               "**filenamedir %s",
                                               fd->filename);
        else if (errno == EACCES) {
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE, myname,
                                               __LINE__, MPI_ERR_ACCESS,
                                               "**fileaccess",
                                               "**fileaccess %s",
                                               fd->filename );
        }
        else if (errno == EROFS) {
            /* Read only file or file system and write access requested */
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE, myname,
                                               __LINE__, MPI_ERR_READ_ONLY,
                                               "**ioneedrd", 0 );
        }
        else {
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE, myname,
                                               __LINE__, MPI_ERR_IO, "**io",
                                               "**io %s", strerror(errno));
        }
    }
    else *error_code = MPI_SUCCESS;
}

Просмотреть файл

@ -0,0 +1,109 @@
/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp. 2007, 2008 */
/* ---------------------------------------------------------------- */
/**
* \file ad_bgl_pset.c
* \brief Definition of functions associated to structs ADIOI_BGL_ProcInfo_t and ADIOI_BGL_ConfInfo_t
*/
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*/
#include <stdlib.h>
#include "ad_bgl.h"
#include "ad_bgl_pset.h"
#include "mpidimpl.h"
/* Allocate one ADIOI_BGL_ProcInfo_t with ADIOI_Malloc.  A NULL result trips
 * AD_BGL_assert; the fields of the returned object are not set here. */
ADIOI_BGL_ProcInfo_t *
ADIOI_BGL_ProcInfo_new()
{
    ADIOI_BGL_ProcInfo_t *proc_info;

    proc_info = (ADIOI_BGL_ProcInfo_t *) ADIOI_Malloc (sizeof *proc_info);
    AD_BGL_assert (proc_info != NULL);
    return proc_info;
}
/* Allocate an array of n ADIOI_BGL_ProcInfo_t entries with ADIOI_Malloc.
 * A NULL result trips AD_BGL_assert; the entries are not set here. */
ADIOI_BGL_ProcInfo_t *
ADIOI_BGL_ProcInfo_new_n( int n )
{
    ADIOI_BGL_ProcInfo_t *proc_array;

    proc_array = (ADIOI_BGL_ProcInfo_t *) ADIOI_Malloc (n * sizeof *proc_array);
    AD_BGL_assert (proc_array != NULL);
    return proc_array;
}
/* Release an object obtained from ADIOI_BGL_ProcInfo_new / _new_n.
 * A NULL pointer is accepted and ignored. */
void
ADIOI_BGL_ProcInfo_free( ADIOI_BGL_ProcInfo_t *info )
{
    if (info == NULL)
        return;
    ADIOI_Free (info);
}
/* Copy this process' placement from the hardware description hw into
 * *info and record r as its rank. */
static
void
ADIOI_BGL_ProcInfo_set(ADIOI_BGL_ProcInfo_t *info, const DCMF_Hardware_t *hw, int r)
{
    /* caller-supplied rank */
    info->rank       = r;

    /* pset membership */
    info->psetNum    = hw->idOfPset;
    info->rankInPset = hw->rankInPset;

    /* coordinates; the t coordinate is stored as the cpu id */
    info->xInPset    = hw->xCoord;
    info->yInPset    = hw->yCoord;
    info->zInPset    = hw->zCoord;
    info->cpuid      = hw->tCoord;
}
/* Allocate one ADIOI_BGL_ConfInfo_t with ADIOI_Malloc.  A NULL result trips
 * AD_BGL_assert; the fields of the returned object are not set here. */
ADIOI_BGL_ConfInfo_t *
ADIOI_BGL_ConfInfo_new ()
{
    ADIOI_BGL_ConfInfo_t *conf_info;

    conf_info = (ADIOI_BGL_ConfInfo_t *) ADIOI_Malloc (sizeof *conf_info);
    AD_BGL_assert (conf_info != NULL);
    return conf_info;
}
/* Fill *info from the hardware description hw, the number of processes s
 * and the requested number of aggregators n_aggrs.
 *
 * The aggregator count falls back to ADIOI_BGL_NAGG_PSET_DFLT when n_aggrs
 * is non-positive or larger than MIN(s, virtual pset size), and is then
 * capped at the virtual pset size.  aggRatio is the resulting fraction of
 * the virtual pset, never more than 1. */
static
void
ADIOI_BGL_ConfInfo_set(ADIOI_BGL_ConfInfo_t *info, const DCMF_Hardware_t *hw, int s, int n_aggrs)
{
    int virtual_pset_size;
    int aggrs;

    /* values taken straight from the caller / the hardware description */
    info->nProcs          = s;
    info->cpuidSize       = hw->tSize;
    info->isVNM           = (hw->tSize != 1);
    info->PsetSize        = hw->sizeOfPset;
    info->virtualPsetSize = hw->sizeOfPset * hw->tSize;
    info->numPsets        = (hw->xSize * hw->ySize *
                             hw->zSize) / hw->sizeOfPset;

    /* More complicated logic maybe needed for nAggrs specification */
    virtual_pset_size = info->virtualPsetSize;
    aggrs = n_aggrs;
    if ( aggrs <= 0 || MIN(s, virtual_pset_size) < aggrs )
        aggrs = ADIOI_BGL_NAGG_PSET_DFLT;
    if ( aggrs > virtual_pset_size )
        aggrs = virtual_pset_size;
    info->nAggrs = aggrs;

    info->aggRatio = 1. * aggrs / virtual_pset_size;
    if (info->aggRatio > 1)
        info->aggRatio = 1.;
}
/* Release an object obtained from ADIOI_BGL_ConfInfo_new.
 * A NULL pointer is accepted and ignored. */
void
ADIOI_BGL_ConfInfo_free( ADIOI_BGL_ConfInfo_t *info )
{
    if (info == NULL)
        return;
    ADIOI_Free (info);
}
/* Query the hardware description once through DCMF_Hardware() and use it
 * to fill both the communicator-wide record *conf (with s processes and
 * n_aggrs requested aggregators) and the per-process record *proc (with
 * rank r). */
void
ADIOI_BGL_persInfo_init(ADIOI_BGL_ConfInfo_t *conf,
                        ADIOI_BGL_ProcInfo_t *proc,
                        int s, int r, int n_aggrs)
{
    DCMF_Hardware_t hardware;

    DCMF_Hardware (&hardware);

    ADIOI_BGL_ConfInfo_set (conf, &hardware, s, n_aggrs);
    ADIOI_BGL_ProcInfo_set (proc, &hardware, r);
}
/* Release a conf/proc pair; either pointer may be NULL. */
void
ADIOI_BGL_persInfo_free( ADIOI_BGL_ConfInfo_t *conf, ADIOI_BGL_ProcInfo_t *proc )
{
    /* each helper ignores a NULL argument */
    ADIOI_BGL_ConfInfo_free ( conf );
    ADIOI_BGL_ProcInfo_free ( proc );
}

Просмотреть файл

@ -0,0 +1,80 @@
/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp. 2007, 2008 */
/* ---------------------------------------------------------------- */
/**
* \file ad_bgl_pset.h
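* \brief BG/L pset bookkeeping structures (ADIOI_BGL_ProcInfo_t, ADIOI_BGL_ConfInfo_t) and their public interface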
*/
/* File: ad_bgl_pset.h
*
* Defines two structures that keep BG/L PSET specific information and their public interfaces:
* . ADIOI_BGL_ProcInfo_t object keeps specific information to each process
* . ADIOI_BGL_ConfInfo_t object keeps general information for the whole communicator, only kept
* on process 0.
*/
#ifndef AD_BGL_PSET_H_
#define AD_BGL_PSET_H_
/* Keeps specific information to each process, will be exchanged among processes */
/* Per-process placement record.  ADIOI_BGL_ProcInfo_set() fills it from a
 * DCMF_Hardware_t plus the caller-supplied rank. */
typedef struct {
int psetNum; /* which PSET I am in */
int rank; /* my rank */
int xInPset; /* my relative coordinates in my PSET */
int yInPset;
int zInPset;
int cpuid; /* my CPU id -- for virtual node mode (t coord)*/
int rankInPset; /* my relative rank in my PSET */
} ADIOI_BGL_ProcInfo_t __attribute__((aligned(16)));
/* Keeps general information for the whole communicator, only on process 0 */
/* Communicator-wide configuration record.  The per-field notes below
 * describe what ADIOI_BGL_ConfInfo_set() stores in each member. */
typedef struct {
int PsetSize; /* hardware sizeOfPset */
int nAggrs; /* aggregator count after defaulting and capping at virtualPsetSize */
int numPsets; /* (xSize * ySize * zSize) / sizeOfPset */
int isVNM; /* non-zero when the hardware tSize is not 1 */
int virtualPsetSize; /* sizeOfPset * tSize */
int nProcs; /* number of processes passed in by the caller */
float aggRatio; /* nAggrs / virtualPsetSize, at most 1 */
int cpuidSize; /* how many cpu ids? (t size) */
} ADIOI_BGL_ConfInfo_t __attribute__((aligned(16)));
/* Smaller of two values.  Both arguments are fully parenthesized so that
 * operator expressions (e.g. MIN(x & mask, y)) are compared as written.
 * Each argument may still be evaluated twice: do not pass expressions
 * with side effects. */
#undef MIN
#define MIN(a,b) (((a) < (b)) ? (a) : (b))
/* Default is to choose 8 aggregator nodes in each 32 CN pset.
Also defines the default ratio of aggregator nodes in each pset.
For Virtual Node Mode, the ratio is 8/64 */
/* NOTE(review): presumably the smallest allowed aggregator count per pset;
 * no use of it is visible next to these definitions -- confirm. */
#define ADIOI_BGL_NAGG_PSET_MIN 1
/* fallback aggregator count used by ADIOI_BGL_ConfInfo_set() when the
 * requested count is non-positive or too large */
#define ADIOI_BGL_NAGG_PSET_DFLT 8
/* NOTE(review): presumably the compute-node count of a default pset (the
 * "32 CN pset" mentioned above) -- confirm against its users. */
#define ADIOI_BGL_PSET_SIZE_DFLT 32
/* public funcs for ADIOI_BGL_ProcInfo_t objects */
/* allocate one record / an array of n records; allocation failure aborts
 * through AD_BGL_assert */
ADIOI_BGL_ProcInfo_t * ADIOI_BGL_ProcInfo_new();
ADIOI_BGL_ProcInfo_t * ADIOI_BGL_ProcInfo_new_n( int n );
/* release a record; NULL is ignored */
void ADIOI_BGL_ProcInfo_free( ADIOI_BGL_ProcInfo_t *info );
/* public funcs for ADIOI_BGL_ConfInfo_t objects */
/* allocate one record; allocation failure aborts through AD_BGL_assert */
ADIOI_BGL_ConfInfo_t * ADIOI_BGL_ConfInfo_new ();
/* release a record; NULL is ignored */
void ADIOI_BGL_ConfInfo_free( ADIOI_BGL_ConfInfo_t *info );
/* public funcs for a pair of ADIOI_BGL_ConfInfo_t and ADIOI_BGL_ProcInfo_t objects */
/* fill both records from the hardware description:
 * s = number of processes, r = rank of this process,
 * n_aggrs = requested number of aggregators */
void ADIOI_BGL_persInfo_init( ADIOI_BGL_ConfInfo_t *conf,
ADIOI_BGL_ProcInfo_t *proc,
int s, int r, int n_aggrs );
/* release both records; either pointer may be NULL */
void ADIOI_BGL_persInfo_free( ADIOI_BGL_ConfInfo_t *conf,
ADIOI_BGL_ProcInfo_t *proc );
#endif /* AD_BGL_PSET_H_ */

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -0,0 +1,496 @@
/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp. 2007, 2008 */
/* ---------------------------------------------------------------- */
/**
* \file ad_bgl_read.c
* \brief Contiguous and strided (data-sieving) read routines of the BG/L ADIO driver
*/
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*/
#include "ad_bgl.h"
#include "adio_extern.h"
#include "ad_bgl_tuning.h"
/**
 * Read count elements of datatype from the file into buf with one
 * contiguous read().
 *
 * With file_ptr_type == ADIO_EXPLICIT_OFFSET the data is read at offset and
 * the individual file pointer is left alone; otherwise it is read at
 * fd->fp_ind, which is then advanced by the number of bytes read.  The byte
 * range is locked around the read (exclusively when fd->atomicity is set,
 * shared otherwise).
 *
 * If read() fails, fd->fp_ind is left unchanged and fd->fp_sys_posn is set
 * to -1 so that the next access re-seeks, instead of folding the -1 return
 * value into the file positions.
 *
 * \param status      receives the byte count (when HAVE_STATUS_SET_BYTES)
 * \param error_code  MPI_SUCCESS, or an MPI_ERR_IO code built from the
 *                    errno of the failed read()
 */
void ADIOI_BGL_ReadContig(ADIO_File fd, void *buf, int count,
                          MPI_Datatype datatype, int file_ptr_type,
                          ADIO_Offset offset, ADIO_Status *status, int *error_code)
{
    int err=-1, datatype_size, len;
    int io_errno = 0;
    static char myname[] = "ADIOI_BGL_READCONTIG";
#if BGL_PROFILE
    /* timing */
    double io_time = 0.0, io_time2 = 0.0;

    if (bglmpio_timing) io_time = MPI_Wtime();
#endif

    MPI_Type_size(datatype, &datatype_size);
    len = datatype_size * count;

#if BGL_PROFILE
    /* account the request size only now that len has been computed */
    if (bglmpio_timing) bglmpio_prof_cr[ BGLMPIO_CIO_DATA_SIZE ] += len;
#endif

    /* read from curr. location of ind. file pointer unless told otherwise */
    if (file_ptr_type != ADIO_EXPLICIT_OFFSET)
        offset = fd->fp_ind;

#if BGL_PROFILE
    if (bglmpio_timing2) io_time2 = MPI_Wtime();
#endif
    if (fd->fp_sys_posn != offset)
        lseek(fd->fd_sys, offset, SEEK_SET);
#if BGL_PROFILE
    if (bglmpio_timing2) bglmpio_prof_cr[ BGLMPIO_CIO_T_SEEK ] += (MPI_Wtime() - io_time2);
#endif

    if (fd->atomicity) {
        ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
    }
    else {
        ADIOI_READ_LOCK(fd, offset, SEEK_SET, len);
    }

#if BGL_PROFILE
    if (bglmpio_timing2) io_time2 = MPI_Wtime();
#endif
    err = read(fd->fd_sys, buf, len);
    /* keep the reason for the failure: the unlock below may change errno */
    if (err == -1) io_errno = errno;
#if BGL_PROFILE
    if (bglmpio_timing2) bglmpio_prof_cr[ BGLMPIO_CIO_T_POSI_RW ] += (MPI_Wtime() - io_time2);
#endif

    ADIOI_UNLOCK(fd, offset, SEEK_SET, len);

    if (err == -1) {
        /* position is no longer trustworthy; force a seek next time */
        fd->fp_sys_posn = -1;
    }
    else if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
        fd->fp_sys_posn = offset + err;
        /* individual file pointer not updated */
    }
    else {
        fd->fp_ind += err;
        fd->fp_sys_posn = fd->fp_ind;
    }

#if BGL_PROFILE
    if (bglmpio_timing) bglmpio_prof_cr[ BGLMPIO_CIO_T_MPIO_RW ] += (MPI_Wtime() - io_time);
#endif

    /* --BEGIN ERROR HANDLING-- */
    if (err == -1) {
        *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__, MPI_ERR_IO,
                                           "**io", "**io %s", strerror(io_errno));
        return;
    }
    /* --END ERROR HANDLING-- */

#ifdef HAVE_STATUS_SET_BYTES
    MPIR_Status_set_bytes(status, datatype, err);
#endif

    *error_code = MPI_SUCCESS;
}
/* Serve one piece of a strided read (req_len bytes at file offset req_off)
 * out of the read buffer and copy it to buf + userbuf_off.
 *
 * The macro uses these names from the expansion site: fd, buf, req_off,
 * req_len, userbuf_off, readbuf, readbuf_off, readbuf_len, max_bufsize,
 * end_offset, partial_read, tmp_buf, err and err_flag.
 *
 * 1. If req_off lies at or beyond the end of the buffered window, the
 *    window is restarted at req_off (at most max_bufsize bytes, never past
 *    end_offset) and read from the file.
 * 2. While the request extends past the end of the window, the bytes from
 *    req_off to the end of the window (partial_read) are kept, readbuf is
 *    reallocated to partial_read + max_bufsize bytes with those bytes at
 *    its front, and the following file data is read in behind them.
 * 3. The requested bytes are copied from the window to the user buffer.
 *
 * Each read() is bracketed by a read lock/unlock unless fd->atomicity is
 * set.  A failing read() only sets err_flag; the copy in step 3 is still
 * performed. */
#define ADIOI_BUFFERED_READ \
{ \
if (req_off >= readbuf_off + readbuf_len) { \
readbuf_off = req_off; \
readbuf_len = (int) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));\
lseek(fd->fd_sys, readbuf_off, SEEK_SET);\
if (!(fd->atomicity)) ADIOI_READ_LOCK(fd, readbuf_off, SEEK_SET, readbuf_len);\
err = read(fd->fd_sys, readbuf, readbuf_len);\
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, readbuf_off, SEEK_SET, readbuf_len);\
if (err == -1) err_flag = 1; \
} \
while (req_len > readbuf_off + readbuf_len - req_off) { \
partial_read = (int) (readbuf_off + readbuf_len - req_off); \
tmp_buf = (char *) ADIOI_Malloc(partial_read); \
memcpy(tmp_buf, readbuf+readbuf_len-partial_read, partial_read); \
ADIOI_Free(readbuf); \
readbuf = (char *) ADIOI_Malloc(partial_read + max_bufsize); \
memcpy(readbuf, tmp_buf, partial_read); \
ADIOI_Free(tmp_buf); \
readbuf_off += readbuf_len-partial_read; \
readbuf_len = (int) (partial_read + ADIOI_MIN(max_bufsize, \
end_offset-readbuf_off+1)); \
lseek(fd->fd_sys, readbuf_off+partial_read, SEEK_SET);\
if (!(fd->atomicity)) ADIOI_READ_LOCK(fd, readbuf_off+partial_read, SEEK_SET, readbuf_len-partial_read);\
err = read(fd->fd_sys, readbuf+partial_read, readbuf_len-partial_read);\
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, readbuf_off+partial_read, SEEK_SET, readbuf_len-partial_read);\
if (err == -1) err_flag = 1; \
} \
memcpy((char *)buf + userbuf_off, readbuf+req_off-readbuf_off, req_len); \
}
/**
 * Read count elements of datatype into buf when the memory layout, the
 * file view, or both are noncontiguous.
 *
 * When the ds_read hint is ADIOI_HINT_DISABLE the work is handed to
 * ADIOI_GEN_ReadStrided_naive().  Otherwise the file is read in chunks of
 * at most "ind_rd_buffer_size" bytes (taken from fd->info) into a
 * temporary buffer, and ADIOI_BUFFERED_READ copies each requested piece
 * from that buffer to its place in buf.  Two main cases are handled:
 *   - noncontiguous in memory, contiguous in file;
 *   - noncontiguous in file (memory contiguous or not), walking the
 *     flattened representation of fd->filetype.
 *
 * With fd->atomicity set, the whole accessed range is locked exclusively
 * for the duration of the call; otherwise each individual read() is
 * bracketed by a read lock.
 *
 * \param file_ptr_type  ADIO_INDIVIDUAL reads at fd->fp_ind and updates it;
 *                       otherwise offset is used
 * \param offset         in units of etype relative to the filetype
 * \param status         filled with the requested byte count (bufsize),
 *                       see the comment at the end of the function
 * \param error_code     MPI_SUCCESS, or an MPI_ERR_IO code when one of the
 *                       read() calls failed
 */
void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status, int
*error_code)
{
/* offset is in units of etype relative to the filetype. */
ADIOI_Flatlist_node *flat_buf, *flat_file;
int i, j, k, err=-1, brd_size, frd_size=0, st_index=0;
int bufsize, num, size, sum, n_etypes_in_filetype, size_in_filetype;
int n_filetypes, etype_in_filetype;
ADIO_Offset abs_off_in_filetype=0;
int filetype_size, etype_size, buftype_size, req_len, partial_read;
MPI_Aint filetype_extent, buftype_extent;
int buf_count, buftype_is_contig, filetype_is_contig;
ADIO_Offset userbuf_off;
ADIO_Offset off, req_off, disp, end_offset=0, readbuf_off, start_off;
char *readbuf, *tmp_buf, *value;
int flag, st_frd_size, st_n_filetypes, readbuf_len;
int new_brd_size, new_frd_size, err_flag=0, info_flag, max_bufsize;
static char myname[] = "ADIOI_BGL_READSTRIDED";
if (fd->hints->ds_read == ADIOI_HINT_DISABLE) {
/* if user has disabled data sieving on reads, use naive
* approach instead.
*/
/*FPRINTF(stderr, "ADIOI_GEN_ReadStrided_naive(%d):\n", __LINE__);*/
ADIOI_GEN_ReadStrided_naive(fd,
buf,
count,
datatype,
file_ptr_type,
offset,
status,
error_code);
return;
}
/*FPRINTF(stderr, "%s(%d):\n",myname, __LINE__);*/
ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
MPI_Type_size(fd->filetype, &filetype_size);
/* an empty filetype means there is nothing to read */
if ( ! filetype_size ) {
*error_code = MPI_SUCCESS;
return;
}
MPI_Type_extent(fd->filetype, &filetype_extent);
MPI_Type_size(datatype, &buftype_size);
MPI_Type_extent(datatype, &buftype_extent);
etype_size = fd->etype_size;
bufsize = buftype_size * count;
/* get max_bufsize from the info object. */
/* NOTE(review): info_flag is not examined before value is parsed, so this
 * relies on "ind_rd_buffer_size" always being present in fd->info --
 * confirm. */
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
MPI_Info_get(fd->info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, value,
&info_flag);
max_bufsize = atoi(value);
ADIOI_Free(value);
if (!buftype_is_contig && filetype_is_contig) {
/* noncontiguous in memory, contiguous in file. */
ADIOI_Flatten_datatype(datatype);
flat_buf = ADIOI_Flatlist;
while (flat_buf->type != datatype) flat_buf = flat_buf->next;
off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind :
fd->disp + etype_size * offset;
start_off = off;
end_offset = off + bufsize - 1;
readbuf_off = off;
readbuf = (char *) ADIOI_Malloc(max_bufsize);
readbuf_len = (int) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));
/* if atomicity is true, lock (exclusive) the region to be accessed */
if (fd->atomicity)
ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
/* initial read into readbuf */
lseek(fd->fd_sys, readbuf_off, SEEK_SET);
if (!(fd->atomicity)) ADIOI_READ_LOCK(fd, readbuf_off, SEEK_SET, readbuf_len);
err = read(fd->fd_sys, readbuf, readbuf_len);
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, readbuf_off, SEEK_SET, readbuf_len);
if (err == -1) err_flag = 1;
/* scatter the contiguous file data into the blocks of each of the count
 * copies of the memory datatype */
for (j=0; j<count; j++)
for (i=0; i<flat_buf->count; i++) {
userbuf_off = j*buftype_extent + flat_buf->indices[i];
req_off = off;
req_len = flat_buf->blocklens[i];
ADIOI_BUFFERED_READ
off += flat_buf->blocklens[i];
}
if (fd->atomicity)
ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;
/* readbuf is allocated above and may be reallocated by the macro */
ADIOI_Free(readbuf); /* malloced in the buffered_read macro */
if (err_flag) {
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
MPIR_ERR_RECOVERABLE, myname,
__LINE__, MPI_ERR_IO, "**io",
"**io %s", strerror(errno));
}
else *error_code = MPI_SUCCESS;
}
else { /* noncontiguous in file */
/* filetype already flattened in ADIO_Open */
flat_file = ADIOI_Flatlist;
while (flat_file->type != fd->filetype) flat_file = flat_file->next;
disp = fd->disp;
if (file_ptr_type == ADIO_INDIVIDUAL) {
offset = fd->fp_ind; /* in bytes */
/* find the filetype instance (n_filetypes) and the block in it
 * (st_index) whose end is at or beyond offset; frd_size is what is left
 * of that block from offset on */
n_filetypes = -1;
flag = 0;
while (!flag) {
n_filetypes++;
for (i=0; i<flat_file->count; i++) {
if (disp + flat_file->indices[i] +
(ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i]
>= offset) {
st_index = i;
frd_size = (int) (disp + flat_file->indices[i] +
(ADIO_Offset) n_filetypes*filetype_extent
+ flat_file->blocklens[i] - offset);
flag = 1;
break;
}
}
}
}
else {
/* explicit offset: convert the etype offset into a filetype instance
 * plus a byte position inside the flattened filetype */
n_etypes_in_filetype = filetype_size/etype_size;
n_filetypes = (int) (offset / n_etypes_in_filetype);
etype_in_filetype = (int) (offset % n_etypes_in_filetype);
size_in_filetype = etype_in_filetype * etype_size;
sum = 0;
for (i=0; i<flat_file->count; i++) {
sum += flat_file->blocklens[i];
if (sum > size_in_filetype) {
st_index = i;
frd_size = sum - size_in_filetype;
abs_off_in_filetype = flat_file->indices[i] +
size_in_filetype - (sum - flat_file->blocklens[i]);
break;
}
}
/* abs. offset in bytes in the file */
offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype;
}
start_off = offset;
/* Calculate end_offset, the last byte-offset that will be accessed.
e.g., if start_offset=0 and 100 bytes to be read, end_offset=99*/
st_frd_size = frd_size;
st_n_filetypes = n_filetypes;
i = 0;
j = st_index;
off = offset;
frd_size = ADIOI_MIN(st_frd_size, bufsize);
while (i < bufsize) {
i += frd_size;
end_offset = off + frd_size - 1;
if (j < (flat_file->count - 1)) j++;
else {
j = 0;
n_filetypes++;
}
off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent;
frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
}
/* if atomicity is true, lock (exclusive) the region to be accessed */
if (fd->atomicity)
ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
/* initial read into readbuf */
readbuf_off = offset;
readbuf = (char *) ADIOI_Malloc(max_bufsize);
readbuf_len = (int) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));
lseek(fd->fd_sys, offset, SEEK_SET);
if (!(fd->atomicity)) ADIOI_READ_LOCK(fd, offset, SEEK_SET, readbuf_len);
err = read(fd->fd_sys, readbuf, readbuf_len);
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, offset, SEEK_SET, readbuf_len);
if (err == -1) err_flag = 1;
if (buftype_is_contig && !filetype_is_contig) {
/* contiguous in memory, noncontiguous in file. should be the most
common case. */
i = 0;
j = st_index;
off = offset;
n_filetypes = st_n_filetypes;
frd_size = ADIOI_MIN(st_frd_size, bufsize);
while (i < bufsize) {
if (frd_size) {
/* TYPE_UB and TYPE_LB can result in
frd_size = 0. save system call in such cases */
/* lseek(fd->fd_sys, off, SEEK_SET);
err = read(fd->fd_sys, ((char *) buf) + i, frd_size);*/
req_off = off;
req_len = frd_size;
userbuf_off = i;
ADIOI_BUFFERED_READ
}
i += frd_size;
if (off + frd_size < disp + flat_file->indices[j] +
flat_file->blocklens[j] + (ADIO_Offset) n_filetypes*filetype_extent)
off += frd_size;
/* did not reach end of contiguous block in filetype.
no more I/O needed. off is incremented by frd_size. */
else {
if (j < (flat_file->count - 1)) j++;
else {
j = 0;
n_filetypes++;
}
off = disp + flat_file->indices[j] +
(ADIO_Offset) n_filetypes*filetype_extent;
frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
}
}
}
else {
/* noncontiguous in memory as well as in file */
ADIOI_Flatten_datatype(datatype);
flat_buf = ADIOI_Flatlist;
while (flat_buf->type != datatype) flat_buf = flat_buf->next;
/* walk file blocks (index j, frd_size bytes left) and memory blocks
 * (index k, brd_size bytes left) in lock step; each step transfers the
 * smaller of the two remainders */
k = num = buf_count = 0;
i = (int) (flat_buf->indices[0]);
j = st_index;
off = offset;
n_filetypes = st_n_filetypes;
frd_size = st_frd_size;
brd_size = flat_buf->blocklens[0];
while (num < bufsize) {
size = ADIOI_MIN(frd_size, brd_size);
if (size) {
/* lseek(fd->fd_sys, off, SEEK_SET);
err = read(fd->fd_sys, ((char *) buf) + i, size); */
req_off = off;
req_len = size;
userbuf_off = i;
ADIOI_BUFFERED_READ
}
new_frd_size = frd_size;
new_brd_size = brd_size;
if (size == frd_size) {
/* reached end of contiguous block in file */
if (j < (flat_file->count - 1)) j++;
else {
j = 0;
n_filetypes++;
}
off = disp + flat_file->indices[j] +
(ADIO_Offset) n_filetypes*filetype_extent;
new_frd_size = flat_file->blocklens[j];
if (size != brd_size) {
i += size;
new_brd_size -= size;
}
}
if (size == brd_size) {
/* reached end of contiguous block in memory */
k = (k + 1)%flat_buf->count;
buf_count++;
i = (int) (buftype_extent*(buf_count/flat_buf->count) +
flat_buf->indices[k]);
new_brd_size = flat_buf->blocklens[k];
if (size != frd_size) {
off += size;
new_frd_size -= size;
}
}
num += size;
frd_size = new_frd_size;
brd_size = new_brd_size;
}
}
if (fd->atomicity)
ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;
/* readbuf is allocated above and may be reallocated by the macro */
ADIOI_Free(readbuf); /* malloced in the buffered_read macro */
if (err_flag) {
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
MPIR_ERR_RECOVERABLE, myname,
__LINE__, MPI_ERR_IO, "**io",
"**io %s", strerror(errno));
}
else *error_code = MPI_SUCCESS;
}
/* the descriptor position no longer matches any tracked pointer */
fd->fp_sys_posn = -1; /* set it to null. */
#ifdef HAVE_STATUS_SET_BYTES
MPIR_Status_set_bytes(status, datatype, bufsize);
/* This is a temporary way of filling in status. The right way is to
keep track of how much data was actually read and placed in buf
by ADIOI_BUFFERED_READ. */
#endif
if (!buftype_is_contig) ADIOI_Delete_flattened(datatype);
}

Просмотреть файл

@ -0,0 +1,68 @@
/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp. 2007, 2008 */
/* ---------------------------------------------------------------- */
/**
* \file ad_bgl_setsh.c
* \brief Setting of the shared file pointer for the BG/L ADIO driver
*/
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*/
#include "ad_bgl.h"
/* set the shared file pointer to "offset" etypes relative to the current
view */
/*
This looks very similar to ADIOI_GEN_Set_shared_fp, except this
function avoids locking the file twice. The generic version does
Write lock
ADIO_WriteContig
Unlock
For BGL, ADIOI_BGL_WriteContig does a lock before writing to disable
caching. To avoid the lock being called twice, this version for BGL does
Write lock
Lseek
Write
Unlock
*/
/**
 * Store offset as the new shared file pointer value.
 *
 * The value is written at position 0 of the shared-file-pointer file
 * (fd->shared_fp_fname), which is opened on first use.  The write is done
 * with lseek()/write() under a single write lock.
 *
 * \param fd          file whose shared file pointer is set
 * \param offset      value to store
 * \param error_code  MPI_SUCCESS; the error from ADIO_Open if opening the
 *                    shared-file-pointer file failed; or an MPI_ERR_IO
 *                    code if write() failed
 */
void ADIOI_BGL_Set_shared_fp(ADIO_File fd, ADIO_Offset offset, int *error_code)
{
    int err;
    int io_errno = 0;
    MPI_Comm dupcommself;
    static char myname[] = "ADIOI_BGL_SET_SHARED_FP";

    if (fd->shared_fp_fd == ADIO_FILE_NULL) {
        MPI_Comm_dup(MPI_COMM_SELF, &dupcommself);
        fd->shared_fp_fd = ADIO_Open(MPI_COMM_SELF, dupcommself,
                                     fd->shared_fp_fname,
                                     fd->file_system, fd->fns,
                                     ADIO_CREATE | ADIO_RDWR | ADIO_DELETE_ON_CLOSE,
                                     0, MPI_BYTE, MPI_BYTE, MPI_INFO_NULL,
                                     ADIO_PERM_NULL, error_code);
        /* *error_code is only meaningful here, right after ADIO_Open set
         * it.  Testing it when the file was already open would read
         * whatever the caller happened to leave in it. */
        if (*error_code != MPI_SUCCESS) return;
    }

    ADIOI_WRITE_LOCK(fd->shared_fp_fd, 0, SEEK_SET, sizeof(ADIO_Offset));
    lseek(fd->shared_fp_fd->fd_sys, 0, SEEK_SET);
    err = write(fd->shared_fp_fd->fd_sys, &offset, sizeof(ADIO_Offset));
    /* keep the reason for the failure: the unlock below may change errno */
    if (err == -1) io_errno = errno;
    ADIOI_UNLOCK(fd->shared_fp_fd, 0, SEEK_SET, sizeof(ADIO_Offset));

    if (err == -1) {
        *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__, MPI_ERR_IO,
                                           "**io",
                                           "**io %s", strerror(io_errno));
    }
    else *error_code = MPI_SUCCESS;
}

Просмотреть файл

@ -0,0 +1,104 @@
/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp. 2007, 2008 */
/* ---------------------------------------------------------------- */
/**
* \file ad_bgl_tuning.c
* \brief Tuning/profiling globals, environment parsing and timing report of the BG/L ADIO driver
*/
/*---------------------------------------------------------------------
* ad_bgl_tuning.c
*
* defines global variables and functions for performance tuning and
* functional debugging.
*---------------------------------------------------------------------*/
#include "ad_bgl_tuning.h"
#include "mpi.h"
/* Tuning switches; ad_bgl_get_env_vars() sets each one from the
 * environment variable of the same upper-case name (BGLMPIO_TIMING, ...) */
int bglmpio_timing;
int bglmpio_timing2;
int bglmpio_comm;
int bglmpio_tunegather;
int bglmpio_tuneblocking;
/* Profiling accumulators indexed by the BGLMPIO_CIO_* fields; the timing
 * report uses _cw when its rw argument is non-zero and _cr otherwise */
double bglmpio_prof_cw [BGLMPIO_CIO_LAST];
double bglmpio_prof_cr [BGLMPIO_CIO_LAST];
/* set internal variables for tuning environment variables */
/* Load every tuning switch from its environment variable.  A switch whose
 * variable is not set gets the default listed in the table; a variable that
 * is set is converted with atoi(). */
void ad_bgl_get_env_vars() {
    static const struct {
        const char *env_name;   /* environment variable to look up */
        int        *setting;    /* global that receives the value */
        int         fallback;   /* value used when the variable is unset */
    } knobs[] = {
        { "BGLMPIO_COMM",         &bglmpio_comm,         0 },
        { "BGLMPIO_TIMING",       &bglmpio_timing,       0 },
        { "BGLMPIO_TIMING2",      &bglmpio_timing2,      0 },
        { "BGLMPIO_TUNEGATHER",   &bglmpio_tunegather,   1 },
        { "BGLMPIO_TUNEBLOCKING", &bglmpio_tuneblocking, 1 },
    };
    unsigned idx;

    for (idx = 0; idx < sizeof knobs / sizeof knobs[0]; idx++) {
        const char *text;

        *knobs[idx].setting = knobs[idx].fallback;
        text = getenv( knobs[idx].env_name );
        if (text) *knobs[idx].setting = atoi(text);
    }
}
/* report timing breakdown for MPI I/O collective call */
/**
 * Report timing breakdown for MPI I/O collective call.
 *
 * Does nothing unless bglmpio_timing is set.  Otherwise the profiling
 * array (write counters when rw is non-zero, read counters otherwise) is
 * reduced over fd->comm twice -- once summed, once as maximum -- to
 * rank 0 of that communicator.  The process with myrank == 0 turns the
 * sums into averages, derives the bandwidth fields and prints everything
 * with printf().  Columns ending in "-a" show the average, columns ending
 * in "-m" the maximum over all processes.
 *
 * \param rw      non-zero: report writes ("W"); zero: report reads ("R")
 * \param fd      file whose communicator is used for the reductions
 * \param myrank  rank of the calling process; 0 prints
 * \param nprocs  number of processes, used for averaging
 */
void ad_bgl_wr_timing_report( int rw, ADIO_File fd, int myrank, int nprocs )
{
    int i;
    double *bglmpio_prof_org;
    double bglmpio_prof_avg[ BGLMPIO_CIO_LAST ];
    double bglmpio_prof_max[ BGLMPIO_CIO_LAST ];

    if (!bglmpio_timing) return;

    bglmpio_prof_org = rw ? bglmpio_prof_cw : bglmpio_prof_cr;

    /* both reductions are collective: every process has to take part */
    MPI_Reduce( bglmpio_prof_org, bglmpio_prof_avg, BGLMPIO_CIO_LAST, MPI_DOUBLE, MPI_SUM, 0, fd->comm );
    MPI_Reduce( bglmpio_prof_org, bglmpio_prof_max, BGLMPIO_CIO_LAST, MPI_DOUBLE, MPI_MAX, 0, fd->comm );

    if (myrank != 0) return;

    for (i=0; i<BGLMPIO_CIO_LAST; i++) bglmpio_prof_avg[i] /= nprocs;

    /* bandwidth = total bytes / slowest process' time */
    if (bglmpio_timing2) {
        bglmpio_prof_avg[ BGLMPIO_CIO_B_POSI_RW ] = bglmpio_prof_avg[ BGLMPIO_CIO_DATA_SIZE ] * nprocs /
                                                    bglmpio_prof_max[ BGLMPIO_CIO_T_POSI_RW ];
        bglmpio_prof_avg[ BGLMPIO_CIO_B_MPIO_RW ] = bglmpio_prof_avg[ BGLMPIO_CIO_DATA_SIZE ] * nprocs /
                                                    bglmpio_prof_max[ BGLMPIO_CIO_T_MPIO_RW ];
    } else {
        bglmpio_prof_avg[ BGLMPIO_CIO_B_POSI_RW ] = 0;
        bglmpio_prof_avg[ BGLMPIO_CIO_B_MPIO_RW ] = 0;
    }
    bglmpio_prof_avg[ BGLMPIO_CIO_B_MPIO_CRW ] = bglmpio_prof_avg[ BGLMPIO_CIO_DATA_SIZE ] * nprocs /
                                                 bglmpio_prof_max[ BGLMPIO_CIO_T_MPIO_CRW ];

    printf("\tTIMING-1 %1s , ", (rw ? "W" : "R") );
    printf( "SZ: %12.4f , ", bglmpio_prof_avg[ BGLMPIO_CIO_DATA_SIZE ] * nprocs );
    printf( "SK-a: %10.3f , ", bglmpio_prof_avg[ BGLMPIO_CIO_T_SEEK ] );
    printf( "SK-m: %10.3f , ", bglmpio_prof_max[ BGLMPIO_CIO_T_SEEK ] );
    printf( "LC-a: %10.3f , ", bglmpio_prof_avg[ BGLMPIO_CIO_T_LCOMP ] );
    printf( "GA-m: %10.3f , ", bglmpio_prof_max[ BGLMPIO_CIO_T_GATHER ] );
    printf( "AN-a: %10.3f , ", bglmpio_prof_avg[ BGLMPIO_CIO_T_PATANA ] );
    printf( "FD-a: %10.3f , ", bglmpio_prof_avg[ BGLMPIO_CIO_T_FD_PART ] );
    printf( "MY-a: %10.3f , ", bglmpio_prof_avg[ BGLMPIO_CIO_T_MYREQ ] );
    printf( "OT-m: %10.3f , ", bglmpio_prof_max[ BGLMPIO_CIO_T_OTHREQ ] );
    printf( "EX-m: %10.3f , ", bglmpio_prof_max[ BGLMPIO_CIO_T_DEXCH ] );

    printf("\tTIMING-2 %1s , ", (rw ? "W" : "R") );
    /* the three "-m" columns show the maximum, i.e. the same times the
     * bandwidth figures above are based on (they used to print the
     * average array under a "max" label) */
    printf( "PXT-m: %10.3f , ", bglmpio_prof_max[ BGLMPIO_CIO_T_POSI_RW ] );
    printf( "MPT-m: %10.3f , ", bglmpio_prof_max[ BGLMPIO_CIO_T_MPIO_RW ] );
    printf("MPTC-m: %10.3f , ", bglmpio_prof_max[ BGLMPIO_CIO_T_MPIO_CRW ] );
    printf( "PXB: %10.3f , ", bglmpio_prof_avg[ BGLMPIO_CIO_B_POSI_RW ] );
    printf( "MPB: %10.3f , ", bglmpio_prof_avg[ BGLMPIO_CIO_B_MPIO_RW ] );
    printf( "MPBC: %10.3f , ", bglmpio_prof_avg[ BGLMPIO_CIO_B_MPIO_CRW ] );
}

Просмотреть файл

@ -0,0 +1,94 @@
/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp. 2007, 2008 */
/* ---------------------------------------------------------------- */
/**
* \file ad_bgl_tuning.h
* \brief Declarations of the BG/L ADIO tuning/profiling variables, timing fields and macros
*/
/*---------------------------------------------------------------------
* ad_bgl_tuning.h
*
* declares global variables and macros for performance tuning and
* functional debugging.
*---------------------------------------------------------------------*/
#ifndef AD_BGL_TUNING_H_
#define AD_BGL_TUNING_H_
#include "adio.h"
/* Abort the whole MPI job (MPI_Abort on MPI_COMM_WORLD, error code 1) after
 * printing file and line to stderr when condition a is false.
 *
 * Wrapped in do { } while (0) so that "AD_BGL_assert(x);" is exactly one
 * statement and can be used as the body of an if/else without capturing
 * the else branch or breaking the syntax. */
#define AD_BGL_assert( a ) do { \
    if (!(a)) { \
        fprintf( stderr, "AD_BGL_assert, file=%s, line=%d\n", __FILE__, __LINE__ ); \
        MPI_Abort( MPI_COMM_WORLD, 1 ); \
    } \
} while (0)
/*-----------------------------------------
* Global variables for the control of
* 1. timing
* 2. select specific optimizations
*-----------------------------------------*/
/* timing fields */
/* Indices into bglmpio_prof_cw / bglmpio_prof_cr.  T_ fields hold times,
 * B_ fields are bandwidths derived by the timing report. */
enum {
BGLMPIO_CIO_DATA_SIZE=0, /* bytes requested, accumulated by the contiguous read/write routines */
BGLMPIO_CIO_T_SEEK, /* time spent around lseek() in the contiguous read/write routines */
BGLMPIO_CIO_T_LCOMP, /* time for ADIOI_Calc_my_off_len(), local */
BGLMPIO_CIO_T_GATHER, /* time for previous MPI_Allgather, now Allreduce */
BGLMPIO_CIO_T_PATANA, /* time for a quick test if access is contiguous or not, local */
BGLMPIO_CIO_T_FD_PART, /* time for file domain partitioning, local */
BGLMPIO_CIO_T_MYREQ, /* time for ADIOI_BGL_Calc_my_req(), local */
BGLMPIO_CIO_T_OTHREQ, /* time for ADIOI_Calc_others_req(), short Alltoall */
BGLMPIO_CIO_T_DEXCH, /* time for I/O data exchange */
BGLMPIO_CIO_T_POSI_RW, /* time spent in the POSIX read()/write() calls */
BGLMPIO_CIO_B_POSI_RW, /* report only: total data size / max T_POSI_RW */
BGLMPIO_CIO_T_MPIO_RW, /* time for ADIOI_BGL_WriteContig() */
BGLMPIO_CIO_B_MPIO_RW, /* report only: total data size / max T_MPIO_RW */
BGLMPIO_CIO_T_MPIO_CRW, /* time for ADIOI_BGL_WriteStridedColl() */
BGLMPIO_CIO_B_MPIO_CRW, /* report only: total data size / max T_MPIO_CRW */
BGLMPIO_CIO_LAST /* number of fields; size of the profiling arrays */
};
/* profiling accumulators for collective writes (cw) and reads (cr),
 * indexed by the BGLMPIO_CIO_* fields above */
extern double bglmpio_prof_cw [BGLMPIO_CIO_LAST];
extern double bglmpio_prof_cr [BGLMPIO_CIO_LAST];
/* corresponds to environment variables to select optimizations and timing level */
extern int bglmpio_timing;
extern int bglmpio_timing2;
extern int bglmpio_comm;
extern int bglmpio_tunegather;
extern int bglmpio_tuneblocking;
/* set internal variables for tuning environment variables */
void ad_bgl_get_env_vars();
/* report timing breakdown for MPI I/O collective call */
/* NOTE(review): the definition in ad_bgl_tuning.c is named
 * ad_bgl_wr_timing_report, not ad_bgl_timing_crw_report -- confirm which
 * name is intended; as declared here this function has no visible
 * definition. */
void ad_bgl_timing_crw_report( int rw, ADIO_File fd, int myrank, int nprocs );
/* note:
* T := timing;
* CIO := collective I/O
*/
/* Zero the profiling array bglmpio_prof_c<RW> (RW is pasted in, i.e. r or
 * w) when the timing level is at least LEVEL.
 * NOTE(review): bglmpio_timing_cw_level and BGLMPIO_T_LAST are not declared
 * in this header (the enum above ends with BGLMPIO_CIO_LAST) -- confirm
 * they exist elsewhere before using this macro.
 * NOTE(review): expands to a bare if statement; take care when using it as
 * the body of an if/else. */
#define BGLMPIO_T_CIO_RESET( LEVEL, RW ) \
if (bglmpio_timing_cw_level >= LEVEL) { \
int i; \
for ( i = 0; i < BGLMPIO_T_LAST; i ++ ) \
bglmpio_prof_c##RW [ i ] = 0; \
}
/* Call ad_bgl_timing_crw_report( RW, FD, MYRANK, NPROCS ) when the timing
 * level is at least LEVEL.
 * NOTE(review): bglmpio_timing_cw_level is not declared in this header --
 * confirm it exists elsewhere before using this macro. */
#define BGLMPIO_T_CIO_REPORT( LEVEL, RW, FD, MYRANK, NPROCS ) \
if (bglmpio_timing_cw_level >= LEVEL) { \
ad_bgl_timing_crw_report ( RW, FD, MYRANK, NPROCS ); \
}
/* Take a time stamp when the timing level is at least LEVEL:
 *   - if DOBAR is true, first synchronize on fd->comm (a variable named fd
 *     must be in scope where the macro is used);
 *   - if ISSET is true, store the time stamp in bglmpio_prof_c<RW>[VAR1];
 *   - if ISGET is true, replace bglmpio_prof_c<RW>[VAR2] by the time
 *     elapsed since the value stored there.
 * NOTE(review): bglmpio_timing_cw_level is not declared in this header --
 * confirm it exists elsewhere before using this macro. */
#define BGLMPIO_T_CIO_SET_GET( LEVEL, RW, DOBAR, ISSET, ISGET, VAR1, VAR2 ) \
if (bglmpio_timing_cw_level >= LEVEL) { \
if ( DOBAR ) MPI_Barrier( fd->comm ); \
double temp = MPI_Wtime(); \
if ( ISSET ) bglmpio_prof_c##RW [ VAR1 ] = temp; \
if ( ISGET ) bglmpio_prof_c##RW [ VAR2 ] = temp - bglmpio_prof_c##RW [ VAR2 ] ; \
}
#endif /* AD_BGL_TUNING_H_ */

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -0,0 +1,546 @@
/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp. 2007, 2008 */
/* ---------------------------------------------------------------- */
/**
* \file ad_bgl_write.c
* \brief Write routines of the BG/L ADIO driver
*/
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*/
#include "ad_bgl.h"
#include "adio_extern.h"
#include "ad_bgl_tuning.h"
/**
 * Write count elements of datatype from buf to the file with one
 * contiguous write().
 *
 * With file_ptr_type == ADIO_EXPLICIT_OFFSET the data is written at offset
 * and the individual file pointer is left alone; otherwise it is written
 * at fd->fp_ind, which is then advanced by the number of bytes written.
 * The byte range is write-locked around the write.
 *
 * If write() fails, fd->fp_ind is left unchanged and fd->fp_sys_posn is
 * set to -1 so that the next access re-seeks, instead of folding the -1
 * return value into the file positions.
 *
 * \param status      receives the byte count (when HAVE_STATUS_SET_BYTES)
 * \param error_code  MPI_SUCCESS, or an MPI_ERR_IO code built from the
 *                    errno of the failed write()
 */
void ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count,
                           MPI_Datatype datatype, int file_ptr_type,
                           ADIO_Offset offset, ADIO_Status *status, int *error_code)
{
    int err=-1, datatype_size, len;
    int io_errno = 0;
    static char myname[] = "ADIOI_BGL_WRITECONTIG";
#if BGL_PROFILE
    /* timing */
    double io_time = 0.0, io_time2 = 0.0;

    if (bglmpio_timing) io_time = MPI_Wtime();
#endif

    MPI_Type_size(datatype, &datatype_size);
    len = datatype_size * count;

#if BGL_PROFILE
    /* account the request size only now that len has been computed */
    if (bglmpio_timing) bglmpio_prof_cw[ BGLMPIO_CIO_DATA_SIZE ] += len;
#endif

    /* write from curr. location of ind. file pointer unless told otherwise */
    if (file_ptr_type != ADIO_EXPLICIT_OFFSET)
        offset = fd->fp_ind;

#if BGL_PROFILE
    if (bglmpio_timing2) io_time2 = MPI_Wtime();
#endif
    if (fd->fp_sys_posn != offset)
        lseek(fd->fd_sys, offset, SEEK_SET);
#if BGL_PROFILE
    if (bglmpio_timing2) bglmpio_prof_cw[ BGLMPIO_CIO_T_SEEK ] += (MPI_Wtime() - io_time2);
#endif

    ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);

#if BGL_PROFILE
    if (bglmpio_timing2) io_time2 = MPI_Wtime();
#endif
    err = write(fd->fd_sys, buf, len);
    /* keep the reason for the failure: the unlock below may change errno */
    if (err == -1) io_errno = errno;
#if BGL_PROFILE
    if (bglmpio_timing2) bglmpio_prof_cw[ BGLMPIO_CIO_T_POSI_RW ] += (MPI_Wtime() - io_time2);
#endif

    ADIOI_UNLOCK(fd, offset, SEEK_SET, len);

    if (err == -1) {
        /* position is no longer trustworthy; force a seek next time */
        fd->fp_sys_posn = -1;
    }
    else if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
        fd->fp_sys_posn = offset + err;
        /* individual file pointer not updated */
    }
    else {
        fd->fp_ind += err;
        fd->fp_sys_posn = fd->fp_ind;
    }

#if BGL_PROFILE
    if (bglmpio_timing) bglmpio_prof_cw[ BGLMPIO_CIO_T_MPIO_RW ] += (MPI_Wtime() - io_time);
#endif

    /* --BEGIN ERROR HANDLING-- */
    if (err == -1) {
        *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__, MPI_ERR_IO,
                                           "**io",
                                           "**io %s", strerror(io_errno));
        return;
    }
    /* --END ERROR HANDLING-- */

#ifdef HAVE_STATUS_SET_BYTES
    MPIR_Status_set_bytes(status, datatype, err);
#endif

    *error_code = MPI_SUCCESS;
}
/* ADIOI_BUFFERED_WRITE: stage one piece of a strided write (req_len bytes
   destined for file offset req_off, taken from buf + userbuf_off) into the
   staging buffer "writebuf", doing read-modify-write on the file.

   The macro is not self-contained: it reads and updates these names from
   the scope it is expanded in: fd, buf, req_off, req_len, userbuf_off,
   writebuf, writebuf_off, writebuf_len, write_sz, max_bufsize, end_offset,
   err, err_flag, error_code and myname.

   Steps:
   1. If req_off lies at or beyond the end of the current window
      [writebuf_off, writebuf_off + writebuf_len), the window is written
      back to the file, unlocked (only when atomicity is off), moved so that
      it starts at req_off, locked again (only when atomicity is off) and
      refilled by reading the file.
   2. As much of the request as fits in the window is copied in.
   3. While part of the request is left over, the window is written back,
      advanced by writebuf_len, refilled from the file, and the next part
      of the request is copied to the start of the window.

   A failed write() only sets err_flag; a failed read() stores an error in
   *error_code and executes "return", i.e. it leaves the function the macro
   was expanded in.

   NOTE(review): that "return" happens right after a lock was taken and
   without touching writebuf, so the expanding function appears to exit
   with the lock held and the buffer still allocated - confirm and fix at
   the call sites. */
#define ADIOI_BUFFERED_WRITE \
{ \
if (req_off >= writebuf_off + writebuf_len) { \
lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
err = write(fd->fd_sys, writebuf, writebuf_len); \
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
if (err == -1) err_flag = 1; \
writebuf_off = req_off; \
writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
err = read(fd->fd_sys, writebuf, writebuf_len); \
if (err == -1) { \
*error_code = MPIO_Err_create_code(MPI_SUCCESS, \
MPIR_ERR_RECOVERABLE, myname, \
__LINE__, MPI_ERR_IO, \
"**ioRMWrdwr", 0); \
return; \
} \
} \
write_sz = (int) (ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \
memcpy(writebuf+req_off-writebuf_off, (char *)buf +userbuf_off, write_sz);\
while (write_sz != req_len) { \
lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
err = write(fd->fd_sys, writebuf, writebuf_len); \
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
if (err == -1) err_flag = 1; \
req_len -= write_sz; \
userbuf_off += write_sz; \
writebuf_off += writebuf_len; \
writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
err = read(fd->fd_sys, writebuf, writebuf_len); \
if (err == -1) { \
*error_code = MPIO_Err_create_code(MPI_SUCCESS, \
MPIR_ERR_RECOVERABLE, myname, \
__LINE__, MPI_ERR_IO, \
"**ioRMWrdwr", 0); \
return; \
} \
write_sz = ADIOI_MIN(req_len, writebuf_len); \
memcpy(writebuf, (char *)buf + userbuf_off, write_sz);\
} \
}
/* ADIOI_BUFFERED_WRITE_WITHOUT_READ: this macro is used when filetype is
   contig and buftype is not contig. It stages req_len bytes from
   buf + userbuf_off, destined for file offset req_off, into "writebuf".

   It does not do a read-modify-write: the staging buffer is never filled
   from the file, it is only written out. When atomicity is off, each
   write-back is bracketed by ADIOI_WRITE_LOCK / ADIOI_UNLOCK on the window
   being written; when atomicity is on the macro itself takes no lock.

   Like ADIOI_BUFFERED_WRITE it depends on names from the expanding scope:
   fd, buf, req_off, req_len, userbuf_off, writebuf, writebuf_off,
   writebuf_len, write_sz, max_bufsize, end_offset, err and err_flag.
   A failed write() is recorded in err_flag; the macro never returns from
   the expanding function. */
#define ADIOI_BUFFERED_WRITE_WITHOUT_READ \
{ \
if (req_off >= writebuf_off + writebuf_len) { \
lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
err = write(fd->fd_sys, writebuf, writebuf_len); \
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
if (err == -1) err_flag = 1; \
writebuf_off = req_off; \
writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
} \
write_sz = (int) (ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \
memcpy(writebuf+req_off-writebuf_off, (char *)buf +userbuf_off, write_sz);\
while (write_sz != req_len) { \
lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
err = write(fd->fd_sys, writebuf, writebuf_len); \
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
if (err == -1) err_flag = 1; \
req_len -= write_sz; \
userbuf_off += write_sz; \
writebuf_off += writebuf_len; \
writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
write_sz = ADIOI_MIN(req_len, writebuf_len); \
memcpy(writebuf, (char *)buf + userbuf_off, write_sz);\
} \
}
/*
 * ADIOI_BGL_WriteStrided - write a request that is noncontiguous in memory
 * and/or in the file, using a staging buffer (data sieving).
 *
 * fd            - open ADIO file; fd->filetype, fd->disp, fd->etype_size,
 *                 fd->fp_ind, fd->atomicity, fd->hints and fd->info are read
 * buf           - user buffer holding "count" items of "datatype"
 * count         - number of datatype items to write
 * datatype      - memory datatype (flattened here when it is not contiguous)
 * file_ptr_type - ADIO_INDIVIDUAL: start at fd->fp_ind (bytes) and advance
 *                 it; otherwise "offset" is used
 * offset        - explicit offset in units of etype relative to the filetype
 * status        - filled with the requested byte count when
 *                 HAVE_STATUS_SET_BYTES is defined
 * error_code    - MPI_SUCCESS, or an MPI_ERR_IO class code on failure
 *
 * If the ds_write hint is ADIOI_HINT_DISABLE the call is forwarded to
 * ADIOI_GEN_WriteStrided_naive. Otherwise the staging buffer size is taken
 * from the "ind_wr_buffer_size" info key.
 *
 * Locking: with atomicity on, the whole accessed range is write-locked once;
 * with atomicity off, each staging-buffer window is locked separately.
 *
 * NOTE(review): the result of MPI_Info_get (info_flag) is not checked before
 * atoi(value); this assumes the key is always present in fd->info - confirm.
 * NOTE(review): ADIOI_BUFFERED_WRITE contains a "return" on read failure
 * that leaves this function without releasing the lock or freeing writebuf.
 */
void ADIOI_BGL_WriteStrided(ADIO_File fd, void *buf, int count,
                            MPI_Datatype datatype, int file_ptr_type,
                            ADIO_Offset offset, ADIO_Status *status, int
                            *error_code)
{
/* offset is in units of etype relative to the filetype. */

    ADIOI_Flatlist_node *flat_buf, *flat_file;
    int i, j, k, err=-1, bwr_size, fwr_size=0, st_index=0;
    int bufsize, num, size, sum, n_etypes_in_filetype, size_in_filetype;
    int n_filetypes, etype_in_filetype;
    ADIO_Offset abs_off_in_filetype=0;
    int filetype_size, etype_size, buftype_size, req_len;
    MPI_Aint filetype_extent, buftype_extent;
    int buf_count, buftype_is_contig, filetype_is_contig;
    ADIO_Offset userbuf_off;
    ADIO_Offset off, req_off, disp, end_offset=0, writebuf_off, start_off;
    char *writebuf, *value;
    int flag, st_fwr_size, st_n_filetypes, writebuf_len, write_sz;
    int new_bwr_size, new_fwr_size, err_flag=0, info_flag, max_bufsize;
    static char myname[] = "ADIOI_BGL_WRITESTRIDED";

    if (fd->hints->ds_write == ADIOI_HINT_DISABLE) {
        /* if user has disabled data sieving on writes, use naive
         * approach instead.
         */
        /*FPRINTF(stderr, "ADIOI_GEN_WriteStrided_naive(%d):\n", __LINE__);*/
        ADIOI_GEN_WriteStrided_naive(fd,
                                     buf,
                                     count,
                                     datatype,
                                     file_ptr_type,
                                     offset,
                                     status,
                                     error_code);
        return;
    }
    /*FPRINTF(stderr, "%s(%d):\n",myname, __LINE__);*/

    ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);

    MPI_Type_size(fd->filetype, &filetype_size);
    if ( ! filetype_size ) {
        /* an empty filetype means there is nothing to write */
        *error_code = MPI_SUCCESS;
        return;
    }

    MPI_Type_extent(fd->filetype, &filetype_extent);
    MPI_Type_size(datatype, &buftype_size);
    MPI_Type_extent(datatype, &buftype_extent);
    etype_size = fd->etype_size;

    bufsize = buftype_size * count;

    /* get max_bufsize from the info object. */
    value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
    MPI_Info_get(fd->info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, value,
                 &info_flag);
    max_bufsize = atoi(value);
    ADIOI_Free(value);

    if (!buftype_is_contig && filetype_is_contig) {

        /* noncontiguous in memory, contiguous in file. */

        ADIOI_Flatten_datatype(datatype);
        flat_buf = ADIOI_Flatlist;
        while (flat_buf->type != datatype) flat_buf = flat_buf->next;

        off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind :
              fd->disp + etype_size * offset;

        start_off = off;
        end_offset = off + bufsize - 1;
        writebuf_off = off;
        writebuf = (char *) ADIOI_Malloc(max_bufsize);
        writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));

        /* if atomicity is true, lock the region to be accessed */
        if (fd->atomicity)
            ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);

        for (j=0; j<count; j++)
            for (i=0; i<flat_buf->count; i++) {
                userbuf_off = j*buftype_extent + flat_buf->indices[i];
                req_off = off;
                req_len = flat_buf->blocklens[i];
                ADIOI_BUFFERED_WRITE_WITHOUT_READ
                off += flat_buf->blocklens[i];
            }

        /* write the buffer out finally */
        lseek(fd->fd_sys, writebuf_off, SEEK_SET);
        if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
        err = write(fd->fd_sys, writebuf, writebuf_len);
        if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
        if (err == -1) err_flag = 1;

        if (fd->atomicity)
            ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);

        ADIOI_Free(writebuf); /* malloced above in this branch */

        if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;
        if (err_flag) {
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE, myname,
                                               __LINE__, MPI_ERR_IO, "**io",
                                               "**io %s", strerror(errno));
        }
        else *error_code = MPI_SUCCESS;
    }

    else {  /* noncontiguous in file */

        /* filetype already flattened in ADIO_Open */
        flat_file = ADIOI_Flatlist;
        while (flat_file->type != fd->filetype) flat_file = flat_file->next;
        disp = fd->disp;

        if (file_ptr_type == ADIO_INDIVIDUAL) {
            /* locate the filetype block that contains (or ends at) the
               individual file pointer */
            offset = fd->fp_ind; /* in bytes */
            n_filetypes = -1;
            flag = 0;
            while (!flag) {
                n_filetypes++;
                for (i=0; i<flat_file->count; i++) {
                    if (disp + flat_file->indices[i] +
                        (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i]
                        >= offset) {
                        st_index = i;
                        fwr_size = (int) (disp + flat_file->indices[i] +
                                          (ADIO_Offset) n_filetypes*filetype_extent
                                          + flat_file->blocklens[i] - offset);
                        flag = 1;
                        break;
                    }
                }
            }
        }
        else {
            /* translate the etype offset into a block index and a byte
               offset inside the filetype */
            n_etypes_in_filetype = filetype_size/etype_size;
            n_filetypes = (int) (offset / n_etypes_in_filetype);
            etype_in_filetype = (int) (offset % n_etypes_in_filetype);
            size_in_filetype = etype_in_filetype * etype_size;

            sum = 0;
            for (i=0; i<flat_file->count; i++) {
                sum += flat_file->blocklens[i];
                if (sum > size_in_filetype) {
                    st_index = i;
                    fwr_size = sum - size_in_filetype;
                    abs_off_in_filetype = flat_file->indices[i] +
                        size_in_filetype - (sum - flat_file->blocklens[i]);
                    break;
                }
            }

            /* abs. offset in bytes in the file */
            offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype;
        }

        start_off = offset;

        /* Calculate end_offset, the last byte-offset that will be accessed.
           e.g., if start_offset=0 and 100 bytes to be write, end_offset=99*/

        st_fwr_size = fwr_size;
        st_n_filetypes = n_filetypes;
        i = 0;
        j = st_index;
        off = offset;
        fwr_size = ADIOI_MIN(st_fwr_size, bufsize);
        while (i < bufsize) {
            i += fwr_size;
            end_offset = off + fwr_size - 1;

            if (j < (flat_file->count - 1)) j++;
            else {
                j = 0;
                n_filetypes++;
            }

            off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent;
            fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
        }

        /* if atomicity is true, lock the region to be accessed */
        if (fd->atomicity)
            ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);

        /* initial read for the read-modify-write */
        writebuf_off = offset;
        writebuf = (char *) ADIOI_Malloc(max_bufsize);
        writebuf_len = (int)(ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));
        if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
        lseek(fd->fd_sys, writebuf_off, SEEK_SET);
        err = read(fd->fd_sys, writebuf, writebuf_len);
        if (err == -1) {
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE,
                                               myname, __LINE__,
                                               MPI_ERR_IO,
                                               "ADIOI_BGL_WriteStrided: ROMIO tries to optimize this access by doing a read-modify-write, but is unable to read the file. Please give the file read permission and open it with MPI_MODE_RDWR.", 0);
            /* Do not leave with the byte-range lock held or the staging
               buffer allocated: release whichever lock was taken above. */
            if (!(fd->atomicity))
                ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
            else ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
            ADIOI_Free(writebuf);
            /* the lseek above moved the system file pointer, so the cached
               position is no longer valid */
            fd->fp_sys_posn = -1;
            return;
        }

        if (buftype_is_contig && !filetype_is_contig) {

            /* contiguous in memory, noncontiguous in file. should be the most
               common case. */

            i = 0;
            j = st_index;
            off = offset;
            n_filetypes = st_n_filetypes;
            fwr_size = ADIOI_MIN(st_fwr_size, bufsize);
            while (i < bufsize) {
                if (fwr_size) {
                    /* TYPE_UB and TYPE_LB can result in
                       fwr_size = 0. save system call in such cases */
                    /* lseek(fd->fd_sys, off, SEEK_SET);
                       err = write(fd->fd_sys, ((char *) buf) + i, fwr_size);*/

                    req_off = off;
                    req_len = fwr_size;
                    userbuf_off = i;
                    ADIOI_BUFFERED_WRITE
                }
                i += fwr_size;

                if (off + fwr_size < disp + flat_file->indices[j] +
                    flat_file->blocklens[j] + (ADIO_Offset) n_filetypes*filetype_extent)
                    off += fwr_size;
                /* did not reach end of contiguous block in filetype.
                   no more I/O needed. off is incremented by fwr_size. */
                else {
                    if (j < (flat_file->count - 1)) j++;
                    else {
                        j = 0;
                        n_filetypes++;
                    }
                    off = disp + flat_file->indices[j] +
                          (ADIO_Offset) n_filetypes*filetype_extent;
                    fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
                }
            }
        }
        else {
            /* noncontiguous in memory as well as in file */

            ADIOI_Flatten_datatype(datatype);
            flat_buf = ADIOI_Flatlist;
            while (flat_buf->type != datatype) flat_buf = flat_buf->next;

            k = num = buf_count = 0;
            i = (int) (flat_buf->indices[0]);
            j = st_index;
            off = offset;
            n_filetypes = st_n_filetypes;
            fwr_size = st_fwr_size;
            bwr_size = flat_buf->blocklens[0];

            while (num < bufsize) {
                /* each step moves the smaller of the remaining file block
                   and the remaining memory block */
                size = ADIOI_MIN(fwr_size, bwr_size);
                if (size) {
                    /* lseek(fd->fd_sys, off, SEEK_SET);
                       err = write(fd->fd_sys, ((char *) buf) + i, size); */

                    req_off = off;
                    req_len = size;
                    userbuf_off = i;
                    ADIOI_BUFFERED_WRITE
                }

                new_fwr_size = fwr_size;
                new_bwr_size = bwr_size;

                if (size == fwr_size) {
                    /* reached end of contiguous block in file */
                    if (j < (flat_file->count - 1)) j++;
                    else {
                        j = 0;
                        n_filetypes++;
                    }

                    off = disp + flat_file->indices[j] +
                          (ADIO_Offset) n_filetypes*filetype_extent;

                    new_fwr_size = flat_file->blocklens[j];
                    if (size != bwr_size) {
                        i += size;
                        new_bwr_size -= size;
                    }
                }

                if (size == bwr_size) {
                    /* reached end of contiguous block in memory */

                    k = (k + 1)%flat_buf->count;
                    buf_count++;
                    i = (int) (buftype_extent*(buf_count/flat_buf->count) +
                               flat_buf->indices[k]);
                    new_bwr_size = flat_buf->blocklens[k];
                    if (size != fwr_size) {
                        off += size;
                        new_fwr_size -= size;
                    }
                }
                num += size;
                fwr_size = new_fwr_size;
                bwr_size = new_bwr_size;
            }
        }

        /* write the buffer out finally */
        lseek(fd->fd_sys, writebuf_off, SEEK_SET);
        if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
        err = write(fd->fd_sys, writebuf, writebuf_len);

        if (!(fd->atomicity))
            ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
        else ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);

        if (err == -1) err_flag = 1;

        ADIOI_Free(writebuf); /* malloced above, before the initial read */

        if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;
        if (err_flag) {
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE, myname,
                                               __LINE__, MPI_ERR_IO, "**io",
                                               "**io %s", strerror(errno));
        }
        else *error_code = MPI_SUCCESS;
    }

    fd->fp_sys_posn = -1;   /* set it to null. */

#ifdef HAVE_STATUS_SET_BYTES
    MPIR_Status_set_bytes(status, datatype, bufsize);
/* This is a temporary way of filling in status. The right way is to
   keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */
#endif

    if (!buftype_is_contig) ADIOI_Delete_flattened(datatype);
}

Просмотреть файл

@ -0,0 +1,7 @@
<dir>
<file name="ad_bglockless.c" info="1205188711"/>
</dir>
<data>
<fileinfo name="ad_bglockless.c">
</fileinfo>
</data>

Просмотреть файл

@ -0,0 +1,24 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
include $(top_srcdir)/Makefile.options
noinst_LTLIBRARIES = libadio_bglockless.la
libadio_bglockless_la_SOURCES = \
ad_bglockless.c

Просмотреть файл

@ -0,0 +1,49 @@
CC = @CC@
AR = @AR@
RANLIB = @RANLIB@
LIBNAME = @LIBNAME@
srcdir = @srcdir@
CC_SHL = @CC_SHL@
SHLIBNAME = @SHLIBNAME@
INCLUDE_DIR = -I@MPI_INCLUDE_DIR@ -I${srcdir}/../include -I../include -I../../include -I${srcdir}/../../../../include -I../../../../include
CFLAGS = @CPPFLAGS@ @CFLAGS@ $(INCLUDE_DIR)
top_builddir = @master_topbuild_dir@
LIBTOOL = @LIBTOOL@
C_COMPILE_SHL = $(CC_SHL)
@VPATH@
AD_BGLOCKLESS_OBJECTS = ad_bglockless.o
default: $(LIBNAME)
@if [ "@ENABLE_SHLIB@" != "none" ] ; then \
$(MAKE) $(SHLIBNAME).la ;\
fi
.SUFFIXES: $(SUFFIXES) .p .lo
.c.o:
$(CC) $(CFLAGS) -c $<
.c.lo:
$(C_COMPILE_SHL) $(CFLAGS) -c $< -o _s$*.o
@mv -f _s$*.o $*.lo
$(LIBNAME): $(AD_BGLOCKLESS_OBJECTS)
$(AR) $(LIBNAME) $(AD_BGLOCKLESS_OBJECTS)
$(RANLIB) $(LIBNAME)
AD_BGLOCKLESS_LOOBJECTS=$(AD_BGLOCKLESS_OBJECTS:.o=.lo)
$(SHLIBNAME).la: $(AD_BGLOCKLESS_LOOBJECTS)
$(AR) $(SHLIBNAME).la $(AD_BGLOCKLESS_LOOBJECTS)
coverage:
-@for file in ${AD_BGLOCKLESS_OBJECTS:.o=.c} ; do \
gcov -b -f $$file ; done
clean:
@rm -f *.o *.lo *.gcno *.gcda *.bb *.bbg
@rm -f ${srcdir}/*.gcno ${srcdir}/*.gcda
@rm -f ${srcdir}/*.bb ${srcdir}/*.bbg

Просмотреть файл

@ -0,0 +1,41 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
*
* Copyright (C) 2001 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*/
#include "../ad_bgl/ad_bgl.h"
/* adioi.h has the ADIOI_Fns_struct define */
#include "adioi.h"
/* Function table for the "bglockless" ADIO driver.
 *
 * The entries are positional; the trailing comment on each line names the
 * ADIOI_Fns_struct slot it fills. The table reuses the ADIOI_BGL_* routines
 * for open, close, SetInfo and the collective strided calls, uses
 * ADIOI_NOLOCK_WriteStrided for independent strided writes, and falls back
 * to the generic ADIOI_GEN_* routines for everything else. Nonblocking
 * contiguous I/O uses the generic routines only when ROMIO_HAVE_WORKING_AIO
 * is defined, otherwise the ADIOI_FAKE_* ones. */
struct ADIOI_Fns_struct ADIO_BGLOCKLESS_operations = {
ADIOI_BGL_Open, /* Open */
ADIOI_GEN_ReadContig, /* ReadContig */
ADIOI_GEN_WriteContig, /* WriteContig */
ADIOI_BGL_ReadStridedColl, /* ReadStridedColl */
ADIOI_BGL_WriteStridedColl, /* WriteStridedColl */
ADIOI_GEN_SeekIndividual, /* SeekIndividual */
ADIOI_GEN_Fcntl, /* Fcntl */
ADIOI_BGL_SetInfo, /* SetInfo */
ADIOI_GEN_ReadStrided, /* ReadStrided */
ADIOI_NOLOCK_WriteStrided, /* WriteStrided */
ADIOI_BGL_Close, /* Close */
#ifdef ROMIO_HAVE_WORKING_AIO
ADIOI_GEN_IreadContig, /* IreadContig */
ADIOI_GEN_IwriteContig, /* IwriteContig */
#else
ADIOI_FAKE_IreadContig, /* IreadContig */
ADIOI_FAKE_IwriteContig, /* IwriteContig */
#endif
ADIOI_GEN_IODone, /* ReadDone */
ADIOI_GEN_IODone, /* WriteDone */
ADIOI_GEN_IOComplete, /* ReadComplete */
ADIOI_GEN_IOComplete, /* WriteComplete */
ADIOI_GEN_IreadStrided, /* IreadStrided */
ADIOI_GEN_IwriteStrided, /* IwriteStrided */
ADIOI_GEN_Flush, /* Flush */
ADIOI_GEN_Resize, /* Resize */
ADIOI_GEN_Delete, /* Delete */
};

Просмотреть файл

@ -0,0 +1,22 @@
<dir>
<file name="ad_lustre_fcntl.c" info="1204573775"/>
<file name="ad_lustre_hints.c" info="1204573775"/>
<file name="ad_lustre_open.c" info="1204573775"/>
<file name="ad_lustre_rwcontig.c" info="1204573775"/>
<file name="ad_lustre.h" info="1204573775"/>
<file name="ad_lustre.c" info="1204573775"/>
</dir>
<data>
<fileinfo name="ad_lustre_fcntl.c">
</fileinfo>
<fileinfo name="ad_lustre_hints.c">
</fileinfo>
<fileinfo name="ad_lustre_open.c">
</fileinfo>
<fileinfo name="ad_lustre_rwcontig.c">
</fileinfo>
<fileinfo name="ad_lustre.h">
</fileinfo>
<fileinfo name="ad_lustre.c">
</fileinfo>
</data>

Просмотреть файл

@ -0,0 +1,31 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
include $(top_srcdir)/Makefile.options
EXTRA_DIST = README
noinst_LTLIBRARIES = libadio_lustre.la
libadio_lustre_la_SOURCES = \
ad_lustre.c \
ad_lustre_fcntl.c \
ad_lustre.h \
ad_lustre_hints.c \
ad_lustre_open.c \
ad_lustre_rwcontig.c

Просмотреть файл

@ -0,0 +1,47 @@
CC = @CC@
AR = @AR@
RANLIB = @RANLIB@
LIBNAME = @LIBNAME@
srcdir = @srcdir@
CC_SHL = @CC_SHL@
SHLIBNAME = @SHLIBNAME@
INCLUDE_DIR = -I@MPI_INCLUDE_DIR@ -I${srcdir}/../include -I../include -I../../include -I${srcdir}/../../../../include -I../../../../include
CFLAGS = @CPPFLAGS@ @CFLAGS@ $(INCLUDE_DIR)
top_builddir = @master_topbuild_dir@
LIBTOOL = @LIBTOOL@
C_COMPILE_SHL = $(CC_SHL) @CFLAGS@ $(INCLUDE_DIR)
@VPATH@
AD_LUSTRE_OBJECTS = ad_lustre.o ad_lustre_open.o \
ad_lustre_rwcontig.o ad_lustre_hints.o
default: $(LIBNAME)
@if [ "@ENABLE_SHLIB@" != "none" ] ; then \
$(MAKE) $(SHLIBNAME).la ;\
fi
.SUFFIXES: $(SUFFIXES) .p .lo
.c.o:
$(CC) $(CFLAGS) -c $<
.c.lo:
$(C_COMPILE_SHL) -c $< -o _s$*.o
@mv -f _s$*.o $*.lo
$(LIBNAME): $(AD_LUSTRE_OBJECTS)
$(AR) $(LIBNAME) $(AD_LUSTRE_OBJECTS)
$(RANLIB) $(LIBNAME)
AD_LUSTRE_LOOBJECTS=$(AD_LUSTRE_OBJECTS:.o=.lo)
$(SHLIBNAME).la: $(AD_LUSTRE_LOOBJECTS)
$(AR) $(SHLIBNAME).la $(AD_LUSTRE_LOOBJECTS)
coverage:
-@for file in ${AD_LUSTRE_OBJECTS:.o=.c} ; do \
gcov -b -f $$file ; done
clean:
@rm -f *.o *.lo

Просмотреть файл

@ -0,0 +1,40 @@
Upcoming soon:
o Hierarchical striping as described in the paper from CCGrid2007
http://ft.ornl.gov/projects/io/pubs/CCGrid-2007-file-joining.pdf
Further out:
o To post the code for ParColl (Partitioned collective IO)
-----------------------------------------------------
V04:
-----------------------------------------------------
o Direct IO and Lockless IO support
-----------------------------------------------------
V03:
-----------------------------------------------------
o Correct detection of fs_type when lustre: prefix is not given
o Further fix on stripe alignment
o Tested/Enabled striping hints over Cray XT (Catamount and CNL)
-----------------------------------------------------
V02:
-----------------------------------------------------
The Lustre ADIO driver has been cleaned up quite a lot. Compared
to the initial posting, here are the changes:
o Removal of dead/redundant code
o Removal of asynchronous IO piece as it appears outdated
o Bug fixes for setting Lustre Hints
o Bug fixes for data sieving
o Improved Setsize operation with one process calling ftruncate
o Improved collective IO with domain partitioning on
Lustre stripe boundary
Contributing:
o You may contribute via many different ways, such as
testing results, bug reports, and new feature patches.
o We appreciate any courtesy reference of this work.
o Disclaimer: you are welcome to try the code, but at your own risk.
Contact info:
For more info, visit http://ft.ornl.gov/projects/io/

Просмотреть файл

@ -0,0 +1,39 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (C) 2001 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*
* Copyright (C) 2007 Oak Ridge National Laboratory
*/
#include "ad_lustre.h"
/* Function table for the Lustre ADIO driver.
 *
 * The entries are positional; the trailing comment on each line names the
 * ADIOI_Fns_struct slot it fills. Only Open, ReadContig, WriteContig and
 * SetInfo are Lustre-specific here; every other slot uses the generic
 * ADIOI_GEN_* routine. Nonblocking contiguous I/O uses the generic routines
 * only when ROMIO_HAVE_WORKING_AIO is defined and CRAY_XT_LUSTRE is not;
 * otherwise the ADIOI_FAKE_* routines are installed. */
struct ADIOI_Fns_struct ADIO_LUSTRE_operations = {
ADIOI_LUSTRE_Open, /* Open */
ADIOI_LUSTRE_ReadContig, /* ReadContig */
ADIOI_LUSTRE_WriteContig, /* WriteContig */
ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */
ADIOI_GEN_WriteStridedColl, /* WriteStridedColl */
ADIOI_GEN_SeekIndividual, /* SeekIndividual */
ADIOI_GEN_Fcntl, /* Fcntl */
ADIOI_LUSTRE_SetInfo, /* SetInfo */
ADIOI_GEN_ReadStrided, /* ReadStrided */
ADIOI_GEN_WriteStrided, /* WriteStrided */
ADIOI_GEN_Close, /* Close */
#if defined(ROMIO_HAVE_WORKING_AIO) && !defined(CRAY_XT_LUSTRE)
ADIOI_GEN_IreadContig, /* IreadContig */
ADIOI_GEN_IwriteContig, /* IwriteContig */
#else
ADIOI_FAKE_IreadContig, /* IreadContig */
ADIOI_FAKE_IwriteContig, /* IwriteContig */
#endif
ADIOI_GEN_IODone, /* ReadDone */
ADIOI_GEN_IODone, /* WriteDone */
ADIOI_GEN_IOComplete, /* ReadComplete */
ADIOI_GEN_IOComplete, /* WriteComplete */
ADIOI_GEN_IreadStrided, /* IreadStrided */
ADIOI_GEN_IwriteStrided, /* IwriteStrided */
ADIOI_GEN_Flush, /* Flush */
ADIOI_GEN_Resize, /* Resize */
ADIOI_GEN_Delete, /* Delete */
};

Просмотреть файл

@ -0,0 +1,64 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*
* Copyright (C) 2007 Oak Ridge National Laboratory
*/
/* Declarations for the Lustre ADIO driver: system and Lustre headers needed
   by the driver sources, followed by the prototypes of the ADIOI_LUSTRE_*
   entry points.

   NOTE(review): the include guard is named AD_UNIX_INCLUDE rather than after
   this header; if another ADIO header uses the same guard name, including
   both in one translation unit would silently drop the second - confirm. */
#ifndef AD_UNIX_INCLUDE
#define AD_UNIX_INCLUDE
/* temporary: defined unconditionally before the Linux/Lustre headers are
   pulled in */
#define HAVE_ASM_TYPES_H 1
#include <unistd.h>
#include <linux/types.h>
#ifdef __linux__
# include <sys/ioctl.h> /* necessary for: */
# define __USE_GNU /* O_DIRECT and */
# include <fcntl.h> /* IO operations */
# undef __USE_GNU
#endif /* __linux__ */
/*#include <fcntl.h>*/
#include <sys/ioctl.h>
#include "lustre/lustre_user.h"
#include "adio.h"
/*#include "adioi.h"*/
#ifdef HAVE_SIGNAL_H
#include <signal.h>
#endif
#ifdef HAVE_AIO_H
#include <aio.h>
#ifdef HAVE_SYS_AIO_H
#include <sys/aio.h>
#endif
#endif /* End of HAVE_AIO_H */
/* Open / close */
void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code);
void ADIOI_LUSTRE_Close(ADIO_File fd, int *error_code);
/* Independent contiguous read / write */
void ADIOI_LUSTRE_ReadContig(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status, int
*error_code);
void ADIOI_LUSTRE_WriteContig(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status, int
*error_code);
/* Collective strided write / read */
void ADIOI_LUSTRE_WriteStridedColl(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status, int
*error_code);
void ADIOI_LUSTRE_ReadStridedColl(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status, int
*error_code);
/* File control and hint handling */
void ADIOI_LUSTRE_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct,
int *error_code);
void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code);
#endif /* End of AD_UNIX_INCLUDE */

Просмотреть файл

@ -0,0 +1,97 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*
* Copyright (C) 2007 Oak Ridge National Laboratory
*/
#include "ad_lustre.h"
#include "adio_extern.h"
/*
 * ADIOI_LUSTRE_Fcntl - file-control operations for the Lustre driver.
 *
 * fd           - open ADIO file
 * flag         - one of:
 *                ADIO_FCNTL_GET_FSIZE     store the file size in
 *                                         fcntl_struct->fsize
 *                ADIO_FCNTL_SET_DISKSPACE preallocate fcntl_struct->diskspace
 *                                         bytes by rewriting existing data
 *                                         and appending zeros
 *                ADIO_FCNTL_SET_ATOMICITY set fd->atomicity to 0 or 1
 * fcntl_struct - in/out argument block, see above
 * error_code   - MPI_SUCCESS or an MPI_ERR_IO class error code
 *
 * Any other flag prints a message and calls MPI_Abort.
 */
void ADIOI_LUSTRE_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *error_code)
{
    int i, ntimes;
    ADIO_Offset curr_fsize, alloc_size, size, len, done;
    ADIO_Status status;
    char *buf;
#if defined(MPICH2) || !defined(PRINT_ERR_MSG)
    static char myname[] = "ADIOI_LUSTRE_FCNTL";
#endif

    switch(flag) {
    case ADIO_FCNTL_GET_FSIZE:
        fcntl_struct->fsize = lseek(fd->fd_sys, 0, SEEK_END);
        /* put the system file pointer back where ADIO believes it is */
        if (fd->fp_sys_posn != -1)
            lseek(fd->fd_sys, fd->fp_sys_posn, SEEK_SET);
        if (fcntl_struct->fsize == -1) {
            *error_code = MPIR_Err_create_code(MPI_SUCCESS,
                            MPIR_ERR_RECOVERABLE, myname, __LINE__,
                            MPI_ERR_IO, "**io", "**io %s", strerror(errno));
        }
        else *error_code = MPI_SUCCESS;
        break;

    case ADIO_FCNTL_SET_DISKSPACE:
        /* will be called by one process only */
        /* On file systems with no preallocation function, I have to
           explicitly write
           to allocate space. Since there could be holes in the file,
           I need to read up to the current file size, write it back,
           and then write beyond that depending on how much
           preallocation is needed.
           read/write in sizes of no more than ADIOI_PREALLOC_BUFSZ */

        curr_fsize = lseek(fd->fd_sys, 0, SEEK_END);
        if (curr_fsize == -1) {
            /* Without a valid size the zero-fill loop below would start at
               offset 0 and overwrite existing data; report the failure. */
            *error_code = MPIR_Err_create_code(MPI_SUCCESS,
                            MPIR_ERR_RECOVERABLE, myname, __LINE__,
                            MPI_ERR_IO, "**io", "**io %s", strerror(errno));
            break;
        }
        alloc_size = fcntl_struct->diskspace;

        /* phase 1: read back and rewrite the part of the file that exists */
        size = ADIOI_MIN(curr_fsize, alloc_size);

        ntimes = (size + ADIOI_PREALLOC_BUFSZ - 1)/ADIOI_PREALLOC_BUFSZ;
        buf = (char *) ADIOI_Malloc(ADIOI_PREALLOC_BUFSZ);
        done = 0;

        for (i=0; i<ntimes; i++) {
            len = ADIOI_MIN(size-done, ADIOI_PREALLOC_BUFSZ);
            ADIO_ReadContig(fd, buf, len, MPI_BYTE, ADIO_EXPLICIT_OFFSET, done,
                            &status, error_code);
            if (*error_code != MPI_SUCCESS) {
                *error_code = MPIR_Err_create_code(MPI_SUCCESS,
                                MPIR_ERR_RECOVERABLE, myname, __LINE__,
                                MPI_ERR_IO, "**io", "**io %s", strerror(errno));
                ADIOI_Free(buf);   /* do not leak the bounce buffer */
                return;
            }
            ADIO_WriteContig(fd, buf, len, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
                             done, &status, error_code);
            if (*error_code != MPI_SUCCESS) {
                ADIOI_Free(buf);
                return;
            }
            done += len;
        }

        /* phase 2: extend the file with zeros up to alloc_size */
        if (alloc_size > curr_fsize) {
            memset(buf, 0, ADIOI_PREALLOC_BUFSZ);
            size = alloc_size - curr_fsize;
            ntimes = (size + ADIOI_PREALLOC_BUFSZ - 1)/ADIOI_PREALLOC_BUFSZ;
            for (i=0; i<ntimes; i++) {
                len = ADIOI_MIN(alloc_size-done, ADIOI_PREALLOC_BUFSZ);
                ADIO_WriteContig(fd, buf, len, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
                                 done, &status, error_code);
                if (*error_code != MPI_SUCCESS) {
                    ADIOI_Free(buf);
                    return;
                }
                done += len;
            }
        }
        ADIOI_Free(buf);
        if (fd->fp_sys_posn != -1)
            lseek(fd->fd_sys, fd->fp_sys_posn, SEEK_SET);
        *error_code = MPI_SUCCESS;
        break;

    case ADIO_FCNTL_SET_ATOMICITY:
        fd->atomicity = (fcntl_struct->atomicity == 0) ? 0 : 1;
        *error_code = MPI_SUCCESS;
        break;

    default:
        FPRINTF(stderr, "Unknown flag passed to ADIOI_LUSTRE_Fcntl\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
}

Просмотреть файл

@ -0,0 +1,140 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*
* Copyright (C) 2007 Oak Ridge National Laboratory
*/
#include "ad_lustre.h"
#include "adio_extern.h"
/*
 * ADIOI_LUSTRE_SetInfo - process user hints for a Lustre file.
 *
 * fd         - ADIO file; fd->info is created here when it is MPI_INFO_NULL
 * users_info - hints supplied by the user, may be MPI_INFO_NULL
 * error_code - set to MPI_SUCCESS on return
 *
 * When fd->info is still MPI_INFO_NULL the function reads the
 * "striping_unit", "striping_factor", "start_iodevice", "direct_read" and
 * "direct_write" keys from users_info, checks that the three striping values
 * match those of rank 0, and, if any striping value was given, has rank 0
 * open the file with O_LOV_DELAY_CREATE and apply the layout through the
 * LL_IOC_LOV_SETSTRIPE ioctl. In both cases the generic hints are then
 * handled by ADIOI_GEN_SetInfo.
 *
 * The first branch calls MPI_Bcast and MPI_Barrier on fd->comm.
 *
 * NOTE(review): the final assignment overwrites whatever error code
 * ADIOI_GEN_SetInfo stored - confirm this is intended.
 * NOTE(review): value_in_fd is declared but never used.
 */
void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
{
char *value, *value_in_fd;
int flag, tmp_val[3], str_factor=-1, str_unit=0, start_iodev=-1;
struct lov_user_md lum = { 0 };
int err, myrank, fd_sys, perm, amode, old_mask;
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
if ( (fd->info) == MPI_INFO_NULL) {
/* This must be part of the open call. can set striping parameters
if necessary. */
MPI_Info_create(&(fd->info));
/* direct I/O is off unless the user asks for it below */
MPI_Info_set(fd->info, "direct_read", "false");
MPI_Info_set(fd->info, "direct_write", "false");
fd->direct_read = fd->direct_write = 0;
/* has user specified striping or server buffering parameters
and do they have the same value on all processes? */
if (users_info != MPI_INFO_NULL) {
MPI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
value, &flag);
if (flag)
str_unit=atoi(value);
MPI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL,
value, &flag);
if (flag)
str_factor=atoi(value);
MPI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL,
value, &flag);
if (flag)
start_iodev=atoi(value);
/* only the exact strings "true" and "TRUE" enable direct I/O */
MPI_Info_get(users_info, "direct_read", MPI_MAX_INFO_VAL,
value, &flag);
if (flag && (!strcmp(value, "true") || !strcmp(value, "TRUE"))) {
MPI_Info_set(fd->info, "direct_read", "true");
fd->direct_read = 1;
}
MPI_Info_get(users_info, "direct_write", MPI_MAX_INFO_VAL,
value, &flag);
if (flag && (!strcmp(value, "true") || !strcmp(value, "TRUE"))) {
MPI_Info_set(fd->info, "direct_write", "true");
fd->direct_write = 1;
}
}
/* rank 0 broadcasts its striping values so that every rank can compare
them with its own */
MPI_Comm_rank(fd->comm, &myrank);
if (myrank == 0) {
tmp_val[0] = str_factor;
tmp_val[1] = str_unit;
tmp_val[2] = start_iodev;
}
MPI_Bcast(tmp_val, 3, MPI_INT, 0, fd->comm);
if (tmp_val[0] != str_factor
|| tmp_val[1] != str_unit
|| tmp_val[2] != start_iodev) {
FPRINTF(stderr, "ADIOI_LUSTRE_SetInfo: All keys"
"-striping_factor:striping_unit:start_iodevice "
"need to be identical across all processes\n");
MPI_Abort(MPI_COMM_WORLD, 1);
} else if ((str_factor > 0) || (str_unit > 0) || (start_iodev >= 0)) {
/* if user has specified striping info, process 0 tries to set it */
if (!myrank) {
if (fd->perm == ADIO_PERM_NULL) {
/* no permissions given: derive them from the process umask, which is
read by setting it and immediately restoring it */
old_mask = umask(022);
umask(old_mask);
perm = old_mask ^ 0666;
}
else perm = fd->perm;
/* translate the ADIO access mode bits into open(2) flags */
amode = 0;
if (fd->access_mode & ADIO_CREATE)
amode = amode | O_CREAT;
if (fd->access_mode & ADIO_RDONLY)
amode = amode | O_RDONLY;
if (fd->access_mode & ADIO_WRONLY)
amode = amode | O_WRONLY;
if (fd->access_mode & ADIO_RDWR)
amode = amode | O_RDWR;
if (fd->access_mode & ADIO_EXCL)
amode = amode | O_EXCL;
/* we need to create file so ensure this is set */
amode = amode | O_LOV_DELAY_CREATE | O_CREAT;
fd_sys = open(fd->filename, amode, perm);
if (fd_sys == -1) {
/* NOTE(review): the %s below receives strerror(errno), not the file
name, although the message reads "open file %s" */
if (errno != EEXIST)
fprintf(stderr,
"Failure to open file %s %d %d\n",strerror(errno), amode, perm);
} else {
lum.lmm_magic = LOV_USER_MAGIC;
lum.lmm_pattern = 0;
lum.lmm_stripe_size = str_unit;
lum.lmm_stripe_count = str_factor;
lum.lmm_stripe_offset = start_iodev;
err = ioctl(fd_sys, LL_IOC_LOV_SETSTRIPE, &lum);
if (err == -1 && errno != EEXIST) {
fprintf(stderr, "Failure to set stripe info %s \n", strerror(errno));
}
/* this descriptor was only needed to apply the layout */
close(fd_sys);
}
} /* End of striping parameters validation */
}
/* all ranks wait here until rank 0 has finished the block above */
MPI_Barrier(fd->comm);
/* set the values for collective I/O and data sieving parameters */
ADIOI_GEN_SetInfo(fd, users_info, error_code);
} else {
/* The file has been opened previously and fd->fd_sys is a valid
file descriptor. cannot set striping parameters now. */
/* set the values for collective I/O and data sieving parameters */
ADIOI_GEN_SetInfo(fd, users_info, error_code);
}
/* ADIOI_Direct_read / ADIOI_Direct_write are not declared in this function;
presumably global overrides from adio_extern.h - TODO confirm */
if (ADIOI_Direct_read) fd->direct_read = 1;
if (ADIOI_Direct_write) fd->direct_write = 1;
ADIOI_Free(value);
*error_code = MPI_SUCCESS;
}

Просмотреть файл

@ -0,0 +1,134 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*
* Copyright (C) 2007 Oak Ridge National Laboratory
*/
#include "ad_lustre.h"
/*
 * ADIOI_LUSTRE_Open - open fd->filename for the Lustre ADIO driver.
 *
 * fd         - ADIO file handle; filename, access_mode, perm, info and the
 *              direct_read/direct_write flags are read, and fd_sys,
 *              fd_direct, fp_ind, fp_sys_posn, d_mem and d_miniosz are set.
 * error_code - receives MPI_SUCCESS, or an MPI error code built from the
 *              errno of the failed open().
 *
 * After a successful open the current striping of the file is queried with
 * LL_IOC_LOV_GETSTRIPE and published in fd->info ("striping_unit",
 * "striping_factor", "start_iodevice").  If direct I/O was requested, a
 * second descriptor is opened with O_DIRECT; when that open fails the
 * direct flags are cleared and the file stays usable through fd_sys, so a
 * failed O_DIRECT open is never reported as an error.
 */
void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code)
{
    int perm, old_mask, amode, amode_direct;
    int open_errno = 0;
    struct lov_user_md lum = { 0 };
    char *value;

#if defined(MPICH2) || !defined(PRINT_ERR_MSG)
    static char myname[] = "ADIOI_LUSTRE_OPEN";
#endif

    if (fd->perm == ADIO_PERM_NULL) {
        /* no permissions supplied: derive them from the process umask */
        old_mask = umask(022);
        umask(old_mask);
        perm = old_mask ^ 0666;
    }
    else perm = fd->perm;

    amode = 0;
    if (fd->access_mode & ADIO_CREATE)
        amode = amode | O_CREAT;
    if (fd->access_mode & ADIO_RDONLY)
        amode = amode | O_RDONLY;
    if (fd->access_mode & ADIO_WRONLY)
        amode = amode | O_WRONLY;
    if (fd->access_mode & ADIO_RDWR)
        amode = amode | O_RDWR;
    if (fd->access_mode & ADIO_EXCL)
        amode = amode | O_EXCL;

    amode_direct = amode | O_DIRECT;

    fd->fd_sys = open(fd->filename, amode|O_CREAT, perm);
    /* Remember why the open failed right away: the calls below (ioctl,
     * lseek, a second open, perror) are all allowed to overwrite errno. */
    if (fd->fd_sys == -1)
        open_errno = errno;

    if (fd->fd_sys != -1) {
        int err;

        value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));

        /* get file striping information and set it in info */
        lum.lmm_magic = LOV_USER_MAGIC;
        err = ioctl(fd->fd_sys, LL_IOC_LOV_GETSTRIPE, (void *) &lum);

        if (!err) {
            sprintf(value, "%d", lum.lmm_stripe_size);
            MPI_Info_set(fd->info, "striping_unit", value);

            sprintf(value, "%d", lum.lmm_stripe_count);
            MPI_Info_set(fd->info, "striping_factor", value);

            sprintf(value, "%d", lum.lmm_stripe_offset);
            MPI_Info_set(fd->info, "start_iodevice", value);
        }
        ADIOI_Free(value);

        /* append mode: start both file pointers at end of file */
        if (fd->access_mode & ADIO_APPEND)
            fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END);
    }

    fd->fd_direct = -1;
    /* Only try the O_DIRECT descriptor once the regular open has succeeded;
     * otherwise a descriptor could be opened here and then abandoned when
     * the function reports the failure of the primary open. */
    if ((fd->fd_sys != -1) && (fd->direct_write || fd->direct_read)) {
        fd->fd_direct = open(fd->filename, amode_direct, perm);
        if (fd->fd_direct != -1) {
            fd->d_mem = fd->d_miniosz = (1<<12);
        } else {
            /* fall back to buffered I/O through fd_sys */
            perror("cannot open file with O_Direct");
            fd->direct_write = fd->direct_read = 0;
        }
    }

    /* --BEGIN ERROR HANDLING-- */
    if (fd->fd_sys == -1) {
        if (open_errno == ENAMETOOLONG)
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE, myname,
                                               __LINE__, MPI_ERR_BAD_FILE,
                                               "**filenamelong",
                                               "**filenamelong %s %d",
                                               fd->filename,
                                               (int) strlen(fd->filename));
        else if (open_errno == ENOENT)
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE, myname,
                                               __LINE__, MPI_ERR_NO_SUCH_FILE,
                                               "**filenoexist",
                                               "**filenoexist %s",
                                               fd->filename);
        else if (open_errno == ENOTDIR || open_errno == ELOOP)
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE,
                                               myname, __LINE__,
                                               MPI_ERR_BAD_FILE,
                                               "**filenamedir",
                                               "**filenamedir %s",
                                               fd->filename);
        else if (open_errno == EACCES) {
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE, myname,
                                               __LINE__, MPI_ERR_ACCESS,
                                               "**fileaccess",
                                               "**fileaccess %s",
                                               fd->filename );
        }
        else if (open_errno == EROFS) {
            /* Read only file or file system and write access requested */
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE, myname,
                                               __LINE__, MPI_ERR_READ_ONLY,
                                               "**ioneedrd", 0 );
        }
        else {
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE, myname,
                                               __LINE__, MPI_ERR_IO, "**io",
                                               "**io %s", strerror(open_errno));
        }
    }
    /* --END ERROR HANDLING-- */
    else *error_code = MPI_SUCCESS;
}

Просмотреть файл

@ -0,0 +1,187 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*
* Copyright (C) 2007 Oak Ridge National Laboratory
*/
#define _XOPEN_SOURCE 600
#include <stdlib.h>
#include <malloc.h>
#include "ad_lustre.h"
#define LUSTRE_MEMALIGN (1<<12) /* to use page_shift */
static void ADIOI_LUSTRE_Aligned_Mem_File_Write(ADIO_File fd, void *buf, int len,
              ADIO_Offset offset, int *err);
/*
 * Write len bytes from buf at the given file offset, using the O_DIRECT
 * descriptor for the part of the request that is a multiple of
 * fd->d_miniosz and the regular descriptor for the rest.
 *
 * err - receives the number of bytes written, or -1 if a pwrite() failed
 *       (errno is left as set by the failing call).
 *
 * When the request is split into a direct head and a buffered tail, the
 * tail is only written after the head has been written completely, so a
 * failed or short head write is reported as-is instead of being folded
 * into a misleading byte count.
 */
static void ADIOI_LUSTRE_Aligned_Mem_File_Write(ADIO_File fd, void *buf, int len,
              ADIO_Offset offset, int *err)
{
    int rem, size, nbytes;

    if (!(len % fd->d_miniosz) && (len >= fd->d_miniosz)) {
        /* whole request is a multiple of the direct I/O unit */
        *err = pwrite(fd->fd_direct, buf, len, offset);
    } else if (len < fd->d_miniosz) {
        /* too small for direct I/O */
        *err = pwrite(fd->fd_sys, buf, len, offset);
    } else {
        rem = len % fd->d_miniosz;
        size = len - rem;
        nbytes = pwrite(fd->fd_direct, buf, size, offset);
        if (nbytes == size) {
            int tail = pwrite(fd->fd_sys, ((char *)buf) + size, rem, offset+size);

            nbytes = (tail == -1) ? -1 : nbytes + tail;
        }
        *err = nbytes;
    }
}
static void ADIOI_LUSTRE_Aligned_Mem_File_Read(ADIO_File fd, void *buf, int len,
              ADIO_Offset offset, int *err);
/*
 * Read len bytes into buf from the given file offset, using the O_DIRECT
 * descriptor for the part of the request that is a multiple of
 * fd->d_miniosz and the regular descriptor for the rest.
 *
 * err - receives the number of bytes read, or -1 if a pread() failed
 *       (errno is left as set by the failing call).
 *
 * When the request is split into a direct head and a buffered tail, the
 * tail is only read after the head has been read completely, so a failed
 * or short head read is reported as-is instead of being folded into a
 * misleading byte count.
 */
static void ADIOI_LUSTRE_Aligned_Mem_File_Read(ADIO_File fd, void *buf, int len,
              ADIO_Offset offset, int *err)
{
    int rem, size, nbytes;

    if (!(len % fd->d_miniosz) && (len >= fd->d_miniosz)) {
        /* whole request is a multiple of the direct I/O unit */
        *err = pread(fd->fd_direct, buf, len, offset);
    } else if (len < fd->d_miniosz) {
        /* too small for direct I/O */
        *err = pread(fd->fd_sys, buf, len, offset);
    } else {
        rem = len % fd->d_miniosz;
        size = len - rem;
        nbytes = pread(fd->fd_direct, buf, size, offset);
        if (nbytes == size) {
            int tail = pread(fd->fd_sys, ((char *)buf) + size, rem, offset+size);

            nbytes = (tail == -1) ? -1 : nbytes + tail;
        }
        *err = nbytes;
    }
}
static int ADIOI_LUSTRE_Directio(ADIO_File fd, void *buf, int len,
                   off_t offset, int rw);
/*
 * Transfer len bytes between buf and the file at offset, using direct I/O
 * where alignment allows it.
 *
 * rw - non-zero to write, zero to read.
 *
 * Returns the total number of bytes transferred, or -1 if any underlying
 * call failed (errno is the one set by the failing call).
 *
 * Steps:
 *  1. If offset is not a multiple of fd->d_miniosz, the bytes up to the
 *     next multiple go through the regular descriptor.  A failed or short
 *     transfer here ends the request.
 *  2. The remainder goes through the aligned helpers.  If buf is not
 *     aligned to fd->d_mem, a LUSTRE_MEMALIGN-aligned bounce buffer is
 *     used; if that cannot be allocated, the regular descriptor is used.
 */
static int ADIOI_LUSTRE_Directio(ADIO_File fd, void *buf, int len,
                   off_t offset, int rw)
{
    int err = -1, diff, size = len, nbytes = 0;
    int saved_errno;
    void *newbuf;

    if (offset % fd->d_miniosz) {
        diff = fd->d_miniosz - (offset % fd->d_miniosz);
        diff = ADIOI_MIN(diff, len);
        if (rw)
            nbytes = pwrite(fd->fd_sys, buf, diff, offset);
        else
            nbytes = pread(fd->fd_sys, buf, diff, offset);
        /* error (-1) or short transfer: report it, do not continue */
        if (nbytes != diff)
            return nbytes;
        buf = ((char *) buf) + diff;
        offset += diff;
        size = len - diff;
    }

    /* nothing left after the unaligned head (or len was 0) */
    if (!size) {
        return nbytes;
    }

    if (!(((long) buf) % fd->d_mem)) {
        /* user buffer is suitably aligned: use it directly */
        if (rw)
            ADIOI_LUSTRE_Aligned_Mem_File_Write(fd, buf, size, offset, &err);
        else
            ADIOI_LUSTRE_Aligned_Mem_File_Read(fd, buf, size, offset, &err);
    } else {
        newbuf = (void *) memalign(LUSTRE_MEMALIGN, size);
        if (newbuf) {
            if (rw) {
                memcpy(newbuf, buf, size);
                ADIOI_LUSTRE_Aligned_Mem_File_Write(fd, newbuf, size, offset, &err);
            } else {
                ADIOI_LUSTRE_Aligned_Mem_File_Read(fd, newbuf, size, offset, &err);
                if (err > 0) memcpy(buf, newbuf, err);
            }
            /* keep the errno of the I/O call for the caller's error report */
            saved_errno = errno;
            free(newbuf);
            errno = saved_errno;
        } else {
            /* no bounce buffer available: fall back to buffered I/O */
            if (rw)
                err = pwrite(fd->fd_sys, buf, size, offset);
            else
                err = pread(fd->fd_sys, buf, size, offset);
        }
    }

    if (err == -1)
        return -1;
    return nbytes + err;
}
static void ADIOI_LUSTRE_IOContig(ADIO_File fd, void *buf, int count,
                   MPI_Datatype datatype, int file_ptr_type,
                   ADIO_Offset offset, ADIO_Status *status,
                   int io_mode, int *error_code);
/*
 * Contiguous read or write of count elements of datatype.
 *
 * file_ptr_type - ADIO_INDIVIDUAL uses (and advances) fd->fp_ind and
 *                 ignores the offset argument; otherwise offset is used.
 * io_mode       - non-zero to write, zero to read.
 * status        - if non-NULL and HAVE_STATUS_SET_BYTES is defined,
 *                 receives the number of bytes transferred.
 * error_code    - MPI_SUCCESS, or an MPI_ERR_IO code built from errno.
 *
 * When neither direct_read nor direct_write is set on fd, plain
 * lseek/read/write on fd->fd_sys is used; otherwise the request goes
 * through ADIOI_LUSTRE_Directio.  On failure fd->fp_sys_posn is set to -1.
 */
static void ADIOI_LUSTRE_IOContig(ADIO_File fd, void *buf, int count,
                   MPI_Datatype datatype, int file_ptr_type,
                   ADIO_Offset offset, ADIO_Status *status,
                   int io_mode, int *error_code)
{
    int err=-1, datatype_size, len;
    static char myname[] = "ADIOI_LUSTRE_IOCONTIG";

    MPI_Type_size(datatype, &datatype_size);
    len = datatype_size * count;

    if (file_ptr_type == ADIO_INDIVIDUAL) {
        offset = fd->fp_ind;
    }

    if (!(fd->direct_read || fd->direct_write)) {
        if (fd->fp_sys_posn != offset) {
            /* Compare the off_t result directly: storing it in an int
             * would truncate large offsets and could produce a false -1.
             * err still holds its initial -1 when jumping to ioerr. */
            if (lseek(fd->fd_sys, offset, SEEK_SET) == (off_t) -1) goto ioerr;
        }

        if (io_mode)
            err = write(fd->fd_sys, buf, len);
        else
            err = read(fd->fd_sys, buf, len);
    } else {
        err = ADIOI_LUSTRE_Directio(fd, buf, len, offset, io_mode);
    }

    if (err == -1) goto ioerr;

    fd->fp_sys_posn = offset + err;

    if (file_ptr_type == ADIO_INDIVIDUAL) {
        fd->fp_ind += err;
    }

#ifdef HAVE_STATUS_SET_BYTES
    if (status) MPIR_Status_set_bytes(status, datatype, err);
#endif
    *error_code = MPI_SUCCESS;

ioerr:
    /* --BEGIN ERROR HANDLING-- */
    /* the success path also falls through to here, with err != -1 */
    if (err == -1) {
        *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                           MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__,
                                           MPI_ERR_IO, "**io",
                                           "**io %s", strerror(errno));
        fd->fp_sys_posn = -1;
        return;
    }
    /* --END ERROR HANDLING-- */
}
/*
 * Contiguous write entry point of the Lustre driver: forwards every
 * argument unchanged to ADIOI_LUSTRE_IOContig with the I/O mode set to 1.
 */
void ADIOI_LUSTRE_WriteContig(ADIO_File fd, void *buf, int count,
                   MPI_Datatype datatype, int file_ptr_type,
                   ADIO_Offset offset, ADIO_Status *status, int *error_code)
{
    const int write_mode = 1;

    ADIOI_LUSTRE_IOContig(fd, buf, count, datatype, file_ptr_type, offset,
                          status, write_mode, error_code);
}
/*
 * Contiguous read entry point of the Lustre driver: forwards every
 * argument unchanged to ADIOI_LUSTRE_IOContig with the I/O mode set to 0.
 */
void ADIOI_LUSTRE_ReadContig(ADIO_File fd, void *buf, int count,
                   MPI_Datatype datatype, int file_ptr_type,
                   ADIO_Offset offset, ADIO_Status *status, int *error_code)
{
    const int read_mode = 0;

    ADIOI_LUSTRE_IOContig(fd, buf, count, datatype, file_ptr_type, offset,
                          status, read_mode, error_code);
}

Просмотреть файл

@ -22,8 +22,10 @@ struct ADIOI_Fns_struct ADIO_NFS_operations = {
ADIOI_NFS_ReadStrided, /* ReadStrided */
ADIOI_NFS_WriteStrided, /* WriteStrided */
ADIOI_GEN_Close, /* Close */
ADIOI_NFS_IreadContig, /* IreadContig */
ADIOI_NFS_IwriteContig, /* IwriteContig */
/* Even with lockd running and NFS mounted 'noac', we have been unable to
* guarantee correct behavior over NFS with asynchronous I/O operations */
ADIOI_FAKE_IreadContig, /* IreadContig */
ADIOI_FAKE_IwriteContig, /* IwriteContig */
ADIOI_NFS_ReadDone, /* ReadDone */
ADIOI_NFS_WriteDone, /* WriteDone */
ADIOI_NFS_ReadComplete, /* ReadComplete */

Просмотреть файл

@ -15,6 +15,9 @@
#ifdef HAVE_SIGNAL_H
#include <signal.h>
#endif
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_AIO_H
#include <aio.h>
#endif
@ -29,7 +32,7 @@
#endif
int ADIOI_NFS_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset,
int wr, void *handle);
int wr, MPI_Request *request);
#ifdef SX4
#define lseek llseek

Просмотреть файл

@ -9,119 +9,9 @@
int ADIOI_NFS_ReadDone(ADIO_Request *request, ADIO_Status *status,
int *error_code)
{
#ifdef ROMIO_HAVE_WORKING_AIO
int done=0;
int err;
static char myname[] = "ADIOI_NFS_READDONE";
#ifdef ROMIO_HAVE_STRUCT_AIOCB_WITH_AIO_HANDLE
struct aiocb *tmp1;
#endif
#endif
if (*request == ADIO_REQUEST_NULL) {
*error_code = MPI_SUCCESS;
return 1;
}
#ifndef ROMIO_HAVE_WORKING_AIO
# ifdef HAVE_STATUS_SET_BYTES
MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes);
# endif
(*request)->fd->async_count--;
ADIOI_Free_request((ADIOI_Req_node *) (*request));
*request = ADIO_REQUEST_NULL;
*error_code = MPI_SUCCESS;
return 1;
#else
#ifndef ROMIO_HAVE_STRUCT_AIOCB_WITH_AIO_FILDES
/* old IBM API */
if ((*request)->queued) {
tmp1 = (struct aiocb *) (*request)->handle;
errno = aio_error(tmp1->aio_handle);
if (errno == EINPROG) {
done = 0;
*error_code = MPI_SUCCESS;
}
else {
err = aio_return(tmp1->aio_handle);
(*request)->nbytes = err;
errno = aio_error(tmp1->aio_handle);
done = 1;
if (err == -1) {
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
MPIR_ERR_RECOVERABLE,
myname, __LINE__,
MPI_ERR_IO, "**io",
"**io %s", strerror(errno));
}
else *error_code = MPI_SUCCESS;
}
}
else {
done = 1;
*error_code = MPI_SUCCESS;
}
#ifdef HAVE_STATUS_SET_BYTES
if (done && ((*request)->nbytes != -1))
MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes);
#endif
#else
/* everything other than old IBM */
if ((*request)->queued) {
errno = aio_error((const struct aiocb *) (*request)->handle);
if (errno == EINPROGRESS) {
done = 0;
*error_code = MPI_SUCCESS;
}
else {
err = aio_return((struct aiocb *) (*request)->handle);
(*request)->nbytes = err;
errno = aio_error((struct aiocb *) (*request)->handle);
done = 1;
if (err == -1) {
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
MPIR_ERR_RECOVERABLE,
myname, __LINE__,
MPI_ERR_IO, "**io",
"**io %s", strerror(errno));
}
else *error_code = MPI_SUCCESS;
}
}
else {
done = 1;
*error_code = MPI_SUCCESS;
}
#ifdef HAVE_STATUS_SET_BYTES
if (done && ((*request)->nbytes != -1))
MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes);
#endif
#endif
if (done) {
/* if request is still queued in the system, it is also there
on ADIOI_Async_list. Delete it from there. */
if ((*request)->queued) ADIOI_Del_req_from_list(request);
(*request)->fd->async_count--;
if ((*request)->handle) ADIOI_Free((*request)->handle);
ADIOI_Free_request((ADIOI_Req_node *) (*request));
*request = ADIO_REQUEST_NULL;
}
return done;
#endif
}
int ADIOI_NFS_WriteDone(ADIO_Request *request, ADIO_Status *status,
int *error_code)
{

Просмотреть файл

@ -18,10 +18,22 @@ void ADIOI_NFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *er
switch(flag) {
case ADIO_FCNTL_GET_FSIZE:
ADIOI_READ_LOCK(fd, 0, SEEK_SET, 1);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
fcntl_struct->fsize = lseek(fd->fd_sys, 0, SEEK_END);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
ADIOI_UNLOCK(fd, 0, SEEK_SET, 1);
if (fd->fp_sys_posn != -1) {
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
lseek(fd->fd_sys, fd->fp_sys_posn, SEEK_SET);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
}
if (fcntl_struct->fsize == -1) {
*error_code = MPIO_Err_create_code(MPI_SUCCESS,

Просмотреть файл

@ -32,7 +32,13 @@ void ADIOI_NFS_Get_shared_fp(ADIO_File fd, int incr, ADIO_Offset *shared_fp,
if (*error_code != MPI_SUCCESS) return;
*shared_fp = 0;
ADIOI_WRITE_LOCK(fd->shared_fp_fd, 0, SEEK_SET, sizeof(ADIO_Offset));
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
#endif
err = read(fd->shared_fp_fd->fd_sys, shared_fp, sizeof(ADIO_Offset));
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
#endif
/* if the file is empty, the above read may return error
(reading beyond end of file). In that case, shared_fp = 0,
set above, is the correct value. */
@ -40,10 +46,22 @@ void ADIOI_NFS_Get_shared_fp(ADIO_File fd, int incr, ADIO_Offset *shared_fp,
else {
ADIOI_WRITE_LOCK(fd->shared_fp_fd, 0, SEEK_SET, sizeof(ADIO_Offset));
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
err = lseek(fd->shared_fp_fd->fd_sys, 0, SEEK_SET);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
if (err == 0) {
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
#endif
err = read(fd->shared_fp_fd->fd_sys, shared_fp,
sizeof(ADIO_Offset));
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
#endif
}
if (err == -1) {
ADIOI_UNLOCK(fd->shared_fp_fd, 0, SEEK_SET, sizeof(ADIO_Offset));
@ -57,9 +75,21 @@ void ADIOI_NFS_Get_shared_fp(ADIO_File fd, int incr, ADIO_Offset *shared_fp,
new_fp = *shared_fp + incr;
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
err = lseek(fd->shared_fp_fd->fd_sys, 0, SEEK_SET);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
if (err == 0) {
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
err = write(fd->shared_fp_fd->fd_sys, &new_fp, sizeof(ADIO_Offset));
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
}
ADIOI_UNLOCK(fd->shared_fp_fd, 0, SEEK_SET, sizeof(ADIO_Offset));
if (err == -1) {

Просмотреть файл

@ -6,50 +6,24 @@
#include "ad_nfs.h"
#ifdef ROMIO_HAVE_WORKING_AIO
/* nearly identical to ADIOI_GEN_IreadContig, except we lock around I/O */
void ADIOI_NFS_IreadContig(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Request *request,
int *error_code)
{
int len, typesize;
#ifndef ROMIO_HAVE_WORKING_AIO
ADIO_Status status;
#else
int aio_errno = 0;
static char myname[] = "ADIOI_NFS_IREADCONTIG";
#endif
(*request) = ADIOI_Malloc_request();
(*request)->optype = ADIOI_READ;
(*request)->fd = fd;
(*request)->datatype = datatype;
MPI_Type_size(datatype, &typesize);
len = count * typesize;
#ifndef ROMIO_HAVE_WORKING_AIO
/* no support for nonblocking I/O. Use blocking I/O. */
ADIOI_NFS_ReadContig(fd, buf, len, MPI_BYTE, file_ptr_type, offset,
&status, error_code);
(*request)->queued = 0;
#ifdef HAVE_STATUS_SET_BYTES
if (*error_code == MPI_SUCCESS) {
MPI_Get_elements(&status, MPI_BYTE, &len);
(*request)->nbytes = len;
}
#endif
fd->fp_sys_posn = -1;
#else
if (file_ptr_type == ADIO_INDIVIDUAL) offset = fd->fp_ind;
aio_errno = ADIOI_NFS_aio(fd, buf, len, offset, 0, &((*request)->handle));
aio_errno = ADIOI_NFS_aio(fd, buf, len, offset, 0, request);
if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += len;
(*request)->queued = 1;
ADIOI_Add_req_to_list(request);
fd->fp_sys_posn = -1;
if (aio_errno != 0) {
@ -59,7 +33,5 @@ void ADIOI_NFS_IreadContig(ADIO_File fd, void *buf, int count,
/* --END ERROR HANDLING-- */
}
else *error_code = MPI_SUCCESS;
#endif
fd->async_count++;
}
#endif

Просмотреть файл

@ -6,51 +6,31 @@
#include "ad_nfs.h"
#include "../../mpi-io/mpioimpl.h"
#include "../../mpi-io/mpioprof.h"
#include "mpiu_greq.h"
#include <string.h>
#ifdef ROMIO_HAVE_WORKING_AIO
static MPIX_Grequest_class ADIOI_GEN_greq_class = 0;
/* this routine is nearly identical to ADIOI_GEN_IwriteContig, except we lock
* around I/O */
void ADIOI_NFS_IwriteContig(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Request *request, int *error_code)
{
int len, typesize;
#ifndef ROMIO_HAVE_WORKING_AIO
ADIO_Status status;
#else
int aio_errno = 0;
static char myname[] = "ADIOI_NFS_IWRITECONTIG";
#endif
*request = ADIOI_Malloc_request();
(*request)->optype = ADIOI_WRITE;
(*request)->fd = fd;
(*request)->datatype = datatype;
MPI_Type_size(datatype, &typesize);
len = count * typesize;
#ifndef ROMIO_HAVE_WORKING_AIO
/* HP, FreeBSD, Linux */
/* no support for nonblocking I/O. Use blocking I/O. */
ADIOI_NFS_WriteContig(fd, buf, len, MPI_BYTE, file_ptr_type, offset,
&status,
error_code);
(*request)->queued = 0;
#ifdef HAVE_STATUS_SET_BYTES
if (*error_code == MPI_SUCCESS) {
MPI_Get_elements(&status, MPI_BYTE, &len);
(*request)->nbytes = len;
}
#endif
fd->fp_sys_posn = -1;
#else
if (file_ptr_type == ADIO_INDIVIDUAL) offset = fd->fp_ind;
aio_errno = ADIOI_NFS_aio(fd, buf, len, offset, 1, &((*request)->handle));
aio_errno = ADIOI_NFS_aio(fd, buf, len, offset, 1, request);
if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += len;
(*request)->queued = 1;
ADIOI_Add_req_to_list(request);
fd->fp_sys_posn = -1;
if (aio_errno != 0) {
@ -60,11 +40,9 @@ void ADIOI_NFS_IwriteContig(ADIO_File fd, void *buf, int count,
/* --END ERROR HANDLING-- */
}
else *error_code = MPI_SUCCESS;
#endif
fd->async_count++;
return;
}
#endif
/* This function is for implementation convenience. It is not user-visible.
* It takes care of the differences in the interface for nonblocking I/O
@ -74,21 +52,22 @@ void ADIOI_NFS_IwriteContig(ADIO_File fd, void *buf, int count,
*/
#ifdef ROMIO_HAVE_WORKING_AIO
int ADIOI_NFS_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset,
int wr, void *handle)
int wr, MPI_Request *request)
{
int err=-1, fd_sys;
int error_code, this_errno;
struct aiocb *aiocbp;
ADIOI_AIO_Request *aio_req;
fd_sys = fd->fd_sys;
aio_req = (ADIOI_AIO_Request*)ADIOI_Calloc(sizeof(ADIOI_AIO_Request), 1);
aiocbp = (struct aiocb *) ADIOI_Calloc(sizeof(struct aiocb), 1);
aiocbp->aio_offset = offset;
aiocbp->aio_buf = buf;
aiocbp->aio_nbytes = len;
/* This madness is mostly here to deal with IBM AIO implementation */
#ifdef ROMIO_HAVE_STRUCT_AIOCB_WITH_AIO_WHENCE
aiocbp->aio_whence = SEEK_SET;
#endif
@ -112,7 +91,7 @@ int ADIOI_NFS_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset,
if (wr) ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
else ADIOI_READ_LOCK(fd, offset, SEEK_SET, len);
#ifdef ROMIO_HAVE_STRUCT_AIOCB_WITH_AIO_FILDES
#ifndef ROMIO_HAVE_AIO_CALLS_NEED_FILEDES
if (wr) err = aio_write(aiocbp);
else err = aio_read(aiocbp);
#else
@ -128,43 +107,23 @@ int ADIOI_NFS_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset,
if (this_errno == EAGAIN) {
/* exceeded the max. no. of outstanding requests.
complete all previous async. requests and try again. */
ADIOI_Complete_async(&error_code);
if (error_code != MPI_SUCCESS) return -EIO;
while (err == -1 && this_errno == EAGAIN) {
if (wr) ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
else ADIOI_READ_LOCK(fd, offset, SEEK_SET, len);
#ifdef ROMIO_HAVE_STRUCT_AIOCB_WITH_AIO_FILDES
if (wr) err = aio_write(aiocbp);
else err = aio_read(aiocbp);
#else
/* Broken IBM interface */
if (wr) err = aio_write(fd_sys, aiocbp);
else err = aio_read(fd_sys, aiocbp);
#endif
this_errno = errno;
ADIOI_UNLOCK(fd, offset, SEEK_SET, len);
if (err == -1 && this_errno == EAGAIN) {
/* sleep and try again */
sleep(1);
}
else if (err == -1) {
/* real error */
return -errno;
}
}
}
else {
ADIO_WriteContig(fd, buf, len, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
offset, NULL, &error_code);
MPIO_Completed_request_create(&fd, len, &error_code, request);
return 0;
} else {
return -this_errno;
}
}
*((struct aiocb **) handle) = aiocbp;
aio_req->aiocbp = aiocbp;
if (ADIOI_GEN_greq_class == 0) {
MPIX_Grequest_class_create(ADIOI_GEN_aio_query_fn,
ADIOI_GEN_aio_free_fn, MPIU_Greq_cancel_fn,
ADIOI_GEN_aio_poll_fn, ADIOI_GEN_aio_wait_fn,
&ADIOI_GEN_greq_class);
}
MPIX_Grequest_class_allocate(ADIOI_GEN_greq_class, aio_req, request);
memcpy(&(aio_req->req), request, sizeof(MPI_Request));
return 0;
}
#endif

Просмотреть файл

@ -32,11 +32,24 @@ void ADIOI_NFS_Open(ADIO_File fd, int *error_code)
if (fd->access_mode & ADIO_EXCL)
amode = amode | O_EXCL;
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_open_a, 0, NULL );
#endif
fd->fd_sys = open(fd->filename, amode, perm);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_open_b, 0, NULL );
#endif
fd->fd_direct = -1;
if ((fd->fd_sys != -1) && (fd->access_mode & ADIO_APPEND))
if ((fd->fd_sys != -1) && (fd->access_mode & ADIO_APPEND)) {
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
}
if (fd->fd_sys == -1) {
/* Check for special error codes for those MPI error

Просмотреть файл

@ -19,24 +19,50 @@ void ADIOI_NFS_ReadContig(ADIO_File fd, void *buf, int count,
len = datatype_size * count;
if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
if (fd->fp_sys_posn != offset)
if (fd->fp_sys_posn != offset) {
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
lseek(fd->fd_sys, offset, SEEK_SET);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
}
if (fd->atomicity)
ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
else ADIOI_READ_LOCK(fd, offset, SEEK_SET, len);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
#endif
err = read(fd->fd_sys, buf, len);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
#endif
ADIOI_UNLOCK(fd, offset, SEEK_SET, len);
fd->fp_sys_posn = offset + err;
/* individual file pointer not updated */
}
else { /* read from curr. location of ind. file pointer */
offset = fd->fp_ind;
if (fd->fp_sys_posn != fd->fp_ind)
if (fd->fp_sys_posn != fd->fp_ind) {
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
lseek(fd->fd_sys, fd->fp_ind, SEEK_SET);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
}
if (fd->atomicity)
ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
else ADIOI_READ_LOCK(fd, offset, SEEK_SET, len);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
#endif
err = read(fd->fd_sys, buf, len);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
#endif
ADIOI_UNLOCK(fd, offset, SEEK_SET, len);
fd->fp_ind += err;
fd->fp_sys_posn = fd->fp_ind;
@ -60,6 +86,46 @@ void ADIOI_NFS_ReadContig(ADIO_File fd, void *buf, int count,
#ifdef ADIOI_MPE_LOGGING
#define ADIOI_BUFFERED_READ \
{ \
if (req_off >= readbuf_off + readbuf_len) { \
readbuf_off = req_off; \
readbuf_len = (int) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));\
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL ); \
lseek(fd->fd_sys, readbuf_off, SEEK_SET);\
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL ); \
if (!(fd->atomicity)) ADIOI_READ_LOCK(fd, readbuf_off, SEEK_SET, readbuf_len);\
MPE_Log_event( ADIOI_MPE_read_a, 0, NULL ); \
err = read(fd->fd_sys, readbuf, readbuf_len);\
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL ); \
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, readbuf_off, SEEK_SET, readbuf_len);\
if (err == -1) err_flag = 1; \
} \
while (req_len > readbuf_off + readbuf_len - req_off) { \
partial_read = (int) (readbuf_off + readbuf_len - req_off); \
tmp_buf = (char *) ADIOI_Malloc(partial_read); \
memcpy(tmp_buf, readbuf+readbuf_len-partial_read, partial_read); \
ADIOI_Free(readbuf); \
readbuf = (char *) ADIOI_Malloc(partial_read + max_bufsize); \
memcpy(readbuf, tmp_buf, partial_read); \
ADIOI_Free(tmp_buf); \
readbuf_off += readbuf_len-partial_read; \
readbuf_len = (int) (partial_read + ADIOI_MIN(max_bufsize, \
end_offset-readbuf_off+1)); \
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL ); \
lseek(fd->fd_sys, readbuf_off+partial_read, SEEK_SET);\
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL ); \
if (!(fd->atomicity)) ADIOI_READ_LOCK(fd, readbuf_off+partial_read, SEEK_SET, readbuf_len-partial_read);\
MPE_Log_event( ADIOI_MPE_read_a, 0, NULL ); \
err = read(fd->fd_sys, readbuf+partial_read, readbuf_len-partial_read);\
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL ); \
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, readbuf_off+partial_read, SEEK_SET, readbuf_len-partial_read);\
if (err == -1) err_flag = 1; \
} \
memcpy((char *)buf + userbuf_off, readbuf+req_off-readbuf_off, req_len); \
}
#else
#define ADIOI_BUFFERED_READ \
{ \
if (req_off >= readbuf_off + readbuf_len) { \
@ -90,6 +156,7 @@ void ADIOI_NFS_ReadContig(ADIO_File fd, void *buf, int count,
} \
memcpy((char *)buf + userbuf_off, readbuf+req_off-readbuf_off, req_len); \
}
#endif
void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count,
@ -160,9 +227,21 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count,
if (fd->atomicity)
ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
lseek(fd->fd_sys, readbuf_off, SEEK_SET);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
if (!(fd->atomicity)) ADIOI_READ_LOCK(fd, readbuf_off, SEEK_SET, readbuf_len);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
#endif
err = read(fd->fd_sys, readbuf, readbuf_len);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
#endif
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, readbuf_off, SEEK_SET, readbuf_len);
if (err == -1) err_flag = 1;
@ -274,9 +353,21 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count,
readbuf = (char *) ADIOI_Malloc(max_bufsize);
readbuf_len = (int) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
lseek(fd->fd_sys, offset, SEEK_SET);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
if (!(fd->atomicity)) ADIOI_READ_LOCK(fd, offset, SEEK_SET, readbuf_len);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
#endif
err = read(fd->fd_sys, readbuf, readbuf_len);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
#endif
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, offset, SEEK_SET, readbuf_len);
if (err == -1) err_flag = 1;

Просмотреть файл

@ -47,8 +47,20 @@ void ADIOI_NFS_Set_shared_fp(ADIO_File fd, ADIO_Offset offset, int *error_code)
if (*error_code != MPI_SUCCESS) return;
ADIOI_WRITE_LOCK(fd->shared_fp_fd, 0, SEEK_SET, sizeof(ADIO_Offset));
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
lseek(fd->shared_fp_fd->fd_sys, 0, SEEK_SET);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
err = write(fd->shared_fp_fd->fd_sys, &offset, sizeof(ADIO_Offset));
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
ADIOI_UNLOCK(fd->shared_fp_fd, 0, SEEK_SET, sizeof(ADIO_Offset));
if (err == -1) {

Просмотреть файл

@ -9,6 +9,8 @@
void ADIOI_NFS_ReadComplete(ADIO_Request *request, ADIO_Status *status,
int *error_code)
{
return;
#if 0
#ifdef ROMIO_HAVE_WORKING_AIO
int err;
static char myname[] = "ADIOI_NFS_READCOMPLETE";
@ -121,6 +123,7 @@ void ADIOI_NFS_ReadComplete(ADIO_Request *request, ADIO_Status *status,
*request = ADIO_REQUEST_NULL;
*error_code = MPI_SUCCESS;
#endif
#endif
}

Просмотреть файл

@ -19,20 +19,46 @@ void ADIOI_NFS_WriteContig(ADIO_File fd, void *buf, int count,
len = datatype_size * count;
if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
if (fd->fp_sys_posn != offset)
if (fd->fp_sys_posn != offset) {
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
lseek(fd->fd_sys, offset, SEEK_SET);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
}
ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
err = write(fd->fd_sys, buf, len);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
ADIOI_UNLOCK(fd, offset, SEEK_SET, len);
fd->fp_sys_posn = offset + err;
/* individual file pointer not updated */
}
else { /* write from curr. location of ind. file pointer */
offset = fd->fp_ind;
if (fd->fp_sys_posn != fd->fp_ind)
if (fd->fp_sys_posn != fd->fp_ind) {
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
lseek(fd->fd_sys, fd->fp_ind, SEEK_SET);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
}
ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
err = write(fd->fd_sys, buf, len);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
ADIOI_UNLOCK(fd, offset, SEEK_SET, len);
fd->fp_ind += err;
fd->fp_sys_posn = fd->fp_ind;
@ -58,6 +84,69 @@ void ADIOI_NFS_WriteContig(ADIO_File fd, void *buf, int count,
#ifdef ADIOI_MPE_LOGGING
#define ADIOI_BUFFERED_WRITE \
{ \
if (req_off >= writebuf_off + writebuf_len) { \
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL ); \
lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL ); \
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL ); \
err = write(fd->fd_sys, writebuf, writebuf_len); \
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL ); \
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
if (err == -1) err_flag = 1; \
writebuf_off = req_off; \
writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL ); \
lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL ); \
MPE_Log_event( ADIOI_MPE_read_a, 0, NULL ); \
err = read(fd->fd_sys, writebuf, writebuf_len); \
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL ); \
if (err == -1) { \
*error_code = MPIO_Err_create_code(MPI_SUCCESS, \
MPIR_ERR_RECOVERABLE, myname, \
__LINE__, MPI_ERR_IO, \
"**ioRMWrdwr", 0); \
return; \
} \
} \
write_sz = (int) (ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \
memcpy(writebuf+req_off-writebuf_off, (char *)buf +userbuf_off, write_sz);\
while (write_sz != req_len) { \
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL ); \
lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL ); \
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL ); \
err = write(fd->fd_sys, writebuf, writebuf_len); \
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL ); \
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
if (err == -1) err_flag = 1; \
req_len -= write_sz; \
userbuf_off += write_sz; \
writebuf_off += writebuf_len; \
writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL ); \
lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL ); \
MPE_Log_event( ADIOI_MPE_read_a, 0, NULL ); \
err = read(fd->fd_sys, writebuf, writebuf_len); \
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL ); \
if (err == -1) { \
*error_code = MPIO_Err_create_code(MPI_SUCCESS, \
MPIR_ERR_RECOVERABLE, myname, \
__LINE__, MPI_ERR_IO, \
"**ioRMWrdwr", 0); \
return; \
} \
write_sz = ADIOI_MIN(req_len, writebuf_len); \
memcpy(writebuf, (char *)buf + userbuf_off, write_sz);\
} \
}
#else
#define ADIOI_BUFFERED_WRITE \
{ \
if (req_off >= writebuf_off + writebuf_len) { \
@ -103,10 +192,47 @@ void ADIOI_NFS_WriteContig(ADIO_File fd, void *buf, int count,
memcpy(writebuf, (char *)buf + userbuf_off, write_sz);\
} \
}
#endif
/* this macro is used when filetype is contig and buftype is not contig.
it does not do a read-modify-write and does not lock*/
#ifdef ADIOI_MPE_LOGGING
#define ADIOI_BUFFERED_WRITE_WITHOUT_READ \
{ \
if (req_off >= writebuf_off + writebuf_len) { \
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL ); \
lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL ); \
if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL ); \
err = write(fd->fd_sys, writebuf, writebuf_len); \
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL ); \
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
if (err == -1) err_flag = 1; \
writebuf_off = req_off; \
writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
} \
write_sz = (int) (ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \
memcpy(writebuf+req_off-writebuf_off, (char *)buf +userbuf_off, write_sz);\
while (write_sz != req_len) { \
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL ); \
lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL ); \
if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL ); \
err = write(fd->fd_sys, writebuf, writebuf_len); \
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL ); \
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
if (err == -1) err_flag = 1; \
req_len -= write_sz; \
userbuf_off += write_sz; \
writebuf_off += writebuf_len; \
writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
write_sz = ADIOI_MIN(req_len, writebuf_len); \
memcpy(writebuf, (char *)buf + userbuf_off, write_sz);\
} \
}
#else
#define ADIOI_BUFFERED_WRITE_WITHOUT_READ \
{ \
if (req_off >= writebuf_off + writebuf_len) { \
@ -134,7 +260,7 @@ void ADIOI_NFS_WriteContig(ADIO_File fd, void *buf, int count,
memcpy(writebuf, (char *)buf + userbuf_off, write_sz);\
} \
}
#endif
void ADIOI_NFS_WriteStrided(ADIO_File fd, void *buf, int count,
@ -214,9 +340,21 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, void *buf, int count,
}
/* write the buffer out finally */
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
lseek(fd->fd_sys, writebuf_off, SEEK_SET);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
err = write(fd->fd_sys, writebuf, writebuf_len);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
if (err == -1) err_flag = 1;
@ -318,8 +456,20 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, void *buf, int count,
writebuf = (char *) ADIOI_Malloc(max_bufsize);
writebuf_len = (int)(ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));
if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
lseek(fd->fd_sys, writebuf_off, SEEK_SET);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
#endif
err = read(fd->fd_sys, writebuf, writebuf_len);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
#endif
if (err == -1) {
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
MPIR_ERR_RECOVERABLE,
@ -438,9 +588,21 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, void *buf, int count,
}
/* write the buffer out finally */
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
lseek(fd->fd_sys, writebuf_off, SEEK_SET);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
err = write(fd->fd_sys, writebuf, writebuf_len);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
if (!(fd->atomicity))
ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len);

Просмотреть файл

@ -61,6 +61,8 @@ void ADIOI_NTFS_IwriteStrided(ADIO_File fd, void *buf, int count,
void ADIOI_NTFS_Flush(ADIO_File fd, int *error_code);
void ADIOI_NTFS_Resize(ADIO_File fd, ADIO_Offset size, int *error_code);
const char * ADIOI_NTFS_Strerror(int error);
#define FORMAT_MESSAGE_MIN_SIZE 100
#define ADIOI_NTFS_ERR_MSG_MAX FORMAT_MESSAGE_MIN_SIZE
void ADIOI_NTFS_Strerror(int error, char *errMsg, int errMsgLen);
#endif

Просмотреть файл

@ -16,11 +16,13 @@ void ADIOI_NTFS_Close(ADIO_File fd, int *error_code)
/* --BEGIN ERROR HANDLING-- */
if (err == FALSE)
{
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
err = GetLastError();
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
myname, __LINE__, MPI_ERR_IO,
"**io",
"**io %s", ADIOI_NTFS_Strerror(err));
"**io %s", errMsg);
return;
}
/* --END ERROR HANDLING-- */

Просмотреть файл

@ -9,92 +9,12 @@
int ADIOI_NTFS_ReadDone(ADIO_Request *request, ADIO_Status *status,
int *error_code)
{
DWORD ret_val;
int done = 0;
static char myname[] = "ADIOI_NTFS_ReadDone";
if (*request == ADIO_REQUEST_NULL)
{
*error_code = MPI_SUCCESS;
return 1;
}
if ((*request)->queued)
{
(*request)->nbytes = 0;
ret_val = GetOverlappedResult((*request)->fd, (*request)->handle, &(*request)->nbytes, FALSE);
if (!ret_val)
{
/* --BEGIN ERROR HANDLING-- */
ret_val = GetLastError();
if (ret_val == ERROR_IO_INCOMPLETE)
{
done = 0;
*error_code = MPI_SUCCESS;
}
else
{
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
myname, __LINE__, MPI_ERR_IO,
"**io", "**io %s", ADIOI_NTFS_Strerror(ret_val));
}
/* --END ERROR HANDLING-- */
}
else
{
done = 1;
*error_code = MPI_SUCCESS;
}
}
else
{
done = 1;
*error_code = MPI_SUCCESS;
}
#ifdef HAVE_STATUS_SET_BYTES
if (done && ((*request)->nbytes != -1))
MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes);
#endif
if (done)
{
/* if request is still queued in the system, it is also there
on ADIOI_Async_list. Delete it from there. */
if ((*request)->queued) ADIOI_Del_req_from_list(request);
(*request)->fd->async_count--;
if ((*request)->handle)
{
if (!CloseHandle(((OVERLAPPED*)((*request)->handle))->hEvent))
{
ret_val = GetLastError();
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
myname, __LINE__, MPI_ERR_IO,
"**io", "**io %s", ADIOI_NTFS_Strerror(ret_val));
}
ADIOI_Free((*request)->handle);
}
ADIOI_Free_request((ADIOI_Req_node *) (*request));
*request = ADIO_REQUEST_NULL;
}
return done;
return 0;
}
int ADIOI_NTFS_WriteDone(ADIO_Request *request, ADIO_Status *status,
int *error_code)
{
static char myname[] = "ADIOI_NTFS_WriteDone";
int ret_val;
ret_val = ADIOI_NTFS_ReadDone(request, status, error_code);
/* --BEGIN ERROR HANDLING-- */
if (*error_code != MPI_SUCCESS)
{
*error_code = MPIO_Err_create_code(*error_code, MPIR_ERR_RECOVERABLE,
myname, __LINE__, MPI_ERR_IO,
"**io", 0);
}
/* --END ERROR HANDLING-- */
return ret_val;
return 0;
}

Просмотреть файл

@ -22,12 +22,14 @@ void ADIOI_NTFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *e
dwTemp = DWORDHIGH(fd->fp_sys_posn);
if (SetFilePointer(fd->fd_sys, DWORDLOW(fd->fp_sys_posn), &dwTemp, FILE_BEGIN) == INVALID_SET_FILE_POINTER)
{
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
err = GetLastError();
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
if (err != NO_ERROR)
{
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
myname, __LINE__, MPI_ERR_IO,
"**io", "**io %s", ADIOI_NTFS_Strerror(err));
"**io", "**io %s", errMsg);
return;
}
}
@ -35,11 +37,13 @@ void ADIOI_NTFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *e
/* --BEGIN ERROR HANDLING-- */
if (fcntl_struct->fsize == INVALID_SET_FILE_POINTER)
{
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
dwTemp = GetLastError();
ADIOI_NTFS_Strerror(dwTemp, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
MPIR_ERR_RECOVERABLE, myname,
__LINE__, MPI_ERR_IO, "**io",
"**io %s", ADIOI_NTFS_Strerror(dwTemp));
"**io %s", errMsg);
return;
}
/* --END ERROR HANDLING-- */

Просмотреть файл

@ -18,11 +18,13 @@ void ADIOI_NTFS_Flush(ADIO_File fd, int *error_code)
/* --BEGIN ERROR HANDLING-- */
if (err == FALSE)
{
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
err = GetLastError();
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
myname, __LINE__, MPI_ERR_IO,
"**io",
"**io %s", ADIOI_NTFS_Strerror(err));
"**io %s", errMsg);
return;
}
/* --END ERROR HANDLING-- */

Просмотреть файл

@ -14,18 +14,6 @@ void ADIOI_NTFS_IreadContig(ADIO_File fd, void *buf, int count,
int err;
static char myname[] = "ADIOI_NTFS_IreadContig";
(*request) = ADIOI_Malloc_request();
if ((*request) == NULL)
{
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
myname, __LINE__, MPI_ERR_IO,
"**nomem", "**nomem %s", "ADIOI_Request");
return;
}
(*request)->optype = ADIOI_READ;
(*request)->fd = fd;
(*request)->datatype = datatype;
MPI_Type_size(datatype, &typesize);
len = count * typesize;
@ -33,15 +21,12 @@ void ADIOI_NTFS_IreadContig(ADIO_File fd, void *buf, int count,
{
offset = fd->fp_ind;
}
err = ADIOI_NTFS_aio(fd, buf, len, offset, 0, &((*request)->handle));
err = ADIOI_NTFS_aio(fd, buf, len, offset, 0, request);
if (file_ptr_type == ADIO_INDIVIDUAL)
{
fd->fp_ind += len;
}
(*request)->queued = 1;
ADIOI_Add_req_to_list(request);
/* --BEGIN ERROR HANDLING-- */
if (err != MPI_SUCCESS)
{
@ -54,5 +39,4 @@ void ADIOI_NTFS_IreadContig(ADIO_File fd, void *buf, int count,
*error_code = MPI_SUCCESS;
fd->fp_sys_posn = -1; /* set it to null. */
fd->async_count++;
}

Просмотреть файл

@ -6,6 +6,171 @@
#include "ad_ntfs.h"
#include "../../mpi-io/mpioimpl.h"
#include "../../mpi-io/mpioprof.h"
#include "mpiu_greq.h"
static MPIX_Grequest_class ADIOI_NTFS_greq_class = 0;
/* Fills the input buffer, errMsg, with the error message
corresponding to error code, error */
void ADIOI_NTFS_Strerror(int error, char *errMsg, int errMsgLen)
{
LPTSTR str;
int num_bytes;
num_bytes = FormatMessage(
FORMAT_MESSAGE_FROM_SYSTEM |
FORMAT_MESSAGE_ALLOCATE_BUFFER,
NULL,
error,
0,
&str,
FORMAT_MESSAGE_MIN_SIZE,
0);
if (num_bytes == 0)
{
strncpy(errMsg, "\0", errMsgLen);
}
else
{
strncpy(errMsg, str, errMsgLen);
LocalFree(str);
}
}
/* Non-blocking completion check for a single outstanding AIO request.

   extra_state - the ADIOI_AIO_Request attached to the generalized request
   status      - not used by this function

   Returns MPI_SUCCESS while the operation is still pending, when the
   overlapped query fails (errors are not reported yet, see TODO), and when
   the request was completed successfully; returns an MPI_ERR_IO class code
   if MPI_Grequest_complete fails. */
int ADIOI_NTFS_aio_poll_fn(void *extra_state, MPI_Status *status)
{
    /* FIXME: Validate the args -- has it already been done by the
       caller ? */
    ADIOI_AIO_Request *pending = (ADIOI_AIO_Request *)extra_state;
    int rc = MPI_SUCCESS;

    /* bWait == FALSE: only query the state of the overlapped operation */
    if (!GetOverlappedResult(pending->fd, pending->lpOvl,
                             &(pending->nbytes), FALSE))
    {
        if (GetLastError() != ERROR_IO_INCOMPLETE)
        {
            /* Error occured */
            /* TODO: unsure how to handle this */
        }
        /* otherwise the I/O is still in progress */
        /* TODO: need to diddle with status somehow */
        return rc;
    }

    /* the operation finished: mark the generalized request complete */
    MPIR_Nest_incr();
    rc = MPI_Grequest_complete(pending->req);
    if (rc != MPI_SUCCESS)
    {
        rc = MPIO_Err_create_code(MPI_SUCCESS,
                                  MPIR_ERR_RECOVERABLE,
                                  "ADIOI_NTFS_aio_poll_fn", __LINE__,
                                  MPI_ERR_IO, "**mpi_grequest_complete",
                                  0);
    }
    MPIR_Nest_decr();

    return rc;
}
/* Wait for completion of one of the outstanding AIO requests.

   count           - number of entries in array_of_states
   array_of_states - array of ADIOI_AIO_Request pointers, one per request
   timeout         - values <= 0 wait without limit; positive values are
                     multiplied by 1000 and used as a millisecond timeout
                     (FIXME below: unit presumably seconds -- confirm)
   status          - not used by this function

   Returns MPI_SUCCESS when a request was completed, when the wait timed
   out or failed (not reported yet, see TODOs); returns an MPI_ERR_IO class
   code if the handle array cannot be allocated or MPI_Grequest_complete
   fails. */
int ADIOI_NTFS_aio_wait_fn(int count, void **array_of_states,
                           double timeout, MPI_Status *status)
{
    int i, mpi_errno = MPI_SUCCESS;
    ADIOI_AIO_Request **aio_reqlist;
    LPHANDLE lpHandles;
    DWORD retObject = 0;
    DWORD wait_ms;
    DWORD idx;

    /* FIXME: Validate the args -- has it already been done by the
       caller ? */
    aio_reqlist = (ADIOI_AIO_Request **)array_of_states;
    lpHandles = (LPHANDLE) ADIOI_Calloc(count, sizeof(HANDLE));
    if (lpHandles == NULL)
    {
        mpi_errno = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
            "ADIOI_NTFS_aio_wait_fn", __LINE__, MPI_ERR_IO,
            "**nomem", "**nomem %s", "Event handles");
        return mpi_errno;
    }

    /* XXX: set-up arrays of outstanding requests */
    for (i = 0; i < count; i++)
    {
        lpHandles[i] = (aio_reqlist[i])->lpOvl->hEvent;
    }

    /* XXX: wait for one request to complete */
    /* FIXME: Is the timeout in seconds ? */
    /* Convert explicitly: a double that does not fit in a DWORD must not
       be converted implicitly, so oversized timeouts are clamped to the
       largest finite wait. */
    if (timeout <= 0)
    {
        wait_ms = INFINITE;
    }
    else if (timeout * 1000 >= (double) INFINITE)
    {
        wait_ms = INFINITE - 1;
    }
    else
    {
        wait_ms = (DWORD) (timeout * 1000);
    }

    retObject = WaitForMultipleObjects(count, lpHandles, FALSE, wait_ms);
    if (retObject != WAIT_FAILED)
    {
        idx = retObject - WAIT_OBJECT_0;
        /* Only a value in [WAIT_OBJECT_0, WAIT_OBJECT_0 + count) names a
           signaled handle.  A timeout or abandoned-wait return value is
           also != WAIT_FAILED and must not be used as an array index. */
        if (count > 0 && idx < (DWORD) count)
        {
            if (GetOverlappedResult(aio_reqlist[idx]->fd,
                    aio_reqlist[idx]->lpOvl, &(aio_reqlist[idx]->nbytes),
                    FALSE))
            {
                /* XXX: mark completed requests as 'done'*/
                MPIR_Nest_incr();
                mpi_errno = MPI_Grequest_complete(aio_reqlist[idx]->req);
                if (mpi_errno != MPI_SUCCESS)
                {
                    mpi_errno = MPIO_Err_create_code(MPI_SUCCESS,
                            MPIR_ERR_RECOVERABLE,
                            "ADIOI_NTFS_aio_wait_fn", __LINE__,
                            MPI_ERR_IO, "**mpi_grequest_complete",
                            0);
                }
                MPIR_Nest_decr();
            }
            else
            {
                if (GetLastError() == ERROR_IO_INCOMPLETE)
                {
                    /* IO in progress */
                    /* TODO: need to diddle with status somehow */
                }
                else
                {
                    /* Error occured */
                    /* TODO: not sure how to handle this */
                }
            }
        }
        /* else: the wait ended without a signaled request (e.g. timeout);
           nothing is completed and MPI_SUCCESS is returned */
    }
    else
    {
        /* TODO: How to handle error while waiting ? */
    }

    ADIOI_Free(lpHandles);
    return mpi_errno;
}
/* Query callback of the generalized request: fills in `status` for the AIO
   request passed as extra_state.

   The element count is set to the request's nbytes (as MPI_BYTE), the
   cancelled flag is cleared, and MPI_SOURCE / MPI_TAG are set to
   MPI_UNDEFINED.  Always returns MPI_SUCCESS. */
int ADIOI_NTFS_aio_query_fn(void *extra_state, MPI_Status *status)
{
    ADIOI_AIO_Request *finished = (ADIOI_AIO_Request *)extra_state;

    /* report the transferred size in bytes */
    MPI_Status_set_elements(status, MPI_BYTE, finished->nbytes);

    /* do i need to nest_incr/nest_decr here? */
    /* these requests can never be cancelled, so the flag is always 0 */
    MPI_Status_set_cancelled(status, 0);

    /* source and tag carry no meaning for this generalized request */
    status->MPI_SOURCE = MPI_UNDEFINED;
    status->MPI_TAG = MPI_UNDEFINED;

    /* this generalized request never fails */
    return MPI_SUCCESS;
}
int ADIOI_NTFS_aio_free_fn(void *extra_state)
{
ADIOI_AIO_Request *aio_req;
/* FIXME: Validate the args -- has it already been done by the
caller ? */
aio_req = (ADIOI_AIO_Request*)extra_state;
CloseHandle(aio_req->lpOvl->hEvent);
ADIOI_Free(aio_req->lpOvl);
ADIOI_Free(aio_req);
return MPI_SUCCESS;
}
void ADIOI_NTFS_IwriteContig(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Request *request,
@ -15,18 +180,6 @@ void ADIOI_NTFS_IwriteContig(ADIO_File fd, void *buf, int count,
int err;
static char myname[] = "ADIOI_NTFS_IwriteContig";
*request = ADIOI_Malloc_request();
if ((*request) == NULL)
{
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
myname, __LINE__, MPI_ERR_IO,
"**nomem", "**nomem %s", "ADIOI_Request");
return;
}
(*request)->optype = ADIOI_WRITE;
(*request)->fd = fd;
(*request)->datatype = datatype;
MPI_Type_size(datatype, &typesize);
len = count * typesize;
@ -34,15 +187,12 @@ void ADIOI_NTFS_IwriteContig(ADIO_File fd, void *buf, int count,
{
offset = fd->fp_ind;
}
err = ADIOI_NTFS_aio(fd, buf, len, offset, 1, &((*request)->handle));
err = ADIOI_NTFS_aio(fd, buf, len, offset, 1, request);
if (file_ptr_type == ADIO_INDIVIDUAL)
{
fd->fp_ind += len;
}
(*request)->queued = 1;
ADIOI_Add_req_to_list(request);
/* --BEGIN ERROR HANDLING-- */
if (err != MPI_SUCCESS)
{
@ -55,7 +205,6 @@ void ADIOI_NTFS_IwriteContig(ADIO_File fd, void *buf, int count,
*error_code = MPI_SUCCESS;
fd->fp_sys_posn = -1; /* set it to null. */
fd->async_count++;
}
@ -65,56 +214,61 @@ void ADIOI_NTFS_IwriteContig(ADIO_File fd, void *buf, int count,
* Returns MPI_SUCCESS on success, mpi_errno on failure.
*/
int ADIOI_NTFS_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset,
int wr, void *handle)
int wr, MPI_Request *request)
{
static char myname[] = "ADIOI_NTFS_aio";
ADIOI_AIO_Request *aio_req;
static DWORD dwNumWritten, dwNumRead;
BOOL ret_val = FALSE;
FDTYPE fd_sys;
int mpi_errno = MPI_SUCCESS;
OVERLAPPED *pOvl;
DWORD err;
fd_sys = fd->fd_sys;
pOvl = (OVERLAPPED *) ADIOI_Calloc(sizeof(OVERLAPPED), 1);
if (pOvl == NULL)
aio_req = (ADIOI_AIO_Request *)ADIOI_Calloc(sizeof(ADIOI_AIO_Request), 1);
if (aio_req == NULL)
{
mpi_errno = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
myname, __LINE__, MPI_ERR_IO,
"**nomem", "**nomem %s", "AIO_REQ");
return mpi_errno;
}
aio_req->lpOvl = (LPOVERLAPPED ) ADIOI_Calloc(sizeof(OVERLAPPED), 1);
if (aio_req->lpOvl == NULL)
{
mpi_errno = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
myname, __LINE__, MPI_ERR_IO,
"**nomem", "**nomem %s", "OVERLAPPED");
ADIOI_Free(aio_req);
return mpi_errno;
}
pOvl->hEvent = CreateEvent(NULL, TRUE, TRUE, NULL);
if (pOvl->hEvent == NULL)
aio_req->lpOvl->hEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
if (aio_req->lpOvl->hEvent == NULL)
{
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
err = GetLastError();
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
mpi_errno = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
myname, __LINE__, MPI_ERR_IO,
"**io", "**io %s", ADIOI_NTFS_Strerror(err));
ADIOI_Free(pOvl);
"**io", "**io %s", errMsg);
ADIOI_Free(aio_req->lpOvl);
ADIOI_Free(aio_req);
return mpi_errno;
}
pOvl->Offset = DWORDLOW(offset);
pOvl->OffsetHigh = DWORDHIGH(offset);
aio_req->lpOvl->Offset = DWORDLOW(offset);
aio_req->lpOvl->OffsetHigh = DWORDHIGH(offset);
aio_req->fd = fd_sys;
/* XXX: initiate async I/O */
if (wr)
{
/*printf("WriteFile(%d bytes)\n", len);fflush(stdout);*/
ret_val = WriteFile(fd_sys, buf, len, &dwNumWritten, pOvl);
ret_val = WriteFile(fd_sys, buf, len, &dwNumWritten, aio_req->lpOvl);
}
else
{
/*
{
ADIO_Fcntl_t fcntl_struct;
int error_code;
ADIO_Fcntl(fd, ADIO_FCNTL_GET_FSIZE, &fcntl_struct, &error_code);
printf("File size a: %d\n", fcntl_struct.fsize);
}
printf("ReadFile(%d bytes)\n", len);fflush(stdout);
*/
ret_val = ReadFile(fd_sys, buf, len, &dwNumRead, pOvl);
ret_val = ReadFile(fd_sys, buf, len, &dwNumRead, aio_req->lpOvl);
}
/* --BEGIN ERROR HANDLING-- */
@ -123,44 +277,32 @@ int ADIOI_NTFS_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset,
mpi_errno = GetLastError();
if (mpi_errno != ERROR_IO_PENDING)
{
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
ADIOI_NTFS_Strerror(mpi_errno, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
mpi_errno = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
myname, __LINE__, MPI_ERR_IO,
"**io",
"**io %s", ADIOI_NTFS_Strerror(mpi_errno));
"**io %s", errMsg);
return mpi_errno;
}
mpi_errno = MPI_SUCCESS;
}
/* --END ERROR HANDLING-- */
*((OVERLAPPED **) handle) = pOvl;
/* XXX: set up generalized request class and request */
if (ADIOI_NTFS_greq_class == 0) {
mpi_errno = MPIX_Grequest_class_create(ADIOI_NTFS_aio_query_fn,
ADIOI_NTFS_aio_free_fn, MPIU_Greq_cancel_fn,
ADIOI_NTFS_aio_poll_fn, ADIOI_NTFS_aio_wait_fn,
&ADIOI_NTFS_greq_class);
if(mpi_errno != MPI_SUCCESS){
/* FIXME: Pass appropriate error code to user */
}
}
mpi_errno = MPIX_Grequest_class_allocate(ADIOI_NTFS_greq_class, aio_req, request);
if(mpi_errno != MPI_SUCCESS){
/* FIXME: Pass appropriate error code to user */
}
memcpy(&(aio_req->req), request, sizeof(MPI_Request));
return mpi_errno;
}
const char * ADIOI_NTFS_Strerror(int error)
{
/* obviously not thread safe to store a message like this */
static char msg[1024];
HLOCAL str;
int num_bytes;
num_bytes = FormatMessage(
FORMAT_MESSAGE_FROM_SYSTEM |
FORMAT_MESSAGE_ALLOCATE_BUFFER,
0,
error,
MAKELANGID( LANG_NEUTRAL, SUBLANG_DEFAULT ),
(LPTSTR) &str,
0,0);
if (num_bytes == 0)
{
*msg = '\0';
}
else
{
memcpy(msg, str, num_bytes+1);
LocalFree(str);
strtok(msg, "\r\n");
}
return msg;
}

Просмотреть файл

@ -71,12 +71,14 @@ void ADIOI_NTFS_Open(ADIO_File fd, int *error_code)
fd->fp_ind = fd->fp_sys_posn = SetFilePointer(fd->fd_sys, 0, NULL, FILE_END);
if (fd->fp_ind == INVALID_SET_FILE_POINTER)
{
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
err = GetLastError();
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
if (err != NO_ERROR)
{
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
myname, __LINE__, MPI_ERR_IO,
"**io", "**io %s", ADIOI_NTFS_Strerror(err));
"**io", "**io %s", errMsg);
return;
}
}
@ -85,11 +87,13 @@ void ADIOI_NTFS_Open(ADIO_File fd, int *error_code)
/* --BEGIN ERROR HANDLING-- */
if (fd->fd_sys == INVALID_HANDLE_VALUE)
{
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
err = GetLastError();
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
myname, __LINE__, MPI_ERR_IO,
"**io",
"**io %s", ADIOI_NTFS_Strerror(err));
"**io %s", errMsg);
return;
}
/* --END ERROR HANDLING-- */

Просмотреть файл

@ -37,10 +37,12 @@ void ADIOI_NTFS_ReadContig(ADIO_File fd, void *buf, int count,
pOvl->hEvent = CreateEvent(NULL, TRUE, TRUE, NULL);
if (pOvl->hEvent == NULL)
{
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
err = GetLastError();
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
myname, __LINE__, MPI_ERR_IO,
"**io", "**io %s", ADIOI_NTFS_Strerror(err));
"**io", "**io %s", errMsg);
ADIOI_Free(pOvl);
return;
}
@ -54,12 +56,14 @@ void ADIOI_NTFS_ReadContig(ADIO_File fd, void *buf, int count,
dwTemp = DWORDHIGH(offset);
if (SetFilePointer(fd->fd_sys, DWORDLOW(offset), &dwTemp, FILE_BEGIN) == INVALID_SET_FILE_POINTER)
{
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
err = GetLastError();
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
if (err != NO_ERROR)
{
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
myname, __LINE__, MPI_ERR_IO,
"**io", "**io %s", ADIOI_NTFS_Strerror(err));
"**io", "**io %s", errMsg);
CloseHandle(pOvl->hEvent);
ADIOI_Free(pOvl);
return;
@ -79,7 +83,9 @@ void ADIOI_NTFS_ReadContig(ADIO_File fd, void *buf, int count,
/* --BEGIN ERROR HANDLING-- */
if (err == FALSE)
{
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
err = GetLastError();
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
switch (err)
{
case ERROR_IO_PENDING:
@ -92,7 +98,7 @@ void ADIOI_NTFS_ReadContig(ADIO_File fd, void *buf, int count,
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
myname, __LINE__, MPI_ERR_IO,
"**io",
"**io %s", ADIOI_NTFS_Strerror(err));
"**io %s", errMsg);
CloseHandle(pOvl->hEvent);
ADIOI_Free(pOvl);
return;
@ -103,13 +109,15 @@ void ADIOI_NTFS_ReadContig(ADIO_File fd, void *buf, int count,
/* --BEGIN ERROR HANDLING-- */
if (err == FALSE)
{
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
err = GetLastError();
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
if (err != ERROR_HANDLE_EOF) /* Ignore EOF errors */
{
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
MPIR_ERR_RECOVERABLE, myname,
__LINE__, MPI_ERR_IO, "**io",
"**io %s", ADIOI_NTFS_Strerror(err));
"**io %s", errMsg);
CloseHandle(pOvl->hEvent);
ADIOI_Free(pOvl);
return;
@ -118,10 +126,12 @@ void ADIOI_NTFS_ReadContig(ADIO_File fd, void *buf, int count,
/* --END ERROR HANDLING-- */
if (!CloseHandle(pOvl->hEvent))
{
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
err = GetLastError();
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
myname, __LINE__, MPI_ERR_IO,
"**io", "**io %s", ADIOI_NTFS_Strerror(err));
"**io", "**io %s", errMsg);
CloseHandle(pOvl->hEvent);
ADIOI_Free(pOvl);
return;
@ -139,12 +149,14 @@ void ADIOI_NTFS_ReadContig(ADIO_File fd, void *buf, int count,
dwTemp = DWORDHIGH(fd->fp_ind);
if (SetFilePointer(fd->fd_sys, DWORDLOW(fd->fp_ind), &dwTemp, FILE_BEGIN) == INVALID_SET_FILE_POINTER)
{
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
err = GetLastError();
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
if (err != NO_ERROR)
{
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
myname, __LINE__, MPI_ERR_IO,
"**io", "**io %s", ADIOI_NTFS_Strerror(err));
"**io", "**io %s", errMsg);
CloseHandle(pOvl->hEvent);
ADIOI_Free(pOvl);
return;
@ -164,7 +176,9 @@ void ADIOI_NTFS_ReadContig(ADIO_File fd, void *buf, int count,
/* --BEGIN ERROR HANDLING-- */
if (err == FALSE)
{
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
err = GetLastError();
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
switch (err)
{
case ERROR_IO_PENDING:
@ -177,7 +191,7 @@ void ADIOI_NTFS_ReadContig(ADIO_File fd, void *buf, int count,
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
myname, __LINE__, MPI_ERR_IO,
"**io",
"**io %s", ADIOI_NTFS_Strerror(err));
"**io %s", errMsg);
CloseHandle(pOvl->hEvent);
ADIOI_Free(pOvl);
return;
@ -188,13 +202,15 @@ void ADIOI_NTFS_ReadContig(ADIO_File fd, void *buf, int count,
/* --BEGIN ERROR HANDLING-- */
if (err == FALSE)
{
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
err = GetLastError();
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
if (err != ERROR_HANDLE_EOF) /* Ignore EOF errors */
{
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
MPIR_ERR_RECOVERABLE, myname,
__LINE__, MPI_ERR_IO, "**io",
"**io %s", ADIOI_NTFS_Strerror(err));
"**io %s", errMsg);
CloseHandle(pOvl->hEvent);
ADIOI_Free(pOvl);
return;
@ -203,10 +219,12 @@ void ADIOI_NTFS_ReadContig(ADIO_File fd, void *buf, int count,
/* --END ERROR HANDLING-- */
if (!CloseHandle(pOvl->hEvent))
{
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
err = GetLastError();
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
myname, __LINE__, MPI_ERR_IO,
"**io", "**io %s", ADIOI_NTFS_Strerror(err));
"**io", "**io %s", errMsg);
ADIOI_Free(pOvl);
return;
}
@ -226,11 +244,13 @@ void ADIOI_NTFS_ReadContig(ADIO_File fd, void *buf, int count,
/* --BEGIN ERROR HANDLING-- */
if (err == FALSE)
{
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
err = GetLastError();
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
myname, __LINE__, MPI_ERR_IO,
"**io",
"**io %s", ADIOI_NTFS_Strerror(err));
"**io %s", errMsg);
return;
}
/* --END ERROR HANDLING-- */

Просмотреть файл

@ -22,10 +22,12 @@ void ADIOI_NTFS_Resize(ADIO_File fd, ADIO_Offset size, int *error_code)
err = GetLastError();
if (err != NO_ERROR)
{
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
myname, __LINE__, MPI_ERR_IO,
"**io",
"**io %s", ADIOI_NTFS_Strerror(err));
"**io %s", errMsg);
return;
}
}
@ -35,11 +37,13 @@ void ADIOI_NTFS_Resize(ADIO_File fd, ADIO_Offset size, int *error_code)
/* --BEGIN ERROR HANDLING-- */
if (result == FALSE)
{
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
err = GetLastError();
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
myname, __LINE__, MPI_ERR_IO,
"**io",
"**io %s", ADIOI_NTFS_Strerror(err));
"**io %s", errMsg);
return;
}
/* --END ERROR HANDLING-- */

Просмотреть файл

@ -9,80 +9,12 @@
void ADIOI_NTFS_ReadComplete(ADIO_Request *request, ADIO_Status *status,
int *error_code)
{
DWORD ret_val;
static char myname[] = "ADIOI_NTFS_ReadComplete";
if (*request == ADIO_REQUEST_NULL)
{
*error_code = MPI_SUCCESS;
return;
}
if ((*request)->queued)
{
ret_val = GetOverlappedResult((*request)->fd, (*request)->handle,
&(*request)->nbytes, TRUE);
if (!ret_val)
(*request)->nbytes = -1;
/* --BEGIN ERROR HANDLING-- */
if (ret_val == FALSE)
{
ret_val = GetLastError();
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
MPIR_ERR_RECOVERABLE, myname,
__LINE__, MPI_ERR_IO, "**io",
"**io %s", ADIOI_NTFS_Strerror(ret_val));
return;
}
/* --END ERROR HANDLING-- */
} /* if ((*request)->queued) ... */
*error_code = MPI_SUCCESS;
#ifdef HAVE_STATUS_SET_BYTES
if ((*request)->nbytes != -1)
{
MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes);
}
#endif
if ((*request)->queued != -1)
{
/* queued = -1 is an internal hack used when the request must
be completed, but the request object should not be
freed. This is used in ADIOI_Complete_async, because the user
will call MPI_Wait later, which would require status to
be filled. Ugly but works. queued = -1 should be used only
in ADIOI_Complete_async.
This should not affect the user in any way. */
/* if request is still queued in the system, it is also there
on ADIOI_Async_list. Delete it from there. */
if ((*request)->queued) ADIOI_Del_req_from_list(request);
(*request)->fd->async_count--;
if ((*request)->handle)
{
CloseHandle(((OVERLAPPED*)((*request)->handle))->hEvent);
ADIOI_Free((*request)->handle);
}
ADIOI_Free_request((ADIOI_Req_node *) (*request));
*request = ADIO_REQUEST_NULL;
}
return;
}
void ADIOI_NTFS_WriteComplete(ADIO_Request *request, ADIO_Status *status,
int *error_code)
{
static char myname[] = "ADIOI_NTFS_WriteComplete";
ADIOI_NTFS_ReadComplete(request, status, error_code);
/* --BEGIN ERROR HANDLING-- */
if (*error_code != MPI_SUCCESS)
{
*error_code = MPIO_Err_create_code(*error_code,
MPIR_ERR_RECOVERABLE, myname,
__LINE__, MPI_ERR_IO, "**io", 0);
}
/* --END ERROR HANDLING-- */
return;
}

Просмотреть файл

@ -37,10 +37,12 @@ void ADIOI_NTFS_WriteContig(ADIO_File fd, void *buf, int count,
pOvl->hEvent = CreateEvent(NULL, TRUE, TRUE, NULL);
if (pOvl->hEvent == NULL)
{
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
err = GetLastError();
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
myname, __LINE__, MPI_ERR_IO,
"**io", "**io %s", ADIOI_NTFS_Strerror(err));
"**io", "**io %s", errMsg);
ADIOI_Free(pOvl);
return;
}
@ -57,9 +59,11 @@ void ADIOI_NTFS_WriteContig(ADIO_File fd, void *buf, int count,
err = GetLastError();
if (err != NO_ERROR)
{
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
myname, __LINE__, MPI_ERR_IO,
"**io", "**io %s", ADIOI_NTFS_Strerror(err));
"**io", "**io %s", errMsg);
CloseHandle(pOvl->hEvent);
ADIOI_Free(pOvl);
return;
@ -74,10 +78,12 @@ void ADIOI_NTFS_WriteContig(ADIO_File fd, void *buf, int count,
err = GetLastError();
if (err != ERROR_IO_PENDING)
{
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
myname, __LINE__, MPI_ERR_IO,
"**io",
"**io %s", ADIOI_NTFS_Strerror(err));
"**io %s", errMsg);
CloseHandle(pOvl->hEvent);
ADIOI_Free(pOvl);
return;
@ -88,11 +94,13 @@ void ADIOI_NTFS_WriteContig(ADIO_File fd, void *buf, int count,
/* --BEGIN ERROR HANDLING-- */
if (err == FALSE)
{
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
err = GetLastError();
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
MPIR_ERR_RECOVERABLE, myname,
__LINE__, MPI_ERR_IO, "**io",
"**io %s", ADIOI_NTFS_Strerror(err));
"**io %s", errMsg);
CloseHandle(pOvl->hEvent);
ADIOI_Free(pOvl);
return;
@ -100,10 +108,12 @@ void ADIOI_NTFS_WriteContig(ADIO_File fd, void *buf, int count,
/* --END ERROR HANDLING-- */
if (!CloseHandle(pOvl->hEvent))
{
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
err = GetLastError();
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
myname, __LINE__, MPI_ERR_IO,
"**io", "**io %s", ADIOI_NTFS_Strerror(err));
"**io", "**io %s", errMsg);
CloseHandle(pOvl->hEvent);
ADIOI_Free(pOvl);
return;
@ -124,9 +134,11 @@ void ADIOI_NTFS_WriteContig(ADIO_File fd, void *buf, int count,
err = GetLastError();
if (err != NO_ERROR)
{
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
myname, __LINE__, MPI_ERR_IO,
"**io", "**io %s", ADIOI_NTFS_Strerror(err));
"**io", "**io %s", errMsg);
CloseHandle(pOvl->hEvent);
ADIOI_Free(pOvl);
return;
@ -141,10 +153,12 @@ void ADIOI_NTFS_WriteContig(ADIO_File fd, void *buf, int count,
err = GetLastError();
if (err != ERROR_IO_PENDING)
{
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
myname, __LINE__, MPI_ERR_IO,
"**io",
"**io %s", ADIOI_NTFS_Strerror(err));
"**io %s", errMsg);
CloseHandle(pOvl->hEvent);
ADIOI_Free(pOvl);
return;
@ -155,11 +169,13 @@ void ADIOI_NTFS_WriteContig(ADIO_File fd, void *buf, int count,
/* --BEGIN ERROR HANDLING-- */
if (err == FALSE)
{
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
err = GetLastError();
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
MPIR_ERR_RECOVERABLE, myname,
__LINE__, MPI_ERR_IO, "**io",
"**io %s", ADIOI_NTFS_Strerror(err));
"**io %s", errMsg);
CloseHandle(pOvl->hEvent);
ADIOI_Free(pOvl);
return;
@ -167,10 +183,12 @@ void ADIOI_NTFS_WriteContig(ADIO_File fd, void *buf, int count,
/* --END ERROR HANDLING-- */
if (!CloseHandle(pOvl->hEvent))
{
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
err = GetLastError();
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
myname, __LINE__, MPI_ERR_IO,
"**io", "**io %s", ADIOI_NTFS_Strerror(err));
"**io", "**io %s", errMsg);
ADIOI_Free(pOvl);
return;
}
@ -190,11 +208,13 @@ void ADIOI_NTFS_WriteContig(ADIO_File fd, void *buf, int count,
/* --BEGIN ERROR HANDLING-- */
if (err == FALSE)
{
char errMsg[ADIOI_NTFS_ERR_MSG_MAX];
err = GetLastError();
ADIOI_NTFS_Strerror(err, errMsg, ADIOI_NTFS_ERR_MSG_MAX);
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
myname, __LINE__, MPI_ERR_IO,
"**io",
"**io %s", ADIOI_NTFS_Strerror(err));
"**io %s", errMsg);
return;
}
/* --END ERROR HANDLING-- */

Просмотреть файл

@ -9,6 +9,7 @@
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@ -23,4 +24,7 @@ libadio_panfs_la_SOURCES = \
ad_panfs.c \
ad_panfs.h \
ad_panfs_hints.c \
ad_panfs_open.c
ad_panfs_open.c \
ad_panfs_read.c \
ad_panfs_resize.c \
ad_panfs_write.c

Просмотреть файл

@ -13,8 +13,8 @@
struct ADIOI_Fns_struct ADIO_PANFS_operations = {
ADIOI_PANFS_Open, /* Open */
ADIOI_GEN_ReadContig, /* ReadContig */
ADIOI_GEN_WriteContig, /* WriteContig */
ADIOI_PANFS_ReadContig, /* ReadContig */
ADIOI_PANFS_WriteContig, /* WriteContig */
ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */
ADIOI_GEN_WriteStridedColl, /* WriteStridedColl */
ADIOI_GEN_SeekIndividual, /* SeekIndividual */
@ -23,8 +23,13 @@ struct ADIOI_Fns_struct ADIO_PANFS_operations = {
ADIOI_GEN_ReadStrided, /* ReadStrided */
ADIOI_GEN_WriteStrided, /* WriteStrided */
ADIOI_GEN_Close, /* Close */
#ifdef ROMIO_HAVE_WORKING_AIO
ADIOI_GEN_IreadContig, /* IreadContig */
ADIOI_GEN_IwriteContig, /* IwriteContig */
#else
ADIOI_FAKE_IreadContig, /* IreadContig */
ADIOI_FAKE_IwriteContig, /* IwriteContig */
#endif
ADIOI_GEN_IODone, /* ReadDone */
ADIOI_GEN_IODone, /* WriteDone */
ADIOI_GEN_IOComplete, /* ReadComplete */
@ -32,6 +37,6 @@ struct ADIOI_Fns_struct ADIO_PANFS_operations = {
ADIOI_GEN_IreadStrided, /* IreadStrided */
ADIOI_GEN_IwriteStrided, /* IwriteStrided */
ADIOI_GEN_Flush, /* Flush */
ADIOI_GEN_Resize, /* Resize */
ADIOI_PANFS_Resize, /* Resize */
ADIOI_GEN_Delete, /* Delete */
};

Просмотреть файл

@ -6,8 +6,8 @@
* See COPYRIGHT notice in top-level directory.
*/
#ifndef AD_UNIX_INCLUDE
#define AD_UNIX_INCLUDE
#ifndef AD_PANFS_INCLUDE
#define AD_PANFS_INCLUDE
#include <unistd.h>
#include <sys/types.h>
@ -25,28 +25,32 @@ typedef struct adiocb adiocb_t;
#endif
#endif
int ADIOI_PANFS_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset,
int wr, void *handle);
void ADIOI_PANFS_Open(ADIO_File fd, int *error_code);
void ADIOI_PANFS_IwriteContig(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Request *request, int
*error_code);
void ADIOI_PANFS_IreadContig(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Request *request, int
*error_code);
int ADIOI_PANFS_ReadDone(ADIO_Request *request, ADIO_Status *status, int
*error_code);
int ADIOI_PANFS_WriteDone(ADIO_Request *request, ADIO_Status *status, int
*error_code);
void ADIOI_PANFS_ReadComplete(ADIO_Request *request, ADIO_Status *status, int
*error_code);
void ADIOI_PANFS_WriteComplete(ADIO_Request *request, ADIO_Status *status,
int *error_code);
void ADIOI_PANFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int
*error_code);
void ADIOI_PANFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code);
void ADIOI_PANFS_ReadContig(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status,
int *error_code);
void ADIOI_PANFS_Resize(ADIO_File fd, ADIO_Offset size, int *error_code);
void ADIOI_PANFS_WriteContig(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status,
int *error_code);
/*
 * Delay between retries. The value is handed to usleep(), which takes
 * microseconds, so 1000 corresponds to 1 ms.
 */
#define AD_PANFS_RETRY_DELAY 1000
/*
 * AD_PANFS_RETRY(_op_, _rc_)
 *
 * Evaluates _op_ and stores its result in _rc_.  While the result is -1 and
 * errno is EAGAIN, sleeps AD_PANFS_RETRY_DELAY microseconds and evaluates
 * _op_ again.  The loop stops early if usleep() itself returns -1, in which
 * case _rc_ keeps the last (-1) result.
 *
 * Notes for callers:
 *  - _op_ is textually re-expanded on every retry, so it must be safe to
 *    evaluate more than once.
 *  - The expansion is a bare { ... } block, not do { ... } while (0); a
 *    trailing semicolon is optional, and the macro must not be used as the
 *    unbraced body of an if that is followed by an else.
 *  - The expansion references errno, EAGAIN and usleep(); the including file
 *    must have them declared.
 */
#define AD_PANFS_RETRY(_op_,_rc_) \
{ \
_rc_ = (_op_); \
while(_rc_ == -1 && errno == EAGAIN) \
{ \
if(usleep(AD_PANFS_RETRY_DELAY) == -1) \
{ \
break; \
} \
_rc_ = (_op_); \
} \
}
#endif

Просмотреть файл

@ -8,7 +8,6 @@
#include "ad_panfs.h"
#include <pan_fs_client_cw_mode.h>
#include "opal/mca/base/mca_base_param.h"
void ADIOI_PANFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
{
@ -117,7 +116,7 @@ void ADIOI_PANFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
MPI_Info_get(users_info, "panfs_layout_visit_policy", MPI_MAX_INFO_VAL,
value, &flag);
if (flag && (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE)) {
if (flag && (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE || layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID10)) {
layout_visit_policy = strtoul(value,NULL,10);
tmp_val = layout_visit_policy;
MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);

Просмотреть файл

@ -33,6 +33,9 @@ void ADIOI_PANFS_Open(ADIO_File fd, int *error_code)
unsigned long int layout_parity_stripe_depth = 0;
unsigned long int layout_total_num_comps = 0;
pan_fs_client_layout_visit_t layout_visit_policy = PAN_FS_CLIENT_LAYOUT_VISIT__ROUND_ROBIN;
int myrank;
MPI_Comm_rank(fd->comm, &myrank);
*error_code = MPI_SUCCESS;
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
@ -68,10 +71,10 @@ void ADIOI_PANFS_Open(ADIO_File fd, int *error_code)
}
ADIOI_Free(value);
amode = amode | O_CREAT;
amode = amode | O_CREAT;
/* Check for valid set of hints */
if ((layout_type < PAN_FS_CLIENT_LAYOUT_TYPE__DEFAULT) ||
(layout_type > PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE))
(layout_type > PAN_FS_CLIENT_LAYOUT_TYPE__RAID10))
{
FPRINTF(stderr, "%s: panfs_layout_type is not a valid value: %u.\n", myname, layout_type);
MPI_Abort(MPI_COMM_WORLD, 1);
@ -121,99 +124,136 @@ void ADIOI_PANFS_Open(ADIO_File fd, int *error_code)
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
if ((layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID0) ||
(layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE))
{
int myrank;
MPI_Comm_rank(fd->comm, &myrank);
if (myrank == 0) {
pan_fs_client_layout_create_args_t file_create_args;
int fd_dir;
char* slash;
struct stat stat_buf;
int err;
char *value, *path, *file_name_ptr;
/* Check that the file does not exist before
* trying to create it. The ioctl itself should
* be able to handle this condition. Currently,
* the ioctl will return successfully if the file
* has been previously created. Filed bug 33862
* to track the problem.
*/
err = stat(fd->filename,&stat_buf);
if((err == -1) && (errno != ENOENT))
if (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID10)
{
if ((layout_stripe_unit == 0) || (layout_total_num_comps == 0))
{
if(layout_stripe_unit == 0)
{
FPRINTF(stderr,"%s: Unexpected I/O Error calling stat() on PanFS file: %s.\n", myname, strerror(errno));
MPI_Abort(MPI_COMM_WORLD, 1);
FPRINTF(stderr, "%s: MPI_Info does not contain the panfs_layout_stripe_unit hint which is necessary to specify a valid RAID10 layout to the PAN_FS_CLIENT_LAYOUT_CREATE_FILE ioctl.\n", myname);
}
else if (err == 0)
if(layout_total_num_comps == 0)
{
FPRINTF(stderr,"%s: Cannot create PanFS file with ioctl when file already exists.\n", myname);
FPRINTF(stderr, "%s: MPI_Info does not contain the panfs_layout_total_num_comps hint which is necessary to specify a valid RAID10 layout to the PAN_FS_CLIENT_LAYOUT_CREATE_FILE ioctl.\n", myname);
}
MPI_Abort(MPI_COMM_WORLD, 1);
}
if ((layout_visit_policy < PAN_FS_CLIENT_LAYOUT_VISIT__ROUND_ROBIN) ||
(layout_visit_policy > PAN_FS_CLIENT_LAYOUT_VISIT__ROUND_ROBIN_WITH_HASHED_OFFSET))
{
FPRINTF(stderr, "%s: panfs_layout_visit_policy is not a valid value: %u.\n", myname, layout_visit_policy);
MPI_Abort(MPI_COMM_WORLD, 1);
}
}
/* Create the file via ioctl() or open(). ADIOI_PANFS_Open's caller
* already optimizes performance by only calling this function with
* ADIO_CREATE on rank 0. Therefore, we don't need to worry about
* implementing that optimization here. */
if((layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID0) || (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE)
|| (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID10)) {
pan_fs_client_layout_create_args_t file_create_args;
int fd_dir;
char* slash;
struct stat stat_buf;
int err;
char *value, *path, *file_name_ptr;
/* Check that the file does not exist before
* trying to create it. The ioctl itself should
* be able to handle this condition. Currently,
* the ioctl will return successfully if the file
* has been previously created. Filed bug 33862
* to track the problem.
*/
err = stat(fd->filename,&stat_buf);
if((err == -1) && (errno != ENOENT))
{
FPRINTF(stderr,"%s: Unexpected I/O Error calling stat() on PanFS file: %s.\n", myname, strerror(errno));
MPI_Abort(MPI_COMM_WORLD, 1);
}
else if (err == 0)
{
FPRINTF(stderr,"%s: Cannot create PanFS file with ioctl when file already exists.\n", myname);
MPI_Abort(MPI_COMM_WORLD, 1);
}
else
{
/* (err == -1) && (errno == ENOENT) */
/* File does not exist */
path = ADIOI_Strdup(fd->filename);
slash = strrchr(path, '/');
if (!slash)
ADIOI_Strncpy(path, ".", 2);
else {
if (slash == path)
*(path + 1) = '\0';
else *slash = '\0';
}
/* create PanFS object */
bzero(&file_create_args,sizeof(pan_fs_client_layout_create_args_t));
/* open directory */
fd_dir = open(path, O_RDONLY);
if (fd_dir < 0) {
FPRINTF(stderr, "%s: I/O Error opening parent directory to create PanFS file using ioctl: %s.\n", myname, strerror(errno));
MPI_Abort(MPI_COMM_WORLD, 1);
}
else
{
/* (err == -1) && (errno == ENOENT) */
/* File does not exist */
path = ADIOI_Strdup(fd->filename);
slash = strrchr(path, '/');
if (!slash)
ADIOI_Strncpy(path, ".", 2);
else {
if (slash == path)
*(path + 1) = '\0';
else *slash = '\0';
char *file_name_ptr = fd->filename;
slash = strrchr(fd->filename, '/');
if (slash)
{
file_name_ptr = slash + 1;
}
/* create PanFS object */
bzero(&file_create_args,sizeof(pan_fs_client_layout_create_args_t));
/* open directory */
fd_dir = open(path, O_RDONLY);
if (fd_dir < 0) {
FPRINTF(stderr, "%s: I/O Error opening parent directory to create PanFS file using ioctl: %s.\n", myname, strerror(errno));
/* create file in the directory */
file_create_args.mode = perm;
file_create_args.version = PAN_FS_CLIENT_LAYOUT_VERSION;
file_create_args.flags = PAN_FS_CLIENT_LAYOUT_CREATE_F__NONE;
ADIOI_Strncpy(file_create_args.filename, file_name_ptr, strlen(fd->filename)+1);
file_create_args.layout.agg_type = layout_type;
file_create_args.layout.layout_is_valid = 1;
if(layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE)
{
file_create_args.layout.u.raid1_5_parity_stripe.total_num_comps = layout_total_num_comps;
file_create_args.layout.u.raid1_5_parity_stripe.parity_stripe_width = layout_parity_stripe_width;
file_create_args.layout.u.raid1_5_parity_stripe.parity_stripe_depth = layout_parity_stripe_depth;
file_create_args.layout.u.raid1_5_parity_stripe.stripe_unit = layout_stripe_unit;
file_create_args.layout.u.raid1_5_parity_stripe.layout_visit_policy = layout_visit_policy;
}
else if(layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID0)
{
file_create_args.layout.u.raid0.total_num_comps = layout_total_num_comps;
file_create_args.layout.u.raid0.stripe_unit = layout_stripe_unit;
}
else if(layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID10)
{
file_create_args.layout.u.raid10.total_num_comps = layout_total_num_comps;
file_create_args.layout.u.raid10.stripe_unit = layout_stripe_unit;
file_create_args.layout.u.raid10.layout_visit_policy = layout_visit_policy;
}
err = ioctl(fd_dir, PAN_FS_CLIENT_LAYOUT_CREATE_FILE, &file_create_args);
if (err < 0) {
FPRINTF(stderr, "%s: I/O Error doing ioctl on parent directory to create PanFS file using ioctl: %s.\n", myname, strerror(errno));
MPI_Abort(MPI_COMM_WORLD, 1);
}
else
{
char *file_name_ptr = fd->filename;
slash = strrchr(fd->filename, '/');
if (slash)
{
file_name_ptr = slash + 1;
}
/* create file in the directory */
file_create_args.mode = perm;
file_create_args.version = PAN_FS_CLIENT_LAYOUT_VERSION;
file_create_args.flags = PAN_FS_CLIENT_LAYOUT_CREATE_F__NONE;
ADIOI_Strncpy(file_create_args.filename, file_name_ptr, strlen(fd->filename)+1);
file_create_args.layout.agg_type = layout_type;
file_create_args.layout.layout_is_valid = 1;
if(layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE)
{
file_create_args.layout.u.raid1_5_parity_stripe.total_num_comps = layout_total_num_comps;
file_create_args.layout.u.raid1_5_parity_stripe.parity_stripe_width = layout_parity_stripe_width;
file_create_args.layout.u.raid1_5_parity_stripe.parity_stripe_depth = layout_parity_stripe_depth;
file_create_args.layout.u.raid1_5_parity_stripe.stripe_unit = layout_stripe_unit;
file_create_args.layout.u.raid1_5_parity_stripe.layout_visit_policy = layout_visit_policy;
}
else if(layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID0)
{
file_create_args.layout.u.raid0.total_num_comps = layout_total_num_comps;
file_create_args.layout.u.raid0.stripe_unit = layout_stripe_unit;
}
err = ioctl(fd_dir, PAN_FS_CLIENT_LAYOUT_CREATE_FILE, &file_create_args);
if (err < 0) {
FPRINTF(stderr, "%s: I/O Error doing ioctl on parent directory to create PanFS file using ioctl: %s.\n", myname, strerror(errno));
MPI_Abort(MPI_COMM_WORLD, 1);
}
err = close(fd_dir);
}
ADIOI_Free(path);
err = close(fd_dir);
}
ADIOI_Free(path);
}
}
else
{
int create_fd = open(fd->filename,amode,perm);
if(create_fd != -1)
{
close(create_fd);
}
else
{
FPRINTF(stderr, "%s: I/O Error creating PanFS file using open: %s.\n", myname, strerror(errno));
MPI_Abort(MPI_COMM_WORLD, 1);
}
MPI_Barrier(fd->comm);
}
}
if (fd->access_mode & ADIO_RDONLY)
@ -279,6 +319,14 @@ void ADIOI_PANFS_Open(ADIO_File fd, int *error_code)
ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid1_5_parity_stripe.layout_visit_policy);
MPI_Info_set(fd->info, "panfs_layout_visit_policy", temp_buffer);
break;
case PAN_FS_CLIENT_LAYOUT_TYPE__RAID10:
ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid10.stripe_unit);
MPI_Info_set(fd->info, "panfs_layout_stripe_unit", temp_buffer);
ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid10.total_num_comps);
MPI_Info_set(fd->info, "panfs_layout_total_num_comps", temp_buffer);
ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid10.layout_visit_policy);
MPI_Info_set(fd->info, "panfs_layout_visit_policy", temp_buffer);
break;
}
}
}

Просмотреть файл

@ -0,0 +1,68 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*/
#include "ad_panfs.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
/*
 * ADIOI_PANFS_ReadContig - read count elements of datatype into buf from a
 * contiguous region of the file.
 *
 * fd            - open ADIO file; fd_sys is the descriptor read from
 * buf           - destination buffer
 * count         - number of datatype elements to read
 * datatype      - element type; its size times count gives the byte length
 * file_ptr_type - ADIO_INDIVIDUAL reads at fd->fp_ind (and advances it);
 *                 otherwise the explicit offset argument is used
 * offset        - byte offset to read from when not using the individual
 *                 file pointer
 * status        - receives the number of bytes read (when
 *                 HAVE_STATUS_SET_BYTES is defined and status is non-NULL)
 * error_code    - MPI_SUCCESS, or an MPI_ERR_IO class code built from errno
 *
 * On any failure fd->fp_sys_posn is set to -1 so that the next call
 * re-seeks instead of trusting a stale cached position.
 */
void ADIOI_PANFS_ReadContig(ADIO_File fd, void *buf, int count,
                            MPI_Datatype datatype, int file_ptr_type,
                            ADIO_Offset offset, ADIO_Status *status,
                            int *error_code)
{
    int err = -1, datatype_size, len;
    off_t seek_rc;
    static char myname[] = "ADIOI_PANFS_READCONTIG";

    MPI_Type_size(datatype, &datatype_size);
    len = datatype_size * count;

    if (file_ptr_type == ADIO_INDIVIDUAL) {
        offset = fd->fp_ind;
    }

    /* Only seek when the cached system position differs from the target. */
    if (fd->fp_sys_posn != offset) {
        /* Keep the result in an off_t: narrowing a large, valid offset to
         * int can alias -1 and be misreported as a seek failure. */
        seek_rc = lseek(fd->fd_sys, offset, SEEK_SET);
        /* --BEGIN ERROR HANDLING-- */
        if (seek_rc == (off_t) -1) {
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE,
                                               myname, __LINE__,
                                               MPI_ERR_IO, "**io",
                                               "**io %s", strerror(errno));
            fd->fp_sys_posn = -1;
            return;
        }
        /* --END ERROR HANDLING-- */
    }

    /* read(), retried while it fails with EAGAIN */
    AD_PANFS_RETRY(read(fd->fd_sys, buf, len), err);
    /* --BEGIN ERROR HANDLING-- */
    if (err == -1) {
        *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                           MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__,
                                           MPI_ERR_IO, "**io",
                                           "**io %s", strerror(errno));
        fd->fp_sys_posn = -1;
        return;
    }
    /* --END ERROR HANDLING-- */

    /* err now holds the number of bytes actually read */
    fd->fp_sys_posn = offset + err;

    if (file_ptr_type == ADIO_INDIVIDUAL) {
        fd->fp_ind += err;
    }

#ifdef HAVE_STATUS_SET_BYTES
    /* Same guard as ADIOI_PANFS_WriteContig: skip a NULL status. */
    if (err != -1 && status) MPIR_Status_set_bytes(status, datatype, err);
#endif

    *error_code = MPI_SUCCESS;
}

Просмотреть файл

@ -0,0 +1,49 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
*
* Copyright (C) 2004 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*/
#include "ad_panfs.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
/*
 * ADIOI_PANFS_Resize - set the file size to size bytes.
 *
 * Rank 0 of fd->comm truncates the file, then all ranks meet at a barrier.
 * Every other rank then fstat()s its own descriptor and aborts the job if
 * the size it observes differs from the requested one.
 *
 * fd         - open ADIO file; fd_sys is the descriptor operated on
 * size       - requested file size in bytes
 * error_code - MPI_SUCCESS, or an MPI_ERR_IO class code if this rank's
 *              ftruncate()/fstat() call failed
 */
void ADIOI_PANFS_Resize(ADIO_File fd, ADIO_Offset size, int *error_code)
{
    int err;
    int myrank;
    int saved_errno = 0;
    struct stat stat_buf;
    static char myname[] = "ADIOI_PANFS_RESIZE";

    MPI_Comm_rank(fd->comm, &myrank);
    if (!myrank)
    {
        AD_PANFS_RETRY(ftruncate(fd->fd_sys,size),err);
        /* Capture errno before the barrier has a chance to overwrite it. */
        saved_errno = errno;
        MPI_Barrier(fd->comm);
    }
    else
    {
        MPI_Barrier(fd->comm);
        AD_PANFS_RETRY(fstat(fd->fd_sys,&stat_buf),err);
        saved_errno = errno;
        /* stat_buf is only meaningful when fstat() succeeded; a failed
         * fstat() is reported through the error path below instead. */
        if((err != -1) && (((ADIO_Offset)stat_buf.st_size) != size))
        {
            /* This should never happen otherwise there is a coherency problem. */
            FPRINTF(stderr, "%s: Rank %d: Resize failed: requested=%llu actual=%llu.\n",myname,myrank,(unsigned long long)size,(unsigned long long)stat_buf.st_size);
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
    }

    /* --BEGIN ERROR HANDLING-- */
    if (err == -1) {
        *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__, MPI_ERR_IO,
                                           "**io", "**io %s", strerror(saved_errno));
        return;
    }
    /* --END ERROR HANDLING-- */

    *error_code = MPI_SUCCESS;
}

Просмотреть файл

@ -0,0 +1,68 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
*
* Copyright (C) 2004 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*/
#include "ad_panfs.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
/*
 * ADIOI_PANFS_WriteContig - write count elements of datatype from buf to a
 * contiguous region of the file.
 *
 * fd            - open ADIO file; fd_sys is the descriptor written to
 * buf           - source buffer
 * count         - number of datatype elements to write
 * datatype      - element type; its size times count gives the byte length
 * file_ptr_type - ADIO_INDIVIDUAL writes at fd->fp_ind (and advances it);
 *                 otherwise the explicit offset argument is used
 * offset        - byte offset to write at when not using the individual
 *                 file pointer
 * status        - receives the number of bytes written (when
 *                 HAVE_STATUS_SET_BYTES is defined and status is non-NULL)
 * error_code    - MPI_SUCCESS, or an MPI_ERR_IO class code built from errno
 *
 * On any failure fd->fp_sys_posn is set to -1 so that the next call
 * re-seeks instead of trusting a stale cached position.
 */
void ADIOI_PANFS_WriteContig(ADIO_File fd, void *buf, int count,
                             MPI_Datatype datatype, int file_ptr_type,
                             ADIO_Offset offset, ADIO_Status *status,
                             int *error_code)
{
    int err = -1, datatype_size, len;
    off_t seek_rc;
    static char myname[] = "ADIOI_PANFS_WRITECONTIG";

    MPI_Type_size(datatype, &datatype_size);
    len = datatype_size * count;

    if (file_ptr_type == ADIO_INDIVIDUAL) {
        offset = fd->fp_ind;
    }

    /* Only seek when the cached system position differs from the target. */
    if (fd->fp_sys_posn != offset) {
        /* Keep the result in an off_t: narrowing a large, valid offset to
         * int can alias -1 and be misreported as a seek failure. */
        seek_rc = lseek(fd->fd_sys, offset, SEEK_SET);
        /* --BEGIN ERROR HANDLING-- */
        if (seek_rc == (off_t) -1) {
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE,
                                               myname, __LINE__,
                                               MPI_ERR_IO, "**io",
                                               "**io %s", strerror(errno));
            fd->fp_sys_posn = -1;
            return;
        }
        /* --END ERROR HANDLING-- */
    }

    /* write(), retried while it fails with EAGAIN */
    AD_PANFS_RETRY(write(fd->fd_sys, buf, len), err);
    /* --BEGIN ERROR HANDLING-- */
    if (err == -1) {
        *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                           MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__,
                                           MPI_ERR_IO, "**io",
                                           "**io %s", strerror(errno));
        fd->fp_sys_posn = -1;
        return;
    }
    /* --END ERROR HANDLING-- */

    /* err now holds the number of bytes actually written */
    fd->fp_sys_posn = offset + err;

    if (file_ptr_type == ADIO_INDIVIDUAL) {
        fd->fp_ind += err;
    }

#ifdef HAVE_STATUS_SET_BYTES
    if (err != -1 && status) MPIR_Status_set_bytes(status, datatype, err);
#endif

    *error_code = MPI_SUCCESS;
}

Просмотреть файл

@ -39,7 +39,7 @@ void ADIOI_PFS_IreadContig(ADIO_File fd, void *buf, int count,
/* exceeded the max. no. of outstanding requests. */
/* complete all previous async. requests */
ADIOI_Complete_async(error_code);
/*ADIOI_Complete_async(error_code); */
if (*error_code != MPI_SUCCESS) return;
/* try again */

Просмотреть файл

@ -6,9 +6,6 @@
*/
#include "ad_pfs.h"
#ifdef PROFILE
#include "mpe.h"
#endif
void ADIOI_PFS_Open(ADIO_File fd, int *error_code)
{
@ -39,15 +36,9 @@ void ADIOI_PFS_Open(ADIO_File fd, int *error_code)
MPI_Comm_size(MPI_COMM_WORLD, &np_total);
MPI_Comm_size(fd->comm, &np_comm);
#ifdef PROFILE
MPE_Log_event(1, 0, "start open");
#endif
if (np_total == np_comm)
fd->fd_sys = _gopen(fd->filename, amode, M_ASYNC, perm);
else fd->fd_sys = open(fd->filename, amode, perm);
#ifdef PROFILE
MPE_Log_event(2, 0, "end open");
#endif
fd->fd_direct = -1;
if (fd->fd_sys != -1) {

Просмотреть файл

@ -6,9 +6,6 @@
*/
#include "ad_pfs.h"
#ifdef PROFILE
#include "mpe.h"
#endif
void ADIOI_PFS_ReadContig(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
@ -22,41 +19,17 @@ void ADIOI_PFS_ReadContig(ADIO_File fd, void *buf, int count,
if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
if (fd->fp_sys_posn != offset) {
#ifdef PROFILE
MPE_Log_event(11, 0, "start seek");
#endif
lseek(fd->fd_sys, offset, SEEK_SET);
#ifdef PROFILE
MPE_Log_event(12, 0, "end seek");
#endif
}
#ifdef PROFILE
MPE_Log_event(3, 0, "start read");
#endif
err = _cread(fd->fd_sys, buf, len);
#ifdef PROFILE
MPE_Log_event(4, 0, "end read");
#endif
fd->fp_sys_posn = offset + err;
/* individual file pointer not updated */
}
else { /* read from curr. location of ind. file pointer */
if (fd->fp_sys_posn != fd->fp_ind) {
#ifdef PROFILE
MPE_Log_event(11, 0, "start seek");
#endif
lseek(fd->fd_sys, fd->fp_ind, SEEK_SET);
#ifdef PROFILE
MPE_Log_event(12, 0, "end seek");
#endif
}
#ifdef PROFILE
MPE_Log_event(3, 0, "start read");
#endif
err = _cread(fd->fd_sys, buf, len);
#ifdef PROFILE
MPE_Log_event(4, 0, "end read");
#endif
fd->fp_ind += err;
fd->fp_sys_posn = fd->fp_ind;
}

Просмотреть файл

@ -6,9 +6,6 @@
*/
#include "ad_pfs.h"
#ifdef PROFILE
#include "mpe.h"
#endif
void ADIOI_PFS_WriteContig(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
@ -23,41 +20,17 @@ void ADIOI_PFS_WriteContig(ADIO_File fd, void *buf, int count,
if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
if (fd->fp_sys_posn != offset) {
#ifdef PROFILE
MPE_Log_event(11, 0, "start seek");
#endif
lseek(fd->fd_sys, offset, SEEK_SET);
#ifdef PROFILE
MPE_Log_event(12, 0, "end seek");
#endif
}
#ifdef PROFILE
MPE_Log_event(5, 0, "start write");
#endif
err = _cwrite(fd->fd_sys, buf, len);
#ifdef PROFILE
MPE_Log_event(6, 0, "end write");
#endif
fd->fp_sys_posn = offset + err;
/* individual file pointer not updated */
}
else { /* write from curr. location of ind. file pointer */
if (fd->fp_sys_posn != fd->fp_ind) {
#ifdef PROFILE
MPE_Log_event(11, 0, "start seek");
#endif
lseek(fd->fd_sys, fd->fp_ind, SEEK_SET);
#ifdef PROFILE
MPE_Log_event(12, 0, "end seek");
#endif
}
#ifdef PROFILE
MPE_Log_event(5, 0, "start write");
#endif
err = _cwrite(fd->fd_sys, buf, len);
#ifdef PROFILE
MPE_Log_event(6, 0, "end write");
#endif
fd->fp_ind += err;
fd->fp_sys_posn = fd->fp_ind;
}

Просмотреть файл

@ -6,9 +6,6 @@
*/
#include "ad_piofs.h"
#ifdef PROFILE
#include "mpe.h"
#endif
void ADIOI_PIOFS_Open(ADIO_File fd, int *error_code)
{
@ -38,16 +35,9 @@ void ADIOI_PIOFS_Open(ADIO_File fd, int *error_code)
if (fd->access_mode & ADIO_EXCL)
amode = amode | O_EXCL;
#ifdef PROFILE
MPE_Log_event(1, 0, "start open");
#endif
fd->fd_sys = open(fd->filename, amode, perm);
fd->fd_direct = -1;
#ifdef PROFILE
MPE_Log_event(2, 0, "end open");
#endif
llseek(fd->fd_sys, 0, SEEK_SET);
/* required to initiate use of 64-bit offset */

Просмотреть файл

@ -6,9 +6,6 @@
*/
#include "ad_piofs.h"
#ifdef PROFILE
#include "mpe.h"
#endif
void ADIOI_PIOFS_ReadContig(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
@ -24,41 +21,17 @@ void ADIOI_PIOFS_ReadContig(ADIO_File fd, void *buf, int count,
if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
if (fd->fp_sys_posn != offset) {
#ifdef PROFILE
MPE_Log_event(11, 0, "start seek");
#endif
llseek(fd->fd_sys, offset, SEEK_SET);
#ifdef PROFILE
MPE_Log_event(12, 0, "end seek");
#endif
}
#ifdef PROFILE
MPE_Log_event(3, 0, "start read");
#endif
err = read(fd->fd_sys, buf, len);
#ifdef PROFILE
MPE_Log_event(4, 0, "end read");
#endif
fd->fp_sys_posn = offset + err;
/* individual file pointer not updated */
}
else { /* read from curr. location of ind. file pointer */
if (fd->fp_sys_posn != fd->fp_ind) {
#ifdef PROFILE
MPE_Log_event(11, 0, "start seek");
#endif
llseek(fd->fd_sys, fd->fp_ind, SEEK_SET);
#ifdef PROFILE
MPE_Log_event(12, 0, "end seek");
#endif
}
#ifdef PROFILE
MPE_Log_event(3, 0, "start read");
#endif
err = read(fd->fd_sys, buf, len);
#ifdef PROFILE
MPE_Log_event(4, 0, "end read");
#endif
fd->fp_ind += err;
fd->fp_sys_posn = fd->fp_ind;
}

Просмотреть файл

@ -7,9 +7,6 @@
#include "ad_piofs.h"
#include "adio_extern.h"
#ifdef PROFILE
#include "mpe.h"
#endif
void ADIOI_PIOFS_WriteContig(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
@ -25,41 +22,17 @@ void ADIOI_PIOFS_WriteContig(ADIO_File fd, void *buf, int count,
if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
if (fd->fp_sys_posn != offset) {
#ifdef PROFILE
MPE_Log_event(11, 0, "start seek");
#endif
llseek(fd->fd_sys, offset, SEEK_SET);
#ifdef PROFILE
MPE_Log_event(12, 0, "end seek");
#endif
}
#ifdef PROFILE
MPE_Log_event(5, 0, "start write");
#endif
err = write(fd->fd_sys, buf, len);
#ifdef PROFILE
MPE_Log_event(6, 0, "end write");
#endif
fd->fp_sys_posn = offset + err;
/* individual file pointer not updated */
}
else { /* write from curr. location of ind. file pointer */
if (fd->fp_sys_posn != fd->fp_ind) {
#ifdef PROFILE
MPE_Log_event(11, 0, "start seek");
#endif
llseek(fd->fd_sys, fd->fp_ind, SEEK_SET);
#ifdef PROFILE
MPE_Log_event(12, 0, "end seek");
#endif
}
#ifdef PROFILE
MPE_Log_event(5, 0, "start write");
#endif
err = write(fd->fd_sys, buf, len);
#ifdef PROFILE
MPE_Log_event(6, 0, "end write");
#endif
fd->fp_ind += err;
fd->fp_sys_posn = fd->fp_ind;
}
@ -255,18 +228,8 @@ void ADIOI_PIOFS_WriteStrided(ADIO_File fd, void *buf, int count,
if (fwr_size) {
/* TYPE_UB and TYPE_LB can result in
fwr_size = 0. save system call in such cases */
#ifdef PROFILE
MPE_Log_event(11, 0, "start seek");
#endif
llseek(fd->fd_sys, off, SEEK_SET);
#ifdef PROFILE
MPE_Log_event(12, 0, "end seek");
MPE_Log_event(5, 0, "start write");
#endif
err = write(fd->fd_sys, ((char *) buf) + i, fwr_size);
#ifdef PROFILE
MPE_Log_event(6, 0, "end write");
#endif
if (err == -1) err_flag = 1;
}
i += fwr_size;
@ -304,18 +267,8 @@ void ADIOI_PIOFS_WriteStrided(ADIO_File fd, void *buf, int count,
while (num < bufsize) {
size = ADIOI_MIN(fwr_size, bwr_size);
if (size) {
#ifdef PROFILE
MPE_Log_event(11, 0, "start seek");
#endif
llseek(fd->fd_sys, off, SEEK_SET);
#ifdef PROFILE
MPE_Log_event(12, 0, "end seek");
MPE_Log_event(5, 0, "start write");
#endif
err = write(fd->fd_sys, ((char *) buf) + indx, size);
#ifdef PROFILE
MPE_Log_event(6, 0, "end write");
#endif
if (err == -1) err_flag = 1;
}

Просмотреть файл

@ -12,7 +12,13 @@ void ADIOI_PVFS_Close(ADIO_File fd, int *error_code)
int err;
static char myname[] = "ADIOI_PVFS_CLOSE";
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_close_a, 0, NULL );
#endif
err = pvfs_close(fd->fd_sys);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_close_b, 0, NULL );
#endif
fd->fd_sys = -1;
if (err == -1) {

Просмотреть файл

@ -15,9 +15,22 @@ void ADIOI_PVFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct,
switch(flag) {
case ADIO_FCNTL_GET_FSIZE:
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
fcntl_struct->fsize = pvfs_lseek64(fd->fd_sys, 0, SEEK_END);
if (fd->fp_sys_posn != -1)
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
if (fd->fp_sys_posn != -1) {
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
pvfs_lseek64(fd->fd_sys, fd->fp_sys_posn, SEEK_SET);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
}
if (fcntl_struct->fsize == -1) {
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
MPIR_ERR_RECOVERABLE, myname,

Просмотреть файл

@ -49,11 +49,24 @@ void ADIOI_PVFS_Open(ADIO_File fd, int *error_code)
value, &flag);
if (flag && (atoi(value) >= 0)) pstat.base = atoi(value);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_open_a, 0, NULL );
#endif
fd->fd_sys = pvfs_open64(fd->filename, amode, perm, &pstat, NULL);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_open_b, 0, NULL );
#endif
fd->fd_direct = -1;
if ((fd->fd_sys != -1) && (fd->access_mode & ADIO_APPEND))
/* Append mode: position both the individual and the cached system file
 * pointer at end-of-file once the open has succeeded. */
if ((fd->fd_sys != -1) && (fd->access_mode & ADIO_APPEND)) {
#ifdef ADIOI_MPE_LOGGING
    MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
    fd->fp_ind = fd->fp_sys_posn = pvfs_lseek64(fd->fd_sys, 0, SEEK_END);
#ifdef ADIOI_MPE_LOGGING
    /* Log the _b event after the seek, matching the _a/_b pairing used
     * around the other pvfs_lseek64() calls (was a second lseek_a). */
    MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
}
if (fd->fd_sys != -1) {
pvfs_ioctl(fd->fd_sys, GETMETA, &pstat);

Просмотреть файл

@ -27,16 +27,42 @@ void ADIOI_PVFS_ReadContig(ADIO_File fd, void *buf, int count,
len = datatype_size * count;
if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
if (fd->fp_sys_posn != offset)
if (fd->fp_sys_posn != offset) {
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
pvfs_lseek64(fd->fd_sys, offset, SEEK_SET);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
}
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
#endif
err = pvfs_read(fd->fd_sys, buf, len);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
#endif
fd->fp_sys_posn = offset + err;
/* individual file pointer not updated */
}
else { /* read from curr. location of ind. file pointer */
if (fd->fp_sys_posn != fd->fp_ind)
if (fd->fp_sys_posn != fd->fp_ind) {
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
pvfs_lseek64(fd->fd_sys, fd->fp_ind, SEEK_SET);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
}
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
#endif
err = pvfs_read(fd->fd_sys, buf, len);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
#endif
fd->fp_ind += err;
fd->fp_sys_posn = fd->fp_ind;
}

Просмотреть файл

@ -27,16 +27,42 @@ void ADIOI_PVFS_WriteContig(ADIO_File fd, void *buf, int count,
len = datatype_size * count;
if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
if (fd->fp_sys_posn != offset)
if (fd->fp_sys_posn != offset) {
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
pvfs_lseek64(fd->fd_sys, offset, SEEK_SET);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
}
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
err = pvfs_write(fd->fd_sys, buf, len);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
fd->fp_sys_posn = offset + err;
/* individual file pointer not updated */
}
else { /* write from curr. location of ind. file pointer */
if (fd->fp_sys_posn != fd->fp_ind)
if (fd->fp_sys_posn != fd->fp_ind) {
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
pvfs_lseek64(fd->fd_sys, fd->fp_ind, SEEK_SET);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
}
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
err = pvfs_write(fd->fd_sys, buf, len);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
fd->fp_ind += err;
fd->fp_sys_posn = fd->fp_ind;
}
@ -130,9 +156,23 @@ void ADIOI_PVFS_WriteStrided(ADIO_File fd, void *buf, int count,
/* seek to the right spot in the file */
if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
off = fd->disp + etype_size * offset;
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
pvfs_lseek64(fd->fd_sys, off, SEEK_SET);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
}
else off = pvfs_lseek64(fd->fd_sys, fd->fp_ind, SEEK_SET);
else {
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
off = pvfs_lseek64(fd->fd_sys, fd->fp_ind, SEEK_SET);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
}
/* loop through all the flattened pieces. combine into buffer until
* no more will fit, then write.
@ -144,9 +184,15 @@ void ADIOI_PVFS_WriteStrided(ADIO_File fd, void *buf, int count,
for (i=0; i<flat_buf->count; i++) {
if (flat_buf->blocklens[i] > combine_buf_remain && combine_buf != combine_buf_ptr) {
/* there is data in the buffer; write out the buffer so far */
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
err = pvfs_write(fd->fd_sys,
combine_buf,
fd->hints->ind_wr_buffer_size - combine_buf_remain);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
if (err == -1) err_flag = 1;
/* reset our buffer info */
@ -159,9 +205,15 @@ void ADIOI_PVFS_WriteStrided(ADIO_File fd, void *buf, int count,
/* special case: blocklen is as big as or bigger than the combine buf;
* write directly
*/
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
err = pvfs_write(fd->fd_sys,
((char *) buf) + j*buftype_extent + flat_buf->indices[i],
flat_buf->blocklens[i]);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
if (err == -1) err_flag = 1;
off += flat_buf->blocklens[i]; /* keep up with the final file offset too */
}
@ -179,9 +231,15 @@ void ADIOI_PVFS_WriteStrided(ADIO_File fd, void *buf, int count,
if (combine_buf_ptr != combine_buf) {
/* data left in buffer to write */
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
err = pvfs_write(fd->fd_sys,
combine_buf,
fd->hints->ind_wr_buffer_size - combine_buf_remain);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
if (err == -1) err_flag = 1;
}
@ -264,17 +322,19 @@ void ADIOI_PVFS_WriteStrided(ADIO_File fd, void *buf, int count,
if (fwr_size) {
/* TYPE_UB and TYPE_LB can result in
fwr_size = 0. save system call in such cases */
#ifdef PROFILE
MPE_Log_event(11, 0, "start seek");
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
pvfs_lseek64(fd->fd_sys, off, SEEK_SET);
#ifdef PROFILE
MPE_Log_event(12, 0, "end seek");
MPE_Log_event(5, 0, "start write");
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
err = pvfs_write(fd->fd_sys, ((char *) buf) + i, fwr_size);
#ifdef PROFILE
MPE_Log_event(6, 0, "end write");
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
if (err == -1) err_flag = 1;
}
@ -313,17 +373,19 @@ void ADIOI_PVFS_WriteStrided(ADIO_File fd, void *buf, int count,
while (num < bufsize) {
size = ADIOI_MIN(fwr_size, bwr_size);
if (size) {
#ifdef PROFILE
MPE_Log_event(11, 0, "start seek");
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
pvfs_lseek64(fd->fd_sys, off, SEEK_SET);
#ifdef PROFILE
MPE_Log_event(12, 0, "end seek");
MPE_Log_event(5, 0, "start write");
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
err = pvfs_write(fd->fd_sys, ((char *) buf) + indx, size);
#ifdef PROFILE
MPE_Log_event(6, 0, "end write");
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
if (err == -1) err_flag = 1;
}
@ -473,9 +535,23 @@ void ADIOI_PVFS_WriteStridedListIO(ADIO_File fd, void *buf, int count,
if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
off = fd->disp + etype_size * offset;
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
pvfs_lseek64(fd->fd_sys, fd->fp_ind, SEEK_SET);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
}
else off = pvfs_lseek64(fd->fd_sys, fd->fp_ind, SEEK_SET);
else {
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
off = pvfs_lseek64(fd->fd_sys, fd->fp_ind, SEEK_SET);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
}
file_list_count = 1;
file_offsets = off;

Просмотреть файл

@ -9,6 +9,7 @@
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow

Просмотреть файл

@ -22,8 +22,13 @@ struct ADIOI_Fns_struct ADIO_PVFS2_operations = {
ADIOI_PVFS2_ReadStrided, /* ReadStrided */
ADIOI_PVFS2_WriteStrided, /* WriteStrided */
ADIOI_PVFS2_Close, /* Close */
#ifdef ROMIO_HAVE_WORKING_AIO
ADIOI_PVFS2_IReadContig, /* IreadContig */
ADIOI_PVFS2_IWriteContig, /* IwriteContig */
#else
ADIOI_FAKE_IreadContig, /* IreadContig */
ADIOI_FAKE_IwriteContig, /* IwriteContig */
#endif
ADIOI_FAKE_IODone, /* ReadDone */
ADIOI_FAKE_IODone, /* WriteDone */
ADIOI_FAKE_IOComplete, /* ReadComplete */

Просмотреть файл

@ -37,4 +37,16 @@ void ADIOI_PVFS2_Flush(ADIO_File fd, int *error_code);
void ADIOI_PVFS2_Delete(char *filename, int *error_code);
void ADIOI_PVFS2_Resize(ADIO_File fd, ADIO_Offset size, int *error_code);
void ADIOI_PVFS2_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code);
void ADIOI_PVFS2_IReadContig(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, MPI_Request *request,
int *error_code);
void ADIOI_PVFS2_IWriteContig(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, MPI_Request *request,
int *error_code);
void ADIOI_PVFS2_AIO_contig(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, MPI_Request *request,
int flag, int *error_code);
#endif

Просмотреть файл

@ -0,0 +1,220 @@
/* -*- Mode: C; c-basic-offset:4 ; -*-
* vim: ts=8 sts=4 sw=4 noexpandtab
*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*/
#include "adio.h"
#include "adio_extern.h"
#include "ad_pvfs2.h"
#include <string.h>
#include "ad_pvfs2_common.h"
#include "mpiu_greq.h"
#include "../../mpi-io/mpioimpl.h"
#define READ 0
#define WRITE 1
#ifdef ROMIO_HAVE_WORKING_AIO
/* Handle of the generalized-request class used for PVFS2 nonblocking I/O.
 * Zero means "not created yet"; ADIOI_PVFS2_AIO_contig() creates the class
 * the first time an operation is posted with deferred completion. */
static int ADIOI_PVFS2_greq_class = 0;
/* Callbacks registered with that generalized-request class (defined below):
 * free releases the per-request state, poll checks a single request, and
 * wait checks a whole array of requests. */
int ADIOI_PVFS2_aio_free_fn(void *extra_state);
int ADIOI_PVFS2_aio_poll_fn(void *extra_state, MPI_Status *status);
int ADIOI_PVFS2_aio_wait_fn(int count, void ** array_of_states,
double timeout, MPI_Status *status);
/*
 * ADIOI_PVFS2_IReadContig - nonblocking contiguous read entry point.
 *
 * Forwards all arguments unchanged to ADIOI_PVFS2_AIO_contig(), selecting
 * the READ direction.  The request handle and error code are filled in by
 * that routine.
 */
void ADIOI_PVFS2_IReadContig(ADIO_File fd, void *buf, int count,
                             MPI_Datatype datatype, int file_ptr_type,
                             ADIO_Offset offset, MPI_Request *request,
                             int *error_code)
{
    const int direction = READ;

    ADIOI_PVFS2_AIO_contig(fd, buf, count, datatype, file_ptr_type, offset,
                           request, direction, error_code);
}
/*
 * ADIOI_PVFS2_IWriteContig - nonblocking contiguous write entry point.
 *
 * Forwards all arguments unchanged to ADIOI_PVFS2_AIO_contig(), selecting
 * the WRITE direction.  The request handle and error code are filled in by
 * that routine.
 */
void ADIOI_PVFS2_IWriteContig(ADIO_File fd, void *buf, int count,
                              MPI_Datatype datatype, int file_ptr_type,
                              ADIO_Offset offset, MPI_Request *request,
                              int *error_code)
{
    const int direction = WRITE;

    ADIOI_PVFS2_AIO_contig(fd, buf, count, datatype, file_ptr_type, offset,
                           request, direction, error_code);
}
/*
 * ADIOI_PVFS2_AIO_contig - post a nonblocking contiguous read or write.
 *
 * fd            open ADIO file; fd->fs_ptr holds the PVFS2 state
 * buf           user buffer
 * count         number of elements of 'datatype' in buf
 * datatype      element type; transfer length is count * type size bytes
 * file_ptr_type ADIO_INDIVIDUAL: use (and afterwards advance) fd->fp_ind;
 *               otherwise 'offset' is used as given
 * offset        file offset handed to PVFS
 * request       out: MPI request representing the operation
 * flag          READ or WRITE
 * error_code    out: MPI_SUCCESS, or an error code built from the PVFS error
 *
 * A tracking structure (ADIOI_AIO_Request) is allocated for the operation.
 * When the operation is posted with deferred completion, the structure is
 * attached to a generalized request and released later by
 * ADIOI_PVFS2_aio_free_fn().  On every other path handled here (setup
 * failure, failure to post, immediate completion) nothing else holds a
 * reference to it, so it is released before returning.
 */
void ADIOI_PVFS2_AIO_contig(ADIO_File fd, void *buf, int count,
                            MPI_Datatype datatype, int file_ptr_type,
                            ADIO_Offset offset, MPI_Request *request,
                            int flag, int *error_code)
{
    int ret, datatype_size, len;
    ADIOI_PVFS2_fs *pvfs_fs;
    ADIOI_AIO_Request *aio_req;
    static char myname[] = "ADIOI_PVFS2_AIO_contig";

    pvfs_fs = (ADIOI_PVFS2_fs*)fd->fs_ptr;

    aio_req = (ADIOI_AIO_Request*)ADIOI_Calloc(1, sizeof(ADIOI_AIO_Request));

    MPI_Type_size(datatype, &datatype_size);
    len = datatype_size * count;

    ret = PVFS_Request_contiguous(len, PVFS_BYTE, &(aio_req->mem_req));
    /* --BEGIN ERROR HANDLING-- */
    if (ret != 0) {
        *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                           MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__,
                                           ADIOI_PVFS2_error_convert(ret),
                                           "Error in pvfs_request_contig (memory)", 0);
        goto fn_free_aio_req;
    }
    /* --END ERROR HANDLING-- */

    ret = PVFS_Request_contiguous(len, PVFS_BYTE, &(aio_req->file_req));
    /* --BEGIN ERROR HANDLING-- */
    if (ret != 0) {
        *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                           MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__,
                                           ADIOI_PVFS2_error_convert(ret),
                                           "Error in pvfs_request_contig (file)", 0);
        goto fn_free_mem_req;
    }
    /* --END ERROR HANDLING-- */

    if (file_ptr_type == ADIO_INDIVIDUAL) {
        /* copy individual file pointer into offset variable, continue */
        offset = fd->fp_ind;
    }

    if (flag == READ) {
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event( ADIOI_MPE_iread_a, 0, NULL );
#endif
        ret = PVFS_isys_read(pvfs_fs->object_ref, aio_req->file_req, offset,
                             buf, aio_req->mem_req, &(pvfs_fs->credentials),
                             &(aio_req->resp_io), &(aio_req->op_id), NULL);
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event( ADIOI_MPE_iread_b, 0, NULL );
#endif
    } else if (flag == WRITE) {
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event( ADIOI_MPE_iwrite_a, 0, NULL );
#endif
        ret = PVFS_isys_write(pvfs_fs->object_ref, aio_req->file_req, offset,
                              buf, aio_req->mem_req, &(pvfs_fs->credentials),
                              &(aio_req->resp_io), &(aio_req->op_id), NULL);
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event( ADIOI_MPE_iwrite_b, 0, NULL );
#endif
    }

    /* --BEGIN ERROR HANDLING-- */
    if (ret < 0 ) {
        *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                           MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__,
                                           ADIOI_PVFS2_error_convert(ret),
                                           "Error in PVFS_isys_io", 0);
        goto fn_free_file_req;
    }
    /* --END ERROR HANDLING-- */

    /* posted. deferred completion */
    if (ret == 0) {
        if (ADIOI_PVFS2_greq_class == 0) {
            MPIX_Grequest_class_create(ADIOI_GEN_aio_query_fn,
                    ADIOI_PVFS2_aio_free_fn, MPIU_Greq_cancel_fn,
                    ADIOI_PVFS2_aio_poll_fn, ADIOI_PVFS2_aio_wait_fn,
                    &ADIOI_PVFS2_greq_class);
        }
        MPIX_Grequest_class_allocate(ADIOI_PVFS2_greq_class, aio_req, request);
        /* Copy exactly one request handle.  sizeof(request) would be the
         * size of the pointer, which is wrong whenever MPI_Request is not
         * pointer-sized. */
        memcpy(&(aio_req->req), request, sizeof(*request));
    }

    /* immediate completion */
    if (ret == 1) {
        MPIO_Completed_request_create(&fd, len, error_code, request);
    }

    if (file_ptr_type == ADIO_INDIVIDUAL) {
        fd->fp_ind += len;
    }
    fd->fp_sys_posn = offset + len;

    *error_code = MPI_SUCCESS;

    if (ret != 1) {
        /* The generalized request now refers to aio_req; it is released
         * by ADIOI_PVFS2_aio_free_fn() when that request is freed. */
        return;
    }
    /* Immediate completion: aio_req was never attached to a request, so
     * fall through and release it together with its PVFS descriptors. */

fn_free_file_req:
    PVFS_Request_free(&(aio_req->file_req));
fn_free_mem_req:
    PVFS_Request_free(&(aio_req->mem_req));
fn_free_aio_req:
    ADIOI_Free(aio_req);
    return;
}
/*
 * ADIOI_PVFS2_aio_free_fn - generalized-request "free" callback.
 *
 * extra_state is the ADIOI_AIO_Request attached to the request.  Releases
 * its two PVFS request descriptors (memory and file) and then the
 * structure itself.  Always returns MPI_SUCCESS.
 */
int ADIOI_PVFS2_aio_free_fn(void *extra_state)
{
    ADIOI_AIO_Request *finished = (ADIOI_AIO_Request *)extra_state;

    PVFS_Request_free(&finished->mem_req);
    PVFS_Request_free(&finished->file_req);
    ADIOI_Free(finished);

    return MPI_SUCCESS;
}
/*
 * ADIOI_PVFS2_aio_poll_fn - generalized-request "poll" callback for a
 * single operation.
 *
 * extra_state is the ADIOI_AIO_Request attached to the request; 'status'
 * is not used here.  Returns MPI_SUCCESS once the operation has been
 * recorded as complete, MPI_UNDEFINED if PVFS_sys_wait() reports failure.
 */
int ADIOI_PVFS2_aio_poll_fn(void *extra_state, MPI_Status *status)
{
    ADIOI_AIO_Request *pending = (ADIOI_AIO_Request *)extra_state;
    int wait_error;
    int rc;

    /* BUG: cannot PVFS_sys_testsome: does not work for a specific request */
    /* NOTE(review): PVFS_sys_wait presumably blocks until the operation
     * finishes, which would make this "poll" a wait -- confirm. */
    rc = PVFS_sys_wait(pending->op_id, __FUNCTION__, &wait_error);
    if (rc != 0) {
        return MPI_UNDEFINED; /* TODO: what's this error? */
    }

    /* record the byte count, then mark the generalized request complete */
    pending->nbytes = pending->resp_io.total_completed;
    MPIR_Nest_incr();
    MPI_Grequest_complete(pending->req);
    MPIR_Nest_decr();

    return MPI_SUCCESS;
}
/*
 * ADIOI_PVFS2_aio_wait_fn - generalized-request "wait" callback for an
 * array of operations.
 *
 * count           number of entries in array_of_states
 * array_of_states ADIOI_AIO_Request pointers, one per outstanding request
 * timeout, status not used by this implementation
 *
 * Makes a single PVFS_sys_testsome() call and marks as complete every
 * request whose op id is reported back.  Requests not reported by that
 * call are left untouched.  Always returns MPI_SUCCESS.
 */
int ADIOI_PVFS2_aio_wait_fn(int count, void ** array_of_states,
                            double timeout, MPI_Status *status)
{
    ADIOI_AIO_Request **aio_reqlist;
    PVFS_sys_op_id *op_id_array;
    int i,j, greq_count;
    int *error_array;

    aio_reqlist = (ADIOI_AIO_Request **)array_of_states;

    op_id_array = (PVFS_sys_op_id*)ADIOI_Calloc(count, sizeof(PVFS_sys_op_id));
    error_array = (int *)ADIOI_Calloc(count, sizeof(int));
    greq_count = count;

    /* PVFS-2.6: testsome actually tests all requests and fills in op_id_array
     * with the ones that have completed.  count is an in/out parameter.
     * returns with the number of completed operations.  what a mess! */
    PVFS_sys_testsome(op_id_array, &count, NULL, error_array, INT_MAX);

    /* match each completed op id back to the request that posted it */
    for (i=0; i< count; i++) {
        for (j=0; j<greq_count; j++) {
            if (op_id_array[i] == aio_reqlist[j]->op_id) {
                aio_reqlist[j]->nbytes =
                    aio_reqlist[j]->resp_io.total_completed;
                MPIR_Nest_incr();
                MPI_Grequest_complete(aio_reqlist[j]->req);
                MPIR_Nest_decr();
            }
        }
    }

    /* the scratch arrays are only needed for the testsome call above;
     * without these frees every invocation leaks both of them */
    ADIOI_Free(op_id_array);
    ADIOI_Free(error_array);

    return MPI_SUCCESS; /* TODO: no idea how to deal with errors */
}
#endif
/*
* vim: ts=8 sts=4 sw=4 noexpandtab
*/

Просмотреть файл

@ -9,6 +9,7 @@
#include <unistd.h>
#include <sys/types.h>
#include <time.h>
#include <stdlib.h>
/* maybe give romio access to the globalconfig struct */
/* keyval hack to both tell us if we've already initialized pvfs2 and also

Просмотреть файл

@ -30,7 +30,7 @@ typedef struct open_status_s open_status;
* handle to everyone else in the communicator
*/
static void fake_an_open(PVFS_fs_id fs_id, char *pvfs_name, int access_mode,
int nr_datafiles, int strip_size,
int nr_datafiles, PVFS_size strip_size,
ADIOI_PVFS2_fs *pvfs2_fs,
open_status *o_status)
{
@ -82,9 +82,15 @@ static void fake_an_open(PVFS_fs_id fs_id, char *pvfs_name, int access_mode,
}
/* Perform file creation */
#ifdef HAVE_PVFS2_CREATE_WITHOUT_LAYOUT
ret = PVFS_sys_create(resp_getparent.basename,
resp_getparent.parent_ref, attribs,
&(pvfs2_fs->credentials), dist, &resp_create);
#else
ret = PVFS_sys_create(resp_getparent.basename,
resp_getparent.parent_ref, attribs,
&(pvfs2_fs->credentials), dist, NULL, &resp_create);
#endif
/* if many creates are happening in this directory, the earlier
* sys_lookup may have returned ENOENT, but the sys_create could
@ -175,6 +181,9 @@ void ADIOI_PVFS2_Open(ADIO_File fd, int *error_code)
ADIOI_PVFS2_makecredentials(&(pvfs2_fs->credentials));
/* one process resolves name and will later bcast to others */
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_open_a, 0, NULL );
#endif
if (rank == fd->hints->ranklist[0] && fd->fs_ptr == NULL) {
/* given the filename, figure out which pvfs filesystem it is on */
ret = PVFS_util_resolve(fd->filename, &cur_fs,
@ -194,6 +203,9 @@ void ADIOI_PVFS2_Open(ADIO_File fd, int *error_code)
pvfs2_fs->object_ref = o_status.object_ref;
fd->fs_ptr = pvfs2_fs;
}
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_open_b, 0, NULL );
#endif
/* broadcast status and (possibly valid) object reference */
MPI_Address(&o_status.error, &offsets[0]);

Просмотреть файл

@ -56,8 +56,14 @@ void ADIOI_PVFS2_ReadContig(ADIO_File fd, void *buf, int count,
offset = fd->fp_ind;
}
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
#endif
ret = PVFS_sys_read(pvfs_fs->object_ref, file_req, offset, buf,
mem_req, &(pvfs_fs->credentials), &resp_io);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
#endif
/* --BEGIN ERROR HANDLING-- */
if (ret != 0 ) {
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
@ -140,7 +146,7 @@ void ADIOI_PVFS2_ReadStrided(ADIO_File fd, void *buf, int count,
if (!filetype_is_contig) {
flat_file = ADIOI_Flatlist;
while (flat_file->type != fd->filetype) flat_file = flat_file->next;
if (flat_file->count == 1)
if (flat_file->count == 1 && !buftype_is_contig)
filetype_is_contig = 1;
}
@ -214,9 +220,15 @@ void ADIOI_PVFS2_ReadStrided(ADIO_File fd, void *buf, int count,
err_flag = PVFS_Request_contiguous(file_lengths,
PVFS_BYTE, &file_req);
if (err_flag < 0) break;
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
#endif
err_flag = PVFS_sys_read(pvfs_fs->object_ref, file_req,
file_offsets, PVFS_BOTTOM, mem_req,
&(pvfs_fs->credentials), &resp_io);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
#endif
/* --BEGIN ERROR HANDLING-- */
if (err_flag != 0) {
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
@ -284,11 +296,11 @@ void ADIOI_PVFS2_ReadStrided(ADIO_File fd, void *buf, int count,
n_filetypes++;
for (i=0; i<flat_file->count; i++) {
if (disp + flat_file->indices[i] +
(ADIO_Offset) n_filetypes*filetype_extent +
((ADIO_Offset) n_filetypes)*filetype_extent +
flat_file->blocklens[i] >= offset) {
st_index = i;
frd_size = (int) (disp + flat_file->indices[i] +
(ADIO_Offset) n_filetypes*filetype_extent
((ADIO_Offset) n_filetypes)*filetype_extent
+ flat_file->blocklens[i] - offset);
flag = 1;
break;
@ -315,7 +327,7 @@ void ADIOI_PVFS2_ReadStrided(ADIO_File fd, void *buf, int count,
}
/* abs. offset in bytes in the file */
offset = disp + (ADIO_Offset) n_filetypes*filetype_extent +
offset = disp + ((ADIO_Offset) n_filetypes)*filetype_extent +
abs_off_in_filetype;
} /* else [file_ptr_type != ADIO_INDIVIDUAL] */
@ -340,7 +352,11 @@ void ADIOI_PVFS2_ReadStrided(ADIO_File fd, void *buf, int count,
/* determine how many blocks in file to read */
f_data_read = ADIOI_MIN(st_frd_size, bufsize);
total_blks_to_read = 1;
j++;
if (j < (flat_file->count-1)) j++;
else {
j = 0;
n_filetypes++;
}
while (f_data_read < bufsize) {
f_data_read += flat_file->blocklens[j];
total_blks_to_read++;
@ -383,7 +399,8 @@ void ADIOI_PVFS2_ReadStrided(ADIO_File fd, void *buf, int count,
}
for (k=0; k<MAX_ARRAY_SIZE; k++) {
if (i || k) {
file_offsets[k] = disp + n_filetypes*filetype_extent
file_offsets[k] = disp +
((ADIO_Offset)n_filetypes)*filetype_extent
+ flat_file->indices[j];
file_lengths[k] = flat_file->blocklens[j];
mem_lengths += file_lengths[k];
@ -424,9 +441,15 @@ void ADIOI_PVFS2_ReadStrided(ADIO_File fd, void *buf, int count,
/* PVFS_Request_hindexed already expresses the offsets into the
* file, so we should not pass in an offset if we are using
* hindexed for the file type */
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
#endif
err_flag = PVFS_sys_read(pvfs_fs->object_ref, file_req, 0,
mem_offsets, mem_req,
&(pvfs_fs->credentials), &resp_io);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
#endif
/* --BEGIN ERROR HANDLING-- */
if (err_flag != 0) {
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
@ -455,8 +478,9 @@ void ADIOI_PVFS2_ReadStrided(ADIO_File fd, void *buf, int count,
}
for (k=0; k<extra_blks; k++) {
if(i || k) {
file_offsets[k] = disp + n_filetypes*filetype_extent +
flat_file->indices[j];
file_offsets[k] = disp +
((ADIO_Offset)n_filetypes)*filetype_extent +
flat_file->indices[j];
if (k == (extra_blks - 1)) {
file_lengths[k] = bufsize - (int32_t) mem_lengths
- (int32_t) mem_offsets + (int32_t) buf;
@ -497,8 +521,14 @@ void ADIOI_PVFS2_ReadStrided(ADIO_File fd, void *buf, int count,
/* --END ERROR HANDLING-- */
/* as above, use 0 for 'offset' when using hindexed file type */
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
#endif
err_flag = PVFS_sys_read(pvfs_fs->object_ref, file_req, 0,
mem_offsets, mem_req, &(pvfs_fs->credentials), &resp_io);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
#endif
/* --BEGIN ERROR HANDLING-- */
if (err_flag != 0) {
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
@ -848,8 +878,8 @@ void ADIOI_PVFS2_ReadStrided(ADIO_File fd, void *buf, int count,
k = (k + 1)%flat_buf->count;
} /* for (i=0; i<mem_list_count; i++) */
for (i=0; i<file_list_count; i++) {
file_offsets[i] = disp + flat_file->indices[j] + n_filetypes *
filetype_extent;
file_offsets[i] = disp + flat_file->indices[j] +
((ADIO_Offset)n_filetypes) * filetype_extent;
if (!i) {
file_lengths[0] = frd_size;
file_offsets[0] += flat_file->blocklens[j] - frd_size;
@ -899,8 +929,14 @@ void ADIOI_PVFS2_ReadStrided(ADIO_File fd, void *buf, int count,
/* --END ERROR HANDLING-- */
/* offset will be expressed in memory and file datatypes */
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
#endif
err_flag = PVFS_sys_read(pvfs_fs->object_ref, file_req, 0,
PVFS_BOTTOM, mem_req, &(pvfs_fs->credentials), &resp_io);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
#endif
/* --BEGIN ERROR HANDLING-- */
if (err_flag != 0) {
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
@ -924,7 +960,16 @@ void ADIOI_PVFS2_ReadStrided(ADIO_File fd, void *buf, int count,
ADIOI_Free(file_lengths);
/* Other ADIO routines will convert absolute bytes into counts of datatypes */
if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += total_bytes_read;
/* when incrementing fp_ind, need to also take into account the file type:
* consider an N-element 1-d subarray with a lb and ub: ( |---xxxxx-----|
* if we wrote N elements, offset needs to point at beginning of type, not
* at empty region at offset N+1) */
if (file_ptr_type == ADIO_INDIVIDUAL) {
/* this is closer, but still incorrect for the cases where a small
* amount of a file type is "leftover" after a write */
fd->fp_ind = disp + flat_file->indices[j] +
((ADIO_Offset)n_filetypes)*filetype_extent;
}
if (err_flag == 0) *error_code = MPI_SUCCESS;
error_state:

Просмотреть файл

@ -51,8 +51,14 @@ void ADIOI_PVFS2_WriteContig(ADIO_File fd, void *buf, int count,
/* --END ERROR HANDLING-- */
if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
ret = PVFS_sys_write(pvfs_fs->object_ref, file_req, offset, buf,
mem_req, &(pvfs_fs->credentials), &resp_io);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
/* --BEGIN ERROR HANDLING-- */
if (ret != 0) {
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
@ -67,8 +73,14 @@ void ADIOI_PVFS2_WriteContig(ADIO_File fd, void *buf, int count,
fd->fp_sys_posn = offset + (int) resp_io.total_completed;
}
else {
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
ret = PVFS_sys_write(pvfs_fs->object_ref, file_req, fd->fp_ind, buf,
mem_req, &(pvfs_fs->credentials), &resp_io);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
/* --BEGIN ERROR HANDLING-- */
if (ret != 0) {
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
@ -161,7 +173,7 @@ void ADIOI_PVFS2_WriteStrided(ADIO_File fd, void *buf, int count,
if (!filetype_is_contig) {
flat_file = ADIOI_Flatlist;
while (flat_file->type != fd->filetype) flat_file = flat_file->next;
if (flat_file->count == 1)
if (flat_file->count == 1 && !buftype_is_contig)
filetype_is_contig = 1;
}
@ -258,11 +270,17 @@ void ADIOI_PVFS2_WriteStrided(ADIO_File fd, void *buf, int count,
}
/* --END ERROR HANDLING-- */
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req,
file_offsets, PVFS_BOTTOM,
mem_req,
&(pvfs_fs->credentials),
&resp_io);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
total_bytes_written += resp_io.total_completed;
/* in the case of error or the last write list call,
@ -330,11 +348,11 @@ void ADIOI_PVFS2_WriteStrided(ADIO_File fd, void *buf, int count,
n_filetypes++;
for (i=0; i<flat_file->count; i++) {
if (disp + flat_file->indices[i] +
(ADIO_Offset) n_filetypes*filetype_extent +
((ADIO_Offset) n_filetypes)*filetype_extent +
flat_file->blocklens[i] >= offset) {
st_index = i;
fwr_size = disp + flat_file->indices[i] +
(ADIO_Offset) n_filetypes*filetype_extent
((ADIO_Offset) n_filetypes)*filetype_extent
+ flat_file->blocklens[i] - offset;
flag = 1;
break;
@ -361,7 +379,7 @@ void ADIOI_PVFS2_WriteStrided(ADIO_File fd, void *buf, int count,
}
/* abs. offset in bytes in the file */
offset = disp + (ADIO_Offset) n_filetypes*filetype_extent +
offset = disp + ((ADIO_Offset) n_filetypes)*filetype_extent +
abs_off_in_filetype;
} /* else [file_ptr_type != ADIO_INDIVIDUAL] */
@ -387,7 +405,11 @@ void ADIOI_PVFS2_WriteStrided(ADIO_File fd, void *buf, int count,
/* determine how many blocks in file to write */
f_data_wrote = ADIOI_MIN(st_fwr_size, bufsize);
total_blks_to_write = 1;
j++;
if (j < (flat_file->count -1)) j++;
else {
j = 0;
n_filetypes++;
}
while (f_data_wrote < bufsize) {
f_data_wrote += flat_file->blocklens[j];
total_blks_to_write++;
@ -430,8 +452,9 @@ void ADIOI_PVFS2_WriteStrided(ADIO_File fd, void *buf, int count,
}
for (k=0; k<MAX_ARRAY_SIZE; k++) {
if (i || k) {
file_offsets[k] = disp + n_filetypes*filetype_extent
+ flat_file->indices[j];
file_offsets[k] = disp +
((ADIO_Offset)n_filetypes)*filetype_extent
+ flat_file->indices[j];
file_lengths[k] = flat_file->blocklens[j];
mem_lengths += file_lengths[k];
}
@ -472,9 +495,15 @@ void ADIOI_PVFS2_WriteStrided(ADIO_File fd, void *buf, int count,
/* PVFS_Request_hindexed already expresses the offsets into the
* file, so we should not pass in an offset if we are using
* hindexed for the file type */
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req, 0,
mem_offsets, mem_req,
&(pvfs_fs->credentials), &resp_io);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
/* --BEGIN ERROR HANDLING-- */
if (err_flag != 0) {
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
@ -503,8 +532,9 @@ void ADIOI_PVFS2_WriteStrided(ADIO_File fd, void *buf, int count,
}
for (k=0; k<extra_blks; k++) {
if(i || k) {
file_offsets[k] = disp + n_filetypes*filetype_extent +
flat_file->indices[j];
file_offsets[k] = disp +
((ADIO_Offset)n_filetypes)*filetype_extent +
flat_file->indices[j];
if (k == (extra_blks - 1)) {
file_lengths[k] = bufsize - (int32_t) mem_lengths
- (int32_t) mem_offsets + (int32_t) buf;
@ -547,9 +577,15 @@ void ADIOI_PVFS2_WriteStrided(ADIO_File fd, void *buf, int count,
/* --END ERROR HANDLING-- */
/* as above, use 0 for 'offset' when using hindexed file type*/
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req, 0,
mem_offsets, mem_req,
&(pvfs_fs->credentials), &resp_io);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
/* --BEGIN ERROR HANDLING-- */
if (err_flag != 0) {
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
@ -904,8 +940,8 @@ void ADIOI_PVFS2_WriteStrided(ADIO_File fd, void *buf, int count,
k = (k + 1)%flat_buf->count;
} /* for (i=0; i<mem_list_count; i++) */
for (i=0; i<file_list_count; i++) {
file_offsets[i] = disp + flat_file->indices[j] + n_filetypes *
filetype_extent;
file_offsets[i] = disp + flat_file->indices[j] +
((ADIO_Offset)n_filetypes) * filetype_extent;
if (!i) {
file_lengths[0] = fwr_size;
file_offsets[0] += flat_file->blocklens[j] - fwr_size;
@ -959,9 +995,15 @@ void ADIOI_PVFS2_WriteStrided(ADIO_File fd, void *buf, int count,
/* offset will be expressed in memory and file datatypes */
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req, 0,
PVFS_BOTTOM, mem_req,
&(pvfs_fs->credentials), &resp_io);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
/* --BEGIN ERROR HANDLING-- */
if (err_flag != 0) {
*error_code = MPIO_Err_create_code(MPI_SUCCESS,
@ -986,7 +1028,16 @@ void ADIOI_PVFS2_WriteStrided(ADIO_File fd, void *buf, int count,
ADIOI_Free(file_offsets);
ADIOI_Free(file_lengths);
if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += total_bytes_written;
/* when incrementing fp_ind, need to also take into account the file type:
* consider an N-element 1-d subarray with a lb and ub: ( |---xxxxx-----|
* if we wrote N elements, offset needs to point at beginning of type, not
* at empty region at offset N+1) */
if (file_ptr_type == ADIO_INDIVIDUAL) {
/* this is closer, but still incorrect for the cases where a small
* amount of a file type is "leftover" after a write */
fd->fp_ind = disp + flat_file->indices[j] +
((ADIO_Offset)n_filetypes)*filetype_extent;
}
*error_code = MPI_SUCCESS;
error_state:

Просмотреть файл

@ -15,29 +15,14 @@ int ADIOI_TESTFS_ReadDone(ADIO_Request *request, ADIO_Status *status, int
*error_code = MPI_SUCCESS;
if (*request == ADIO_REQUEST_NULL) {
MPI_Comm_size( MPI_COMM_WORLD, &nprocs );
MPI_Comm_rank( MPI_COMM_WORLD, &myrank );
FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_ReadDone called on ADIO_REQUEST_NULL\n",
myrank, nprocs);
return 1;
}
MPI_Comm_size((*request)->fd->comm, &nprocs);
MPI_Comm_rank((*request)->fd->comm, &myrank);
FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_ReadDone called on %s\n",
myrank, nprocs, (*request)->fd->filename);
#ifdef HAVE_STATUS_SET_BYTES
MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes);
#endif
(*request)->fd->async_count--;
ADIOI_Free_request((ADIOI_Req_node *) (*request));
*request = ADIO_REQUEST_NULL;
MPI_Comm_size( MPI_COMM_WORLD, &nprocs );
MPI_Comm_rank( MPI_COMM_WORLD, &myrank );
FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_ReadDone called on ADIO_REQUEST_NULL\n",
myrank, nprocs);
return 1;
}
int ADIOI_TESTFS_WriteDone(ADIO_Request *request, ADIO_Status *status, int
*error_code)
{
@ -45,24 +30,10 @@ int ADIOI_TESTFS_WriteDone(ADIO_Request *request, ADIO_Status *status, int
*error_code = MPI_SUCCESS;
if (*request == ADIO_REQUEST_NULL) {
MPI_Comm_size( MPI_COMM_WORLD, &nprocs );
MPI_Comm_rank( MPI_COMM_WORLD, &myrank );
FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_WriteDone called on ADIO_REQUEST_NULL\n",
MPI_Comm_size( MPI_COMM_WORLD, &nprocs );
MPI_Comm_rank( MPI_COMM_WORLD, &myrank );
FPRINTF(stdout,
"[%d/%d] ADIOI_TESTFS_WriteDone called on ADIO_REQUEST_NULL\n",
myrank, nprocs);
return 1;
}
MPI_Comm_size((*request)->fd->comm, &nprocs);
MPI_Comm_rank((*request)->fd->comm, &myrank);
FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_WriteDone called on %s\n",
myrank, nprocs, (*request)->fd->filename);
#ifdef HAVE_STATUS_SET_BYTES
MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes);
#endif
(*request)->fd->async_count--;
ADIOI_Free_request((ADIOI_Req_node *) (*request));
*request = ADIO_REQUEST_NULL;
return 1;
}

Просмотреть файл

@ -22,15 +22,9 @@ void ADIOI_TESTFS_IreadContig(ADIO_File fd, void *buf, int count,
*error_code = MPI_SUCCESS;
*request = ADIOI_Malloc_request();
(*request)->optype = ADIOI_WRITE;
(*request)->fd = fd;
(*request)->queued = 0;
(*request)->datatype = datatype;
MPI_Type_size(datatype, &typesize);
MPI_Comm_size(fd->comm, &nprocs);
MPI_Comm_rank(fd->comm, &myrank);
MPI_Type_size(datatype, &typesize);
FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_IreadContig called on %s\n",
myrank, nprocs, fd->filename);
FPRINTF(stdout, "[%d/%d] calling ADIOI_TESTFS_ReadContig\n",
@ -40,13 +34,7 @@ void ADIOI_TESTFS_IreadContig(ADIO_File fd, void *buf, int count,
ADIOI_TESTFS_ReadContig(fd, buf, len, MPI_BYTE, file_ptr_type,
offset, &status, error_code);
#ifdef HAVE_STATUS_SET_BYTES
if (*error_code == MPI_SUCCESS) {
MPI_Get_elements(&status, MPI_BYTE, &len);
(*request)->nbytes = len;
}
#endif
fd->async_count++;
MPIO_Completed_request_create(&fd, len, error_code, request);
}
void ADIOI_TESTFS_IreadStrided(ADIO_File fd, void *buf, int count,
@ -56,34 +44,18 @@ void ADIOI_TESTFS_IreadStrided(ADIO_File fd, void *buf, int count,
{
ADIO_Status status;
int myrank, nprocs;
#ifdef HAVE_STATUS_SET_BYTES
int typesize;
#endif
*error_code = MPI_SUCCESS;
*request = ADIOI_Malloc_request();
(*request)->optype = ADIOI_WRITE;
(*request)->fd = fd;
(*request)->queued = 0;
(*request)->datatype = datatype;
MPI_Comm_size(fd->comm, &nprocs);
MPI_Comm_rank(fd->comm, &myrank);
MPI_Type_size(datatype, &typesize);
FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_IreadStrided called on %s\n",
myrank, nprocs, fd->filename);
FPRINTF(stdout, "[%d/%d] calling ADIOI_TESTFS_ReadStrided\n",
myrank, nprocs);
ADIOI_TESTFS_ReadStrided(fd, buf, count, datatype, file_ptr_type,
offset, &status, error_code);
#ifdef HAVE_STATUS_SET_BYTES
if (*error_code == MPI_SUCCESS) {
MPI_Type_size(datatype, &typesize);
(*request)->nbytes = count * typesize;
}
#endif
fd->async_count++;
offset, &status, error_code);
MPIO_Completed_request_create(&fd, count*typesize, error_code, request);
}

Просмотреть файл

@ -8,6 +8,9 @@
#include "ad_testfs.h"
#include "adioi.h"
#include "mpiu_greq.h"
#include "../../mpi-io/mpioimpl.h"
/* ADIOI_TESTFS_IwriteContig()
*
* Implemented by immediately calling WriteContig()
@ -22,12 +25,6 @@ void ADIOI_TESTFS_IwriteContig(ADIO_File fd, void *buf, int count,
*error_code = MPI_SUCCESS;
*request = ADIOI_Malloc_request();
(*request)->optype = ADIOI_WRITE;
(*request)->fd = fd;
(*request)->queued = 0;
(*request)->datatype = datatype;
MPI_Type_size(datatype, &typesize);
MPI_Comm_size(fd->comm, &nprocs);
MPI_Comm_rank(fd->comm, &myrank);
@ -39,14 +36,8 @@ void ADIOI_TESTFS_IwriteContig(ADIO_File fd, void *buf, int count,
len = count * typesize;
ADIOI_TESTFS_WriteContig(fd, buf, len, MPI_BYTE, file_ptr_type,
offset, &status, error_code);
MPIO_Completed_request_create(&fd, len, error_code, request);
#ifdef HAVE_STATUS_SET_BYTES
if (*error_code == MPI_SUCCESS) {
MPI_Get_elements(&status, MPI_BYTE, &len);
(*request)->nbytes = len;
}
#endif
fd->async_count++;
}
void ADIOI_TESTFS_IwriteStrided(ADIO_File fd, void *buf, int count,
@ -56,20 +47,14 @@ void ADIOI_TESTFS_IwriteStrided(ADIO_File fd, void *buf, int count,
{
ADIO_Status status;
int myrank, nprocs;
#ifdef HAVE_STATUS_SET_BYTES
int typesize;
#endif
*error_code = MPI_SUCCESS;
*request = ADIOI_Malloc_request();
(*request)->optype = ADIOI_WRITE;
(*request)->fd = fd;
(*request)->queued = 0;
(*request)->datatype = datatype;
MPI_Comm_size(fd->comm, &nprocs);
MPI_Comm_rank(fd->comm, &myrank);
MPI_Type_size(datatype, &typesize);
FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_IwriteStrided called on %s\n",
myrank, nprocs, fd->filename);
FPRINTF(stdout, "[%d/%d] calling ADIOI_TESTFS_WriteStrided\n",
@ -77,12 +62,6 @@ void ADIOI_TESTFS_IwriteStrided(ADIO_File fd, void *buf, int count,
ADIOI_TESTFS_WriteStrided(fd, buf, count, datatype, file_ptr_type,
offset, &status, error_code);
MPIO_Completed_request_create(&fd, count*typesize, error_code, request);
#ifdef HAVE_STATUS_SET_BYTES
if (*error_code == MPI_SUCCESS) {
MPI_Type_size(datatype, &typesize);
(*request)->nbytes = count * typesize;
}
#endif
fd->async_count++;
}

Просмотреть файл

@ -15,22 +15,12 @@ void ADIOI_TESTFS_ReadComplete(ADIO_Request *request, ADIO_Status *status, int
*error_code = MPI_SUCCESS;
if (*request == ADIO_REQUEST_NULL) {
FPRINTF(stdout, "[xx/xx] ADIOI_TESTFS_ReadComplete called on ADIO_REQUEST_NULL\n");
return;
}
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_ReadComplete called \n",
myrank, nprocs);
MPI_Comm_size((*request)->fd->comm, &nprocs);
MPI_Comm_rank((*request)->fd->comm, &myrank);
FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_ReadComplete called on %s\n",
myrank, nprocs, (*request)->fd->filename);
#ifdef HAVE_STATUS_SET_BYTES
MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes);
#endif
(*request)->fd->async_count--;
ADIOI_Free_request((ADIOI_Req_node *) (*request));
*request = ADIO_REQUEST_NULL;
/* do something with status set bytes? */
}
void ADIOI_TESTFS_WriteComplete(ADIO_Request *request, ADIO_Status *status, int
@ -40,20 +30,10 @@ void ADIOI_TESTFS_WriteComplete(ADIO_Request *request, ADIO_Status *status, int
*error_code = MPI_SUCCESS;
if (*request == ADIO_REQUEST_NULL) {
FPRINTF(stdout, "[xx/xx] ADIOI_TESTFS_WriteComplete called on ADIO_REQUEST_NULL\n");
return;
}
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_WriteComplete called\n",
myrank, nprocs);
MPI_Comm_size((*request)->fd->comm, &nprocs);
MPI_Comm_rank((*request)->fd->comm, &myrank);
FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_WriteComplete called on %s\n",
myrank, nprocs, (*request)->fd->filename);
#ifdef HAVE_STATUS_SET_BYTES
MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes);
#endif
(*request)->fd->async_count--;
ADIOI_Free_request((ADIOI_Req_node *) (*request));
*request = ADIO_REQUEST_NULL;
/* do something with status_set_bytes? */
}

Просмотреть файл

@ -22,8 +22,13 @@ struct ADIOI_Fns_struct ADIO_UFS_operations = {
ADIOI_GEN_ReadStrided, /* ReadStrided */
ADIOI_GEN_WriteStrided, /* WriteStrided */
ADIOI_GEN_Close, /* Close */
#ifdef ROMIO_HAVE_WORKING_AIO
ADIOI_GEN_IreadContig, /* IreadContig */
ADIOI_GEN_IwriteContig, /* IwriteContig */
#else
ADIOI_FAKE_IreadContig, /* IreadContig */
ADIOI_FAKE_IwriteContig, /* IwriteContig */
#endif
ADIOI_GEN_IODone, /* ReadDone */
ADIOI_GEN_IODone, /* WriteDone */
ADIOI_GEN_IOComplete, /* ReadComplete */

Просмотреть файл

@ -15,6 +15,9 @@
#ifdef HAVE_SIGNAL_H
#include <signal.h>
#endif
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_AIO_H
#include <aio.h>
#endif

Просмотреть файл

@ -31,11 +31,25 @@ void ADIOI_UFS_Open(ADIO_File fd, int *error_code)
if (fd->access_mode & ADIO_EXCL)
amode = amode | O_EXCL;
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_open_a, 0, NULL );
#endif
fd->fd_sys = open(fd->filename, amode, perm);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_open_b, 0, NULL );
#endif
fd->fd_direct = -1;
if ((fd->fd_sys != -1) && (fd->access_mode & ADIO_APPEND))
if ((fd->fd_sys != -1) && (fd->access_mode & ADIO_APPEND)) {
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_a, 0, NULL );
#endif
fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_lseek_b, 0, NULL );
#endif
}
/* --BEGIN ERROR HANDLING-- */
if (fd->fd_sys == -1) {

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше