diff --git a/ompi/mca/io/romio/romio/.config_params b/ompi/mca/io/romio/romio/.config_params
index 6a875db219..07ac7a9c00 100644
--- a/ompi/mca/io/romio/romio/.config_params
+++ b/ompi/mca/io/romio/romio/.config_params
@@ -35,4 +35,5 @@ __sgi_mpi
__hp_mpi
__cray_mpi
__lam_mpi
+__Darwin
__open_mpi
diff --git a/ompi/mca/io/romio/romio/adio/ad_bgl/.state-cache b/ompi/mca/io/romio/romio/adio/ad_bgl/.state-cache
deleted file mode 100644
index 10ea8a7bf6..0000000000
--- a/ompi/mca/io/romio/romio/adio/ad_bgl/.state-cache
+++ /dev/null
@@ -1,58 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/ompi/mca/io/romio/romio/adio/ad_bgl/Makefile.am b/ompi/mca/io/romio/romio/adio/ad_bgl/Makefile.am
index ac55cf7d76..b0ac2f8b32 100644
--- a/ompi/mca/io/romio/romio/adio/ad_bgl/Makefile.am
+++ b/ompi/mca/io/romio/romio/adio/ad_bgl/Makefile.am
@@ -26,6 +26,7 @@ libadio_bgl_la_SOURCES = \
ad_bgl.c \
ad_bgl_close.c \
ad_bgl_fcntl.c \
+ ad_bgl_flush.c \
ad_bgl_getsh.c \
ad_bgl.h \
ad_bgl_hints.c \
diff --git a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl.c b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl.c
index 2911543cd9..e17cf31fa1 100644
--- a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl.c
+++ b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl.c
@@ -1,5 +1,6 @@
/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp. 2007, 2008 */
+/* ---------------------------------------------------------------- */
/**
* \file ad_bgl.c
* \brief ???
@@ -18,6 +19,7 @@
struct ADIOI_Fns_struct ADIO_BGL_operations = {
ADIOI_BGL_Open, /* Open */
+ ADIOI_GEN_OpenColl, /* Collective open */
ADIOI_BGL_ReadContig, /* ReadContig */
ADIOI_BGL_WriteContig, /* WriteContig */
#if BGL_OPTIM_STEP1_2
@@ -51,7 +53,8 @@ struct ADIOI_Fns_struct ADIO_BGL_operations = {
ADIOI_GEN_IOComplete, /* WriteComplete */
ADIOI_GEN_IreadStrided, /* IreadStrided */
ADIOI_GEN_IwriteStrided, /* IwriteStrided */
- ADIOI_GEN_Flush, /* Flush */
+ ADIOI_BGL_Flush, /* Flush */
ADIOI_GEN_Resize, /* Resize */
ADIOI_GEN_Delete, /* Delete */
+ ADIOI_GEN_Feature, /* Features */
};
diff --git a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl.h b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl.h
index 1209785be1..89d247b0df 100644
--- a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl.h
+++ b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl.h
@@ -28,8 +28,10 @@
#include
#endif
+#if 0
int ADIOI_BGL_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset,
int wr, void *handle);
+#endif
void ADIOI_BGL_Open(ADIO_File fd, int *error_code);
@@ -87,6 +89,7 @@ void ADIOI_BGL_WriteStridedColl(ADIO_File fd, void *buf, int count,
void ADIOI_BGL_Get_shared_fp(ADIO_File fd, int size, ADIO_Offset *shared_fp, int *error_code);
void ADIOI_BGL_Set_shared_fp(ADIO_File fd, ADIO_Offset offset, int *error_code);
+void ADIOI_BGL_Flush(ADIO_File fd, int *error_code);
#include "ad_bgl_tuning.h"
diff --git a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_aggrs.c b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_aggrs.c
index 5e14af114c..39aeb2c05f 100644
--- a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_aggrs.c
+++ b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_aggrs.c
@@ -1,5 +1,6 @@
/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp. 2007, 2008 */
+/* ---------------------------------------------------------------- */
/**
* \file ad_bgl_aggrs.c
* \brief The externally used function from this file is is declared in ad_bgl_aggrs.h
@@ -7,7 +8,7 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
- * Copyright (C) 1997 University of Chicago.
+ * Copyright (C) 1997-2001 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*/
@@ -16,10 +17,49 @@
#include "ad_bgl.h"
#include "ad_bgl_pset.h"
#include "ad_bgl_aggrs.h"
+#ifdef AGGREGATION_PROFILE
+#include "mpe.h"
+#endif
+
+#ifdef USE_DBG_LOGGING
+ #define AGG_DEBUG 1
+#endif
-int aggrsInPsetSize=0;
-int *aggrsInPset=NULL;
+
+static int aggrsInPsetSize=0;
+static int *aggrsInPset=NULL;
+
+/* Comments copied from common:
+ * This file contains four functions:
+ *
+ * ADIOI_Calc_aggregator()
+ * ADIOI_Calc_file_domains()
+ * ADIOI_Calc_my_req()
+ * ADIOI_Calc_others_req()
+ *
+ * The last three of these were originally in ad_read_coll.c, but they are
+ * also shared with ad_write_coll.c. I felt that they were better kept with
+ * the rest of the shared aggregation code.
+ */
+
+/* Discussion of values available from above:
+ *
+ * ADIO_Offset st_offsets[0..nprocs-1]
+ * ADIO_Offset end_offsets[0..nprocs-1]
+ * These contain a list of start and end offsets for each process in
+ * the communicator. For example, an access at loc 10, size 10 would
+ * have a start offset of 10 and end offset of 19.
+ * int nprocs
+ * number of processors in the collective I/O communicator
+ * ADIO_Offset min_st_offset
+ * ADIO_Offset fd_start[0..nprocs_for_coll-1]
+ * starting location of "file domain"; region that a given process will
+ * perform aggregation for (i.e. actually do I/O)
+ * ADIO_Offset fd_end[0..nprocs_for_coll-1]
+ * start + size - 1 roughly, but it can be less, or 0, in the case of
+ * uneven distributions
+ */
/* forward declaration */
static void
@@ -219,8 +259,7 @@ ADIOI_BGL_compute_agg_ranklist_serial ( ADIO_File fd,
ADIOI_BGL_ProcInfo_t *all_procInfo,
int *aggrsInPset )
{
-# define DEBUG 0
-# if DEBUG
+# if AGG_DEBUG
int i;
# endif
int naggs;
@@ -229,9 +268,10 @@ ADIOI_BGL_compute_agg_ranklist_serial ( ADIO_File fd,
/* compute the ranklist of IO aggregators and put into tmp_ranklist */
tmp_ranklist = (int *) ADIOI_Malloc (confInfo->nProcs * sizeof(int));
-# if DEBUG
- for (i=0; inProcs; i++)
- printf( "\tcpuid %1d, rank = %6d\n", all_procInfo[i].cpuid, all_procInfo[i].rank );
+# if AGG_DEBUG
+ for (i=0; inProcs; i++) {
+ DBG_FPRINTF(stderr, "\tcpuid %1d, rank = %6d\n", all_procInfo[i].cpuid, all_procInfo[i].rank );
+ }
# endif
naggs =
@@ -239,7 +279,7 @@ ADIOI_BGL_compute_agg_ranklist_serial ( ADIO_File fd,
# define VERIFY 0
# if VERIFY
- printf( "\tconfInfo = %3d,%3d,%3d,%3d,%3d,%3d,%.4f; naggs = %d\n",
+ DBG_FPRINTF(stderr, "\tconfInfo = %3d,%3d,%3d,%3d,%3d,%3d,%.4f; naggs = %d\n",
confInfo->PsetSize ,
confInfo->numPsets ,
confInfo->isVNM ,
@@ -250,9 +290,10 @@ ADIOI_BGL_compute_agg_ranklist_serial ( ADIO_File fd,
naggs );
# endif
-# if DEBUG
- for (i=0; ihints */
@@ -267,293 +308,34 @@ ADIOI_BGL_compute_agg_ranklist_serial ( ADIO_File fd,
return;
}
-
-
-/*
- * Compute a dynamic access range based file domain partition among I/O aggregators,
- * which align to the GPFS block size
- * Divide the I/O workload among "nprocs_for_coll" processes. This is
- * done by (logically) dividing the file into file domains (FDs); each
- * process may directly access only its own file domain.
- * Additional effort is to make sure that each I/O aggregator get
- * a file domain that aligns to the GPFS block size. So, there will
- * not be any false sharing of GPFS file blocks among multiple I/O nodes.
- */
-void ADIOI_BGL_GPFS_Calc_file_domains(ADIO_Offset *st_offsets,
- ADIO_Offset *end_offsets,
- int nprocs,
- int nprocs_for_coll,
- ADIO_Offset *min_st_offset_ptr,
- ADIO_Offset **fd_start_ptr,
- ADIO_Offset **fd_end_ptr,
- ADIO_Offset *fd_size_ptr,
- void *fs_ptr)
-{
- ADIO_Offset min_st_offset, max_end_offset, *fd_start, *fd_end, *fd_size;
- int i, aggr;
- static char myname[] = "ADIOI_BGL_GPFS_Calc_file_domains";
- __blksize_t blksize = 1048576; /* default to 1M */
- if(fs_ptr && ((ADIOI_BGL_fs*)fs_ptr)->blksize) /* ignore null ptr or 0 blksize */
- blksize = ((ADIOI_BGL_fs*)fs_ptr)->blksize;
-/* FPRINTF(stderr,"%s(%d): Blocksize=%ld\n",myname,__LINE__,blksize);*/
-
- /* find the range of all the requests */
- min_st_offset = st_offsets [0];
- max_end_offset = end_offsets[0];
- for (i=1; ihints->cb_nodes; i++)
- if (fd->hints->ranklist[i] == myrank) return i;
- return -1;
-}
-
/*
* This is more general aggregator search function which does not base on the assumption
* that each aggregator hosts the file domain with the same size
@@ -574,6 +356,21 @@ int ADIOI_BGL_Calc_aggregator(ADIO_File fd,
/* binary search --> rank_index is returned */
int ub = fd->hints->cb_nodes;
int lb = 0;
+ /* get an index into our array of aggregators */
+ /* Common code for striping - bgl doesn't use it but it's
+ here to make diff'ing easier.
+ rank_index = (int) ((off - min_off + fd_size)/ fd_size - 1);
+
+ if (fd->hints->striping_unit > 0) {
+ * wkliao: implementation for file domain alignment
+ fd_start[] and fd_end[] have been aligned with file lock
+ boundaries when returned from ADIOI_Calc_file_domains() so cannot
+ just use simple arithmetic as above *
+ rank_index = 0;
+ while (off > fd_end[rank_index]) rank_index++;
+ }
+ bgl does its own striping below
+ */
rank_index = fd->hints->cb_nodes / 2;
while ( off < fd_start[rank_index] || off > fd_end[rank_index] ) {
if ( off > fd_end [rank_index] ) {
@@ -586,8 +383,15 @@ int ADIOI_BGL_Calc_aggregator(ADIO_File fd,
rank_index = (rank_index + lb) / 2;
}
}
-
- // printf ("ADIOI_BGL_Calc_aggregator: rank_index = %d\n", rank_index );
+ /* we index into fd_end with rank_index, and fd_end was allocated to be no
+ * bigger than fd->hints->cb_nodes. If we ever violate that, we're
+ * overrunning arrays. Obviously, we should never ever hit this abort */
+ if (rank_index >= fd->hints->cb_nodes || rank_index < 0) {
+ FPRINTF(stderr, "Error in ADIOI_Calc_aggregator(): rank_index(%d) >= fd->hints->cb_nodes (%d) fd_size=%lld off=%lld\n",
+ rank_index,fd->hints->cb_nodes,fd_size,off);
+ MPI_Abort(MPI_COMM_WORLD, 1);
+ }
+ // DBG_FPRINTF ("ADIOI_BGL_Calc_aggregator: rank_index = %d\n", rank_index );
/*
* remember here that even in Rajeev's original code it was the case that
@@ -611,16 +415,161 @@ int ADIOI_BGL_Calc_aggregator(ADIO_File fd,
return rank;
}
+/*
+ * Compute a dynamic access range based file domain partition among I/O aggregators,
+ * which align to the GPFS block size
+ * Divide the I/O workload among "nprocs_for_coll" processes. This is
+ * done by (logically) dividing the file into file domains (FDs); each
+ * process may directly access only its own file domain.
+ * Additional effort is to make sure that each I/O aggregator get
+ * a file domain that aligns to the GPFS block size. So, there will
+ * not be any false sharing of GPFS file blocks among multiple I/O nodes.
+ *
+ * The common version of this now accepts a min_fd_size and striping_unit.
+ * It doesn't seem necessary here (using GPFS block sizes) but keep it in mind
+ * (e.g. we could pass striping unit instead of using fs_ptr->blksize).
+ */
+void ADIOI_BGL_GPFS_Calc_file_domains(ADIO_Offset *st_offsets,
+ ADIO_Offset *end_offsets,
+ int nprocs,
+ int nprocs_for_coll,
+ ADIO_Offset *min_st_offset_ptr,
+ ADIO_Offset **fd_start_ptr,
+ ADIO_Offset **fd_end_ptr,
+ ADIO_Offset *fd_size_ptr,
+ void *fs_ptr)
+{
+ ADIO_Offset min_st_offset, max_end_offset, *fd_start, *fd_end, *fd_size;
+ int i, aggr;
+
+#ifdef AGGREGATION_PROFILE
+ MPE_Log_event (5004, 0, NULL);
+#endif
+
+# if AGG_DEBUG
+ static char myname[] = "ADIOI_BGL_GPFS_Calc_file_domains";
+ DBG_FPRINTF(stderr, "%s(%d): %d aggregator(s)\n",
+ myname,__LINE__,nprocs_for_coll);
+# endif
+ __blksize_t blksize = 1048576; /* default to 1M */
+ if(fs_ptr && ((ADIOI_BGL_fs*)fs_ptr)->blksize) /* ignore null ptr or 0 blksize */
+ blksize = ((ADIOI_BGL_fs*)fs_ptr)->blksize;
+# if AGG_DEBUG
+ DBG_FPRINTF(stderr,"%s(%d): Blocksize=%ld\n",myname,__LINE__,blksize);
+# endif
+/* find min of start offsets and max of end offsets of all processes */
+ min_st_offset = st_offsets [0];
+ max_end_offset = end_offsets[0];
+ for (i=1; ihints->cb_nodes; i++)
+ if (fd->hints->ranklist[i] == myrank) return i;
+ return -1;
+}
/*
* ADIOI_BGL_Calc_my_req() overrides ADIOI_Calc_my_req for the default implementation
* is specific for static file domain partitioning.
*
- * ADIOI_Calc_my_req() calculate what portions of the access requests
+ * ADIOI_Calc_my_req() - calculate what portions of the access requests
* of this process are located in the file domains of various processes
* (including this one)
*/
-void ADIOI_BGL_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list, int *len_list,
+void ADIOI_BGL_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list, ADIO_Offset *len_list,
int contig_access_count, ADIO_Offset
min_st_offset, ADIO_Offset *fd_start,
ADIO_Offset *fd_end, ADIO_Offset fd_size,
@@ -629,12 +578,17 @@ void ADIOI_BGL_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list, int *len_list
int **count_my_req_per_proc_ptr,
ADIOI_Access **my_req_ptr,
int **buf_idx_ptr)
+/* Possibly reconsider if buf_idx's are ok as int's, or should they be aints/offsets?
+ They are used as memory buffer indices so it seems like the 2G limit is in effect */
{
int *count_my_req_per_proc, count_my_req_procs, *buf_idx;
int i, l, proc;
ADIO_Offset fd_len, rem_len, curr_idx, off;
ADIOI_Access *my_req;
+#ifdef AGGREGATION_PROFILE
+ MPE_Log_event (5024, 0, NULL);
+#endif
*count_my_req_per_proc_ptr = (int *) ADIOI_Calloc(nprocs,sizeof(int));
count_my_req_per_proc = *count_my_req_per_proc_ptr;
@@ -656,10 +610,10 @@ void ADIOI_BGL_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list, int *len_list
* contig_access_count was calculated way back in ADIOI_Calc_my_off_len()
*/
for (i=0; i < contig_access_count; i++) {
-
- /* When there is no data being processed, bypass this loop */
- if (len_list[i] == 0) continue;
-
+ /* short circuit offset/len processing if len == 0
+ * (zero-byte read/write) */
+ if (len_list[i] == 0)
+ continue;
off = offset_list[i];
fd_len = len_list[i];
/* note: we set fd_len to be the total size of the access. then
@@ -710,20 +664,24 @@ void ADIOI_BGL_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list, int *len_list
/* now fill in my_req */
curr_idx = 0;
for (i=0; i 0) {
- FPRINTF(stdout, "data needed from %d (count = %d):\n", i,
+ DBG_FPRINTF(stderr, "data needed from %d (count = %d):\n", i,
my_req[i].count);
for (l=0; l < my_req[i].count; l++) {
- FPRINTF(stdout, " off[%d] = %Ld, len[%d] = %d\n", l,
+ DBG_FPRINTF(stderr, " off[%d] = %lld, len[%d] = %d\n", l,
my_req[i].offsets[l], l, my_req[i].lens[l]);
}
}
+ DBG_FPRINTF(stderr, "buf_idx[%d] = 0x%x\n", i, buf_idx[i]);
}
-#if 0
- for (i=0; i
- extern int *aggrsInPset; /* defined in ad_bgl_aggrs.c */
+#if !defined(GPFS_SUPER_MAGIC)
+ #define GPFS_SUPER_MAGIC (0x47504653)
+#endif
+#if !defined(PVFS2_SUPER_MAGIC)
+ #define PVFS2_SUPER_MAGIC (0x20030528)
+#endif
/* File system (BGL) specific information -
hung off of ADIOI_FileD file descriptor (fd->fs_ptr) at open */
typedef struct ADIOI_BGL_fs_s {
__blksize_t blksize;
+ int fsync_aggr; /* "fsync aggregation" flags (below); tested bitwise by ADIOI_BGL_Flush */
+#define ADIOI_BGL_FSYNC_AGGREGATION_DISABLED 0x00 /* no flag bits set */
+#define ADIOI_BGL_FSYNC_AGGREGATION_ENABLED 0x01 /* ADIOI_BGL_Flush takes its collective (barrier + allreduce) path */
+#define ADIOI_BGL_FSYNC_AGGREGATOR 0x10 /* This rank is an aggregator: it calls fsync() in the collective path */
} ADIOI_BGL_fs;
/* generate a list of I/O aggregators that utilizes BGL-PSET orginization. */
@@ -60,7 +70,7 @@
/* overriding ADIOI_Calc_my_req for the default implementation is specific for
static file domain partitioning */
- void ADIOI_BGL_Calc_my_req ( ADIO_File fd, ADIO_Offset *offset_list, int *len_list,
+ void ADIOI_BGL_Calc_my_req ( ADIO_File fd, ADIO_Offset *offset_list, ADIO_Offset *len_list,
int contig_access_count, ADIO_Offset
min_st_offset, ADIO_Offset *fd_start,
ADIO_Offset *fd_end, ADIO_Offset fd_size,
diff --git a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_close.c b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_close.c
index 63f620446f..8fcf857507 100644
--- a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_close.c
+++ b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_close.c
@@ -1,7 +1,8 @@
/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp. 2007, 2008 */
+/* ---------------------------------------------------------------- */
/**
- * \file ad_bgl_open.c
+ * \file ad_bgl_close.c
* \brief ???
*/
diff --git a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_fcntl.c b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_fcntl.c
index c47c2aa5b5..afae2c2988 100644
--- a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_fcntl.c
+++ b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_fcntl.c
@@ -1,5 +1,6 @@
/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp. 2007, 2008 */
+/* ---------------------------------------------------------------- */
/**
* \file ad_bgl_fcntl.c
* \brief ???
diff --git a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_flush.c b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_flush.c
new file mode 100644
index 0000000000..97fd2ca99e
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_flush.c
@@ -0,0 +1,90 @@
+/* ---------------------------------------------------------------- */
+/* (C)Copyright IBM Corp. 2007, 2008 */
+/* ---------------------------------------------------------------- */
+/**
+ * \file ad_bgl_flush.c
+ * \brief Scalable flush based on underlying filesystem and psets
+ */
+
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ *
+ * Copyright (C) 1997 University of Chicago.
+ * See COPYRIGHT notice in top-level directory.
+ */
+
+#include "ad_bgl.h"
+#include "ad_bgl_aggrs.h"
+
+/**
+ * \brief Flush a file to stable storage with fsync(), optionally aggregated.
+ *
+ * With fsync aggregation enabled, ranks barrier on fd->comm, only ranks
+ * flagged ADIOI_BGL_FSYNC_AGGREGATOR call fsync(), and the saved errno (0 on
+ * success) is combined over fd->comm with MPI_Allreduce/MPI_MAX. Otherwise
+ * each calling rank fsyncs on its own. errno is saved right after fsync() so
+ * later logging or MPI calls cannot clobber the value that is reported.
+ * \param fd file to flush; fd->fs_ptr is read as an ADIOI_BGL_fs
+ * \param error_code set to MPI_SUCCESS, or to a code built with MPI_ERR_IO
+ */
+void ADIOI_BGL_Flush(ADIO_File fd, int *error_code)
+{
+    static char myname[] = "ADIOI_BGL_FLUSH";
+    const int aggr_flags = ((ADIOI_BGL_fs*)fd->fs_ptr)->fsync_aggr;
+    int err = 0;
+    int fsync_errno = 0; /* errno of a failed fsync(); stays 0 on success */
+
+    if (aggr_flags & ADIOI_BGL_FSYNC_AGGREGATION_ENABLED)
+    {
+        /* Barrier so we can collectively do fewer fsync's */
+        MPI_Barrier(fd->comm);
+        /* All ranks marked as "fsync aggregators" should fsync.
+           (We currently only do one fsync on rank 0 but this is general
+           enough to support >1 aggregator using allreduce to get the
+           results instead of simply bcast'ing the results from rank 0.) */
+        if (aggr_flags & ADIOI_BGL_FSYNC_AGGREGATOR)
+        {
+            err = fsync(fd->fd_sys);
+            if (err == -1) fsync_errno = errno; /* keep errno, not the -1 */
+            DBG_FPRINTF(stderr,"aggregation:fsync %s, err=%#X, errno=%#X\n",fd->filename, err, fsync_errno);
+        }
+        /* Just pick an errno (using unsigned MPI_MAX) from any failures */
+        MPI_Allreduce( MPI_IN_PLACE, (unsigned*)&fsync_errno, 1, MPI_UNSIGNED, MPI_MAX, fd->comm);
+        DBGV_FPRINTF(stderr,"aggregation result:fsync %s, errno %#X,\n",fd->filename, fsync_errno);
+        /* A non-zero result must be an errno reported by some aggregator */
+        err = fsync_errno ? -1 : 0;
+    }
+    else /* Non-aggregated fsync */
+    {
+        err = fsync(fd->fd_sys);
+        if (err == -1) fsync_errno = errno;
+#ifdef USE_DBG_LOGGING
+        {
+            int rank;
+            MPI_Comm_rank(fd->comm, &rank);
+            if(rank == 0)
+            {
+                DBG_FPRINTF(stderr,"no aggregation:fsync %s, err=%#X, errno=%#X\n",fd->filename, err, fsync_errno);
+            }
+            else
+            {
+                DBGV_FPRINTF(stderr,"no aggregation:fsync %s, err=%#X, errno=%#X\n",fd->filename, err, fsync_errno);
+            }
+        }
+#endif
+    }
+
+    /* --BEGIN ERROR HANDLING-- */
+    if (err == -1)
+    {
+        errno = fsync_errno; /* leave errno describing the fsync failure */
+        *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
+                                           myname, __LINE__, MPI_ERR_IO,
+                                           "**io", "**io %s", strerror(fsync_errno));
+        DBGT_FPRINTF(stderr,"fsync %s, err=%#X, errno=%#X\n",fd->filename, err, fsync_errno);
+        return;
+    }
+    /* --END ERROR HANDLING-- */
+
+    *error_code = MPI_SUCCESS;
+}
+
diff --git a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_hints.c b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_hints.c
index 8a233b2783..614f2cf618 100644
--- a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_hints.c
+++ b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_hints.c
@@ -38,8 +38,8 @@ void ADIOI_BGL_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
MPI_Info info;
char *value;
- int flag, intval, tmp_val, nprocs, nprocs_is_valid = 0;
- static char myname[] = "ADIOI_GEN_SETINFO";
+ int flag, intval, tmp_val, nprocs=0, nprocs_is_valid = 0;
+ static char myname[] = "ADIOI_BGL_SETINFO";
int did_anything = 0;
@@ -61,15 +61,15 @@ void ADIOI_BGL_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
did_anything = 1;
/* buffer size for collective I/O */
- MPI_Info_set(info, "cb_buffer_size", ADIOI_BGL_CB_BUFFER_SIZE_DFLT);
+ ADIOI_Info_set(info, "cb_buffer_size", ADIOI_BGL_CB_BUFFER_SIZE_DFLT);
fd->hints->cb_buffer_size = atoi(ADIOI_BGL_CB_BUFFER_SIZE_DFLT);
/* default is to let romio automatically decide when to use
* collective buffering
*/
- MPI_Info_set(info, "romio_cb_read", "enable");
+ ADIOI_Info_set(info, "romio_cb_read", "enable");
fd->hints->cb_read = ADIOI_HINT_ENABLE;
- MPI_Info_set(info, "romio_cb_write", "enable");
+ ADIOI_Info_set(info, "romio_cb_write", "enable");
fd->hints->cb_write = ADIOI_HINT_ENABLE;
if ( fd->hints->cb_config_list != NULL ) ADIOI_Free (fd->hints->cb_config_list);
@@ -78,30 +78,54 @@ void ADIOI_BGL_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
/* number of processes that perform I/O in collective I/O */
MPI_Comm_size(fd->comm, &nprocs);
nprocs_is_valid = 1;
- sprintf(value, "%d", nprocs);
- MPI_Info_set(info, "cb_nodes", value);
+ ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", nprocs);
+ ADIOI_Info_set(info, "cb_nodes", value);
fd->hints->cb_nodes = -1;
/* hint indicating that no indep. I/O will be performed on this file */
- MPI_Info_set(info, "romio_no_indep_rw", "false");
+ ADIOI_Info_set(info, "romio_no_indep_rw", "false");
fd->hints->no_indep_rw = 0;
- /* deferred_open derrived from no_indep_rw and cb_{read,write} */
+
+ /* bgl is not implementing file realms (ADIOI_IOStridedColl),
+ so initialize it to disabled. */
+ /* hint instructing the use of persistent file realms */
+ ADIOI_Info_set(info, "romio_cb_pfr", "disable");
+ fd->hints->cb_pfr = ADIOI_HINT_DISABLE;
+
+ /* hint guiding the assignment of persistent file realms */
+ ADIOI_Info_set(info, "romio_cb_fr_types", "aar");
+ fd->hints->cb_fr_type = ADIOI_FR_AAR;
+
+ /* hint to align file realms with a certain byte value */
+ ADIOI_Info_set(info, "romio_cb_fr_alignment", "1");
+ fd->hints->cb_fr_alignment = 1;
+
+ /* hint to set a threshold percentage for a datatype's size/extent at
+ * which data sieving should be done in collective I/O */
+ ADIOI_Info_set(info, "romio_cb_ds_threshold", "0");
+ fd->hints->cb_ds_threshold = 0;
+
+ /* hint to switch between point-to-point or all-to-all for two-phase */
+ ADIOI_Info_set(info, "romio_cb_alltoall", "automatic");
+ fd->hints->cb_alltoall = ADIOI_HINT_AUTO;
+
+ /* deferred_open derived from no_indep_rw and cb_{read,write} */
fd->hints->deferred_open = 0;
/* buffer size for data sieving in independent reads */
- MPI_Info_set(info, "ind_rd_buffer_size", ADIOI_BGL_IND_RD_BUFFER_SIZE_DFLT);
+ ADIOI_Info_set(info, "ind_rd_buffer_size", ADIOI_BGL_IND_RD_BUFFER_SIZE_DFLT);
fd->hints->ind_rd_buffer_size = atoi(ADIOI_BGL_IND_RD_BUFFER_SIZE_DFLT);
/* buffer size for data sieving in independent writes */
- MPI_Info_set(info, "ind_wr_buffer_size", ADIOI_BGL_IND_WR_BUFFER_SIZE_DFLT);
+ ADIOI_Info_set(info, "ind_wr_buffer_size", ADIOI_BGL_IND_WR_BUFFER_SIZE_DFLT);
fd->hints->ind_wr_buffer_size = atoi(ADIOI_BGL_IND_WR_BUFFER_SIZE_DFLT);
if(fd->file_system == ADIO_UFS)
{
/* default for ufs/pvfs is to disable data sieving */
- MPI_Info_set(info, "romio_ds_read", "disable");
+ ADIOI_Info_set(info, "romio_ds_read", "disable");
fd->hints->ds_read = ADIOI_HINT_DISABLE;
- MPI_Info_set(info, "romio_ds_write", "disable");
+ ADIOI_Info_set(info, "romio_ds_write", "disable");
fd->hints->ds_write = ADIOI_HINT_DISABLE;
}
else
@@ -109,18 +133,23 @@ void ADIOI_BGL_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
/* default is to let romio automatically decide when to use data
* sieving
*/
- MPI_Info_set(info, "romio_ds_read", "automatic");
+ ADIOI_Info_set(info, "romio_ds_read", "automatic");
fd->hints->ds_read = ADIOI_HINT_AUTO;
- MPI_Info_set(info, "romio_ds_write", "automatic");
+ ADIOI_Info_set(info, "romio_ds_write", "automatic");
fd->hints->ds_write = ADIOI_HINT_AUTO;
}
- fd->hints->initialized = 1;
+ /* still to do: tune this a bit for a variety of file systems. there's
+ * no good default value so just leave it unset */
+ fd->hints->min_fdomain_size = 0;
+ fd->hints->striping_unit = 0;
+
+ fd->hints->initialized = 1;
}
/* add in user's info if supplied */
if (users_info != MPI_INFO_NULL) {
- MPI_Info_get(users_info, "cb_buffer_size", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(users_info, "cb_buffer_size", MPI_MAX_INFO_VAL,
value, &flag);
if (flag && ((intval=atoi(value)) > 0)) {
tmp_val = intval;
@@ -135,30 +164,106 @@ void ADIOI_BGL_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
}
/* --END ERROR HANDLING-- */
- MPI_Info_set(info, "cb_buffer_size", value);
+ ADIOI_Info_set(info, "cb_buffer_size", value);
fd->hints->cb_buffer_size = intval;
}
+#if 0
+ /* bgl is not implementing file realms (ADIOI_IOStridedColl) ... */
+ /* aligning file realms to certain sizes (e.g. stripe sizes)
+ * may benefit I/O performance */
+ ADIOI_Info_get(users_info, "romio_cb_fr_alignment", MPI_MAX_INFO_VAL,
+ value, &flag);
+ if (flag && ((intval=atoi(value)) > 0)) {
+ tmp_val = intval;
+ MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
+ /* --BEGIN ERROR HANDLING-- */
+ if (tmp_val != intval) {
+ MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
+ "romio_cb_fr_alignment",
+ error_code);
+ return;
+ }
+ /* --END ERROR HANDLING-- */
+
+ ADIOI_Info_set(info, "romio_cb_fr_alignment", value);
+ fd->hints->cb_fr_alignment = intval;
+
+ }
+
+ /* for collective I/O, try to be smarter about when to do data sieving
+ * using a specific threshold for the datatype size/extent
+ * (percentage 0-100%) */
+ ADIOI_Info_get(users_info, "romio_cb_ds_threshold", MPI_MAX_INFO_VAL,
+ value, &flag);
+ if (flag && ((intval=atoi(value)) > 0)) {
+ tmp_val = intval;
+
+ MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
+ /* --BEGIN ERROR HANDLING-- */
+ if (tmp_val != intval) {
+ MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
+ "romio_cb_ds_threshold",
+ error_code);
+ return;
+ }
+ /* --END ERROR HANDLING-- */
+
+ ADIOI_Info_set(info, "romio_cb_ds_threshold", value);
+ fd->hints->cb_ds_threshold = intval;
+
+ }
+ ADIOI_Info_get(users_info, "romio_cb_alltoall", MPI_MAX_INFO_VAL, value,
+ &flag);
+ if (flag) {
+ if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
+ ADIOI_Info_set(info, "romio_cb_alltoall", value);
+ fd->hints->cb_read = ADIOI_HINT_ENABLE;
+ }
+ else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) {
+ ADIOI_Info_set(info, "romio_cb_alltoall", value);
+ fd->hints->cb_read = ADIOI_HINT_DISABLE;
+ }
+ else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
+ {
+ ADIOI_Info_set(info, "romio_cb_alltoall", value);
+ fd->hints->cb_read = ADIOI_HINT_AUTO;
+ }
+
+ tmp_val = fd->hints->cb_alltoall;
+
+ MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
+ /* --BEGIN ERROR HANDLING-- */
+ if (tmp_val != fd->hints->cb_alltoall) {
+ MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
+ "romio_cb_alltoall",
+ error_code);
+ return;
+ }
+ /* --END ERROR HANDLING-- */
+ }
+#endif
/* new hints for enabling/disabling coll. buffering on
* reads/writes
*/
- MPI_Info_get(users_info, "romio_cb_read", MPI_MAX_INFO_VAL, value, &flag);
+ ADIOI_Info_get(users_info, "romio_cb_read", MPI_MAX_INFO_VAL, value,
+ &flag);
if (flag) {
if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
- MPI_Info_set(info, "romio_cb_read", value);
+ ADIOI_Info_set(info, "romio_cb_read", value);
fd->hints->cb_read = ADIOI_HINT_ENABLE;
}
else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) {
/* romio_cb_read overrides no_indep_rw */
- MPI_Info_set(info, "romio_cb_read", value);
- MPI_Info_set(info, "romio_no_indep_rw", "false");
+ ADIOI_Info_set(info, "romio_cb_read", value);
+ ADIOI_Info_set(info, "romio_no_indep_rw", "false");
fd->hints->cb_read = ADIOI_HINT_DISABLE;
fd->hints->no_indep_rw = ADIOI_HINT_DISABLE;
}
else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
{
- MPI_Info_set(info, "romio_cb_read", value);
+ ADIOI_Info_set(info, "romio_cb_read", value);
fd->hints->cb_read = ADIOI_HINT_AUTO;
}
@@ -174,24 +279,25 @@ void ADIOI_BGL_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
}
/* --END ERROR HANDLING-- */
}
- MPI_Info_get(users_info, "romio_cb_write", MPI_MAX_INFO_VAL, value, &flag);
+ ADIOI_Info_get(users_info, "romio_cb_write", MPI_MAX_INFO_VAL, value,
+ &flag);
if (flag) {
if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
- MPI_Info_set(info, "romio_cb_write", value);
+ ADIOI_Info_set(info, "romio_cb_write", value);
fd->hints->cb_write = ADIOI_HINT_ENABLE;
}
else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE"))
{
/* romio_cb_write overrides no_indep_rw, too */
- MPI_Info_set(info, "romio_cb_write", value);
- MPI_Info_set(info, "romio_no_indep_rw", "false");
+ ADIOI_Info_set(info, "romio_cb_write", value);
+ ADIOI_Info_set(info, "romio_no_indep_rw", "false");
fd->hints->cb_write = ADIOI_HINT_DISABLE;
fd->hints->no_indep_rw = ADIOI_HINT_DISABLE;
}
else if (!strcmp(value, "automatic") ||
!strcmp(value, "AUTOMATIC"))
{
- MPI_Info_set(info, "romio_cb_write", value);
+ ADIOI_Info_set(info, "romio_cb_write", value);
fd->hints->cb_write = ADIOI_HINT_AUTO;
}
@@ -208,23 +314,81 @@ void ADIOI_BGL_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
/* --END ERROR HANDLING-- */
}
+#if 0
+ /* bgl is not implementing file realms (ADIOI_IOStridedColl) ... */
+ /* enable/disable persistent file realms for collective I/O */
+ /* may want to check for no_indep_rdwr hint as well */
+ ADIOI_Info_get(users_info, "romio_cb_pfr", MPI_MAX_INFO_VAL, value,
+ &flag);
+ if (flag) {
+ if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
+ ADIOI_Info_set(info, "romio_cb_pfr", value);
+ fd->hints->cb_pfr = ADIOI_HINT_ENABLE;
+ }
+ else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) {
+ ADIOI_Info_set(info, "romio_cb_pfr", value);
+ fd->hints->cb_pfr = ADIOI_HINT_DISABLE;
+ }
+ else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
+ {
+ ADIOI_Info_set(info, "romio_cb_pfr", value);
+ fd->hints->cb_pfr = ADIOI_HINT_AUTO;
+ }
+
+ tmp_val = fd->hints->cb_pfr;
+
+ MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
+ /* --BEGIN ERROR HANDLING-- */
+ if (tmp_val != fd->hints->cb_pfr) {
+ MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
+ "romio_cb_pfr",
+ error_code);
+ return;
+ }
+ /* --END ERROR HANDLING-- */
+ }
+
+ /* file realm assignment types ADIOI_FR_AAR(0),
+ ADIOI_FR_FSZ(-1), ADIOI_FR_USR_REALMS(-2), all others specify
+ a regular fr size in bytes. probably not the best way... */
+ ADIOI_Info_get(users_info, "romio_cb_fr_type", MPI_MAX_INFO_VAL,
+ value, &flag);
+ if (flag && ((intval=atoi(value)) >= -2)) {
+ tmp_val = intval;
+
+ MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
+ /* --BEGIN ERROR HANDLING-- */
+ if (tmp_val != intval) {
+ MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
+ "romio_cb_fr_type",
+ error_code);
+ return;
+ }
+ /* --END ERROR HANDLING-- */
+
+ ADIOI_Info_set(info, "romio_cb_fr_type", value);
+ fd->hints->cb_fr_type = intval;
+
+ }
+#endif
/* new hint for specifying no indep. read/write will be performed */
- MPI_Info_get(users_info, "romio_no_indep_rw", MPI_MAX_INFO_VAL, value, &flag);
+ ADIOI_Info_get(users_info, "romio_no_indep_rw", MPI_MAX_INFO_VAL, value,
+ &flag);
if (flag) {
if (!strcmp(value, "true") || !strcmp(value, "TRUE")) {
/* if 'no_indep_rw' set, also hint that we will do
* collective buffering: if we aren't doing independent io,
* then we have to do collective */
- MPI_Info_set(info, "romio_no_indep_rw", value);
- MPI_Info_set(info, "romio_cb_write", "enable");
- MPI_Info_set(info, "romio_cb_read", "enable");
+ ADIOI_Info_set(info, "romio_no_indep_rw", value);
+ ADIOI_Info_set(info, "romio_cb_write", "enable");
+ ADIOI_Info_set(info, "romio_cb_read", "enable");
fd->hints->no_indep_rw = 1;
fd->hints->cb_read = 1;
fd->hints->cb_write = 1;
tmp_val = 1;
}
else if (!strcmp(value, "false") || !strcmp(value, "FALSE")) {
- MPI_Info_set(info, "romio_no_indep_rw", value);
+ ADIOI_Info_set(info, "romio_no_indep_rw", value);
fd->hints->no_indep_rw = 0;
tmp_val = 0;
}
@@ -246,64 +410,80 @@ void ADIOI_BGL_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
/* new hints for enabling/disabling data sieving on
* reads/writes
*/
- MPI_Info_get(users_info, "romio_ds_read", MPI_MAX_INFO_VAL, value,
+ ADIOI_Info_get(users_info, "romio_ds_read", MPI_MAX_INFO_VAL, value,
&flag);
if (flag) {
if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
- MPI_Info_set(info, "romio_ds_read", value);
+ ADIOI_Info_set(info, "romio_ds_read", value);
fd->hints->ds_read = ADIOI_HINT_ENABLE;
}
else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) {
- MPI_Info_set(info, "romio_ds_read", value);
+ ADIOI_Info_set(info, "romio_ds_read", value);
fd->hints->ds_read = ADIOI_HINT_DISABLE;
}
else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
{
- MPI_Info_set(info, "romio_ds_read", value);
+ ADIOI_Info_set(info, "romio_ds_read", value);
fd->hints->ds_read = ADIOI_HINT_AUTO;
}
/* otherwise ignore */
}
- MPI_Info_get(users_info, "romio_ds_write", MPI_MAX_INFO_VAL, value,
+ ADIOI_Info_get(users_info, "romio_ds_write", MPI_MAX_INFO_VAL, value,
&flag);
if (flag) {
if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
- MPI_Info_set(info, "romio_ds_write", value);
+ ADIOI_Info_set(info, "romio_ds_write", value);
fd->hints->ds_write = ADIOI_HINT_ENABLE;
}
else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) {
- MPI_Info_set(info, "romio_ds_write", value);
+ ADIOI_Info_set(info, "romio_ds_write", value);
fd->hints->ds_write = ADIOI_HINT_DISABLE;
}
else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
{
- MPI_Info_set(info, "romio_ds_write", value);
+ ADIOI_Info_set(info, "romio_ds_write", value);
fd->hints->ds_write = ADIOI_HINT_AUTO;
}
/* otherwise ignore */
}
- MPI_Info_get(users_info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(users_info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL,
value, &flag);
if (flag && ((intval = atoi(value)) > 0)) {
- MPI_Info_set(info, "ind_wr_buffer_size", value);
+ ADIOI_Info_set(info, "ind_wr_buffer_size", value);
fd->hints->ind_wr_buffer_size = intval;
}
- MPI_Info_get(users_info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(users_info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL,
value, &flag);
if (flag && ((intval = atoi(value)) > 0)) {
- MPI_Info_set(info, "ind_rd_buffer_size", value);
+ ADIOI_Info_set(info, "ind_rd_buffer_size", value);
fd->hints->ind_rd_buffer_size = intval;
}
memset( value, 0, MPI_MAX_INFO_VAL+1 );
- MPI_Info_get(users_info, ADIOI_BGL_NAGG_IN_PSET_HINT_NAME, MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(users_info, "romio_min_fdomain_size", MPI_MAX_INFO_VAL,
+ value, &flag);
+ if ( flag && ((intval = atoi(value)) > 0) ) {
+ ADIOI_Info_set(info, "romio_min_fdomain_size", value);
+ fd->hints->min_fdomain_size = intval;
+ }
+ /* Now we use striping unit in common code so we should
+ process hints for it. */
+ ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
+ value, &flag);
+ if ( flag && ((intval = atoi(value)) > 0) ) {
+ ADIOI_Info_set(info, "striping_unit", value);
+ fd->hints->striping_unit = intval;
+ }
+
+ memset( value, 0, MPI_MAX_INFO_VAL+1 );
+ ADIOI_Info_get(users_info, ADIOI_BGL_NAGG_IN_PSET_HINT_NAME, MPI_MAX_INFO_VAL,
value, &flag);
if (flag && ((intval = atoi(value)) > 0)) {
did_anything = 1;
- MPI_Info_set(info, ADIOI_BGL_NAGG_IN_PSET_HINT_NAME, value);
+ ADIOI_Info_set(info, ADIOI_BGL_NAGG_IN_PSET_HINT_NAME, value);
fd->hints->cb_nodes = intval;
}
}
@@ -312,24 +492,30 @@ void ADIOI_BGL_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
if (did_anything) {
ADIOI_BGL_gen_agg_ranklist(fd, fd->hints->cb_nodes);
}
+ /* ignore deferred open hints and do not enable it for bluegene: need all
+ * processors in the open path so we can stat-and-broadcast the blocksize
+ */
+ ADIOI_Info_set(info, "romio_no_indep_rw", "false");
+ fd->hints->no_indep_rw = 0;
+ fd->hints->deferred_open = 0;
- /* deferred_open won't be set by callers, but if the user doesn't
- * explicitly disable collecitve buffering (two-phase) and does hint that
- * io w/o independent io is going on, we'll set this internal hint as a
- * convenience */
- if ( ( (fd->hints->cb_read != ADIOI_HINT_DISABLE)
- && (fd->hints->cb_write != ADIOI_HINT_DISABLE)
- && fd->hints->no_indep_rw ) )
- {
- fd->hints->deferred_open = 1;
- } else {
- /* setting romio_no_indep_rw enable and romio_cb_{read,write}
- * disable at the same time doesn't make sense. honor
- * romio_cb_{read,write} and force the no_indep_rw hint to
- * 'disable' */
- MPI_Info_set(info, "romio_no_indep_rw", "false");
- fd->hints->no_indep_rw = 0;
- fd->hints->deferred_open = 0;
+ /* BobC commented this out, but since hint processing runs on both bgl and
+ * bglockless, we need to keep DS writes enabled on gpfs and disabled on
+ * PVFS */
+ if (ADIO_Feature(fd, ADIO_DATA_SIEVING_WRITES) == 0) {
+ /* disable data sieving for fs that do not
+ support file locking */
+ ADIOI_Info_get(info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL,
+ value, &flag);
+ if (flag) {
+ /* get rid of this value if it is set */
+ ADIOI_Info_delete(info, "ind_wr_buffer_size");
+ }
+ /* note: leave ind_wr_buffer_size alone; used for other cases
+ * as well. -- Rob Ross, 04/22/2003
+ */
+ ADIOI_Info_set(info, "romio_ds_write", "disable");
+ fd->hints->ds_write = ADIOI_HINT_DISABLE;
}
ADIOI_Free(value);
diff --git a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_open.c b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_open.c
index 3c7119291c..eeb40fbc0a 100644
--- a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_open.c
+++ b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_open.c
@@ -15,6 +15,181 @@
#include "ad_bgl.h"
#include "ad_bgl_aggrs.h"
+#include <sys/statfs.h>
+#include <sys/vfs.h>
+
+/* COPIED FROM ad_fstype.c since it is static in that file
+
+ ADIO_FileSysType_parentdir - determines a string pathname for the
+ parent directory of a given filename.
+
+Input Parameters:
+. filename - pointer to file name character array
+
+Output Parameters:
+. dirnamep - pointer to location in which to store a pointer to a string
+
+ Note that the caller should free the memory located at the pointer returned
+ after the string is no longer needed.
+*/
+
+/* Fallback for environments whose headers do not provide PATH_MAX; the
+ * value sizes the readlink() buffer in ADIO_FileSysType_parentdir. */
+#ifndef PATH_MAX
+#define PATH_MAX 65535
+#endif
+
+/* In a strict ANSI environment, S_ISLNK may not be defined.  Fix that
+   here.  We assume that S_ISLNK is *always* defined as a macro.  If
+   that is not universally true, then add a test to the romio
+   configure that tries to link a program that references S_ISLNK */
+#if !defined(S_ISLNK)
+#    if defined(S_IFLNK) && defined(S_IFMT)
+     /* Compare the whole file-type field rather than testing bits: the
+      * S_IF* values are type codes, not independent flags, and on common
+      * Unix ABIs S_IFLNK shares a bit with S_IFREG, so a bare
+      * "(mode) & S_IFLNK" reports regular files as links. */
+#    define S_ISLNK(mode) (((mode) & S_IFMT) == S_IFLNK)
+#    else
+     /* no way to check if it is a link, so say false */
+#    define S_ISLNK(mode) 0
+#    endif
+#endif /* !(S_ISLNK) */
+
+/* ADIO_FileSysType_parentdir
+ *
+ * Stores in *dirnamep a newly allocated string naming the parent directory
+ * of filename.  If filename is a symbolic link, the parent directory of the
+ * link's target text (as returned by readlink) is used instead.  A name
+ * with no '/' yields ".", and a name directly under the root yields "/".
+ *
+ * The string is allocated with ADIOI_Strdup and is owned by the caller,
+ * who must release it (presumably with ADIOI_Free -- confirm against the
+ * ADIOI_Strdup implementation).
+ */
+static void ADIO_FileSysType_parentdir(char *filename, char **dirnamep)
+{
+    int err;
+    char *dir = NULL, *slash;
+    struct stat statbuf;
+
+    err = lstat(filename, &statbuf);
+
+    if (err || (!S_ISLNK(statbuf.st_mode))) {
+        /* no such file, or file is not a link; these are the "normal"
+         * cases where we can just return the parent directory.
+         */
+        dir = ADIOI_Strdup(filename);
+    }
+    else {
+        /* filename is a symlink.  we've presumably already tried
+         * to stat it and found it to be missing (dangling link),
+         * but this code doesn't care if the target is really there
+         * or not.
+         */
+        int namelen;
+        char *linkbuf;
+
+        linkbuf = ADIOI_Malloc(PATH_MAX+1);
+        /* Ask for at most PATH_MAX bytes: readlink does not null
+         * terminate, and the terminator written below needs the last
+         * byte of the PATH_MAX+1 buffer. */
+        namelen = readlink(filename, linkbuf, PATH_MAX);
+        if (namelen == -1) {
+            /* something strange has happened between the time that
+             * we determined that this was a link and the time that
+             * we attempted to read it; punt and use the old name.
+             */
+            dir = ADIOI_Strdup(filename);
+        }
+        else {
+            /* successfully read the link */
+            linkbuf[namelen] = '\0'; /* readlink doesn't null terminate */
+            dir = ADIOI_Strdup(linkbuf);
+        }
+        /* the scratch buffer is no longer needed on either path */
+        ADIOI_Free(linkbuf);
+    }
+
+    /* truncate dir in place at its last '/' to leave only the parent */
+    slash = strrchr(dir, '/');
+    if (!slash) ADIOI_Strncpy(dir, ".", 2);
+    else {
+        if (slash == dir) *(dir + 1) = '\0';
+        else *slash = '\0';
+    }
+
+    *dirnamep = dir;
+    return;
+}
+
+/* scaleable_stat
+ *
+ * Has rank 0 of fd->comm query the file (stat64 for the block size, statfs
+ * for the file system magic number, retrying statfs on the parent directory
+ * if the file itself cannot be queried) and broadcasts both values, so the
+ * other ranks issue no stat calls of their own.  Every rank then records
+ * the block size and its fsync-aggregation flags in the ADIOI_BGL_fs
+ * record at fd->fs_ptr, which must already be allocated and hold the
+ * caller's default block size.
+ */
+static void scaleable_stat(ADIO_File fd)
+{
+    struct stat64 bgl_stat;
+    struct statfs bgl_statfs;
+    int rank, rc;
+    char * dir;
+    long buf[2];
+    MPI_Comm_rank(fd->comm, &rank);
+
+    /* Seed the broadcast payload so a failed stat64 on rank 0 sends the
+     * default block size the caller already stored instead of an
+     * indeterminate value, and an unknown file system is the fallback. */
+    buf[0] = ((ADIOI_BGL_fs*)fd->fs_ptr)->blksize;
+    buf[1] = -1; /* bogus magic number */
+
+    if (rank == 0) {
+        /* Get the (real) underlying file system block size */
+        rc = stat64(fd->filename, &bgl_stat);
+        if (rc >= 0)
+        {
+            buf[0] = bgl_stat.st_blksize;
+            DBGV_FPRINTF(stderr,"Successful stat '%s'.  Blocksize=%ld\n",
+                    fd->filename,bgl_stat.st_blksize);
+        }
+        else
+        {
+            DBGV_FPRINTF(stderr,"Stat '%s' failed with rc=%d, errno=%d\n",
+                    fd->filename,rc,errno);
+        }
+        /* Get the (real) underlying file system type so we can
+         * plan our fsync scaling strategy */
+        rc = statfs(fd->filename,&bgl_statfs);
+        if (rc >= 0)
+        {
+            DBGV_FPRINTF(stderr,"Successful statfs '%s'.  Magic number=%#X\n",
+                    fd->filename,bgl_statfs.f_type);
+            buf[1] = bgl_statfs.f_type;
+        }
+        else
+        {
+            DBGV_FPRINTF(stderr,"Statfs '%s' failed with rc=%d, errno=%d\n",
+                    fd->filename,rc,errno);
+            ADIO_FileSysType_parentdir(fd->filename, &dir);
+            rc = statfs(dir,&bgl_statfs);
+            if (rc >= 0)
+            {
+                DBGV_FPRINTF(stderr,"Successful statfs '%s'.  Magic number=%#X\n",dir,bgl_statfs.f_type);
+                buf[1] = bgl_statfs.f_type;
+            }
+            else
+            {
+                /* Hmm.  Guess we'll assume the worst-case, that it's not GPFS
+                 * or PVFS2 below */
+                buf[1] = -1; /* bogus magic number */
+                DBGV_FPRINTF(stderr,"Statfs '%s' failed with rc=%d, errno=%d\n",dir,rc,errno);
+            }
+            /* dir came from ADIOI_Strdup, so release it through the
+             * matching ADIO wrapper rather than raw free().
+             * NOTE(review): assumes ADIOI_Strdup allocates via
+             * ADIOI_Malloc -- confirm. */
+            ADIOI_Free(dir);
+        }
+    }
+    /* now we can broadcast the stat/statfs data to everyone else */
+    MPI_Bcast(buf, 2, MPI_LONG, 0, fd->comm);
+    bgl_stat.st_blksize = buf[0];
+    bgl_statfs.f_type = buf[1];
+
+    /* data from stat64 */
+    /* store the blksize in the file system specific storage */
+    ((ADIOI_BGL_fs*)fd->fs_ptr)->blksize = bgl_stat.st_blksize;
+
+    /* data from statfs */
+    if ((bgl_statfs.f_type == GPFS_SUPER_MAGIC) ||
+        (bgl_statfs.f_type == PVFS2_SUPER_MAGIC))
+    {
+        ((ADIOI_BGL_fs*)fd->fs_ptr)->fsync_aggr =
+            ADIOI_BGL_FSYNC_AGGREGATION_ENABLED;
+
+        /* Only one rank is an "fsync aggregator" because only one
+         * fsync is needed */
+        if (rank == 0)
+        {
+            ((ADIOI_BGL_fs*)fd->fs_ptr)->fsync_aggr |=
+                ADIOI_BGL_FSYNC_AGGREGATOR;
+            DBG_FPRINTF(stderr,"fsync aggregator %d\n",rank);
+        }
+        /* otherwise: aggregation enabled but this rank is not an aggregator */
+    }
+    /* Other filesystems default to no fsync aggregation */
+}
+
+
void ADIOI_BGL_Open(ADIO_File fd, int *error_code)
{
int perm, old_mask, amode;
@@ -41,8 +216,14 @@ void ADIOI_BGL_Open(ADIO_File fd, int *error_code)
amode = amode | O_RDWR;
if (fd->access_mode & ADIO_EXCL)
amode = amode | O_EXCL;
-
+#ifdef ADIOI_MPE_LOGGING
+ MPE_Log_event(ADIOI_MPE_open_a, 0, NULL);
+#endif
fd->fd_sys = open(fd->filename, amode, perm);
+#ifdef ADIOI_MPE_LOGGING
+ MPE_Log_event(ADIOI_MPE_open_b, 0, NULL);
+#endif
+ DBG_FPRINTF(stderr,"open('%s',%#X,%#X) rc=%d, errno=%d\n",fd->filename,amode,perm,fd->fd_sys,errno);
fd->fd_direct = -1;
if ((fd->fd_sys != -1) && (fd->access_mode & ADIO_APPEND))
@@ -50,18 +231,29 @@ void ADIOI_BGL_Open(ADIO_File fd, int *error_code)
if(fd->fd_sys != -1)
{
- struct stat64 bgl_stat;
- int rc = stat64(fd->filename,&bgl_stat);
- if (rc >= 0)
- {
- /* store the blksize in the file system specific storage */
+ struct stat64 bgl_stat;
+ struct statfs bgl_statfs;
+ char* dir;
+ int rc;
+
+ /* Initialize the ad_bgl file system specific information */
AD_BGL_assert(fd->fs_ptr == NULL);
fd->fs_ptr = (ADIOI_BGL_fs*) ADIOI_Malloc(sizeof(ADIOI_BGL_fs));
- ((ADIOI_BGL_fs*)fd->fs_ptr)->blksize = bgl_stat.st_blksize;
-/* FPRINTF(stderr,"%s(%d):Successful stat '%s'. Blocksize=%ld\n",myname,__LINE__,fd->filename,bgl_stat.st_blksize);*/
- }
-/* else
- FPRINTF(stderr,"%s(%d):Stat '%s' failed with rc=%d, errno=%d\n",myname,__LINE__,fd->filename,rc,errno);*/
+
+ ((ADIOI_BGL_fs*)fd->fs_ptr)->blksize = 1048576; /* default to 1M */
+
+ /* default is no fsync aggregation */
+ ((ADIOI_BGL_fs*)fd->fs_ptr)->fsync_aggr =
+ ADIOI_BGL_FSYNC_AGGREGATION_DISABLED;
+
+
+#ifdef ADIOI_MPE_LOGGING
+ MPE_Log_event(ADIOI_MPE_stat_a, 0, NULL);
+#endif
+ scaleable_stat(fd);
+#ifdef ADIOI_MPE_LOGGING
+ MPE_Log_event(ADIOI_MPE_stat_b, 0, NULL);
+#endif
}
if (fd->fd_sys == -1) {
@@ -112,3 +304,6 @@ void ADIOI_BGL_Open(ADIO_File fd, int *error_code)
}
else *error_code = MPI_SUCCESS;
}
+/*
+ *vim: ts=8 sts=4 sw=4 noexpandtab
+ */
diff --git a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_rdcoll.c b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_rdcoll.c
index 0c441ab518..7f8e029d50 100644
--- a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_rdcoll.c
+++ b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_rdcoll.c
@@ -8,6 +8,7 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
+ *
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*/
@@ -22,18 +23,25 @@
#include "mpe.h"
#endif
+#ifdef USE_DBG_LOGGING
+ #define RDCOLL_DEBUG 1
+#endif
+#ifdef AGGREGATION_PROFILE
+#include "mpe.h"
+#endif
+
/* prototypes of functions used for collective reads only. */
static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype
datatype, int nprocs,
int myrank, ADIOI_Access
*others_req, ADIO_Offset *offset_list,
- int *len_list, int contig_access_count,
+ ADIO_Offset *len_list, int contig_access_count,
ADIO_Offset
min_st_offset, ADIO_Offset fd_size,
ADIO_Offset *fd_start, ADIO_Offset *fd_end,
int *buf_idx, int *error_code);
static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node
- *flat_buf, ADIO_Offset *offset_list, int
+ *flat_buf, ADIO_Offset *offset_list, ADIO_Offset
*len_list, int *send_size, int *recv_size,
int *count, int *start_pos,
int *partial_send,
@@ -47,7 +55,7 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node
int iter,
MPI_Aint buftype_extent, int *buf_idx);
static void ADIOI_R_Exchange_data_alltoallv(ADIO_File fd, void *buf, ADIOI_Flatlist_node
- *flat_buf, ADIO_Offset *offset_list, int
+ *flat_buf, ADIO_Offset *offset_list, ADIO_Offset
*len_list, int *send_size, int *recv_size,
int *count, int *start_pos,
int *partial_send,
@@ -62,8 +70,8 @@ static void ADIOI_R_Exchange_data_alltoallv(ADIO_File fd, void *buf, ADIOI_Flatl
MPI_Aint buftype_extent, int *buf_idx);
static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
*flat_buf, char **recv_buf, ADIO_Offset
- *offset_list, int *len_list,
- int *recv_size,
+ *offset_list, ADIO_Offset *len_list,
+ unsigned *recv_size,
MPI_Request *requests, MPI_Status *statuses,
int *recd_from_proc, int nprocs,
int contig_access_count,
@@ -74,7 +82,7 @@ static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
extern void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype
datatype, int file_ptr_type, ADIO_Offset
- offset, ADIO_Offset **offset_list_ptr, int
+ offset, ADIO_Offset **offset_list_ptr, ADIO_Offset
**len_list_ptr, ADIO_Offset *start_offset_ptr,
ADIO_Offset *end_offset_ptr, int
*contig_access_count_ptr);
@@ -99,25 +107,15 @@ void ADIOI_BGL_ReadStridedColl(ADIO_File fd, void *buf, int count,
whose request lies in this process's file domain. */
int i, filetype_is_contig, nprocs, nprocs_for_coll, myrank;
- int contig_access_count, interleave_count = 0, buftype_is_contig;
+ int contig_access_count=0, interleave_count = 0, buftype_is_contig;
int *count_my_req_per_proc, count_my_req_procs, count_others_req_procs;
ADIO_Offset start_offset, end_offset, orig_fp, fd_size, min_st_offset, off;
ADIO_Offset *offset_list = NULL, *st_offsets = NULL, *fd_start = NULL,
*fd_end = NULL, *end_offsets = NULL;
ADIO_Offset *bgl_offsets0 = NULL, *bgl_offsets = NULL;
int ii;
- int *len_list = NULL, *buf_idx = NULL;
-
- double io_time = 0., all_time, max_all_time;
- double tstep1, max_tstep1;
- double tstep1_1, max_tstep1_1;
- double tstep1_2, max_tstep1_2;
- double tstep1_3, max_tstep1_3;
- double tstep2, max_tstep2;
- double tstep3, max_tstep3;
- double tstep4, max_tstep4;
- double sum_sz;
-
+ ADIO_Offset *len_list = NULL;
+ int *buf_idx = NULL;
#if BGL_PROFILE
BGLMPIO_T_CIO_RESET( 0, r )
#endif
@@ -126,6 +124,14 @@ void ADIOI_BGL_ReadStridedColl(ADIO_File fd, void *buf, int count,
int bufsize, size;
#endif
+#if 0
+/* From common code - not implemented for bgl. */
+ if (fd->hints->cb_pfr != ADIOI_HINT_DISABLE) {
+ ADIOI_IOStridedColl (fd, buf, count, ADIOI_READ, datatype,
+ file_ptr_type, offset, status, error_code);
+ return;
+ } */
+#endif
#ifdef PROFILE
MPE_Log_event(13, 0, "start computation");
#endif
@@ -157,14 +163,16 @@ void ADIOI_BGL_ReadStridedColl(ADIO_File fd, void *buf, int count,
BGLMPIO_T_CIO_SET_GET( 0, r, 1, 1, 1, BGLMPIO_CIO_GATHER, BGLMPIO_CIO_LCOMP )
#endif
- /* for (i=0; idisp + (fd->etype_size) * offset;
+ off = fd->disp + (ADIO_Offset)(fd->etype_size) * offset;
ADIO_ReadContig(fd, buf, count, datatype, ADIO_EXPLICIT_OFFSET,
off, status, error_code);
}
@@ -263,7 +273,9 @@ void ADIOI_BGL_ReadStridedColl(ADIO_File fd, void *buf, int count,
else
ADIOI_Calc_file_domains(st_offsets, end_offsets, nprocs,
nprocs_for_coll, &min_st_offset,
- &fd_start, &fd_end, &fd_size);
+ &fd_start, &fd_end,
+ fd->hints->min_fdomain_size, &fd_size,
+ fd->hints->striping_unit);
#if BGL_PROFILE
BGLMPIO_T_CIO_SET_GET( 0, r, 0, 1, 1, BGLMPIO_CIO_MYREQ, BGLMPIO_CIO_FD_PART )
@@ -381,205 +393,11 @@ void ADIOI_BGL_ReadStridedColl(ADIO_File fd, void *buf, int count,
fd->fp_sys_posn = -1; /* set it to null. */
}
-#if 0
-void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype
- datatype, int file_ptr_type, ADIO_Offset
- offset, ADIO_Offset **offset_list_ptr, int
- **len_list_ptr, ADIO_Offset *start_offset_ptr,
- ADIO_Offset *end_offset_ptr, int
- *contig_access_count_ptr)
-{
- int filetype_size, buftype_size, etype_size;
- int i, j, k, frd_size=0, old_frd_size=0, st_index=0;
- int n_filetypes, etype_in_filetype;
- ADIO_Offset abs_off_in_filetype=0;
- int bufsize, sum, n_etypes_in_filetype, size_in_filetype;
- int contig_access_count, *len_list, flag, filetype_is_contig;
- MPI_Aint filetype_extent, filetype_lb;
- ADIOI_Flatlist_node *flat_file;
- ADIO_Offset *offset_list, off, end_offset=0, disp;
-
-/* For this process's request, calculate the list of offsets and
- lengths in the file and determine the start and end offsets. */
-
- ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
-
- MPI_Type_size(fd->filetype, &filetype_size);
- MPI_Type_extent(fd->filetype, &filetype_extent);
- MPI_Type_lb(fd->filetype, &filetype_lb);
- MPI_Type_size(datatype, &buftype_size);
- etype_size = fd->etype_size;
-
- if ( ! filetype_size ) {
- *contig_access_count_ptr = 0;
- *offset_list_ptr = (ADIO_Offset *) ADIOI_Malloc(2*sizeof(ADIO_Offset));
- *len_list_ptr = (int *) ADIOI_Malloc(2*sizeof(int));
- /* 2 is for consistency. everywhere I malloc one more than needed */
-
- offset_list = *offset_list_ptr;
- len_list = *len_list_ptr;
- offset_list[0] = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind :
- fd->disp + etype_size * offset;
- len_list[0] = 0;
- *start_offset_ptr = offset_list[0];
- *end_offset_ptr = offset_list[0] + len_list[0] - 1;
-
- return;
- }
-
- if (filetype_is_contig) {
- *contig_access_count_ptr = 1;
- *offset_list_ptr = (ADIO_Offset *) ADIOI_Malloc(2*sizeof(ADIO_Offset));
- *len_list_ptr = (int *) ADIOI_Malloc(2*sizeof(int));
- /* 2 is for consistency. everywhere I malloc one more than needed */
-
- offset_list = *offset_list_ptr;
- len_list = *len_list_ptr;
- offset_list[0] = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind :
- fd->disp + etype_size * offset;
- len_list[0] = bufcount * buftype_size;
- *start_offset_ptr = offset_list[0];
- *end_offset_ptr = offset_list[0] + len_list[0] - 1;
-
- /* update file pointer */
- if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = *end_offset_ptr + 1;
- }
-
- else {
-
- /* First calculate what size of offset_list and len_list to allocate */
-
- /* filetype already flattened in ADIO_Open or ADIO_Fcntl */
- flat_file = ADIOI_Flatlist;
- while (flat_file->type != fd->filetype) flat_file = flat_file->next;
- disp = fd->disp;
-
- if (file_ptr_type == ADIO_INDIVIDUAL) {
- offset = fd->fp_ind; /* in bytes */
- n_filetypes = -1;
- flag = 0;
- while (!flag) {
- n_filetypes++;
- for (i=0; icount; i++) {
- if (disp + flat_file->indices[i] +
- (ADIO_Offset) n_filetypes*filetype_extent +
- flat_file->blocklens[i] >= offset)
- {
- st_index = i;
- frd_size = (int) (disp + flat_file->indices[i] +
- (ADIO_Offset) n_filetypes*filetype_extent
- + flat_file->blocklens[i] - offset);
- flag = 1;
- break;
- }
- }
- }
- }
- else {
- n_etypes_in_filetype = filetype_size/etype_size;
- n_filetypes = (int) (offset / n_etypes_in_filetype);
- etype_in_filetype = (int) (offset % n_etypes_in_filetype);
- size_in_filetype = etype_in_filetype * etype_size;
-
- sum = 0;
- for (i=0; icount; i++) {
- sum += flat_file->blocklens[i];
- if (sum > size_in_filetype) {
- st_index = i;
- frd_size = sum - size_in_filetype;
- abs_off_in_filetype = flat_file->indices[i] +
- size_in_filetype - (sum - flat_file->blocklens[i]);
- break;
- }
- }
-
- /* abs. offset in bytes in the file */
- offset = disp + (ADIO_Offset) n_filetypes*filetype_extent +
- abs_off_in_filetype;
- }
-
- /* calculate how much space to allocate for offset_list, len_list */
-
- old_frd_size = frd_size;
- contig_access_count = i = 0;
- j = st_index;
- bufsize = buftype_size * bufcount;
- frd_size = ADIOI_MIN(frd_size, bufsize);
- while (i < bufsize) {
- if (frd_size) contig_access_count++;
- i += frd_size;
- j = (j + 1) % flat_file->count;
- frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
- }
-
- /* allocate space for offset_list and len_list */
-
- *offset_list_ptr = (ADIO_Offset *)
- ADIOI_Malloc((contig_access_count+1)*sizeof(ADIO_Offset));
- *len_list_ptr = (int *) ADIOI_Malloc((contig_access_count+1)*sizeof(int));
- /* +1 to avoid a 0-size malloc */
-
- offset_list = *offset_list_ptr;
- len_list = *len_list_ptr;
-
- /* find start offset, end offset, and fill in offset_list and len_list */
-
- *start_offset_ptr = offset; /* calculated above */
-
- i = k = 0;
- j = st_index;
- off = offset;
- frd_size = ADIOI_MIN(old_frd_size, bufsize);
- while (i < bufsize) {
- if (frd_size) {
- offset_list[k] = off;
- len_list[k] = frd_size;
- k++;
- }
- i += frd_size;
- end_offset = off + frd_size - 1;
-
- /* Note: end_offset points to the last byte-offset that will be accessed.
- e.g., if start_offset=0 and 100 bytes to be read, end_offset=99*/
-
- if (off + frd_size < disp + flat_file->indices[j] +
- flat_file->blocklens[j] +
- (ADIO_Offset) n_filetypes*filetype_extent)
- {
- off += frd_size;
- /* did not reach end of contiguous block in filetype.
- * no more I/O needed. off is incremented by frd_size.
- */
- }
- else {
- if (j < (flat_file->count - 1)) j++;
- else {
- /* hit end of flattened filetype;
- * start at beginning again
- */
- j = 0;
- n_filetypes++;
- }
- off = disp + flat_file->indices[j] +
- (ADIO_Offset) n_filetypes*filetype_extent;
- frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
- }
- }
-
- /* update file pointer */
- if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;
-
- *contig_access_count_ptr = contig_access_count;
- *end_offset_ptr = end_offset;
- }
-}
-#endif
-
static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype
datatype, int nprocs,
int myrank, ADIOI_Access
*others_req, ADIO_Offset *offset_list,
- int *len_list, int contig_access_count, ADIO_Offset
+ ADIO_Offset *len_list, int contig_access_count, ADIO_Offset
min_st_offset, ADIO_Offset fd_size,
ADIO_Offset *fd_start, ADIO_Offset *fd_end,
int *buf_idx, int *error_code)
@@ -594,19 +412,21 @@ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype
array from a file, where each local array is 8Mbytes, requiring
at least another 8Mbytes of temp space is unacceptable. */
- int i, j, m, size, ntimes, max_ntimes, buftype_is_contig;
+ int i, j, m, ntimes, max_ntimes, buftype_is_contig;
ADIO_Offset st_loc=-1, end_loc=-1, off, done, real_off, req_off;
char *read_buf = NULL, *tmp_buf;
int *curr_offlen_ptr, *count, *send_size, *recv_size;
- int *partial_send, *recd_from_proc, *start_pos, for_next_iter;
- int real_size, req_len, flag, for_curr_iter, rank;
+ int *partial_send, *recd_from_proc, *start_pos;
+ /* Not convinced end_loc-st_loc couldn't be > int, so make these offsets*/
+ ADIO_Offset real_size, size, for_curr_iter, for_next_iter;
+ int req_len, flag, rank;
MPI_Status status;
ADIOI_Flatlist_node *flat_buf=NULL;
MPI_Aint buftype_extent;
int coll_bufsize;
-
+#ifdef RDCOLL_DEBUG
int iii;
-
+#endif
*error_code = MPI_SUCCESS; /* changed below if error */
/* only I/O errors are currently reported */
@@ -738,7 +558,7 @@ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype
#ifdef PROFILE
MPE_Log_event(13, 0, "start computation");
#endif
- size = (int) (ADIOI_MIN(coll_bufsize, end_loc-st_loc+1-done));
+ size = ADIOI_MIN((unsigned)coll_bufsize, end_loc-st_loc+1-done);
real_off = off - for_curr_iter;
real_size = size + for_curr_iter;
@@ -746,7 +566,9 @@ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype
for_next_iter = 0;
for (i=0; icomm, requests+j);
j++;
@@ -960,8 +789,10 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node
MPI_Irecv(recv_buf[i], recv_size[i], MPI_BYTE, i,
myrank+i+100*iter, fd->comm, requests+j);
j++;
- /* FPRINTF(stderr, "node %d, recv_size %d, tag %d \n",
- myrank, recv_size[i], myrank+i+100*iter); */
+#ifdef RDCOLL_DEBUG
+ DBG_FPRINTF(stderr, "node %d, recv_size %d, tag %d \n",
+ myrank, recv_size[i], myrank+i+100*iter);
+#endif
}
}
@@ -1006,7 +837,7 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node
/* if noncontiguous, to the copies from the recv buffers */
if (!buftype_is_contig)
ADIOI_Fill_user_buffer(fd, buf, flat_buf, recv_buf,
- offset_list, len_list, recv_size,
+ offset_list, len_list, (unsigned*)recv_size,
requests, statuses, recd_from_proc,
nprocs, contig_access_count,
min_st_offset, fd_size, fd_start, fd_end,
@@ -1024,9 +855,11 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node
if (recv_size[i]) ADIOI_Free(recv_buf[i]);
ADIOI_Free(recv_buf);
}
+#ifdef AGGREGATION_PROFILE
+ MPE_Log_event (5033, 0, NULL);
+#endif
}
-
#define ADIOI_BUF_INCR \
{ \
while (buf_incr) { \
@@ -1040,7 +873,7 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node
n_buftypes++; \
} \
user_buf_idx = flat_buf->indices[flat_buf_idx] + \
- n_buftypes*buftype_extent; \
+ (ADIO_Offset)n_buftypes*(ADIO_Offset)buftype_extent; \
flat_buf_sz = flat_buf->blocklens[flat_buf_idx]; \
} \
buf_incr -= size_in_buf; \
@@ -1052,9 +885,11 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node
{ \
while (size) { \
size_in_buf = ADIOI_MIN(size, flat_buf_sz); \
+ ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + user_buf_idx) == (ADIO_Offset)(MPIR_Upint)(buf + user_buf_idx)); \
+ ADIOI_Assert(size_in_buf == (size_t)size_in_buf); \
memcpy(((char *) buf) + user_buf_idx, \
&(recv_buf[p][recv_buf_idx[p]]), size_in_buf); \
- recv_buf_idx[p] += size_in_buf; \
+ recv_buf_idx[p] += size_in_buf; /* already tested (size_t)size_in_buf*/ \
user_buf_idx += size_in_buf; \
flat_buf_sz -= size_in_buf; \
if (!flat_buf_sz) { \
@@ -1064,7 +899,7 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node
n_buftypes++; \
} \
user_buf_idx = flat_buf->indices[flat_buf_idx] + \
- n_buftypes*buftype_extent; \
+ (ADIO_Offset)n_buftypes*(ADIO_Offset)buftype_extent; \
flat_buf_sz = flat_buf->blocklens[flat_buf_idx]; \
} \
size -= size_in_buf; \
@@ -1073,11 +908,10 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node
ADIOI_BUF_INCR \
}
-
static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
*flat_buf, char **recv_buf, ADIO_Offset
- *offset_list, int *len_list,
- int *recv_size,
+ *offset_list, ADIO_Offset *len_list,
+ unsigned *recv_size,
MPI_Request *requests, MPI_Status *statuses,
int *recd_from_proc, int nprocs,
int contig_access_count,
@@ -1086,13 +920,18 @@ static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
ADIO_Offset *fd_end,
MPI_Aint buftype_extent)
{
+
/* this function is only called if buftype is not contig */
- int i, p, flat_buf_idx, size, buf_incr;
- int flat_buf_sz, size_in_buf, n_buftypes;
+ int i, p, flat_buf_idx;
+ ADIO_Offset flat_buf_sz, size_in_buf, buf_incr, size;
+ int n_buftypes;
ADIO_Offset off, len, rem_len, user_buf_idx;
+ /* Not sure unsigned is necessary, but it makes the math safer */
+ unsigned *curr_from_proc, *done_from_proc, *recv_buf_idx;
- int *curr_from_proc, *done_from_proc, *recv_buf_idx;
+ ADIOI_UNREFERENCED_ARG(requests);
+ ADIOI_UNREFERENCED_ARG(statuses);
/* curr_from_proc[p] = amount of data recd from proc. p that has already
been accounted for so far
@@ -1100,9 +939,9 @@ static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
filled into user buffer in previous iterations
user_buf_idx = current location in user buffer
recv_buf_idx[p] = current location in recv_buf of proc. p */
- curr_from_proc = (int *) ADIOI_Malloc(nprocs * sizeof(int));
- done_from_proc = (int *) ADIOI_Malloc(nprocs * sizeof(int));
- recv_buf_idx = (int *) ADIOI_Malloc(nprocs * sizeof(int));
+ curr_from_proc = (unsigned *) ADIOI_Malloc(nprocs * sizeof(unsigned));
+ done_from_proc = (unsigned *) ADIOI_Malloc(nprocs * sizeof(unsigned));
+ recv_buf_idx = (unsigned *) ADIOI_Malloc(nprocs * sizeof(unsigned));
for (i=0; i < nprocs; i++) {
recv_buf_idx[i] = curr_from_proc[i] = 0;
@@ -1120,7 +959,7 @@ static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
for (i=0; i 0) {
@@ -1140,29 +979,32 @@ static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
if (recv_buf_idx[p] < recv_size[p]) {
if (curr_from_proc[p]+len > done_from_proc[p]) {
if (done_from_proc[p] > curr_from_proc[p]) {
- size = (int)ADIOI_MIN(curr_from_proc[p] + len -
+ size = ADIOI_MIN(curr_from_proc[p] + len -
done_from_proc[p], recv_size[p]-recv_buf_idx[p]);
buf_incr = done_from_proc[p] - curr_from_proc[p];
ADIOI_BUF_INCR
- buf_incr = (int)(curr_from_proc[p]+len-done_from_proc[p]);
+ buf_incr = curr_from_proc[p]+len-done_from_proc[p];
+ ADIOI_Assert((done_from_proc[p] + size) == (unsigned)((ADIO_Offset)done_from_proc[p] + size));
curr_from_proc[p] = done_from_proc[p] + size;
ADIOI_BUF_COPY
}
else {
- size = (int)ADIOI_MIN(len,recv_size[p]-recv_buf_idx[p]);
- buf_incr = (int)len;
- curr_from_proc[p] += size;
+ size = ADIOI_MIN(len,recv_size[p]-recv_buf_idx[p]);
+ buf_incr = len;
+ ADIOI_Assert((curr_from_proc[p] + size) == (unsigned)((ADIO_Offset)curr_from_proc[p] + size));
+ curr_from_proc[p] += (unsigned) size;
ADIOI_BUF_COPY
}
}
else {
- curr_from_proc[p] += (int)len;
- buf_incr = (int)len;
+ ADIOI_Assert((curr_from_proc[p] + len) == (unsigned)((ADIO_Offset)curr_from_proc[p] + len));
+ curr_from_proc[p] += (unsigned) len;
+ buf_incr = len;
ADIOI_BUF_INCR
}
}
else {
- buf_incr = (int)len;
+ buf_incr = len;
ADIOI_BUF_INCR
}
off += len;
@@ -1179,7 +1021,7 @@ static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
static void ADIOI_R_Exchange_data_alltoallv(
ADIO_File fd, void *buf, ADIOI_Flatlist_node
- *flat_buf, ADIO_Offset *offset_list, int
+ *flat_buf, ADIO_Offset *offset_list, ADIO_Offset
*len_list, int *send_size, int *recv_size,
int *count, int *start_pos, int *partial_send,
int *recd_from_proc, int nprocs,
@@ -1192,9 +1034,8 @@ static void ADIOI_R_Exchange_data_alltoallv(
{
int i, j, k=0, tmp=0, nprocs_recv, nprocs_send;
char **recv_buf = NULL;
- MPI_Request *requests;
- MPI_Datatype send_type;
- MPI_Status *statuses;
+ MPI_Request *requests=NULL;
+ MPI_Status *statuses=NULL;
int rtail, stail;
char *sbuf_ptr, *from_ptr;
int len;
@@ -1238,7 +1079,8 @@ static void ADIOI_R_Exchange_data_alltoallv(
}
sbuf_ptr = all_send_buf + sdispls[i];
for (j=0; jcomm );
#if 0
- printf( "\tall_recv_buf = " );
- for (i=131072; i<131073; i++) { printf( "%2d,", all_recv_buf [i] ); }
- printf( "\n" );
+ DBG_FPRINTF(stderr, "\tall_recv_buf = " );
+ for (i=131072; i<131073; i++) { DBG_FPRINTF(stderr, "%2d,", all_recv_buf [i] ); }
+ DBG_FPRINTF(stderr, "\n" );
#endif
/* unpack at the receiver side */
if (nprocs_recv) {
if (!buftype_is_contig)
ADIOI_Fill_user_buffer(fd, buf, flat_buf, recv_buf,
- offset_list, len_list, recv_size,
+ offset_list, len_list, (unsigned*)recv_size,
requests, statuses, /* never used inside */
recd_from_proc,
nprocs, contig_access_count,
diff --git a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_read.c b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_read.c
index 363d8c3436..b62c8f4fcf 100644
--- a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_read.c
+++ b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_read.c
@@ -21,9 +21,9 @@ void ADIOI_BGL_ReadContig(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status, int *error_code)
{
- int err=-1, datatype_size, len;
+ int err=-1, datatype_size;
+ ADIO_Offset len;
static char myname[] = "ADIOI_BGL_READCONTIG";
-
#if BGL_PROFILE
/* timing */
double io_time, io_time2;
@@ -35,7 +35,8 @@ void ADIOI_BGL_ReadContig(ADIO_File fd, void *buf, int count,
#endif
MPI_Type_size(datatype, &datatype_size);
- len = datatype_size * count;
+ len = (ADIO_Offset)datatype_size * (ADIO_Offset)count;
+ ADIOI_Assert(len == (unsigned int) len); /* read takes an unsigned int parm */
#if BGL_PROFILE
@@ -48,7 +49,7 @@ void ADIOI_BGL_ReadContig(ADIO_File fd, void *buf, int count,
ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
else ADIOI_READ_LOCK(fd, offset, SEEK_SET, len);
if (bglmpio_timing2) io_time2 = MPI_Wtime();
- err = read(fd->fd_sys, buf, len);
+ err = read(fd->fd_sys, buf, (unsigned int)len);
if (bglmpio_timing2) bglmpio_prof_cr[ BGLMPIO_CIO_T_POSI_RW ] += (MPI_Wtime() - io_time2);
ADIOI_UNLOCK(fd, offset, SEEK_SET, len);
fd->fp_sys_posn = offset + err;
@@ -64,7 +65,7 @@ void ADIOI_BGL_ReadContig(ADIO_File fd, void *buf, int count,
ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
else ADIOI_READ_LOCK(fd, offset, SEEK_SET, len);
if (bglmpio_timing2) io_time2 = MPI_Wtime();
- err = read(fd->fd_sys, buf, len);
+ err = read(fd->fd_sys, buf, (unsigned int)len);
if (bglmpio_timing2) bglmpio_prof_cr[ BGLMPIO_CIO_T_POSI_RW ] += (MPI_Wtime() - io_time2);
ADIOI_UNLOCK(fd, offset, SEEK_SET, len);
fd->fp_ind += err;
@@ -79,7 +80,7 @@ void ADIOI_BGL_ReadContig(ADIO_File fd, void *buf, int count,
if (fd->atomicity)
ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
else ADIOI_READ_LOCK(fd, offset, SEEK_SET, len);
- err = read(fd->fd_sys, buf, len);
+ err = read(fd->fd_sys, buf, (unsigned int)len);
ADIOI_UNLOCK(fd, offset, SEEK_SET, len);
fd->fp_sys_posn = offset + err;
/* individual file pointer not updated */
@@ -91,7 +92,7 @@ void ADIOI_BGL_ReadContig(ADIO_File fd, void *buf, int count,
if (fd->atomicity)
ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
else ADIOI_READ_LOCK(fd, offset, SEEK_SET, len);
- err = read(fd->fd_sys, buf, len);
+ err = read(fd->fd_sys, buf, (unsigned int)len);
ADIOI_UNLOCK(fd, offset, SEEK_SET, len);
fd->fp_ind += err;
fd->fp_sys_posn = fd->fp_ind;
@@ -120,12 +121,11 @@ void ADIOI_BGL_ReadContig(ADIO_File fd, void *buf, int count,
}
-
#define ADIOI_BUFFERED_READ \
{ \
if (req_off >= readbuf_off + readbuf_len) { \
readbuf_off = req_off; \
- readbuf_len = (int) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));\
+ readbuf_len = (unsigned) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));\
lseek(fd->fd_sys, readbuf_off, SEEK_SET);\
if (!(fd->atomicity)) ADIOI_READ_LOCK(fd, readbuf_off, SEEK_SET, readbuf_len);\
err = read(fd->fd_sys, readbuf, readbuf_len);\
@@ -133,6 +133,7 @@ void ADIOI_BGL_ReadContig(ADIO_File fd, void *buf, int count,
if (err == -1) err_flag = 1; \
} \
while (req_len > readbuf_off + readbuf_len - req_off) { \
+ ADIOI_Assert((readbuf_off + readbuf_len - req_off) == (int) (readbuf_off + readbuf_len - req_off));\
partial_read = (int) (readbuf_off + readbuf_len - req_off); \
tmp_buf = (char *) ADIOI_Malloc(partial_read); \
memcpy(tmp_buf, readbuf+readbuf_len-partial_read, partial_read); \
@@ -141,7 +142,7 @@ void ADIOI_BGL_ReadContig(ADIO_File fd, void *buf, int count,
memcpy(readbuf, tmp_buf, partial_read); \
ADIOI_Free(tmp_buf); \
readbuf_off += readbuf_len-partial_read; \
- readbuf_len = (int) (partial_read + ADIOI_MIN(max_bufsize, \
+ readbuf_len = (unsigned) (partial_read + ADIOI_MIN(max_bufsize, \
end_offset-readbuf_off+1)); \
lseek(fd->fd_sys, readbuf_off+partial_read, SEEK_SET);\
if (!(fd->atomicity)) ADIOI_READ_LOCK(fd, readbuf_off+partial_read, SEEK_SET, readbuf_len-partial_read);\
@@ -149,6 +150,7 @@ void ADIOI_BGL_ReadContig(ADIO_File fd, void *buf, int count,
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, readbuf_off+partial_read, SEEK_SET, readbuf_len-partial_read);\
if (err == -1) err_flag = 1; \
} \
+ ADIOI_Assert(req_len == (size_t)req_len); \
memcpy((char *)buf + userbuf_off, readbuf+req_off-readbuf_off, req_len); \
}
@@ -160,20 +162,23 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count,
{
/* offset is in units of etype relative to the filetype. */
+
ADIOI_Flatlist_node *flat_buf, *flat_file;
- int i, j, k, err=-1, brd_size, frd_size=0, st_index=0;
- int bufsize, num, size, sum, n_etypes_in_filetype, size_in_filetype;
- int n_filetypes, etype_in_filetype;
+ ADIO_Offset i_offset, new_brd_size, brd_size, size;
+ int i, j, k, err=-1, st_index=0;
+ ADIO_Offset frd_size=0, new_frd_size, st_frd_size;
+ unsigned num, bufsize;
+ int n_etypes_in_filetype;
+ ADIO_Offset n_filetypes, etype_in_filetype, st_n_filetypes, size_in_filetype;
ADIO_Offset abs_off_in_filetype=0;
- int filetype_size, etype_size, buftype_size, req_len, partial_read;
+ int filetype_size, etype_size, buftype_size, partial_read;
MPI_Aint filetype_extent, buftype_extent;
int buf_count, buftype_is_contig, filetype_is_contig;
- ADIO_Offset userbuf_off;
+ ADIO_Offset userbuf_off, req_len, sum;
ADIO_Offset off, req_off, disp, end_offset=0, readbuf_off, start_off;
char *readbuf, *tmp_buf, *value;
- int flag, st_frd_size, st_n_filetypes, readbuf_len;
- int new_brd_size, new_frd_size, err_flag=0, info_flag, max_bufsize;
-
+ int err_flag=0, info_flag;
+ unsigned max_bufsize, readbuf_len;
static char myname[] = "ADIOI_BGL_READSTRIDED";
if (fd->hints->ds_read == ADIOI_HINT_DISABLE) {
@@ -207,12 +212,13 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count,
MPI_Type_extent(datatype, &buftype_extent);
etype_size = fd->etype_size;
+ ADIOI_Assert((buftype_size * count) == ((ADIO_Offset)(unsigned)buftype_size * (ADIO_Offset)count));
bufsize = buftype_size * count;
/* get max_bufsize from the info object. */
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
- MPI_Info_get(fd->info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, value,
+ ADIOI_Info_get(fd->info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, value,
&info_flag);
max_bufsize = atoi(value);
ADIOI_Free(value);
@@ -226,13 +232,13 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count,
while (flat_buf->type != datatype) flat_buf = flat_buf->next;
off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind :
- fd->disp + etype_size * offset;
+ fd->disp + (ADIO_Offset)etype_size * offset;
start_off = off;
end_offset = off + bufsize - 1;
readbuf_off = off;
readbuf = (char *) ADIOI_Malloc(max_bufsize);
- readbuf_len = (int) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));
+ readbuf_len = (unsigned) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));
/* if atomicity is true, lock (exclusive) the region to be accessed */
if (fd->atomicity)
@@ -245,13 +251,16 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count,
if (err == -1) err_flag = 1;
for (j=0; j<count; j++)
- for (i=0; i<flat_buf->count; i++) {
- userbuf_off = j*buftype_extent + flat_buf->indices[i];
- req_off = off;
- req_len = flat_buf->blocklens[i];
- ADIOI_BUFFERED_READ
- off += flat_buf->blocklens[i];
- }
+ {
+ int i;
+ for (i=0; i<flat_buf->count; i++) {
+ userbuf_off = (ADIO_Offset)j*(ADIO_Offset)buftype_extent + flat_buf->indices[i];
+ req_off = off;
+ req_len = flat_buf->blocklens[i];
+ ADIOI_BUFFERED_READ
+ off += flat_buf->blocklens[i];
+ }
+ }
if (fd->atomicity)
ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
@@ -277,29 +286,36 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count,
disp = fd->disp;
if (file_ptr_type == ADIO_INDIVIDUAL) {
- offset = fd->fp_ind; /* in bytes */
- n_filetypes = -1;
- flag = 0;
- while (!flag) {
- n_filetypes++;
- for (i=0; i<flat_file->count; i++) {
- if (disp + flat_file->indices[i] +
- (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i]
- >= offset) {
- st_index = i;
- frd_size = (int) (disp + flat_file->indices[i] +
- (ADIO_Offset) n_filetypes*filetype_extent
- + flat_file->blocklens[i] - offset);
- flag = 1;
- break;
- }
+ /* Wei-keng reworked type processing to be a bit more efficient */
+ offset = fd->fp_ind - disp;
+ n_filetypes = (offset - flat_file->indices[0]) / filetype_extent;
+ offset -= (ADIO_Offset)n_filetypes * filetype_extent;
+ /* now offset is local to this extent */
+
+ /* find the block where offset is located, skip blocklens[i]==0 */
+ for (i=0; i<flat_file->count; i++) {
+ ADIO_Offset dist;
+ if (flat_file->blocklens[i] == 0) continue;
+ dist = flat_file->indices[i] + flat_file->blocklens[i] - offset;
+ /* frd_size is from offset to the end of block i */
+ if (dist == 0) {
+ i++;
+ offset = flat_file->indices[i];
+ frd_size = flat_file->blocklens[i];
+ break;
+ }
+ if (dist > 0) {
+ frd_size = dist;
+ break;
}
}
+ st_index = i; /* starting index in flat_file->indices[] */
+ offset += disp + (ADIO_Offset)n_filetypes*filetype_extent;
}
else {
n_etypes_in_filetype = filetype_size/etype_size;
- n_filetypes = (int) (offset / n_etypes_in_filetype);
- etype_in_filetype = (int) (offset % n_etypes_in_filetype);
+ n_filetypes = offset / n_etypes_in_filetype;
+ etype_in_filetype = offset % n_etypes_in_filetype;
size_in_filetype = etype_in_filetype * etype_size;
sum = 0;
@@ -315,32 +331,63 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count,
}
/* abs. offset in bytes in the file */
- offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype;
+ offset = disp + (ADIO_Offset) n_filetypes*filetype_extent +
+ abs_off_in_filetype;
}
start_off = offset;
+ /* Wei-keng Liao: read request is within a single flat_file contig
+ * block e.g. with subarray types that actually describe the whole
+ * array */
+ if (buftype_is_contig && bufsize <= frd_size) {
+ ADIO_ReadContig(fd, buf, bufsize, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
+ offset, status, error_code);
+
+ if (file_ptr_type == ADIO_INDIVIDUAL) {
+ /* update MPI-IO file pointer to point to the first byte that
+ * can be accessed in the fileview. */
+ fd->fp_ind = offset + bufsize;
+ if (bufsize == frd_size) {
+ do {
+ st_index++;
+ if (st_index == flat_file->count) {
+ st_index = 0;
+ n_filetypes++;
+ }
+ } while (flat_file->blocklens[st_index] == 0);
+ fd->fp_ind = disp + flat_file->indices[st_index]
+ + n_filetypes*filetype_extent;
+ }
+ }
+ fd->fp_sys_posn = -1; /* set it to null. */
+#ifdef HAVE_STATUS_SET_BYTES
+ MPIR_Status_set_bytes(status, datatype, bufsize);
+#endif
+ return;
+ }
+
/* Calculate end_offset, the last byte-offset that will be accessed.
e.g., if start_offset=0 and 100 bytes to be read, end_offset=99*/
st_frd_size = frd_size;
st_n_filetypes = n_filetypes;
- i = 0;
+ i_offset = 0;
j = st_index;
off = offset;
frd_size = ADIOI_MIN(st_frd_size, bufsize);
- while (i < bufsize) {
- i += frd_size;
+ while (i_offset < bufsize) {
+ i_offset += frd_size;
end_offset = off + frd_size - 1;
- if (j < (flat_file->count - 1)) j++;
- else {
- j = 0;
- n_filetypes++;
+ j = (j+1) % flat_file->count;
+ n_filetypes += (j == 0) ? 1 : 0;
+ while (flat_file->blocklens[j]==0) {
+ j = (j+1) % flat_file->count;
+ n_filetypes += (j == 0) ? 1 : 0;
}
-
- off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent;
- frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
+ off = disp + flat_file->indices[j] + n_filetypes*(ADIO_Offset)filetype_extent;
+ frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset);
}
/* if atomicity is true, lock (exclusive) the region to be accessed */
@@ -350,7 +397,7 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count,
/* initial read into readbuf */
readbuf_off = offset;
readbuf = (char *) ADIOI_Malloc(max_bufsize);
- readbuf_len = (int) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));
+ readbuf_len = (unsigned) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));
lseek(fd->fd_sys, offset, SEEK_SET);
if (!(fd->atomicity)) ADIOI_READ_LOCK(fd, offset, SEEK_SET, readbuf_len);
@@ -364,12 +411,12 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count,
/* contiguous in memory, noncontiguous in file. should be the most
common case. */
- i = 0;
+ i_offset = 0;
j = st_index;
off = offset;
n_filetypes = st_n_filetypes;
frd_size = ADIOI_MIN(st_frd_size, bufsize);
- while (i < bufsize) {
+ while (i_offset < bufsize) {
if (frd_size) {
/* TYPE_UB and TYPE_LB can result in
frd_size = 0. save system call in such cases */
@@ -378,25 +425,26 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count,
req_off = off;
req_len = frd_size;
- userbuf_off = i;
+ userbuf_off = i_offset;
ADIOI_BUFFERED_READ
}
- i += frd_size;
+ i_offset += frd_size;
if (off + frd_size < disp + flat_file->indices[j] +
- flat_file->blocklens[j] + (ADIO_Offset) n_filetypes*filetype_extent)
+ flat_file->blocklens[j] + n_filetypes*(ADIO_Offset)filetype_extent)
off += frd_size;
/* did not reach end of contiguous block in filetype.
no more I/O needed. off is incremented by frd_size. */
else {
- if (j < (flat_file->count - 1)) j++;
- else {
- j = 0;
- n_filetypes++;
+ j = (j+1) % flat_file->count;
+ n_filetypes += (j == 0) ? 1 : 0;
+ while (flat_file->blocklens[j]==0) {
+ j = (j+1) % flat_file->count;
+ n_filetypes += (j == 0) ? 1 : 0;
}
off = disp + flat_file->indices[j] +
- (ADIO_Offset) n_filetypes*filetype_extent;
- frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
+ n_filetypes*(ADIO_Offset)filetype_extent;
+ frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset);
}
}
}
@@ -408,7 +456,7 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count,
while (flat_buf->type != datatype) flat_buf = flat_buf->next;
k = num = buf_count = 0;
- i = (int) (flat_buf->indices[0]);
+ i_offset = flat_buf->indices[0];
j = st_index;
off = offset;
n_filetypes = st_n_filetypes;
@@ -423,7 +471,7 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count,
req_off = off;
req_len = size;
- userbuf_off = i;
+ userbuf_off = i_offset;
ADIOI_BUFFERED_READ
}
@@ -432,18 +480,19 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count,
if (size == frd_size) {
/* reached end of contiguous block in file */
- if (j < (flat_file->count - 1)) j++;
- else {
- j = 0;
- n_filetypes++;
+ j = (j+1) % flat_file->count;
+ n_filetypes += (j == 0) ? 1 : 0;
+ while (flat_file->blocklens[j]==0) {
+ j = (j+1) % flat_file->count;
+ n_filetypes += (j == 0) ? 1 : 0;
}
off = disp + flat_file->indices[j] +
- (ADIO_Offset) n_filetypes*filetype_extent;
+ n_filetypes*(ADIO_Offset)filetype_extent;
new_frd_size = flat_file->blocklens[j];
if (size != brd_size) {
- i += size;
+ i_offset += size;
new_brd_size -= size;
}
}
@@ -453,7 +502,7 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count,
k = (k + 1)%flat_buf->count;
buf_count++;
- i = (int) (buftype_extent*(buf_count/flat_buf->count) +
+ i_offset = ((ADIO_Offset)buftype_extent*(ADIO_Offset)(buf_count/flat_buf->count) +
flat_buf->indices[k]);
new_brd_size = flat_buf->blocklens[k];
if (size != frd_size) {
@@ -461,6 +510,7 @@ void ADIOI_BGL_ReadStrided(ADIO_File fd, void *buf, int count,
new_frd_size -= size;
}
}
+ ADIOI_Assert(((ADIO_Offset)num + size) == (unsigned)(num + size));
num += size;
frd_size = new_frd_size;
brd_size = new_brd_size;
diff --git a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_tuning.c b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_tuning.c
index 4959fa3eeb..074186efe0 100644
--- a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_tuning.c
+++ b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_tuning.c
@@ -3,7 +3,13 @@
/* ---------------------------------------------------------------- */
/**
* \file ad_bgl_tuning.c
- * \brief ???
+ * \brief defines ad_bgl performance tuning
+ */
+
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ * Copyright (C) 2008 University of Chicago.
+ * See COPYRIGHT notice in top-level directory.
*/
/*---------------------------------------------------------------------
@@ -26,6 +32,40 @@ double bglmpio_prof_cw [BGLMPIO_CIO_LAST];
double bglmpio_prof_cr [BGLMPIO_CIO_LAST];
/* set internal variables for tuning environment variables */
+/** \page env_vars Environment Variables
+ * - BGLMPIO_COMM - Define how data is exchanged on collective
+ * reads and writes. Possible values:
+ * - 0 - Use MPI_Alltoallv.
+ * - 1 - Use MPI_Isend/MPI_Irecv.
+ * - Default is 0.
+ *
+ * - BGLMPIO_TIMING - collect timing breakdown for MPI I/O collective calls.
+ * Must also compile the library with BGL_PROFILE defined. Possible values:
+ * - 0 - Do not collect/report timing.
+ * - 1 - Collect/report timing.
+ * - Default is 0.
+ *
+ * - BGLMPIO_TIMING2 - collect additional averages for MPI I/O collective calls.
+ * Must also compile the library with BGL_PROFILE defined. Possible values:
+ * - 0 - Do not collect/report averages.
+ * - 1 - Collect/report averages.
+ * - Default is 0.
+ *
+ * - BGLMPIO_TUNEGATHER - Tune how starting and ending offsets are communicated
+ * for aggregator collective i/o. Possible values:
+ * - 0 - Use two MPI_Allgather's to collect starting and ending offsets.
+ * - 1 - Use MPI_Allreduce(MPI_MAX) to collect starting and ending offsets.
+ * - Default is 1.
+ *
+ * - BGLMPIO_TUNEBLOCKING - Tune how aggregate file domains are
+ * calculated (block size). Possible values:
+ * - 0 - Evenly calculate file domains across aggregators. Also use
+ * MPI_Isend/MPI_Irecv to exchange domain information.
+ * - 1 - Align file domains with the underlying file system's block size. Also use
+ * MPI_Alltoallv to exchange domain information.
+ * - Default is 1.
+ *
+*/
void ad_bgl_get_env_vars() {
char *x;
diff --git a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_wrcoll.c b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_wrcoll.c
index c8fabc1818..c7e32df6b7 100644
--- a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_wrcoll.c
+++ b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_wrcoll.c
@@ -18,6 +18,9 @@
#include "ad_bgl_pset.h"
#include "ad_bgl_aggrs.h"
+#ifdef AGGREGATION_PROFILE
+#include "mpe.h"
+#endif
#ifdef PROFILE
#include "mpe.h"
#endif
@@ -26,13 +29,13 @@
static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype
datatype, int nprocs, int myrank, ADIOI_Access
*others_req, ADIO_Offset *offset_list,
- int *len_list, int contig_access_count, ADIO_Offset
+ ADIO_Offset *len_list, int contig_access_count, ADIO_Offset
min_st_offset, ADIO_Offset fd_size,
ADIO_Offset *fd_start, ADIO_Offset *fd_end,
int *buf_idx, int *error_code);
static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
ADIOI_Flatlist_node *flat_buf, ADIO_Offset
- *offset_list, int *len_list, int *send_size,
+ *offset_list, ADIO_Offset *len_list, int *send_size,
int *recv_size, ADIO_Offset off, int size,
int *count, int *start_pos, int *partial_recv,
int *sent_to_proc, int nprocs,
@@ -49,7 +52,7 @@ static void ADIOI_W_Exchange_data_alltoallv(
char *write_buf, /* 1 */
ADIOI_Flatlist_node *flat_buf,
ADIO_Offset *offset_list,
- int *len_list, int *send_size, int *recv_size,
+ ADIO_Offset *len_list, int *send_size, int *recv_size,
ADIO_Offset off, int size, /* 2 */
int *count, int *start_pos, int *partial_recv,
int *sent_to_proc, int nprocs, int myrank,
@@ -65,7 +68,7 @@ static void ADIOI_W_Exchange_data_alltoallv(
int *error_code);
static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
*flat_buf, char **send_buf, ADIO_Offset
- *offset_list, int *len_list, int *send_size,
+ *offset_list, ADIO_Offset *len_list, int *send_size,
MPI_Request *requests, int *sent_to_proc,
int nprocs, int myrank,
int contig_access_count, ADIO_Offset
@@ -76,7 +79,7 @@ static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
MPI_Aint buftype_extent);
static void ADIOI_Fill_send_buffer_nosend(ADIO_File fd, void *buf, ADIOI_Flatlist_node
*flat_buf, char **send_buf, ADIO_Offset
- *offset_list, int *len_list, int *send_size,
+ *offset_list, ADIO_Offset *len_list, int *send_size,
MPI_Request *requests, int *sent_to_proc,
int nprocs, int myrank,
int contig_access_count, ADIO_Offset
@@ -118,26 +121,27 @@ void ADIOI_BGL_WriteStridedColl(ADIO_File fd, void *buf, int count,
ADIO_Offset *bgl_offsets0 = NULL, *bgl_offsets = NULL;
int ii;
- int *buf_idx = NULL, *len_list = NULL;
-
- double io_time = 0, all_time, max_all_time;
- double tstep1, max_tstep1;
- double tstep1_1, max_tstep1_1;
- double tstep1_2, max_tstep1_2;
- double tstep1_3, max_tstep1_3;
- double tstep2, max_tstep2;
- double tstep3, max_tstep3;
- double tstep4, max_tstep4;
- double sum_sz;
-
+ int *buf_idx = NULL;
+ ADIO_Offset *len_list = NULL;
#if BGL_PROFILE
BGLMPIO_T_CIO_RESET( 0, w )
#endif
-
+#if 0
+ /* From common code - not implemented for bgl.*/
+ int old_error, tmp_error;
+#endif
#ifdef PROFILE
MPE_Log_event(13, 0, "start computation");
#endif
+#if 0
+/* From common code - not implemented for bgl. */
+ if (fd->hints->cb_pfr != ADIOI_HINT_DISABLE) {
+ ADIOI_IOStridedColl (fd, buf, count, ADIOI_WRITE, datatype,
+ file_ptr_type, offset, status, error_code);
+ return;
+ }
+#endif
MPI_Comm_size(fd->comm, &nprocs);
MPI_Comm_rank(fd->comm, &myrank);
@@ -207,7 +211,8 @@ void ADIOI_BGL_WriteStridedColl(ADIO_File fd, void *buf, int count,
/* are the accesses of different processes interleaved? */
for (i=1; idisp + (fd->etype_size) * offset;
+ off = fd->disp + (ADIO_Offset)(fd->etype_size) * offset;
ADIO_WriteContig(fd, buf, count, datatype,
ADIO_EXPLICIT_OFFSET,
off, status, error_code);
@@ -260,7 +265,9 @@ void ADIOI_BGL_WriteStridedColl(ADIO_File fd, void *buf, int count,
else
ADIOI_Calc_file_domains(st_offsets, end_offsets, nprocs,
nprocs_for_coll, &min_st_offset,
- &fd_start, &fd_end, &fd_size);
+ &fd_start, &fd_end,
+ fd->hints->min_fdomain_size, &fd_size,
+ fd->hints->striping_unit);
#if BGL_PROFILE
BGLMPIO_T_CIO_SET_GET( 0, w, 0, 1, 1, BGLMPIO_CIO_MYREQ, BGLMPIO_CIO_FD_PART )
@@ -329,9 +336,50 @@ void ADIOI_BGL_WriteStridedColl(ADIO_File fd, void *buf, int count,
BGLMPIO_T_CIO_REPORT( 0, w, fd, myrank )
#endif
-
+#if 0
+ /* From common code - not implemented for bgl.
+ *
+ * If this collective write is followed by an independent write,
+ * it's possible to have those subsequent writes on other processes
+ * race ahead and sneak in before the read-modify-write completes.
+ * We carry out a collective communication at the end here so no one
+ * can start independent i/o before collective I/O completes.
+ *
+ * need to do some gymnastics with the error codes so that if something
+ * went wrong, all processes report error, but if a process has a more
+ * specific error code, we can still have that process report the
+ * additional information */
+ old_error = *error_code;
+ if (*error_code != MPI_SUCCESS) *error_code = MPI_ERR_IO;
+
+ /* optimization: if only one process performing i/o, we can perform
+ * a less-expensive Bcast */
+#ifdef ADIOI_MPE_LOGGING
+ MPE_Log_event( ADIOI_MPE_postwrite_a, 0, NULL );
+#endif
+ if (fd->hints->cb_nodes == 1)
+ MPI_Bcast(error_code, 1, MPI_INT,
+ fd->hints->ranklist[0], fd->comm);
+ else {
+ tmp_error = *error_code;
+ MPI_Allreduce(&tmp_error, error_code, 1, MPI_INT,
+ MPI_MAX, fd->comm);
+ }
+#ifdef ADIOI_MPE_LOGGING
+ MPE_Log_event( ADIOI_MPE_postwrite_b, 0, NULL );
+#endif
+#ifdef AGGREGATION_PROFILE
+ MPE_Log_event (5012, 0, NULL);
+#endif
+
+ if ( (old_error != MPI_SUCCESS) && (old_error != MPI_ERR_IO) )
+ *error_code = old_error;
+
+
+#endif
/* free all memory allocated for collective I/O */
+ if (!buftype_is_contig) ADIOI_Delete_flattened(datatype);
for (i=0; ifp_sys_posn = -1; /* set it to null. */
+#ifdef AGGREGATION_PROFILE
+ MPE_Log_event (5013, 0, NULL);
+#endif
}
@@ -371,12 +422,12 @@ void ADIOI_BGL_WriteStridedColl(ADIO_File fd, void *buf, int count,
* code is created and returned in error_code.
*/
static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype
- datatype, int nprocs, int myrank,
+ datatype, int nprocs,
+ int myrank,
ADIOI_Access
*others_req, ADIO_Offset *offset_list,
- int *len_list, int contig_access_count,
- ADIO_Offset
- min_st_offset, ADIO_Offset fd_size,
+ ADIO_Offset *len_list, int contig_access_count,
+ ADIO_Offset min_st_offset, ADIO_Offset fd_size,
ADIO_Offset *fd_start, ADIO_Offset *fd_end,
int *buf_idx, int *error_code)
{
@@ -389,7 +440,9 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype
array to a file, where each local array is 8Mbytes, requiring
at least another 8Mbytes of temp space is unacceptable. */
- int hole, i, j, m, size=0, ntimes, max_ntimes, buftype_is_contig;
+ /* Not convinced end_loc-st_loc couldn't be > int, so make these offsets*/
+ ADIO_Offset size=0;
+ int hole, i, j, m, ntimes, max_ntimes, buftype_is_contig;
ADIO_Offset st_loc=-1, end_loc=-1, off, done, req_off;
char *write_buf=NULL;
int *curr_offlen_ptr, *count, *send_size, req_len, *recv_size;
@@ -410,7 +463,7 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype
That gives the no. of communication phases as well. */
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
- MPI_Info_get(fd->info, "cb_buffer_size", MPI_MAX_INFO_VAL, value,
+ ADIOI_Info_get(fd->info, "cb_buffer_size", MPI_MAX_INFO_VAL, value,
&info_flag);
coll_bufsize = atoi(value);
ADIOI_Free(value);
@@ -526,7 +579,7 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype
#endif
for (i=0; i < nprocs; i++) count[i] = recv_size[i] = 0;
- size = (int) (ADIOI_MIN(coll_bufsize, end_loc-st_loc+1-done));
+ size = ADIOI_MIN((unsigned)coll_bufsize, end_loc-st_loc+1-done);
for (i=0; i < nprocs; i++) {
if (others_req[i].count) {
@@ -550,12 +603,14 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype
}
if (req_off < off + size) {
count[i]++;
+ ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)write_buf)+req_off-off) == (ADIO_Offset)(MPIR_Upint)(write_buf+req_off-off));
MPI_Address(write_buf+req_off-off,
&(others_req[i].mem_ptrs[j]));
- recv_size[i] += (int)(ADIOI_MIN(off + (ADIO_Offset)size -
- req_off, req_len));
+ ADIOI_Assert((off + size - req_off) == (int)(off + size - req_off));
+ recv_size[i] += (int)(ADIOI_MIN(off + size - req_off,
+ (unsigned)req_len));
- if (off+size-req_off < req_len)
+ if (off+size-req_off < (unsigned)req_len)
{
partial_recv[i] = (int) (off + size - req_off);
@@ -618,7 +673,8 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype
if (count[i]) flag = 1;
if (flag) {
- ADIO_WriteContig(fd, write_buf, size, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
+ ADIOI_Assert(size == (int)size);
+ ADIO_WriteContig(fd, write_buf, (int)size, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
off, &status, error_code);
if (*error_code != MPI_SUCCESS) return;
}
@@ -678,7 +734,7 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype
*/
static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
ADIOI_Flatlist_node *flat_buf, ADIO_Offset
- *offset_list, int *len_list, int *send_size,
+ *offset_list, ADIO_Offset *len_list, int *send_size,
int *recv_size, ADIO_Offset off, int size,
int *count, int *start_pos,
int *partial_recv,
@@ -758,19 +814,26 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
}
ADIOI_Free(tmp_len);
-/* check if there are any holes */
+ /* check if there are any holes. If yes, must do read-modify-write.
+ * holes can be in three places. 'middle' is what you'd expect: the
+ * processes are operating on noncontiguous data. But holes can also show
+ * up at the beginning or end of the file domain (see John Bent ROMIO REQ
+ * #835). Missing these holes would result in us writing more data than
+ * received by everyone else. */
*hole = 0;
- /* See if there are holes before the first request or after the last request*/
- if((srt_off[0] > off) ||
- ((srt_off[sum-1] + srt_len[sum-1]) < (off + size)))
- {
- *hole = 1;
- }
- else /* See if there are holes between the requests, if there are more than one */
- for (i=0; i srt_len[0]) srt_len[0] = new_len;
+ }
+ else
+ break;
+ }
+ if (i < sum || size != srt_len[0]) /* hole in middle or end */
+ *hole = 1;
}
ADIOI_Free(srt_off);
@@ -821,6 +884,9 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
/* post sends. if buftype_is_contig, data can be directly sent from
user buf at location given by buf_idx. else use send_buf. */
+#ifdef AGGREGATION_PROFILE
+ MPE_Log_event (5032, 0, NULL);
+#endif
if (buftype_is_contig) {
j = 0;
for (i=0; i < nprocs; i++)
@@ -895,6 +961,9 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
MPI_Waitall(nprocs_send+nprocs_recv, requests, statuses);
#endif
+#ifdef AGGREGATION_PROFILE
+ MPE_Log_event (5033, 0, NULL);
+#endif
ADIOI_Free(statuses);
ADIOI_Free(requests);
if (!buftype_is_contig && nprocs_send) {
@@ -918,7 +987,7 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
n_buftypes++; \
} \
user_buf_idx = flat_buf->indices[flat_buf_idx] + \
- n_buftypes*buftype_extent; \
+ (ADIO_Offset)n_buftypes*(ADIO_Offset)buftype_extent; \
flat_buf_sz = flat_buf->blocklens[flat_buf_idx]; \
} \
buf_incr -= size_in_buf; \
@@ -930,6 +999,8 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
{ \
while (size) { \
size_in_buf = ADIOI_MIN(size, flat_buf_sz); \
+ ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + user_buf_idx) == (ADIO_Offset)(MPIR_Upint)((MPIR_Upint)buf + user_buf_idx)); \
+ ADIOI_Assert(size_in_buf == (size_t)size_in_buf); \
memcpy(&(send_buf[p][send_buf_idx[p]]), \
((char *) buf) + user_buf_idx, size_in_buf); \
send_buf_idx[p] += size_in_buf; \
@@ -942,7 +1013,7 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
n_buftypes++; \
} \
user_buf_idx = flat_buf->indices[flat_buf_idx] + \
- n_buftypes*buftype_extent; \
+ (ADIO_Offset)n_buftypes*(ADIO_Offset)buftype_extent; \
flat_buf_sz = flat_buf->blocklens[flat_buf_idx]; \
} \
size -= size_in_buf; \
@@ -951,11 +1022,9 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
ADIOI_BUF_INCR \
}
-
-
static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
*flat_buf, char **send_buf, ADIO_Offset
- *offset_list, int *len_list, int *send_size,
+ *offset_list, ADIO_Offset *len_list, int *send_size,
MPI_Request *requests, int *sent_to_proc,
int nprocs, int myrank,
int contig_access_count,
@@ -967,8 +1036,9 @@ static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
{
/* this function is only called if buftype is not contig */
- int i, p, flat_buf_idx, size;
- int flat_buf_sz, buf_incr, size_in_buf, jj, n_buftypes;
+ int i, p, flat_buf_idx;
+ ADIO_Offset flat_buf_sz, size_in_buf, buf_incr, size;
+ int jj, n_buftypes;
ADIO_Offset off, len, rem_len, user_buf_idx;
/* curr_to_proc[p] = amount of data sent to proc. p that has already
@@ -995,7 +1065,7 @@ static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
for (i=0; i done_to_proc[p]) {
if (done_to_proc[p] > curr_to_proc[p]) {
- size = (int)ADIOI_MIN(curr_to_proc[p] + len -
+ size = ADIOI_MIN(curr_to_proc[p] + len -
done_to_proc[p], send_size[p]-send_buf_idx[p]);
buf_incr = done_to_proc[p] - curr_to_proc[p];
ADIOI_BUF_INCR
- buf_incr = (int)(curr_to_proc[p] + len - done_to_proc[p]);
+ ADIOI_Assert((curr_to_proc[p] + len - done_to_proc[p]) == (unsigned)(curr_to_proc[p] + len - done_to_proc[p]));
+ buf_incr = curr_to_proc[p] + len - done_to_proc[p];
+ ADIOI_Assert((done_to_proc[p] + size) == (unsigned)(done_to_proc[p] + size));
curr_to_proc[p] = done_to_proc[p] + size;
ADIOI_BUF_COPY
}
else {
- size = (int)ADIOI_MIN(len,send_size[p]-send_buf_idx[p]);
- buf_incr = (int)len;
+ size = ADIOI_MIN(len,send_size[p]-send_buf_idx[p]);
+ buf_incr = len;
+ ADIOI_Assert((curr_to_proc[p] + size) == (unsigned)((ADIO_Offset)curr_to_proc[p] + size));
curr_to_proc[p] += size;
ADIOI_BUF_COPY
}
@@ -1036,13 +1109,14 @@ static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
}
}
else {
- curr_to_proc[p] += (int)len;
- buf_incr = (int)len;
+ ADIOI_Assert((curr_to_proc[p] + len) == (unsigned)((ADIO_Offset)curr_to_proc[p] + len));
+ curr_to_proc[p] += len;
+ buf_incr = len;
ADIOI_BUF_INCR
}
}
else {
- buf_incr = (int)len;
+ buf_incr = len;
ADIOI_BUF_INCR
}
off += len;
@@ -1181,7 +1255,7 @@ static void ADIOI_W_Exchange_data_alltoallv(
char *write_buf, /* 1 */
ADIOI_Flatlist_node *flat_buf,
ADIO_Offset *offset_list,
- int *len_list, int *send_size, int *recv_size,
+ ADIO_Offset *len_list, int *send_size, int *recv_size,
ADIO_Offset off, int size, /* 2 */
int *count, int *start_pos, int *partial_recv,
int *sent_to_proc, int nprocs, int myrank,
@@ -1196,11 +1270,10 @@ static void ADIOI_W_Exchange_data_alltoallv(
int iter, MPI_Aint buftype_extent, int *buf_idx,
int *error_code)
{
- int i, j, k=0, tmp=0, nprocs_recv, nprocs_send, erri, *tmp_len, err;
+ int i, j, k=0, nprocs_recv, nprocs_send, *tmp_len, err;
char **send_buf = NULL;
- MPI_Request *requests, *send_req;
- MPI_Datatype recv_type;
- MPI_Status *statuses, status;
+ MPI_Request *send_req=NULL;
+ MPI_Status status;
int rtail, stail;
char *sbuf_ptr, *to_ptr;
int len;
@@ -1324,7 +1397,8 @@ static void ADIOI_W_Exchange_data_alltoallv(
sbuf_ptr = all_recv_buf + rdispls[i];
for (j=0; j done_to_proc[p]) {
if (done_to_proc[p] > curr_to_proc[p]) {
- size = (int)ADIOI_MIN(curr_to_proc[p] + len -
+ size = ADIOI_MIN(curr_to_proc[p] + len -
done_to_proc[p], send_size[p]-send_buf_idx[p]);
buf_incr = done_to_proc[p] - curr_to_proc[p];
ADIOI_BUF_INCR
- buf_incr = (int)(curr_to_proc[p] + len - done_to_proc[p]);
+ ADIOI_Assert((curr_to_proc[p] + len - done_to_proc[p]) == (unsigned)(curr_to_proc[p] + len - done_to_proc[p]));
+ buf_incr = curr_to_proc[p] + len - done_to_proc[p];
+ ADIOI_Assert((done_to_proc[p] + size) == (unsigned)(done_to_proc[p] + size));
curr_to_proc[p] = done_to_proc[p] + size;
ADIOI_BUF_COPY
}
else {
- size = (int)ADIOI_MIN(len,send_size[p]-send_buf_idx[p]);
- buf_incr = (int)len;
+ size = ADIOI_MIN(len,send_size[p]-send_buf_idx[p]);
+ buf_incr = len;
+ ADIOI_Assert((curr_to_proc[p] + size) == (unsigned)((ADIO_Offset)curr_to_proc[p] + size));
curr_to_proc[p] += size;
ADIOI_BUF_COPY
}
@@ -1433,13 +1511,14 @@ static void ADIOI_Fill_send_buffer_nosend(ADIO_File fd, void *buf, ADIOI_Flatlis
*/
}
else {
+ ADIOI_Assert((curr_to_proc[p] + len) == (unsigned)((ADIO_Offset)curr_to_proc[p] + len));
curr_to_proc[p] += (int)len;
- buf_incr = (int)len;
+ buf_incr = len;
ADIOI_BUF_INCR
}
}
else {
- buf_incr = (int)len;
+ buf_incr = len;
ADIOI_BUF_INCR
}
off += len;
diff --git a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_write.c b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_write.c
index b482bb3ac3..6fcd569fef 100644
--- a/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_write.c
+++ b/ompi/mca/io/romio/romio/adio/ad_bgl/ad_bgl_write.c
@@ -17,13 +17,20 @@
#include "ad_bgl_tuning.h"
+#ifdef AGGREGATION_PROFILE
+#include "mpe.h"
+#endif
+
void ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status, int *error_code)
{
- int err=-1, datatype_size, len;
+ int err=-1, datatype_size;
+ ADIO_Offset len;
static char myname[] = "ADIOI_BGL_WRITECONTIG";
-
+#ifdef AGGREGATION_PROFILE
+ MPE_Log_event (5036, 0, NULL);
+#endif
#if BGL_PROFILE
/* timing */
double io_time, io_time2;
@@ -35,7 +42,8 @@ void ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count,
#endif
MPI_Type_size(datatype, &datatype_size);
- len = datatype_size * count;
+ len = (ADIO_Offset)datatype_size * (ADIO_Offset)count;
+ ADIOI_Assert(len == (unsigned int) len); /* write takes an unsigned int parm */
#if BGL_PROFILE
@@ -46,7 +54,7 @@ void ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count,
if (bglmpio_timing2) bglmpio_prof_cw[ BGLMPIO_CIO_T_SEEK ] += (MPI_Wtime() - io_time2);
ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
if (bglmpio_timing2) io_time2 = MPI_Wtime();
- err = write(fd->fd_sys, buf, len);
+ err = write(fd->fd_sys, buf, (unsigned int)len);
if (bglmpio_timing2) bglmpio_prof_cw[ BGLMPIO_CIO_T_POSI_RW ] += (MPI_Wtime() - io_time2);
ADIOI_UNLOCK(fd, offset, SEEK_SET, len);
fd->fp_sys_posn = offset + err;
@@ -60,7 +68,7 @@ void ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count,
if (bglmpio_timing2) bglmpio_prof_cw[ BGLMPIO_CIO_T_SEEK ] += (MPI_Wtime() - io_time2);
ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
if (bglmpio_timing2) io_time2 = MPI_Wtime();
- err = write(fd->fd_sys, buf, len);
+ err = write(fd->fd_sys, buf, (unsigned int)len);
if (bglmpio_timing2) bglmpio_prof_cw[ BGLMPIO_CIO_T_POSI_RW ] += (MPI_Wtime() - io_time2);
ADIOI_UNLOCK(fd, offset, SEEK_SET, len);
fd->fp_ind += err;
@@ -73,7 +81,7 @@ void ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count,
if (fd->fp_sys_posn != offset)
lseek(fd->fd_sys, offset, SEEK_SET);
ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
- err = write(fd->fd_sys, buf, len);
+ err = write(fd->fd_sys, buf, (unsigned int)len);
ADIOI_UNLOCK(fd, offset, SEEK_SET, len);
fd->fp_sys_posn = offset + err;
/* individual file pointer not updated */
@@ -83,7 +91,7 @@ void ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count,
if (fd->fp_sys_posn != fd->fp_ind)
lseek(fd->fd_sys, fd->fp_ind, SEEK_SET);
ADIOI_WRITE_LOCK(fd, offset, SEEK_SET, len);
- err = write(fd->fd_sys, buf, len);
+ err = write(fd->fd_sys, buf, (unsigned int)len);
ADIOI_UNLOCK(fd, offset, SEEK_SET, len);
fd->fp_ind += err;
fd->fp_sys_posn = fd->fp_ind;
@@ -110,11 +118,12 @@ void ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count,
#endif
*error_code = MPI_SUCCESS;
+#ifdef AGGREGATION_PROFILE
+ MPE_Log_event (5037, 0, NULL);
+#endif
}
-
-
#define ADIOI_BUFFERED_WRITE \
{ \
if (req_off >= writebuf_off + writebuf_len) { \
@@ -123,7 +132,7 @@ void ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count,
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
if (err == -1) err_flag = 1; \
writebuf_off = req_off; \
- writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
+ writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
err = read(fd->fd_sys, writebuf, writebuf_len); \
@@ -135,7 +144,8 @@ void ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count,
return; \
} \
} \
- write_sz = (int) (ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \
+ write_sz = (unsigned) (ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \
+ ADIOI_Assert((ADIO_Offset)write_sz == ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off));\
memcpy(writebuf+req_off-writebuf_off, (char *)buf +userbuf_off, write_sz);\
while (write_sz != req_len) { \
lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
@@ -145,7 +155,7 @@ void ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count,
req_len -= write_sz; \
userbuf_off += write_sz; \
writebuf_off += writebuf_len; \
- writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
+ writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
err = read(fd->fd_sys, writebuf, writebuf_len); \
@@ -173,9 +183,10 @@ void ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count,
if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
if (err == -1) err_flag = 1; \
writebuf_off = req_off; \
- writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
+ writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
} \
- write_sz = (int) (ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \
+ write_sz = (unsigned) (ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \
+ ADIOI_Assert((ADIO_Offset)write_sz == ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off));\
memcpy(writebuf+req_off-writebuf_off, (char *)buf +userbuf_off, write_sz);\
while (write_sz != req_len) { \
lseek(fd->fd_sys, writebuf_off, SEEK_SET); \
@@ -186,7 +197,7 @@ void ADIOI_BGL_WriteContig(ADIO_File fd, void *buf, int count,
req_len -= write_sz; \
userbuf_off += write_sz; \
writebuf_off += writebuf_len; \
- writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
+ writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
write_sz = ADIOI_MIN(req_len, writebuf_len); \
memcpy(writebuf, (char *)buf + userbuf_off, write_sz);\
} \
@@ -201,19 +212,23 @@ void ADIOI_BGL_WriteStrided(ADIO_File fd, void *buf, int count,
{
/* offset is in units of etype relative to the filetype. */
+
+
ADIOI_Flatlist_node *flat_buf, *flat_file;
- int i, j, k, err=-1, bwr_size, fwr_size=0, st_index=0;
- int bufsize, num, size, sum, n_etypes_in_filetype, size_in_filetype;
- int n_filetypes, etype_in_filetype;
+ ADIO_Offset i_offset, sum, size_in_filetype;
+ int i, j, k, err=-1, st_index=0;
+ int n_etypes_in_filetype;
+ ADIO_Offset num, size, n_filetypes, etype_in_filetype, st_n_filetypes;
ADIO_Offset abs_off_in_filetype=0;
- int filetype_size, etype_size, buftype_size, req_len;
+ int filetype_size, etype_size, buftype_size;
MPI_Aint filetype_extent, buftype_extent;
int buf_count, buftype_is_contig, filetype_is_contig;
ADIO_Offset userbuf_off;
ADIO_Offset off, req_off, disp, end_offset=0, writebuf_off, start_off;
char *writebuf, *value;
- int flag, st_fwr_size, st_n_filetypes, writebuf_len, write_sz;
- int new_bwr_size, new_fwr_size, err_flag=0, info_flag, max_bufsize;
+ unsigned bufsize, writebuf_len, max_bufsize, write_sz;
+ int err_flag=0, info_flag;
+ ADIO_Offset new_bwr_size, new_fwr_size, st_fwr_size, fwr_size=0, bwr_size, req_len;
static char myname[] = "ADIOI_BGL_WRITESTRIDED";
if (fd->hints->ds_write == ADIOI_HINT_DISABLE) {
@@ -247,12 +262,13 @@ void ADIOI_BGL_WriteStrided(ADIO_File fd, void *buf, int count,
MPI_Type_extent(datatype, &buftype_extent);
etype_size = fd->etype_size;
+ ADIOI_Assert((buftype_size * count) == ((ADIO_Offset)(unsigned)buftype_size * (ADIO_Offset)count));
bufsize = buftype_size * count;
/* get max_bufsize from the info object. */
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
- MPI_Info_get(fd->info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, value,
+ ADIOI_Info_get(fd->info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, value,
&info_flag);
max_bufsize = atoi(value);
ADIOI_Free(value);
@@ -272,20 +288,23 @@ void ADIOI_BGL_WriteStrided(ADIO_File fd, void *buf, int count,
end_offset = off + bufsize - 1;
writebuf_off = off;
writebuf = (char *) ADIOI_Malloc(max_bufsize);
- writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));
+ writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));
/* if atomicity is true, lock the region to be accessed */
if (fd->atomicity)
ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
for (j=0; j<count; j++)
+ {
+ int i;
for (i=0; i<flat_buf->count; i++) {
- userbuf_off = j*buftype_extent + flat_buf->indices[i];
+ userbuf_off = (ADIO_Offset)j*(ADIO_Offset)buftype_extent + flat_buf->indices[i];
req_off = off;
req_len = flat_buf->blocklens[i];
ADIOI_BUFFERED_WRITE_WITHOUT_READ
off += flat_buf->blocklens[i];
}
+ }
/* write the buffer out finally */
lseek(fd->fd_sys, writebuf_off, SEEK_SET);
@@ -317,29 +336,37 @@ void ADIOI_BGL_WriteStrided(ADIO_File fd, void *buf, int count,
disp = fd->disp;
if (file_ptr_type == ADIO_INDIVIDUAL) {
- offset = fd->fp_ind; /* in bytes */
- n_filetypes = -1;
- flag = 0;
- while (!flag) {
- n_filetypes++;
- for (i=0; i<flat_file->count; i++) {
- if (disp + flat_file->indices[i] +
- (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i]
- >= offset) {
- st_index = i;
- fwr_size = (int) (disp + flat_file->indices[i] +
- (ADIO_Offset) n_filetypes*filetype_extent
- + flat_file->blocklens[i] - offset);
- flag = 1;
- break;
- }
- }
- }
+ /* Wei-keng reworked type processing to be a bit more efficient */
+ offset = fd->fp_ind - disp;
+ n_filetypes = (offset - flat_file->indices[0]) / filetype_extent;
+ offset -= (ADIO_Offset)n_filetypes * filetype_extent;
+ /* now offset is local to this extent */
+
+ /* find the block where offset is located, skip blocklens[i]==0 */
+ for (i=0; i<flat_file->count; i++) {
+ ADIO_Offset dist;
+ if (flat_file->blocklens[i] == 0) continue;
+ dist = flat_file->indices[i] + flat_file->blocklens[i] - offset;
+ /* fwr_size is from offset to the end of block i */
+ if (dist == 0) {
+ i++;
+ offset = flat_file->indices[i];
+ fwr_size = flat_file->blocklens[i];
+ break;
+ }
+ if (dist > 0) {
+ fwr_size = dist;
+ break;
+ }
+ }
+ st_index = i; /* starting index in flat_file->indices[] */
+ offset += disp + (ADIO_Offset)n_filetypes*filetype_extent;
}
else {
+ int i;
n_etypes_in_filetype = filetype_size/etype_size;
- n_filetypes = (int) (offset / n_etypes_in_filetype);
- etype_in_filetype = (int) (offset % n_etypes_in_filetype);
+ n_filetypes = offset / n_etypes_in_filetype;
+ etype_in_filetype = offset % n_etypes_in_filetype;
size_in_filetype = etype_in_filetype * etype_size;
sum = 0;
@@ -355,32 +382,64 @@ void ADIOI_BGL_WriteStrided(ADIO_File fd, void *buf, int count,
}
/* abs. offset in bytes in the file */
- offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype;
+ offset = disp + (ADIO_Offset) n_filetypes*filetype_extent +
+ abs_off_in_filetype;
}
start_off = offset;
+ /* Wei-keng Liao:write request is within single flat_file contig block*/
+ /* this could happen, for example, with subarray types that are
+ * actually fairly contiguous */
+ if (buftype_is_contig && bufsize <= fwr_size) {
+ ADIO_WriteContig(fd, buf, bufsize, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
+ offset, status, error_code);
+
+ if (file_ptr_type == ADIO_INDIVIDUAL) {
+ /* update MPI-IO file pointer to point to the first byte
+ * that can be accessed in the fileview. */
+ fd->fp_ind = offset + bufsize;
+ if (bufsize == fwr_size) {
+ do {
+ st_index++;
+ if (st_index == flat_file->count) {
+ st_index = 0;
+ n_filetypes++;
+ }
+ } while (flat_file->blocklens[st_index] == 0);
+ fd->fp_ind = disp + flat_file->indices[st_index]
+ + (ADIO_Offset)n_filetypes*filetype_extent;
+ }
+ }
+ fd->fp_sys_posn = -1; /* set it to null. */
+#ifdef HAVE_STATUS_SET_BYTES
+ MPIR_Status_set_bytes(status, datatype, bufsize);
+#endif
+ return;
+ }
/* Calculate end_offset, the last byte-offset that will be accessed.
e.g., if start_offset=0 and 100 bytes to be write, end_offset=99*/
st_fwr_size = fwr_size;
st_n_filetypes = n_filetypes;
- i = 0;
+ i_offset = 0;
j = st_index;
off = offset;
fwr_size = ADIOI_MIN(st_fwr_size, bufsize);
- while (i < bufsize) {
- i += fwr_size;
+ while (i_offset < bufsize) {
+ i_offset += fwr_size;
end_offset = off + fwr_size - 1;
- if (j < (flat_file->count - 1)) j++;
- else {
- j = 0;
- n_filetypes++;
- }
+ j = (j+1) % flat_file->count;
+ n_filetypes += (j == 0) ? 1 : 0;
+ while (flat_file->blocklens[j]==0) {
+ j = (j+1) % flat_file->count;
+ n_filetypes += (j == 0) ? 1 : 0;
+ }
- off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent;
- fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
+ off = disp + flat_file->indices[j] +
+ n_filetypes*(ADIO_Offset)filetype_extent;
+ fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset);
}
/* if atomicity is true, lock the region to be accessed */
@@ -390,7 +449,7 @@ void ADIOI_BGL_WriteStrided(ADIO_File fd, void *buf, int count,
/* initial read for the read-modify-write */
writebuf_off = offset;
writebuf = (char *) ADIOI_Malloc(max_bufsize);
- writebuf_len = (int)(ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));
+ writebuf_len = (unsigned)(ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));
if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
lseek(fd->fd_sys, writebuf_off, SEEK_SET);
err = read(fd->fd_sys, writebuf, writebuf_len);
@@ -408,39 +467,41 @@ void ADIOI_BGL_WriteStrided(ADIO_File fd, void *buf, int count,
/* contiguous in memory, noncontiguous in file. should be the most
common case. */
- i = 0;
+ i_offset = 0;
j = st_index;
off = offset;
n_filetypes = st_n_filetypes;
fwr_size = ADIOI_MIN(st_fwr_size, bufsize);
- while (i < bufsize) {
+ while (i_offset < bufsize) {
if (fwr_size) {
/* TYPE_UB and TYPE_LB can result in
fwr_size = 0. save system call in such cases */
/* lseek(fd->fd_sys, off, SEEK_SET);
- err = write(fd->fd_sys, ((char *) buf) + i, fwr_size);*/
+ err = write(fd->fd_sys, ((char *) buf) + i_offset, fwr_size);*/
req_off = off;
req_len = fwr_size;
- userbuf_off = i;
+ userbuf_off = i_offset;
ADIOI_BUFFERED_WRITE
}
- i += fwr_size;
+ i_offset += fwr_size;
if (off + fwr_size < disp + flat_file->indices[j] +
- flat_file->blocklens[j] + (ADIO_Offset) n_filetypes*filetype_extent)
+ flat_file->blocklens[j] + n_filetypes*(ADIO_Offset)filetype_extent)
off += fwr_size;
/* did not reach end of contiguous block in filetype.
no more I/O needed. off is incremented by fwr_size. */
else {
- if (j < (flat_file->count - 1)) j++;
- else {
- j = 0;
- n_filetypes++;
- }
+ j = (j+1) % flat_file->count;
+ n_filetypes += (j == 0) ? 1 : 0;
+ while (flat_file->blocklens[j]==0) {
+ j = (j+1) % flat_file->count;
+ n_filetypes += (j == 0) ? 1 : 0;
+ }
off = disp + flat_file->indices[j] +
- (ADIO_Offset) n_filetypes*filetype_extent;
- fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
+ n_filetypes*(ADIO_Offset)filetype_extent;
+ fwr_size = ADIOI_MIN(flat_file->blocklens[j],
+ bufsize-i_offset);
}
}
}
@@ -452,7 +513,7 @@ void ADIOI_BGL_WriteStrided(ADIO_File fd, void *buf, int count,
while (flat_buf->type != datatype) flat_buf = flat_buf->next;
k = num = buf_count = 0;
- i = (int) (flat_buf->indices[0]);
+ i_offset = flat_buf->indices[0];
j = st_index;
off = offset;
n_filetypes = st_n_filetypes;
@@ -463,11 +524,11 @@ void ADIOI_BGL_WriteStrided(ADIO_File fd, void *buf, int count,
size = ADIOI_MIN(fwr_size, bwr_size);
if (size) {
/* lseek(fd->fd_sys, off, SEEK_SET);
- err = write(fd->fd_sys, ((char *) buf) + i, size); */
+ err = write(fd->fd_sys, ((char *) buf) + i_offset, size); */
req_off = off;
req_len = size;
- userbuf_off = i;
+ userbuf_off = i_offset;
ADIOI_BUFFERED_WRITE
}
@@ -476,18 +537,19 @@ void ADIOI_BGL_WriteStrided(ADIO_File fd, void *buf, int count,
if (size == fwr_size) {
/* reached end of contiguous block in file */
- if (j < (flat_file->count - 1)) j++;
- else {
- j = 0;
- n_filetypes++;
+ j = (j+1) % flat_file->count;
+ n_filetypes += (j == 0) ? 1 : 0;
+ while (flat_file->blocklens[j]==0) {
+ j = (j+1) % flat_file->count;
+ n_filetypes += (j == 0) ? 1 : 0;
}
off = disp + flat_file->indices[j] +
- (ADIO_Offset) n_filetypes*filetype_extent;
+ n_filetypes*(ADIO_Offset)filetype_extent;
new_fwr_size = flat_file->blocklens[j];
if (size != bwr_size) {
- i += size;
+ i_offset += size;
new_bwr_size -= size;
}
}
@@ -497,8 +559,8 @@ void ADIOI_BGL_WriteStrided(ADIO_File fd, void *buf, int count,
k = (k + 1)%flat_buf->count;
buf_count++;
- i = (int) (buftype_extent*(buf_count/flat_buf->count) +
- flat_buf->indices[k]);
+ i_offset = (ADIO_Offset)buftype_extent*(ADIO_Offset)(buf_count/flat_buf->count) +
+ flat_buf->indices[k];
new_bwr_size = flat_buf->blocklens[k];
if (size != fwr_size) {
off += size;
diff --git a/ompi/mca/io/romio/romio/adio/ad_bglockless/.state-cache b/ompi/mca/io/romio/romio/adio/ad_bglockless/.state-cache
deleted file mode 100644
index 7b79bb6e95..0000000000
--- a/ompi/mca/io/romio/romio/adio/ad_bglockless/.state-cache
+++ /dev/null
@@ -1,7 +0,0 @@
-
-
-
-
-
-
-
diff --git a/ompi/mca/io/romio/romio/adio/ad_bglockless/Makefile.am b/ompi/mca/io/romio/romio/adio/ad_bglockless/Makefile.am
index 991f779520..ec0f951f41 100644
--- a/ompi/mca/io/romio/romio/adio/ad_bglockless/Makefile.am
+++ b/ompi/mca/io/romio/romio/adio/ad_bglockless/Makefile.am
@@ -21,4 +21,6 @@ include $(top_srcdir)/Makefile.options
noinst_LTLIBRARIES = libadio_bglockless.la
libadio_bglockless_la_SOURCES = \
- ad_bglockless.c
+ ad_bglockless.c \
+ ad_bglockless.h \
+ ad_bglockless_features.c
diff --git a/ompi/mca/io/romio/romio/adio/ad_bglockless/ad_bglockless.c b/ompi/mca/io/romio/romio/adio/ad_bglockless/ad_bglockless.c
index 725590d1b9..759616523c 100644
--- a/ompi/mca/io/romio/romio/adio/ad_bglockless/ad_bglockless.c
+++ b/ompi/mca/io/romio/romio/adio/ad_bglockless/ad_bglockless.c
@@ -6,12 +6,14 @@
*/
#include "../ad_bgl/ad_bgl.h"
+#include "ad_bglockless.h"
/* adioi.h has the ADIOI_Fns_struct define */
#include "adioi.h"
struct ADIOI_Fns_struct ADIO_BGLOCKLESS_operations = {
ADIOI_BGL_Open, /* Open */
+ ADIOI_GEN_OpenColl, /* Collective open */
ADIOI_GEN_ReadContig, /* ReadContig */
ADIOI_GEN_WriteContig, /* WriteContig */
ADIOI_BGL_ReadStridedColl, /* ReadStridedColl */
@@ -35,7 +37,8 @@ struct ADIOI_Fns_struct ADIO_BGLOCKLESS_operations = {
ADIOI_GEN_IOComplete, /* WriteComplete */
ADIOI_GEN_IreadStrided, /* IreadStrided */
ADIOI_GEN_IwriteStrided, /* IwriteStrided */
- ADIOI_GEN_Flush, /* Flush */
+ ADIOI_BGL_Flush, /* Flush */
ADIOI_GEN_Resize, /* Resize */
ADIOI_GEN_Delete, /* Delete */
+ ADIOI_BGLOCKLESS_Feature /* Features */
};
diff --git a/ompi/mca/io/romio/romio/adio/ad_bglockless/ad_bglockless.h b/ompi/mca/io/romio/romio/adio/ad_bglockless/ad_bglockless.h
new file mode 100644
index 0000000000..5b6a1fc40f
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/ad_bglockless/ad_bglockless.h
@@ -0,0 +1,14 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ *
+ * Copyright (C) 2008 Uchicago Argonne LLC
+ * See COPYRIGHT notice in top-level directory.
+ */
+
+#ifndef AD_BGLOCKLESS_INCLUDE
+#define AD_BGLOCKLESS_INCLUDE
+/* Feature query; ad_bglockless.c installs it as the "Features" slot of ADIO_BGLOCKLESS_operations. */
+int ADIOI_BGLOCKLESS_Feature(ADIO_File fd, int flag);
+
+#endif /* AD_BGLOCKLESS_INCLUDE */
+
diff --git a/ompi/mca/io/romio/romio/adio/ad_bglockless/ad_bglockless_features.c b/ompi/mca/io/romio/romio/adio/ad_bglockless/ad_bglockless_features.c
new file mode 100644
index 0000000000..4153c5e409
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/ad_bglockless/ad_bglockless_features.c
@@ -0,0 +1,15 @@
+#include "adio.h"
+
+int ADIOI_BGLOCKLESS_Feature(ADIO_File fd, int flag)
+{
+    /* Feature query for the ad_bglockless driver; fd is not consulted.
+     *   answers 1 for: ADIO_SCALABLE_OPEN
+     *   answers 0 for: ADIO_SHARED_FP, ADIO_LOCKS, ADIO_SEQUENTIAL,
+     *                  ADIO_DATA_SIEVING_WRITES and every other flag */
+    if (flag == ADIO_SCALABLE_OPEN) {
+        return 1;
+    }
+
+    /* everything else is reported as unsupported */
+    return 0;
+}
diff --git a/ompi/mca/io/romio/romio/adio/ad_gridftp/Makefile.am b/ompi/mca/io/romio/romio/adio/ad_gridftp/Makefile.am
index f2905f0894..9e0dd5dff9 100644
--- a/ompi/mca/io/romio/romio/adio/ad_gridftp/Makefile.am
+++ b/ompi/mca/io/romio/romio/adio/ad_gridftp/Makefile.am
@@ -25,6 +25,7 @@ libadio_gridftp_la_SOURCES = \
ad_gridftp_close.c \
ad_gridftp_delete.c \
ad_gridftp_fcntl.c \
+ ad_gridftp_features.c \
ad_gridftp_flush.c \
ad_gridftp_hints.c \
ad_gridftp_open.c \
diff --git a/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp.c b/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp.c
index 734f93628f..fe429c9e4b 100644
--- a/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp.c
+++ b/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp.c
@@ -33,4 +33,5 @@ struct ADIOI_Fns_struct ADIO_GRIDFTP_operations = {
ADIOI_GRIDFTP_Flush, /* Flush */
ADIOI_GRIDFTP_Resize, /* Resize */
ADIOI_GRIDFTP_Delete, /* Delete */
+ ADIOI_GRIDFTP_Feature, /* Features */
};
diff --git a/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_features.c b/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_features.c
new file mode 100644
index 0000000000..6eea52734b
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_features.c
@@ -0,0 +1,23 @@
+#include "adio.h"
+
+/*
+ * Feature query for the ad_gridftp driver.
+ *
+ * fd   - ADIO file the query is made on; not consulted.
+ * flag - one of the ADIO_* feature constants.
+ *
+ * Returns 0 for every flag: none of the features listed below (nor any
+ * unknown flag) is reported as supported.
+ */
+int ADIOI_GRIDFTP_Feature (ADIO_File fd, int flag)
+{
+    switch(flag) {
+        case ADIO_SCALABLE_OPEN:
+        case ADIO_SHARED_FP:
+        case ADIO_LOCKS:
+        case ADIO_SEQUENTIAL:
+        case ADIO_DATA_SIEVING_WRITES:
+        default:
+            return 0;
+    }
+}
diff --git a/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_hints.c b/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_hints.c
index ca96e6f26d..124bef6174 100644
--- a/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_hints.c
+++ b/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_hints.c
@@ -56,8 +56,8 @@ void ADIOI_GRIDFTP_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
MPI_Info_get_valuelen(users_info,key,&valuelen,&flag);
if (flag)
{
- MPI_Info_get(users_info,key,valuelen,value,&flag);
- if (flag) MPI_Info_set(fd->info,key,value);
+ ADIOI_Info_get(users_info,key,valuelen,value,&flag);
+ if (flag) ADIOI_Info_set(fd->info,key,value);
}
}
}
diff --git a/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_open.c b/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_open.c
index 26a7afd7cc..d7b175ed86 100644
--- a/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_open.c
+++ b/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_open.c
@@ -136,7 +136,7 @@ void ADIOI_GRIDFTP_Open(ADIO_File fd, int *error_code)
oattr[] (eg. parallelism, striping, etc.) goes here */
if ( fd->info!=MPI_INFO_NULL )
{
- MPI_Info_get(fd->info,"ftp_control_mode",MPI_MAX_INFO_VAL,hintval,&keyfound);
+ ADIOI_Info_get(fd->info,"ftp_control_mode",MPI_MAX_INFO_VAL,hintval,&keyfound);
if ( keyfound )
{
if ( ( !strcmp(hintval,"extended") || !strcmp(hintval,"extended_block") ) &&
@@ -153,7 +153,7 @@ void ADIOI_GRIDFTP_Open(ADIO_File fd, int *error_code)
globus_err_handler("globus_ftp_client_operationattr_set_mode",myname,result);
}
- MPI_Info_get(fd->info,"parallelism",MPI_MAX_INFO_VAL,hintval,&keyfound);
+ ADIOI_Info_get(fd->info,"parallelism",MPI_MAX_INFO_VAL,hintval,&keyfound);
if ( keyfound )
{
int nftpthreads;
@@ -170,14 +170,14 @@ void ADIOI_GRIDFTP_Open(ADIO_File fd, int *error_code)
}
}
- MPI_Info_get(fd->info,"striped_ftp",MPI_MAX_INFO_VAL,hintval,&keyfound);
+ ADIOI_Info_get(fd->info,"striped_ftp",MPI_MAX_INFO_VAL,hintval,&keyfound);
if ( keyfound )
{
/* if set to "true" or "enable", set up round-robin block layout */
if ( !strncmp("true",hintval,4) || !strncmp("TRUE",hintval,4) ||
!strncmp("enable",hintval,4) || !strncmp("ENABLE",hintval,4) )
{
- MPI_Info_get(fd->info,"striping_factor",MPI_MAX_INFO_VAL,hintval,&keyfound);
+ ADIOI_Info_get(fd->info,"striping_factor",MPI_MAX_INFO_VAL,hintval,&keyfound);
if ( keyfound )
{
int striping_factor;
@@ -197,7 +197,7 @@ void ADIOI_GRIDFTP_Open(ADIO_File fd, int *error_code)
}
}
- MPI_Info_get(fd->info,"tcp_buffer",MPI_MAX_INFO_VAL,hintval,&keyfound);
+ ADIOI_Info_get(fd->info,"tcp_buffer",MPI_MAX_INFO_VAL,hintval,&keyfound);
if ( keyfound )
{
/* set tcp buffer size */
@@ -214,7 +214,7 @@ void ADIOI_GRIDFTP_Open(ADIO_File fd, int *error_code)
}
}
- MPI_Info_get(fd->info,"transfer_type",MPI_MAX_INFO_VAL,hintval,&keyfound);
+ ADIOI_Info_get(fd->info,"transfer_type",MPI_MAX_INFO_VAL,hintval,&keyfound);
if ( keyfound )
{
globus_ftp_control_type_t filetype;
@@ -340,84 +340,4 @@ void ADIOI_GRIDFTP_Open(ADIO_File fd, int *error_code)
}
}
num_gridftp_handles++;
-
-#if 0
- /* Debugging info for testing PASV mode behind firewalls */
- if ( myrank==0 )
- {
- globus_bool_t striped;
- globus_ftp_control_mode_t mode;
- globus_ftp_control_type_t filetype;
- globus_ftp_control_parallelism_t parallelism;
-
- FPRINTF(stderr,"--gridftp details for %s--\n",
- fd->filename);
-
- /*
- FPRINTF(stderr,"Connection caching: ");
- globus_ftp_client_handleattr_get_cache_all(&hattr,&cached);
- if ( cached==GLOBUS_TRUE )
- FPRINTF(stderr,"Y\n");
- else
- FPRINTF(stderr,"N\n");
- */
-
- FPRINTF(stderr,"Control mode: ");
- globus_ftp_client_operationattr_get_mode(&(oattr[fd->fd_sys]),&mode);
- if ( mode==GLOBUS_FTP_CONTROL_MODE_BLOCK )
- FPRINTF(stderr,"block\n");
- else if ( mode==GLOBUS_FTP_CONTROL_MODE_COMPRESSED )
- FPRINTF(stderr,"compressed\n");
- else if ( mode==GLOBUS_FTP_CONTROL_MODE_EXTENDED_BLOCK )
- FPRINTF(stderr,"extended block\n");
- else if ( mode==GLOBUS_FTP_CONTROL_MODE_STREAM )
- FPRINTF(stderr,"stream\n");
- else
- FPRINTF(stderr,"unknown\n");
-
- FPRINTF(stderr,"File type: ");
- globus_ftp_client_operationattr_get_type(&(oattr[fd->fd_sys]),&filetype);
- if ( filetype==GLOBUS_FTP_CONTROL_TYPE_ASCII )
- FPRINTF(stderr,"ASCII\n");
- else if ( filetype==GLOBUS_FTP_CONTROL_TYPE_IMAGE )
- FPRINTF(stderr,"binary\n");
- else if ( filetype==GLOBUS_FTP_CONTROL_TYPE_EBCDIC )
- FPRINTF(stderr,"EBCDIC\n");
- else
- FPRINTF(stderr,"unknown\n");
-
- FPRINTF(stderr,"Parallelism: ");
- globus_ftp_client_operationattr_get_parallelism(&(oattr[fd->fd_sys]),&parallelism);
- if ( parallelism.mode==GLOBUS_FTP_CONTROL_PARALLELISM_NONE )
- FPRINTF(stderr,"none\n");
- else if ( parallelism.mode==GLOBUS_FTP_CONTROL_PARALLELISM_FIXED )
- FPRINTF(stderr,"fixed with %d streams\n",parallelism.fixed.size);
- else
- FPRINTF(stderr,"unknown\n");
-
- FPRINTF(stderr,"Striping: ");
- globus_ftp_client_operationattr_get_striped(&(oattr[fd->fd_sys]),&striped);
- if ( striped==GLOBUS_TRUE )
- {
- globus_ftp_control_layout_t layout;
-
- FPRINTF(stderr,"Y\nLayout: ");
- globus_ftp_client_operationattr_get_layout(&(oattr[fd->fd_sys]),
- &layout);
- if ( layout.mode==GLOBUS_FTP_CONTROL_STRIPING_NONE )
- FPRINTF(stderr,"none\n");
- else if ( layout.mode==GLOBUS_FTP_CONTROL_STRIPING_PARTITIONED )
- FPRINTF(stderr,"partitioned, size=%d\n",layout.partitioned.size);
- else if ( layout.mode==GLOBUS_FTP_CONTROL_STRIPING_BLOCKED_ROUND_ROBIN )
- FPRINTF(stderr,"round-robin, block size=%d\n",layout.round_robin.block_size);
- else
- FPRINTF(stderr,"unknown\n");
- }
- else
- FPRINTF(stderr,"N\n");
-
- fflush(stderr);
- }
-#endif
-
}
diff --git a/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_read.c b/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_read.c
index 31a93beaa4..a6af99d50e 100644
--- a/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_read.c
+++ b/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_read.c
@@ -50,10 +50,6 @@ static void readcontig_data_cb(void *myargs, globus_ftp_client_handle_t *handle,
readcontig_data_cb: buffer 0x404c0008 length 65536 offset 32112640 eof 0
readcontig_data_cb: buffer 0x404d0008 length 65536 offset 32178176 eof 0
*/
-#if 0
- FPRINTF(stderr, "%s: buffer %p length %d offset %Ld eof %d\n",
- __func__, buffer, length, offset, eof);
-#endif
if ( !eof )
globus_ftp_client_register_read(handle,
buffer+length,
diff --git a/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_write.c b/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_write.c
index 1dd25c7f26..869ecef8d6 100644
--- a/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_write.c
+++ b/ompi/mca/io/romio/romio/adio/ad_gridftp/ad_gridftp_write.c
@@ -364,10 +364,6 @@ void ADIOI_GRIDFTP_WriteDiscontig(ADIO_File fd, void *buf, int count,
{
fd->fp_ind += extent;
fd->fp_sys_posn = fd->fp_ind;
-#if 0
- FPRINTF(stdout, "[%d/%d] new file position is %Ld\n", myrank,
- nprocs, (long long) fd->fp_ind);
-#endif
}
else {
fd->fp_sys_posn = offset + extent;
diff --git a/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_fcntl.c b/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_fcntl.c
index db42414c64..b17227fd41 100644
--- a/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_fcntl.c
+++ b/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_fcntl.c
@@ -8,6 +8,9 @@
#include "ad_hfs.h"
#include "adio_extern.h"
+#ifndef HAVE_LSEEK64
+#define lseek64 lseek
+#endif
void ADIOI_HFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *error_code)
{
int i, ntimes, err;
diff --git a/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_open.c b/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_open.c
index 1c04c7af09..8e56a4571e 100644
--- a/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_open.c
+++ b/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_open.c
@@ -7,6 +7,10 @@
#include "ad_hfs.h"
+#ifndef HAVE_LSEEK64
+#define lseek64 lseek
+#endif
+
void ADIOI_HFS_Open(ADIO_File fd, int *error_code)
{
int perm, old_mask, amode;
diff --git a/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_read.c b/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_read.c
index b28e3f6ee9..06e7fec896 100644
--- a/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_read.c
+++ b/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_read.c
@@ -7,6 +7,10 @@
#include "ad_hfs.h"
+#ifndef HAVE_LSEEK64
+#define lseek64 lseek
+#endif
+
void ADIOI_HFS_ReadContig(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status, int *error_code)
diff --git a/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_write.c b/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_write.c
index a4a41a0aa8..ab42d31bb6 100644
--- a/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_write.c
+++ b/ompi/mca/io/romio/romio/adio/ad_hfs/ad_hfs_write.c
@@ -7,6 +7,10 @@
#include "ad_hfs.h"
+#ifndef HAVE_LSEEK64
+#define lseek64 lseek
+#endif
+
void ADIOI_HFS_WriteContig(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status, int *error_code)
diff --git a/ompi/mca/io/romio/romio/adio/ad_lustre/.state-cache b/ompi/mca/io/romio/romio/adio/ad_lustre/.state-cache
deleted file mode 100644
index 3e2ff262a8..0000000000
--- a/ompi/mca/io/romio/romio/adio/ad_lustre/.state-cache
+++ /dev/null
@@ -1,22 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/ompi/mca/io/romio/romio/adio/ad_lustre/Makefile.am b/ompi/mca/io/romio/romio/adio/ad_lustre/Makefile.am
index 67107e302f..6e20ce8919 100644
--- a/ompi/mca/io/romio/romio/adio/ad_lustre/Makefile.am
+++ b/ompi/mca/io/romio/romio/adio/ad_lustre/Makefile.am
@@ -24,8 +24,11 @@ EXTRA_DIST = README
noinst_LTLIBRARIES = libadio_lustre.la
libadio_lustre_la_SOURCES = \
ad_lustre.c \
+ ad_lustre_aggregate.c \
ad_lustre_fcntl.c \
ad_lustre.h \
ad_lustre_hints.c \
ad_lustre_open.c \
- ad_lustre_rwcontig.c
+ ad_lustre_wrcoll.c \
+ ad_lustre_rwcontig.c \
+ ad_lustre_wrstr.c
diff --git a/ompi/mca/io/romio/romio/adio/ad_lustre/README b/ompi/mca/io/romio/romio/adio/ad_lustre/README
index 545ef3485f..a217c0f8fe 100644
--- a/ompi/mca/io/romio/romio/adio/ad_lustre/README
+++ b/ompi/mca/io/romio/romio/adio/ad_lustre/README
@@ -4,6 +4,21 @@ Upcoming soon:
Further out:
o To post the code for ParColl (Partitioned collective IO)
+-----------------------------------------------------
+V05:
+-----------------------------------------------------
+Improved data redistribution
+ o Improve I/O pattern identification. Besides checking interleaving,
+ if request I/O size is small, collective I/O will be performed.
+ The hint bigsize can be used to define the req size value.
+ o Provide hint CO for load balancing to control the number of
+ IO clients for each OST
+ o Produce stripe-contiguous I/O pattern that Lustre prefers
+ o Control read-modify-write in data sieving in collective IO
+ by hint ds_in_coll.
+ o Reduce extent lock conflicts by making each OST be accessed by a
+ constant set of one or more clients.
+
-----------------------------------------------------
V04:
-----------------------------------------------------
diff --git a/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre.c b/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre.c
index 1a465f85aa..08809e5c65 100644
--- a/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre.c
+++ b/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre.c
@@ -1,24 +1,27 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*
- * Copyright (C) 2001 University of Chicago.
+/*
+ * Copyright (C) 2001 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*
* Copyright (C) 2007 Oak Ridge National Laboratory
+ *
+ * Copyright (C) 2008 Sun Microsystems, Lustre group
*/
#include "ad_lustre.h"
struct ADIOI_Fns_struct ADIO_LUSTRE_operations = {
ADIOI_LUSTRE_Open, /* Open */
+ ADIOI_GEN_OpenColl, /* OpenColl */
ADIOI_LUSTRE_ReadContig, /* ReadContig */
ADIOI_LUSTRE_WriteContig, /* WriteContig */
ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */
- ADIOI_GEN_WriteStridedColl, /* WriteStridedColl */
+ ADIOI_LUSTRE_WriteStridedColl, /* WriteStridedColl */
ADIOI_GEN_SeekIndividual, /* SeekIndividual */
ADIOI_GEN_Fcntl, /* Fcntl */
ADIOI_LUSTRE_SetInfo, /* SetInfo */
ADIOI_GEN_ReadStrided, /* ReadStrided */
- ADIOI_GEN_WriteStrided, /* WriteStrided */
+ ADIOI_LUSTRE_WriteStrided, /* WriteStrided */
ADIOI_GEN_Close, /* Close */
#if defined(ROMIO_HAVE_WORKING_AIO) && !defined(CRAY_XT_LUSTRE)
ADIOI_GEN_IreadContig, /* IreadContig */
@@ -36,4 +39,5 @@ struct ADIOI_Fns_struct ADIO_LUSTRE_operations = {
ADIOI_GEN_Flush, /* Flush */
ADIOI_GEN_Resize, /* Resize */
ADIOI_GEN_Delete, /* Delete */
+ ADIOI_GEN_Feature, /* Features */
};
diff --git a/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre.h b/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre.h
index a0fbdc40e2..3252907596 100644
--- a/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre.h
+++ b/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre.h
@@ -1,9 +1,11 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*
- * Copyright (C) 1997 University of Chicago.
+/*
+ * Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*
* Copyright (C) 2007 Oak Ridge National Laboratory
+ *
+ * Copyright (C) 2008 Sun Microsystems, Lustre group
*/
#ifndef AD_UNIX_INCLUDE
@@ -17,6 +19,7 @@
#ifdef __linux__
# include /* necessary for: */
+# include
# define __USE_GNU /* O_DIRECT and */
# include /* IO operations */
# undef __USE_GNU
@@ -24,7 +27,7 @@
/*#include */
#include
-#include "lustre/lustre_user.h"
+#include
#include "adio.h"
/*#include "adioi.h"*/
@@ -41,24 +44,48 @@
void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code);
void ADIOI_LUSTRE_Close(ADIO_File fd, int *error_code);
-void ADIOI_LUSTRE_ReadContig(ADIO_File fd, void *buf, int count,
- MPI_Datatype datatype, int file_ptr_type,
- ADIO_Offset offset, ADIO_Status *status, int
- *error_code);
-void ADIOI_LUSTRE_WriteContig(ADIO_File fd, void *buf, int count,
- MPI_Datatype datatype, int file_ptr_type,
- ADIO_Offset offset, ADIO_Status *status, int
- *error_code);
+void ADIOI_LUSTRE_ReadContig(ADIO_File fd, void *buf, int count,
+ MPI_Datatype datatype, int file_ptr_type,
+ ADIO_Offset offset, ADIO_Status *status,
+ int *error_code);
+void ADIOI_LUSTRE_WriteContig(ADIO_File fd, void *buf, int count,
+ MPI_Datatype datatype, int file_ptr_type,
+ ADIO_Offset offset, ADIO_Status *status,
+ int *error_code);
+void ADIOI_LUSTRE_WriteStrided(ADIO_File fd, void *buf, int count,
+ MPI_Datatype datatype, int file_ptr_type,
+ ADIO_Offset offset, ADIO_Status *status,
+ int *error_code);
void ADIOI_LUSTRE_WriteStridedColl(ADIO_File fd, void *buf, int count,
- MPI_Datatype datatype, int file_ptr_type,
- ADIO_Offset offset, ADIO_Status *status, int
- *error_code);
+ MPI_Datatype datatype, int file_ptr_type,
+ ADIO_Offset offset, ADIO_Status *status,
+ int *error_code);
void ADIOI_LUSTRE_ReadStridedColl(ADIO_File fd, void *buf, int count,
- MPI_Datatype datatype, int file_ptr_type,
- ADIO_Offset offset, ADIO_Status *status, int
- *error_code);
+ MPI_Datatype datatype, int file_ptr_type,
+ ADIO_Offset offset, ADIO_Status *status,
+ int *error_code);
+void ADIOI_LUSTRE_ReadStrided(ADIO_File fd, void *buf, int count,
+ MPI_Datatype datatype, int file_ptr_type,
+ ADIO_Offset offset, ADIO_Status *status,
+ int *error_code);
void ADIOI_LUSTRE_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct,
int *error_code);
void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code);
+/* the lustre utilities: */
+int ADIOI_LUSTRE_Docollect(ADIO_File fd, int contig_access_count,
+ ADIO_Offset *len_list, int nprocs);
+
+void ADIOI_LUSTRE_Get_striping_info(ADIO_File fd, int **striping_info_ptr,
+ int mode);
+void ADIOI_LUSTRE_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list,
+ ADIO_Offset *len_list, int contig_access_count,
+ int *striping_info, int nprocs,
+ int *count_my_req_procs_ptr,
+ int **count_my_req_per_proc_ptr,
+ ADIOI_Access **my_req_ptr,
+ int ***buf_idx_ptr);
+
+int ADIOI_LUSTRE_Calc_aggregator(ADIO_File fd, ADIO_Offset off,
+ ADIO_Offset *len, int *striping_info);
#endif /* End of AD_UNIX_INCLUDE */
diff --git a/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_aggregate.c b/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_aggregate.c
new file mode 100644
index 0000000000..203b080edb
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_aggregate.c
@@ -0,0 +1,322 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ * Copyright (C) 1997 University of Chicago.
+ * See COPYRIGHT notice in top-level directory.
+ *
+ * Copyright (C) 2007 Oak Ridge National Laboratory
+ *
+ * Copyright (C) 2008 Sun Microsystems, Lustre group
+ */
+
+#include "ad_lustre.h"
+#include "adio_extern.h"
+
+#undef AGG_DEBUG
+
+/* ADIOI_LUSTRE_Get_striping_info() - gather the striping parameters used by
+ * the Lustre collective I/O code.
+ *
+ * A 3-int array is allocated with ADIOI_Malloc, stored in *striping_info_ptr
+ * (it is not freed here) and filled as follows:
+ *   [0] stripe size   (fd->hints->striping_unit)
+ *   [1] stripe count  (fd->hints->striping_factor)
+ *   [2] avail_cb_nodes, the number of I/O clients to use
+ *
+ * mode == 0 (collective read) uses one client per OST; any other mode takes
+ * the clients-per-OST ratio from the co_ratio hint.
+ */
+void ADIOI_LUSTRE_Get_striping_info(ADIO_File fd, int **striping_info_ptr,
+                                    int mode)
+{
+    int *info;
+    int cb_nodes = fd->hints->cb_nodes;
+    /* per the original author's note, the stripe size, the stripe count and
+     * the co_ratio hint (> 0) are validated in ADIOI_LUSTRE_Open() */
+    int stripe_size = fd->hints->striping_unit;
+    int stripe_count = fd->hints->striping_factor;
+    int co_ratio, per_ost, avail_cb_nodes, d;
+
+    /* For collective read, several clients hitting the same OST at once
+     * would cost a lot of disk seek time, so only one client per OST is
+     * used.
+     * XXX: maybe there are other better ways for collective read */
+    co_ratio = mode ? fd->hints->fs_hints.lustre.co_ratio : 1;
+
+    /* Calculate how many IO clients we need.
+     * Algorithm courtesy Pascal Deveze (pascal.deveze@bull.net).
+     * To avoid extent lock conflicts, avail_cb_nodes should either
+     *  - be a multiple of stripe_count,
+     *  - or divide stripe_count exactly
+     * so that each OST is accessed by a maximum of co_ratio constant
+     * clients. */
+    if (cb_nodes >= stripe_count) {
+        /* a multiple of stripe_count, with the number of procs per OST
+         * limited to the minimum of cb_nodes/stripe_count and co_ratio;
+         * e.g. stripe_count=20, cb_nodes=42 and co_ratio=3 gives 40 */
+        per_ost = ADIOI_MIN(cb_nodes/stripe_count, co_ratio);
+        avail_cb_nodes = stripe_count * per_ost;
+    } else {
+        /* fewer clients than stripes: avail_cb_nodes should divide
+         * stripe_count; e.g. stripe_count=60 and cb_nodes=8 gives 6.
+         * This could be done with
+         *   while (stripe_count % avail_cb_nodes != 0) avail_cb_nodes--;
+         * but trying divisors up to sqrt(stripe_count) is cheaper for
+         * large values. */
+        avail_cb_nodes = 1;
+        for (d = 2; stripe_count >= d*d; d++) {
+            if ((stripe_count % d) != 0)
+                continue;
+            if (stripe_count/d <= cb_nodes) {
+                /* the co-divisor fits: the value is found */
+                avail_cb_nodes = stripe_count/d;
+                break;
+            }
+            /* d itself fits; it is a solution, but maybe not the best one */
+            if (d <= cb_nodes)
+                avail_cb_nodes = d;
+        }
+    }
+
+    info = (int *) ADIOI_Malloc(3 * sizeof(int));
+    *striping_info_ptr = info;
+    info[0] = stripe_size;
+    info[1] = stripe_count;
+    info[2] = avail_cb_nodes;
+}
+
+/* ADIOI_LUSTRE_Calc_aggregator() - pick the aggregator rank for file offset
+ * 'off' and clip *len to the bytes remaining in the stripe containing 'off'.
+ *
+ * striping_info[0] is the stripe size and striping_info[2] the number of
+ * available aggregators.  The returned rank is read from
+ * fd->hints->ranklist.
+ */
+int ADIOI_LUSTRE_Calc_aggregator(ADIO_File fd, ADIO_Offset off,
+                                 ADIO_Offset *len, int *striping_info)
+{
+    int stripe_size = striping_info[0];
+    int avail_cb_nodes = striping_info[2];
+    ADIO_Offset stripe_no, bytes_left;
+    int agg_idx;
+
+    /* Produce the stripe-contiguous pattern for Lustre: consecutive stripes
+     * go to consecutive aggregators, wrapping at avail_cb_nodes */
+    stripe_no = off / stripe_size;
+    agg_idx = (int)(stripe_no % avail_cb_nodes);
+
+    /* agg_idx is used to index arrays that were allocated to be no bigger
+     * than fd->hints->cb_nodes.  Going past that would overrun them, so
+     * abort; this should never happen. */
+    if (agg_idx >= fd->hints->cb_nodes)
+        MPI_Abort(MPI_COMM_WORLD, 1);
+
+    /* bytes from 'off' up to the end of its stripe */
+    bytes_left = (off / (ADIO_Offset)stripe_size + 1) *
+                 (ADIO_Offset)stripe_size - off;
+    if (*len > bytes_left) {
+        /* this aggregator only has part of the requested contig. region */
+        *len = bytes_left;
+    }
+
+    /* map the index to a rank */
+    /* NOTE: FOR NOW WE DON'T HAVE A MAPPING...JUST DO 0..NPROCS_FOR_COLL */
+    return fd->hints->ranklist[agg_idx];
+}
+
+/* ADIOI_LUSTRE_Calc_my_req() - calculate what portions of the access requests
+ * of this process are located in the file domains of various processes
+ * (including this one)
+ *
+ * Inputs:
+ *   offset_list, len_list - the contig_access_count (offset, length) pairs
+ *                           requested by this process; zero-length entries
+ *                           are skipped
+ *   striping_info         - passed unchanged to
+ *                           ADIOI_LUSTRE_Calc_aggregator()
+ *   nprocs                - length of every per-process array built here
+ * Outputs (all allocated in this function and not freed by it):
+ *   *count_my_req_procs_ptr    - number of processes that receive at least
+ *                                one piece of this process's requests
+ *   *count_my_req_per_proc_ptr - nprocs counters: pieces per target process
+ *   *my_req_ptr                - nprocs ADIOI_Access entries; offsets/lens
+ *                                are allocated only where the counter is
+ *                                non-zero
+ *   *buf_idx_ptr               - nprocs int arrays (counter + 1 entries
+ *                                each), see the buf_idx note in the body
+ */
+
+
+void ADIOI_LUSTRE_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list,
+ ADIO_Offset *len_list, int contig_access_count,
+ int *striping_info, int nprocs,
+ int *count_my_req_procs_ptr,
+ int **count_my_req_per_proc_ptr,
+ ADIOI_Access **my_req_ptr,
+ int ***buf_idx_ptr)
+{
+ /* Nothing different from ADIOI_Calc_my_req(), except calling
+ * ADIOI_LUSTRE_Calc_aggregator() instead of the old one */
+ int *count_my_req_per_proc, count_my_req_procs, **buf_idx;
+ int i, l, proc;
+ ADIO_Offset avail_len, rem_len, curr_idx, off;
+ ADIOI_Access *my_req;
+
+ *count_my_req_per_proc_ptr = (int *) ADIOI_Calloc(nprocs, sizeof(int));
+ count_my_req_per_proc = *count_my_req_per_proc_ptr;
+ /* count_my_req_per_proc[i] gives the no. of contig. requests of this
+ * process in process i's file domain. calloc initializes to zero.
+ * I'm allocating memory of size nprocs, so that I can do an
+ * MPI_Alltoall later on.
+ */
+
+ buf_idx = (int **) ADIOI_Malloc(nprocs * sizeof(int*));
+
+ /* one pass just to calculate how much space to allocate for my_req;
+ * contig_access_count was calculated way back in ADIOI_Calc_my_off_len()
+ */
+ for (i = 0; i < contig_access_count; i++) {
+ /* short circuit offset/len processing if len == 0
+ * (zero-byte read/write)
+ */
+ if (len_list[i] == 0)
+ continue;
+ off = offset_list[i];
+ avail_len = len_list[i];
+ /* note: we set avail_len to be the total size of the access.
+ * then ADIOI_LUSTRE_Calc_aggregator() will modify the value to return
+ * the amount that was available.
+ */
+ proc = ADIOI_LUSTRE_Calc_aggregator(fd, off, &avail_len, striping_info);
+ count_my_req_per_proc[proc]++;
+
+ /* figure out how much data remains in the access;
+ * we'll take care of this data (if there is any)
+ * in the while loop below.
+ */
+ rem_len = len_list[i] - avail_len;
+
+ while (rem_len != 0) {
+ off += avail_len; /* point to first remaining byte */
+ avail_len = rem_len; /* save remaining size, pass to calc */
+ proc = ADIOI_LUSTRE_Calc_aggregator(fd, off, &avail_len, striping_info);
+ count_my_req_per_proc[proc]++;
+ rem_len -= avail_len; /* reduce remaining length by amount from fd */
+ }
+ }
+
+ /* buf_idx is relevant only if buftype_is_contig.
+ * buf_idx[i] gives the index into user_buf where data received
+ * from proc 'i' should be placed. This allows receives to be done
+ * without extra buffer. This can't be done if buftype is not contig.
+ * In the fill pass below, buf_idx[proc][l] is set to curr_idx, the
+ * running sum of the lengths of all pieces recorded before piece l.
+ */
+
+ /* initialize buf_idx vectors */
+ for (i = 0; i < nprocs; i++) {
+ /* add one to count_my_req_per_proc[i] to avoid zero size malloc */
+ buf_idx[i] = (int *) ADIOI_Malloc((count_my_req_per_proc[i] + 1)
+ * sizeof(int));
+ }
+
+ /* now allocate space for my_req, offset, and len */
+ *my_req_ptr = (ADIOI_Access *) ADIOI_Malloc(nprocs * sizeof(ADIOI_Access));
+ my_req = *my_req_ptr;
+
+ count_my_req_procs = 0;
+ for (i = 0; i < nprocs; i++) {
+ if (count_my_req_per_proc[i]) {
+ my_req[i].offsets = (ADIO_Offset *)
+ ADIOI_Malloc(count_my_req_per_proc[i] *
+ sizeof(ADIO_Offset));
+ my_req[i].lens = (int *) ADIOI_Malloc(count_my_req_per_proc[i] *
+ sizeof(int));
+ count_my_req_procs++;
+ }
+ my_req[i].count = 0; /* will be incremented where needed later */
+ }
+
+ /* now fill in my_req; this pass splits each access exactly as the
+ * counting pass above did, so the per-process counts match */
+ curr_idx = 0;
+ for (i = 0; i < contig_access_count; i++) {
+ /* short circuit offset/len processing if len == 0
+ * (zero-byte read/write) */
+ if (len_list[i] == 0)
+ continue;
+ off = offset_list[i];
+ avail_len = len_list[i];
+ proc = ADIOI_LUSTRE_Calc_aggregator(fd, off, &avail_len, striping_info);
+
+ l = my_req[proc].count;
+
+ ADIOI_Assert(curr_idx == (int) curr_idx);
+ ADIOI_Assert(l < count_my_req_per_proc[proc]);
+ buf_idx[proc][l] = (int) curr_idx;
+ curr_idx += avail_len;
+
+ rem_len = len_list[i] - avail_len;
+
+ /* store the proc, offset, and len information in an array
+ * of structures, my_req. Each structure contains the
+ * offsets and lengths located in that process's FD,
+ * and the associated count. lens is an int array, hence the
+ * assertion that avail_len fits in an int.
+ */
+ my_req[proc].offsets[l] = off;
+ ADIOI_Assert(avail_len == (int) avail_len);
+ my_req[proc].lens[l] = (int) avail_len;
+ my_req[proc].count++;
+
+ while (rem_len != 0) {
+ off += avail_len;
+ avail_len = rem_len;
+ proc = ADIOI_LUSTRE_Calc_aggregator(fd, off, &avail_len,
+ striping_info);
+
+ l = my_req[proc].count;
+ ADIOI_Assert(curr_idx == (int) curr_idx);
+ ADIOI_Assert(l < count_my_req_per_proc[proc]);
+ buf_idx[proc][l] = (int) curr_idx;
+
+ curr_idx += avail_len;
+ rem_len -= avail_len;
+
+ my_req[proc].offsets[l] = off;
+ ADIOI_Assert(avail_len == (int) avail_len);
+ my_req[proc].lens[l] = (int) avail_len;
+ my_req[proc].count++;
+ }
+ }
+
+#ifdef AGG_DEBUG
+ for (i = 0; i < nprocs; i++) {
+ if (count_my_req_per_proc[i] > 0) {
+ FPRINTF(stdout, "data needed from %d (count = %d):\n",
+ i, my_req[i].count);
+ for (l = 0; l < my_req[i].count; l++) {
+ FPRINTF(stdout, " off[%d] = %lld, len[%d] = %d\n",
+ l, my_req[i].offsets[l], l, my_req[i].lens[l]);
+ }
+ }
+ }
+#endif
+
+ *count_my_req_procs_ptr = count_my_req_procs;
+ *buf_idx_ptr = buf_idx;
+}
+
+/* ADIOI_LUSTRE_Docollect() - decide whether collective I/O should be used.
+ *
+ * Sums this process's request lengths (len_list[0..contig_access_count-1])
+ * and its request count over fd->comm, derives the average request size and
+ * compares it with fd->hints->fs_hints.lustre.coll_threshold.
+ *
+ * Returns 0 when the threshold is positive and the average request size is
+ * larger than it; returns 1 otherwise.  nprocs is not used.
+ *
+ * Collective over fd->comm: performs two MPI_Allreduce calls.
+ */
+int ADIOI_LUSTRE_Docollect(ADIO_File fd, int contig_access_count,
+                           ADIO_Offset *len_list, int nprocs)
+{
+    int i, docollect = 1, big_req_size = 0;
+    ADIO_Offset req_size = 0, total_req_size = 0;
+    /* kept as ADIO_Offset so that a very large average is not truncated
+     * before it is compared with the threshold */
+    ADIO_Offset avg_req_size = 0;
+    int total_access_count = 0;
+
+    /* calculate total_req_size and total_access_count */
+    for (i = 0; i < contig_access_count; i++)
+        req_size += len_list[i];
+    MPI_Allreduce(&req_size, &total_req_size, 1, MPI_LONG_LONG_INT, MPI_SUM,
+                  fd->comm);
+    MPI_Allreduce(&contig_access_count, &total_access_count, 1, MPI_INT, MPI_SUM,
+                  fd->comm);
+
+    /* estimate average req_size; when no process has any access the count
+     * is zero, so leave the average at 0 instead of dividing by zero */
+    if (total_access_count != 0)
+        avg_req_size = total_req_size / total_access_count;
+
+    /* get hint of big_req_size */
+    big_req_size = fd->hints->fs_hints.lustre.coll_threshold;
+    /* Don't perform collective I/O if there are big requests */
+    if ((big_req_size > 0) && (avg_req_size > (ADIO_Offset) big_req_size))
+        docollect = 0;
+
+    return docollect;
+}
diff --git a/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_fcntl.c b/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_fcntl.c
index 739d3b00ce..3afd16a011 100644
--- a/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_fcntl.c
+++ b/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_fcntl.c
@@ -25,7 +25,7 @@ void ADIOI_LUSTRE_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int
if (fd->fp_sys_posn != -1)
lseek(fd->fd_sys, fd->fp_sys_posn, SEEK_SET);
if (fcntl_struct->fsize == -1) {
- *error_code = MPIR_Err_create_code(MPI_SUCCESS,
+ *error_code = MPIO_Err_create_code(MPI_SUCCESS,
MPIR_ERR_RECOVERABLE, myname, __LINE__,
MPI_ERR_IO, "**io", "**io %s", strerror(errno));
}
@@ -56,7 +56,7 @@ void ADIOI_LUSTRE_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int
ADIO_ReadContig(fd, buf, len, MPI_BYTE, ADIO_EXPLICIT_OFFSET, done,
&status, error_code);
if (*error_code != MPI_SUCCESS) {
- *error_code = MPIR_Err_create_code(MPI_SUCCESS,
+ *error_code = MPIO_Err_create_code(MPI_SUCCESS,
MPIR_ERR_RECOVERABLE, myname, __LINE__,
MPI_ERR_IO, "**io", "**io %s", strerror(errno));
return;
diff --git a/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_hints.c b/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_hints.c
index 817b5bd802..d925779ef3 100644
--- a/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_hints.c
+++ b/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_hints.c
@@ -1,9 +1,11 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*
- * Copyright (C) 1997 University of Chicago.
+/*
+ * Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*
* Copyright (C) 2007 Oak Ridge National Laboratory
+ *
+ * Copyright (C) 2008 Sun Microsystems, Lustre group
*/
#include "ad_lustre.h"
@@ -11,70 +13,81 @@
void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
{
- char *value, *value_in_fd;
- int flag, tmp_val[3], str_factor=-1, str_unit=0, start_iodev=-1;
+ char *value;
+ int flag, stripe_val[3], str_factor = -1, str_unit=0, start_iodev=-1;
struct lov_user_md lum = { 0 };
int err, myrank, fd_sys, perm, amode, old_mask;
+ int int_val, tmp_val;
+ static char myname[] = "ADIOI_LUSTRE_SETINFO";
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
if ( (fd->info) == MPI_INFO_NULL) {
- /* This must be part of the open call. can set striping parameters
- if necessary. */
+ /* This must be part of the open call. can set striping parameters
+ if necessary. */
MPI_Info_create(&(fd->info));
- MPI_Info_set(fd->info, "direct_read", "false");
- MPI_Info_set(fd->info, "direct_write", "false");
+ ADIOI_Info_set(fd->info, "direct_read", "false");
+ ADIOI_Info_set(fd->info, "direct_write", "false");
fd->direct_read = fd->direct_write = 0;
-
- /* has user specified striping or server buffering parameters
+ /* initialize lustre hints */
+ ADIOI_Info_set(fd->info, "romio_lustre_co_ratio", "1");
+ fd->hints->fs_hints.lustre.co_ratio = 1;
+ ADIOI_Info_set(fd->info, "romio_lustre_coll_threshold", "0");
+ fd->hints->fs_hints.lustre.coll_threshold = 0;
+ ADIOI_Info_set(fd->info, "romio_lustre_ds_in_coll", "enable");
+ fd->hints->fs_hints.lustre.ds_in_coll = ADIOI_HINT_ENABLE;
+
+ /* has user specified striping or server buffering parameters
and do they have the same value on all processes? */
if (users_info != MPI_INFO_NULL) {
- MPI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
+ /* striping information */
+ ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
value, &flag);
- if (flag)
+ if (flag)
str_unit=atoi(value);
- MPI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL,
value, &flag);
- if (flag)
+ if (flag)
str_factor=atoi(value);
- MPI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL,
- value, &flag);
- if (flag)
+ ADIOI_Info_get(users_info, "romio_lustre_start_iodevice",
+ MPI_MAX_INFO_VAL, value, &flag);
+ if (flag)
start_iodev=atoi(value);
- MPI_Info_get(users_info, "direct_read", MPI_MAX_INFO_VAL,
- value, &flag);
+ /* direct read and write */
+ ADIOI_Info_get(users_info, "direct_read", MPI_MAX_INFO_VAL,
+ value, &flag);
if (flag && (!strcmp(value, "true") || !strcmp(value, "TRUE"))) {
- MPI_Info_set(fd->info, "direct_read", "true");
+ ADIOI_Info_set(fd->info, "direct_read", "true");
fd->direct_read = 1;
}
-
- MPI_Info_get(users_info, "direct_write", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(users_info, "direct_write", MPI_MAX_INFO_VAL,
value, &flag);
if (flag && (!strcmp(value, "true") || !strcmp(value, "TRUE"))) {
- MPI_Info_set(fd->info, "direct_write", "true");
+ ADIOI_Info_set(fd->info, "direct_write", "true");
fd->direct_write = 1;
}
}
+ /* set striping information with ioctl */
MPI_Comm_rank(fd->comm, &myrank);
if (myrank == 0) {
- tmp_val[0] = str_factor;
- tmp_val[1] = str_unit;
- tmp_val[2] = start_iodev;
+ stripe_val[0] = str_factor;
+ stripe_val[1] = str_unit;
+ stripe_val[2] = start_iodev;
}
- MPI_Bcast(tmp_val, 3, MPI_INT, 0, fd->comm);
+ MPI_Bcast(stripe_val, 3, MPI_INT, 0, fd->comm);
- if (tmp_val[0] != str_factor
- || tmp_val[1] != str_unit
- || tmp_val[2] != start_iodev) {
+ if (stripe_val[0] != str_factor
+ || stripe_val[1] != str_unit
+ || stripe_val[2] != start_iodev) {
FPRINTF(stderr, "ADIOI_LUSTRE_SetInfo: All keys"
"-striping_factor:striping_unit:start_iodevice "
"need to be identical across all processes\n");
MPI_Abort(MPI_COMM_WORLD, 1);
- } else if ((str_factor > 0) || (str_unit > 0) || (start_iodev >= 0)) {
+ } else if ((str_factor > 0) || (str_unit > 0) || (start_iodev >= 0)) {
/* if user has specified striping info, process 0 tries to set it */
if (!myrank) {
if (fd->perm == ADIO_PERM_NULL) {
@@ -100,9 +113,9 @@ void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
amode = amode | O_LOV_DELAY_CREATE | O_CREAT;
fd_sys = open(fd->filename, amode, perm);
- if (fd_sys == -1) {
- if (errno != EEXIST)
- fprintf(stderr,
+ if (fd_sys == -1) {
+ if (errno != EEXIST)
+ fprintf(stderr,
"Failure to open file %s %d %d\n",strerror(errno), amode, perm);
} else {
lum.lmm_magic = LOV_USER_MAGIC;
@@ -112,25 +125,73 @@ void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
lum.lmm_stripe_offset = start_iodev;
err = ioctl(fd_sys, LL_IOC_LOV_SETSTRIPE, &lum);
- if (err == -1 && errno != EEXIST) {
+ if (err == -1 && errno != EEXIST) {
fprintf(stderr, "Failure to set stripe info %s \n", strerror(errno));
}
close(fd_sys);
}
} /* End of striping parameters validation */
}
-
MPI_Barrier(fd->comm);
- /* set the values for collective I/O and data sieving parameters */
- ADIOI_GEN_SetInfo(fd, users_info, error_code);
- } else {
- /* The file has been opened previously and fd->fd_sys is a valid
- file descriptor. cannot set striping parameters now. */
-
- /* set the values for collective I/O and data sieving parameters */
- ADIOI_GEN_SetInfo(fd, users_info, error_code);
}
-
+ /* get other hint */
+ if (users_info != MPI_INFO_NULL) {
+ /* CO: IO Clients/OST,
+ * to keep the load balancing between clients and OSTs */
+ ADIOI_Info_get(users_info, "romio_lustre_co_ratio", MPI_MAX_INFO_VAL, value,
+ &flag);
+ if (flag && (int_val = atoi(value)) > 0) {
+ tmp_val = int_val;
+ MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
+ if (tmp_val != int_val) {
+ MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
+ "romio_lustre_co_ratio",
+ error_code);
+ ADIOI_Free(value);
+ return;
+ }
+ ADIOI_Info_set(fd->info, "romio_lustre_co_ratio", value);
+ fd->hints->fs_hints.lustre.co_ratio = atoi(value);
+ }
+ /* coll_threshold:
+ * if the req size is bigger than this, collective IO may not be performed.
+ */
+ ADIOI_Info_get(users_info, "romio_lustre_coll_threshold", MPI_MAX_INFO_VAL, value,
+ &flag);
+ if (flag && (int_val = atoi(value)) > 0) {
+ tmp_val = int_val;
+ MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
+ if (tmp_val != int_val) {
+ MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
+ "romio_lustre_coll_threshold",
+ error_code);
+ ADIOI_Free(value);
+ return;
+ }
+ ADIOI_Info_set(fd->info, "romio_lustre_coll_threshold", value);
+ fd->hints->fs_hints.lustre.coll_threshold = atoi(value);
+ }
+        /* ds_in_coll: disable data sieving in collective IO.
+         * Only the values "disable" and "DISABLE" are acted upon; any other
+         * value leaves the current setting untouched. */
+        ADIOI_Info_get(users_info, "romio_lustre_ds_in_coll", MPI_MAX_INFO_VAL,
+                       value, &flag);
+        if (flag && (!strcmp(value, "disable") ||
+                     !strcmp(value, "DISABLE"))) {
+            tmp_val = int_val = 2;
+            /* tmp_val is a single int, so exactly one element is broadcast;
+             * a count of 2 would write past the variable.
+             * NOTE(review): this broadcast is only reached by ranks that
+             * supplied the hint - confirm all ranks pass the same info. */
+            MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
+            if (tmp_val != int_val) {
+                MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
+                                                   "romio_lustre_ds_in_coll",
+                                                   error_code);
+                ADIOI_Free(value);
+                return;
+            }
+            ADIOI_Info_set(fd->info, "romio_lustre_ds_in_coll", "disable");
+            fd->hints->fs_hints.lustre.ds_in_coll = ADIOI_HINT_DISABLE;
+        }
+ }
+ /* set the values for collective I/O and data sieving parameters */
+ ADIOI_GEN_SetInfo(fd, users_info, error_code);
+
if (ADIOI_Direct_read) fd->direct_read = 1;
if (ADIOI_Direct_write) fd->direct_write = 1;
diff --git a/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_open.c b/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_open.c
index 17622f6562..1ee9870fcf 100644
--- a/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_open.c
+++ b/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_open.c
@@ -1,17 +1,25 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*
- * Copyright (C) 1997 University of Chicago.
+/*
+ * Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*
* Copyright (C) 2007 Oak Ridge National Laboratory
+ *
+ * Copyright (C) 2008 Sun Microsystems, Lustre group
*/
#include "ad_lustre.h"
+/* what is the basis for this define?
+ * what happens if there are more than 1k UUIDs? */
+
+#define MAX_LOV_UUID_COUNT 1000
+
void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code)
{
int perm, old_mask, amode, amode_direct;
- struct lov_user_md lum = { 0 };
+ int lumlen;
+ struct lov_user_md *lum = NULL;
char *value;
#if defined(MPICH2) || !defined(PRINT_ERR_MSG)
@@ -44,23 +52,37 @@ void ADIOI_LUSTRE_Open(ADIO_File fd, int *error_code)
if (fd->fd_sys != -1) {
int err;
- value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
-
/* get file striping information and set it in info */
- lum.lmm_magic = LOV_USER_MAGIC;
- err = ioctl(fd->fd_sys, LL_IOC_LOV_GETSTRIPE, (void *) &lum);
-
+ /* odd malloc here because lov_user_md contains some fixed data and
+ * then a list of 'lmm_objects' representing stripe */
+ lumlen = sizeof(struct lov_user_md) +
+ MAX_LOV_UUID_COUNT * sizeof(struct lov_user_ost_data);
+ /* furthermore, Pascal Deveze reports that, even though we pass a
+ * "GETSTRIPE" (read) flag to the ioctl, if some of the values of this
+ * struct are uninitialized, the call can give an error. calloc in case
+ * there are other members that must be initialized and in case
+ * lov_user_md struct changes in future */
+ lum = (struct lov_user_md *)ADIOI_Calloc(1,lumlen);
+ lum->lmm_magic = LOV_USER_MAGIC;
+ err = ioctl(fd->fd_sys, LL_IOC_LOV_GETSTRIPE, (void *)lum);
if (!err) {
- sprintf(value, "%d", lum.lmm_stripe_size);
- MPI_Info_set(fd->info, "striping_unit", value);
+ value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
- sprintf(value, "%d", lum.lmm_stripe_count);
- MPI_Info_set(fd->info, "striping_factor", value);
+ fd->hints->striping_unit = lum->lmm_stripe_size;
+ sprintf(value, "%d", lum->lmm_stripe_size);
+ ADIOI_Info_set(fd->info, "striping_unit", value);
- sprintf(value, "%d", lum.lmm_stripe_offset);
- MPI_Info_set(fd->info, "start_iodevice", value);
+ fd->hints->striping_factor = lum->lmm_stripe_count;
+ sprintf(value, "%d", lum->lmm_stripe_count);
+ ADIOI_Info_set(fd->info, "striping_factor", value);
+
+ fd->hints->fs_hints.lustre.start_iodevice = lum->lmm_stripe_offset;
+ sprintf(value, "%d", lum->lmm_stripe_offset);
+ ADIOI_Info_set(fd->info, "romio_lustre_start_iodevice", value);
+
+ ADIOI_Free(value);
}
- ADIOI_Free(value);
+ ADIOI_Free(lum);
if (fd->access_mode & ADIO_APPEND)
fd->fp_ind = fd->fp_sys_posn = lseek(fd->fd_sys, 0, SEEK_END);
diff --git a/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_rwcontig.c b/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_rwcontig.c
index 51f9357f65..dd8f5ec8a6 100644
--- a/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_rwcontig.c
+++ b/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_rwcontig.c
@@ -1,9 +1,11 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*
- * Copyright (C) 1997 University of Chicago.
+/*
+ * Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*
* Copyright (C) 2007 Oak Ridge National Laboratory
+ *
+ * Copyright (C) 2008 Sun Microsystems, Lustre group
*/
#define _XOPEN_SOURCE 600
@@ -18,7 +20,7 @@ static void ADIOI_LUSTRE_Aligned_Mem_File_Write(ADIO_File fd, void *buf, int len
static void ADIOI_LUSTRE_Aligned_Mem_File_Write(ADIO_File fd, void *buf, int len,
ADIO_Offset offset, int *err)
{
- int ntimes, rem, newrem, i, size, nbytes;
+ int rem, size, nbytes;
if (!(len % fd->d_miniosz) && (len >= fd->d_miniosz)) {
*err = pwrite(fd->fd_direct, buf, len, offset);
} else if (len < fd->d_miniosz) {
@@ -37,7 +39,7 @@ static void ADIOI_LUSTRE_Aligned_Mem_File_Read(ADIO_File fd, void *buf, int len,
static void ADIOI_LUSTRE_Aligned_Mem_File_Read(ADIO_File fd, void *buf, int len,
ADIO_Offset offset, int *err)
{
- int ntimes, rem, newrem, i, size, nbytes;
+ int rem, size, nbytes;
if (!(len % fd->d_miniosz) && (len >= fd->d_miniosz))
*err = pread(fd->fd_direct, buf, len, offset);
else if (len < fd->d_miniosz)
@@ -59,7 +61,6 @@ static int ADIOI_LUSTRE_Directio(ADIO_File fd, void *buf, int len,
{
int err=-1, diff, size=len, nbytes = 0;
void *newbuf;
- static char myname[] = "ADIOI_LUSTRE_Directio";
if (offset % fd->d_miniosz) {
diff = fd->d_miniosz - (offset % fd->d_miniosz);
@@ -87,7 +88,7 @@ static int ADIOI_LUSTRE_Directio(ADIO_File fd, void *buf, int len,
memcpy(newbuf, buf, size);
ADIOI_LUSTRE_Aligned_Mem_File_Write(fd, newbuf, size, offset, &err);
nbytes += err;
- free(newbuf);
+ ADIOI_Free(newbuf);
}
else nbytes += pwrite(fd->fd_sys, buf, size, offset);
}
@@ -102,7 +103,7 @@ static int ADIOI_LUSTRE_Directio(ADIO_File fd, void *buf, int len,
ADIOI_LUSTRE_Aligned_Mem_File_Read(fd, newbuf, size, offset, &err);
if (err > 0) memcpy(buf, newbuf, err);
nbytes += err;
- free(newbuf);
+ ADIOI_Free(newbuf);
}
else nbytes += pread(fd->fd_sys, buf, size, offset);
}
@@ -136,10 +137,23 @@ static void ADIOI_LUSTRE_IOContig(ADIO_File fd, void *buf, int count,
if (err == -1) goto ioerr;
}
- if (io_mode)
+ if (io_mode) {
+#ifdef ADIOI_MPE_LOGGING
+ MPE_Log_event(ADIOI_MPE_write_a, 0, NULL);
+#endif
err = write(fd->fd_sys, buf, len);
- else
+#ifdef ADIOI_MPE_LOGGING
+ MPE_Log_event(ADIOI_MPE_write_b, 0, NULL);
+#endif
+ } else {
+#ifdef ADIOI_MPE_LOGGING
+ MPE_Log_event(ADIOI_MPE_read_a, 0, NULL);
+#endif
err = read(fd->fd_sys, buf, len);
+#ifdef ADIOI_MPE_LOGGING
+ MPE_Log_event(ADIOI_MPE_read_b, 0, NULL);
+#endif
+ }
} else {
err = ADIOI_LUSTRE_Directio(fd, buf, len, offset, io_mode);
}
diff --git a/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_wrcoll.c b/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_wrcoll.c
new file mode 100644
index 0000000000..231465b481
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_wrcoll.c
@@ -0,0 +1,954 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ * Copyright (C) 1997 University of Chicago.
+ * See COPYRIGHT notice in top-level directory.
+ *
+ * Copyright (C) 2007 Oak Ridge National Laboratory
+ *
+ * Copyright (C) 2008 Sun Microsystems, Lustre group
+ */
+
+#include "ad_lustre.h"
+#include "adio_extern.h"
+
+/* prototypes of functions used for collective writes only. */
+static void ADIOI_LUSTRE_Exch_and_write(ADIO_File fd, void *buf,
+ MPI_Datatype datatype, int nprocs,
+ int myrank,
+ ADIOI_Access *others_req,
+ ADIOI_Access *my_req,
+ ADIO_Offset *offset_list,
+ ADIO_Offset *len_list,
+ int contig_access_count,
+ int *striping_info,
+ int **buf_idx, int *error_code);
+static void ADIOI_LUSTRE_Fill_send_buffer(ADIO_File fd, void *buf,
+ ADIOI_Flatlist_node *flat_buf,
+ char **send_buf,
+ ADIO_Offset *offset_list,
+ ADIO_Offset *len_list, int *send_size,
+ MPI_Request *requests,
+ int *sent_to_proc, int nprocs,
+ int myrank, int contig_access_count,
+ int *striping_info,
+ int *send_buf_idx,
+ int *curr_to_proc,
+ int *done_to_proc, int iter,
+ MPI_Aint buftype_extent);
+static void ADIOI_LUSTRE_W_Exchange_data(ADIO_File fd, void *buf,
+ char *write_buf,
+ ADIOI_Flatlist_node *flat_buf,
+ ADIO_Offset *offset_list,
+ ADIO_Offset *len_list, int *send_size,
+ int *recv_size, ADIO_Offset off,
+ int size, int *count,
+ int *start_pos, int *partial_recv,
+ int *sent_to_proc, int nprocs,
+ int myrank, int buftype_is_contig,
+ int contig_access_count,
+ int *striping_info,
+ ADIOI_Access *others_req,
+ int *send_buf_idx,
+ int *curr_to_proc,
+ int *done_to_proc, int *hole,
+ int iter, MPI_Aint buftype_extent,
+ int *buf_idx, int *error_code);
+void ADIOI_Heap_merge(ADIOI_Access *others_req, int *count,
+ ADIO_Offset *srt_off, int *srt_len, int *start_pos,
+ int nprocs, int nprocs_recv, int total_elements);
+
+void ADIOI_LUSTRE_WriteStridedColl(ADIO_File fd, void *buf, int count,
+ MPI_Datatype datatype,
+ int file_ptr_type, ADIO_Offset offset,
+ ADIO_Status *status, int *error_code)
+{
+ /* Uses a generalized version of the extended two-phase method described
+ * in "An Extended Two-Phase Method for Accessing Sections of
+ * Out-of-Core Arrays", Rajeev Thakur and Alok Choudhary,
+ * Scientific Programming, (5)4:301--317, Winter 1996.
+ * http://www.mcs.anl.gov/home/thakur/ext2ph.ps
+ */
+
+ ADIOI_Access *my_req;
+    /* array of nprocs access structures, one for each process in which
+       part of this process's request is located */
+
+ ADIOI_Access *others_req;
+ /* array of nprocs access structures, one for each other process
+ whose request is written by this process. */
+
+ int i, filetype_is_contig, nprocs, myrank, do_collect = 0;
+ int contig_access_count = 0, buftype_is_contig, interleave_count = 0;
+ int *count_my_req_per_proc, count_my_req_procs, count_others_req_procs;
+ ADIO_Offset orig_fp, start_offset, end_offset, off;
+ ADIO_Offset *offset_list = NULL, *st_offsets = NULL, *end_offsets = NULL;
+ ADIO_Offset *len_list = NULL;
+ int **buf_idx = NULL, *striping_info = NULL;
+ int old_error, tmp_error;
+
+ MPI_Comm_size(fd->comm, &nprocs);
+ MPI_Comm_rank(fd->comm, &myrank);
+
+ orig_fp = fd->fp_ind;
+
+    /* I/O pattern identification, done only if cb_write isn't disabled */
+ if (fd->hints->cb_write != ADIOI_HINT_DISABLE) {
+ /* For this process's request, calculate the list of offsets and
+ lengths in the file and determine the start and end offsets. */
+
+ /* Note: end_offset points to the last byte-offset that will be accessed.
+     * e.g., if start_offset=0 and 100 bytes are to be written, end_offset=99
+ */
+
+ ADIOI_Calc_my_off_len(fd, count, datatype, file_ptr_type, offset,
+ &offset_list, &len_list, &start_offset,
+ &end_offset, &contig_access_count);
+
+ /* each process communicates its start and end offsets to other
+ * processes. The result is an array each of start and end offsets
+ * stored in order of process rank.
+ */
+ st_offsets = (ADIO_Offset *) ADIOI_Malloc(nprocs * sizeof(ADIO_Offset));
+ end_offsets = (ADIO_Offset *) ADIOI_Malloc(nprocs * sizeof(ADIO_Offset));
+ MPI_Allgather(&start_offset, 1, ADIO_OFFSET, st_offsets, 1,
+ ADIO_OFFSET, fd->comm);
+ MPI_Allgather(&end_offset, 1, ADIO_OFFSET, end_offsets, 1,
+ ADIO_OFFSET, fd->comm);
+ /* are the accesses of different processes interleaved? */
+ for (i = 1; i < nprocs; i++)
+ if ((st_offsets[i] < end_offsets[i-1]) &&
+ (st_offsets[i] <= end_offsets[i]))
+ interleave_count++;
+ /* This is a rudimentary check for interleaving, but should suffice
+ for the moment. */
+
+ /* Two typical access patterns can benefit from collective write.
+ * 1) the processes are interleaved, and
+ * 2) the req size is small.
+ */
+ if (interleave_count > 0) {
+ do_collect = 1;
+ } else {
+ do_collect = ADIOI_LUSTRE_Docollect(fd, contig_access_count,
+ len_list, nprocs);
+ }
+ }
+ ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
+
+ /* Decide if collective I/O should be done */
+ if ((!do_collect && fd->hints->cb_write == ADIOI_HINT_AUTO) ||
+ fd->hints->cb_write == ADIOI_HINT_DISABLE) {
+
+ /* use independent accesses */
+ if (fd->hints->cb_write != ADIOI_HINT_DISABLE) {
+ ADIOI_Free(offset_list);
+ ADIOI_Free(len_list);
+ ADIOI_Free(st_offsets);
+ ADIOI_Free(end_offsets);
+ }
+
+ fd->fp_ind = orig_fp;
+ ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
+ if (buftype_is_contig && filetype_is_contig) {
+ if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
+ off = fd->disp + (ADIO_Offset)(fd->etype_size) * offset;
+ ADIO_WriteContig(fd, buf, count, datatype,
+ ADIO_EXPLICIT_OFFSET,
+ off, status, error_code);
+ } else
+ ADIO_WriteContig(fd, buf, count, datatype, ADIO_INDIVIDUAL,
+ 0, status, error_code);
+ } else {
+ ADIO_WriteStrided(fd, buf, count, datatype, file_ptr_type,
+ offset, status, error_code);
+ }
+ return;
+ }
+
+ /* Get Lustre hints information */
+ ADIOI_LUSTRE_Get_striping_info(fd, &striping_info, 1);
+
+ /* calculate what portions of the access requests of this process are
+ * located in which process
+ */
+ ADIOI_LUSTRE_Calc_my_req(fd, offset_list, len_list, contig_access_count,
+ striping_info, nprocs, &count_my_req_procs,
+ &count_my_req_per_proc, &my_req,
+ &buf_idx);
+
+ /* based on everyone's my_req, calculate what requests of other processes
+ * will be accessed by this process.
+ * count_others_req_procs = number of processes whose requests (including
+ * this process itself) will be accessed by this process
+ * count_others_req_per_proc[i] indicates how many separate contiguous
+ * requests of proc. i will be accessed by this process.
+ */
+
+ ADIOI_Calc_others_req(fd, count_my_req_procs, count_my_req_per_proc,
+ my_req, nprocs, myrank, &count_others_req_procs,
+ &others_req);
+ ADIOI_Free(count_my_req_per_proc);
+
+ /* exchange data and write in sizes of no more than stripe_size. */
+ ADIOI_LUSTRE_Exch_and_write(fd, buf, datatype, nprocs, myrank,
+ others_req, my_req, offset_list, len_list,
+ contig_access_count, striping_info,
+ buf_idx, error_code);
+
+ /* If this collective write is followed by an independent write,
+ * it's possible to have those subsequent writes on other processes
+ * race ahead and sneak in before the read-modify-write completes.
+ * We carry out a collective communication at the end here so no one
+ * can start independent i/o before collective I/O completes.
+ *
+ * need to do some gymnastics with the error codes so that if something
+ * went wrong, all processes report error, but if a process has a more
+ * specific error code, we can still have that process report the
+ * additional information */
+
+ old_error = *error_code;
+ if (*error_code != MPI_SUCCESS)
+ *error_code = MPI_ERR_IO;
+
+ /* optimization: if only one process performing i/o, we can perform
+ * a less-expensive Bcast */
+#ifdef ADIOI_MPE_LOGGING
+ MPE_Log_event(ADIOI_MPE_postwrite_a, 0, NULL);
+#endif
+ if (fd->hints->cb_nodes == 1)
+ MPI_Bcast(error_code, 1, MPI_INT,
+ fd->hints->ranklist[0], fd->comm);
+ else {
+ tmp_error = *error_code;
+ MPI_Allreduce(&tmp_error, error_code, 1, MPI_INT,
+ MPI_MAX, fd->comm);
+ }
+#ifdef ADIOI_MPE_LOGGING
+ MPE_Log_event(ADIOI_MPE_postwrite_b, 0, NULL);
+#endif
+
+ if ((old_error != MPI_SUCCESS) && (old_error != MPI_ERR_IO))
+ *error_code = old_error;
+
+
+ if (!buftype_is_contig)
+ ADIOI_Delete_flattened(datatype);
+
+ /* free all memory allocated for collective I/O */
+ /* free others_req */
+ for (i = 0; i < nprocs; i++) {
+ if (others_req[i].count) {
+ ADIOI_Free(others_req[i].offsets);
+ ADIOI_Free(others_req[i].lens);
+ ADIOI_Free(others_req[i].mem_ptrs);
+ }
+ }
+ ADIOI_Free(others_req);
+ /* free my_req here */
+ for (i = 0; i < nprocs; i++) {
+ if (my_req[i].count) {
+ ADIOI_Free(my_req[i].offsets);
+ ADIOI_Free(my_req[i].lens);
+ }
+ }
+ ADIOI_Free(my_req);
+ for (i = 0; i < nprocs; i++) {
+ ADIOI_Free(buf_idx[i]);
+ }
+ ADIOI_Free(buf_idx);
+ ADIOI_Free(offset_list);
+ ADIOI_Free(len_list);
+ ADIOI_Free(st_offsets);
+ ADIOI_Free(end_offsets);
+ ADIOI_Free(striping_info);
+
+#ifdef HAVE_STATUS_SET_BYTES
+ if (status) {
+ int bufsize, size;
+ /* Don't set status if it isn't needed */
+ MPI_Type_size(datatype, &size);
+ bufsize = size * count;
+ MPIR_Status_set_bytes(status, datatype, bufsize);
+ }
+ /* This is a temporary way of filling in status. The right way is to
+ * keep track of how much data was actually written during collective I/O.
+ */
+#endif
+
+ fd->fp_sys_posn = -1; /* set it to null. */
+}
+
+/* If successful, error_code is set to MPI_SUCCESS. Otherwise an error
+ * code is created and returned in error_code.
+ */
+static void ADIOI_LUSTRE_Exch_and_write(ADIO_File fd, void *buf,
+ MPI_Datatype datatype, int nprocs,
+ int myrank, ADIOI_Access *others_req,
+ ADIOI_Access *my_req,
+ ADIO_Offset *offset_list,
+ ADIO_Offset *len_list,
+ int contig_access_count,
+ int *striping_info, int **buf_idx,
+ int *error_code)
+{
+ /* Send data to appropriate processes and write in sizes of no more
+ * than lustre stripe_size.
+ * The idea is to reduce the amount of extra memory required for
+ * collective I/O. If all data were written all at once, which is much
+ * easier, it would require temp space more than the size of user_buf,
+ * which is often unacceptable. For example, to write a distributed
+ * array to a file, where each local array is 8Mbytes, requiring
+ * at least another 8Mbytes of temp space is unacceptable.
+ */
+
+ int hole, i, j, m, flag, ntimes = 1 , max_ntimes, buftype_is_contig;
+ ADIO_Offset st_loc = -1, end_loc = -1, min_st_loc, max_end_loc;
+ ADIO_Offset off, req_off, send_off, iter_st_off, *off_list;
+ ADIO_Offset max_size, step_size = 0;
+ int real_size, req_len, send_len;
+ int *recv_curr_offlen_ptr, *recv_count, *recv_size;
+ int *send_curr_offlen_ptr, *send_size;
+ int *partial_recv, *sent_to_proc, *recv_start_pos;
+ int *send_buf_idx, *curr_to_proc, *done_to_proc;
+ int *this_buf_idx;
+ char *write_buf = NULL;
+ MPI_Status status;
+ ADIOI_Flatlist_node *flat_buf = NULL;
+ MPI_Aint buftype_extent;
+ int stripe_size = striping_info[0], avail_cb_nodes = striping_info[2];
+ int data_sieving = 0;
+
+ *error_code = MPI_SUCCESS; /* changed below if error */
+ /* only I/O errors are currently reported */
+
+ /* calculate the number of writes of stripe size to be done.
+ * That gives the no. of communication phases as well.
+ * Note:
+ * Because we redistribute data in stripe-contiguous pattern for Lustre,
+ * each process has the same no. of communication phases.
+ */
+
+ for (i = 0; i < nprocs; i++) {
+ if (others_req[i].count) {
+ st_loc = others_req[i].offsets[0];
+ end_loc = others_req[i].offsets[0];
+ break;
+ }
+ }
+ for (i = 0; i < nprocs; i++) {
+ for (j = 0; j < others_req[i].count; j++) {
+ st_loc = ADIOI_MIN(st_loc, others_req[i].offsets[j]);
+ end_loc = ADIOI_MAX(end_loc, (others_req[i].offsets[j] +
+ others_req[i].lens[j] - 1));
+ }
+ }
+ /* this process does no writing. */
+ if ((st_loc == -1) && (end_loc == -1))
+ ntimes = 0;
+ MPI_Allreduce(&end_loc, &max_end_loc, 1, MPI_LONG_LONG_INT, MPI_MAX, fd->comm);
+    /* an unset st_loc (-1) must not win the MIN reduction below */
+ if (st_loc == -1)
+ st_loc = max_end_loc;
+ MPI_Allreduce(&st_loc, &min_st_loc, 1, MPI_LONG_LONG_INT, MPI_MIN, fd->comm);
+ /* align downward */
+ min_st_loc -= min_st_loc % (ADIO_Offset)stripe_size;
+
+ /* Each time, only avail_cb_nodes number of IO clients perform IO,
+ * so, step_size=avail_cb_nodes*stripe_size IO will be performed at most,
+ * and ntimes=whole_file_portion/step_size
+ */
+ step_size = (ADIO_Offset) avail_cb_nodes * stripe_size;
+ max_ntimes = (max_end_loc - min_st_loc + 1) / step_size
+ + (((max_end_loc - min_st_loc + 1) % step_size) ? 1 : 0);
+/* max_ntimes = (int)((max_end_loc - min_st_loc) / step_size + 1); */
+ if (ntimes)
+ write_buf = (char *) ADIOI_Malloc(stripe_size);
+
+ /* calculate the start offset for each iteration */
+ off_list = (ADIO_Offset *) ADIOI_Malloc(max_ntimes * sizeof(ADIO_Offset));
+ for (m = 0; m < max_ntimes; m ++)
+ off_list[m] = max_end_loc;
+ for (i = 0; i < nprocs; i++) {
+ for (j = 0; j < others_req[i].count; j ++) {
+ req_off = others_req[i].offsets[j];
+ m = (int)((req_off - min_st_loc) / step_size);
+ off_list[m] = ADIOI_MIN(off_list[m], req_off);
+ }
+ }
+
+ recv_curr_offlen_ptr = (int *) ADIOI_Calloc(nprocs, sizeof(int));
+ send_curr_offlen_ptr = (int *) ADIOI_Calloc(nprocs, sizeof(int));
+ /* their use is explained below. calloc initializes to 0. */
+
+ recv_count = (int *) ADIOI_Malloc(nprocs * sizeof(int));
+ /* to store count of how many off-len pairs per proc are satisfied
+ in an iteration. */
+
+ send_size = (int *) ADIOI_Malloc(nprocs * sizeof(int));
+ /* total size of data to be sent to each proc. in an iteration.
+ Of size nprocs so that I can use MPI_Alltoall later. */
+
+ recv_size = (int *) ADIOI_Malloc(nprocs * sizeof(int));
+ /* total size of data to be recd. from each proc. in an iteration. */
+
+ sent_to_proc = (int *) ADIOI_Calloc(nprocs, sizeof(int));
+ /* amount of data sent to each proc so far. Used in
+ ADIOI_LUSTRE_Fill_send_buffer. Initialized to 0 here. */
+
+ send_buf_idx = (int *) ADIOI_Malloc(nprocs * sizeof(int));
+ curr_to_proc = (int *) ADIOI_Malloc(nprocs * sizeof(int));
+ done_to_proc = (int *) ADIOI_Malloc(nprocs * sizeof(int));
+    /* The above three are used in ADIOI_LUSTRE_Fill_send_buffer */
+
+ this_buf_idx = (int *) ADIOI_Malloc(nprocs * sizeof(int));
+
+ recv_start_pos = (int *) ADIOI_Malloc(nprocs * sizeof(int));
+ /* used to store the starting value of recv_curr_offlen_ptr[i] in
+ this iteration */
+
+ ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
+ if (!buftype_is_contig) {
+ ADIOI_Flatten_datatype(datatype);
+ flat_buf = ADIOI_Flatlist;
+ while (flat_buf->type != datatype)
+ flat_buf = flat_buf->next;
+ }
+ MPI_Type_extent(datatype, &buftype_extent);
+ /* I need to check if there are any outstanding nonblocking writes to
+ * the file, which could potentially interfere with the writes taking
+ * place in this collective write call. Since this is not likely to be
+     * common, the simplest fix is for each process to complete all pending
+     * nonblocking operations first (currently disabled: see code below).
+ */
+ /*ADIOI_Complete_async(error_code);
+ if (*error_code != MPI_SUCCESS) return;
+ MPI_Barrier(fd->comm);
+ */
+
+ iter_st_off = min_st_loc;
+
+    /* Although we have reorganized the data according to OST index,
+ * a read-modify-write will be done if there is a hole between the data.
+ * For example: if blocksize=60, xfersize=30 and stripe_size=100,
+ * then rank0 will collect data [0, 30] and [60, 90] then write. There
+ * is a hole in [30, 60], which will cause a read-modify-write in [0, 90].
+ *
+ * To reduce its impact on the performance, we can disable data sieving
+ * by hint "ds_in_coll".
+ */
+ /* check the hint for data sieving */
+ data_sieving = fd->hints->fs_hints.lustre.ds_in_coll;
+
+ for (m = 0; m < max_ntimes; m++) {
+ /* go through all others_req and my_req to check which will be received
+ * and sent in this iteration.
+ */
+
+ /* Note that MPI guarantees that displacements in filetypes are in
+ monotonically nondecreasing order and that, for writes, the
+ filetypes cannot specify overlapping regions in the file. This
+ simplifies implementation a bit compared to reads. */
+
+ /*
+ off = start offset in the file for the data to be written in
+ this iteration
+ iter_st_off = start offset of this iteration
+ real_size = size of data written (bytes) corresponding to off
+ max_size = possible maximum size of data written in this iteration
+ req_off = offset in the file for a particular contiguous request minus
+ what was satisfied in previous iteration
+ send_off = offset the request needed by other processes in this iteration
+ req_len = size corresponding to req_off
+ send_len = size corresponding to send_off
+ */
+
+ /* first calculate what should be communicated */
+ for (i = 0; i < nprocs; i++)
+ recv_count[i] = recv_size[i] = send_size[i] = 0;
+
+ off = off_list[m];
+ max_size = ADIOI_MIN(step_size, max_end_loc - iter_st_off + 1);
+ real_size = (int) ADIOI_MIN((off / stripe_size + 1) * stripe_size -
+ off,
+ end_loc - off + 1);
+
+ for (i = 0; i < nprocs; i++) {
+ if (my_req[i].count) {
+ this_buf_idx[i] = buf_idx[i][send_curr_offlen_ptr[i]];
+ for (j = send_curr_offlen_ptr[i]; j < my_req[i].count; j++) {
+ send_off = my_req[i].offsets[j];
+ send_len = my_req[i].lens[j];
+ if (send_off < iter_st_off + max_size) {
+ send_size[i] += send_len;
+ } else {
+ break;
+ }
+ }
+ send_curr_offlen_ptr[i] = j;
+ }
+ if (others_req[i].count) {
+ recv_start_pos[i] = recv_curr_offlen_ptr[i];
+ for (j = recv_curr_offlen_ptr[i]; j < others_req[i].count; j++) {
+ req_off = others_req[i].offsets[j];
+ req_len = others_req[i].lens[j];
+ if (req_off < iter_st_off + max_size) {
+ recv_count[i]++;
+ ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)write_buf)+req_off-off) == (ADIO_Offset)(MPIR_Upint)(write_buf+req_off-off));
+ MPI_Address(write_buf + req_off - off,
+ &(others_req[i].mem_ptrs[j]));
+ recv_size[i] += req_len;
+ } else {
+ break;
+ }
+ }
+ recv_curr_offlen_ptr[i] = j;
+ }
+ }
+ /* use variable "hole" to pass data_sieving flag into W_Exchange_data */
+ hole = data_sieving;
+ ADIOI_LUSTRE_W_Exchange_data(fd, buf, write_buf, flat_buf, offset_list,
+ len_list, send_size, recv_size, off, real_size,
+ recv_count, recv_start_pos, partial_recv,
+ sent_to_proc, nprocs, myrank,
+ buftype_is_contig, contig_access_count,
+ striping_info, others_req, send_buf_idx,
+ curr_to_proc, done_to_proc, &hole, m,
+ buftype_extent, this_buf_idx, error_code);
+ if (*error_code != MPI_SUCCESS)
+ goto over;
+
+ flag = 0;
+ for (i = 0; i < nprocs; i++)
+ if (recv_count[i]) {
+ flag = 1;
+ break;
+ }
+ if (flag) {
+ /* check whether to do data sieving */
+ if(data_sieving == ADIOI_HINT_ENABLE) {
+ ADIO_WriteContig(fd, write_buf, real_size, MPI_BYTE,
+ ADIO_EXPLICIT_OFFSET, off, &status,
+ error_code);
+ } else {
+ /* if there is no hole, write the data in a single call;
+ * otherwise, write each received piece separately */
+ if (!hole) {
+ ADIO_WriteContig(fd, write_buf, real_size, MPI_BYTE,
+ ADIO_EXPLICIT_OFFSET, off, &status,
+ error_code);
+ } else {
+ for (i = 0; i < nprocs; i++) {
+ if (others_req[i].count) {
+ for (j = 0; j < others_req[i].count; j++) {
+ if (others_req[i].offsets[j] < off + real_size &&
+ others_req[i].offsets[j] >= off) {
+ ADIO_WriteContig(fd,
+ write_buf + others_req[i].offsets[j] - off,
+ others_req[i].lens[j],
+ MPI_BYTE, ADIO_EXPLICIT_OFFSET,
+ others_req[i].offsets[j], &status,
+ error_code);
+ if (*error_code != MPI_SUCCESS)
+ goto over;
+ }
+ }
+ }
+ }
+ }
+ }
+ if (*error_code != MPI_SUCCESS)
+ goto over;
+ }
+ iter_st_off += max_size;
+ }
+over:
+ if (ntimes)
+ ADIOI_Free(write_buf);
+ ADIOI_Free(recv_curr_offlen_ptr);
+ ADIOI_Free(send_curr_offlen_ptr);
+ ADIOI_Free(recv_count);
+ ADIOI_Free(send_size);
+ ADIOI_Free(recv_size);
+ ADIOI_Free(sent_to_proc);
+ ADIOI_Free(recv_start_pos);
+ ADIOI_Free(send_buf_idx);
+ ADIOI_Free(curr_to_proc);
+ ADIOI_Free(done_to_proc);
+ ADIOI_Free(this_buf_idx);
+ ADIOI_Free(off_list);
+}
+
+/* Sets error_code to MPI_SUCCESS if successful, or creates an error code
+ * in the case of error.
+ */
+static void ADIOI_LUSTRE_W_Exchange_data(ADIO_File fd, void *buf,
+ char *write_buf,
+ ADIOI_Flatlist_node *flat_buf,
+ ADIO_Offset *offset_list,
+ ADIO_Offset *len_list, int *send_size,
+ int *recv_size, ADIO_Offset off,
+ int size, int *count,
+ int *start_pos, int *partial_recv,
+ int *sent_to_proc, int nprocs,
+ int myrank, int buftype_is_contig,
+ int contig_access_count,
+ int *striping_info,
+ ADIOI_Access *others_req,
+ int *send_buf_idx,
+ int *curr_to_proc, int *done_to_proc,
+ int *hole, int iter,
+ MPI_Aint buftype_extent,
+ int *buf_idx, int *error_code)
+{
+ int i, j, nprocs_recv, nprocs_send, err;
+ char **send_buf = NULL;
+ MPI_Request *requests, *send_req;
+ MPI_Datatype *recv_types;
+ MPI_Status *statuses, status;
+ int *srt_len, sum, sum_recv;
+ ADIO_Offset *srt_off;
+ int data_sieving = *hole;
+ static char myname[] = "ADIOI_W_EXCHANGE_DATA";
+
+ /* create derived datatypes for recv */
+ nprocs_recv = 0;
+ for (i = 0; i < nprocs; i++)
+ if (recv_size[i])
+ nprocs_recv++;
+
+ recv_types = (MPI_Datatype *) ADIOI_Malloc((nprocs_recv + 1) *
+ sizeof(MPI_Datatype));
+ /* +1 to avoid a 0-size malloc */
+
+ j = 0;
+ for (i = 0; i < nprocs; i++) {
+ if (recv_size[i]) {
+ MPI_Type_hindexed(count[i],
+ &(others_req[i].lens[start_pos[i]]),
+ &(others_req[i].mem_ptrs[start_pos[i]]),
+ MPI_BYTE, recv_types + j);
+ /* absolute displacements; use MPI_BOTTOM in recv */
+ MPI_Type_commit(recv_types + j);
+ j++;
+ }
+ }
+
+ /* To avoid a read-modify-write,
+ * check if there are holes in the data to be written.
+ * For this, merge the (sorted) offset lists others_req using a heap-merge.
+ */
+
+ sum = 0;
+ for (i = 0; i < nprocs; i++)
+ sum += count[i];
+ srt_off = (ADIO_Offset *) ADIOI_Malloc((sum + 1) * sizeof(ADIO_Offset));
+ srt_len = (int *) ADIOI_Malloc((sum + 1) * sizeof(int));
+ /* +1 to avoid a 0-size malloc */
+
+ ADIOI_Heap_merge(others_req, count, srt_off, srt_len, start_pos,
+ nprocs, nprocs_recv, sum);
+
+ /* check if there are any holes */
+ *hole = 0;
+ for (i = 0; i < sum - 1; i++) {
+ if (srt_off[i] + srt_len[i] < srt_off[i + 1]) {
+ *hole = 1;
+ break;
+ }
+ }
+ /* In some cases (see John Bent ROMIO REQ # 835), an odd interaction
+ * between aggregation, nominally contiguous regions, and cb_buffer_size
+ * should be handled with a read-modify-write (otherwise we will write out
+     * more data than we receive from everyone else (inclusive)), so override
+ * hole detection
+ */
+ if (*hole == 0) {
+ sum_recv = 0;
+ for (i = 0; i < nprocs; i++)
+ sum_recv += recv_size[i];
+ if (size > sum_recv)
+ *hole = 1;
+ }
+ /* check the hint for data sieving */
+ if (data_sieving == ADIOI_HINT_ENABLE && nprocs_recv && *hole) {
+ ADIO_ReadContig(fd, write_buf, size, MPI_BYTE,
+ ADIO_EXPLICIT_OFFSET, off, &status, &err);
+ /* --BEGIN ERROR HANDLING-- */
+ if (err != MPI_SUCCESS) {
+ *error_code = MPIO_Err_create_code(err,
+ MPIR_ERR_RECOVERABLE,
+ myname, __LINE__,
+ MPI_ERR_IO,
+ "**ioRMWrdwr", 0);
+ ADIOI_Free(recv_types);
+ ADIOI_Free(srt_off);
+ ADIOI_Free(srt_len);
+ return;
+ }
+ /* --END ERROR HANDLING-- */
+ }
+ ADIOI_Free(srt_off);
+ ADIOI_Free(srt_len);
+
+ nprocs_send = 0;
+ for (i = 0; i < nprocs; i++)
+ if (send_size[i])
+ nprocs_send++;
+
+ if (fd->atomicity) {
+ /* bug fix from Wei-keng Liao and Kenin Coloma */
+ requests = (MPI_Request *) ADIOI_Malloc((nprocs_send + 1) *
+ sizeof(MPI_Request));
+ send_req = requests;
+ } else {
+ requests = (MPI_Request *) ADIOI_Malloc((nprocs_send + nprocs_recv + 1)*
+ sizeof(MPI_Request));
+ /* +1 to avoid a 0-size malloc */
+
+ /* post receives */
+ j = 0;
+ for (i = 0; i < nprocs; i++) {
+ if (recv_size[i]) {
+ MPI_Irecv(MPI_BOTTOM, 1, recv_types[j], i,
+ myrank + i + 100 * iter, fd->comm, requests + j);
+ j++;
+ }
+ }
+ send_req = requests + nprocs_recv;
+ }
+
+ /* post sends.
+ * if buftype_is_contig, data can be directly sent from
+ * user buf at location given by buf_idx. else use send_buf.
+ */
+ if (buftype_is_contig) {
+ j = 0;
+ for (i = 0; i < nprocs; i++)
+ if (send_size[i]) {
+ ADIOI_Assert(buf_idx[i] != -1);
+ MPI_Isend(((char *) buf) + buf_idx[i], send_size[i],
+ MPI_BYTE, i, myrank + i + 100 * iter, fd->comm,
+ send_req + j);
+ j++;
+ }
+ } else
+ if (nprocs_send) {
+ /* buftype is not contig */
+ send_buf = (char **) ADIOI_Malloc(nprocs * sizeof(char *));
+ for (i = 0; i < nprocs; i++)
+ if (send_size[i])
+ send_buf[i] = (char *) ADIOI_Malloc(send_size[i]);
+
+ ADIOI_LUSTRE_Fill_send_buffer(fd, buf, flat_buf, send_buf, offset_list,
+ len_list, send_size, send_req,
+ sent_to_proc, nprocs, myrank,
+ contig_access_count, striping_info,
+ send_buf_idx, curr_to_proc, done_to_proc,
+ iter, buftype_extent);
+ /* the send is done in ADIOI_LUSTRE_Fill_send_buffer */
+ }
+
+ /* bug fix from Wei-keng Liao and Kenin Coloma */
+ if (fd->atomicity) {
+ j = 0;
+ for (i = 0; i < nprocs; i++) {
+ MPI_Status wkl_status;
+ if (recv_size[i]) {
+ MPI_Recv(MPI_BOTTOM, 1, recv_types[j], i,
+ myrank + i + 100 * iter, fd->comm, &wkl_status);
+ j++;
+ }
+ }
+ }
+
+ for (i = 0; i < nprocs_recv; i++)
+ MPI_Type_free(recv_types + i);
+ ADIOI_Free(recv_types);
+
+ /* bug fix from Wei-keng Liao and Kenin Coloma */
+ /* +1 to avoid a 0-size malloc */
+ if (fd->atomicity) {
+ statuses = (MPI_Status *) ADIOI_Malloc((nprocs_send + 1) *
+ sizeof(MPI_Status));
+ } else {
+ statuses = (MPI_Status *) ADIOI_Malloc((nprocs_send + nprocs_recv + 1) *
+ sizeof(MPI_Status));
+ }
+
+#ifdef NEEDS_MPI_TEST
+ i = 0;
+ if (fd->atomicity) {
+ /* bug fix from Wei-keng Liao and Kenin Coloma */
+ while (!i)
+ MPI_Testall(nprocs_send, send_req, &i, statuses);
+ } else {
+ while (!i)
+ MPI_Testall(nprocs_send + nprocs_recv, requests, &i, statuses);
+ }
+#else
+ /* bug fix from Wei-keng Liao and Kenin Coloma */
+ if (fd->atomicity)
+ MPI_Waitall(nprocs_send, send_req, statuses);
+ else
+ MPI_Waitall(nprocs_send + nprocs_recv, requests, statuses);
+#endif
+ ADIOI_Free(statuses);
+ ADIOI_Free(requests);
+ if (!buftype_is_contig && nprocs_send) {
+ for (i = 0; i < nprocs; i++)
+ if (send_size[i])
+ ADIOI_Free(send_buf[i]);
+ ADIOI_Free(send_buf);
+ }
+}
+
+#define ADIOI_BUF_INCR \
+{ \
+ while (buf_incr) { \
+ size_in_buf = ADIOI_MIN(buf_incr, flat_buf_sz); \
+ user_buf_idx += size_in_buf; \
+ flat_buf_sz -= size_in_buf; \
+ if (!flat_buf_sz) { \
+ if (flat_buf_idx < (flat_buf->count - 1)) flat_buf_idx++; \
+ else { \
+ flat_buf_idx = 0; \
+ n_buftypes++; \
+ } \
+ user_buf_idx = flat_buf->indices[flat_buf_idx] + \
+ (ADIO_Offset)n_buftypes*(ADIO_Offset)buftype_extent; \
+ flat_buf_sz = flat_buf->blocklens[flat_buf_idx]; \
+ } \
+ buf_incr -= size_in_buf; \
+ } \
+}
+
+
+#define ADIOI_BUF_COPY \
+{ \
+ while (size) { \
+ size_in_buf = ADIOI_MIN(size, flat_buf_sz); \
+ ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + user_buf_idx) == (ADIO_Offset)(MPIR_Upint)((MPIR_Upint)buf + user_buf_idx)); \
+ ADIOI_Assert(size_in_buf == (size_t)size_in_buf); \
+ memcpy(&(send_buf[p][send_buf_idx[p]]), \
+ ((char *) buf) + user_buf_idx, size_in_buf); \
+ send_buf_idx[p] += size_in_buf; \
+ user_buf_idx += size_in_buf; \
+ flat_buf_sz -= size_in_buf; \
+ if (!flat_buf_sz) { \
+ if (flat_buf_idx < (flat_buf->count - 1)) flat_buf_idx++; \
+ else { \
+ flat_buf_idx = 0; \
+ n_buftypes++; \
+ } \
+ user_buf_idx = flat_buf->indices[flat_buf_idx] + \
+ (ADIO_Offset)n_buftypes*(ADIO_Offset)buftype_extent; \
+ flat_buf_sz = flat_buf->blocklens[flat_buf_idx]; \
+ } \
+ size -= size_in_buf; \
+ buf_incr -= size_in_buf; \
+ } \
+ ADIOI_BUF_INCR \
+}
+
+static void ADIOI_LUSTRE_Fill_send_buffer(ADIO_File fd, void *buf,
+ ADIOI_Flatlist_node *flat_buf,
+ char **send_buf,
+ ADIO_Offset *offset_list,
+ ADIO_Offset *len_list, int *send_size,
+ MPI_Request *requests,
+ int *sent_to_proc, int nprocs,
+ int myrank,
+ int contig_access_count,
+ int *striping_info,
+ int *send_buf_idx,
+ int *curr_to_proc,
+ int *done_to_proc, int iter,
+ MPI_Aint buftype_extent)
+{
+ /* this function is only called if buftype is not contig */
+ int i, p, flat_buf_idx, size;
+ int flat_buf_sz, buf_incr, size_in_buf, jj, n_buftypes;
+ ADIO_Offset off, len, rem_len, user_buf_idx;
+
+ /* curr_to_proc[p] = amount of data sent to proc. p that has already
+ * been accounted for so far
+ * done_to_proc[p] = amount of data already sent to proc. p in
+ * previous iterations
+ * user_buf_idx = current location in user buffer
+ * send_buf_idx[p] = current location in send_buf of proc. p
+ */
+
+ for (i = 0; i < nprocs; i++) {
+ send_buf_idx[i] = curr_to_proc[i] = 0;
+ done_to_proc[i] = sent_to_proc[i];
+ }
+ jj = 0;
+
+ user_buf_idx = flat_buf->indices[0];
+ flat_buf_idx = 0;
+ n_buftypes = 0;
+ flat_buf_sz = flat_buf->blocklens[0];
+
+ /* flat_buf_idx = current index into flattened buftype
+ * flat_buf_sz = size of current contiguous component in flattened buf
+ */
+ for (i = 0; i < contig_access_count; i++) {
+ off = offset_list[i];
+ rem_len = (ADIO_Offset) len_list[i];
+
+ /* this request may span more than one process */
+ while (rem_len != 0) {
+ len = rem_len;
+ /* NOTE: len value is modified by ADIOI_LUSTRE_Calc_aggregator() to be no
+ * longer than the single region that processor "p" is responsible
+ * for.
+ */
+ p = ADIOI_LUSTRE_Calc_aggregator(fd, off, &len, striping_info);
+
+ if (send_buf_idx[p] < send_size[p]) {
+ if (curr_to_proc[p] + len > done_to_proc[p]) {
+ if (done_to_proc[p] > curr_to_proc[p]) {
+ size = (int) ADIOI_MIN(curr_to_proc[p] + len -
+ done_to_proc[p],
+ send_size[p] -
+ send_buf_idx[p]);
+ buf_incr = done_to_proc[p] - curr_to_proc[p];
+ ADIOI_BUF_INCR
+ ADIOI_Assert((curr_to_proc[p] + len - done_to_proc[p]) == (unsigned)(curr_to_proc[p] + len - done_to_proc[p]));
+ buf_incr = (int) (curr_to_proc[p] + len -
+ done_to_proc[p]);
+ ADIOI_Assert((done_to_proc[p] + size) == (unsigned)(done_to_proc[p] + size));
+ curr_to_proc[p] = done_to_proc[p] + size;
+ ADIOI_BUF_COPY
+ } else {
+ size = (int) ADIOI_MIN(len, send_size[p] -
+ send_buf_idx[p]);
+ buf_incr = (int) len;
+ ADIOI_Assert((curr_to_proc[p] + size) == (unsigned)((ADIO_Offset)curr_to_proc[p] + size));
+ curr_to_proc[p] += size;
+ ADIOI_BUF_COPY
+ }
+ if (send_buf_idx[p] == send_size[p]) {
+ MPI_Isend(send_buf[p], send_size[p], MPI_BYTE, p,
+ myrank + p + 100 * iter, fd->comm,
+ requests + jj);
+ jj++;
+ }
+ } else {
+ ADIOI_Assert((curr_to_proc[p] + len) == (unsigned)((ADIO_Offset)curr_to_proc[p] + len));
+ curr_to_proc[p] += (int) len;
+ buf_incr = (int) len;
+ ADIOI_BUF_INCR
+ }
+ } else {
+ buf_incr = (int) len;
+ ADIOI_BUF_INCR
+ }
+ off += len;
+ rem_len -= len;
+ }
+ }
+ for (i = 0; i < nprocs; i++)
+ if (send_size[i])
+ sent_to_proc[i] = curr_to_proc[i];
+}
diff --git a/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_wrstr.c b/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_wrstr.c
new file mode 100644
index 0000000000..2854a37c7e
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/ad_lustre/ad_lustre_wrstr.c
@@ -0,0 +1,530 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ * Copyright (C) 1997 University of Chicago.
+ * See COPYRIGHT notice in top-level directory.
+ *
+ * Copyright (C) 2007 Oak Ridge National Laboratory
+ *
+ * Copyright (C) 2008 Sun Microsystems, Lustre group
+ */
+
+#include "ad_lustre.h"
+#include "adio_extern.h"
+
+/*
+ * ADIOI_BUFFERED_WRITE: copy one contiguous request (req_off, req_len,
+ * taken from the user buffer at userbuf_off) into the staging buffer
+ * writebuf, doing read-modify-write one window at a time.
+ *
+ * The macro is not self-contained. It reads and writes these names from
+ * the expanding function's scope: fd, buf, writebuf, writebuf_off,
+ * writebuf_len, write_sz, req_off, req_len, userbuf_off, end_offset,
+ * stripe_size, status1, error_code and myname.
+ *
+ * Each window starts at writebuf_off and is clipped to whichever comes
+ * first: end_offset, or the next multiple of stripe_size.
+ *
+ * Side effects: req_len and userbuf_off are advanced while the request is
+ * split across windows. On any I/O error the macro wraps *error_code,
+ * frees writebuf and returns from the ENCLOSING function.
+ *
+ * NOTE(review): the failure paths after ADIO_ReadContig return without
+ * releasing the ADIOI_WRITE_LOCK taken just before the read.
+ */
+#define ADIOI_BUFFERED_WRITE \
+{ \
+ /* request starts at or past the end of the current window: flush it */ \
+ if (req_off >= writebuf_off + writebuf_len) { \
+ if (writebuf_len) { \
+ ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, \
+ ADIO_EXPLICIT_OFFSET, writebuf_off, \
+ &status1, error_code); \
+ if (!(fd->atomicity)) \
+ ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
+ if (*error_code != MPI_SUCCESS) { \
+ *error_code = MPIO_Err_create_code(*error_code, \
+ MPIR_ERR_RECOVERABLE, \
+ myname, \
+ __LINE__, MPI_ERR_IO, \
+ "**iowswc", 0); \
+ ADIOI_Free(writebuf); \
+ return; \
+ } \
+ } \
+ /* open a new window at the request offset */ \
+ writebuf_off = req_off; \
+ /* stripe_size alignment */ \
+ writebuf_len = (unsigned) ADIOI_MIN(end_offset - writebuf_off + 1, \
+ (writebuf_off / stripe_size + 1) * \
+ stripe_size - writebuf_off); \
+ if (!(fd->atomicity)) \
+ ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
+ /* pre-load the window so bytes not overwritten are preserved */ \
+ ADIO_ReadContig(fd, writebuf, writebuf_len, MPI_BYTE, \
+ ADIO_EXPLICIT_OFFSET, \
+ writebuf_off, &status1, error_code); \
+ if (*error_code != MPI_SUCCESS) { \
+ *error_code = MPIO_Err_create_code(*error_code, \
+ MPIR_ERR_RECOVERABLE, \
+ myname, \
+ __LINE__, MPI_ERR_IO, \
+ "**iowsrc", 0); \
+ ADIOI_Free(writebuf); \
+ return; \
+ } \
+ } \
+ /* copy as much of the request as fits in the current window */ \
+ write_sz = (unsigned) (ADIOI_MIN(req_len, \
+ writebuf_off + writebuf_len - req_off)); \
+ ADIOI_Assert((ADIO_Offset)write_sz == \
+ ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \
+ memcpy(writebuf + req_off - writebuf_off, (char *)buf +userbuf_off, write_sz); \
+ /* request spills past the window: flush, advance, re-read, copy again */ \
+ while (write_sz != req_len) { \
+ ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, \
+ ADIO_EXPLICIT_OFFSET, writebuf_off, &status1, error_code); \
+ if (!(fd->atomicity)) \
+ ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
+ if (*error_code != MPI_SUCCESS) { \
+ *error_code = MPIO_Err_create_code(*error_code, \
+ MPIR_ERR_RECOVERABLE, myname, \
+ __LINE__, MPI_ERR_IO, \
+ "**iowswc", 0); \
+ ADIOI_Free(writebuf); \
+ return; \
+ } \
+ req_len -= write_sz; \
+ userbuf_off += write_sz; \
+ writebuf_off += writebuf_len; \
+ /* stripe_size alignment */ \
+ writebuf_len = (unsigned) ADIOI_MIN(end_offset - writebuf_off + 1, \
+ (writebuf_off / stripe_size + 1) * \
+ stripe_size - writebuf_off); \
+ if (!(fd->atomicity)) \
+ ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
+ ADIO_ReadContig(fd, writebuf, writebuf_len, MPI_BYTE, \
+ ADIO_EXPLICIT_OFFSET, \
+ writebuf_off, &status1, error_code); \
+ if (*error_code != MPI_SUCCESS) { \
+ *error_code = MPIO_Err_create_code(*error_code, \
+ MPIR_ERR_RECOVERABLE, myname, \
+ __LINE__, MPI_ERR_IO, \
+ "**iowsrc", 0); \
+ ADIOI_Free(writebuf); \
+ return; \
+ } \
+ write_sz = ADIOI_MIN(req_len, writebuf_len); \
+ memcpy(writebuf, (char *)buf + userbuf_off, write_sz); \
+ } \
+}
+
+
+/*
+ * ADIOI_BUFFERED_WRITE_WITHOUT_READ: variant of ADIOI_BUFFERED_WRITE that
+ * does not do a read-modify-write and does not lock. It is used when the
+ * filetype is contiguous and the buftype is not, and also by the
+ * single-contiguous-block fast path of ADIOI_LUSTRE_WriteStrided.
+ *
+ * It uses the same caller-scope variables as ADIOI_BUFFERED_WRITE (fd,
+ * buf, writebuf, writebuf_off, writebuf_len, write_sz, req_off, req_len,
+ * userbuf_off, end_offset, stripe_size, status1, error_code, myname).
+ * Windows are clipped to end_offset and to the next multiple of
+ * stripe_size. req_len and userbuf_off are advanced when a request is
+ * split. On a write error the macro wraps *error_code, frees writebuf
+ * and returns from the ENCLOSING function.
+ *
+ * Unlike ADIOI_BUFFERED_WRITE, the flush at the top is not guarded by
+ * "if (writebuf_len)", so the first expansion calls ADIO_WriteContig
+ * with whatever writebuf_len the caller initialised (0 in this file).
+ */
+#define ADIOI_BUFFERED_WRITE_WITHOUT_READ \
+{ \
+ /* request starts at or past the end of the current window: flush it */ \
+ if (req_off >= writebuf_off + writebuf_len) { \
+ ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, \
+ ADIO_EXPLICIT_OFFSET, writebuf_off, &status1, \
+ error_code); \
+ if (*error_code != MPI_SUCCESS) { \
+ *error_code = MPIO_Err_create_code(*error_code, \
+ MPIR_ERR_RECOVERABLE, \
+ myname, \
+ __LINE__, MPI_ERR_IO, \
+ "**iowswc", 0); \
+ ADIOI_Free(writebuf); \
+ return; \
+ } \
+ writebuf_off = req_off; \
+ /* stripe_size alignment */ \
+ writebuf_len = (unsigned) ADIOI_MIN(end_offset - writebuf_off + 1, \
+ (writebuf_off / stripe_size + 1) * \
+ stripe_size - writebuf_off); \
+ } \
+ /* copy as much of the request as fits in the current window */ \
+ write_sz = (unsigned) ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off); \
+ ADIOI_Assert((ADIO_Offset)write_sz == ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \
+ memcpy(writebuf + req_off - writebuf_off, \
+ (char *)buf + userbuf_off, write_sz); \
+ /* request spills past the window: flush, advance and copy again */ \
+ while (write_sz != req_len) { \
+ ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, \
+ ADIO_EXPLICIT_OFFSET, writebuf_off, &status1, error_code); \
+ if (*error_code != MPI_SUCCESS) { \
+ *error_code = MPIO_Err_create_code(*error_code, \
+ MPIR_ERR_RECOVERABLE, myname, \
+ __LINE__, MPI_ERR_IO, \
+ "**iowswc", 0); \
+ ADIOI_Free(writebuf); \
+ return; \
+ } \
+ req_len -= write_sz; \
+ userbuf_off += write_sz; \
+ writebuf_off += writebuf_len; \
+ /* stripe_size alignment */ \
+ writebuf_len = (unsigned) ADIOI_MIN(end_offset - writebuf_off + 1, \
+ (writebuf_off / stripe_size + 1) * \
+ stripe_size - writebuf_off); \
+ write_sz = ADIOI_MIN(req_len, writebuf_len); \
+ memcpy(writebuf, (char *)buf + userbuf_off, write_sz); \
+ } \
+}
+
+/*
+ * ADIOI_LUSTRE_WriteStrided - independent strided write that stages data
+ * in a buffer of at most one Lustre stripe (fd->hints->striping_unit) so
+ * that every ADIO_WriteContig stays inside a single stripe.
+ *
+ * fd            - open ADIO file; its filetype, etype_size, disp, fp_ind,
+ *                 atomicity flag and hints are read here
+ * buf           - user buffer holding the data to write
+ * count         - number of `datatype` elements in buf
+ * datatype      - memory datatype (may be noncontiguous)
+ * file_ptr_type - ADIO_INDIVIDUAL starts at fd->fp_ind and updates it on
+ *                 success; any other value treats `offset` as explicit
+ * offset        - explicit offset in units of etype relative to the
+ *                 filetype (ignored for ADIO_INDIVIDUAL)
+ * status        - filled with the byte count when HAVE_STATUS_SET_BYTES
+ *                 is defined
+ * error_code    - MPI_SUCCESS, or the error produced by the failing
+ *                 contiguous read/write
+ *
+ * Paths taken:
+ *   - ds_write hint disabled: delegate to ADIOI_GEN_WriteStrided_naive.
+ *   - filetype of size 0: nothing to do, return MPI_SUCCESS.
+ *   - noncontiguous memory, contiguous file: pack into the staging buffer
+ *     and write without reading first.
+ *   - contiguous memory that fits in one contiguous filetype block:
+ *     write without reading first.
+ *   - everything else: read-modify-write through ADIOI_BUFFERED_WRITE.
+ */
+void ADIOI_LUSTRE_WriteStrided(ADIO_File fd, void *buf, int count,
+                               MPI_Datatype datatype, int file_ptr_type,
+                               ADIO_Offset offset, ADIO_Status * status,
+                               int *error_code)
+{
+    /* offset is in units of etype relative to the filetype. */
+    ADIOI_Flatlist_node *flat_buf, *flat_file;
+    ADIO_Offset i_offset, sum, size_in_filetype;
+    int i, j, k, st_index=0;
+    int n_etypes_in_filetype;
+    ADIO_Offset num, size, n_filetypes, etype_in_filetype, st_n_filetypes;
+    ADIO_Offset abs_off_in_filetype=0;
+    int filetype_size, etype_size, buftype_size;
+    MPI_Aint filetype_extent, buftype_extent;
+    int buf_count, buftype_is_contig, filetype_is_contig;
+    ADIO_Offset userbuf_off;
+    ADIO_Offset off, req_off, disp, end_offset=0, writebuf_off, start_off;
+    char *writebuf;
+    unsigned bufsize, writebuf_len, write_sz;
+    ADIO_Status status1;
+    ADIO_Offset new_bwr_size, new_fwr_size, st_fwr_size, fwr_size=0, bwr_size, req_len;
+    int stripe_size;
+    static char myname[] = "ADIOI_LUSTRE_WriteStrided";
+
+    if (fd->hints->ds_write == ADIOI_HINT_DISABLE) {
+        /* if user has disabled data sieving on writes, use naive
+         * approach instead.
+         */
+        ADIOI_GEN_WriteStrided_naive(fd,
+                                     buf,
+                                     count,
+                                     datatype,
+                                     file_ptr_type,
+                                     offset, status, error_code);
+        return;
+    }
+
+    *error_code = MPI_SUCCESS;  /* changed below if error */
+
+    ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
+    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
+
+    MPI_Type_size(fd->filetype, &filetype_size);
+    if (!filetype_size) {
+        *error_code = MPI_SUCCESS;
+        return;
+    }
+
+    MPI_Type_extent(fd->filetype, &filetype_extent);
+    MPI_Type_size(datatype, &buftype_size);
+    MPI_Type_extent(datatype, &buftype_extent);
+    etype_size = fd->etype_size;
+
+    ADIOI_Assert((buftype_size * count) == ((ADIO_Offset)(unsigned)buftype_size * (ADIO_Offset)count));
+    bufsize = buftype_size * count;
+
+    /* get striping info */
+    stripe_size = fd->hints->striping_unit;
+
+    /* Different buftype to different filetype */
+    if (!buftype_is_contig && filetype_is_contig) {
+        /* noncontiguous in memory, contiguous in file. */
+        ADIOI_Flatten_datatype(datatype);
+        flat_buf = ADIOI_Flatlist;
+        while (flat_buf->type != datatype)
+            flat_buf = flat_buf->next;
+
+        off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind :
+            fd->disp + (ADIO_Offset)etype_size * offset;
+
+        start_off = off;
+        end_offset = start_off + bufsize - 1;
+        /* write stripe size buffer each time */
+        writebuf = (char *) ADIOI_Malloc(ADIOI_MIN(bufsize, stripe_size));
+        writebuf_off = 0;
+        writebuf_len = 0;
+
+        /* if atomicity is true, lock the region to be accessed */
+        if (fd->atomicity)
+            ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, bufsize);
+
+        for (j = 0; j < count; j++) {
+            for (i = 0; i < flat_buf->count; i++) {
+                userbuf_off = (ADIO_Offset)j * (ADIO_Offset)buftype_extent +
+                    flat_buf->indices[i];
+                req_off = off;
+                req_len = flat_buf->blocklens[i];
+                ADIOI_BUFFERED_WRITE_WITHOUT_READ
+                off += flat_buf->blocklens[i];
+            }
+        }
+
+        /* write the buffer out finally */
+        ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE,
+                         ADIO_EXPLICIT_OFFSET, writebuf_off, &status1,
+                         error_code);
+
+        if (fd->atomicity)
+            ADIOI_UNLOCK(fd, start_off, SEEK_SET, bufsize);
+        if (*error_code != MPI_SUCCESS) {
+            ADIOI_Free(writebuf);
+            return;
+        }
+        ADIOI_Free(writebuf);
+        if (file_ptr_type == ADIO_INDIVIDUAL)
+            fd->fp_ind = off;
+    } else {
+        /* noncontiguous in file */
+        /* filetype already flattened in ADIO_Open */
+        flat_file = ADIOI_Flatlist;
+        while (flat_file->type != fd->filetype)
+            flat_file = flat_file->next;
+        disp = fd->disp;
+
+        if (file_ptr_type == ADIO_INDIVIDUAL) {
+            /* Wei-keng reworked type processing to be a bit more efficient */
+            offset = fd->fp_ind - disp;
+            n_filetypes = (offset - flat_file->indices[0]) / filetype_extent;
+            offset -= (ADIO_Offset)n_filetypes * filetype_extent;
+            /* now offset is local to this extent */
+
+            /* find the block where offset is located, skip blocklens[i]==0 */
+            for (i=0; i<flat_file->count; i++) {
+                ADIO_Offset dist;
+                if (flat_file->blocklens[i] == 0) continue;
+                dist = flat_file->indices[i] + flat_file->blocklens[i] - offset;
+                /* fwr_size is from offset to the end of block i */
+                if (dist == 0) {
+                    /* offset sits exactly at the end of block i: start
+                     * from the beginning of the following entry */
+                    i++;
+                    offset = flat_file->indices[i];
+                    fwr_size = flat_file->blocklens[i];
+                    break;
+                }
+                if (dist > 0) {
+                    fwr_size = dist;
+                    break;
+                }
+            }
+            st_index = i;  /* starting index in flat_file->indices[] */
+            offset += disp + (ADIO_Offset)n_filetypes*filetype_extent;
+        }
+        else {
+            n_etypes_in_filetype = filetype_size/etype_size;
+            n_filetypes = offset / n_etypes_in_filetype;
+            etype_in_filetype = offset % n_etypes_in_filetype;
+            size_in_filetype = etype_in_filetype * etype_size;
+
+            sum = 0;
+            for (i = 0; i < flat_file->count; i++) {
+                sum += flat_file->blocklens[i];
+                if (sum > size_in_filetype) {
+                    st_index = i;
+                    fwr_size = sum - size_in_filetype;
+                    abs_off_in_filetype = flat_file->indices[i] +
+                        size_in_filetype - (sum - flat_file->blocklens[i]);
+                    break;
+                }
+            }
+
+            /* abs. offset in bytes in the file */
+            offset = disp + (ADIO_Offset) n_filetypes *filetype_extent +
+                abs_off_in_filetype;
+        }
+
+        start_off = offset;
+
+        /* Wei-keng Liao:write request is within single flat_file
+         * contig block*/
+        /* this could happen, for example, with subarray types that are
+         * actually fairly contiguous */
+        if (buftype_is_contig && bufsize <= fwr_size) {
+            req_off = start_off;
+            req_len = bufsize;
+            end_offset = start_off + bufsize - 1;
+            writebuf = (char *) ADIOI_Malloc(ADIOI_MIN(bufsize, stripe_size));
+            memset(writebuf, -1, ADIOI_MIN(bufsize, stripe_size));
+            writebuf_off = 0;
+            writebuf_len = 0;
+            userbuf_off = 0;
+            ADIOI_BUFFERED_WRITE_WITHOUT_READ
+            /* write the buffer out finally */
+            ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE,
+                             ADIO_EXPLICIT_OFFSET, writebuf_off, &status1,
+                             error_code);
+
+            if (file_ptr_type == ADIO_INDIVIDUAL) {
+                /* update MPI-IO file pointer to point to the first byte
+                 * that can be accessed in the fileview. */
+                fd->fp_ind = offset + bufsize;
+                if (bufsize == fwr_size) {
+                    do {
+                        st_index++;
+                        if (st_index == flat_file->count) {
+                            st_index = 0;
+                            n_filetypes++;
+                        }
+                    } while (flat_file->blocklens[st_index] == 0);
+                    fd->fp_ind = disp + flat_file->indices[st_index]
+                        + (ADIO_Offset)n_filetypes*filetype_extent;
+                }
+            }
+            fd->fp_sys_posn = -1;  /* set it to null. */
+#ifdef HAVE_STATUS_SET_BYTES
+            MPIR_Status_set_bytes(status, datatype, bufsize);
+#endif
+            ADIOI_Free(writebuf);
+            return;
+        }
+
+        /* Calculate end_offset, the last byte-offset that will be accessed.
+           e.g., if start_offset=0 and 100 bytes to be written, end_offset=99*/
+
+        st_fwr_size = fwr_size;
+        st_n_filetypes = n_filetypes;
+        i_offset = 0;
+        j = st_index;
+        off = offset;
+        fwr_size = ADIOI_MIN(st_fwr_size, bufsize);
+        while (i_offset < bufsize) {
+            i_offset += fwr_size;
+            end_offset = off + fwr_size - 1;
+
+            j = (j+1) % flat_file->count;
+            n_filetypes += (j == 0) ? 1 : 0;
+            while (flat_file->blocklens[j]==0) {
+                j = (j+1) % flat_file->count;
+                n_filetypes += (j == 0) ? 1 : 0;
+            }
+
+            off = disp + flat_file->indices[j] +
+                n_filetypes*(ADIO_Offset)filetype_extent;
+            fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset);
+        }
+
+        /* if atomicity is true, lock the region to be accessed */
+        if (fd->atomicity)
+            ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
+
+        writebuf_off = 0;
+        writebuf_len = 0;
+        writebuf = (char *) ADIOI_Malloc(stripe_size);
+        memset(writebuf, -1, stripe_size);
+
+        if (buftype_is_contig && !filetype_is_contig) {
+
+            /* contiguous in memory, noncontiguous in file. should be the most
+               common case. */
+
+            i_offset = 0;
+            j = st_index;
+            off = offset;
+            n_filetypes = st_n_filetypes;
+            fwr_size = ADIOI_MIN(st_fwr_size, bufsize);
+            while (i_offset < bufsize) {
+                if (fwr_size) {
+                    /* TYPE_UB and TYPE_LB can result in
+                       fwr_size = 0. save system call in such cases */
+                    /* lseek(fd->fd_sys, off, SEEK_SET);
+                       err = write(fd->fd_sys, ((char *) buf) + i_offset, fwr_size);*/
+
+                    req_off = off;
+                    req_len = fwr_size;
+                    userbuf_off = i_offset;
+                    ADIOI_BUFFERED_WRITE
+                }
+                i_offset += fwr_size;
+
+                if (off + fwr_size < disp + flat_file->indices[j] +
+                    flat_file->blocklens[j] +
+                    n_filetypes*(ADIO_Offset)filetype_extent)
+                    off += fwr_size;
+                /* did not reach end of contiguous block in filetype.
+                   no more I/O needed. off is incremented by fwr_size. */
+                else {
+                    j = (j+1) % flat_file->count;
+                    n_filetypes += (j == 0) ? 1 : 0;
+                    while (flat_file->blocklens[j]==0) {
+                        j = (j+1) % flat_file->count;
+                        n_filetypes += (j == 0) ? 1 : 0;
+                    }
+                    off = disp + flat_file->indices[j] +
+                        n_filetypes*(ADIO_Offset)filetype_extent;
+                    fwr_size = ADIOI_MIN(flat_file->blocklens[j],
+                                         bufsize-i_offset);
+                }
+            }
+        }
+        else {
+            /* noncontiguous in memory as well as in file */
+
+            ADIOI_Flatten_datatype(datatype);
+            flat_buf = ADIOI_Flatlist;
+            while (flat_buf->type != datatype) flat_buf = flat_buf->next;
+
+            k = num = buf_count = 0;
+            i_offset = flat_buf->indices[0];
+            j = st_index;
+            off = offset;
+            n_filetypes = st_n_filetypes;
+            fwr_size = st_fwr_size;
+            bwr_size = flat_buf->blocklens[0];
+
+            while (num < bufsize) {
+                size = ADIOI_MIN(fwr_size, bwr_size);
+                if (size) {
+                    /* lseek(fd->fd_sys, off, SEEK_SET);
+                       err = write(fd->fd_sys, ((char *) buf) + i_offset, size); */
+
+                    req_off = off;
+                    req_len = size;
+                    userbuf_off = i_offset;
+                    ADIOI_BUFFERED_WRITE
+                }
+
+                new_fwr_size = fwr_size;
+                new_bwr_size = bwr_size;
+
+                if (size == fwr_size) {
+                    /* reached end of contiguous block in file */
+                    j = (j+1) % flat_file->count;
+                    n_filetypes += (j == 0) ? 1 : 0;
+                    while (flat_file->blocklens[j]==0) {
+                        j = (j+1) % flat_file->count;
+                        n_filetypes += (j == 0) ? 1 : 0;
+                    }
+
+                    off = disp + flat_file->indices[j] +
+                        n_filetypes*(ADIO_Offset)filetype_extent;
+
+                    new_fwr_size = flat_file->blocklens[j];
+                    if (size != bwr_size) {
+                        i_offset += size;
+                        new_bwr_size -= size;
+                    }
+                }
+
+                if (size == bwr_size) {
+                    /* reached end of contiguous block in memory */
+
+                    k = (k + 1)%flat_buf->count;
+                    buf_count++;
+                    i_offset = (ADIO_Offset)buftype_extent *
+                        (ADIO_Offset)(buf_count/flat_buf->count) +
+                        flat_buf->indices[k];
+                    new_bwr_size = flat_buf->blocklens[k];
+                    if (size != fwr_size) {
+                        off += size;
+                        new_fwr_size -= size;
+                    }
+                }
+                num += size;
+                fwr_size = new_fwr_size;
+                bwr_size = new_bwr_size;
+            }
+        }
+
+        /* write the buffer out finally */
+        if (writebuf_len) {
+            ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE,
+                             ADIO_EXPLICIT_OFFSET,
+                             writebuf_off, &status1, error_code);
+            if (!(fd->atomicity))
+                ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len);
+            if (*error_code != MPI_SUCCESS) {
+                /* release the whole-region lock and the staging buffer
+                 * before bailing out, as the contiguous-file branch
+                 * above does */
+                if (fd->atomicity)
+                    ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
+                ADIOI_Free(writebuf);
+                return;
+            }
+        }
+        if (fd->atomicity)
+            ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
+
+        ADIOI_Free(writebuf);
+
+        if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;
+    }
+
+    fd->fp_sys_posn = -1;  /* set it to null. */
+
+#ifdef HAVE_STATUS_SET_BYTES
+    MPIR_Status_set_bytes(status, datatype, bufsize);
+/* This is a temporary way of filling in status. The right way is to
+   keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */
+#endif
+
+    if (!buftype_is_contig)
+        ADIOI_Delete_flattened(datatype);
+}
diff --git a/ompi/mca/io/romio/romio/adio/ad_nfs/Makefile.am b/ompi/mca/io/romio/romio/adio/ad_nfs/Makefile.am
index 21116d71dc..0caa867c3f 100644
--- a/ompi/mca/io/romio/romio/adio/ad_nfs/Makefile.am
+++ b/ompi/mca/io/romio/romio/adio/ad_nfs/Makefile.am
@@ -24,6 +24,7 @@ libadio_nfs_la_SOURCES = \
ad_nfs.h \
ad_nfs_done.c \
ad_nfs_fcntl.c \
+ ad_nfs_features.c \
ad_nfs_getsh.c \
ad_nfs_hints.c \
ad_nfs_iread.c \
diff --git a/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs.c b/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs.c
index cb505980db..c3f8b4ce4f 100644
--- a/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs.c
+++ b/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs.c
@@ -12,6 +12,7 @@
struct ADIOI_Fns_struct ADIO_NFS_operations = {
ADIOI_NFS_Open, /* Open */
+ ADIOI_FAILSAFE_OpenColl, /* OpenColl */
ADIOI_NFS_ReadContig, /* ReadContig */
ADIOI_NFS_WriteContig, /* WriteContig */
ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */
@@ -35,4 +36,5 @@ struct ADIOI_Fns_struct ADIO_NFS_operations = {
ADIOI_GEN_Flush, /* Flush */
ADIOI_NFS_Resize, /* Resize */
ADIOI_GEN_Delete, /* Delete */
+ ADIOI_NFS_Feature, /* Features */
};
diff --git a/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs.h b/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs.h
index e6fdda2903..5a1daa5f64 100644
--- a/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs.h
+++ b/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs.h
@@ -78,5 +78,6 @@ void ADIOI_NFS_Get_shared_fp(ADIO_File fd, int size, ADIO_Offset *shared_fp,
int *error_code);
void ADIOI_NFS_Set_shared_fp(ADIO_File fd, ADIO_Offset offset, int *error_code);
void ADIOI_NFS_Resize(ADIO_File fd, ADIO_Offset size, int *error_code);
+int ADIOI_NFS_Feature(ADIO_File fd, int feature_flag);
#endif
diff --git a/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_features.c b/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_features.c
new file mode 100644
index 0000000000..01768f71f3
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_features.c
@@ -0,0 +1,16 @@
+#include "adio.h"
+#include "ad_nfs.h"
+
+/*
+ * ADIOI_NFS_Feature - answer a feature query for the NFS driver.
+ *
+ * Returns 1 when `flag` is ADIO_SHARED_FP, ADIO_LOCKS, ADIO_SEQUENTIAL or
+ * ADIO_DATA_SIEVING_WRITES, and 0 for every other value (including
+ * ADIO_SCALABLE_OPEN). `fd` is not consulted.
+ */
+int ADIOI_NFS_Feature(ADIO_File fd, int flag)
+{
+    if (flag == ADIO_SHARED_FP ||
+        flag == ADIO_LOCKS ||
+        flag == ADIO_SEQUENTIAL ||
+        flag == ADIO_DATA_SIEVING_WRITES) {
+        return 1;
+    }
+
+    /* ADIO_SCALABLE_OPEN and any unrecognised flag are unsupported */
+    return 0;
+}
diff --git a/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_iwrite.c b/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_iwrite.c
index 4fd4677eff..0cfd562170 100644
--- a/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_iwrite.c
+++ b/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_iwrite.c
@@ -59,6 +59,7 @@ int ADIOI_NFS_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset,
struct aiocb *aiocbp;
ADIOI_AIO_Request *aio_req;
+ MPI_Status status;
fd_sys = fd->fd_sys;
@@ -108,7 +109,7 @@ int ADIOI_NFS_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset,
/* exceeded the max. no. of outstanding requests.
complete all previous async. requests and try again. */
ADIO_WriteContig(fd, buf, len, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
- offset, NULL, &error_code);
+ offset, &status, &error_code);
MPIO_Completed_request_create(&fd, len, &error_code, request);
return 0;
} else {
diff --git a/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_read.c b/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_read.c
index cf8f01895d..b28a57bb55 100644
--- a/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_read.c
+++ b/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_read.c
@@ -177,7 +177,7 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count,
ADIO_Offset userbuf_off;
ADIO_Offset off, req_off, disp, end_offset=0, readbuf_off, start_off;
char *readbuf, *tmp_buf, *value;
- int flag, st_frd_size, st_n_filetypes, readbuf_len;
+ int st_frd_size, st_n_filetypes, readbuf_len;
int new_brd_size, new_frd_size, err_flag=0, info_flag, max_bufsize;
static char myname[] = "ADIOI_NFS_READSTRIDED";
@@ -201,7 +201,7 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count,
/* get max_bufsize from the info object. */
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
- MPI_Info_get(fd->info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, value,
+ ADIOI_Info_get(fd->info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, value,
&info_flag);
max_bufsize = atoi(value);
ADIOI_Free(value);
@@ -278,25 +278,32 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count,
disp = fd->disp;
if (file_ptr_type == ADIO_INDIVIDUAL) {
- offset = fd->fp_ind; /* in bytes */
- n_filetypes = -1;
- flag = 0;
- while (!flag) {
- n_filetypes++;
- for (i=0; i<flat_file->count; i++) {
- if (disp + flat_file->indices[i] +
- (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i]
- >= offset) {
- st_index = i;
- frd_size = (int) (disp + flat_file->indices[i] +
- (ADIO_Offset) n_filetypes*filetype_extent
- + flat_file->blocklens[i] - offset);
- flag = 1;
- break;
- }
- }
- }
- }
+ /* Wei-keng reworked type processing to be a bit more efficient */
+ offset = fd->fp_ind - disp;
+ n_filetypes = (offset - flat_file->indices[0]) / filetype_extent;
+ offset -= (ADIO_Offset)n_filetypes * filetype_extent;
+ /* now offset is local to this extent */
+
+ /* find the block where offset is located, skip blocklens[i]==0 */
+ for (i=0; i<flat_file->count; i++) {
+ ADIO_Offset dist;
+ if (flat_file->blocklens[i] == 0) continue;
+ dist = flat_file->indices[i] + flat_file->blocklens[i] - offset;
+ /* frd_size is from offset to the end of block i */
+ if (dist == 0) {
+ i++;
+ offset = flat_file->indices[i];
+ frd_size = flat_file->blocklens[i];
+ break;
+ }
+ if (dist > 0 ) {
+ frd_size = dist;
+ break;
+ }
+ }
+ st_index = i; /* starting index in flat_file->indices[] */
+ offset += disp + (ADIO_Offset)n_filetypes*filetype_extent;
+ }
else {
n_etypes_in_filetype = filetype_size/etype_size;
n_filetypes = (int) (offset / n_etypes_in_filetype);
@@ -316,11 +323,42 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count,
}
/* abs. offset in bytes in the file */
- offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype;
+ offset = disp + (ADIO_Offset) n_filetypes*filetype_extent +
+ abs_off_in_filetype;
}
start_off = offset;
+ /* Wei-keng Liao: read request is within a single flat_file contig
+ * block e.g. with subarray types that actually describe the whole
+ * array */
+ if (buftype_is_contig && bufsize <= frd_size) {
+ ADIO_ReadContig(fd, buf, bufsize, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
+ offset, status, error_code);
+
+ if (file_ptr_type == ADIO_INDIVIDUAL) {
+ /* update MPI-IO file pointer to point to the first byte that
+ * can be accessed in the fileview. */
+ fd->fp_ind = offset + bufsize;
+ if (bufsize == frd_size) {
+ do {
+ st_index++;
+ if (st_index == flat_file->count) {
+ st_index = 0;
+ n_filetypes++;
+ }
+ } while (flat_file->blocklens[st_index] == 0);
+ fd->fp_ind = disp + flat_file->indices[st_index]
+ + n_filetypes*filetype_extent;
+ }
+ }
+ fd->fp_sys_posn = -1; /* set it to null. */
+#ifdef HAVE_STATUS_SET_BYTES
+ MPIR_Status_set_bytes(status, datatype, bufsize);
+#endif
+ return;
+ }
+
/* Calculate end_offset, the last byte-offset that will be accessed.
e.g., if start_offset=0 and 100 bytes to be read, end_offset=99*/
@@ -333,11 +371,11 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count,
while (i < bufsize) {
i += frd_size;
end_offset = off + frd_size - 1;
-
- if (j < (flat_file->count - 1)) j++;
- else {
- j = 0;
- n_filetypes++;
+ j = (j+1) % flat_file->count;
+ n_filetypes += (j == 0) ? 1 : 0;
+ while (flat_file->blocklens[j]==0) {
+ j = (j+1) % flat_file->count;
+ n_filetypes += (j == 0) ? 1 : 0;
}
off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent;
@@ -402,11 +440,12 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count,
/* did not reach end of contiguous block in filetype.
no more I/O needed. off is incremented by frd_size. */
else {
- if (j < (flat_file->count - 1)) j++;
- else {
- j = 0;
- n_filetypes++;
- }
+ j = (j+1) % flat_file->count;
+ n_filetypes += (j == 0) ? 1 : 0;
+ while (flat_file->blocklens[j]==0) {
+ j = (j+1) % flat_file->count;
+ n_filetypes += (j == 0) ? 1 : 0;
+ }
off = disp + flat_file->indices[j] +
(ADIO_Offset) n_filetypes*filetype_extent;
frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
@@ -445,12 +484,12 @@ void ADIOI_NFS_ReadStrided(ADIO_File fd, void *buf, int count,
if (size == frd_size) {
/* reached end of contiguous block in file */
- if (j < (flat_file->count - 1)) j++;
- else {
- j = 0;
- n_filetypes++;
- }
-
+ j = (j+1) % flat_file->count;
+ n_filetypes += (j == 0) ? 1 : 0;
+ while (flat_file->blocklens[j]==0) {
+ j = (j+1) % flat_file->count;
+ n_filetypes += (j == 0) ? 1 : 0;
+ }
off = disp + flat_file->indices[j] +
(ADIO_Offset) n_filetypes*filetype_extent;
diff --git a/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_wait.c b/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_wait.c
index ad8da83d78..0148f63973 100644
--- a/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_wait.c
+++ b/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_wait.c
@@ -10,120 +10,6 @@ void ADIOI_NFS_ReadComplete(ADIO_Request *request, ADIO_Status *status,
int *error_code)
{
return;
-#if 0
-#ifdef ROMIO_HAVE_WORKING_AIO
- int err;
- static char myname[] = "ADIOI_NFS_READCOMPLETE";
-#ifdef ROMIO_HAVE_STRUCT_AIOCB_WITH_AIO_HANDLE
- struct aiocb *tmp1;
-#endif
-#endif
-
- if (*request == ADIO_REQUEST_NULL) {
- *error_code = MPI_SUCCESS;
- return;
- }
-
-#ifdef ROMIO_HAVE_AIO_SUSPEND_TWO_ARGS
-/* old IBM */
- if ((*request)->queued) {
- do {
-#if !defined(_AIO_AIX_SOURCE) && !defined(_NO_PROTO)
- err = aio_suspend((*request)->handle,1,NULL);
-#else
- err = aio_suspend(1, (struct aiocb **) &((*request)->handle));
-#endif
- } while ((err == -1) && (errno == EINTR));
-
- tmp1 = (struct aiocb *) (*request)->handle;
- if (err != -1) {
- err = aio_return(tmp1->aio_handle);
- (*request)->nbytes = err;
- errno = aio_error(tmp1->aio_handle);
- }
- else (*request)->nbytes = -1;
-
-/* on DEC, it is required to call aio_return to dequeue the request.
- IBM man pages don't indicate what function to use for dequeue.
- I'm assuming it is aio_return! */
-
- if (err == -1) {
- *error_code = MPIO_Err_create_code(MPI_SUCCESS,
- MPIR_ERR_RECOVERABLE, myname,
- __LINE__, MPI_ERR_IO, "**io",
- "**io %s", strerror(errno));
- }
- else *error_code = MPI_SUCCESS;
- }
- else *error_code = MPI_SUCCESS; /* if ( (*request)->queued ) */
-
-#ifdef HAVE_STATUS_SET_BYTES
- if ((*request)->nbytes != -1)
- MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes);
-#endif
-
-#elif defined(ROMIO_HAVE_WORKING_AIO)
-/* all other aio types */
- if ((*request)->queued) {
- do {
- err = aio_suspend((const struct aiocb **) &((*request)->handle), 1, 0);
- } while ((err == -1) && (errno == EINTR));
-
- if (err != -1) {
- err = aio_return((struct aiocb *) (*request)->handle);
- (*request)->nbytes = err;
- errno = aio_error((struct aiocb *) (*request)->handle);
- }
- else (*request)->nbytes = -1;
-
- if (err == -1) {
- *error_code = MPIO_Err_create_code(MPI_SUCCESS,
- MPIR_ERR_RECOVERABLE, myname,
- __LINE__, MPI_ERR_IO, "**io",
- "**io %s", strerror(errno));
- }
- else *error_code = MPI_SUCCESS;
- }
- else *error_code = MPI_SUCCESS; /* if ((*request)->queued) ... */
-#ifdef HAVE_STATUS_SET_BYTES
- if ((*request)->nbytes != -1)
- MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes);
-#endif
-#endif
-
-#ifdef ROMIO_HAVE_WORKING_AIO
- if ((*request)->queued != -1) {
-
- /* queued = -1 is an internal hack used when the request must
- be completed, but the request object should not be
- freed. This is used in ADIOI_Complete_async, because the user
- will call MPI_Wait later, which would require status to
- be filled. Ugly but works. queued = -1 should be used only
- in ADIOI_Complete_async.
- This should not affect the user in any way. */
-
- /* if request is still queued in the system, it is also there
- on ADIOI_Async_list. Delete it from there. */
- if ((*request)->queued) ADIOI_Del_req_from_list(request);
-
- (*request)->fd->async_count--;
- if ((*request)->handle) ADIOI_Free((*request)->handle);
- ADIOI_Free_request((ADIOI_Req_node *) (*request));
- *request = ADIO_REQUEST_NULL;
- }
-
-#else
-/* no aio */
-
-#ifdef HAVE_STATUS_SET_BYTES
- MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes);
-#endif
- (*request)->fd->async_count--;
- ADIOI_Free_request((ADIOI_Req_node *) (*request));
- *request = ADIO_REQUEST_NULL;
- *error_code = MPI_SUCCESS;
-#endif
-#endif
}
diff --git a/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_write.c b/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_write.c
index f856685e59..7c82e314e1 100644
--- a/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_write.c
+++ b/ompi/mca/io/romio/romio/adio/ad_nfs/ad_nfs_write.c
@@ -281,7 +281,7 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, void *buf, int count,
ADIO_Offset userbuf_off;
ADIO_Offset off, req_off, disp, end_offset=0, writebuf_off, start_off;
char *writebuf, *value;
- int flag, st_fwr_size, st_n_filetypes, writebuf_len, write_sz;
+ int st_fwr_size, st_n_filetypes, writebuf_len, write_sz;
int new_bwr_size, new_fwr_size, err_flag=0, info_flag, max_bufsize;
static char myname[] = "ADIOI_NFS_WRITESTRIDED";
@@ -304,7 +304,7 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, void *buf, int count,
/* get max_bufsize from the info object. */
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
- MPI_Info_get(fd->info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, value,
+ ADIOI_Info_get(fd->info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, value,
&info_flag);
max_bufsize = atoi(value);
ADIOI_Free(value);
@@ -381,25 +381,32 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, void *buf, int count,
disp = fd->disp;
if (file_ptr_type == ADIO_INDIVIDUAL) {
- offset = fd->fp_ind; /* in bytes */
- n_filetypes = -1;
- flag = 0;
- while (!flag) {
- n_filetypes++;
- for (i=0; i<flat_file->count; i++) {
- if (disp + flat_file->indices[i] +
- (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i]
- >= offset) {
- st_index = i;
- fwr_size = (int) (disp + flat_file->indices[i] +
- (ADIO_Offset) n_filetypes*filetype_extent
- + flat_file->blocklens[i] - offset);
- flag = 1;
- break;
- }
- }
- }
- }
+ /* Wei-keng reworked type processing to be a bit more efficient */
+ offset = fd->fp_ind - disp;
+ n_filetypes = (offset - flat_file->indices[0]) / filetype_extent;
+ offset -= (ADIO_Offset)n_filetypes * filetype_extent;
+ /* now offset is local to this extent */
+
+ /* find the block where offset is located, skip blocklens[i]==0 */
+ for (i=0; i<flat_file->count; i++) {
+ ADIO_Offset dist;
+ if (flat_file->blocklens[i] == 0) continue;
+ dist = flat_file->indices[i] + flat_file->blocklens[i] - offset;
+ /* fwr_size is from offset to the end of block i */
+ if (dist == 0) {
+ i++;
+ offset = flat_file->indices[i];
+ fwr_size = flat_file->blocklens[i];
+ break;
+ }
+ if (dist > 0) {
+ fwr_size = dist;
+ break;
+ }
+ }
+ st_index = i; /* starting index in flat_file->indices[] */
+ offset += disp + (ADIO_Offset)n_filetypes*filetype_extent;
+ }
else {
n_etypes_in_filetype = filetype_size/etype_size;
n_filetypes = (int) (offset / n_etypes_in_filetype);
@@ -419,10 +426,40 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, void *buf, int count,
}
/* abs. offset in bytes in the file */
- offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype;
+ offset = disp + (ADIO_Offset) n_filetypes*filetype_extent +
+ abs_off_in_filetype;
}
start_off = offset;
+ /* Wei-keng Liao:write request is within single flat_file contig block*/
+ /* this could happen, for example, with subarray types that are
+ * actually fairly contiguous */
+ if (buftype_is_contig && bufsize <= fwr_size) {
+ ADIO_WriteContig(fd, buf, bufsize, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
+ offset, status, error_code);
+
+ if (file_ptr_type == ADIO_INDIVIDUAL) {
+ /* update MPI-IO file pointer to point to the first byte
+ * that can be accessed in the fileview. */
+ fd->fp_ind = offset + bufsize;
+ if (bufsize == fwr_size) {
+ do {
+ st_index++;
+ if (st_index == flat_file->count) {
+ st_index = 0;
+ n_filetypes++;
+ }
+ } while (flat_file->blocklens[st_index] == 0);
+ fd->fp_ind = disp + flat_file->indices[st_index]
+ + (ADIO_Offset)n_filetypes*filetype_extent;
+ }
+ }
+ fd->fp_sys_posn = -1; /* set it to null. */
+#ifdef HAVE_STATUS_SET_BYTES
+ MPIR_Status_set_bytes(status, datatype, bufsize);
+#endif
+ return;
+ }
/* Calculate end_offset, the last byte-offset that will be accessed.
e.g., if start_offset=0 and 100 bytes to be write, end_offset=99*/
@@ -436,14 +473,15 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, void *buf, int count,
while (i < bufsize) {
i += fwr_size;
end_offset = off + fwr_size - 1;
-
- if (j < (flat_file->count - 1)) j++;
- else {
- j = 0;
- n_filetypes++;
+ j = (j+1) % flat_file->count;
+ n_filetypes += (j == 0) ? 1 : 0;
+ while (flat_file->blocklens[j]==0) {
+ j = (j+1) % flat_file->count;
+ n_filetypes += (j == 0) ? 1 : 0;
}
- off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent;
+ off = disp + flat_file->indices[j] +
+ (ADIO_Offset) n_filetypes*filetype_extent;
fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
}
@@ -509,13 +547,14 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, void *buf, int count,
/* did not reach end of contiguous block in filetype.
no more I/O needed. off is incremented by fwr_size. */
else {
- if (j < (flat_file->count - 1)) j++;
- else {
- j = 0;
- n_filetypes++;
- }
+ j = (j+1) % flat_file->count;
+ n_filetypes += (j == 0) ? 1 : 0;
+ while (flat_file->blocklens[j]==0) {
+ j = (j+1) % flat_file->count;
+ n_filetypes += (j == 0) ? 1 : 0;
+ }
off = disp + flat_file->indices[j] +
- (ADIO_Offset) n_filetypes*filetype_extent;
+ (ADIO_Offset) n_filetypes*filetype_extent;
fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
}
}
@@ -552,10 +591,11 @@ void ADIOI_NFS_WriteStrided(ADIO_File fd, void *buf, int count,
if (size == fwr_size) {
/* reached end of contiguous block in file */
- if (j < (flat_file->count - 1)) j++;
- else {
- j = 0;
- n_filetypes++;
+ j = (j+1) % flat_file->count;
+ n_filetypes += (j == 0) ? 1 : 0;
+ while (flat_file->blocklens[j]==0) {
+ j = (j+1) % flat_file->count;
+ n_filetypes += (j == 0) ? 1 : 0;
}
off = disp + flat_file->indices[j] +
diff --git a/ompi/mca/io/romio/romio/adio/ad_ntfs/ad_ntfs.c b/ompi/mca/io/romio/romio/adio/ad_ntfs/ad_ntfs.c
index 23c482875d..c49f2e86eb 100644
--- a/ompi/mca/io/romio/romio/adio/ad_ntfs/ad_ntfs.c
+++ b/ompi/mca/io/romio/romio/adio/ad_ntfs/ad_ntfs.c
@@ -12,6 +12,7 @@
struct ADIOI_Fns_struct ADIO_NTFS_operations = {
ADIOI_NTFS_Open, /* Open */
+ ADIOI_GEN_OpenColl, /* OpenColl */
ADIOI_NTFS_ReadContig, /* ReadContig */
ADIOI_NTFS_WriteContig, /* WriteContig */
ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */
@@ -33,4 +34,5 @@ struct ADIOI_Fns_struct ADIO_NTFS_operations = {
ADIOI_NTFS_Flush, /* Flush */
ADIOI_NTFS_Resize, /* Resize */
ADIOI_GEN_Delete, /* Delete */
+ ADIOI_GEN_Feature /* Features */
};
diff --git a/ompi/mca/io/romio/romio/adio/ad_ntfs/ad_ntfs_iwrite.c b/ompi/mca/io/romio/romio/adio/ad_ntfs/ad_ntfs_iwrite.c
index 33ce0f1acd..b43e1a9a75 100644
--- a/ompi/mca/io/romio/romio/adio/ad_ntfs/ad_ntfs_iwrite.c
+++ b/ompi/mca/io/romio/romio/adio/ad_ntfs/ad_ntfs_iwrite.c
@@ -60,7 +60,6 @@ int ADIOI_NTFS_aio_poll_fn(void *extra_state, MPI_Status *status)
/* TODO: unsure how to handle this */
}
}else{
- MPIR_Nest_incr();
mpi_errno = MPI_Grequest_complete(aio_req->req);
if (mpi_errno != MPI_SUCCESS) {
mpi_errno = MPIO_Err_create_code(MPI_SUCCESS,
@@ -69,7 +68,6 @@ int ADIOI_NTFS_aio_poll_fn(void *extra_state, MPI_Status *status)
MPI_ERR_IO, "**mpi_grequest_complete",
0);
}
- MPIR_Nest_decr();
}
return mpi_errno;
}
@@ -111,16 +109,14 @@ int ADIOI_NTFS_aio_wait_fn(int count, void **array_of_states,
aio_reqlist[retObject]->lpOvl, &(aio_reqlist[retObject]->nbytes),
FALSE)){
/* XXX: mark completed requests as 'done'*/
- MPIR_Nest_incr();
- mpi_errno = MPI_Grequest_complete(aio_reqlist[retObject]->req);
+ mpi_errno = MPI_Grequest_complete(aio_reqlist[retObject]->req);
if (mpi_errno != MPI_SUCCESS) {
mpi_errno = MPIO_Err_create_code(MPI_SUCCESS,
MPIR_ERR_RECOVERABLE,
"ADIOI_NTFS_aio_wait_fn", __LINE__,
MPI_ERR_IO, "**mpi_grequest_complete",
0);
- }
- MPIR_Nest_decr();
+ }
}else{
if(GetLastError() == ERROR_IO_INCOMPLETE){
/* IO in progress */
@@ -146,7 +142,6 @@ int ADIOI_NTFS_aio_query_fn(void *extra_state, MPI_Status *status)
MPI_Status_set_elements(status, MPI_BYTE, aio_req->nbytes);
- /* do i need to nest_incr/nest_decr here? */
/* can never cancel so always true */
MPI_Status_set_cancelled(status, 0);
diff --git a/ompi/mca/io/romio/romio/adio/ad_panfs/ad_panfs.c b/ompi/mca/io/romio/romio/adio/ad_panfs/ad_panfs.c
index 7af1045014..f2e96bbba8 100644
--- a/ompi/mca/io/romio/romio/adio/ad_panfs/ad_panfs.c
+++ b/ompi/mca/io/romio/romio/adio/ad_panfs/ad_panfs.c
@@ -13,6 +13,7 @@
struct ADIOI_Fns_struct ADIO_PANFS_operations = {
ADIOI_PANFS_Open, /* Open */
+ ADIOI_GEN_OpenColl,
ADIOI_PANFS_ReadContig, /* ReadContig */
ADIOI_PANFS_WriteContig, /* WriteContig */
ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */
@@ -39,4 +40,5 @@ struct ADIOI_Fns_struct ADIO_PANFS_operations = {
ADIOI_GEN_Flush, /* Flush */
ADIOI_PANFS_Resize, /* Resize */
ADIOI_GEN_Delete, /* Delete */
+ ADIOI_GEN_Feature,
};
diff --git a/ompi/mca/io/romio/romio/adio/ad_panfs/ad_panfs_hints.c b/ompi/mca/io/romio/romio/adio/ad_panfs/ad_panfs_hints.c
index 9b02a414d7..4755a0aa74 100644
--- a/ompi/mca/io/romio/romio/adio/ad_panfs/ad_panfs_hints.c
+++ b/ompi/mca/io/romio/romio/adio/ad_panfs/ad_panfs_hints.c
@@ -36,7 +36,7 @@ void ADIOI_PANFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
if (users_info != MPI_INFO_NULL) {
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
- MPI_Info_get(users_info, "panfs_concurrent_write", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(users_info, "panfs_concurrent_write", MPI_MAX_INFO_VAL,
value, &flag);
if (flag) {
concurrent_write = strtoul(value,NULL,10);
@@ -46,10 +46,10 @@ void ADIOI_PANFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
FPRINTF(stderr, "ADIOI_PANFS_SetInfo: the value for key \"panfs_concurrent_write\" must be the same on all processes\n");
MPI_Abort(MPI_COMM_WORLD, 1);
}
- MPI_Info_set(fd->info, "panfs_concurrent_write", value);
+ ADIOI_Info_set(fd->info, "panfs_concurrent_write", value);
}
- MPI_Info_get(users_info, "panfs_layout_type", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(users_info, "panfs_layout_type", MPI_MAX_INFO_VAL,
value, &flag);
if (flag) {
layout_type = strtoul(value,NULL,10);
@@ -59,10 +59,10 @@ void ADIOI_PANFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
FPRINTF(stderr, "ADIOI_PANFS_SetInfo: the value for key \"panfs_layout_type\" must be the same on all processes\n");
MPI_Abort(MPI_COMM_WORLD, 1);
}
- MPI_Info_set(fd->info, "panfs_layout_type", value);
+ ADIOI_Info_set(fd->info, "panfs_layout_type", value);
}
- MPI_Info_get(users_info, "panfs_layout_stripe_unit", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(users_info, "panfs_layout_stripe_unit", MPI_MAX_INFO_VAL,
value, &flag);
if (flag) {
layout_stripe_unit = strtoul(value,NULL,10);
@@ -72,10 +72,10 @@ void ADIOI_PANFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
FPRINTF(stderr, "ADIOI_PANFS_SetInfo: the value for key \"panfs_layout_stripe_unit\" must be the same on all processes\n");
MPI_Abort(MPI_COMM_WORLD, 1);
}
- MPI_Info_set(fd->info, "panfs_layout_stripe_unit", value);
+ ADIOI_Info_set(fd->info, "panfs_layout_stripe_unit", value);
}
- MPI_Info_get(users_info, "panfs_layout_parity_stripe_width", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(users_info, "panfs_layout_parity_stripe_width", MPI_MAX_INFO_VAL,
value, &flag);
if (flag && (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE)) {
layout_parity_stripe_width = strtoul(value,NULL,10);
@@ -85,10 +85,10 @@ void ADIOI_PANFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
FPRINTF(stderr, "ADIOI_PANFS_SetInfo: the value for key \"panfs_layout_parity_stripe_width\" must be the same on all processes\n");
MPI_Abort(MPI_COMM_WORLD, 1);
}
- MPI_Info_set(fd->info, "panfs_layout_parity_stripe_width", value);
+ ADIOI_Info_set(fd->info, "panfs_layout_parity_stripe_width", value);
}
- MPI_Info_get(users_info, "panfs_layout_parity_stripe_depth", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(users_info, "panfs_layout_parity_stripe_depth", MPI_MAX_INFO_VAL,
value, &flag);
if (flag && (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE)) {
layout_parity_stripe_depth = strtoul(value,NULL,10);
@@ -98,10 +98,10 @@ void ADIOI_PANFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
FPRINTF(stderr, "ADIOI_PANFS_SetInfo: the value for key \"panfs_layout_parity_stripe_depth\" must be the same on all processes\n");
MPI_Abort(MPI_COMM_WORLD, 1);
}
- MPI_Info_set(fd->info, "panfs_layout_parity_stripe_depth", value);
+ ADIOI_Info_set(fd->info, "panfs_layout_parity_stripe_depth", value);
}
- MPI_Info_get(users_info, "panfs_layout_total_num_comps", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(users_info, "panfs_layout_total_num_comps", MPI_MAX_INFO_VAL,
value, &flag);
if (flag) {
layout_total_num_comps = strtoul(value,NULL,10);
@@ -111,10 +111,10 @@ void ADIOI_PANFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
FPRINTF(stderr, "ADIOI_PANFS_SetInfo: the value for key \"panfs_layout_total_num_comps\" must be the same on all processes\n");
MPI_Abort(MPI_COMM_WORLD, 1);
}
- MPI_Info_set(fd->info, "panfs_layout_total_num_comps", value);
+ ADIOI_Info_set(fd->info, "panfs_layout_total_num_comps", value);
}
- MPI_Info_get(users_info, "panfs_layout_visit_policy", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(users_info, "panfs_layout_visit_policy", MPI_MAX_INFO_VAL,
value, &flag);
if (flag && (layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE || layout_type == PAN_FS_CLIENT_LAYOUT_TYPE__RAID10)) {
layout_visit_policy = strtoul(value,NULL,10);
@@ -124,7 +124,7 @@ void ADIOI_PANFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
FPRINTF(stderr, "ADIOI_PANFS_SetInfo: the value for key \"panfs_layout_visit_policy\" must be the same on all processes\n");
MPI_Abort(MPI_COMM_WORLD, 1);
}
- MPI_Info_set(fd->info, "panfs_layout_visit_policy", value);
+ ADIOI_Info_set(fd->info, "panfs_layout_visit_policy", value);
}
ADIOI_Free(value);
diff --git a/ompi/mca/io/romio/romio/adio/ad_panfs/ad_panfs_open.c b/ompi/mca/io/romio/romio/adio/ad_panfs/ad_panfs_open.c
index b5f2a124b9..d7cdf19454 100644
--- a/ompi/mca/io/romio/romio/adio/ad_panfs/ad_panfs_open.c
+++ b/ompi/mca/io/romio/romio/adio/ad_panfs/ad_panfs_open.c
@@ -39,32 +39,32 @@ void ADIOI_PANFS_Open(ADIO_File fd, int *error_code)
*error_code = MPI_SUCCESS;
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
- MPI_Info_get(fd->info, "panfs_layout_type", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(fd->info, "panfs_layout_type", MPI_MAX_INFO_VAL,
value, &flag);
if (flag) {
layout_type = strtoul(value,NULL,10);
}
- MPI_Info_get(fd->info, "panfs_layout_stripe_unit", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(fd->info, "panfs_layout_stripe_unit", MPI_MAX_INFO_VAL,
value, &flag);
if (flag) {
layout_stripe_unit = strtoul(value,NULL,10);
}
- MPI_Info_get(fd->info, "panfs_layout_total_num_comps", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(fd->info, "panfs_layout_total_num_comps", MPI_MAX_INFO_VAL,
value, &flag);
if (flag) {
layout_total_num_comps = strtoul(value,NULL,10);
}
- MPI_Info_get(fd->info, "panfs_layout_parity_stripe_width", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(fd->info, "panfs_layout_parity_stripe_width", MPI_MAX_INFO_VAL,
value, &flag);
if (flag) {
layout_parity_stripe_width = strtoul(value,NULL,10);
}
- MPI_Info_get(fd->info, "panfs_layout_parity_stripe_depth", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(fd->info, "panfs_layout_parity_stripe_depth", MPI_MAX_INFO_VAL,
value, &flag);
if (flag) {
layout_parity_stripe_depth = strtoul(value,NULL,10);
}
- MPI_Info_get(fd->info, "panfs_layout_visit_policy", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(fd->info, "panfs_layout_visit_policy", MPI_MAX_INFO_VAL,
value, &flag);
if (flag) {
layout_visit_policy = strtoul(value,NULL,10);
@@ -266,7 +266,7 @@ void ADIOI_PANFS_Open(ADIO_File fd, int *error_code)
amode = amode | O_EXCL;
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
- MPI_Info_get(fd->info, "panfs_concurrent_write", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(fd->info, "panfs_concurrent_write", MPI_MAX_INFO_VAL,
value, &flag);
if (flag) {
unsigned long int concurrent_write = strtoul(value,NULL,10);
@@ -291,41 +291,41 @@ void ADIOI_PANFS_Open(ADIO_File fd, int *error_code)
if (rc < 0)
{
/* Error - set layout type to unknown */
- MPI_Info_set(fd->info, "panfs_layout_type", "PAN_FS_CLIENT_LAYOUT_TYPE__INVALID");
+ ADIOI_Info_set(fd->info, "panfs_layout_type", "PAN_FS_CLIENT_LAYOUT_TYPE__INVALID");
}
else
{
ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.agg_type);
- MPI_Info_set(fd->info, "panfs_layout_type", temp_buffer);
+ ADIOI_Info_set(fd->info, "panfs_layout_type", temp_buffer);
if (file_query_args.layout.layout_is_valid == 1)
{
switch (file_query_args.layout.agg_type)
{
case PAN_FS_CLIENT_LAYOUT_TYPE__RAID0:
ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid0.stripe_unit);
- MPI_Info_set(fd->info, "panfs_layout_stripe_unit", temp_buffer);
+ ADIOI_Info_set(fd->info, "panfs_layout_stripe_unit", temp_buffer);
ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid0.total_num_comps);
- MPI_Info_set(fd->info, "panfs_layout_total_num_comps", temp_buffer);
+ ADIOI_Info_set(fd->info, "panfs_layout_total_num_comps", temp_buffer);
break;
case PAN_FS_CLIENT_LAYOUT_TYPE__RAID1_5_PARITY_STRIPE:
ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid1_5_parity_stripe.stripe_unit);
- MPI_Info_set(fd->info, "panfs_layout_stripe_unit", temp_buffer);
+ ADIOI_Info_set(fd->info, "panfs_layout_stripe_unit", temp_buffer);
ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid1_5_parity_stripe.parity_stripe_width);
- MPI_Info_set(fd->info, "panfs_layout_parity_stripe_width", temp_buffer);
+ ADIOI_Info_set(fd->info, "panfs_layout_parity_stripe_width", temp_buffer);
ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid1_5_parity_stripe.parity_stripe_depth);
- MPI_Info_set(fd->info, "panfs_layout_parity_stripe_depth", temp_buffer);
+ ADIOI_Info_set(fd->info, "panfs_layout_parity_stripe_depth", temp_buffer);
ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid1_5_parity_stripe.total_num_comps);
- MPI_Info_set(fd->info, "panfs_layout_total_num_comps", temp_buffer);
+ ADIOI_Info_set(fd->info, "panfs_layout_total_num_comps", temp_buffer);
ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid1_5_parity_stripe.layout_visit_policy);
- MPI_Info_set(fd->info, "panfs_layout_visit_policy", temp_buffer);
+ ADIOI_Info_set(fd->info, "panfs_layout_visit_policy", temp_buffer);
break;
case PAN_FS_CLIENT_LAYOUT_TYPE__RAID10:
ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid10.stripe_unit);
- MPI_Info_set(fd->info, "panfs_layout_stripe_unit", temp_buffer);
+ ADIOI_Info_set(fd->info, "panfs_layout_stripe_unit", temp_buffer);
ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid10.total_num_comps);
- MPI_Info_set(fd->info, "panfs_layout_total_num_comps", temp_buffer);
+ ADIOI_Info_set(fd->info, "panfs_layout_total_num_comps", temp_buffer);
ADIOI_Snprintf(temp_buffer,TEMP_BUFFER_SIZE,"%u",file_query_args.layout.u.raid10.layout_visit_policy);
- MPI_Info_set(fd->info, "panfs_layout_visit_policy", temp_buffer);
+ ADIOI_Info_set(fd->info, "panfs_layout_visit_policy", temp_buffer);
break;
}
}
diff --git a/ompi/mca/io/romio/romio/adio/ad_pfs/ad_pfs_hints.c b/ompi/mca/io/romio/romio/adio/ad_pfs/ad_pfs_hints.c
index d4a8575a66..5815ca0925 100644
--- a/ompi/mca/io/romio/romio/adio/ad_pfs/ad_pfs_hints.c
+++ b/ompi/mca/io/romio/romio/adio/ad_pfs/ad_pfs_hints.c
@@ -24,7 +24,7 @@ void ADIOI_PFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
if (users_info != MPI_INFO_NULL) {
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
- MPI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL,
value, &flag);
if (flag) {
str_factor=atoi(value);
@@ -40,7 +40,7 @@ void ADIOI_PFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
/* --END ERROR HANDLING-- */
}
- MPI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
value, &flag);
if (flag) {
str_unit=atoi(value);
@@ -56,7 +56,7 @@ void ADIOI_PFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
/* --END ERROR HANDLING-- */
}
- MPI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL,
value, &flag);
if (flag) {
start_iodev=atoi(value);
@@ -119,15 +119,15 @@ void ADIOI_PFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
If so, mark it as true in fd->info and turn it on in
ADIOI_PFS_Open after the file is opened */
- MPI_Info_get(users_info, "pfs_svr_buf", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(users_info, "pfs_svr_buf", MPI_MAX_INFO_VAL,
value, &flag);
if (flag && (!strcmp(value, "true")))
- MPI_Info_set(fd->info, "pfs_svr_buf", "true");
- else MPI_Info_set(fd->info, "pfs_svr_buf", "false");
+ ADIOI_Info_set(fd->info, "pfs_svr_buf", "true");
+ else ADIOI_Info_set(fd->info, "pfs_svr_buf", "false");
ADIOI_Free(value);
}
- else MPI_Info_set(fd->info, "pfs_svr_buf", "false");
+ else ADIOI_Info_set(fd->info, "pfs_svr_buf", "false");
/* set the values for collective I/O and data sieving parameters */
ADIOI_GEN_SetInfo(fd, users_info, error_code);
@@ -144,23 +144,23 @@ void ADIOI_PFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
if (users_info != MPI_INFO_NULL) {
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
- MPI_Info_get(users_info, "pfs_svr_buf", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(users_info, "pfs_svr_buf", MPI_MAX_INFO_VAL,
value, &flag);
if (flag && (!strcmp(value, "true") || !strcmp(value, "false"))) {
value_in_fd = (char *)
ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
- MPI_Info_get(fd->info, "pfs_svr_buf", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(fd->info, "pfs_svr_buf", MPI_MAX_INFO_VAL,
value_in_fd, &flag);
if (strcmp(value, value_in_fd)) {
if (!strcmp(value, "true")) {
err = fcntl(fd->fd_sys, F_PFS_SVR_BUF, TRUE);
if (!err)
- MPI_Info_set(fd->info, "pfs_svr_buf", "true");
+ ADIOI_Info_set(fd->info, "pfs_svr_buf", "true");
}
else {
err = fcntl(fd->fd_sys, F_PFS_SVR_BUF, FALSE);
if (!err)
- MPI_Info_set(fd->info, "pfs_svr_buf", "false");
+ ADIOI_Info_set(fd->info, "pfs_svr_buf", "false");
}
}
ADIOI_Free(value_in_fd);
diff --git a/ompi/mca/io/romio/romio/adio/ad_pfs/ad_pfs_open.c b/ompi/mca/io/romio/romio/adio/ad_pfs/ad_pfs_open.c
index c56c0f991f..c424af07fa 100644
--- a/ompi/mca/io/romio/romio/adio/ad_pfs/ad_pfs_open.c
+++ b/ompi/mca/io/romio/romio/adio/ad_pfs/ad_pfs_open.c
@@ -49,11 +49,11 @@ void ADIOI_PFS_Open(ADIO_File fd, int *error_code)
to ADIOI_PFS_SetInfo. Turn it on now, since we now have a
valid file descriptor. */
- MPI_Info_get(fd->info, "pfs_svr_buf", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(fd->info, "pfs_svr_buf", MPI_MAX_INFO_VAL,
value, &flag);
if (flag && (!strcmp(value, "true"))) {
err = fcntl(fd->fd_sys, F_PFS_SVR_BUF, TRUE);
- if (err) MPI_Info_set(fd->info, "pfs_svr_buf", "false");
+ if (err) ADIOI_Info_set(fd->info, "pfs_svr_buf", "false");
}
/* get file striping information and set it in info */
@@ -61,13 +61,13 @@ void ADIOI_PFS_Open(ADIO_File fd, int *error_code)
if (!err) {
ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", attr.s_sunitsize);
- MPI_Info_set(fd->info, "striping_unit", value);
+ ADIOI_Info_set(fd->info, "striping_unit", value);
ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", attr.s_sfactor);
- MPI_Info_set(fd->info, "striping_factor", value);
+ ADIOI_Info_set(fd->info, "striping_factor", value);
ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", attr.s_start_sdir);
- MPI_Info_set(fd->info, "start_iodevice", value);
+ ADIOI_Info_set(fd->info, "start_iodevice", value);
}
ADIOI_Free(value);
diff --git a/ompi/mca/io/romio/romio/adio/ad_piofs/Makefile.am b/ompi/mca/io/romio/romio/adio/ad_piofs/Makefile.am
index 894ffde106..6fc828a43f 100644
--- a/ompi/mca/io/romio/romio/adio/ad_piofs/Makefile.am
+++ b/ompi/mca/io/romio/romio/adio/ad_piofs/Makefile.am
@@ -25,6 +25,7 @@ libadio_piofs_la_SOURCES = \
ad_piofs.c \
ad_piofs.h \
ad_piofs_fcntl.c \
+ ad_piofs_features.c \
ad_piofs_hints.c \
ad_piofs_open.c \
ad_piofs_read.c \
diff --git a/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs.c b/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs.c
index 9c2bd59ef1..b602c789bd 100644
--- a/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs.c
+++ b/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs.c
@@ -33,4 +33,5 @@ struct ADIOI_Fns_struct ADIO_PIOFS_operations = {
ADIOI_GEN_Flush, /* Flush */
ADIOI_GEN_Resize, /* Resize */
ADIOI_GEN_Delete, /* Delete */
+ ADIOI_PIOFS_Feature,
};
diff --git a/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs.h b/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs.h
index f39149de25..919cdb4cab 100644
--- a/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs.h
+++ b/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs.h
@@ -35,4 +35,6 @@ void ADIOI_PIOFS_WriteStrided(ADIO_File fd, void *buf, int count,
*error_code);
void ADIOI_PIOFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code);
+int ADIOI_PIOFS_Feature(ADIO_File fd, int flag);
+
#endif
diff --git a/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs_features.c b/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs_features.c
new file mode 100644
index 0000000000..11b17bf7be
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs_features.c
@@ -0,0 +1,27 @@
+#include "adio.h"
+
+/*
+ * ADIOI_PIOFS_Feature - report whether the PIOFS driver supports an
+ * optional ADIO feature.
+ *
+ * fd   - file handle; not consulted, the answer is the same for every file.
+ * flag - ADIO_* feature identifier being queried.
+ *
+ * Returns 0 (unsupported) for every flag, including unrecognized ones.
+ * The known flags are listed explicitly so the set of features this
+ * driver declines is visible at a glance.
+ */
+int ADIOI_PIOFS_Feature(ADIO_File fd, int flag)
+{
+    (void) fd;
+
+    switch (flag) {
+    case ADIO_LOCKS:
+    case ADIO_SHARED_FP:
+    case ADIO_ATOMIC_MODE:
+    case ADIO_DATA_SIEVING_WRITES:
+    case ADIO_SCALABLE_OPEN:
+    default:
+        return 0;
+    }
+}
diff --git a/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs_hints.c b/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs_hints.c
index 57d53edc8f..295310512d 100644
--- a/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs_hints.c
+++ b/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs_hints.c
@@ -25,7 +25,7 @@ void ADIOI_PIOFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
if (users_info != MPI_INFO_NULL) {
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
- MPI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL,
value, &flag);
if (flag) {
str_factor=atoi(value);
@@ -37,7 +37,7 @@ void ADIOI_PIOFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
}
}
- MPI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
value, &flag);
if (flag) {
str_unit=atoi(value);
@@ -49,7 +49,7 @@ void ADIOI_PIOFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
}
}
- MPI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL,
value, &flag);
if (flag) {
start_iodev=atoi(value);
diff --git a/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs_open.c b/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs_open.c
index 6572234632..20c3644c9b 100644
--- a/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs_open.c
+++ b/ompi/mca/io/romio/romio/adio/ad_piofs/ad_piofs_open.c
@@ -49,13 +49,13 @@ void ADIOI_PIOFS_Open(ADIO_File fd, int *error_code)
if (!err) {
ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", piofs_fstat.st_bsu);
- MPI_Info_set(fd->info, "striping_unit", value);
+ ADIOI_Info_set(fd->info, "striping_unit", value);
ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", piofs_fstat.st_cells);
- MPI_Info_set(fd->info, "striping_factor", value);
+ ADIOI_Info_set(fd->info, "striping_factor", value);
ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", piofs_fstat.st_base_node);
- MPI_Info_set(fd->info, "start_iodevice", value);
+ ADIOI_Info_set(fd->info, "start_iodevice", value);
}
ADIOI_Free(value);
diff --git a/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs.c b/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs.c
index 967eadd787..198cc9fb65 100644
--- a/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs.c
+++ b/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs.c
@@ -33,4 +33,5 @@ struct ADIOI_Fns_struct ADIO_PVFS_operations = {
ADIOI_PVFS_Flush, /* Flush */
ADIOI_PVFS_Resize, /* Resize */
ADIOI_PVFS_Delete, /* Delete */
+ ADIOI_PVFS_Feature, /* Features */
};
diff --git a/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_hints.c b/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_hints.c
index 40c5434e34..710aea7708 100644
--- a/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_hints.c
+++ b/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_hints.c
@@ -17,8 +17,8 @@ void ADIOI_PVFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
/* This must be part of the open call. can set striping parameters
if necessary. */
MPI_Info_create(&(fd->info));
- MPI_Info_set(fd->info, "romio_pvfs_listio_read", "disable");
- MPI_Info_set(fd->info, "romio_pvfs_listio_write", "disable");
+ ADIOI_Info_set(fd->info, "romio_pvfs_listio_read", "disable");
+ ADIOI_Info_set(fd->info, "romio_pvfs_listio_write", "disable");
fd->hints->fs_hints.pvfs.listio_read = ADIOI_HINT_DISABLE;
fd->hints->fs_hints.pvfs.listio_write = ADIOI_HINT_DISABLE;
@@ -27,7 +27,7 @@ void ADIOI_PVFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
if (users_info != MPI_INFO_NULL) {
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
- MPI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL,
value, &flag);
if (flag) {
str_factor=atoi(value);
@@ -41,10 +41,10 @@ void ADIOI_PVFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
return;
/* --END ERROR HANDLING-- */
}
- else MPI_Info_set(fd->info, "striping_factor", value);
+ else ADIOI_Info_set(fd->info, "striping_factor", value);
}
- MPI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
value, &flag);
if (flag) {
str_unit=atoi(value);
@@ -58,10 +58,10 @@ void ADIOI_PVFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
return;
/* --END ERROR HANDLING-- */
}
- else MPI_Info_set(fd->info, "striping_unit", value);
+ else ADIOI_Info_set(fd->info, "striping_unit", value);
}
- MPI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(users_info, "start_iodevice", MPI_MAX_INFO_VAL,
value, &flag);
if (flag) {
start_iodev=atoi(value);
@@ -75,25 +75,25 @@ void ADIOI_PVFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
return;
/* --END ERROR HANDLING-- */
}
- else MPI_Info_set(fd->info, "start_iodevice", value);
+ else ADIOI_Info_set(fd->info, "start_iodevice", value);
}
- MPI_Info_get(users_info, "romio_pvfs_listio_read",
+ ADIOI_Info_get(users_info, "romio_pvfs_listio_read",
MPI_MAX_INFO_VAL,
value, &flag);
if (flag) {
if ( !strcmp(value, "enable") || !strcmp(value, "ENABLE"))
{
- MPI_Info_set(fd->info, "romio_pvfs_listio_read", value);
+ ADIOI_Info_set(fd->info, "romio_pvfs_listio_read", value);
fd->hints->fs_hints.pvfs.listio_read = ADIOI_HINT_ENABLE;
} else if ( !strcmp(value, "disable") || !strcmp(value, "DISABLE"))
{
- MPI_Info_set(fd->info , "romio_pvfs_listio_read", value);
+ ADIOI_Info_set(fd->info , "romio_pvfs_listio_read", value);
fd->hints->fs_hints.pvfs.listio_read = ADIOI_HINT_DISABLE;
}
else if ( !strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
{
- MPI_Info_set(fd->info, "romio_pvfs_listio_read", value);
+ ADIOI_Info_set(fd->info, "romio_pvfs_listio_read", value);
fd->hints->fs_hints.pvfs.listio_read = ADIOI_HINT_AUTO;
}
tmp_val = fd->hints->fs_hints.pvfs.listio_read;
@@ -107,21 +107,21 @@ void ADIOI_PVFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
/* --END ERROR HANDLING-- */
}
}
- MPI_Info_get(users_info, "romio_pvfs_listio_write", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(users_info, "romio_pvfs_listio_write", MPI_MAX_INFO_VAL,
value, &flag);
if (flag) {
if ( !strcmp(value, "enable") || !strcmp(value, "ENABLE"))
{
- MPI_Info_set(fd->info, "romio_pvfs_listio_write", value);
+ ADIOI_Info_set(fd->info, "romio_pvfs_listio_write", value);
fd->hints->fs_hints.pvfs.listio_write = ADIOI_HINT_ENABLE;
} else if ( !strcmp(value, "disable") || !strcmp(value, "DISABLE"))
{
- MPI_Info_set(fd->info, "romio_pvfs_listio_write", value);
+ ADIOI_Info_set(fd->info, "romio_pvfs_listio_write", value);
fd->hints->fs_hints.pvfs.listio_write = ADIOI_HINT_DISABLE;
}
else if ( !strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
{
- MPI_Info_set(fd->info, "romio_pvfs_listio_write", value);
+ ADIOI_Info_set(fd->info, "romio_pvfs_listio_write", value);
fd->hints->fs_hints.pvfs.listio_write = ADIOI_HINT_AUTO;
}
tmp_val = fd->hints->fs_hints.pvfs.listio_write;
diff --git a/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_open.c b/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_open.c
index e65a7f3d74..535ed04b5b 100644
--- a/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_open.c
+++ b/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_open.c
@@ -37,15 +37,15 @@ void ADIOI_PVFS_Open(ADIO_File fd, int *error_code)
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
- MPI_Info_get(fd->info, "striping_factor", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(fd->info, "striping_factor", MPI_MAX_INFO_VAL,
value, &flag);
if (flag && (atoi(value) > 0)) pstat.pcount = atoi(value);
- MPI_Info_get(fd->info, "striping_unit", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(fd->info, "striping_unit", MPI_MAX_INFO_VAL,
value, &flag);
if (flag && (atoi(value) > 0)) pstat.ssize = atoi(value);
- MPI_Info_get(fd->info, "start_iodevice", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(fd->info, "start_iodevice", MPI_MAX_INFO_VAL,
value, &flag);
if (flag && (atoi(value) >= 0)) pstat.base = atoi(value);
@@ -71,11 +71,11 @@ void ADIOI_PVFS_Open(ADIO_File fd, int *error_code)
if (fd->fd_sys != -1) {
pvfs_ioctl(fd->fd_sys, GETMETA, &pstat);
ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", pstat.pcount);
- MPI_Info_set(fd->info, "striping_factor", value);
+ ADIOI_Info_set(fd->info, "striping_factor", value);
ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", pstat.ssize);
- MPI_Info_set(fd->info, "striping_unit", value);
+ ADIOI_Info_set(fd->info, "striping_unit", value);
ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", pstat.base);
- MPI_Info_set(fd->info, "start_iodevice", value);
+ ADIOI_Info_set(fd->info, "start_iodevice", value);
}
ADIOI_Free(value);
diff --git a/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_read.c b/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_read.c
index a0e3fb8456..f6035218c3 100644
--- a/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_read.c
+++ b/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_read.c
@@ -43,7 +43,8 @@ void ADIOI_PVFS_ReadContig(ADIO_File fd, void *buf, int count,
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
#endif
- fd->fp_sys_posn = offset + err;
+ if (err>0)
+ fd->fp_sys_posn = offset + err;
/* individual file pointer not updated */
}
else { /* read from curr. location of ind. file pointer */
@@ -63,7 +64,8 @@ void ADIOI_PVFS_ReadContig(ADIO_File fd, void *buf, int count,
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
#endif
- fd->fp_ind += err;
+ if (err > 0)
+ fd->fp_ind += err;
fd->fp_sys_posn = fd->fp_ind;
}
diff --git a/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_write.c b/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_write.c
index df261593ef..50175f3e51 100644
--- a/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_write.c
+++ b/ompi/mca/io/romio/romio/adio/ad_pvfs/ad_pvfs_write.c
@@ -43,7 +43,8 @@ void ADIOI_PVFS_WriteContig(ADIO_File fd, void *buf, int count,
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
- fd->fp_sys_posn = offset + err;
+ if (err > 0)
+ fd->fp_sys_posn = offset + err;
/* individual file pointer not updated */
}
else { /* write from curr. location of ind. file pointer */
@@ -63,7 +64,8 @@ void ADIOI_PVFS_WriteContig(ADIO_File fd, void *buf, int count,
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
- fd->fp_ind += err;
+ if (err > 0)
+ fd->fp_ind += err;
fd->fp_sys_posn = fd->fp_ind;
}
diff --git a/ompi/mca/io/romio/romio/adio/ad_pvfs2/Makefile.am b/ompi/mca/io/romio/romio/adio/ad_pvfs2/Makefile.am
index ea21dda326..3670d07691 100644
--- a/ompi/mca/io/romio/romio/adio/ad_pvfs2/Makefile.am
+++ b/ompi/mca/io/romio/romio/adio/ad_pvfs2/Makefile.am
@@ -28,9 +28,15 @@ libadio_pvfs2_la_SOURCES = \
ad_pvfs2_common.c \
ad_pvfs2_delete.c \
ad_pvfs2_fcntl.c \
+ ad_pvfs2_features.c \
ad_pvfs2_flush.c \
ad_pvfs2_hints.c \
+ ad_pvfs2_io.h \
+ ad_pvfs2_io_dtype.c \
+ ad_pvfs2_io_list.c \
ad_pvfs2_open.c \
ad_pvfs2_read.c \
+ ad_pvfs2_read_list_classic.c \
ad_pvfs2_resize.c \
- ad_pvfs2_write.c
+ ad_pvfs2_write.c \
+ ad_pvfs2_write_list_classic.c
diff --git a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2.c b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2.c
index 915be8e0b4..75ab87d854 100644
--- a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2.c
+++ b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2.c
@@ -12,6 +12,7 @@
struct ADIOI_Fns_struct ADIO_PVFS2_operations = {
ADIOI_PVFS2_Open, /* Open */
+ ADIOI_SCALEABLE_OpenColl, /* OpenColl */
ADIOI_PVFS2_ReadContig, /* ReadContig */
ADIOI_PVFS2_WriteContig, /* WriteContig */
ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */
@@ -22,13 +23,8 @@ struct ADIOI_Fns_struct ADIO_PVFS2_operations = {
ADIOI_PVFS2_ReadStrided, /* ReadStrided */
ADIOI_PVFS2_WriteStrided, /* WriteStrided */
ADIOI_PVFS2_Close, /* Close */
-#ifdef ROMIO_HAVE_WORKING_AIO
ADIOI_PVFS2_IReadContig, /* IreadContig */
ADIOI_PVFS2_IWriteContig, /* IwriteContig */
-#else
- ADIOI_FAKE_IreadContig, /* IreadContig */
- ADIOI_FAKE_IwriteContig, /* IwriteContig */
-#endif
ADIOI_FAKE_IODone, /* ReadDone */
ADIOI_FAKE_IODone, /* WriteDone */
ADIOI_FAKE_IOComplete, /* ReadComplete */
@@ -38,6 +34,7 @@ struct ADIOI_Fns_struct ADIO_PVFS2_operations = {
ADIOI_PVFS2_Flush, /* Flush */
ADIOI_PVFS2_Resize, /* Resize */
ADIOI_PVFS2_Delete, /* Delete */
+ ADIOI_PVFS2_Feature,
};
/*
diff --git a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2.h b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2.h
index 026e400d0c..e3ff045233 100644
--- a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2.h
+++ b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2.h
@@ -17,7 +17,6 @@
#include "pvfs2-compat.h"
#endif
-
void ADIOI_PVFS2_Open(ADIO_File fd, int *error_code);
void ADIOI_PVFS2_Close(ADIO_File fd, int *error_code);
void ADIOI_PVFS2_ReadContig(ADIO_File fd, void *buf, int count,
@@ -42,6 +41,8 @@ void ADIOI_PVFS2_Flush(ADIO_File fd, int *error_code);
void ADIOI_PVFS2_Delete(char *filename, int *error_code);
void ADIOI_PVFS2_Resize(ADIO_File fd, ADIO_Offset size, int *error_code);
void ADIOI_PVFS2_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code);
+int ADIOI_PVFS2_Feature(ADIO_File fd, int flag);
+
void ADIOI_PVFS2_IReadContig(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, MPI_Request *request,
@@ -54,4 +55,12 @@ void ADIOI_PVFS2_AIO_contig(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, MPI_Request *request,
int flag, int *error_code);
+void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, void *buf, int count,
+ MPI_Datatype datatype, int file_ptr_type,
+ ADIO_Offset offset, ADIO_Status *status, int
+ *error_code);
+void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count,
+ MPI_Datatype datatype, int file_ptr_type,
+ ADIO_Offset offset, ADIO_Status *status, int
+ *error_code);
#endif
diff --git a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_aio.c b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_aio.c
index 880d1e28f6..89ca5c2f6b 100644
--- a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_aio.c
+++ b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_aio.c
@@ -17,7 +17,6 @@
#define READ 0
#define WRITE 1
-#ifdef ROMIO_HAVE_WORKING_AIO
static int ADIOI_PVFS2_greq_class = 0;
int ADIOI_PVFS2_aio_free_fn(void *extra_state);
int ADIOI_PVFS2_aio_poll_fn(void *extra_state, MPI_Status *status);
@@ -168,12 +167,10 @@ int ADIOI_PVFS2_aio_poll_fn(void *extra_state, MPI_Status *status)
aio_req = (ADIOI_AIO_Request *)extra_state;
/* BUG: cannot PVFS_sys_testsome: does not work for a specific request */
- ret = PVFS_sys_wait(aio_req->op_id, __FUNCTION__, &error);
+ ret = PVFS_sys_wait(aio_req->op_id, "ADIOI_PVFS2_aio_poll_fn", &error);
if (ret == 0) {
aio_req->nbytes = aio_req->resp_io.total_completed;
- MPIR_Nest_incr();
MPI_Grequest_complete(aio_req->req);
- MPIR_Nest_decr();
return MPI_SUCCESS;
} else
return MPI_UNDEFINED; /* TODO: what's this error? */
@@ -186,7 +183,7 @@ int ADIOI_PVFS2_aio_wait_fn(int count, void ** array_of_states,
ADIOI_AIO_Request **aio_reqlist;
PVFS_sys_op_id *op_id_array;
- int i,j, greq_count;
+ int i,j, greq_count, completed_count=0;
int *error_array;
aio_reqlist = (ADIOI_AIO_Request **)array_of_states;
@@ -195,25 +192,27 @@ int ADIOI_PVFS2_aio_wait_fn(int count, void ** array_of_states,
error_array = (int *)ADIOI_Calloc(count, sizeof(int));
greq_count = count;
+
/* PVFS-2.6: testsome actually tests all requests and fills in op_id_array
* with the ones that have completed. count is an in/out parameter.
* returns with the number of completed operations. what a mess! */
- PVFS_sys_testsome(op_id_array, &count, NULL, error_array, INT_MAX);
- for (i=0; i< count; i++) {
- for (j=0; j<greq_count; j++) {
- if (op_id_array[i] == aio_reqlist[j]->op_id) {
- aio_reqlist[j]->nbytes =
- aio_reqlist[j]->resp_io.total_completed;
- MPIR_Nest_incr();
- MPI_Grequest_complete(aio_reqlist[j]->req);
- MPIR_Nest_decr();
+ while (completed_count < greq_count ) {
+ count = greq_count;
+ PVFS_sys_testsome(op_id_array, &count, NULL, error_array, INT_MAX);
+ completed_count += count;
+ for (i=0; i< count; i++) {
+ for (j=0; j<greq_count; j++) {
+ if (op_id_array[i] == aio_reqlist[j]->op_id) {
+ aio_reqlist[j]->nbytes =
+ aio_reqlist[j]->resp_io.total_completed;
+ MPI_Grequest_complete(aio_reqlist[j]->req);
+ }
}
}
}
return MPI_SUCCESS; /* TODO: no idea how to deal with errors */
}
-#endif
/*
* vim: ts=8 sts=4 sw=4 noexpandtab
diff --git a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_common.c b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_common.c
index adbd104520..4da9eac02a 100644
--- a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_common.c
+++ b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_common.c
@@ -42,6 +42,7 @@ int ADIOI_PVFS2_End_call(MPI_Comm comm, int keyval,
{
int error_code;
ADIOI_PVFS2_End(&error_code);
+ MPI_Keyval_free(&keyval);
return error_code;
}
@@ -81,7 +82,7 @@ void ADIOI_PVFS2_Init(int *error_code )
&ADIOI_PVFS2_Initialized, (void *)0);
/* just like romio does, we make a dummy attribute so we
* get cleaned up */
- MPI_Attr_put(MPI_COMM_WORLD, ADIOI_PVFS2_Initialized, (void *)0);
+ MPI_Attr_put(MPI_COMM_SELF, ADIOI_PVFS2_Initialized, (void *)0);
}
void ADIOI_PVFS2_makeattribs(PVFS_sys_attr * attribs)
@@ -107,7 +108,41 @@ void ADIOI_PVFS2_makecredentials(PVFS_credentials * credentials)
+/* Translate a PVFS2 error code into an MPI error class.
+ * pvfs_error - error value to translate (compared against the PVFS_E*
+ *              constants below)
+ * Returns the matching MPI_ERR_* class, or MPI_UNDEFINED for any value
+ * that has no case in the switch. */
int ADIOI_PVFS2_error_convert(int pvfs_error)
{
- return MPI_UNDEFINED;
+ switch(pvfs_error)
+ {
+ case PVFS_EPERM:
+ case PVFS_EACCES:
+ return MPI_ERR_ACCESS;
+ case PVFS_ENOENT:
+ case PVFS_ENXIO:
+ case PVFS_ENODEV:
+ return MPI_ERR_NO_SUCH_FILE;
+ case PVFS_EIO:
+ return MPI_ERR_IO;
+ case PVFS_EEXIST:
+ return MPI_ERR_FILE_EXISTS;
+ case PVFS_ENOTDIR: /* ??? */
+ case PVFS_EISDIR: /* ??? */
+ case PVFS_ENAMETOOLONG:
+ return MPI_ERR_BAD_FILE;
+ case PVFS_EINVAL:
+ return MPI_ERR_FILE;
+ case PVFS_EFBIG: /* ??? */
+ case PVFS_ENOSPC:
+ return MPI_ERR_NO_SPACE;
+ case PVFS_EROFS:
+ return MPI_ERR_READ_ONLY;
+ case PVFS_ENOSYS:
+ return MPI_ERR_UNSUPPORTED_OPERATION;
+ /* PVFS does not support quotas */
+ /* NOTE(review): this is the only case that uses an unprefixed errno
+  * name (EDQUOT) instead of a PVFS_E* constant -- confirm it cannot
+  * collide with one of the PVFS_E* values above */
+ case EDQUOT:
+ return MPI_ERR_QUOTA;
+ case PVFS_ENOMEM:
+ return MPI_ERR_INTERN;
+ default:
+ return MPI_UNDEFINED;
+ }
+
}
/*
diff --git a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_features.c b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_features.c
new file mode 100644
index 0000000000..71d99e67e8
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_features.c
@@ -0,0 +1,16 @@
+#include "adio.h"
+#include "ad_pvfs2.h"
+
+/* Feature query for the PVFS2 driver.
+ * fd   - file the query refers to (not examined here)
+ * flag - one of the ADIO_* feature selectors
+ * Returns 1 when the feature is reported as available, 0 otherwise. */
+int ADIOI_PVFS2_Feature(ADIO_File fd, int flag)
+{
+    /* ADIO_SCALABLE_OPEN is the only selector answered with 1.
+     * ADIO_SHARED_FP, ADIO_LOCKS, ADIO_SEQUENTIAL,
+     * ADIO_DATA_SIEVING_WRITES and every other value are answered
+     * with 0. */
+    if (flag == ADIO_SCALABLE_OPEN)
+        return 1;
+
+    return 0;
+}
diff --git a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_hints.c b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_hints.c
index a752dbeade..5170e8afb7 100644
--- a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_hints.c
+++ b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_hints.c
@@ -17,20 +17,37 @@ void ADIOI_PVFS2_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
if ((fd->info) == MPI_INFO_NULL) {
/* part of the open call */
MPI_Info_create(&(fd->info));
- MPI_Info_set(fd->info, "romio_pvfs2_debugmask", "0");
+ ADIOI_Info_set(fd->info, "romio_pvfs2_debugmask", "0");
fd->hints->fs_hints.pvfs2.debugmask = 0;
- MPI_Info_set(fd->info, "striping_factor", "0");
+ ADIOI_Info_set(fd->info, "striping_factor", "0");
fd->hints->striping_factor = 0;
- MPI_Info_set(fd->info, "striping_unit", "0");
+ ADIOI_Info_set(fd->info, "striping_unit", "0");
fd->hints->striping_unit = 0;
+
+ /* disable the aggressive strided optimizations by default */
+ ADIOI_Info_set(fd->info, "romio_pvfs2_posix_read", "disable");
+ ADIOI_Info_set(fd->info, "romio_pvfs2_posix_write", "disable");
+ fd->hints->fs_hints.pvfs2.posix_read = ADIOI_HINT_DISABLE;
+ fd->hints->fs_hints.pvfs2.posix_write = ADIOI_HINT_DISABLE;
+
+ ADIOI_Info_set(fd->info, "romio_pvfs2_dtype_read", "disable");
+ ADIOI_Info_set(fd->info, "romio_pvfs2_dtype_write", "disable");
+ fd->hints->fs_hints.pvfs2.dtype_read = ADIOI_HINT_DISABLE;
+ fd->hints->fs_hints.pvfs2.dtype_write = ADIOI_HINT_DISABLE;
+
+ ADIOI_Info_set(fd->info, "romio_pvfs2_listio_read", "disable");
+ ADIOI_Info_set(fd->info, "romio_pvfs2_listio_write", "disable");
+ fd->hints->fs_hints.pvfs2.listio_read = ADIOI_HINT_DISABLE;
+ fd->hints->fs_hints.pvfs2.listio_write = ADIOI_HINT_DISABLE;
+
/* any user-provided hints? */
if (users_info != MPI_INFO_NULL) {
/* pvfs2 debugging */
value = (char *) ADIOI_Malloc( (MPI_MAX_INFO_VAL+1)*sizeof(char));
- MPI_Info_get(users_info, "romio_pvfs2_debugmask",
+ ADIOI_Info_get(users_info, "romio_pvfs2_debugmask",
MPI_MAX_INFO_VAL, value, &flag);
if (flag) {
tmp_value = fd->hints->fs_hints.pvfs2.debugmask =
@@ -46,11 +63,11 @@ void ADIOI_PVFS2_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
}
/* --END ERROR HANDLING-- */
- MPI_Info_set(fd->info, "romio_pvfs2_debugmask", value);
+ ADIOI_Info_set(fd->info, "romio_pvfs2_debugmask", value);
}
/* the striping factor */
- MPI_Info_get(users_info, "striping_factor",
+ ADIOI_Info_get(users_info, "striping_factor",
MPI_MAX_INFO_VAL, value, &flag);
if (flag) {
tmp_value = fd->hints->striping_factor = atoi(value);
@@ -65,11 +82,11 @@ void ADIOI_PVFS2_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
}
/* --END ERROR HANDLING-- */
- MPI_Info_set(fd->info, "striping_factor", value);
+ ADIOI_Info_set(fd->info, "striping_factor", value);
}
/* the striping unit */
- MPI_Info_get(users_info, "striping_unit",
+ ADIOI_Info_get(users_info, "striping_unit",
MPI_MAX_INFO_VAL, value, &flag);
if (flag) {
tmp_value = fd->hints->striping_unit = atoi(value);
@@ -83,16 +100,167 @@ void ADIOI_PVFS2_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
}
/* --END ERROR HANDLING-- */
- MPI_Info_set(fd->info, "striping_unit", value);
+ ADIOI_Info_set(fd->info, "striping_unit", value);
}
/* distribution name */
- MPI_Info_get(users_info, "romio_pvfs2_distribution_name",
+ ADIOI_Info_get(users_info, "romio_pvfs2_distribution_name",
MPI_MAX_INFO_VAL, value, &flag);
if (flag) {
}
+
+ /* POSIX read */
+ ADIOI_Info_get(users_info, "romio_pvfs2_posix_read",
+ MPI_MAX_INFO_VAL, value, &flag);
+ if (flag) {
+ if ( !strcmp(value, "enable") || !strcmp(value, "ENABLE"))
+ {
+ ADIOI_Info_set(fd->info, "romio_pvfs2_posix_read", value);
+ fd->hints->fs_hints.pvfs2.posix_read = ADIOI_HINT_ENABLE;
+ }
+ else if ( !strcmp(value, "disable") ||
+ !strcmp(value, "DISABLE"))
+ {
+ ADIOI_Info_set(fd->info , "romio_pvfs2_posix_read", value);
+ fd->hints->fs_hints.pvfs2.posix_read = ADIOI_HINT_DISABLE;
+ }
+ tmp_value = fd->hints->fs_hints.pvfs2.posix_read;
+ MPI_Bcast(&tmp_value, 1, MPI_INT, 0, fd->comm);
+ if (tmp_value != fd->hints->fs_hints.pvfs2.posix_read) {
+ MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
+ "posix_read",
+ error_code);
+ return;
+ }
+ }
+
+ /* POSIX write */
+ ADIOI_Info_get(users_info, "romio_pvfs2_posix_write",
+ MPI_MAX_INFO_VAL, value, &flag);
+ if (flag) {
+ if ( !strcmp(value, "enable") || !strcmp(value, "ENABLE"))
+ {
+ ADIOI_Info_set(fd->info, "romio_pvfs2_posix_write", value);
+ fd->hints->fs_hints.pvfs2.posix_write = ADIOI_HINT_ENABLE;
+ }
+ else if ( !strcmp(value, "disable") ||
+ !strcmp(value, "DISABLE"))
+ {
+ ADIOI_Info_set(fd->info , "romio_pvfs2_posix_write", value);
+ fd->hints->fs_hints.pvfs2.posix_write = ADIOI_HINT_DISABLE;
+ }
+ tmp_value = fd->hints->fs_hints.pvfs2.posix_write;
+ MPI_Bcast(&tmp_value, 1, MPI_INT, 0, fd->comm);
+ if (tmp_value != fd->hints->fs_hints.pvfs2.posix_write) {
+ MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
+ "posix_write",
+ error_code);
+ return;
+ }
+ }
+
+ /* Datatype read */
+ ADIOI_Info_get(users_info, "romio_pvfs2_dtype_read",
+ MPI_MAX_INFO_VAL, value, &flag);
+ if (flag) {
+ if ( !strcmp(value, "enable") || !strcmp(value, "ENABLE"))
+ {
+ ADIOI_Info_set(fd->info, "romio_pvfs2_dtype_read", value);
+ fd->hints->fs_hints.pvfs2.dtype_read = ADIOI_HINT_ENABLE;
+ }
+ else if ( !strcmp(value, "disable") ||
+ !strcmp(value, "DISABLE"))
+ {
+ ADIOI_Info_set(fd->info , "romio_pvfs2_dtype_read", value);
+ fd->hints->fs_hints.pvfs2.dtype_read = ADIOI_HINT_DISABLE;
+ }
+ tmp_value = fd->hints->fs_hints.pvfs2.dtype_read;
+ MPI_Bcast(&tmp_value, 1, MPI_INT, 0, fd->comm);
+ if (tmp_value != fd->hints->fs_hints.pvfs2.dtype_read) {
+ MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
+ "dtype_read",
+ error_code);
+ return;
+ }
+ }
+
+ /* Datatype write */
+ ADIOI_Info_get(users_info, "romio_pvfs2_dtype_write",
+ MPI_MAX_INFO_VAL, value, &flag);
+ if (flag) {
+ if ( !strcmp(value, "enable") || !strcmp(value, "ENABLE"))
+ {
+ ADIOI_Info_set(fd->info, "romio_pvfs2_dtype_write", value);
+ fd->hints->fs_hints.pvfs2.dtype_write = ADIOI_HINT_ENABLE;
+ }
+ else if ( !strcmp(value, "disable") ||
+ !strcmp(value, "DISABLE"))
+ {
+ ADIOI_Info_set(fd->info , "romio_pvfs2_dtype_write", value);
+ fd->hints->fs_hints.pvfs2.dtype_write = ADIOI_HINT_DISABLE;
+ }
+ tmp_value = fd->hints->fs_hints.pvfs2.dtype_write;
+ MPI_Bcast(&tmp_value, 1, MPI_INT, 0, fd->comm);
+ if (tmp_value != fd->hints->fs_hints.pvfs2.dtype_write) {
+ MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
+ "dtype_write",
+ error_code);
+ return;
+ }
+ }
+
+ /* Listio read */
+ ADIOI_Info_get(users_info, "romio_pvfs2_listio_read",
+ MPI_MAX_INFO_VAL, value, &flag);
+ if (flag) {
+ if ( !strcmp(value, "enable") || !strcmp(value, "ENABLE"))
+ {
+ ADIOI_Info_set(fd->info, "romio_pvfs2_listio_read", value);
+ fd->hints->fs_hints.pvfs2.listio_read = ADIOI_HINT_ENABLE;
+ }
+ else if ( !strcmp(value, "disable") ||
+ !strcmp(value, "DISABLE"))
+ {
+ ADIOI_Info_set(fd->info , "romio_pvfs2_listio_read", value);
+ fd->hints->fs_hints.pvfs2.listio_read = ADIOI_HINT_DISABLE;
+ }
+ tmp_value = fd->hints->fs_hints.pvfs2.listio_read;
+ MPI_Bcast(&tmp_value, 1, MPI_INT, 0, fd->comm);
+ if (tmp_value != fd->hints->fs_hints.pvfs2.listio_read) {
+ MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
+ "listio_read",
+ error_code);
+ return;
+ }
+ }
+
+ /* Listio write */
+ ADIOI_Info_get(users_info, "romio_pvfs2_listio_write",
+ MPI_MAX_INFO_VAL, value, &flag);
+ if (flag) {
+ if ( !strcmp(value, "enable") || !strcmp(value, "ENABLE"))
+ {
+ ADIOI_Info_set(fd->info, "romio_pvfs2_listio_write", value);
+ fd->hints->fs_hints.pvfs2.listio_write = ADIOI_HINT_ENABLE;
+ }
+ else if ( !strcmp(value, "disable") ||
+ !strcmp(value, "DISABLE"))
+ {
+ ADIOI_Info_set(fd->info , "romio_pvfs2_listio_write", value);
+ fd->hints->fs_hints.pvfs2.listio_write = ADIOI_HINT_DISABLE;
+ }
+ tmp_value = fd->hints->fs_hints.pvfs2.listio_write;
+ MPI_Bcast(&tmp_value, 1, MPI_INT, 0, fd->comm);
+ if (tmp_value != fd->hints->fs_hints.pvfs2.listio_write) {
+ MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
+ "listio_write",
+ error_code);
+ return;
+ }
+ }
ADIOI_Free(value);
+
}
}
/* set the values for collective I/O and data sieving parameters */
diff --git a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_io.h b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_io.h
new file mode 100644
index 0000000000..dc1641ee1b
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_io.h
@@ -0,0 +1,79 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*-
+ * vim: ts=8 sts=4 sw=4 noexpandtab
+ *
+ * Copyright (C) 2006 University of Chicago.
+ * See COPYRIGHT notice in top-level directory.
+ */
+
+/* Direction selectors passed as the rw_type argument of the helpers below */
+
+#define READ 0
+#define WRITE 1
+
+/* #define DEBUG_CONTIG */
+/* #define DEBUG_LIST */
+/* #define DEBUG_DTYPE */
+
+/* Contig I/O helper prototypes */
+int ADIOI_PVFS2_Contig(ADIO_File fd, void *buf, int count,
+ MPI_Datatype datatype, int file_ptr_type,
+ ADIO_Offset offset, ADIO_Status *status,
+ int *error_code, int rw_type);
+
+/* List I/O helper prototypes */
+int ADIOI_PVFS2_StridedListIO(ADIO_File fd, void *buf, int count,
+ MPI_Datatype datatype, int file_ptr_type,
+ ADIO_Offset offset, ADIO_Status *status,
+ int *error_code, int rw_type);
+
+int gen_listio_arr(ADIOI_Flatlist_node *flat_buf,
+ int *flat_buf_index_p,
+ int64_t *cur_flat_buf_reg_off_p,
+ int flat_buf_size,
+ int flat_buf_extent,
+ ADIOI_Flatlist_node *flat_file,
+ int *flat_file_index_p,
+ int64_t *cur_flat_file_reg_off_p,
+ int flat_file_size,
+ int flat_file_extent,
+ int max_ol_count,
+ ADIO_Offset disp,
+ int bytes_into_filetype,
+ int64_t *bytes_completed,
+ int64_t total_io_size,
+ int64_t buf_off_arr[],
+ int32_t buf_len_arr[],
+ int32_t *buf_ol_count_p,
+ int64_t file_off_arr[],
+ int32_t file_len_arr[],
+ int32_t *file_ol_count_p);
+
+void print_buf_file_ol_pairs(int64_t buf_off_arr[],
+ int32_t buf_len_arr[],
+ int32_t buf_ol_count,
+ int64_t file_off_arr[],
+ int32_t file_len_arr[],
+ int32_t file_ol_count,
+ void *buf,
+ int rw_type);
+
+/* Datatype I/O helper prototypes */
+int ADIOI_PVFS2_StridedDtypeIO(ADIO_File fd, void *buf, int count,
+ MPI_Datatype datatype, int file_ptr_type,
+ ADIO_Offset offset, ADIO_Status *status,
+ int *error_code, int rw_type);
+
+int convert_named(MPI_Datatype *mpi_dtype,
+ PVFS_Request *pvfs_dtype, int combiner);
+
+void print_dtype_info(int combiner,
+ int num_int,
+ int num_addr,
+ int num_dtype,
+ int *arr_int,
+ MPI_Aint *arr_addr,
+ MPI_Datatype *arr_dtype);
+
+int convert_mpi_pvfs2_dtype(MPI_Datatype *mpi_dtype,
+ PVFS_Request *pvfs_dtype);
+
diff --git a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_io_dtype.c b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_io_dtype.c
new file mode 100644
index 0000000000..ff625b53f8
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_io_dtype.c
@@ -0,0 +1,720 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*-
+ * vim: ts=8 sts=4 sw=4 noexpandtab
+ *
+ * Copyright (C) 2006 University of Chicago.
+ * See COPYRIGHT notice in top-level directory.
+ */
+
+#include <assert.h>
+#include "adio.h"
+#include "adio_extern.h"
+#include "ad_pvfs2.h"
+#include "ad_pvfs2_io.h"
+#include "ad_pvfs2_common.h"
+
+/* ADIOI_PVFS2_StridedDtypeIO - noncontiguous read or write done by
+ * converting the MPI memory datatype and the file view into PVFS2
+ * request types and issuing one PVFS_sys_read()/PVFS_sys_write().
+ *
+ * fd            - file; fd->filetype, fd->etype, fd->disp and fd->fp_ind
+ *                 are read, fd->fp_ind and fd->fp_sys_posn are updated
+ * buf           - user buffer handed to the PVFS2 call
+ * count         - number of 'datatype' elements described by buf
+ * datatype      - memory datatype
+ * file_ptr_type - ADIO_INDIVIDUAL positions from fd->fp_ind; any other
+ *                 value is handled as an explicit offset
+ * offset        - explicit offset, multiplied by the etype size
+ * status        - receives the completed byte count when
+ *                 HAVE_STATUS_SET_BYTES is defined
+ * error_code    - MPI_SUCCESS, or an MPIO error code when the PVFS2
+ *                 read/write call fails
+ * rw_type       - READ selects PVFS_sys_read, anything else
+ *                 PVFS_sys_write
+ *
+ * Returns -1 when the filetype has size zero; otherwise the result of
+ * the last conversion or PVFS2 call that ran (0 when the I/O call
+ * succeeded). */
+int ADIOI_PVFS2_StridedDtypeIO(ADIO_File fd, void *buf, int count,
+                               MPI_Datatype datatype, int file_ptr_type,
+                               ADIO_Offset offset, ADIO_Status *status, int
+                               *error_code,
+                               int rw_type)
+{
+    int filetype_size = -1, ret = -1, filetype_is_contig = -1;
+    int num_filetypes = 0, cur_flat_file_reg_off = 0;
+    PVFS_Request tmp_mem_req, mem_req, tmp_file_req, file_req;
+    PVFS_sysresp_io resp_io;
+    ADIO_Offset off = -1, bytes_into_filetype = 0;
+    MPI_Aint filetype_extent = -1;
+    int etype_size = -1, i = -1;
+    PVFS_size pvfs_disp = -1;
+    ADIOI_Flatlist_node *flat_file_p = ADIOI_Flatlist;
+
+    /* Use for offseting the PVFS2 filetype */
+    int pvfs_blk = 1;
+    ADIOI_PVFS2_fs *pvfs_fs;
+    static char myname[] = "ADIOI_PVFS2_STRIDED_DTYPE";
+
+    memset(&tmp_mem_req, 0, sizeof(PVFS_Request));
+    memset(&mem_req, 0, sizeof(PVFS_Request));
+    memset(&tmp_file_req, 0, sizeof(PVFS_Request));
+    memset(&file_req, 0, sizeof(PVFS_Request));
+    /* resp_io.total_completed is read at error_state even when no I/O
+     * call was reached, so it must start out as zero */
+    memset(&resp_io, 0, sizeof(PVFS_sysresp_io));
+
+    pvfs_fs = (ADIOI_PVFS2_fs*)fd->fs_ptr;
+
+    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
+
+    /* changed below if error */
+    *error_code = MPI_SUCCESS;
+
+    /* datatype is the memory type
+     * fd->filetype is the file type */
+    MPI_Type_size(fd->filetype, &filetype_size);
+    if (filetype_size == 0) {
+        *error_code = MPI_SUCCESS;
+        return -1;
+    }
+    MPI_Type_extent(fd->filetype, &filetype_extent);
+    MPI_Type_size(fd->etype, &etype_size);
+    /* NOTE(review): filetype_size was already tested above; an
+     * etype_size test may have been intended here -- confirm */
+    if (filetype_size == 0) {
+        *error_code = MPI_SUCCESS;
+        return -1;
+    }
+
+    /* offset is in units of etype relative to the filetype.  We
+     * convert this to off in terms of actual data bytes (the offset
+     * minus the number of bytes that are not used).  We are allowed
+     * to do this since PVFS2 handles offsets with respect to a
+     * file_req in bytes, otherwise we would have to convert into a
+     * pure byte offset as is done in other methods.  Explicit offset
+     * case is handled by using fd->disp and byte-converted off. */
+
+    pvfs_disp = fd->disp;
+    if (file_ptr_type == ADIO_INDIVIDUAL)
+    {
+        if (filetype_is_contig)
+        {
+            off = fd->fp_ind - fd->disp;
+        }
+        else
+        {
+            int flag = 0;
+            /* Should have already been flattened in ADIO_Open*/
+            while (flat_file_p->type != fd->filetype)
+            {
+                flat_file_p = flat_file_p->next;
+            }
+            /* Walk successive tilings of the filetype until the region
+             * holding fd->fp_ind is found, summing the data bytes that
+             * precede it */
+            num_filetypes = -1;
+            while (!flag)
+            {
+                num_filetypes++;
+                for (i = 0; i < flat_file_p->count; i++)
+                {
+                    /* Start on a non zero-length region */
+                    if (flat_file_p->blocklens[i])
+                    {
+                        if (fd->disp + flat_file_p->indices[i] +
+                            (num_filetypes * filetype_extent) +
+                            flat_file_p->blocklens[i] > fd->fp_ind &&
+                            fd->disp + flat_file_p->indices[i] <=
+                            fd->fp_ind)
+                        {
+                            cur_flat_file_reg_off = fd->fp_ind -
+                                (fd->disp + flat_file_p->indices[i] +
+                                 (num_filetypes * filetype_extent));
+                            flag = 1;
+                            break;
+                        }
+                        else
+                            bytes_into_filetype += flat_file_p->blocklens[i];
+                    }
+                }
+            }
+            /* Impossible that we don't find it in this datatype */
+            assert(i != flat_file_p->count);
+            off = bytes_into_filetype + cur_flat_file_reg_off;
+        }
+    }
+    else /* ADIO_EXPLICIT */
+    {
+        off = etype_size * offset;
+    }
+
+#ifdef DEBUG_DTYPE
+    fprintf(stderr, "ADIOI_PVFS2_StridedDtypeIO: (fd->fp_ind=%Ld,fd->disp=%Ld,"
+            " offset=%Ld),(pvfs_disp=%Ld,off=%Ld)\n",
+            fd->fp_ind, fd->disp, offset, pvfs_disp, off);
+#endif
+
+
+    /* Convert the MPI memory and file datatypes into
+     * PVFS2 datatypes */
+    ret = convert_mpi_pvfs2_dtype(&datatype, &tmp_mem_req);
+    if (ret < 0)
+    {
+        goto error_state;
+    }
+    ret = convert_mpi_pvfs2_dtype(&(fd->filetype), &tmp_file_req);
+    if (ret < 0)
+    {
+        /* the memory request was built successfully just above and is
+         * not released anywhere after error_state */
+        PVFS_Request_free(&tmp_mem_req);
+        goto error_state;
+    }
+
+    ret = PVFS_Request_contiguous(count, tmp_mem_req, &mem_req);
+    if (ret != 0) /* TODO: convert this to MPIO error handling */
+        fprintf(stderr, "ADIOI_PVFS2_stridedDtypeIO: error in final"
+                " CONTIG memory type\n");
+    PVFS_Request_free(&tmp_mem_req);
+
+    /* pvfs_disp is used to offset the filetype */
+    ret = PVFS_Request_hindexed(1, &pvfs_blk, &pvfs_disp,
+                                tmp_file_req, &file_req);
+    if (ret != 0)
+        fprintf(stderr, "ADIOI_PVFS2_StridedDtypeIO: error in final"
+                " HINDEXED file type\n");
+    PVFS_Request_free(&tmp_file_req);
+
+    if (rw_type == READ)
+        ret = PVFS_sys_read(pvfs_fs->object_ref, file_req, off, buf,
+                            mem_req, &(pvfs_fs->credentials), &resp_io);
+    else
+        ret = PVFS_sys_write(pvfs_fs->object_ref, file_req, off, buf,
+                             mem_req, &(pvfs_fs->credentials), &resp_io);
+
+    if (ret != 0) {
+        fprintf(stderr, "ADIOI_PVFS2_StridedDtypeIO: Warning - PVFS_sys_"
+                "read/write returned %d and completed %Ld bytes.\n",
+                ret, resp_io.total_completed);
+        *error_code = MPIO_Err_create_code(MPI_SUCCESS,
+                                           MPIR_ERR_RECOVERABLE,
+                                           myname, __LINE__,
+                                           ADIOI_PVFS2_error_convert(ret),
+                                           "Error in PVFS_sys_io \n", 0);
+        goto error_state;
+    }
+
+    if (file_ptr_type == ADIO_INDIVIDUAL)
+    {
+        fd->fp_ind = off += resp_io.total_completed;
+    }
+
+  error_state:
+    fd->fp_sys_posn = -1;   /* set it to null. */
+
+    PVFS_Request_free(&mem_req);
+    PVFS_Request_free(&file_req);
+
+#ifdef DEBUG_DTYPE
+    fprintf(stderr, "ADIOI_PVFS2_StridedDtypeIO: "
+            "resp_io.total_completed=%Ld,ret=%d\n",
+            resp_io.total_completed, ret);
+#endif
+
+#ifdef HAVE_STATUS_SET_BYTES
+    MPIR_Status_set_bytes(status, datatype, (int)resp_io.total_completed);
+    /* This is a temporary way of filling in status. The right way is to
+     * keep track of how much data was actually acccessed by
+     * ADIOI_BUFFERED operations */
+#endif
+    return ret;
+}
+
+/* convert_mpi_pvfs2_dtype - Convert a MPI datatype into
+ * a PVFS2 datatype so that we can natively use the PVFS2
+ * datatypes in the PVFS2 I/O calls instead of converting
+ * all datatypes to the hindexed method
+ *
+ * mpi_dtype  - datatype to convert; it is taken apart with
+ *              MPI_Type_get_envelope/MPI_Type_get_contents and its
+ *              component types are converted recursively
+ * pvfs_dtype - receives the resulting PVFS2 request
+ *
+ * return 1  - a leaf node
+ * return 0  - normal return
+ * return -1 - problems (an allocation failed or the combiner is not
+ *             supported); any other nonzero value is the result of the
+ *             failing PVFS_Request_* constructor */
+
+int convert_mpi_pvfs2_dtype(MPI_Datatype *mpi_dtype,
+                            PVFS_Request *pvfs_dtype)
+{
+    int num_int = -1, num_addr = -1, num_dtype = -1,
+        combiner = -1, i = -1, ret = -1, leaf = -1;
+    int *arr_int = NULL;
+    /* MPI_Type_get_contents fills an array of MPI_Aint, which need not
+     * have the size of int */
+    MPI_Aint *arr_addr = NULL;
+    MPI_Datatype *arr_dtype = NULL;
+    PVFS_Request *old_pvfs_dtype = NULL;
+    PVFS_Request *old_pvfs_dtype_arr = NULL;
+    int arr_count = -1;
+    PVFS_size *pvfs_arr_disp = NULL;
+    int *pvfs_arr_len = NULL;
+
+    MPI_Type_get_envelope(*mpi_dtype,
+                          &num_int,
+                          &num_addr,
+                          &num_dtype,
+                          &combiner);
+
+    /* Depending on type of datatype do the following
+     * operations */
+
+    if (combiner == MPI_COMBINER_NAMED)
+    {
+        convert_named(mpi_dtype, pvfs_dtype, combiner);
+        return 1;
+    }
+
+    /* Allocate space for the arrays necessary for
+     * MPI_Type_get_contents */
+
+    if ((arr_int = ADIOI_Malloc(sizeof(int)*num_int)) == NULL)
+    {
+        fprintf(stderr, "Failed to allocate array_int\n");
+        return -1;
+    }
+    if ((arr_addr = ADIOI_Malloc(sizeof(MPI_Aint)*num_addr)) == NULL)
+    {
+        ADIOI_Free(arr_int);
+        fprintf(stderr, "Failed to allocate array_addr\n");
+        return -1;
+    }
+    if ((arr_dtype = ADIOI_Malloc(sizeof(MPI_Datatype)*num_dtype)) == NULL)
+    {
+        ADIOI_Free(arr_int);
+        ADIOI_Free(arr_addr);
+        fprintf(stderr, "Failed to allocate array_dtypes\n");
+        return -1;
+    }
+
+    MPI_Type_get_contents(*mpi_dtype,
+                          num_int,
+                          num_addr,
+                          num_dtype,
+                          arr_int,
+                          arr_addr,
+                          arr_dtype);
+
+    /* If it's not a predefined datatype, it is either a
+     * derived datatype or a structured datatype */
+
+    if (combiner != MPI_COMBINER_STRUCT)
+    {
+        if ((old_pvfs_dtype = ADIOI_Malloc(sizeof(PVFS_Request))) == NULL)
+        {
+            fprintf(stderr, "convert_mpi_pvfs2_dtype: "
+                    "Failed to allocate PVFS_Request\n");
+            ADIOI_Free(arr_int);
+            ADIOI_Free(arr_addr);
+            ADIOI_Free(arr_dtype);
+            return -1;
+        }
+
+        /* In every supported case the component type is converted first;
+         * leaf stays negative when that conversion was not attempted or
+         * failed, and the enclosing request is then not built (ret
+         * keeps its initial -1). */
+        switch (combiner)
+        {
+            case MPI_COMBINER_CONTIGUOUS:
+                leaf = convert_mpi_pvfs2_dtype(&arr_dtype[0], old_pvfs_dtype);
+                if (leaf < 0)
+                    break;
+                ret = PVFS_Request_contiguous(arr_int[0],
+                                              *old_pvfs_dtype, pvfs_dtype);
+                break;
+            case MPI_COMBINER_VECTOR:
+                leaf = convert_mpi_pvfs2_dtype(&arr_dtype[0], old_pvfs_dtype);
+                if (leaf < 0)
+                    break;
+                ret = PVFS_Request_vector(arr_int[0], arr_int[1],
+                                          arr_int[2], *old_pvfs_dtype,
+                                          pvfs_dtype);
+                break;
+            case MPI_COMBINER_HVECTOR:
+                leaf = convert_mpi_pvfs2_dtype(&arr_dtype[0], old_pvfs_dtype);
+                if (leaf < 0)
+                    break;
+                ret = PVFS_Request_hvector(arr_int[0], arr_int[1],
+                                           arr_addr[0], *old_pvfs_dtype,
+                                           pvfs_dtype);
+                break;
+                /* Both INDEXED and HINDEXED types require PVFS_size
+                 * address arrays.  Therefore, we need to copy and
+                 * convert the data from MPI_get_contents() into
+                 * a PVFS_size buffer */
+            case MPI_COMBINER_INDEXED:
+                leaf = convert_mpi_pvfs2_dtype(&arr_dtype[0], old_pvfs_dtype);
+                if (leaf < 0)
+                    break;
+                if ((pvfs_arr_disp =
+                     ADIOI_Malloc(arr_int[0]*sizeof(PVFS_size))) == 0)
+                {
+                    fprintf(stderr, "convert_mpi_pvfs2_dtype: "
+                            "Failed to allocate pvfs_arr_disp\n");
+                    break;
+                }
+                /* the displacements follow the arr_int[0] block lengths
+                 * inside arr_int */
+                for (i = 0; i < arr_int[0]; i++)
+                {
+                    pvfs_arr_disp[i] =
+                        (PVFS_size) arr_int[arr_int[0]+1+i];
+                }
+                ret = PVFS_Request_indexed(arr_int[0], &arr_int[1],
+                                           pvfs_arr_disp,
+                                           *old_pvfs_dtype, pvfs_dtype);
+                ADIOI_Free(pvfs_arr_disp);
+                break;
+            case MPI_COMBINER_HINDEXED:
+                leaf = convert_mpi_pvfs2_dtype(&arr_dtype[0], old_pvfs_dtype);
+                if (leaf < 0)
+                    break;
+                if ((pvfs_arr_disp =
+                     ADIOI_Malloc(arr_int[0]*sizeof(PVFS_size))) == 0)
+                {
+                    fprintf(stderr, "convert_mpi_pvfs2_dtype: "
+                            "Failed to allocate pvfs_arr_disp\n");
+                    break;
+                }
+                for (i = 0; i < arr_int[0]; i++)
+                {
+                    pvfs_arr_disp[i] =
+                        (PVFS_size) arr_addr[i];
+                }
+                /* hand over the converted copy, not a cast of the
+                 * MPI_Aint array */
+                ret = PVFS_Request_hindexed(arr_int[0], &arr_int[1],
+                                            pvfs_arr_disp,
+                                            *old_pvfs_dtype, pvfs_dtype);
+                ADIOI_Free(pvfs_arr_disp);
+                break;
+            case MPI_COMBINER_DUP:
+                leaf = convert_mpi_pvfs2_dtype(&arr_dtype[0], old_pvfs_dtype);
+                if (leaf < 0)
+                    break;
+                ret = PVFS_Request_contiguous(1,
+                                              *old_pvfs_dtype, pvfs_dtype);
+
+                break;
+                /* The combiners below are only reported; old_pvfs_dtype
+                 * is released once, after the switch. */
+            case MPI_COMBINER_INDEXED_BLOCK:
+                /* No native PVFS2 support for this operation currently */
+                fprintf(stderr, "convert_mpi_pvfs2_dtype: "
+                        "INDEXED_BLOCK is unsupported\n");
+                break;
+            case MPI_COMBINER_HINDEXED_INTEGER:
+                fprintf(stderr, "convert_mpi_pvfs2_dtype: "
+                        "HINDEXED_INTEGER is unsupported\n");
+                break;
+            case MPI_COMBINER_STRUCT_INTEGER:
+                fprintf(stderr, "convert_mpi_pvfs2_dtype: "
+                        "STRUCT_INTEGER is unsupported\n");
+                break;
+            case MPI_COMBINER_SUBARRAY:
+                fprintf(stderr, "convert_mpi_pvfs2_dtype: "
+                        "SUBARRAY is unsupported\n");
+                break;
+            case MPI_COMBINER_DARRAY:
+                fprintf(stderr, "convert_mpi_pvfs2_dtype: "
+                        "DARRAY is unsupported\n");
+                break;
+            case MPI_COMBINER_F90_REAL:
+                fprintf(stderr, "convert_mpi_pvfs2_dtype: "
+                        "F90_REAL is unsupported\n");
+                break;
+            case MPI_COMBINER_F90_COMPLEX:
+                fprintf(stderr, "convert_mpi_pvfs2_dtype: "
+                        "F90_COMPLEX is unsupported\n");
+                break;
+            case MPI_COMBINER_F90_INTEGER:
+                fprintf(stderr, "convert_mpi_pvfs2_dtype: "
+                        "F90_INTEGER is unsupported\n");
+                break;
+            case MPI_COMBINER_RESIZED:
+                fprintf(stderr, "convert_mpi_pvfs2_dtype: "
+                        "RESIZED is unsupported\n");
+                break;
+            default:
+                break;
+        }
+
+        if (ret != 0)
+            fprintf(stderr, "Error in PVFS_Request_* "
+                    "for a derived datatype\n");
+
+#ifdef DEBUG_DTYPE
+        print_dtype_info(combiner,
+                         num_int,
+                         num_addr,
+                         num_dtype,
+                         arr_int,
+                         arr_addr,
+                         arr_dtype);
+#endif
+
+        /* Release the component handles MPI_Type_get_contents returned.
+         * Only derived handles may be freed, so each one is checked;
+         * this also covers combiners with no component type at all. */
+        if (leaf != 1 && combiner != MPI_COMBINER_DUP)
+        {
+            for (i = 0; i < num_dtype; i++)
+            {
+                int sub_int = 0, sub_addr = 0, sub_dtype = 0;
+                int sub_combiner = MPI_COMBINER_NAMED;
+
+                MPI_Type_get_envelope(arr_dtype[i], &sub_int, &sub_addr,
+                                      &sub_dtype, &sub_combiner);
+                if (sub_combiner != MPI_COMBINER_NAMED)
+                    MPI_Type_free(&arr_dtype[i]);
+            }
+        }
+
+        ADIOI_Free(arr_int);
+        ADIOI_Free(arr_addr);
+        ADIOI_Free(arr_dtype);
+
+        /* old_pvfs_dtype only holds a request when the component
+         * conversion succeeded */
+        if (leaf >= 0)
+            PVFS_Request_free(old_pvfs_dtype);
+        ADIOI_Free(old_pvfs_dtype);
+
+        return ret;
+    }
+    else /* MPI_COMBINER_STRUCT */
+    {
+        MPI_Aint mpi_lb = -1, mpi_extent = -1;
+        PVFS_offset pvfs_lb = -1;
+        PVFS_size pvfs_extent = -1;
+        int has_lb_ub = 0;
+
+        /* When converting into a PVFS_Request_struct, we no longer
+         * can use MPI_LB and MPI_UB.  Therfore, we have to do the
+         * following.
+         * We simply ignore all the MPI_LB and MPI_UB types and
+         * get the lb and extent and pass it on through a
+         * PVFS resized_req */
+
+        arr_count = 0;
+        for (i = 0; i < arr_int[0]; i++)
+        {
+            if (arr_dtype[i] != MPI_LB &&
+                arr_dtype[i] != MPI_UB)
+            {
+                arr_count++;
+            }
+        }
+
+        if (arr_int[0] != arr_count)
+        {
+            MPI_Type_get_extent(*mpi_dtype, &mpi_lb, &mpi_extent);
+            pvfs_lb = mpi_lb;
+            pvfs_extent = mpi_extent;
+            has_lb_ub = 1;
+            if ((pvfs_arr_len = ADIOI_Malloc(arr_count*sizeof(int)))
+                == NULL)
+            {
+                fprintf(stderr, "convert_mpi_pvfs2_dtype: "
+                        "Failed to allocate pvfs_arr_len\n");
+                goto struct_done;
+            }
+        }
+
+        if ((old_pvfs_dtype_arr
+             = ADIOI_Malloc(arr_count*sizeof(PVFS_Request))) == NULL)
+        {
+            fprintf(stderr, "convert_mpi_pvfs2_dtype: "
+                    "Failed to allocate PVFS_Requests\n");
+            goto struct_done;
+        }
+
+        if ((pvfs_arr_disp = ADIOI_Malloc(arr_count*sizeof(PVFS_size)))
+            == NULL)
+        {
+            fprintf(stderr, "convert_mpi_pvfs2_dtype: "
+                    "Failed to allocate pvfs_arr_disp\n");
+            goto struct_done;
+        }
+
+        /* Convert every member that is not an MPI_LB/MPI_UB marker,
+         * packing the results densely into the arr_count-sized arrays */
+        arr_count = 0;
+        for (i = 0; i < arr_int[0]; i++)
+        {
+            if (arr_dtype[i] != MPI_LB &&
+                arr_dtype[i] != MPI_UB)
+            {
+                leaf = convert_mpi_pvfs2_dtype(
+                    &arr_dtype[i], &old_pvfs_dtype_arr[arr_count]);
+                if (leaf != 1)
+                    MPI_Type_free(&arr_dtype[i]);
+                pvfs_arr_disp[arr_count] =
+                    (PVFS_size) arr_addr[i];
+                if (has_lb_ub)
+                {
+                    pvfs_arr_len[arr_count] =
+                        arr_int[i+1];
+                }
+                arr_count++;
+            }
+        }
+
+        /* If a MPI_UB or MPI_LB did exist, we have to
+         * resize the datatype */
+        if (has_lb_ub)
+        {
+            PVFS_Request *tmp_pvfs_dtype = NULL;
+            if ((tmp_pvfs_dtype = ADIOI_Malloc(sizeof(PVFS_Request))) == NULL)
+            {
+                fprintf(stderr, "convert_mpi_pvfs2_dtype: "
+                        "Failed to allocate PVFS_Request\n");
+                for (i = 0; i < arr_count; i++)
+                    PVFS_Request_free(&old_pvfs_dtype_arr[i]);
+                ret = -1;
+                goto struct_done;
+            }
+
+            ret = PVFS_Request_struct(arr_count, pvfs_arr_len,
+                                      pvfs_arr_disp,
+                                      old_pvfs_dtype_arr, tmp_pvfs_dtype);
+            if (ret != 0)
+                fprintf(stderr, "Error in PVFS_Request_struct\n");
+
+            /* exactly arr_count member requests were built above */
+            for (i = 0; i < arr_count; i++)
+                PVFS_Request_free(&old_pvfs_dtype_arr[i]);
+
+#ifdef DEBUG_DTYPE
+            fprintf(stderr, "STRUCT(WITHOUT %d LB or UB)(%d,[",
+                    arr_int[0] - arr_count, arr_count);
+            for (i = 0; i < arr_count; i++)
+                fprintf(stderr, "(%d,%Ld) ",
+                        pvfs_arr_len[i],
+                        pvfs_arr_disp[i]);
+            fprintf(stderr, "]\n");
+            fprintf(stderr, "RESIZED(LB = %Ld, EXTENT = %Ld)\n",
+                    pvfs_lb, pvfs_extent);
+#endif
+            ret = PVFS_Request_resized(*tmp_pvfs_dtype,
+                                       pvfs_lb, pvfs_extent, pvfs_dtype);
+            if (ret != 0)
+                fprintf(stderr, "Error in PVFS_Request_resize\n");
+
+            PVFS_Request_free(tmp_pvfs_dtype);
+            ADIOI_Free(tmp_pvfs_dtype);
+        }
+        else /* No MPI_LB or MPI_UB datatypes */
+        {
+            ret = PVFS_Request_struct(arr_int[0], &arr_int[1],
+                                      pvfs_arr_disp,
+                                      old_pvfs_dtype_arr, pvfs_dtype);
+            if (ret != 0)
+                fprintf(stderr, "Error in PVFS_Request_struct\n");
+
+            /* here arr_count equals arr_int[0] */
+            for (i = 0; i < arr_count; i++)
+                PVFS_Request_free(&old_pvfs_dtype_arr[i]);
+
+#ifdef DEBUG_DTYPE
+            print_dtype_info(combiner,
+                             num_int,
+                             num_addr,
+                             num_dtype,
+                             arr_int,
+                             arr_addr,
+                             arr_dtype);
+#endif
+        }
+
+      struct_done:
+        ADIOI_Free(arr_int);
+        ADIOI_Free(arr_addr);
+        ADIOI_Free(arr_dtype);
+
+        /* These three are allocated conditionally (pvfs_arr_len only
+         * when MPI_LB/MPI_UB markers were present), so only release the
+         * ones that exist. */
+        if (old_pvfs_dtype_arr != NULL)
+            ADIOI_Free(old_pvfs_dtype_arr);
+        if (pvfs_arr_disp != NULL)
+            ADIOI_Free(pvfs_arr_disp);
+        if (pvfs_arr_len != NULL)
+            ADIOI_Free(pvfs_arr_len);
+
+        return ret;
+    }
+}
+
+/* Translate the predefined MPI type *mpi_dtype into a one-element PVFS
+ * request passed back through pvfs_dtype; 'combiner' is accepted but never
+ * read.  Returns PVFS_Request_contiguous()'s result, or -1 for an unlisted type. */
+int convert_named(MPI_Datatype *mpi_dtype,
+                  PVFS_Request *pvfs_dtype, int combiner)
+{
+    int ret = -1;
+#ifdef DEBUG_DTYPE
+    fprintf(stderr, "NAMED");
+#endif
+    switch (*mpi_dtype) {
+    case MPI_CHAR:
+        ret = PVFS_Request_contiguous(1, PVFS_CHAR, pvfs_dtype);
+#ifdef DEBUG_DTYPE
+        fprintf(stderr, "-MPI_CHAR\n");
+#endif
+        break;
+    case MPI_BYTE:
+        ret = PVFS_Request_contiguous(1, PVFS_BYTE, pvfs_dtype);
+#ifdef DEBUG_DTYPE
+        fprintf(stderr, "-MPI_BYTE\n");
+#endif
+        break;
+    case MPI_SHORT:
+        ret = PVFS_Request_contiguous(1, PVFS_SHORT, pvfs_dtype);
+#ifdef DEBUG_DTYPE
+        fprintf(stderr, "-MPI_SHORT\n");
+#endif
+        break;
+    case MPI_INT:
+        ret = PVFS_Request_contiguous(1, PVFS_INT, pvfs_dtype);
+#ifdef DEBUG_DTYPE
+        fprintf(stderr, "-MPI_INT\n");
+#endif
+        break;
+    case MPI_LONG:
+        ret = PVFS_Request_contiguous(1, PVFS_LONG, pvfs_dtype);
+#ifdef DEBUG_DTYPE
+        fprintf(stderr, "-MPI_LONG\n");
+#endif
+        break;
+    case MPI_FLOAT:
+        ret = PVFS_Request_contiguous(1, PVFS_FLOAT, pvfs_dtype);
+#ifdef DEBUG_DTYPE
+        fprintf(stderr, "-MPI_FLOAT\n");
+#endif
+        break;
+    case MPI_DOUBLE:
+        ret = PVFS_Request_contiguous(1, PVFS_DOUBLE, pvfs_dtype);
+#ifdef DEBUG_DTYPE
+        fprintf(stderr, "-MPI_DOUBLE\n");
+#endif
+        break;
+    case MPI_UNSIGNED_CHAR:
+        ret = PVFS_Request_contiguous(1, PVFS_UNSIGNED_CHAR, pvfs_dtype);
+#ifdef DEBUG_DTYPE
+        fprintf(stderr, "-MPI_UNSIGNED_CHAR\n");
+#endif
+        break;
+    case MPI_UNSIGNED_SHORT: /* needs the short-width PVFS type, not PVFS_UNSIGNED */
+        ret = PVFS_Request_contiguous(1, PVFS_UNSIGNED_SHORT, pvfs_dtype);
+#ifdef DEBUG_DTYPE
+        fprintf(stderr, "-MPI_UNSIGNED_SHORT\n");
+#endif
+        break;
+    case MPI_UNSIGNED:
+        ret = PVFS_Request_contiguous(1, PVFS_UNSIGNED, pvfs_dtype);
+#ifdef DEBUG_DTYPE
+        fprintf(stderr, "-MPI_UNSIGNED\n");
+#endif
+        break;
+    case MPI_UNSIGNED_LONG:
+        ret = PVFS_Request_contiguous(1, PVFS_UNSIGNED_LONG, pvfs_dtype);
+#ifdef DEBUG_DTYPE
+        fprintf(stderr, "-MPI_UNSIGNED_LONG\n");
+#endif
+        break;
+    case MPI_LONG_DOUBLE:
+        ret = PVFS_Request_contiguous(1, PVFS_LONG_DOUBLE, pvfs_dtype);
+#ifdef DEBUG_DTYPE
+        fprintf(stderr, "-MPI_LONG_DOUBLE\n");
+#endif
+        break;
+    default:
+        fprintf(stderr, "convert_named: predefined type not found\n");
+        return -1;
+    }
+    if (ret != 0)
+        fprintf(stderr, "convert_named: Datatype creation failed\n");
+    return ret;
+}
+
+/* Debug helper: write a one-line description of a datatype's combiner and
+ * its integer/address arguments to stderr.  num_int, num_addr, num_dtype
+ * and arr_dtype are accepted but not read. */
+void print_dtype_info(int combiner,
+                      int num_int,
+                      int num_addr,
+                      int num_dtype,
+                      int *arr_int,
+                      MPI_Aint *arr_addr,
+                      MPI_Datatype *arr_dtype)
+{
+    int i = -1;
+    switch (combiner) {
+    case MPI_COMBINER_CONTIGUOUS:
+        fprintf(stderr, "CONTIG(%d)\n", arr_int[0]);
+        break;
+    case MPI_COMBINER_VECTOR:
+        fprintf(stderr, "VECTOR(%d,%d,%d)\n",
+                arr_int[0], arr_int[1], arr_int[2]);
+        break;
+    case MPI_COMBINER_HVECTOR:
+        /* arr_addr[0] is an MPI_Aint, so it must not be passed for a %d */
+        fprintf(stderr, "HVECTOR(%d,%d,%lld)\n",
+                arr_int[0], arr_int[1], (long long) arr_addr[0]);
+        break;
+    case MPI_COMBINER_INDEXED:
+        fprintf(stderr, "INDEXED(%d,[", arr_int[0]);
+        for (i = 0; i < arr_int[0]; i++)
+            fprintf(stderr, "(%d,%lld) ",
+                    arr_int[1+i],
+                    (long long) arr_int[arr_int[0]+1+i]);
+        fprintf(stderr, "]\n");
+        break;
+    case MPI_COMBINER_HINDEXED:
+        fprintf(stderr, "HINDEXED(%d,[", arr_int[0]);
+        for (i = 0; i < arr_int[0]; i++)
+            fprintf(stderr, "(%d,%lld) ",
+                    arr_int[1+i],
+                    (long long) arr_addr[i]);
+        fprintf(stderr, "]\n");
+        break;
+    case MPI_COMBINER_STRUCT:
+        fprintf(stderr, "STRUCT(%d,[", arr_int[0]);
+        for (i = 0; i < arr_int[0]; i++)
+            fprintf(stderr, "(%d,%lld) ",
+                    arr_int[1+i],
+                    (long long) arr_addr[i]);
+        fprintf(stderr, "]\n");
+        break;
+    case MPI_COMBINER_DUP:
+        fprintf(stderr, "DUP\n");
+        break;
+    default:
+        fprintf(stderr, "no available information on this datatype\n");
+    }
+}
diff --git a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_io_list.c b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_io_list.c
new file mode 100644
index 0000000000..38cc63e3e1
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_io_list.c
@@ -0,0 +1,665 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*-
+ * vim: ts=8 sts=4 sw=4 noexpandtab
+ *
+ * Copyright (C) 2006 Unknown (TODO: fix this)
+ */
+
+#include <assert.h>
+#include "adio.h"
+#include "adio_extern.h"
+#include "ad_pvfs2.h"
+#include "ad_pvfs2_io.h"
+#include "ad_pvfs2_common.h"
+
+#define COALESCE_REGIONS /* TODO: would we ever want to *not* coalesce? */
+#define MAX_OL_COUNT 64 /* capacity of each offset-length array filled per PVFS I/O call */
+/* Perform a noncontiguous read or write (selected by rw_type) by turning the
+ * flattened memory and file datatypes into batches of at most MAX_OL_COUNT
+ * offset-length pairs and issuing one PVFS_sys_read/PVFS_sys_write per batch.
+ * Failures are reported through *error_code.  The return value is -1 only
+ * for the atomic-mode and zero-size-filetype early exits and 0 otherwise,
+ * including after a failed PVFS call. */
+int ADIOI_PVFS2_StridedListIO(ADIO_File fd, void *buf, int count,
+                              MPI_Datatype datatype, int file_ptr_type,
+                              ADIO_Offset offset, ADIO_Status *status,
+                              int *error_code, int rw_type)
+{
+    /* list I/O parameters */
+    int i = -1, ret = -1;
+    int tmp_filetype_size = -1;
+    int64_t cur_io_size = 0, io_size = 0;
+    int etype_size = -1;
+    int num_etypes_in_filetype = -1, num_filetypes = -1;
+    int etypes_in_filetype = -1, size_in_filetype = -1;
+    int bytes_into_filetype = 0;
+    MPI_Offset total_bytes_accessed = 0;
+
+    /* parameters for offset-length pairs arrays */
+    int64_t buf_off_arr[MAX_OL_COUNT];
+    int32_t buf_len_arr[MAX_OL_COUNT];
+    int64_t file_off_arr[MAX_OL_COUNT];
+    int32_t file_len_arr[MAX_OL_COUNT];
+    int32_t buf_ol_count = 0;
+    int32_t file_ol_count = 0;
+
+    /* parameters for flattened memory and file datatypes*/
+    int flat_buf_index = 0;
+    int flat_file_index = 0;
+    int64_t cur_flat_buf_reg_off = 0;
+    int64_t cur_flat_file_reg_off = 0;
+    ADIOI_Flatlist_node *flat_buf_p, *flat_file_p;
+    int buftype_size = -1, filetype_size = -1;
+    MPI_Aint buftype_extent = -1, filetype_extent = -1; /* MPI_Type_extent() stores an MPI_Aint */
+    int buftype_is_contig = -1, filetype_is_contig = -1;
+
+    /* PVFS2 specific parameters */
+    PVFS_Request mem_req, file_req;
+    ADIOI_PVFS2_fs * pvfs_fs;
+    PVFS_sysresp_io resp_io;
+    static char myname[] = "ADIOI_PVFS2_STRIDED_LISTIO";
+
+    if (fd->atomicity) {
+        *error_code = MPIO_Err_create_code(MPI_SUCCESS,
+                                           MPIR_ERR_RECOVERABLE,
+                                           myname, __LINE__,
+                                           MPI_ERR_ARG,
+                                           "Atomic noncontiguous writes"
+                                           " are not supported by PVFS2", 0);
+        return -1;
+    }
+
+    MPI_Type_size(fd->filetype, &filetype_size);
+    if (filetype_size == 0) {
+        *error_code = MPI_SUCCESS;
+        return -1;
+    }
+    MPI_Type_extent(fd->filetype, &filetype_extent);
+    MPI_Type_size(datatype, &buftype_size);
+    MPI_Type_extent(datatype, &buftype_extent);
+    io_size = (int64_t) buftype_size * count; /* multiply in 64 bits, not in int */
+    pvfs_fs = (ADIOI_PVFS2_fs*)fd->fs_ptr;
+
+    /* Flatten the memory datatype
+     * (file datatype has already been flattened in ADIO open
+     * unless it is contiguous, then we need to flatten it manually)
+     * and set the correct buffers for flat_buf and flat_file */
+    ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
+    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
+    if (buftype_is_contig == 0)
+    {
+        ADIOI_Flatten_datatype(datatype);
+        flat_buf_p = ADIOI_Flatlist;
+        while (flat_buf_p->type != datatype)
+            flat_buf_p = flat_buf_p->next;
+    }
+    else
+    {
+        /* flatten and add to the list */
+        flat_buf_p = (ADIOI_Flatlist_node *) ADIOI_Malloc
+            (sizeof(ADIOI_Flatlist_node));
+        flat_buf_p->blocklens = (ADIO_Offset*)ADIOI_Malloc(sizeof(ADIO_Offset));
+        flat_buf_p->indices =
+            (ADIO_Offset *) ADIOI_Malloc(sizeof(ADIO_Offset));
+        /* A contiguous buffer is described as one block covering the whole
+         * request (not possible with the filetype since it is tiled) */
+        buftype_size = buftype_size*count;
+        buftype_extent = buftype_size; /* size is already scaled by count */
+        flat_buf_p->blocklens[0] = buftype_size;
+        flat_buf_p->indices[0] = 0;
+        flat_buf_p->count = 1;
+    }
+    if (filetype_is_contig == 0)
+    {
+        /* TODO: why does avery say this should already have been
+         * flattened in Open, but also says contig types don't get
+         * flattened */
+        ADIOI_Flatten_datatype(fd->filetype);
+        flat_file_p = ADIOI_Flatlist;
+        while (flat_file_p->type != fd->filetype)
+            flat_file_p = flat_file_p->next;
+    }
+    else
+    {
+        /* flatten and add to the list */
+        flat_file_p = (ADIOI_Flatlist_node *) ADIOI_Malloc
+            (sizeof(ADIOI_Flatlist_node));
+        flat_file_p->blocklens =(ADIO_Offset*)ADIOI_Malloc(sizeof(ADIO_Offset));
+        flat_file_p->indices =
+            (ADIO_Offset *) ADIOI_Malloc(sizeof(ADIO_Offset));
+        flat_file_p->blocklens[0] = filetype_size;
+        flat_file_p->indices[0] = 0;
+        flat_file_p->count = 1;
+    }
+
+    /* Find out where we are in the flattened filetype (the block index,
+     * how far into the block, and how many bytes_into_filetype)
+     * If the file_ptr_type == ADIO_INDIVIDUAL we will use disp, fp_ind
+     * to figure this out (offset should always be zero)
+     * If file_ptr_type == ADIO_EXPLICIT, we will use disp and offset
+     * to figure this out. */
+
+    etype_size = fd->etype_size;
+    num_etypes_in_filetype = filetype_size / etype_size;
+    if (file_ptr_type == ADIO_INDIVIDUAL)
+    {
+        int flag = 0;
+        /* Should have already been flattened in ADIO_Open*/
+        num_filetypes = -1;
+        while (!flag)
+        {
+            num_filetypes++;
+            for (i = 0; i < flat_file_p->count; i++)
+            {
+                /* Start on a non zero-length region */
+                if (flat_file_p->blocklens[i])
+                {
+                    if (fd->disp + flat_file_p->indices[i] +
+                        (num_filetypes * filetype_extent) +
+                        flat_file_p->blocklens[i] > fd->fp_ind &&
+                        fd->disp + flat_file_p->indices[i] <=
+                        fd->fp_ind)
+                    {
+                        flat_file_index = i;
+                        cur_flat_file_reg_off = fd->fp_ind -
+                            (fd->disp + flat_file_p->indices[i] +
+                             (num_filetypes * filetype_extent));
+                        flag = 1;
+                        break;
+                    }
+                    else
+                        bytes_into_filetype += flat_file_p->blocklens[i];
+                }
+            }
+        }
+        /* Impossible that we don't find it in this datatype */
+        assert(i != flat_file_p->count);
+    }
+    else
+    {
+        num_filetypes = (int) (offset / num_etypes_in_filetype);
+        etypes_in_filetype = (int) (offset % num_etypes_in_filetype);
+        size_in_filetype = etypes_in_filetype * etype_size;
+
+        tmp_filetype_size = 0;
+        for (i = 0; i < flat_file_p->count; i++) {
+            tmp_filetype_size += flat_file_p->blocklens[i];
+            if (tmp_filetype_size > size_in_filetype)
+            {
+                flat_file_index = i;
+                cur_flat_file_reg_off = flat_file_p->blocklens[i] -
+                    (tmp_filetype_size - size_in_filetype);
+                bytes_into_filetype = offset * filetype_size -
+                    flat_file_p->blocklens[i];
+                break;
+            }
+        }
+    }
+#ifdef DEBUG_LIST
+    fprintf(stderr, "ADIOI_PVFS2_StridedListIO: (fd->fp_ind=%Ld,fd->disp=%Ld,"
+            " offset=%Ld)\n(flat_file_index=%d,cur_flat_file_reg_off=%Ld,"
+            "bytes_into_filetype=%d)\n",
+            fd->fp_ind, fd->disp, offset, flat_file_index,
+            cur_flat_file_reg_off, bytes_into_filetype);
+#endif
+#ifdef DEBUG_LIST2
+    fprintf(stderr, "flat_buf:\n");
+    for (i = 0; i < flat_buf_p->count; i++)
+        fprintf(stderr, "(offset, length) = (%Ld, %d)\n",
+                flat_buf_p->indices[i],
+                flat_buf_p->blocklens[i]);
+    fprintf(stderr, "flat_file:\n");
+    for (i = 0; i < flat_file_p->count; i++)
+        fprintf(stderr, "(offset, length) = (%Ld, %d)\n",
+                flat_file_p->indices[i],
+                flat_file_p->blocklens[i]);
+#endif
+
+    /* total data written */
+    cur_io_size = 0;
+    while (cur_io_size != io_size)
+    {
+        /* Reset the per-batch offset-length lists and their counts */
+        buf_ol_count = 0;
+        file_ol_count = 0;
+        for (i = 0; i < MAX_OL_COUNT; i++)
+        {
+            buf_off_arr[i] = 0;
+            buf_len_arr[i] = 0;
+            file_off_arr[i] = 0;
+            file_len_arr[i] = 0;
+        }
+
+        /* Generate the offset-length pairs for a list I/O operation */
+        gen_listio_arr(flat_buf_p,
+                       &flat_buf_index,
+                       &cur_flat_buf_reg_off,
+                       buftype_size,
+                       (int) buftype_extent,
+                       flat_file_p,
+                       &flat_file_index,
+                       &cur_flat_file_reg_off,
+                       filetype_size,
+                       (int) filetype_extent,
+                       MAX_OL_COUNT,
+                       fd->disp,
+                       bytes_into_filetype,
+                       &cur_io_size,
+                       io_size,
+                       buf_off_arr,
+                       buf_len_arr,
+                       &buf_ol_count,
+                       file_off_arr,
+                       file_len_arr,
+                       &file_ol_count);
+
+        assert(buf_ol_count <= MAX_OL_COUNT);
+        assert(file_ol_count <= MAX_OL_COUNT);
+#ifdef DEBUG_LIST2
+        print_buf_file_ol_pairs(buf_off_arr,
+                                buf_len_arr,
+                                buf_ol_count,
+                                file_off_arr,
+                                file_len_arr,
+                                file_ol_count,
+                                buf,
+                                rw_type);
+        do {
+            int y, z;
+            fprintf(stderr, "ad_pvfs2_io_list.c::\n");
+            for (y = 0; y < buf_ol_count; y++)
+            {
+                for (z = 0; z < buf_len_arr[y]; z++)
+                {
+                    fprintf(stderr, "buf[%d][%d]=%c\n",
+                            y, z, ((char *) buf + buf_off_arr[y])[z]);
+                }
+            }
+        } while (0);
+#endif
+
+        /* Run list I/O operation */
+        ret = PVFS_Request_hindexed(buf_ol_count, buf_len_arr,
+                                    buf_off_arr, PVFS_BYTE, &mem_req);
+
+        ret = PVFS_Request_hindexed(file_ol_count, file_len_arr,
+                                    file_off_arr, PVFS_BYTE, &file_req);
+        if (rw_type == READ)
+        {
+            ret = PVFS_sys_read(pvfs_fs->object_ref, file_req, 0,
+                                buf, mem_req,
+                                &(pvfs_fs->credentials), &resp_io);
+        }
+        else
+        {
+            ret = PVFS_sys_write(pvfs_fs->object_ref, file_req, 0,
+                                 buf, mem_req,
+                                 &(pvfs_fs->credentials), &resp_io);
+        }
+        if (ret != 0)
+        {
+            fprintf(stderr, "ADIOI_PVFS2_StridedListIO: Warning - PVFS_sys_"
+                    "read/write returned %d and completed %Ld bytes.\n",
+                    ret, resp_io.total_completed);
+            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
+                                               MPIR_ERR_RECOVERABLE,
+                                               myname, __LINE__,
+                                               ADIOI_PVFS2_error_convert(ret),
+                                               "Error in PVFS_sys_io \n", 0);
+            PVFS_Request_free(&mem_req);
+            PVFS_Request_free(&file_req);
+            goto error_state;
+        }
+        total_bytes_accessed += resp_io.total_completed;
+
+        PVFS_Request_free(&mem_req);
+        PVFS_Request_free(&file_req);
+    }
+
+#ifdef DEBUG_LIST
+    fprintf(stderr, "ADIOI_PVFS2_StridedListIO: "
+            "total_bytes_accessed=%Ld,ret=%d\n",
+            total_bytes_accessed, ret);
+#endif
+
+    if (file_ptr_type == ADIO_INDIVIDUAL)
+        fd->fp_ind += total_bytes_accessed;
+    *error_code = MPI_SUCCESS;
+error_state:
+#ifdef HAVE_STATUS_SET_BYTES
+    /* TODO: why the cast? */
+    MPIR_Status_set_bytes(status, datatype, (int)total_bytes_accessed);
+/* This is a temporary way of filling in status. The right way is to
+   keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */
+#endif
+    if (buftype_is_contig == 0)
+        ADIOI_Delete_flattened(datatype);
+    else
+    {
+        ADIOI_Free(flat_buf_p->blocklens);
+        ADIOI_Free(flat_buf_p->indices);
+        ADIOI_Free(flat_buf_p);
+    }
+
+    if (filetype_is_contig == 0)
+        ADIOI_Delete_flattened(fd->filetype);
+    else
+    {
+        ADIOI_Free(flat_file_p->blocklens);
+        ADIOI_Free(flat_file_p->indices);
+        ADIOI_Free(flat_file_p);
+    }
+
+    return 0;
+}
+
+/* To do: Fix the code to coalesce the offset-length pairs for memory
+ * and file. */
+
+/* gen_listio_arr - fills in offset-length pairs for memory and file
+ * for list I/O; returns -1 if either pair count is nonzero on entry, else 0 */
+int gen_listio_arr(ADIOI_Flatlist_node *flat_buf_p, /* flattened memory datatype */
+ int *flat_buf_index_p, /* in/out: index of the current memory block */
+ int64_t *cur_flat_buf_reg_off_p, /* in/out: bytes already consumed in that block */
+ int flat_buf_size, /* bytes completed are divided by this ... */
+ int flat_buf_extent, /* ... and the quotient is multiplied by this to form memory offsets */
+ ADIOI_Flatlist_node *flat_file_p, /* flattened file datatype */
+ int *flat_file_index_p, /* in/out: index of the current file block */
+ int64_t *cur_flat_file_reg_off_p, /* in/out: bytes already consumed in that block */
+ int flat_file_size, /* divisor used when forming file offsets */
+ int flat_file_extent, /* multiplier used when forming file offsets */
+ int max_ol_count, /* stop once either pair count reaches this */
+ ADIO_Offset disp, /* added to every file offset */
+ int bytes_into_filetype, /* added to the bytes completed before dividing by flat_file_size */
+ int64_t *bytes_completed, /* in/out: advanced by every region processed */
+ int64_t total_io_size, /* stop once *bytes_completed equals this */
+ int64_t buf_off_arr[], /* out: memory offsets */
+ int32_t buf_len_arr[], /* out: memory lengths */
+ int32_t *buf_ol_count_p, /* in/out: number of memory pairs; must be 0 on entry */
+ int64_t file_off_arr[], /* out: file offsets */
+ int32_t file_len_arr[], /* out: file lengths */
+ int32_t *file_ol_count_p) /* in/out: number of file pairs; must be 0 on entry */
+{
+ int region_size = -1; /* bytes consumed by the current loop step */
+
+ /* parameters for flattened memory and file datatypes*/
+ int64_t cur_flat_buf_reg_left = 0; /* bytes left in the current memory block */
+ int64_t cur_flat_file_reg_left = 0; /* bytes left in the current file block */
+
+#ifdef DEBUG_LIST2
+ fprintf(stderr, "gen_list_arr:\n");
+#endif
+
+ if ((*buf_ol_count_p) != 0 ||(*file_ol_count_p) != 0)
+ {
+ fprintf(stderr, "buf_ol_count != 0 || file_ol_count != 0\n");
+ return -1; /* both counts have to start at zero */
+ }
+
+ /* Start on a non-zero memory and file region
+ * Note this does not affect the bytes_completed
+ * since no data is in these regions. Initialize the
+ * first memory and file offsets. */
+ while (flat_buf_p->blocklens[(*flat_buf_index_p)] == 0)
+ {
+ (*flat_buf_index_p) = ((*flat_buf_index_p) + 1) %
+ flat_buf_p->count;
+ }
+ buf_off_arr[*buf_ol_count_p] =
+ (*bytes_completed / flat_buf_size) *
+ flat_buf_extent +
+ flat_buf_p->indices[*flat_buf_index_p] +
+ *cur_flat_buf_reg_off_p;
+ buf_len_arr[*buf_ol_count_p] = 0;
+
+ while (flat_file_p->blocklens[(*flat_file_index_p)] == 0)
+ {
+ (*flat_file_index_p) = ((*flat_file_index_p) + 1) %
+ flat_file_p->count;
+ }
+ file_off_arr[*file_ol_count_p] = disp +
+ (((bytes_into_filetype + *bytes_completed) / flat_file_size) *
+ flat_file_extent) +
+ flat_file_p->indices[*flat_file_index_p] +
+ *cur_flat_file_reg_off_p;
+ file_len_arr[*file_ol_count_p] = 0;
+
+#ifdef DEBUG_LIST2
+ fprintf(stderr, "initial buf_off_arr[%d] = %Ld\n", *buf_ol_count_p,
+ buf_off_arr[*buf_ol_count_p]);
+ fprintf(stderr, "initial file_off_arr[%d] = %Ld\n", *file_ol_count_p,
+ file_off_arr[*file_ol_count_p]);
+#endif
+
+ while (*bytes_completed != total_io_size
+ && (*buf_ol_count_p) < max_ol_count
+ && (*file_ol_count_p) < max_ol_count)
+ {
+ /* How much data is left in the current piece in
+ * the flattened datatypes */
+ cur_flat_buf_reg_left = flat_buf_p->blocklens[*flat_buf_index_p]
+ - *cur_flat_buf_reg_off_p;
+ cur_flat_file_reg_left = flat_file_p->blocklens[*flat_file_index_p]
+ - *cur_flat_file_reg_off_p;
+
+#ifdef DEBUG_LIST2
+ fprintf(stderr,
+ "flat_buf_index=%d flat_buf->blocklens[%d]=%d\n"
+ "cur_flat_buf_reg_left=%Ld "
+ "*cur_flat_buf_reg_off_p=%Ld\n"
+ "flat_file_index=%d flat_file->blocklens[%d]=%d\n"
+ "cur_flat_file_reg_left=%Ld "
+ "*cur_flat_file_reg_off_p=%Ld\n"
+ "bytes_completed=%Ld\n"
+ "buf_ol_count=%d file_ol_count=%d\n"
+ "buf_len_arr[%d]=%d file_len_arr[%d]=%d\n\n",
+ *flat_buf_index_p, *flat_buf_index_p,
+ flat_buf_p->blocklens[*flat_buf_index_p],
+ cur_flat_buf_reg_left,
+ *cur_flat_buf_reg_off_p,
+ *flat_file_index_p, *flat_file_index_p,
+ flat_file_p->blocklens[*flat_file_index_p],
+ cur_flat_file_reg_left,
+ *cur_flat_file_reg_off_p,
+ *bytes_completed,
+ *buf_ol_count_p, *file_ol_count_p,
+ *buf_ol_count_p,
+ buf_len_arr[*buf_ol_count_p],
+ *file_ol_count_p,
+ file_len_arr[*file_ol_count_p]);
+#endif
+
+ /* What is the size of the next contiguous region agreed
+ * upon by both memory and file regions that does not
+ * surpass the file size */
+ if (cur_flat_buf_reg_left > cur_flat_file_reg_left)
+ region_size = cur_flat_file_reg_left;
+ else
+ region_size = cur_flat_buf_reg_left;
+
+ if (region_size > total_io_size - *bytes_completed)
+ region_size = total_io_size - *bytes_completed; /* never go past the requested total */
+
+ /* Add this piece to both the mem and file arrays
+ * coalescing offset-length pairs if possible and advance
+ * the pointers through the flatten mem and file datatypes
+ * as well Note: no more than a single piece can be done
+ * since we take the smallest one possible */
+
+ if (cur_flat_buf_reg_left == region_size) /* this step finishes the current memory block */
+ {
+#ifdef DEBUG_LIST2
+ fprintf(stderr, "reached end of memory block...\n");
+#endif
+ (*flat_buf_index_p) = ((*flat_buf_index_p) + 1) %
+ flat_buf_p->count;
+ while (flat_buf_p->blocklens[(*flat_buf_index_p)] == 0) /* skip zero-length blocks */
+ {
+ (*flat_buf_index_p) = ((*flat_buf_index_p) + 1) %
+ flat_buf_p->count;
+ }
+ *cur_flat_buf_reg_off_p = 0; /* the next block is entered at its start */
+
+#ifdef COALESCE_REGIONS
+ if (*buf_ol_count_p != 0)
+ {
+ if (buf_off_arr[(*buf_ol_count_p) - 1] +
+ buf_len_arr[(*buf_ol_count_p) - 1] ==
+ buf_off_arr[*buf_ol_count_p])
+ { /* previous pair ends where this one starts: extend it */
+ buf_len_arr[(*buf_ol_count_p) - 1] +=
+ region_size;
+ }
+ else
+ {
+ buf_len_arr[*buf_ol_count_p] += region_size;
+ (*buf_ol_count_p)++;
+ }
+ }
+ else
+ {
+#endif
+ buf_len_arr[*buf_ol_count_p] += region_size;
+ (*buf_ol_count_p)++;
+#ifdef COALESCE_REGIONS
+ }
+#endif
+
+ /* Don't prepare for the next piece if we have reached
+ * the limit or else it will segment fault. */
+ if ((*buf_ol_count_p) != max_ol_count)
+ {
+ buf_off_arr[*buf_ol_count_p] =
+ ((*bytes_completed + region_size) / flat_buf_size) *
+ flat_buf_extent +
+ flat_buf_p->indices[*flat_buf_index_p] +
+ (*cur_flat_buf_reg_off_p);
+ buf_len_arr[*buf_ol_count_p] = 0;
+ }
+ }
+ else if (cur_flat_buf_reg_left > region_size) /* memory block only partly consumed */
+ {
+#ifdef DEBUG_LIST2
+ fprintf(stderr, "advanced %d in memory block...\n",
+ region_size);
+#endif
+ (*cur_flat_buf_reg_off_p) += region_size;
+ buf_len_arr[*buf_ol_count_p] += region_size;
+ }
+ else
+ {
+ fprintf(stderr, "gen_listio_arr: Error\n"); /* region_size exceeds what the block has left */
+ }
+
+ /* To calculate the absolute file offset we need to
+ * add the disp, how many filetypes we have gone through,
+ * the relative block offset in the filetype and how far
+ * into the block we have gone. */
+ if (cur_flat_file_reg_left == region_size) /* this step finishes the current file block */
+ {
+#ifdef DEBUG_LIST2
+ fprintf(stderr, "reached end of file block...\n");
+#endif
+ (*flat_file_index_p) = ((*flat_file_index_p) + 1) %
+ flat_file_p->count;
+ while (flat_file_p->blocklens[(*flat_file_index_p)] == 0) /* skip zero-length blocks */
+ {
+ (*flat_file_index_p) = ((*flat_file_index_p) + 1) %
+ flat_file_p->count;
+ }
+ (*cur_flat_file_reg_off_p) = 0; /* the next block is entered at its start */
+
+#ifdef COALESCE_REGIONS
+ if (*file_ol_count_p != 0)
+ {
+ if (file_off_arr[(*file_ol_count_p) - 1] +
+ file_len_arr[(*file_ol_count_p) - 1] ==
+ file_off_arr[*file_ol_count_p])
+ { /* previous pair ends where this one starts: extend it */
+ file_len_arr[(*file_ol_count_p) - 1] +=
+ region_size;
+ }
+ else
+ {
+ file_len_arr[*file_ol_count_p] += region_size;
+ (*file_ol_count_p)++;
+ }
+ }
+ else
+ {
+#endif
+ file_len_arr[*file_ol_count_p] += region_size;
+ (*file_ol_count_p)++;
+#ifdef COALESCE_REGIONS
+ }
+#endif
+
+ /* Don't prepare for the next piece if we have reached
+ * the limit or else it will segment fault. */
+ if ((*file_ol_count_p) != max_ol_count)
+ {
+ file_off_arr[*file_ol_count_p] = disp +
+ (((bytes_into_filetype + *bytes_completed + region_size)
+ / flat_file_size) *
+ flat_file_extent) +
+ flat_file_p->indices[*flat_file_index_p] +
+ (*cur_flat_file_reg_off_p);
+ file_len_arr[*file_ol_count_p] = 0;
+ }
+ }
+ else if (cur_flat_file_reg_left > region_size) /* file block only partly consumed */
+ {
+#ifdef DEBUG_LIST2
+ fprintf(stderr, "advanced %d in file block...\n",
+ region_size);
+#endif
+ (*cur_flat_file_reg_off_p) += region_size;
+ file_len_arr[*file_ol_count_p] += region_size;
+ }
+ else
+ {
+ fprintf(stderr, "gen_listio_arr: Error\n"); /* region_size exceeds what the block has left */
+ }
+#ifdef DEBUG_LIST2
+ fprintf(stderr,
+ "------------------------------\n\n");
+#endif
+ *bytes_completed += region_size; /* progress is recorded once per step, after both sides */
+ }
+ /* Increment the count if we stopped in the middle of a
+ * memory or file region */
+ if (*cur_flat_buf_reg_off_p != 0)
+ (*buf_ol_count_p)++;
+ if (*cur_flat_file_reg_off_p != 0)
+ (*file_ol_count_p)++;
+
+ return 0;
+}
+
+/* Debug helper: dump the memory and then the file offset-length pairs to
+ * stderr.  buf and rw_type are accepted but not read. */
+void print_buf_file_ol_pairs(int64_t buf_off_arr[],
+                             int32_t buf_len_arr[],
+                             int32_t buf_ol_count,
+                             int64_t file_off_arr[],
+                             int32_t file_len_arr[],
+                             int32_t file_ol_count,
+                             void *buf,
+                             int rw_type)
+{
+    int i = -1;
+
+    /* casts keep every argument in step with its %lld / %d conversion */
+    fprintf(stderr, "buf_ol_pairs(offset,length) count = %d\n",
+            (int) buf_ol_count);
+    for (i = 0; i < buf_ol_count; i++)
+        fprintf(stderr, "(%lld, %d) ",
+                (long long) buf_off_arr[i], (int) buf_len_arr[i]);
+    fprintf(stderr, "\n");
+
+    fprintf(stderr, "file_ol_pairs(offset,length) count = %d\n",
+            (int) file_ol_count);
+    for (i = 0; i < file_ol_count; i++)
+        fprintf(stderr, "(%lld, %d) ",
+                (long long) file_off_arr[i], (int) file_len_arr[i]);
+    fprintf(stderr, "\n\n");
+}
diff --git a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_read.c b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_read.c
index 2dd7e55ca1..48009f2537 100644
--- a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_read.c
+++ b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_read.c
@@ -8,7 +8,7 @@
#include "adio.h"
#include "adio_extern.h"
#include "ad_pvfs2.h"
-
+#include "ad_pvfs2_io.h"
#include "ad_pvfs2_common.h"
void ADIOI_PVFS2_ReadContig(ADIO_File fd, void *buf, int count,
@@ -92,899 +92,77 @@ fn_exit:
return;
}
+static int ADIOI_PVFS2_ReadStridedListIO(ADIO_File fd, void *buf, int count,
+        MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset,
+        ADIO_Status *status, int *error_code)
+{
+    /* Read-side adapter for the list-I/O path; arguments pass through. */
+    int rc = ADIOI_PVFS2_StridedListIO(fd, buf, count, datatype,
+                                       file_ptr_type, offset, status,
+                                       error_code, READ);
+    return rc;
+}
+
+static int ADIOI_PVFS2_ReadStridedDtypeIO(ADIO_File fd, void *buf, int count,
+        MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset,
+        ADIO_Status *status, int *error_code)
+{
+    /* Read-side adapter for the datatype-I/O path; arguments pass through. */
+    int rc = ADIOI_PVFS2_StridedDtypeIO(fd, buf, count, datatype,
+                                        file_ptr_type, offset, status,
+                                        error_code, READ);
+    return rc;
+}
void ADIOI_PVFS2_ReadStrided(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status, int
*error_code)
{
- /* offset is in units of etype relative to the filetype. */
- ADIOI_Flatlist_node *flat_buf, *flat_file;
- int i, j, k, brd_size, frd_size=0, st_index=0;
- int bufsize, sum, n_etypes_in_filetype, size_in_filetype;
- int n_filetypes, etype_in_filetype;
- ADIO_Offset abs_off_in_filetype=0;
- int filetype_size, etype_size, buftype_size;
- MPI_Aint filetype_extent, buftype_extent;
- int buf_count, buftype_is_contig, filetype_is_contig;
- ADIO_Offset off, disp, start_off, initial_off;
- int flag, st_frd_size, st_n_filetypes;
+ /* four ways (to date) that we can carry out strided i/o accesses:
+ * - naive posix
+ * - 'true' Datatype (from avery)
+ * - new List I/O (from avery)
+ * - classic List I/O (the one that's always been in ROMIO)
+ * I imagine we'll keep Datatype as an optional optimization, and after a
+ * release or two promote it to the default
+ */
+ int ret = -1;
- int mem_list_count, file_list_count;
- PVFS_size *mem_offsets;
- int64_t *file_offsets;
- int *mem_lengths;
- int32_t *file_lengths;
- int total_blks_to_read;
-
- int max_mem_list, max_file_list;
-
- int b_blks_read;
- int f_data_read;
- int size_read=0, n_read_lists, extra_blks;
-
- int end_brd_size, end_frd_size;
- int start_k, start_j, new_file_read, new_buffer_read;
- int start_mem_offset;
- PVFS_Request mem_req, file_req;
- ADIOI_PVFS2_fs * pvfs_fs;
- PVFS_sysresp_io resp_io;
- int err_flag=0;
- MPI_Offset total_bytes_read = 0;
- static char myname[] = "ADIOI_PVFS2_ReadStrided";
-
-#define MAX_ARRAY_SIZE 64
-
- *error_code = MPI_SUCCESS; /* changed below if error */
-
- ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
- ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
-
- /* the HDF5 tests showed a bug in this list processing code (see many many
- * lines down below). We added a workaround, but common HDF5 file types
- * are actually contiguous and do not need the expensive workarond */
- if (!filetype_is_contig) {
- flat_file = ADIOI_Flatlist;
- while (flat_file->type != fd->filetype) flat_file = flat_file->next;
- if (flat_file->count == 1 && !buftype_is_contig)
- filetype_is_contig = 1;
- }
-
- MPI_Type_size(fd->filetype, &filetype_size);
- if ( ! filetype_size ) {
- *error_code = MPI_SUCCESS;
+ if (fd->hints->fs_hints.pvfs2.posix_read == ADIOI_HINT_ENABLE) {
+ ADIOI_GEN_ReadStrided(fd, buf, count, datatype,
+ file_ptr_type, offset, status, error_code);
return;
}
+ if (fd->hints->fs_hints.pvfs2.dtype_read == ADIOI_HINT_ENABLE) {
+ ret = ADIOI_PVFS2_ReadStridedDtypeIO(fd, buf, count,
+ datatype, file_ptr_type,
+ offset, status, error_code);
- MPI_Type_extent(fd->filetype, &filetype_extent);
- MPI_Type_size(datatype, &buftype_size);
- MPI_Type_extent(datatype, &buftype_extent);
- etype_size = fd->etype_size;
-
- bufsize = buftype_size * count;
-
- pvfs_fs = (ADIOI_PVFS2_fs*)fd->fs_ptr;
-
- if (!buftype_is_contig && filetype_is_contig) {
-
-/* noncontiguous in memory, contiguous in file. */
- int64_t file_offsets;
- int32_t file_lengths;
-
- ADIOI_Flatten_datatype(datatype);
- flat_buf = ADIOI_Flatlist;
- while (flat_buf->type != datatype) flat_buf = flat_buf->next;
-
- off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind :
- fd->disp + etype_size * offset;
-
- file_list_count = 1;
- file_offsets = off;
- file_lengths = 0;
- total_blks_to_read = count*flat_buf->count;
- b_blks_read = 0;
-
- /* allocate arrays according to max usage */
- if (total_blks_to_read > MAX_ARRAY_SIZE)
- mem_list_count = MAX_ARRAY_SIZE;
- else mem_list_count = total_blks_to_read;
- mem_offsets = (PVFS_size*)ADIOI_Malloc(mem_list_count*sizeof(PVFS_size));
- mem_lengths = (int*)ADIOI_Malloc(mem_list_count*sizeof(int));
-
- /* TODO: CHECK RESULTS OF MEMORY ALLOCATION */
-
- j = 0;
- /* step through each block in memory, filling memory arrays */
- while (b_blks_read < total_blks_to_read) {
- for (i=0; icount; i++) {
- mem_offsets[b_blks_read % MAX_ARRAY_SIZE] =
- /* TODO: fix this compiler warning */
- ((PVFS_size)buf + j*buftype_extent + flat_buf->indices[i]);
- mem_lengths[b_blks_read % MAX_ARRAY_SIZE] =
- flat_buf->blocklens[i];
- file_lengths += flat_buf->blocklens[i];
- b_blks_read++;
- if (!(b_blks_read % MAX_ARRAY_SIZE) ||
- (b_blks_read == total_blks_to_read)) {
-
- /* in the case of the last read list call,
- adjust mem_list_count */
- if (b_blks_read == total_blks_to_read) {
- mem_list_count = total_blks_to_read % MAX_ARRAY_SIZE;
- /* in case last read list call fills max arrays */
- if (!mem_list_count) mem_list_count = MAX_ARRAY_SIZE;
- }
- err_flag = PVFS_Request_hindexed(mem_list_count,
- mem_lengths, mem_offsets, PVFS_BYTE, &mem_req);
- if (err_flag < 0) break;
- err_flag = PVFS_Request_contiguous(file_lengths,
- PVFS_BYTE, &file_req);
- if (err_flag < 0) break;
-#ifdef ADIOI_MPE_LOGGING
- MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
-#endif
- err_flag = PVFS_sys_read(pvfs_fs->object_ref, file_req,
- file_offsets, PVFS_BOTTOM, mem_req,
- &(pvfs_fs->credentials), &resp_io);
-#ifdef ADIOI_MPE_LOGGING
- MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
-#endif
- /* --BEGIN ERROR HANDLING-- */
- if (err_flag != 0) {
- *error_code = MPIO_Err_create_code(MPI_SUCCESS,
- MPIR_ERR_RECOVERABLE,
- myname, __LINE__,
- ADIOI_PVFS2_error_convert(err_flag),
- "Error in PVFS_sys_read", 0);
- goto error_state;
- }
- PVFS_Request_free(&mem_req);
- PVFS_Request_free(&file_req);
- total_bytes_read += resp_io.total_completed;
- /* --END ERROR HANDLING-- */
-
- /* in the case of error or the last read list call,
- * leave here */
- if (err_flag || b_blks_read == total_blks_to_read) break;
-
- file_offsets += file_lengths;
- file_lengths = 0;
- }
- } /* for (i=0; icount; i++) */
- j++;
- } /* while (b_blks_read < total_blks_to_read) */
- ADIOI_Free(mem_offsets);
- ADIOI_Free(mem_lengths);
-
- if (file_ptr_type == ADIO_INDIVIDUAL)
- fd->fp_ind += total_bytes_read;
-
- fd->fp_sys_posn = -1; /* set it to null. */
-
-#ifdef HAVE_STATUS_SET_BYTES
- MPIR_Status_set_bytes(status, datatype, bufsize);
- /* This isa temporary way of filling in status. The right way is to
- keep tracke of how much data was actually read adn placed in buf
- by ADIOI_BUFFERED_READ. */
-#endif
- ADIOI_Delete_flattened(datatype);
-
+ /* Fall back to list I/O if datatype I/O didn't work */
+ if (ret != 0)
+ {
+ fprintf(stderr,
+ "Falling back to list I/O since datatype I/O failed\n");
+ ret = ADIOI_PVFS2_ReadStridedListIO(fd, buf, count,
+ datatype, file_ptr_type,
+ offset, status, error_code);
+ }
+ return;
+ }
+ if (fd->hints->fs_hints.pvfs2.listio_read == ADIOI_HINT_ENABLE) {
+ ret = ADIOI_PVFS2_ReadStridedListIO(fd, buf, count, datatype,
+ file_ptr_type, offset, status, error_code);
return;
- } /* if (!buftype_is_contig && filetype_is_contig) */
-
- /* know file is noncontiguous from above */
- /* noncontiguous in file */
-
- /* filetype already flattened in ADIO_Open */
- flat_file = ADIOI_Flatlist;
- while (flat_file->type != fd->filetype) flat_file = flat_file->next;
-
- disp = fd->disp;
- initial_off = offset;
-
-
- /* for each case - ADIO_Individual pointer or explicit, find the file
- offset in bytes (offset), n_filetypes (how many filetypes into
- file to start), frd_size (remaining amount of data in present
- file block), and st_index (start point in terms of blocks in
- starting filetype) */
- if (file_ptr_type == ADIO_INDIVIDUAL) {
- offset = fd->fp_ind; /* in bytes */
- n_filetypes = -1;
- flag = 0;
- while (!flag) {
- n_filetypes++;
- for (i=0; icount; i++) {
- if (disp + flat_file->indices[i] +
- ((ADIO_Offset) n_filetypes)*filetype_extent +
- flat_file->blocklens[i] >= offset) {
- st_index = i;
- frd_size = (int) (disp + flat_file->indices[i] +
- ((ADIO_Offset) n_filetypes)*filetype_extent
- + flat_file->blocklens[i] - offset);
- flag = 1;
- break;
- }
- }
- } /* while (!flag) */
- } /* if (file_ptr_type == ADIO_INDIVIDUAL) */
- else {
- n_etypes_in_filetype = filetype_size/etype_size;
- n_filetypes = (int) (offset / n_etypes_in_filetype);
- etype_in_filetype = (int) (offset % n_etypes_in_filetype);
- size_in_filetype = etype_in_filetype * etype_size;
-
- sum = 0;
- for (i=0; icount; i++) {
- sum += flat_file->blocklens[i];
- if (sum > size_in_filetype) {
- st_index = i;
- frd_size = sum - size_in_filetype;
- abs_off_in_filetype = flat_file->indices[i] +
- size_in_filetype - (sum - flat_file->blocklens[i]);
- break;
- }
- }
-
- /* abs. offset in bytes in the file */
- offset = disp + ((ADIO_Offset) n_filetypes)*filetype_extent +
- abs_off_in_filetype;
- } /* else [file_ptr_type != ADIO_INDIVIDUAL] */
-
- start_off = offset;
- st_frd_size = frd_size;
- st_n_filetypes = n_filetypes;
-
- if (buftype_is_contig && !filetype_is_contig) {
-
-/* contiguous in memory, noncontiguous in file. should be the most
- common case. */
-
- int mem_lengths;
- char *mem_offsets;
-
- i = 0;
- j = st_index;
- n_filetypes = st_n_filetypes;
-
- mem_list_count = 1;
-
- /* determine how many blocks in file to read */
- f_data_read = ADIOI_MIN(st_frd_size, bufsize);
- total_blks_to_read = 1;
- if (j < (flat_file->count-1)) j++;
- else {
- j = 0;
- n_filetypes++;
- }
- while (f_data_read < bufsize) {
- f_data_read += flat_file->blocklens[j];
- total_blks_to_read++;
- if (j<(flat_file->count-1)) j++;
- else j = 0;
- }
-
- j = st_index;
- n_filetypes = st_n_filetypes;
- n_read_lists = total_blks_to_read/MAX_ARRAY_SIZE;
- extra_blks = total_blks_to_read%MAX_ARRAY_SIZE;
-
- mem_offsets = buf;
- mem_lengths = 0;
-
- /* if at least one full readlist, allocate file arrays
- at max array size and don't free until very end */
- if (n_read_lists) {
- file_offsets = (int64_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
- sizeof(int64_t));
- file_lengths = (int32_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
- sizeof(int32_t));
- }
- /* if there's no full readlist allocate file arrays according
- to needed size (extra_blks) */
- else {
- file_offsets = (int64_t*)ADIOI_Malloc(extra_blks*
- sizeof(int64_t));
- file_lengths = (int32_t*)ADIOI_Malloc(extra_blks*
- sizeof(int32_t));
- }
-
- /* for file arrays that are of MAX_ARRAY_SIZE, build arrays */
- for (i=0; iindices[j];
- file_lengths[k] = flat_file->blocklens[j];
- mem_lengths += file_lengths[k];
- }
- if (j<(flat_file->count - 1)) j++;
- else {
- j = 0;
- n_filetypes++;
- }
- } /* for (k=0; kobject_ref, file_req, 0,
- mem_offsets, mem_req,
- &(pvfs_fs->credentials), &resp_io);
-#ifdef ADIOI_MPE_LOGGING
- MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
-#endif
- /* --BEGIN ERROR HANDLING-- */
- if (err_flag != 0) {
- *error_code = MPIO_Err_create_code(MPI_SUCCESS,
- MPIR_ERR_RECOVERABLE,
- myname, __LINE__,
- ADIOI_PVFS2_error_convert(err_flag),
- "Error in PVFS_sys_read", 0);
- goto error_state;
- }
- /* --END ERROR HANDING-- */
- PVFS_Request_free(&mem_req);
- PVFS_Request_free(&file_req);
-
- total_bytes_read += resp_io.total_completed;
-
- mem_offsets += mem_lengths;
- mem_lengths = 0;
- } /* for (i=0; iindices[j];
- if (k == (extra_blks - 1)) {
- file_lengths[k] = bufsize - (int32_t) mem_lengths
- - (int32_t) mem_offsets + (int32_t) buf;
- }
- else file_lengths[k] = flat_file->blocklens[j];
- } /* if(i || k) */
- mem_lengths += file_lengths[k];
- if (j<(flat_file->count - 1)) j++;
- else {
- j = 0;
- n_filetypes++;
- }
- } /* for (k=0; kobject_ref, file_req, 0,
- mem_offsets, mem_req, &(pvfs_fs->credentials), &resp_io);
-#ifdef ADIOI_MPE_LOGGING
- MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
-#endif
- /* --BEGIN ERROR HANDLING-- */
- if (err_flag != 0) {
- *error_code = MPIO_Err_create_code(MPI_SUCCESS,
- MPIR_ERR_RECOVERABLE,
- myname, __LINE__,
- ADIOI_PVFS2_error_convert(err_flag),
- "Error in PVFS_sys_read", 0);
- goto error_state;
- }
- /* --END ERROR HANDLING-- */
- PVFS_Request_free(&mem_req);
- PVFS_Request_free(&file_req);
- total_bytes_read += resp_io.total_completed;
- }
}
- else {
-/* noncontiguous in memory as well as in file */
-
- ADIOI_Flatten_datatype(datatype);
- flat_buf = ADIOI_Flatlist;
- while (flat_buf->type != datatype) flat_buf = flat_buf->next;
+ /* Base case: no hints were given, so use classic list I/O */
- size_read = 0;
- n_filetypes = st_n_filetypes;
- frd_size = st_frd_size;
- brd_size = flat_buf->blocklens[0];
- buf_count = 0;
- start_mem_offset = 0;
- start_k = k = 0;
- start_j = st_index;
- max_mem_list = 0;
- max_file_list = 0;
-
- /* run through and file max_file_list and max_mem_list so that you
- can allocate the file and memory arrays less than MAX_ARRAY_SIZE
- if possible */
-
- while (size_read < bufsize) {
- k = start_k;
- new_buffer_read = 0;
- mem_list_count = 0;
- while ((mem_list_count < MAX_ARRAY_SIZE) &&
- (new_buffer_read < bufsize-size_read)) {
- /* find mem_list_count and file_list_count such that both are
- less than MAX_ARRAY_SIZE, the sum of their lengths are
- equal, and the sum of all the data read and data to be
- read in the next immediate read list is less than
- bufsize */
- if(mem_list_count) {
- if((new_buffer_read + flat_buf->blocklens[k] +
- size_read) > bufsize) {
- end_brd_size = new_buffer_read +
- flat_buf->blocklens[k] - (bufsize - size_read);
- new_buffer_read = bufsize - size_read;
- }
- else {
- new_buffer_read += flat_buf->blocklens[k];
- end_brd_size = flat_buf->blocklens[k];
- }
- }
- else {
- if (brd_size > (bufsize - size_read)) {
- new_buffer_read = bufsize - size_read;
- brd_size = new_buffer_read;
- }
- else new_buffer_read = brd_size;
- }
- mem_list_count++;
- k = (k + 1)%flat_buf->count;
- } /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
- (new_buffer_read < bufsize-size_read)) */
- j = start_j;
- new_file_read = 0;
- file_list_count = 0;
- while ((file_list_count < MAX_ARRAY_SIZE) &&
- (new_file_read < new_buffer_read)) {
- if(file_list_count) {
- if((new_file_read + flat_file->blocklens[j]) >
- new_buffer_read) {
- end_frd_size = new_buffer_read - new_file_read;
- new_file_read = new_buffer_read;
- j--;
- }
- else {
- new_file_read += flat_file->blocklens[j];
- end_frd_size = flat_file->blocklens[j];
- }
- }
- else {
- if (frd_size > new_buffer_read) {
- new_file_read = new_buffer_read;
- frd_size = new_file_read;
- }
- else new_file_read = frd_size;
- }
- file_list_count++;
- if (j < (flat_file->count - 1)) j++;
- else j = 0;
-
- k = start_k;
- if ((new_file_read < new_buffer_read) &&
- (file_list_count == MAX_ARRAY_SIZE)) {
- new_buffer_read = 0;
- mem_list_count = 0;
- while (new_buffer_read < new_file_read) {
- if(mem_list_count) {
- if((new_buffer_read + flat_buf->blocklens[k]) >
- new_file_read) {
- end_brd_size = new_file_read - new_buffer_read;
- new_buffer_read = new_file_read;
- k--;
- }
- else {
- new_buffer_read += flat_buf->blocklens[k];
- end_brd_size = flat_buf->blocklens[k];
- }
- }
- else {
- new_buffer_read = brd_size;
- if (brd_size > (bufsize - size_read)) {
- new_buffer_read = bufsize - size_read;
- brd_size = new_buffer_read;
- }
- }
- mem_list_count++;
- k = (k + 1)%flat_buf->count;
- } /* while (new_buffer_read < new_file_read) */
- } /* if ((new_file_read < new_buffer_read) && (file_list_count
- == MAX_ARRAY_SIZE)) */
- } /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
- (new_buffer_read < bufsize-size_read)) */
-
- /* fakes filling the readlist arrays of lengths found above */
- k = start_k;
- j = start_j;
- for (i=0; iblocklens[k] == end_brd_size)
- brd_size = flat_buf->blocklens[(k+1)%
- flat_buf->count];
- else {
- brd_size = flat_buf->blocklens[k] - end_brd_size;
- k--;
- buf_count--;
- }
- }
- }
- buf_count++;
- k = (k + 1)%flat_buf->count;
- } /* for (i=0; iblocklens[j] == end_frd_size)
- frd_size = flat_file->blocklens[(j+1)%
- flat_file->count];
- else {
- frd_size = flat_file->blocklens[j] - end_frd_size;
- j--;
- }
- }
- }
- if (j < flat_file->count - 1) j++;
- else {
- j = 0;
- n_filetypes++;
- }
- } /* for (i=0; iblocklens[0] ) ) ||
- ((mem_list_count == 1) &&
- (new_buffer_read < flat_buf->blocklens[0]) ) ||
- ((file_list_count == MAX_ARRAY_SIZE) &&
- (new_file_read < flat_buf->blocklens[0]) ) ||
- ( (mem_list_count == MAX_ARRAY_SIZE) &&
- (new_buffer_read < flat_file->blocklens[0])) )
- {
-
- ADIOI_Delete_flattened(datatype);
- ADIOI_GEN_ReadStrided_naive(fd, buf, count, datatype,
- file_ptr_type, initial_off, status, error_code);
- return;
- }
-
- mem_offsets = (PVFS_size*)ADIOI_Malloc(max_mem_list*sizeof(PVFS_size));
- mem_lengths = (int *)ADIOI_Malloc(max_mem_list*sizeof(int));
- file_offsets = (int64_t *)ADIOI_Malloc(max_file_list*sizeof(int64_t));
- file_lengths = (int32_t *)ADIOI_Malloc(max_file_list*sizeof(int32_t));
-
- size_read = 0;
- n_filetypes = st_n_filetypes;
- frd_size = st_frd_size;
- brd_size = flat_buf->blocklens[0];
- buf_count = 0;
- start_mem_offset = 0;
- start_k = k = 0;
- start_j = st_index;
-
- /* this section calculates mem_list_count and file_list_count
- and also finds the possibly odd sized last array elements
- in new_frd_size and new_brd_size */
-
- while (size_read < bufsize) {
- k = start_k;
- new_buffer_read = 0;
- mem_list_count = 0;
- while ((mem_list_count < MAX_ARRAY_SIZE) &&
- (new_buffer_read < bufsize-size_read)) {
- /* find mem_list_count and file_list_count such that both are
- less than MAX_ARRAY_SIZE, the sum of their lengths are
- equal, and the sum of all the data read and data to be
- read in the next immediate read list is less than
- bufsize */
- if(mem_list_count) {
- if((new_buffer_read + flat_buf->blocklens[k] +
- size_read) > bufsize) {
- end_brd_size = new_buffer_read +
- flat_buf->blocklens[k] - (bufsize - size_read);
- new_buffer_read = bufsize - size_read;
- }
- else {
- new_buffer_read += flat_buf->blocklens[k];
- end_brd_size = flat_buf->blocklens[k];
- }
- }
- else {
- if (brd_size > (bufsize - size_read)) {
- new_buffer_read = bufsize - size_read;
- brd_size = new_buffer_read;
- }
- else new_buffer_read = brd_size;
- }
- mem_list_count++;
- k = (k + 1)%flat_buf->count;
- } /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
- (new_buffer_read < bufsize-size_read)) */
- j = start_j;
- new_file_read = 0;
- file_list_count = 0;
- while ((file_list_count < MAX_ARRAY_SIZE) &&
- (new_file_read < new_buffer_read)) {
- if(file_list_count) {
- if((new_file_read + flat_file->blocklens[j]) >
- new_buffer_read) {
- end_frd_size = new_buffer_read - new_file_read;
- new_file_read = new_buffer_read;
- j--;
- }
- else {
- new_file_read += flat_file->blocklens[j];
- end_frd_size = flat_file->blocklens[j];
- }
- }
- else {
- if (frd_size > new_buffer_read) {
- new_file_read = new_buffer_read;
- frd_size = new_file_read;
- }
- else new_file_read = frd_size;
- }
- file_list_count++;
- if (j < (flat_file->count - 1)) j++;
- else j = 0;
-
- k = start_k;
- if ((new_file_read < new_buffer_read) &&
- (file_list_count == MAX_ARRAY_SIZE)) {
- new_buffer_read = 0;
- mem_list_count = 0;
- while (new_buffer_read < new_file_read) {
- if(mem_list_count) {
- if((new_buffer_read + flat_buf->blocklens[k]) >
- new_file_read) {
- end_brd_size = new_file_read - new_buffer_read;
- new_buffer_read = new_file_read;
- k--;
- }
- else {
- new_buffer_read += flat_buf->blocklens[k];
- end_brd_size = flat_buf->blocklens[k];
- }
- }
- else {
- new_buffer_read = brd_size;
- if (brd_size > (bufsize - size_read)) {
- new_buffer_read = bufsize - size_read;
- brd_size = new_buffer_read;
- }
- }
- mem_list_count++;
- k = (k + 1)%flat_buf->count;
- } /* while (new_buffer_read < new_file_read) */
- } /* if ((new_file_read < new_buffer_read) && (file_list_count
- == MAX_ARRAY_SIZE)) */
- } /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
- (new_buffer_read < bufsize-size_read)) */
-
- /* fills the allocated readlist arrays */
- k = start_k;
- j = start_j;
- for (i=0; icount) +
- (int)flat_buf->indices[k]);
- if(!i) {
- mem_lengths[0] = brd_size;
- mem_offsets[0] += flat_buf->blocklens[k] - brd_size;
- }
- else {
- if (i == (mem_list_count - 1)) {
- mem_lengths[i] = end_brd_size;
- if (flat_buf->blocklens[k] == end_brd_size)
- brd_size = flat_buf->blocklens[(k+1)%
- flat_buf->count];
- else {
- brd_size = flat_buf->blocklens[k] - end_brd_size;
- k--;
- buf_count--;
- }
- }
- else {
- mem_lengths[i] = flat_buf->blocklens[k];
- }
- }
- buf_count++;
- k = (k + 1)%flat_buf->count;
- } /* for (i=0; iindices[j] +
- ((ADIO_Offset)n_filetypes) * filetype_extent;
- if (!i) {
- file_lengths[0] = frd_size;
- file_offsets[0] += flat_file->blocklens[j] - frd_size;
- }
- else {
- if (i == (file_list_count - 1)) {
- file_lengths[i] = end_frd_size;
- if (flat_file->blocklens[j] == end_frd_size)
- frd_size = flat_file->blocklens[(j+1)%
- flat_file->count];
- else {
- frd_size = flat_file->blocklens[j] - end_frd_size;
- j--;
- }
- }
- else file_lengths[i] = flat_file->blocklens[j];
- }
- if (j < flat_file->count - 1) j++;
- else {
- j = 0;
- n_filetypes++;
- }
- } /* for (i=0; iobject_ref, file_req, 0,
- PVFS_BOTTOM, mem_req, &(pvfs_fs->credentials), &resp_io);
-#ifdef ADIOI_MPE_LOGGING
- MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
-#endif
- /* --BEGIN ERROR HANDLING-- */
- if (err_flag != 0) {
- *error_code = MPIO_Err_create_code(MPI_SUCCESS,
- MPIR_ERR_RECOVERABLE,
- myname, __LINE__,
- ADIOI_PVFS2_error_convert(err_flag),
- "Error in PVFS_sys_read", 0);
- }
- /* --END ERROR HANDLING-- */
- PVFS_Request_free(&mem_req);
- PVFS_Request_free(&file_req);
- total_bytes_read += resp_io.total_completed;
- size_read += new_buffer_read;
- start_k = k;
- start_j = j;
- } /* while (size_read < bufsize) */
- ADIOI_Free(mem_offsets);
- ADIOI_Free(mem_lengths);
- }
- ADIOI_Free(file_offsets);
- ADIOI_Free(file_lengths);
-
- /* Other ADIO routines will convert absolute bytes into counts of datatypes */
- /* when incrementing fp_ind, need to also take into account the file type:
- * consider an N-element 1-d subarray with a lb and ub: ( |---xxxxx-----|
- * if we wrote N elements, offset needs to point at beginning of type, not
- * at empty region at offset N+1) */
- if (file_ptr_type == ADIO_INDIVIDUAL) {
- /* this is closer, but still incorrect for the cases where a small
- * amount of a file type is "leftover" after a write */
- fd->fp_ind = disp + flat_file->indices[j] +
- ((ADIO_Offset)n_filetypes)*filetype_extent;
- }
- if (err_flag == 0) *error_code = MPI_SUCCESS;
-
-error_state:
- fd->fp_sys_posn = -1; /* set it to null. */
-
-#ifdef HAVE_STATUS_SET_BYTES
- MPIR_Status_set_bytes(status, datatype, bufsize);
- /* This is a temporary way of filling in status. The right way is to
- keep track of how much data was actually read and placed in buf
- by ADIOI_BUFFERED_READ. */
-#endif
-
- if (!buftype_is_contig) ADIOI_Delete_flattened(datatype);
+ ADIOI_PVFS2_OldReadStrided(fd, buf, count, datatype,
+ file_ptr_type, offset, status, error_code);
+ return;
}
+
/*
* vim: ts=8 sts=4 sw=4 noexpandtab
*/
diff --git a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_read_list_classic.c b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_read_list_classic.c
new file mode 100644
index 0000000000..d5ceefa464
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_read_list_classic.c
@@ -0,0 +1,909 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*-
+ * vim: ts=8 sts=4 sw=4 noexpandtab
+ *
+ * Copyright (C) 2008 University of Chicago.
+ * See COPYRIGHT notice in top-level directory.
+ */
+
+#include "adio.h"
+#include "adio_extern.h"
+#include "ad_pvfs2.h"
+
+#include "ad_pvfs2_common.h"
+
+void ADIOI_PVFS2_OldReadStrided(ADIO_File fd, void *buf, int count,
+ MPI_Datatype datatype, int file_ptr_type,
+ ADIO_Offset offset, ADIO_Status *status, int
+ *error_code)
+{
+ /* offset is in units of etype relative to the filetype. */
+ ADIOI_Flatlist_node *flat_buf, *flat_file;
+ int i, j, k, brd_size, frd_size=0, st_index=0;
+ int bufsize, sum, n_etypes_in_filetype, size_in_filetype;
+ int n_filetypes, etype_in_filetype;
+ ADIO_Offset abs_off_in_filetype=0;
+ int filetype_size, etype_size, buftype_size;
+ MPI_Aint filetype_extent, buftype_extent;
+ int buf_count, buftype_is_contig, filetype_is_contig;
+ ADIO_Offset off, disp, start_off, initial_off;
+ int flag, st_frd_size, st_n_filetypes;
+
+ int mem_list_count, file_list_count;
+ PVFS_size *mem_offsets;
+ int64_t *file_offsets;
+ int *mem_lengths;
+ int32_t *file_lengths;
+ int total_blks_to_read;
+
+ int max_mem_list, max_file_list;
+
+ int b_blks_read;
+ int f_data_read;
+ int size_read=0, n_read_lists, extra_blks;
+
+ int end_brd_size, end_frd_size;
+ int start_k, start_j, new_file_read, new_buffer_read;
+ int start_mem_offset;
+ PVFS_Request mem_req, file_req;
+ ADIOI_PVFS2_fs * pvfs_fs;
+ PVFS_sysresp_io resp_io;
+ int err_flag=0;
+ MPI_Offset total_bytes_read = 0;
+ static char myname[] = "ADIOI_PVFS2_ReadStrided";
+
+#define MAX_ARRAY_SIZE 64
+
+ *error_code = MPI_SUCCESS; /* changed below if error */
+
+ ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
+ ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
+
+ /* the HDF5 tests showed a bug in this list processing code (see many many
+ * lines down below). We added a workaround, but common HDF5 file types
+ * are actually contiguous and do not need the expensive workaround */
+ if (!filetype_is_contig) {
+ flat_file = ADIOI_Flatlist;
+ while (flat_file->type != fd->filetype) flat_file = flat_file->next;
+ if (flat_file->count == 1 && !buftype_is_contig)
+ filetype_is_contig = 1;
+ }
+
+ MPI_Type_size(fd->filetype, &filetype_size);
+ if ( ! filetype_size ) {
+ *error_code = MPI_SUCCESS;
+ return;
+ }
+
+ MPI_Type_extent(fd->filetype, &filetype_extent);
+ MPI_Type_size(datatype, &buftype_size);
+ MPI_Type_extent(datatype, &buftype_extent);
+ etype_size = fd->etype_size;
+
+ bufsize = buftype_size * count;
+
+ pvfs_fs = (ADIOI_PVFS2_fs*)fd->fs_ptr;
+
+ if (!buftype_is_contig && filetype_is_contig) {
+
+/* noncontiguous in memory, contiguous in file. */
+ int64_t file_offsets;
+ int32_t file_lengths;
+
+ ADIOI_Flatten_datatype(datatype);
+ flat_buf = ADIOI_Flatlist;
+ while (flat_buf->type != datatype) flat_buf = flat_buf->next;
+
+ off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind :
+ fd->disp + etype_size * offset;
+
+ file_list_count = 1;
+ file_offsets = off;
+ file_lengths = 0;
+ total_blks_to_read = count*flat_buf->count;
+ b_blks_read = 0;
+
+ /* allocate arrays according to max usage */
+ if (total_blks_to_read > MAX_ARRAY_SIZE)
+ mem_list_count = MAX_ARRAY_SIZE;
+ else mem_list_count = total_blks_to_read;
+ mem_offsets = (PVFS_size*)ADIOI_Malloc(mem_list_count*sizeof(PVFS_size));
+ mem_lengths = (int*)ADIOI_Malloc(mem_list_count*sizeof(int));
+
+ /* TODO: CHECK RESULTS OF MEMORY ALLOCATION */
+
+ j = 0;
+ /* step through each block in memory, filling memory arrays */
+ while (b_blks_read < total_blks_to_read) {
+ for (i=0; icount; i++) {
+ mem_offsets[b_blks_read % MAX_ARRAY_SIZE] =
+ /* TODO: fix this compiler warning */
+ ((PVFS_size)buf + j*buftype_extent + flat_buf->indices[i]);
+ mem_lengths[b_blks_read % MAX_ARRAY_SIZE] =
+ flat_buf->blocklens[i];
+ file_lengths += flat_buf->blocklens[i];
+ b_blks_read++;
+ if (!(b_blks_read % MAX_ARRAY_SIZE) ||
+ (b_blks_read == total_blks_to_read)) {
+
+ /* in the case of the last read list call,
+ adjust mem_list_count */
+ if (b_blks_read == total_blks_to_read) {
+ mem_list_count = total_blks_to_read % MAX_ARRAY_SIZE;
+ /* in case last read list call fills max arrays */
+ if (!mem_list_count) mem_list_count = MAX_ARRAY_SIZE;
+ }
+ err_flag = PVFS_Request_hindexed(mem_list_count,
+ mem_lengths, mem_offsets, PVFS_BYTE, &mem_req);
+ if (err_flag < 0) break;
+ err_flag = PVFS_Request_contiguous(file_lengths,
+ PVFS_BYTE, &file_req);
+ if (err_flag < 0) break;
+#ifdef ADIOI_MPE_LOGGING
+ MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
+#endif
+ err_flag = PVFS_sys_read(pvfs_fs->object_ref, file_req,
+ file_offsets, PVFS_BOTTOM, mem_req,
+ &(pvfs_fs->credentials), &resp_io);
+#ifdef ADIOI_MPE_LOGGING
+ MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
+#endif
+ /* --BEGIN ERROR HANDLING-- */
+ if (err_flag != 0) {
+ *error_code = MPIO_Err_create_code(MPI_SUCCESS,
+ MPIR_ERR_RECOVERABLE,
+ myname, __LINE__,
+ ADIOI_PVFS2_error_convert(err_flag),
+ "Error in PVFS_sys_read", 0);
+ goto error_state;
+ }
+ PVFS_Request_free(&mem_req);
+ PVFS_Request_free(&file_req);
+ total_bytes_read += resp_io.total_completed;
+ /* --END ERROR HANDLING-- */
+
+ /* in the case of error or the last read list call,
+ * leave here */
+ if (err_flag || b_blks_read == total_blks_to_read) break;
+
+ file_offsets += file_lengths;
+ file_lengths = 0;
+ }
+ } /* for (i=0; icount; i++) */
+ j++;
+ } /* while (b_blks_read < total_blks_to_read) */
+ ADIOI_Free(mem_offsets);
+ ADIOI_Free(mem_lengths);
+
+ if (file_ptr_type == ADIO_INDIVIDUAL)
+ fd->fp_ind += total_bytes_read;
+
+ fd->fp_sys_posn = -1; /* set it to null. */
+
+#ifdef HAVE_STATUS_SET_BYTES
+ MPIR_Status_set_bytes(status, datatype, bufsize);
+ /* This is a temporary way of filling in status. The right way is to
+ keep track of how much data was actually read and placed in buf
+ by ADIOI_BUFFERED_READ. */
+#endif
+ ADIOI_Delete_flattened(datatype);
+
+ return;
+ } /* if (!buftype_is_contig && filetype_is_contig) */
+
+ /* know file is noncontiguous from above */
+ /* noncontiguous in file */
+
+ /* filetype already flattened in ADIO_Open */
+ flat_file = ADIOI_Flatlist;
+ while (flat_file->type != fd->filetype) flat_file = flat_file->next;
+
+ disp = fd->disp;
+ initial_off = offset;
+
+
+ /* for each case - ADIO_Individual pointer or explicit, find the file
+ offset in bytes (offset), n_filetypes (how many filetypes into
+ file to start), frd_size (remaining amount of data in present
+ file block), and st_index (start point in terms of blocks in
+ starting filetype) */
+ if (file_ptr_type == ADIO_INDIVIDUAL) {
+ offset = fd->fp_ind; /* in bytes */
+ n_filetypes = -1;
+ flag = 0;
+ while (!flag) {
+ n_filetypes++;
+ for (i=0; icount; i++) {
+ if (disp + flat_file->indices[i] +
+ ((ADIO_Offset) n_filetypes)*filetype_extent +
+ flat_file->blocklens[i] >= offset) {
+ st_index = i;
+ frd_size = (int) (disp + flat_file->indices[i] +
+ ((ADIO_Offset) n_filetypes)*filetype_extent
+ + flat_file->blocklens[i] - offset);
+ flag = 1;
+ break;
+ }
+ }
+ } /* while (!flag) */
+ } /* if (file_ptr_type == ADIO_INDIVIDUAL) */
+ else {
+ n_etypes_in_filetype = filetype_size/etype_size;
+ n_filetypes = (int) (offset / n_etypes_in_filetype);
+ etype_in_filetype = (int) (offset % n_etypes_in_filetype);
+ size_in_filetype = etype_in_filetype * etype_size;
+
+ sum = 0;
+ for (i=0; icount; i++) {
+ sum += flat_file->blocklens[i];
+ if (sum > size_in_filetype) {
+ st_index = i;
+ frd_size = sum - size_in_filetype;
+ abs_off_in_filetype = flat_file->indices[i] +
+ size_in_filetype - (sum - flat_file->blocklens[i]);
+ break;
+ }
+ }
+
+ /* abs. offset in bytes in the file */
+ offset = disp + ((ADIO_Offset) n_filetypes)*filetype_extent +
+ abs_off_in_filetype;
+ } /* else [file_ptr_type != ADIO_INDIVIDUAL] */
+
+ start_off = offset;
+ st_frd_size = frd_size;
+ st_n_filetypes = n_filetypes;
+
+ if (buftype_is_contig && !filetype_is_contig) {
+
+/* contiguous in memory, noncontiguous in file. should be the most
+ common case. */
+
+ int mem_lengths;
+ char *mem_offsets;
+
+ i = 0;
+ j = st_index;
+ n_filetypes = st_n_filetypes;
+
+ mem_list_count = 1;
+
+ /* determine how many blocks in file to read */
+ f_data_read = ADIOI_MIN(st_frd_size, bufsize);
+ total_blks_to_read = 1;
+ if (j < (flat_file->count-1)) j++;
+ else {
+ j = 0;
+ n_filetypes++;
+ }
+ while (f_data_read < bufsize) {
+ f_data_read += flat_file->blocklens[j];
+ total_blks_to_read++;
+ if (j<(flat_file->count-1)) j++;
+ else j = 0;
+ }
+
+ j = st_index;
+ n_filetypes = st_n_filetypes;
+ n_read_lists = total_blks_to_read/MAX_ARRAY_SIZE;
+ extra_blks = total_blks_to_read%MAX_ARRAY_SIZE;
+
+ mem_offsets = buf;
+ mem_lengths = 0;
+
+ /* if at least one full readlist, allocate file arrays
+ at max array size and don't free until very end */
+ if (n_read_lists) {
+ file_offsets = (int64_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
+ sizeof(int64_t));
+ file_lengths = (int32_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
+ sizeof(int32_t));
+ }
+ /* if there's no full readlist allocate file arrays according
+ to needed size (extra_blks) */
+ else {
+ file_offsets = (int64_t*)ADIOI_Malloc(extra_blks*
+ sizeof(int64_t));
+ file_lengths = (int32_t*)ADIOI_Malloc(extra_blks*
+ sizeof(int32_t));
+ }
+
+ /* for file arrays that are of MAX_ARRAY_SIZE, build arrays */
+ for (i=0; iindices[j];
+ file_lengths[k] = flat_file->blocklens[j];
+ mem_lengths += file_lengths[k];
+ }
+ if (j<(flat_file->count - 1)) j++;
+ else {
+ j = 0;
+ n_filetypes++;
+ }
+ } /* for (k=0; kobject_ref, file_req, 0,
+ mem_offsets, mem_req,
+ &(pvfs_fs->credentials), &resp_io);
+#ifdef ADIOI_MPE_LOGGING
+ MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
+#endif
+ /* --BEGIN ERROR HANDLING-- */
+ if (err_flag != 0) {
+ *error_code = MPIO_Err_create_code(MPI_SUCCESS,
+ MPIR_ERR_RECOVERABLE,
+ myname, __LINE__,
+ ADIOI_PVFS2_error_convert(err_flag),
+ "Error in PVFS_sys_read", 0);
+ goto error_state;
+ }
+ /* --END ERROR HANDLING-- */
+ PVFS_Request_free(&mem_req);
+ PVFS_Request_free(&file_req);
+
+ total_bytes_read += resp_io.total_completed;
+
+ mem_offsets += mem_lengths;
+ mem_lengths = 0;
+ } /* for (i=0; iindices[j];
+ if (k == (extra_blks - 1)) {
+ file_lengths[k] = bufsize - (int32_t) mem_lengths
+ - (int32_t) mem_offsets + (int32_t) buf;
+ }
+ else file_lengths[k] = flat_file->blocklens[j];
+ } /* if(i || k) */
+ mem_lengths += file_lengths[k];
+ if (j<(flat_file->count - 1)) j++;
+ else {
+ j = 0;
+ n_filetypes++;
+ }
+ } /* for (k=0; kobject_ref, file_req, 0,
+ mem_offsets, mem_req, &(pvfs_fs->credentials), &resp_io);
+#ifdef ADIOI_MPE_LOGGING
+ MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
+#endif
+ /* --BEGIN ERROR HANDLING-- */
+ if (err_flag != 0) {
+ *error_code = MPIO_Err_create_code(MPI_SUCCESS,
+ MPIR_ERR_RECOVERABLE,
+ myname, __LINE__,
+ ADIOI_PVFS2_error_convert(err_flag),
+ "Error in PVFS_sys_read", 0);
+ goto error_state;
+ }
+ /* --END ERROR HANDLING-- */
+ PVFS_Request_free(&mem_req);
+ PVFS_Request_free(&file_req);
+ total_bytes_read += resp_io.total_completed;
+ }
+ }
+ else {
+/* noncontiguous in memory as well as in file */
+
+ ADIOI_Flatten_datatype(datatype);
+ flat_buf = ADIOI_Flatlist;
+ while (flat_buf->type != datatype) flat_buf = flat_buf->next;
+
+ size_read = 0;
+ n_filetypes = st_n_filetypes;
+ frd_size = st_frd_size;
+ brd_size = flat_buf->blocklens[0];
+ buf_count = 0;
+ start_mem_offset = 0;
+ start_k = k = 0;
+ start_j = st_index;
+ max_mem_list = 0;
+ max_file_list = 0;
+
+ /* run through and find max_file_list and max_mem_list so that you
+ can allocate the file and memory arrays less than MAX_ARRAY_SIZE
+ if possible */
+
+ while (size_read < bufsize) {
+ k = start_k;
+ new_buffer_read = 0;
+ mem_list_count = 0;
+ while ((mem_list_count < MAX_ARRAY_SIZE) &&
+ (new_buffer_read < bufsize-size_read)) {
+ /* find mem_list_count and file_list_count such that both are
+ less than MAX_ARRAY_SIZE, the sum of their lengths are
+ equal, and the sum of all the data read and data to be
+ read in the next immediate read list is less than
+ bufsize */
+ if(mem_list_count) {
+ if((new_buffer_read + flat_buf->blocklens[k] +
+ size_read) > bufsize) {
+ end_brd_size = new_buffer_read +
+ flat_buf->blocklens[k] - (bufsize - size_read);
+ new_buffer_read = bufsize - size_read;
+ }
+ else {
+ new_buffer_read += flat_buf->blocklens[k];
+ end_brd_size = flat_buf->blocklens[k];
+ }
+ }
+ else {
+ if (brd_size > (bufsize - size_read)) {
+ new_buffer_read = bufsize - size_read;
+ brd_size = new_buffer_read;
+ }
+ else new_buffer_read = brd_size;
+ }
+ mem_list_count++;
+ k = (k + 1)%flat_buf->count;
+ } /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
+ (new_buffer_read < bufsize-size_read)) */
+ j = start_j;
+ new_file_read = 0;
+ file_list_count = 0;
+ while ((file_list_count < MAX_ARRAY_SIZE) &&
+ (new_file_read < new_buffer_read)) {
+ if(file_list_count) {
+ if((new_file_read + flat_file->blocklens[j]) >
+ new_buffer_read) {
+ end_frd_size = new_buffer_read - new_file_read;
+ new_file_read = new_buffer_read;
+ j--;
+ }
+ else {
+ new_file_read += flat_file->blocklens[j];
+ end_frd_size = flat_file->blocklens[j];
+ }
+ }
+ else {
+ if (frd_size > new_buffer_read) {
+ new_file_read = new_buffer_read;
+ frd_size = new_file_read;
+ }
+ else new_file_read = frd_size;
+ }
+ file_list_count++;
+ if (j < (flat_file->count - 1)) j++;
+ else j = 0;
+
+ k = start_k;
+ if ((new_file_read < new_buffer_read) &&
+ (file_list_count == MAX_ARRAY_SIZE)) {
+ new_buffer_read = 0;
+ mem_list_count = 0;
+ while (new_buffer_read < new_file_read) {
+ if(mem_list_count) {
+ if((new_buffer_read + flat_buf->blocklens[k]) >
+ new_file_read) {
+ end_brd_size = new_file_read - new_buffer_read;
+ new_buffer_read = new_file_read;
+ k--;
+ }
+ else {
+ new_buffer_read += flat_buf->blocklens[k];
+ end_brd_size = flat_buf->blocklens[k];
+ }
+ }
+ else {
+ new_buffer_read = brd_size;
+ if (brd_size > (bufsize - size_read)) {
+ new_buffer_read = bufsize - size_read;
+ brd_size = new_buffer_read;
+ }
+ }
+ mem_list_count++;
+ k = (k + 1)%flat_buf->count;
+ } /* while (new_buffer_read < new_file_read) */
+ } /* if ((new_file_read < new_buffer_read) && (file_list_count
+ == MAX_ARRAY_SIZE)) */
+ } /* while ((file_list_count < MAX_ARRAY_SIZE) &&
+ (new_file_read < new_buffer_read)) */
+
+ /* fakes filling the readlist arrays of lengths found above */
+ k = start_k;
+ j = start_j;
+ for (i=0; iblocklens[k] == end_brd_size)
+ brd_size = flat_buf->blocklens[(k+1)%
+ flat_buf->count];
+ else {
+ brd_size = flat_buf->blocklens[k] - end_brd_size;
+ k--;
+ buf_count--;
+ }
+ }
+ }
+ buf_count++;
+ k = (k + 1)%flat_buf->count;
+ } /* for (i=0; iblocklens[j] == end_frd_size)
+ frd_size = flat_file->blocklens[(j+1)%
+ flat_file->count];
+ else {
+ frd_size = flat_file->blocklens[j] - end_frd_size;
+ j--;
+ }
+ }
+ }
+ if (j < flat_file->count - 1) j++;
+ else {
+ j = 0;
+ n_filetypes++;
+ }
+ } /* for (i=0; iblocklens[0] ) ) ||
+ ((mem_list_count == 1) &&
+ (new_buffer_read < flat_buf->blocklens[0]) ) ||
+ ((file_list_count == MAX_ARRAY_SIZE) &&
+ (new_file_read < flat_buf->blocklens[0]) ) ||
+ ( (mem_list_count == MAX_ARRAY_SIZE) &&
+ (new_buffer_read < flat_file->blocklens[0])) )
+ {
+
+ ADIOI_Delete_flattened(datatype);
+ ADIOI_GEN_ReadStrided_naive(fd, buf, count, datatype,
+ file_ptr_type, initial_off, status, error_code);
+ return;
+ }
+
+ mem_offsets = (PVFS_size*)ADIOI_Malloc(max_mem_list*sizeof(PVFS_size));
+ mem_lengths = (int *)ADIOI_Malloc(max_mem_list*sizeof(int));
+ file_offsets = (int64_t *)ADIOI_Malloc(max_file_list*sizeof(int64_t));
+ file_lengths = (int32_t *)ADIOI_Malloc(max_file_list*sizeof(int32_t));
+
+ size_read = 0;
+ n_filetypes = st_n_filetypes;
+ frd_size = st_frd_size;
+ brd_size = flat_buf->blocklens[0];
+ buf_count = 0;
+ start_mem_offset = 0;
+ start_k = k = 0;
+ start_j = st_index;
+
+ /* this section calculates mem_list_count and file_list_count
+ and also finds the possibly odd sized last array elements
+ in end_frd_size and end_brd_size */
+
+ while (size_read < bufsize) {
+ k = start_k;
+ new_buffer_read = 0;
+ mem_list_count = 0;
+ while ((mem_list_count < MAX_ARRAY_SIZE) &&
+ (new_buffer_read < bufsize-size_read)) {
+ /* find mem_list_count and file_list_count such that both are
+ less than MAX_ARRAY_SIZE, the sum of their lengths are
+ equal, and the sum of all the data read and data to be
+ read in the next immediate read list is less than
+ bufsize */
+ if(mem_list_count) {
+ if((new_buffer_read + flat_buf->blocklens[k] +
+ size_read) > bufsize) {
+ end_brd_size = new_buffer_read +
+ flat_buf->blocklens[k] - (bufsize - size_read);
+ new_buffer_read = bufsize - size_read;
+ }
+ else {
+ new_buffer_read += flat_buf->blocklens[k];
+ end_brd_size = flat_buf->blocklens[k];
+ }
+ }
+ else {
+ if (brd_size > (bufsize - size_read)) {
+ new_buffer_read = bufsize - size_read;
+ brd_size = new_buffer_read;
+ }
+ else new_buffer_read = brd_size;
+ }
+ mem_list_count++;
+ k = (k + 1)%flat_buf->count;
+ } /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
+ (new_buffer_read < bufsize-size_read)) */
+ j = start_j;
+ new_file_read = 0;
+ file_list_count = 0;
+ while ((file_list_count < MAX_ARRAY_SIZE) &&
+ (new_file_read < new_buffer_read)) {
+ if(file_list_count) {
+ if((new_file_read + flat_file->blocklens[j]) >
+ new_buffer_read) {
+ end_frd_size = new_buffer_read - new_file_read;
+ new_file_read = new_buffer_read;
+ j--;
+ }
+ else {
+ new_file_read += flat_file->blocklens[j];
+ end_frd_size = flat_file->blocklens[j];
+ }
+ }
+ else {
+ if (frd_size > new_buffer_read) {
+ new_file_read = new_buffer_read;
+ frd_size = new_file_read;
+ }
+ else new_file_read = frd_size;
+ }
+ file_list_count++;
+ if (j < (flat_file->count - 1)) j++;
+ else j = 0;
+
+ k = start_k;
+ if ((new_file_read < new_buffer_read) &&
+ (file_list_count == MAX_ARRAY_SIZE)) {
+ new_buffer_read = 0;
+ mem_list_count = 0;
+ while (new_buffer_read < new_file_read) {
+ if(mem_list_count) {
+ if((new_buffer_read + flat_buf->blocklens[k]) >
+ new_file_read) {
+ end_brd_size = new_file_read - new_buffer_read;
+ new_buffer_read = new_file_read;
+ k--;
+ }
+ else {
+ new_buffer_read += flat_buf->blocklens[k];
+ end_brd_size = flat_buf->blocklens[k];
+ }
+ }
+ else {
+ new_buffer_read = brd_size;
+ if (brd_size > (bufsize - size_read)) {
+ new_buffer_read = bufsize - size_read;
+ brd_size = new_buffer_read;
+ }
+ }
+ mem_list_count++;
+ k = (k + 1)%flat_buf->count;
+ } /* while (new_buffer_read < new_file_read) */
+ } /* if ((new_file_read < new_buffer_read) && (file_list_count
+ == MAX_ARRAY_SIZE)) */
+ } /* while ((file_list_count < MAX_ARRAY_SIZE) &&
+ (new_file_read < new_buffer_read)) */
+
+ /* fills the allocated readlist arrays */
+ k = start_k;
+ j = start_j;
+ for (i=0; icount) +
+ (int)flat_buf->indices[k]);
+ if(!i) {
+ mem_lengths[0] = brd_size;
+ mem_offsets[0] += flat_buf->blocklens[k] - brd_size;
+ }
+ else {
+ if (i == (mem_list_count - 1)) {
+ mem_lengths[i] = end_brd_size;
+ if (flat_buf->blocklens[k] == end_brd_size)
+ brd_size = flat_buf->blocklens[(k+1)%
+ flat_buf->count];
+ else {
+ brd_size = flat_buf->blocklens[k] - end_brd_size;
+ k--;
+ buf_count--;
+ }
+ }
+ else {
+ mem_lengths[i] = flat_buf->blocklens[k];
+ }
+ }
+ buf_count++;
+ k = (k + 1)%flat_buf->count;
+ } /* for (i=0; iindices[j] +
+ ((ADIO_Offset)n_filetypes) * filetype_extent;
+ if (!i) {
+ file_lengths[0] = frd_size;
+ file_offsets[0] += flat_file->blocklens[j] - frd_size;
+ }
+ else {
+ if (i == (file_list_count - 1)) {
+ file_lengths[i] = end_frd_size;
+ if (flat_file->blocklens[j] == end_frd_size)
+ frd_size = flat_file->blocklens[(j+1)%
+ flat_file->count];
+ else {
+ frd_size = flat_file->blocklens[j] - end_frd_size;
+ j--;
+ }
+ }
+ else file_lengths[i] = flat_file->blocklens[j];
+ }
+ if (j < flat_file->count - 1) j++;
+ else {
+ j = 0;
+ n_filetypes++;
+ }
+ } /* for (i=0; iobject_ref, file_req, 0,
+ PVFS_BOTTOM, mem_req, &(pvfs_fs->credentials), &resp_io);
+#ifdef ADIOI_MPE_LOGGING
+ MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
+#endif
+ /* --BEGIN ERROR HANDLING-- */
+ if (err_flag != 0) {
+ *error_code = MPIO_Err_create_code(MPI_SUCCESS,
+ MPIR_ERR_RECOVERABLE,
+ myname, __LINE__,
+ ADIOI_PVFS2_error_convert(err_flag),
+ "Error in PVFS_sys_read", 0);
+ }
+ /* --END ERROR HANDLING-- */
+ PVFS_Request_free(&mem_req);
+ PVFS_Request_free(&file_req);
+ total_bytes_read += resp_io.total_completed;
+ size_read += new_buffer_read;
+ start_k = k;
+ start_j = j;
+ } /* while (size_read < bufsize) */
+ ADIOI_Free(mem_offsets);
+ ADIOI_Free(mem_lengths);
+ }
+ /* Other ADIO routines will convert absolute bytes into counts of datatypes */
+ /* when incrementing fp_ind, need to also take into account the file type:
+ * consider an N-element 1-d subarray with a lb and ub: ( |---xxxxx-----|
+ * if we wrote N elements, offset needs to point at beginning of type, not
+ * at empty region at offset N+1)
+ *
+ * As we discussed on mpich-discuss in may/june 2009, the code below might
+ * look weird, but by putting fp_ind at the last byte written, the next
+ * time we run through the strided code we'll update the fp_ind to the
+ * right location. */
+ if (file_ptr_type == ADIO_INDIVIDUAL) {
+ fd->fp_ind = file_offsets[file_list_count-1]+
+ file_lengths[file_list_count-1];
+ }
+
+ ADIOI_Free(file_offsets);
+ ADIOI_Free(file_lengths);
+
+ if (err_flag == 0) *error_code = MPI_SUCCESS;
+
+error_state:
+ fd->fp_sys_posn = -1; /* set it to null. */
+
+#ifdef HAVE_STATUS_SET_BYTES
+ MPIR_Status_set_bytes(status, datatype, bufsize);
+ /* This is a temporary way of filling in status. The right way is to
+ keep track of how much data was actually read and placed in buf
+ by ADIOI_BUFFERED_READ. */
+#endif
+
+ if (!buftype_is_contig) ADIOI_Delete_flattened(datatype);
+}
+
diff --git a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_write.c b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_write.c
index aaa4c75a41..47f64ad2fd 100644
--- a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_write.c
+++ b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_write.c
@@ -7,7 +7,7 @@
#include "ad_pvfs2.h"
#include "adio_extern.h"
-
+#include "ad_pvfs2_io.h"
#include "ad_pvfs2_common.h"
void ADIOI_PVFS2_WriteContig(ADIO_File fd, void *buf, int count,
@@ -104,950 +104,78 @@ fn_exit:
return;
}
+/* Write-side entry point for list I/O: forwards every argument unchanged to
+ * ADIOI_PVFS2_StridedListIO with the WRITE flag and returns its result. */
+int ADIOI_PVFS2_WriteStridedListIO(ADIO_File fd, void *buf, int count,
+        MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset,
+        ADIO_Status *status, int *error_code)
+{
+    int rc = ADIOI_PVFS2_StridedListIO(fd, buf, count, datatype, file_ptr_type,
+                                       offset, status, error_code, WRITE);
+    return rc;
+}
+
+/* Write-side entry point for datatype I/O: forwards every argument unchanged
+ * to ADIOI_PVFS2_StridedDtypeIO with the WRITE flag and returns its result. */
+int ADIOI_PVFS2_WriteStridedDtypeIO(ADIO_File fd, void *buf, int count,
+        MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset,
+        ADIO_Status *status, int *error_code)
+{
+    int rc = ADIOI_PVFS2_StridedDtypeIO(fd, buf, count, datatype, file_ptr_type,
+                                        offset, status, error_code, WRITE);
+    return rc;
+}
+
+
void ADIOI_PVFS2_WriteStrided(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status,
int *error_code)
{
- /* as with all the other WriteStrided functions, offset is in units of
- * etype relative to the filetype */
+ /* four ways (to date) that we can carry out strided i/o accesses:
+ * - naive posix
+ * - 'true' Datatype (from avery)
+ * - new List I/O (from avery)
+ * - classic List I/O (the one that's always been in ROMIO)
+ * I imagine we'll keep Datatype as an optional optimization, and after a
+ * release or two promote it to the default
+ */
- /* Since PVFS2 does not support file locking, can't do buffered writes
- as on Unix */
-
- ADIOI_Flatlist_node *flat_buf, *flat_file;
- int i, j, k, bwr_size, fwr_size=0, st_index=0;
- int bufsize, sum, n_etypes_in_filetype, size_in_filetype;
- int n_filetypes, etype_in_filetype;
- ADIO_Offset abs_off_in_filetype=0;
- int filetype_size, etype_size, buftype_size;
- MPI_Aint filetype_extent, buftype_extent;
- int buf_count, buftype_is_contig, filetype_is_contig;
- ADIO_Offset off, disp, start_off, initial_off;
- int flag, st_fwr_size, st_n_filetypes;
- int err_flag=0;
-
- int mem_list_count, file_list_count;
- PVFS_size * mem_offsets;
- int64_t *file_offsets;
- int *mem_lengths;
- int32_t *file_lengths;
- int total_blks_to_write;
-
- int max_mem_list, max_file_list;
-
- int b_blks_wrote;
- int f_data_wrote;
- int size_wrote=0, n_write_lists, extra_blks;
-
- int end_bwr_size, end_fwr_size;
- int start_k, start_j, new_file_write, new_buffer_write;
- int start_mem_offset;
- PVFS_Request mem_req, file_req;
- ADIOI_PVFS2_fs * pvfs_fs;
- PVFS_sysresp_io resp_io;
- MPI_Offset total_bytes_written=0;
- static char myname[] = "ADIOI_PVFS2_WRITESTRIDED";
-
- /* note: don't increase this: several parts of PVFS2 now
- * assume this limit*/
-#define MAX_ARRAY_SIZE 64
-
- /* --BEGIN ERROR HANDLING-- */
- if (fd->atomicity) {
- *error_code = MPIO_Err_create_code(MPI_SUCCESS,
- MPIR_ERR_RECOVERABLE,
- myname, __LINE__,
- MPI_ERR_ARG,
- "Atomic noncontiguous writes are not supported by PVFS2", 0);
- return;
+ /* a lot of near-duplication from ADIOI_PVFS2_ReadStrided: for
+ * debugging/testing it's helpful to be able to turn on and off these
+ * optimizations separately for the read and write cases */
+ int ret = -1;
+ if ( fd->hints->fs_hints.pvfs2.posix_write == ADIOI_HINT_ENABLE) {
+ ADIOI_GEN_WriteStrided_naive(fd, buf, count,
+ datatype, file_ptr_type,
+ offset, status, error_code);
+ return;
}
- /* --END ERROR HANDLING-- */
+ if ( fd->hints->fs_hints.pvfs2.dtype_write == ADIOI_HINT_ENABLE) {
+ ret = ADIOI_PVFS2_WriteStridedDtypeIO(fd, buf, count,
+ datatype, file_ptr_type,
+ offset, status, error_code);
- ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
- ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
-
- /* the HDF5 tests showed a bug in this list processing code (see many many
- * lines down below). We added a workaround, but common HDF5 file types
- * are actually contiguous and do not need the expensive workarond */
- if (!filetype_is_contig) {
- flat_file = ADIOI_Flatlist;
- while (flat_file->type != fd->filetype) flat_file = flat_file->next;
- if (flat_file->count == 1 && !buftype_is_contig)
- filetype_is_contig = 1;
- }
-
- MPI_Type_size(fd->filetype, &filetype_size);
- if ( ! filetype_size ) {
- *error_code = MPI_SUCCESS;
- return;
- }
-
- MPI_Type_extent(fd->filetype, &filetype_extent);
- MPI_Type_size(datatype, &buftype_size);
- MPI_Type_extent(datatype, &buftype_extent);
- etype_size = fd->etype_size;
-
- bufsize = buftype_size * count;
-
- pvfs_fs = (ADIOI_PVFS2_fs*)fd->fs_ptr;
-
- if (!buftype_is_contig && filetype_is_contig) {
-
-/* noncontiguous in memory, contiguous in file. */
- int64_t file_offsets;
- int32_t file_lengths;
-
- ADIOI_Flatten_datatype(datatype);
- flat_buf = ADIOI_Flatlist;
- while (flat_buf->type != datatype) flat_buf = flat_buf->next;
-
- if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
- off = fd->disp + etype_size * offset;
- }
- else off = fd->fp_ind;
-
- file_list_count = 1;
- file_offsets = off;
- file_lengths = 0;
- total_blks_to_write = count*flat_buf->count;
- b_blks_wrote = 0;
-
- /* allocate arrays according to max usage */
- if (total_blks_to_write > MAX_ARRAY_SIZE)
- mem_list_count = MAX_ARRAY_SIZE;
- else mem_list_count = total_blks_to_write;
- mem_offsets = (PVFS_size*)ADIOI_Malloc(mem_list_count*sizeof(PVFS_size));
- mem_lengths = (int*)ADIOI_Malloc(mem_list_count*sizeof(int));
-
- j = 0;
- /* step through each block in memory, filling memory arrays */
- while (b_blks_wrote < total_blks_to_write) {
- for (i=0; icount; i++) {
- mem_offsets[b_blks_wrote % MAX_ARRAY_SIZE] =
- /* TODO: fix this warning by casting to an integer that's
- * the same size as a char * and /then/ casting to
- * PVFS_size */
- ((PVFS_size)buf + j*buftype_extent + flat_buf->indices[i]);
- mem_lengths[b_blks_wrote % MAX_ARRAY_SIZE] =
- flat_buf->blocklens[i];
- file_lengths += flat_buf->blocklens[i];
- b_blks_wrote++;
- if (!(b_blks_wrote % MAX_ARRAY_SIZE) ||
- (b_blks_wrote == total_blks_to_write)) {
-
- /* in the case of the last write list call,
- adjust mem_list_count */
- if (b_blks_wrote == total_blks_to_write) {
- mem_list_count = total_blks_to_write % MAX_ARRAY_SIZE;
- /* in case last write list call fills max arrays */
- if (!mem_list_count) mem_list_count = MAX_ARRAY_SIZE;
- }
- err_flag = PVFS_Request_hindexed(mem_list_count,
- mem_lengths, mem_offsets,
- PVFS_BYTE, &mem_req);
- /* --BEGIN ERROR HANDLING-- */
- if (err_flag != 0) {
- *error_code = MPIO_Err_create_code(MPI_SUCCESS,
- MPIR_ERR_RECOVERABLE,
- myname, __LINE__,
- ADIOI_PVFS2_error_convert(err_flag),
- "Error in PVFS_Request_hindexed (memory)", 0);
- break;
- }
- /* --END ERROR HANDLING-- */
-
- err_flag = PVFS_Request_contiguous(file_lengths,
- PVFS_BYTE, &file_req);
- /* --BEGIN ERROR HANDLING-- */
- if (err_flag != 0) {
- *error_code = MPIO_Err_create_code(MPI_SUCCESS,
- MPIR_ERR_RECOVERABLE,
- myname, __LINE__,
- ADIOI_PVFS2_error_convert(err_flag),
- "Error in PVFS_Request_contiguous (file)", 0);
- break;
- }
- /* --END ERROR HANDLING-- */
-
-#ifdef ADIOI_MPE_LOGGING
- MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
-#endif
- err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req,
- file_offsets, PVFS_BOTTOM,
- mem_req,
- &(pvfs_fs->credentials),
- &resp_io);
-#ifdef ADIOI_MPE_LOGGING
- MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
-#endif
- total_bytes_written += resp_io.total_completed;
-
- /* in the case of error or the last write list call,
- * leave here */
- /* --BEGIN ERROR HANDLING-- */
- if (err_flag) {
- *error_code = MPIO_Err_create_code(MPI_SUCCESS,
- MPIR_ERR_RECOVERABLE,
- myname, __LINE__,
- ADIOI_PVFS2_error_convert(err_flag),
- "Error in PVFS_sys_write", 0);
- break;
- }
- /* --END ERROR HANDLING-- */
- if (b_blks_wrote == total_blks_to_write) break;
-
- file_offsets += file_lengths;
- file_lengths = 0;
- PVFS_Request_free(&mem_req);
- PVFS_Request_free(&file_req);
- }
- } /* for (i=0; icount; i++) */
- j++;
- } /* while (b_blks_wrote < total_blks_to_write) */
- ADIOI_Free(mem_offsets);
- ADIOI_Free(mem_lengths);
-
- if (file_ptr_type == ADIO_INDIVIDUAL)
- fd->fp_ind += total_bytes_written;
-
- if (!err_flag) *error_code = MPI_SUCCESS;
-
- fd->fp_sys_posn = -1; /* clear this. */
-
-#ifdef HAVE_STATUS_SET_BYTES
- MPIR_Status_set_bytes(status, datatype, bufsize);
-/* This is a temporary way of filling in status. The right way is to
- keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */
-#endif
-
- ADIOI_Delete_flattened(datatype);
- return;
- } /* if (!buftype_is_contig && filetype_is_contig) */
-
- /* already know that file is noncontiguous from above */
- /* noncontiguous in file */
-
-/* filetype already flattened in ADIO_Open */
- flat_file = ADIOI_Flatlist;
- while (flat_file->type != fd->filetype) flat_file = flat_file->next;
-
- disp = fd->disp;
- initial_off = offset;
-
- /* for each case - ADIO_Individual pointer or explicit, find offset
- (file offset in bytes), n_filetypes (how many filetypes into file
- to start), fwr_size (remaining amount of data in present file
- block), and st_index (start point in terms of blocks in starting
- filetype) */
- if (file_ptr_type == ADIO_INDIVIDUAL) {
- offset = fd->fp_ind; /* in bytes */
- n_filetypes = -1;
- flag = 0;
- while (!flag) {
- n_filetypes++;
- for (i=0; icount; i++) {
- if (disp + flat_file->indices[i] +
- ((ADIO_Offset) n_filetypes)*filetype_extent +
- flat_file->blocklens[i] >= offset) {
- st_index = i;
- fwr_size = disp + flat_file->indices[i] +
- ((ADIO_Offset) n_filetypes)*filetype_extent
- + flat_file->blocklens[i] - offset;
- flag = 1;
- break;
- }
- }
- } /* while (!flag) */
- } /* if (file_ptr_type == ADIO_INDIVIDUAL) */
- else {
- n_etypes_in_filetype = filetype_size/etype_size;
- n_filetypes = (int) (offset / n_etypes_in_filetype);
- etype_in_filetype = (int) (offset % n_etypes_in_filetype);
- size_in_filetype = etype_in_filetype * etype_size;
-
- sum = 0;
- for (i=0; icount; i++) {
- sum += flat_file->blocklens[i];
- if (sum > size_in_filetype) {
- st_index = i;
- fwr_size = sum - size_in_filetype;
- abs_off_in_filetype = flat_file->indices[i] +
- size_in_filetype - (sum - flat_file->blocklens[i]);
- break;
- }
- }
-
- /* abs. offset in bytes in the file */
- offset = disp + ((ADIO_Offset) n_filetypes)*filetype_extent +
- abs_off_in_filetype;
- } /* else [file_ptr_type != ADIO_INDIVIDUAL] */
-
- start_off = offset;
- st_fwr_size = fwr_size;
- st_n_filetypes = n_filetypes;
-
- if (buftype_is_contig && !filetype_is_contig) {
-
-/* contiguous in memory, noncontiguous in file. should be the most
- common case. */
-
- int mem_lengths;
- char *mem_offsets;
-
- i = 0;
- j = st_index;
- off = offset;
- n_filetypes = st_n_filetypes;
-
- mem_list_count = 1;
-
- /* determine how many blocks in file to write */
- f_data_wrote = ADIOI_MIN(st_fwr_size, bufsize);
- total_blks_to_write = 1;
- if (j < (flat_file->count -1)) j++;
- else {
- j = 0;
- n_filetypes++;
- }
- while (f_data_wrote < bufsize) {
- f_data_wrote += flat_file->blocklens[j];
- total_blks_to_write++;
- if (j<(flat_file->count-1)) j++;
- else j = 0;
- }
-
- j = st_index;
- n_filetypes = st_n_filetypes;
- n_write_lists = total_blks_to_write/MAX_ARRAY_SIZE;
- extra_blks = total_blks_to_write%MAX_ARRAY_SIZE;
-
- mem_offsets = buf;
- mem_lengths = 0;
-
- /* if at least one full writelist, allocate file arrays
- at max array size and don't free until very end */
- if (n_write_lists) {
- file_offsets = (int64_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
- sizeof(int64_t));
- file_lengths = (int32_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
- sizeof(int32_t));
- }
- /* if there's no full writelist allocate file arrays according
- to needed size (extra_blks) */
- else {
- file_offsets = (int64_t*)ADIOI_Malloc(extra_blks*
- sizeof(int64_t));
- file_lengths = (int32_t*)ADIOI_Malloc(extra_blks*
- sizeof(int32_t));
+ /* Fall back to list I/O if datatype I/O didn't work */
+ if (ret != 0)
+ {
+ fprintf(stderr,
+ "Falling back to list I/O since datatype I/O failed\n");
+ ret = ADIOI_PVFS2_WriteStridedListIO(fd, buf, count,
+ datatype, file_ptr_type,
+ offset, status, error_code);
}
-
- /* for file arrays that are of MAX_ARRAY_SIZE, build arrays */
- for (i=0; iindices[j];
- file_lengths[k] = flat_file->blocklens[j];
- mem_lengths += file_lengths[k];
- }
- if (j<(flat_file->count - 1)) j++;
- else {
- j = 0;
- n_filetypes++;
- }
- } /* for (k=0; kobject_ref, file_req, 0,
- mem_offsets, mem_req,
- &(pvfs_fs->credentials), &resp_io);
-#ifdef ADIOI_MPE_LOGGING
- MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
-#endif
- /* --BEGIN ERROR HANDLING-- */
- if (err_flag != 0) {
- *error_code = MPIO_Err_create_code(MPI_SUCCESS,
- MPIR_ERR_RECOVERABLE,
- myname, __LINE__,
- ADIOI_PVFS2_error_convert(err_flag),
- "Error in PVFS_sys_write", 0);
- goto error_state;
- }
- /* --END ERROR HANDLING-- */
- total_bytes_written += resp_io.total_completed;
-
- mem_offsets += mem_lengths;
- mem_lengths = 0;
- PVFS_Request_free(&file_req);
- PVFS_Request_free(&mem_req);
-
- } /* for (i=0; iindices[j];
- if (k == (extra_blks - 1)) {
- file_lengths[k] = bufsize - (int32_t) mem_lengths
- - (int32_t) mem_offsets + (int32_t) buf;
- }
- else file_lengths[k] = flat_file->blocklens[j];
- } /* if(i || k) */
- mem_lengths += file_lengths[k];
- if (j<(flat_file->count - 1)) j++;
- else {
- j = 0;
- n_filetypes++;
- }
- } /* for (k=0; kobject_ref, file_req, 0,
- mem_offsets, mem_req,
- &(pvfs_fs->credentials), &resp_io);
-#ifdef ADIOI_MPE_LOGGING
- MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
-#endif
- /* --BEGIN ERROR HANDLING-- */
- if (err_flag != 0) {
- *error_code = MPIO_Err_create_code(MPI_SUCCESS,
- MPIR_ERR_RECOVERABLE,
- myname, __LINE__,
- ADIOI_PVFS2_error_convert(err_flag),
- "Error in PVFS_sys_write", 0);
- goto error_state;
- }
- /* --END ERROR HANDLING-- */
- total_bytes_written += resp_io.total_completed;
- PVFS_Request_free(&mem_req);
- PVFS_Request_free(&file_req);
- }
- }
- else {
- /* noncontiguous in memory as well as in file */
-
- ADIOI_Flatten_datatype(datatype);
- flat_buf = ADIOI_Flatlist;
- while (flat_buf->type != datatype) flat_buf = flat_buf->next;
-
- size_wrote = 0;
- n_filetypes = st_n_filetypes;
- fwr_size = st_fwr_size;
- bwr_size = flat_buf->blocklens[0];
- buf_count = 0;
- start_mem_offset = 0;
- start_k = k = 0;
- start_j = st_index;
- max_mem_list = 0;
- max_file_list = 0;
-
- /* run through and file max_file_list and max_mem_list so that you
- can allocate the file and memory arrays less than MAX_ARRAY_SIZE
- if possible */
-
- while (size_wrote < bufsize) {
- k = start_k;
- new_buffer_write = 0;
- mem_list_count = 0;
- while ((mem_list_count < MAX_ARRAY_SIZE) &&
- (new_buffer_write < bufsize-size_wrote)) {
- /* find mem_list_count and file_list_count such that both are
- less than MAX_ARRAY_SIZE, the sum of their lengths are
- equal, and the sum of all the data written and data to be
- written in the next immediate write list is less than
- bufsize */
- if(mem_list_count) {
- if((new_buffer_write + flat_buf->blocklens[k] +
- size_wrote) > bufsize) {
- end_bwr_size = new_buffer_write +
- flat_buf->blocklens[k] - (bufsize - size_wrote);
- new_buffer_write = bufsize - size_wrote;
- }
- else {
- new_buffer_write += flat_buf->blocklens[k];
- end_bwr_size = flat_buf->blocklens[k];
- }
- }
- else {
- if (bwr_size > (bufsize - size_wrote)) {
- new_buffer_write = bufsize - size_wrote;
- bwr_size = new_buffer_write;
- }
- else new_buffer_write = bwr_size;
- }
- mem_list_count++;
- k = (k + 1)%flat_buf->count;
- } /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
- (new_buffer_write < bufsize-size_wrote)) */
- j = start_j;
- new_file_write = 0;
- file_list_count = 0;
- while ((file_list_count < MAX_ARRAY_SIZE) &&
- (new_file_write < new_buffer_write)) {
- if(file_list_count) {
- if((new_file_write + flat_file->blocklens[j]) >
- new_buffer_write) {
- end_fwr_size = new_buffer_write - new_file_write;
- new_file_write = new_buffer_write;
- j--;
- }
- else {
- new_file_write += flat_file->blocklens[j];
- end_fwr_size = flat_file->blocklens[j];
- }
- }
- else {
- if (fwr_size > new_buffer_write) {
- new_file_write = new_buffer_write;
- fwr_size = new_file_write;
- }
- else new_file_write = fwr_size;
- }
- file_list_count++;
- if (j < (flat_file->count - 1)) j++;
- else j = 0;
-
- k = start_k;
- if ((new_file_write < new_buffer_write) &&
- (file_list_count == MAX_ARRAY_SIZE)) {
- new_buffer_write = 0;
- mem_list_count = 0;
- while (new_buffer_write < new_file_write) {
- if(mem_list_count) {
- if((new_buffer_write + flat_buf->blocklens[k]) >
- new_file_write) {
- end_bwr_size = new_file_write -
- new_buffer_write;
- new_buffer_write = new_file_write;
- k--;
- }
- else {
- new_buffer_write += flat_buf->blocklens[k];
- end_bwr_size = flat_buf->blocklens[k];
- }
- }
- else {
- new_buffer_write = bwr_size;
- if (bwr_size > (bufsize - size_wrote)) {
- new_buffer_write = bufsize - size_wrote;
- bwr_size = new_buffer_write;
- }
- }
- mem_list_count++;
- k = (k + 1)%flat_buf->count;
- } /* while (new_buffer_write < new_file_write) */
- } /* if ((new_file_write < new_buffer_write) &&
- (file_list_count == MAX_ARRAY_SIZE)) */
- } /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
- (new_buffer_write < bufsize-size_wrote)) */
-
- /* fakes filling the writelist arrays of lengths found above */
- k = start_k;
- j = start_j;
- for (i=0; iblocklens[k] == end_bwr_size)
- bwr_size = flat_buf->blocklens[(k+1)%
- flat_buf->count];
- else {
- bwr_size = flat_buf->blocklens[k] - end_bwr_size;
- k--;
- buf_count--;
- }
- }
- }
- buf_count++;
- k = (k + 1)%flat_buf->count;
- } /* for (i=0; iblocklens[j] == end_fwr_size)
- fwr_size = flat_file->blocklens[(j+1)%
- flat_file->count];
- else {
- fwr_size = flat_file->blocklens[j] - end_fwr_size;
- j--;
- }
- }
- }
- if (j < flat_file->count - 1) j++;
- else {
- j = 0;
- n_filetypes++;
- }
- } /* for (i=0; iblocklens[0] ) ) ||
- ((mem_list_count == 1) &&
- (new_buffer_write < flat_buf->blocklens[0]) ) ||
- ((file_list_count == MAX_ARRAY_SIZE) &&
- (new_file_write < flat_buf->blocklens[0]) ) ||
- ( (mem_list_count == MAX_ARRAY_SIZE) &&
- (new_buffer_write < flat_file->blocklens[0])) )
- {
- ADIOI_Delete_flattened(datatype);
- ADIOI_GEN_WriteStrided_naive(fd, buf, count, datatype,
- file_ptr_type, initial_off, status, error_code);
- return;
- }
-
-
- mem_offsets = (PVFS_size*)ADIOI_Malloc(max_mem_list*sizeof(PVFS_size));
- mem_lengths = (int *)ADIOI_Malloc(max_mem_list*sizeof(int));
- file_offsets = (int64_t *)ADIOI_Malloc(max_file_list*sizeof(int64_t));
- file_lengths = (int32_t *)ADIOI_Malloc(max_file_list*sizeof(int32_t));
-
- size_wrote = 0;
- n_filetypes = st_n_filetypes;
- fwr_size = st_fwr_size;
- bwr_size = flat_buf->blocklens[0];
- buf_count = 0;
- start_mem_offset = 0;
- start_k = k = 0;
- start_j = st_index;
-
- /* this section calculates mem_list_count and file_list_count
- and also finds the possibly odd sized last array elements
- in new_fwr_size and new_bwr_size */
-
- while (size_wrote < bufsize) {
- k = start_k;
- new_buffer_write = 0;
- mem_list_count = 0;
- while ((mem_list_count < MAX_ARRAY_SIZE) &&
- (new_buffer_write < bufsize-size_wrote)) {
- /* find mem_list_count and file_list_count such that both are
- less than MAX_ARRAY_SIZE, the sum of their lengths are
- equal, and the sum of all the data written and data to be
- written in the next immediate write list is less than
- bufsize */
- if(mem_list_count) {
- if((new_buffer_write + flat_buf->blocklens[k] +
- size_wrote) > bufsize) {
- end_bwr_size = new_buffer_write +
- flat_buf->blocklens[k] - (bufsize - size_wrote);
- new_buffer_write = bufsize - size_wrote;
- }
- else {
- new_buffer_write += flat_buf->blocklens[k];
- end_bwr_size = flat_buf->blocklens[k];
- }
- }
- else {
- if (bwr_size > (bufsize - size_wrote)) {
- new_buffer_write = bufsize - size_wrote;
- bwr_size = new_buffer_write;
- }
- else new_buffer_write = bwr_size;
- }
- mem_list_count++;
- k = (k + 1)%flat_buf->count;
- } /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
- (new_buffer_write < bufsize-size_wrote)) */
- j = start_j;
- new_file_write = 0;
- file_list_count = 0;
- while ((file_list_count < MAX_ARRAY_SIZE) &&
- (new_file_write < new_buffer_write)) {
- if(file_list_count) {
- if((new_file_write + flat_file->blocklens[j]) >
- new_buffer_write) {
- end_fwr_size = new_buffer_write - new_file_write;
- new_file_write = new_buffer_write;
- j--;
- }
- else {
- new_file_write += flat_file->blocklens[j];
- end_fwr_size = flat_file->blocklens[j];
- }
- }
- else {
- if (fwr_size > new_buffer_write) {
- new_file_write = new_buffer_write;
- fwr_size = new_file_write;
- }
- else new_file_write = fwr_size;
- }
- file_list_count++;
- if (j < (flat_file->count - 1)) j++;
- else j = 0;
-
- k = start_k;
- if ((new_file_write < new_buffer_write) &&
- (file_list_count == MAX_ARRAY_SIZE)) {
- new_buffer_write = 0;
- mem_list_count = 0;
- while (new_buffer_write < new_file_write) {
- if(mem_list_count) {
- if((new_buffer_write + flat_buf->blocklens[k]) >
- new_file_write) {
- end_bwr_size = new_file_write -
- new_buffer_write;
- new_buffer_write = new_file_write;
- k--;
- }
- else {
- new_buffer_write += flat_buf->blocklens[k];
- end_bwr_size = flat_buf->blocklens[k];
- }
- }
- else {
- new_buffer_write = bwr_size;
- if (bwr_size > (bufsize - size_wrote)) {
- new_buffer_write = bufsize - size_wrote;
- bwr_size = new_buffer_write;
- }
- }
- mem_list_count++;
- k = (k + 1)%flat_buf->count;
- } /* while (new_buffer_write < new_file_write) */
- } /* if ((new_file_write < new_buffer_write) &&
- (file_list_count == MAX_ARRAY_SIZE)) */
- } /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
- (new_buffer_write < bufsize-size_wrote)) */
-
- /* fills the allocated writelist arrays */
- k = start_k;
- j = start_j;
- for (i=0; icount) +
- (int)flat_buf->indices[k]);
-
- if(!i) {
- mem_lengths[0] = bwr_size;
- mem_offsets[0] += flat_buf->blocklens[k] - bwr_size;
- }
- else {
- if (i == (mem_list_count - 1)) {
- mem_lengths[i] = end_bwr_size;
- if (flat_buf->blocklens[k] == end_bwr_size)
- bwr_size = flat_buf->blocklens[(k+1)%
- flat_buf->count];
- else {
- bwr_size = flat_buf->blocklens[k] - end_bwr_size;
- k--;
- buf_count--;
- }
- }
- else {
- mem_lengths[i] = flat_buf->blocklens[k];
- }
- }
- buf_count++;
- k = (k + 1)%flat_buf->count;
- } /* for (i=0; iindices[j] +
- ((ADIO_Offset)n_filetypes) * filetype_extent;
- if (!i) {
- file_lengths[0] = fwr_size;
- file_offsets[0] += flat_file->blocklens[j] - fwr_size;
- }
- else {
- if (i == (file_list_count - 1)) {
- file_lengths[i] = end_fwr_size;
- if (flat_file->blocklens[j] == end_fwr_size)
- fwr_size = flat_file->blocklens[(j+1)%
- flat_file->count];
- else {
- fwr_size = flat_file->blocklens[j] - end_fwr_size;
- j--;
- }
- }
- else file_lengths[i] = flat_file->blocklens[j];
- }
- if (j < flat_file->count - 1) j++;
- else {
- j = 0;
- n_filetypes++;
- }
- } /* for (i=0; iobject_ref, file_req, 0,
- PVFS_BOTTOM, mem_req,
- &(pvfs_fs->credentials), &resp_io);
-#ifdef ADIOI_MPE_LOGGING
- MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
-#endif
- /* --BEGIN ERROR HANDLING-- */
- if (err_flag != 0) {
- *error_code = MPIO_Err_create_code(MPI_SUCCESS,
- MPIR_ERR_RECOVERABLE,
- myname, __LINE__,
- ADIOI_PVFS2_error_convert(err_flag),
- "Error in PVFS_sys_write", 0);
- goto error_state;
- }
- /* --END ERROR HANDLING-- */
-
- size_wrote += new_buffer_write;
- total_bytes_written += resp_io.total_completed;
- start_k = k;
- start_j = j;
- PVFS_Request_free(&mem_req);
- PVFS_Request_free(&file_req);
- } /* while (size_wrote < bufsize) */
- ADIOI_Free(mem_offsets);
- ADIOI_Free(mem_lengths);
+ return;
}
- ADIOI_Free(file_offsets);
- ADIOI_Free(file_lengths);
-
- /* when incrementing fp_ind, need to also take into account the file type:
- * consider an N-element 1-d subarray with a lb and ub: ( |---xxxxx-----|
- * if we wrote N elements, offset needs to point at beginning of type, not
- * at empty region at offset N+1) */
- if (file_ptr_type == ADIO_INDIVIDUAL) {
- /* this is closer, but still incorrect for the cases where a small
- * amount of a file type is "leftover" after a write */
- fd->fp_ind = disp + flat_file->indices[j] +
- ((ADIO_Offset)n_filetypes)*filetype_extent;
+ /* Use list I/O in the base case */
+ if (fd->hints->fs_hints.pvfs2.listio_write == ADIOI_HINT_ENABLE) {
+ ret = ADIOI_PVFS2_WriteStridedListIO(fd, buf, count, datatype,
+ file_ptr_type, offset, status, error_code);
+ return;
}
- *error_code = MPI_SUCCESS;
-error_state:
- fd->fp_sys_posn = -1; /* set it to null. */
-
-#ifdef HAVE_STATUS_SET_BYTES
- MPIR_Status_set_bytes(status, datatype, bufsize);
-/* This is a temporary way of filling in status. The right way is to
- keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */
-#endif
-
- if (!buftype_is_contig) ADIOI_Delete_flattened(datatype);
+ /* Use classic list I/O in the base case, when no hints are given */
+ ADIOI_PVFS2_OldWriteStrided(fd, buf, count, datatype,
+ file_ptr_type, offset, status, error_code);
+ return;
}
diff --git a/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_write_list_classic.c b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_write_list_classic.c
new file mode 100644
index 0000000000..413977eef3
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/ad_pvfs2/ad_pvfs2_write_list_classic.c
@@ -0,0 +1,963 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*-
+ * vim: ts=8 sts=4 sw=4 noexpandtab
+ *
+ * Copyright (C) 2008 University of Chicago.
+ * See COPYRIGHT notice in top-level directory.
+ */
+
+#include "adio.h"
+#include "adio_extern.h"
+#include "ad_pvfs2.h"
+
+#include "ad_pvfs2_common.h"
+
+void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, void *buf, int count,
+ MPI_Datatype datatype, int file_ptr_type,
+ ADIO_Offset offset, ADIO_Status *status,
+ int *error_code)
+{
+ /* as with all the other WriteStrided functions, offset is in units of
+ * etype relative to the filetype */
+
+ /* Since PVFS2 does not support file locking, can't do buffered writes
+ as on Unix */
+
+ ADIOI_Flatlist_node *flat_buf, *flat_file;
+ int i, j, k, bwr_size, fwr_size=0, st_index=0;
+ int bufsize, sum, n_etypes_in_filetype, size_in_filetype;
+ int n_filetypes, etype_in_filetype;
+ ADIO_Offset abs_off_in_filetype=0;
+ int filetype_size, etype_size, buftype_size;
+ MPI_Aint filetype_extent, buftype_extent;
+ int buf_count, buftype_is_contig, filetype_is_contig;
+ ADIO_Offset off, disp, start_off, initial_off;
+ int flag, st_fwr_size, st_n_filetypes;
+ int err_flag=0;
+
+ int mem_list_count, file_list_count;
+ PVFS_size * mem_offsets;
+ int64_t *file_offsets;
+ int *mem_lengths;
+ int32_t *file_lengths;
+ int total_blks_to_write;
+
+ int max_mem_list, max_file_list;
+
+ int b_blks_wrote;
+ int f_data_wrote;
+ int size_wrote=0, n_write_lists, extra_blks;
+
+ int end_bwr_size, end_fwr_size;
+ int start_k, start_j, new_file_write, new_buffer_write;
+ int start_mem_offset;
+ PVFS_Request mem_req, file_req;
+ ADIOI_PVFS2_fs * pvfs_fs;
+ PVFS_sysresp_io resp_io;
+ MPI_Offset total_bytes_written=0;
+ static char myname[] = "ADIOI_PVFS2_WRITESTRIDED";
+
+ /* note: don't increase this: several parts of PVFS2 now
+ * assume this limit*/
+#define MAX_ARRAY_SIZE 64
+
+ /* --BEGIN ERROR HANDLING-- */
+ if (fd->atomicity) {
+ *error_code = MPIO_Err_create_code(MPI_SUCCESS,
+ MPIR_ERR_RECOVERABLE,
+ myname, __LINE__,
+ MPI_ERR_ARG,
+ "Atomic noncontiguous writes are not supported by PVFS2", 0);
+ return;
+ }
+ /* --END ERROR HANDLING-- */
+
+ ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
+ ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
+
+ /* the HDF5 tests showed a bug in this list processing code (see many many
+ * lines down below). We added a workaround, but common HDF5 file types
+ * are actually contiguous and do not need the expensive workaround */
+ if (!filetype_is_contig) {
+ flat_file = ADIOI_Flatlist;
+ while (flat_file->type != fd->filetype) flat_file = flat_file->next;
+ if (flat_file->count == 1 && !buftype_is_contig)
+ filetype_is_contig = 1;
+ }
+
+ MPI_Type_size(fd->filetype, &filetype_size);
+ if ( ! filetype_size ) {
+ *error_code = MPI_SUCCESS;
+ return;
+ }
+
+ MPI_Type_extent(fd->filetype, &filetype_extent);
+ MPI_Type_size(datatype, &buftype_size);
+ MPI_Type_extent(datatype, &buftype_extent);
+ etype_size = fd->etype_size;
+
+ bufsize = buftype_size * count;
+
+ pvfs_fs = (ADIOI_PVFS2_fs*)fd->fs_ptr;
+
+ if (!buftype_is_contig && filetype_is_contig) {
+
+/* noncontiguous in memory, contiguous in file. */
+ int64_t file_offsets;
+ int32_t file_lengths;
+
+ ADIOI_Flatten_datatype(datatype);
+ flat_buf = ADIOI_Flatlist;
+ while (flat_buf->type != datatype) flat_buf = flat_buf->next;
+
+ if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
+ off = fd->disp + etype_size * offset;
+ }
+ else off = fd->fp_ind;
+
+ file_list_count = 1;
+ file_offsets = off;
+ file_lengths = 0;
+ total_blks_to_write = count*flat_buf->count;
+ b_blks_wrote = 0;
+
+ /* allocate arrays according to max usage */
+ if (total_blks_to_write > MAX_ARRAY_SIZE)
+ mem_list_count = MAX_ARRAY_SIZE;
+ else mem_list_count = total_blks_to_write;
+ mem_offsets = (PVFS_size*)ADIOI_Malloc(mem_list_count*sizeof(PVFS_size));
+ mem_lengths = (int*)ADIOI_Malloc(mem_list_count*sizeof(int));
+
+ j = 0;
+ /* step through each block in memory, filling memory arrays */
+ while (b_blks_wrote < total_blks_to_write) {
+ for (i=0; icount; i++) {
+ mem_offsets[b_blks_wrote % MAX_ARRAY_SIZE] =
+ /* TODO: fix this warning by casting to an integer that's
+ * the same size as a char * and /then/ casting to
+ * PVFS_size */
+ ((PVFS_size)buf + j*buftype_extent + flat_buf->indices[i]);
+ mem_lengths[b_blks_wrote % MAX_ARRAY_SIZE] =
+ flat_buf->blocklens[i];
+ file_lengths += flat_buf->blocklens[i];
+ b_blks_wrote++;
+ if (!(b_blks_wrote % MAX_ARRAY_SIZE) ||
+ (b_blks_wrote == total_blks_to_write)) {
+
+ /* in the case of the last write list call,
+ adjust mem_list_count */
+ if (b_blks_wrote == total_blks_to_write) {
+ mem_list_count = total_blks_to_write % MAX_ARRAY_SIZE;
+ /* in case last write list call fills max arrays */
+ if (!mem_list_count) mem_list_count = MAX_ARRAY_SIZE;
+ }
+ err_flag = PVFS_Request_hindexed(mem_list_count,
+ mem_lengths, mem_offsets,
+ PVFS_BYTE, &mem_req);
+ /* --BEGIN ERROR HANDLING-- */
+ if (err_flag != 0) {
+ *error_code = MPIO_Err_create_code(MPI_SUCCESS,
+ MPIR_ERR_RECOVERABLE,
+ myname, __LINE__,
+ ADIOI_PVFS2_error_convert(err_flag),
+ "Error in PVFS_Request_hindexed (memory)", 0);
+ break;
+ }
+ /* --END ERROR HANDLING-- */
+
+ err_flag = PVFS_Request_contiguous(file_lengths,
+ PVFS_BYTE, &file_req);
+ /* --BEGIN ERROR HANDLING-- */
+ if (err_flag != 0) {
+ *error_code = MPIO_Err_create_code(MPI_SUCCESS,
+ MPIR_ERR_RECOVERABLE,
+ myname, __LINE__,
+ ADIOI_PVFS2_error_convert(err_flag),
+ "Error in PVFS_Request_contiguous (file)", 0);
+ break;
+ }
+ /* --END ERROR HANDLING-- */
+
+#ifdef ADIOI_MPE_LOGGING
+ MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
+#endif
+ err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req,
+ file_offsets, PVFS_BOTTOM,
+ mem_req,
+ &(pvfs_fs->credentials),
+ &resp_io);
+#ifdef ADIOI_MPE_LOGGING
+ MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
+#endif
+ total_bytes_written += resp_io.total_completed;
+
+ /* in the case of error or the last write list call,
+ * leave here */
+ /* --BEGIN ERROR HANDLING-- */
+ if (err_flag) {
+ *error_code = MPIO_Err_create_code(MPI_SUCCESS,
+ MPIR_ERR_RECOVERABLE,
+ myname, __LINE__,
+ ADIOI_PVFS2_error_convert(err_flag),
+ "Error in PVFS_sys_write", 0);
+ break;
+ }
+ /* --END ERROR HANDLING-- */
+ if (b_blks_wrote == total_blks_to_write) break;
+
+ file_offsets += file_lengths;
+ file_lengths = 0;
+ PVFS_Request_free(&mem_req);
+ PVFS_Request_free(&file_req);
+ }
+ } /* for (i=0; icount; i++) */
+ j++;
+ } /* while (b_blks_wrote < total_blks_to_write) */
+ ADIOI_Free(mem_offsets);
+ ADIOI_Free(mem_lengths);
+
+ if (file_ptr_type == ADIO_INDIVIDUAL)
+ fd->fp_ind += total_bytes_written;
+
+ if (!err_flag) *error_code = MPI_SUCCESS;
+
+ fd->fp_sys_posn = -1; /* clear this. */
+
+#ifdef HAVE_STATUS_SET_BYTES
+ MPIR_Status_set_bytes(status, datatype, bufsize);
+/* This is a temporary way of filling in status. The right way is to
+ keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */
+#endif
+
+ ADIOI_Delete_flattened(datatype);
+ return;
+ } /* if (!buftype_is_contig && filetype_is_contig) */
+
+ /* already know that file is noncontiguous from above */
+ /* noncontiguous in file */
+
+/* filetype already flattened in ADIO_Open */
+ flat_file = ADIOI_Flatlist;
+ while (flat_file->type != fd->filetype) flat_file = flat_file->next;
+
+ disp = fd->disp;
+ initial_off = offset;
+
+ /* for each case - ADIO_Individual pointer or explicit, find offset
+ (file offset in bytes), n_filetypes (how many filetypes into file
+ to start), fwr_size (remaining amount of data in present file
+ block), and st_index (start point in terms of blocks in starting
+ filetype) */
+ if (file_ptr_type == ADIO_INDIVIDUAL) {
+ offset = fd->fp_ind; /* in bytes */
+ n_filetypes = -1;
+ flag = 0;
+ while (!flag) {
+ n_filetypes++;
+ for (i=0; icount; i++) {
+ if (disp + flat_file->indices[i] +
+ ((ADIO_Offset) n_filetypes)*filetype_extent +
+ flat_file->blocklens[i] >= offset) {
+ st_index = i;
+ fwr_size = disp + flat_file->indices[i] +
+ ((ADIO_Offset) n_filetypes)*filetype_extent
+ + flat_file->blocklens[i] - offset;
+ flag = 1;
+ break;
+ }
+ }
+ } /* while (!flag) */
+ } /* if (file_ptr_type == ADIO_INDIVIDUAL) */
+ else {
+ n_etypes_in_filetype = filetype_size/etype_size;
+ n_filetypes = (int) (offset / n_etypes_in_filetype);
+ etype_in_filetype = (int) (offset % n_etypes_in_filetype);
+ size_in_filetype = etype_in_filetype * etype_size;
+
+ sum = 0;
+ for (i=0; icount; i++) {
+ sum += flat_file->blocklens[i];
+ if (sum > size_in_filetype) {
+ st_index = i;
+ fwr_size = sum - size_in_filetype;
+ abs_off_in_filetype = flat_file->indices[i] +
+ size_in_filetype - (sum - flat_file->blocklens[i]);
+ break;
+ }
+ }
+
+ /* abs. offset in bytes in the file */
+ offset = disp + ((ADIO_Offset) n_filetypes)*filetype_extent +
+ abs_off_in_filetype;
+ } /* else [file_ptr_type != ADIO_INDIVIDUAL] */
+
+ start_off = offset;
+ st_fwr_size = fwr_size;
+ st_n_filetypes = n_filetypes;
+
+ if (buftype_is_contig && !filetype_is_contig) {
+
+/* contiguous in memory, noncontiguous in file. should be the most
+ common case. */
+
+ int mem_lengths;
+ char *mem_offsets;
+
+ i = 0;
+ j = st_index;
+ off = offset;
+ n_filetypes = st_n_filetypes;
+
+ mem_list_count = 1;
+
+ /* determine how many blocks in file to write */
+ f_data_wrote = ADIOI_MIN(st_fwr_size, bufsize);
+ total_blks_to_write = 1;
+ if (j < (flat_file->count -1)) j++;
+ else {
+ j = 0;
+ n_filetypes++;
+ }
+ while (f_data_wrote < bufsize) {
+ f_data_wrote += flat_file->blocklens[j];
+ total_blks_to_write++;
+ if (j<(flat_file->count-1)) j++;
+ else j = 0;
+ }
+
+ j = st_index;
+ n_filetypes = st_n_filetypes;
+ n_write_lists = total_blks_to_write/MAX_ARRAY_SIZE;
+ extra_blks = total_blks_to_write%MAX_ARRAY_SIZE;
+
+ mem_offsets = buf;
+ mem_lengths = 0;
+
+ /* if at least one full writelist, allocate file arrays
+ at max array size and don't free until very end */
+ if (n_write_lists) {
+ file_offsets = (int64_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
+ sizeof(int64_t));
+ file_lengths = (int32_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
+ sizeof(int32_t));
+ }
+ /* if there's no full writelist allocate file arrays according
+ to needed size (extra_blks) */
+ else {
+ file_offsets = (int64_t*)ADIOI_Malloc(extra_blks*
+ sizeof(int64_t));
+ file_lengths = (int32_t*)ADIOI_Malloc(extra_blks*
+ sizeof(int32_t));
+ }
+
+ /* for file arrays that are of MAX_ARRAY_SIZE, build arrays */
+ for (i=0; iindices[j];
+ file_lengths[k] = flat_file->blocklens[j];
+ mem_lengths += file_lengths[k];
+ }
+ if (j<(flat_file->count - 1)) j++;
+ else {
+ j = 0;
+ n_filetypes++;
+ }
+ } /* for (k=0; kobject_ref, file_req, 0,
+ mem_offsets, mem_req,
+ &(pvfs_fs->credentials), &resp_io);
+#ifdef ADIOI_MPE_LOGGING
+ MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
+#endif
+ /* --BEGIN ERROR HANDLING-- */
+ if (err_flag != 0) {
+ *error_code = MPIO_Err_create_code(MPI_SUCCESS,
+ MPIR_ERR_RECOVERABLE,
+ myname, __LINE__,
+ ADIOI_PVFS2_error_convert(err_flag),
+ "Error in PVFS_sys_write", 0);
+ goto error_state;
+ }
+ /* --END ERROR HANDLING-- */
+ total_bytes_written += resp_io.total_completed;
+
+ mem_offsets += mem_lengths;
+ mem_lengths = 0;
+ PVFS_Request_free(&file_req);
+ PVFS_Request_free(&mem_req);
+
+ } /* for (i=0; iindices[j];
+ if (k == (extra_blks - 1)) {
+ file_lengths[k] = bufsize - (int32_t) mem_lengths
+ - (int32_t) mem_offsets + (int32_t) buf;
+ }
+ else file_lengths[k] = flat_file->blocklens[j];
+ } /* if(i || k) */
+ mem_lengths += file_lengths[k];
+ if (j<(flat_file->count - 1)) j++;
+ else {
+ j = 0;
+ n_filetypes++;
+ }
+ } /* for (k=0; kobject_ref, file_req, 0,
+ mem_offsets, mem_req,
+ &(pvfs_fs->credentials), &resp_io);
+#ifdef ADIOI_MPE_LOGGING
+ MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
+#endif
+ /* --BEGIN ERROR HANDLING-- */
+ if (err_flag != 0) {
+ *error_code = MPIO_Err_create_code(MPI_SUCCESS,
+ MPIR_ERR_RECOVERABLE,
+ myname, __LINE__,
+ ADIOI_PVFS2_error_convert(err_flag),
+ "Error in PVFS_sys_write", 0);
+ goto error_state;
+ }
+ /* --END ERROR HANDLING-- */
+ total_bytes_written += resp_io.total_completed;
+ PVFS_Request_free(&mem_req);
+ PVFS_Request_free(&file_req);
+ }
+ }
+ else {
+ /* noncontiguous in memory as well as in file */
+
+ ADIOI_Flatten_datatype(datatype);
+ flat_buf = ADIOI_Flatlist;
+ while (flat_buf->type != datatype) flat_buf = flat_buf->next;
+
+ size_wrote = 0;
+ n_filetypes = st_n_filetypes;
+ fwr_size = st_fwr_size;
+ bwr_size = flat_buf->blocklens[0];
+ buf_count = 0;
+ start_mem_offset = 0;
+ start_k = k = 0;
+ start_j = st_index;
+ max_mem_list = 0;
+ max_file_list = 0;
+
+ /* run through and find max_file_list and max_mem_list so that you
+ can allocate the file and memory arrays less than MAX_ARRAY_SIZE
+ if possible */
+
+ while (size_wrote < bufsize) {
+ k = start_k;
+ new_buffer_write = 0;
+ mem_list_count = 0;
+ while ((mem_list_count < MAX_ARRAY_SIZE) &&
+ (new_buffer_write < bufsize-size_wrote)) {
+ /* find mem_list_count and file_list_count such that both are
+ less than MAX_ARRAY_SIZE, the sum of their lengths are
+ equal, and the sum of all the data written and data to be
+ written in the next immediate write list is less than
+ bufsize */
+ if(mem_list_count) {
+ if((new_buffer_write + flat_buf->blocklens[k] +
+ size_wrote) > bufsize) {
+ end_bwr_size = new_buffer_write +
+ flat_buf->blocklens[k] - (bufsize - size_wrote);
+ new_buffer_write = bufsize - size_wrote;
+ }
+ else {
+ new_buffer_write += flat_buf->blocklens[k];
+ end_bwr_size = flat_buf->blocklens[k];
+ }
+ }
+ else {
+ if (bwr_size > (bufsize - size_wrote)) {
+ new_buffer_write = bufsize - size_wrote;
+ bwr_size = new_buffer_write;
+ }
+ else new_buffer_write = bwr_size;
+ }
+ mem_list_count++;
+ k = (k + 1)%flat_buf->count;
+ } /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
+ (new_buffer_write < bufsize-size_wrote)) */
+ j = start_j;
+ new_file_write = 0;
+ file_list_count = 0;
+ while ((file_list_count < MAX_ARRAY_SIZE) &&
+ (new_file_write < new_buffer_write)) {
+ if(file_list_count) {
+ if((new_file_write + flat_file->blocklens[j]) >
+ new_buffer_write) {
+ end_fwr_size = new_buffer_write - new_file_write;
+ new_file_write = new_buffer_write;
+ j--;
+ }
+ else {
+ new_file_write += flat_file->blocklens[j];
+ end_fwr_size = flat_file->blocklens[j];
+ }
+ }
+ else {
+ if (fwr_size > new_buffer_write) {
+ new_file_write = new_buffer_write;
+ fwr_size = new_file_write;
+ }
+ else new_file_write = fwr_size;
+ }
+ file_list_count++;
+ if (j < (flat_file->count - 1)) j++;
+ else j = 0;
+
+ k = start_k;
+ if ((new_file_write < new_buffer_write) &&
+ (file_list_count == MAX_ARRAY_SIZE)) {
+ new_buffer_write = 0;
+ mem_list_count = 0;
+ while (new_buffer_write < new_file_write) {
+ if(mem_list_count) {
+ if((new_buffer_write + flat_buf->blocklens[k]) >
+ new_file_write) {
+ end_bwr_size = new_file_write -
+ new_buffer_write;
+ new_buffer_write = new_file_write;
+ k--;
+ }
+ else {
+ new_buffer_write += flat_buf->blocklens[k];
+ end_bwr_size = flat_buf->blocklens[k];
+ }
+ }
+ else {
+ new_buffer_write = bwr_size;
+ if (bwr_size > (bufsize - size_wrote)) {
+ new_buffer_write = bufsize - size_wrote;
+ bwr_size = new_buffer_write;
+ }
+ }
+ mem_list_count++;
+ k = (k + 1)%flat_buf->count;
+ } /* while (new_buffer_write < new_file_write) */
+ } /* if ((new_file_write < new_buffer_write) &&
+ (file_list_count == MAX_ARRAY_SIZE)) */
+ } /* while ((file_list_count < MAX_ARRAY_SIZE) &&
+ (new_file_write < new_buffer_write)) */
+
+ /* fakes filling the writelist arrays of lengths found above */
+ k = start_k;
+ j = start_j;
+ for (i=0; iblocklens[k] == end_bwr_size)
+ bwr_size = flat_buf->blocklens[(k+1)%
+ flat_buf->count];
+ else {
+ bwr_size = flat_buf->blocklens[k] - end_bwr_size;
+ k--;
+ buf_count--;
+ }
+ }
+ }
+ buf_count++;
+ k = (k + 1)%flat_buf->count;
+ } /* for (i=0; iblocklens[j] == end_fwr_size)
+ fwr_size = flat_file->blocklens[(j+1)%
+ flat_file->count];
+ else {
+ fwr_size = flat_file->blocklens[j] - end_fwr_size;
+ j--;
+ }
+ }
+ }
+ if (j < flat_file->count - 1) j++;
+ else {
+ j = 0;
+ n_filetypes++;
+ }
+ } /* for (i=0; iblocklens[0] ) ) ||
+ ((mem_list_count == 1) &&
+ (new_buffer_write < flat_buf->blocklens[0]) ) ||
+ ((file_list_count == MAX_ARRAY_SIZE) &&
+ (new_file_write < flat_buf->blocklens[0]) ) ||
+ ( (mem_list_count == MAX_ARRAY_SIZE) &&
+ (new_buffer_write < flat_file->blocklens[0])) )
+ {
+ ADIOI_Delete_flattened(datatype);
+ ADIOI_GEN_WriteStrided_naive(fd, buf, count, datatype,
+ file_ptr_type, initial_off, status, error_code);
+ return;
+ }
+
+
+ mem_offsets = (PVFS_size*)ADIOI_Malloc(max_mem_list*sizeof(PVFS_size));
+ mem_lengths = (int *)ADIOI_Malloc(max_mem_list*sizeof(int));
+ file_offsets = (int64_t *)ADIOI_Malloc(max_file_list*sizeof(int64_t));
+ file_lengths = (int32_t *)ADIOI_Malloc(max_file_list*sizeof(int32_t));
+
+ size_wrote = 0;
+ n_filetypes = st_n_filetypes;
+ fwr_size = st_fwr_size;
+ bwr_size = flat_buf->blocklens[0];
+ buf_count = 0;
+ start_mem_offset = 0;
+ start_k = k = 0;
+ start_j = st_index;
+
+ /* this section calculates mem_list_count and file_list_count
+ and also finds the possibly odd-sized last array elements,
+ stored in end_fwr_size and end_bwr_size */
+
+ while (size_wrote < bufsize) {
+ k = start_k;
+ new_buffer_write = 0;
+ mem_list_count = 0;
+ while ((mem_list_count < MAX_ARRAY_SIZE) &&
+ (new_buffer_write < bufsize-size_wrote)) {
+ /* find mem_list_count and file_list_count such that both are
+ less than MAX_ARRAY_SIZE, the sum of their lengths are
+ equal, and the sum of all the data written and data to be
+ written in the next immediate write list is less than
+ bufsize */
+ if(mem_list_count) {
+ if((new_buffer_write + flat_buf->blocklens[k] +
+ size_wrote) > bufsize) {
+ end_bwr_size = new_buffer_write +
+ flat_buf->blocklens[k] - (bufsize - size_wrote);
+ new_buffer_write = bufsize - size_wrote;
+ }
+ else {
+ new_buffer_write += flat_buf->blocklens[k];
+ end_bwr_size = flat_buf->blocklens[k];
+ }
+ }
+ else {
+ if (bwr_size > (bufsize - size_wrote)) {
+ new_buffer_write = bufsize - size_wrote;
+ bwr_size = new_buffer_write;
+ }
+ else new_buffer_write = bwr_size;
+ }
+ mem_list_count++;
+ k = (k + 1)%flat_buf->count;
+ } /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
+ (new_buffer_write < bufsize-size_wrote)) */
+ j = start_j;
+ new_file_write = 0;
+ file_list_count = 0;
+ while ((file_list_count < MAX_ARRAY_SIZE) &&
+ (new_file_write < new_buffer_write)) {
+ if(file_list_count) {
+ if((new_file_write + flat_file->blocklens[j]) >
+ new_buffer_write) {
+ end_fwr_size = new_buffer_write - new_file_write;
+ new_file_write = new_buffer_write;
+ j--;
+ }
+ else {
+ new_file_write += flat_file->blocklens[j];
+ end_fwr_size = flat_file->blocklens[j];
+ }
+ }
+ else {
+ if (fwr_size > new_buffer_write) {
+ new_file_write = new_buffer_write;
+ fwr_size = new_file_write;
+ }
+ else new_file_write = fwr_size;
+ }
+ file_list_count++;
+ if (j < (flat_file->count - 1)) j++;
+ else j = 0;
+
+ k = start_k;
+ if ((new_file_write < new_buffer_write) &&
+ (file_list_count == MAX_ARRAY_SIZE)) {
+ new_buffer_write = 0;
+ mem_list_count = 0;
+ while (new_buffer_write < new_file_write) {
+ if(mem_list_count) {
+ if((new_buffer_write + flat_buf->blocklens[k]) >
+ new_file_write) {
+ end_bwr_size = new_file_write -
+ new_buffer_write;
+ new_buffer_write = new_file_write;
+ k--;
+ }
+ else {
+ new_buffer_write += flat_buf->blocklens[k];
+ end_bwr_size = flat_buf->blocklens[k];
+ }
+ }
+ else {
+ new_buffer_write = bwr_size;
+ if (bwr_size > (bufsize - size_wrote)) {
+ new_buffer_write = bufsize - size_wrote;
+ bwr_size = new_buffer_write;
+ }
+ }
+ mem_list_count++;
+ k = (k + 1)%flat_buf->count;
+ } /* while (new_buffer_write < new_file_write) */
+ } /* if ((new_file_write < new_buffer_write) &&
+ (file_list_count == MAX_ARRAY_SIZE)) */
+ } /* while ((file_list_count < MAX_ARRAY_SIZE) &&
+ (new_file_write < new_buffer_write)) */
+
+ /* fills the allocated writelist arrays */
+ k = start_k;
+ j = start_j;
+ for (i=0; icount) +
+ (int)flat_buf->indices[k]);
+
+ if(!i) {
+ mem_lengths[0] = bwr_size;
+ mem_offsets[0] += flat_buf->blocklens[k] - bwr_size;
+ }
+ else {
+ if (i == (mem_list_count - 1)) {
+ mem_lengths[i] = end_bwr_size;
+ if (flat_buf->blocklens[k] == end_bwr_size)
+ bwr_size = flat_buf->blocklens[(k+1)%
+ flat_buf->count];
+ else {
+ bwr_size = flat_buf->blocklens[k] - end_bwr_size;
+ k--;
+ buf_count--;
+ }
+ }
+ else {
+ mem_lengths[i] = flat_buf->blocklens[k];
+ }
+ }
+ buf_count++;
+ k = (k + 1)%flat_buf->count;
+ } /* for (i=0; iindices[j] +
+ ((ADIO_Offset)n_filetypes) * filetype_extent;
+ if (!i) {
+ file_lengths[0] = fwr_size;
+ file_offsets[0] += flat_file->blocklens[j] - fwr_size;
+ }
+ else {
+ if (i == (file_list_count - 1)) {
+ file_lengths[i] = end_fwr_size;
+ if (flat_file->blocklens[j] == end_fwr_size)
+ fwr_size = flat_file->blocklens[(j+1)%
+ flat_file->count];
+ else {
+ fwr_size = flat_file->blocklens[j] - end_fwr_size;
+ j--;
+ }
+ }
+ else file_lengths[i] = flat_file->blocklens[j];
+ }
+ if (j < flat_file->count - 1) j++;
+ else {
+ j = 0;
+ n_filetypes++;
+ }
+ } /* for (i=0; iobject_ref, file_req, 0,
+ PVFS_BOTTOM, mem_req,
+ &(pvfs_fs->credentials), &resp_io);
+#ifdef ADIOI_MPE_LOGGING
+ MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
+#endif
+ /* --BEGIN ERROR HANDLING-- */
+ if (err_flag != 0) {
+ *error_code = MPIO_Err_create_code(MPI_SUCCESS,
+ MPIR_ERR_RECOVERABLE,
+ myname, __LINE__,
+ ADIOI_PVFS2_error_convert(err_flag),
+ "Error in PVFS_sys_write", 0);
+ goto error_state;
+ }
+ /* --END ERROR HANDLING-- */
+
+ size_wrote += new_buffer_write;
+ total_bytes_written += resp_io.total_completed;
+ start_k = k;
+ start_j = j;
+ PVFS_Request_free(&mem_req);
+ PVFS_Request_free(&file_req);
+ } /* while (size_wrote < bufsize) */
+ ADIOI_Free(mem_offsets);
+ ADIOI_Free(mem_lengths);
+ }
+ /* when incrementing fp_ind, need to also take into account the file type:
+ * consider an N-element 1-d subarray with a lb and ub: ( |---xxxxx-----|
+ * if we wrote N elements, offset needs to point at beginning of type, not
+ * at empty region at offset N+1).
+ *
+ * As we discussed on mpich-discuss in May/June 2009, the code below might
+ * look weird, but by putting fp_ind at the last byte written, the next
+ * time we run through the strided code we'll update the fp_ind to the
+ * right location. */
+ if (file_ptr_type == ADIO_INDIVIDUAL) {
+ fd->fp_ind = file_offsets[file_list_count-1]+
+ file_lengths[file_list_count-1];
+ }
+ ADIOI_Free(file_offsets);
+ ADIOI_Free(file_lengths);
+
+ *error_code = MPI_SUCCESS;
+
+error_state:
+ fd->fp_sys_posn = -1; /* set it to null. */
+
+#ifdef HAVE_STATUS_SET_BYTES
+ MPIR_Status_set_bytes(status, datatype, bufsize);
+/* This is a temporary way of filling in status. The right way is to
+ keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */
+#endif
+
+ if (!buftype_is_contig) ADIOI_Delete_flattened(datatype);
+}
diff --git a/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs.c b/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs.c
index 29c6835345..36286c7a6e 100644
--- a/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs.c
+++ b/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs.c
@@ -12,6 +12,7 @@
struct ADIOI_Fns_struct ADIO_TESTFS_operations = {
ADIOI_TESTFS_Open, /* Open */
+ ADIOI_GEN_OpenColl, /* OpenColl */
ADIOI_TESTFS_ReadContig, /* ReadContig */
ADIOI_TESTFS_WriteContig, /* WriteContig */
ADIOI_TESTFS_ReadStridedColl, /* ReadStridedColl */
@@ -33,4 +34,5 @@ struct ADIOI_Fns_struct ADIO_TESTFS_operations = {
ADIOI_TESTFS_Flush, /* Flush */
ADIOI_TESTFS_Resize, /* Resize */
ADIOI_TESTFS_Delete, /* Delete */
+ ADIOI_GEN_Feature, /* Features */
};
diff --git a/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_hints.c b/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_hints.c
index 9c72bcda7d..a7b1b7540f 100644
--- a/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_hints.c
+++ b/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_hints.c
@@ -7,7 +7,9 @@
#include "ad_testfs.h"
#include "adioi.h"
-
+#ifdef ROMIO_BGL
+#include "../ad_bgl/ad_bgl.h"
+#endif
void ADIOI_TESTFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
{
int myrank, nprocs;
@@ -21,5 +23,10 @@ void ADIOI_TESTFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
FPRINTF(stdout, "[%d/%d] calling ADIOI_GEN_SetInfo\n",
myrank, nprocs);
+#ifdef ROMIO_BGL /* BlueGene support for pvfs through ufs */
+ /* BlueGene hack: force testfs to mimic BlueGene hints */
+ ADIOI_BGL_SetInfo(fd, users_info, error_code);
+#else
ADIOI_GEN_SetInfo(fd, users_info, error_code);
+#endif
}
diff --git a/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_read.c b/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_read.c
index 5966f81dfe..bf911d460c 100644
--- a/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_read.c
+++ b/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_read.c
@@ -26,10 +26,6 @@ void ADIOI_TESTFS_ReadContig(ADIO_File fd, void *buf, int count,
offset = fd->fp_ind;
fd->fp_ind += datatype_size * count;
fd->fp_sys_posn = fd->fp_ind;
-#if 0
- FPRINTF(stdout, "[%d/%d] new file position is %lld\n", myrank,
- nprocs, (long long) fd->fp_ind);
-#endif
}
else {
fd->fp_sys_posn = offset + datatype_size * count;
diff --git a/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_seek.c b/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_seek.c
index 93bf3757f4..4b23bc7940 100644
--- a/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_seek.c
+++ b/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_seek.c
@@ -26,8 +26,8 @@ ADIO_Offset ADIOI_TESTFS_SeekIndividual(ADIO_File fd, ADIO_Offset offset,
ADIO_Offset off;
ADIOI_Flatlist_node *flat_file;
int i, n_etypes_in_filetype, n_filetypes, etype_in_filetype;
- ADIO_Offset abs_off_in_filetype=0;
- int size_in_filetype, sum;
+ ADIO_Offset abs_off_in_filetype=0, sum;
+ int size_in_filetype;
int filetype_size, etype_size, filetype_is_contig;
MPI_Aint filetype_extent;
@@ -54,6 +54,7 @@ ADIO_Offset ADIOI_TESTFS_SeekIndividual(ADIO_File fd, ADIO_Offset offset,
}
n_etypes_in_filetype = filetype_size/etype_size;
+ ADIOI_Assert((offset / n_etypes_in_filetype) == (int) (offset / n_etypes_in_filetype));
n_filetypes = (int) (offset / n_etypes_in_filetype);
etype_in_filetype = (int) (offset % n_etypes_in_filetype);
size_in_filetype = etype_in_filetype * etype_size;
@@ -70,7 +71,7 @@ ADIO_Offset ADIOI_TESTFS_SeekIndividual(ADIO_File fd, ADIO_Offset offset,
}
/* abs. offset in bytes in the file */
- off = fd->disp + (ADIO_Offset) n_filetypes * filetype_extent +
+ off = fd->disp + (ADIO_Offset)n_filetypes * (ADIO_Offset)filetype_extent +
abs_off_in_filetype;
}
diff --git a/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_write.c b/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_write.c
index 3306b6edcf..c3eea50f60 100644
--- a/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_write.c
+++ b/ompi/mca/io/romio/romio/adio/ad_testfs/ad_testfs_write.c
@@ -23,7 +23,7 @@ void ADIOI_TESTFS_WriteContig(ADIO_File fd, void *buf, int count,
nprocs, fd->filename);
FPRINTF(stdout, "[%d/%d] writing (buf = %p, loc = %lld, sz = %lld)\n",
myrank, nprocs, buf, (long long) offset,
- (long long) datatype_size * count);
+ (long long)datatype_size * (long long)count);
if (file_ptr_type != ADIO_EXPLICIT_OFFSET)
{
diff --git a/ompi/mca/io/romio/romio/adio/ad_ufs/ad_ufs.c b/ompi/mca/io/romio/romio/adio/ad_ufs/ad_ufs.c
index ce7bd03094..014222a508 100644
--- a/ompi/mca/io/romio/romio/adio/ad_ufs/ad_ufs.c
+++ b/ompi/mca/io/romio/romio/adio/ad_ufs/ad_ufs.c
@@ -12,6 +12,7 @@
struct ADIOI_Fns_struct ADIO_UFS_operations = {
ADIOI_UFS_Open, /* Open */
+ ADIOI_GEN_OpenColl, /* OpenColl */
ADIOI_GEN_ReadContig, /* ReadContig */
ADIOI_GEN_WriteContig, /* WriteContig */
ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */
@@ -38,4 +39,5 @@ struct ADIOI_Fns_struct ADIO_UFS_operations = {
ADIOI_GEN_Flush, /* Flush */
ADIOI_GEN_Resize, /* Resize */
ADIOI_GEN_Delete, /* Delete */
+ ADIOI_GEN_Feature, /* Features */
};
diff --git a/ompi/mca/io/romio/romio/adio/ad_xfs/Makefile.am b/ompi/mca/io/romio/romio/adio/ad_xfs/Makefile.am
index 8541ee271d..b077408789 100644
--- a/ompi/mca/io/romio/romio/adio/ad_xfs/Makefile.am
+++ b/ompi/mca/io/romio/romio/adio/ad_xfs/Makefile.am
@@ -22,13 +22,9 @@ noinst_LTLIBRARIES = libadio_xfs.la
libadio_xfs_la_SOURCES = \
ad_xfs.c \
ad_xfs.h \
- ad_xfs_done.c \
ad_xfs_fcntl.c \
ad_xfs_hints.c \
- ad_xfs_iread.c \
- ad_xfs_iwrite.c \
ad_xfs_open.c \
ad_xfs_read.c \
ad_xfs_resize.c \
- ad_xfs_wait.c \
ad_xfs_write.c
diff --git a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs.c b/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs.c
index c04ef8fd4f..07730aa2d7 100644
--- a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs.c
+++ b/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs.c
@@ -12,6 +12,7 @@
struct ADIOI_Fns_struct ADIO_XFS_operations = {
ADIOI_XFS_Open, /* Open */
+ ADIOI_GEN_OpenColl, /* OpenColl */
ADIOI_XFS_ReadContig, /* ReadContig */
ADIOI_XFS_WriteContig, /* WriteContig */
ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */
@@ -22,15 +23,21 @@ struct ADIOI_Fns_struct ADIO_XFS_operations = {
ADIOI_GEN_ReadStrided, /* ReadStrided */
ADIOI_GEN_WriteStrided, /* WriteStrided */
ADIOI_GEN_Close, /* Close */
- ADIOI_XFS_IreadContig, /* IreadContig */
- ADIOI_XFS_IwriteContig, /* IwriteContig */
- ADIOI_XFS_ReadDone, /* ReadDone */
- ADIOI_XFS_WriteDone, /* WriteDone */
- ADIOI_XFS_ReadComplete, /* ReadComplete */
- ADIOI_XFS_WriteComplete, /* WriteComplete */
+#if defined(ROMIO_HAVE_WORKING_AIO)
+ ADIOI_GEN_IreadContig, /* IreadContig */
+ ADIOI_GEN_IwriteContig, /* IwriteContig */
+#else
+ ADIOI_FAKE_IreadContig, /* IreadContig */
+ ADIOI_FAKE_IwriteContig, /* IwriteContig */
+#endif /* ROMIO_HAVE_WORKING_AIO */
+ ADIOI_GEN_IODone, /* ReadDone */
+ ADIOI_GEN_IODone, /* WriteDone */
+ ADIOI_GEN_IOComplete, /* ReadComplete */
+ ADIOI_GEN_IOComplete, /* WriteComplete */
ADIOI_GEN_IreadStrided, /* IreadStrided */
ADIOI_GEN_IwriteStrided, /* IwriteStrided */
ADIOI_GEN_Flush, /* Flush */
ADIOI_XFS_Resize, /* Resize */
ADIOI_GEN_Delete, /* Delete */
+ ADIOI_GEN_Feature, /* Features */
};
diff --git a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs.h b/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs.h
index 2d81688ee2..c529abcd91 100644
--- a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs.h
+++ b/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs.h
@@ -8,20 +8,19 @@
#ifndef AD_XFS_INCLUDE
#define AD_XFS_INCLUDE
+#define _XOPEN_SOURCE 500
#include
#include
#include
#include "adio.h"
-#include
-int ADIOI_XFS_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset,
- int wr, void *handle);
-
-#if (defined(HAVE_PREAD64) && (_ABIO32 == 1))
-# define pread pread64
-# define pwrite pwrite64
+#if defined(MPISGI)
+#include "xfs/xfs_fs.h"
+#ifndef __USE_LARGEFILE64
+#define __USE_LARGEFILE64
+#endif
+typedef struct aiocb64 aiocb64_t;
#endif
-/* above needed for IRIX 6.5 */
void ADIOI_XFS_Open(ADIO_File fd, int *error_code);
void ADIOI_XFS_Close(ADIO_File fd, int *error_code);
@@ -33,22 +32,6 @@ void ADIOI_XFS_WriteContig(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status, int
*error_code);
-void ADIOI_XFS_IwriteContig(ADIO_File fd, void *buf, int count,
- MPI_Datatype datatype, int file_ptr_type,
- ADIO_Offset offset, ADIO_Request *request, int
- *error_code);
-void ADIOI_XFS_IreadContig(ADIO_File fd, void *buf, int count,
- MPI_Datatype datatype, int file_ptr_type,
- ADIO_Offset offset, ADIO_Request *request, int
- *error_code);
-int ADIOI_XFS_ReadDone(ADIO_Request *request, ADIO_Status *status, int
- *error_code);
-int ADIOI_XFS_WriteDone(ADIO_Request *request, ADIO_Status *status, int
- *error_code);
-void ADIOI_XFS_ReadComplete(ADIO_Request *request, ADIO_Status *status, int
- *error_code);
-void ADIOI_XFS_WriteComplete(ADIO_Request *request, ADIO_Status *status,
- int *error_code);
void ADIOI_XFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int
*error_code);
void ADIOI_XFS_Resize(ADIO_File fd, ADIO_Offset size, int *error_code);
diff --git a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_done.c b/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_done.c
deleted file mode 100644
index ebdca50eed..0000000000
--- a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_done.c
+++ /dev/null
@@ -1,69 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*
- *
- * Copyright (C) 1997 University of Chicago.
- * See COPYRIGHT notice in top-level directory.
- */
-
-#include "ad_xfs.h"
-
-int ADIOI_XFS_ReadDone(ADIO_Request *request, ADIO_Status *status,
- int *error_code)
-{
- int err, done=0;
- static char myname[] = "ADIOI_XFS_READDONE";
-
- if (*request == ADIO_REQUEST_NULL) {
- *error_code = MPI_SUCCESS;
- return 1;
- }
-
- if ((*request)->queued) {
- errno = aio_error64((const aiocb64_t *) (*request)->handle);
- if (errno == EINPROGRESS) {
- done = 0;
- *error_code = MPI_SUCCESS;
- }
- else {
- err = aio_return64((aiocb64_t *) (*request)->handle);
- (*request)->nbytes = err;
- errno = aio_error64((const aiocb64_t *) (*request)->handle);
-
- done = 1;
- if (err == -1) {
- *error_code = MPIO_Err_create_code(MPI_SUCCESS,
- MPIR_ERR_RECOVERABLE, myname,
- __LINE__, MPI_ERR_IO, "**io",
- "**io %s", strerror(errno));
- }
- else *error_code = MPI_SUCCESS;
- }
- } /* if ((*request)->queued) */
- else {
- done = 1;
- *error_code = MPI_SUCCESS;
- }
-#ifdef HAVE_STATUS_SET_BYTES
- if (done && ((*request)->nbytes != -1))
- MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes);
-#endif
-
- if (done) {
- /* if request is still queued in the system, it is also there
- on ADIOI_Async_list. Delete it from there. */
- if ((*request)->queued) ADIOI_Del_req_from_list(request);
-
- (*request)->fd->async_count--;
- if ((*request)->handle) ADIOI_Free((*request)->handle);
- ADIOI_Free_request((ADIOI_Req_node *) (*request));
- *request = ADIO_REQUEST_NULL;
- /* status to be filled */
- }
- return done;
-}
-
-
-int ADIOI_XFS_WriteDone(ADIO_Request *request, ADIO_Status *status, int *error_code)
-{
- return ADIOI_XFS_ReadDone(request, status, error_code);
-}
diff --git a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_fcntl.c b/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_fcntl.c
index e9194f4bbd..7c49da24da 100644
--- a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_fcntl.c
+++ b/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_fcntl.c
@@ -7,6 +7,11 @@
#include "ad_xfs.h"
#include "adio_extern.h"
+#include <sys/ioctl.h>
+
+#ifndef HAVE_LSEEK64
+#define lseek64 lseek
+#endif
void ADIOI_XFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *error_code)
{
@@ -37,7 +42,7 @@ void ADIOI_XFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *er
fl.l_len = fcntl_struct->diskspace;
#if defined(LINUX) && defined(MPISGI)
- err = fcntl(fd->fd_sys, XFS_IOC_RESVSP64, &fl);
+ err = ioctl(fd->fd_sys, XFS_IOC_RESVSP64, &fl);
#else
err = fcntl(fd->fd_sys, F_RESVSP64, &fl);
#endif
diff --git a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_hints.c b/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_hints.c
index d6bff11337..97909b3819 100644
--- a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_hints.c
+++ b/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_hints.c
@@ -8,36 +8,76 @@
#include "ad_xfs.h"
#include "adio_extern.h"
+static unsigned xfs_direct_read_chunk_size;
+static unsigned xfs_direct_write_chunk_size;
+
void ADIOI_XFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
{
- char *value;
+ char *value, * c;
int flag;
+ static char xfs_initialized = 0;
if (fd->info == MPI_INFO_NULL) MPI_Info_create(&(fd->info));
- /* the nightly builds say somthing is calling MPI_Info_set w/ a null info,
- * so protect the calls to MPI_Info_set */
- if (fd->info != MPI_INFO_NULL ) {
- MPI_Info_set(fd->info, "direct_read", "false");
- MPI_Info_set(fd->info, "direct_write", "false");
- fd->direct_read = fd->direct_write = 0;
- }
-
- /* has user specified values for keys "direct_read" and "direct wirte"? */
+ ADIOI_Info_set(fd->info, "direct_read", "false");
+ ADIOI_Info_set(fd->info, "direct_write", "false");
+ fd->direct_read = fd->direct_write = 0;
+
+ if (!xfs_initialized) {
+ xfs_initialized = 1;
+ c = getenv("MPIO_DIRECT_READ_CHUNK_SIZE");
+ if (c) {
+ int io;
+ io = atoi(c);
+ if (io <= 0) {
+ fprintf(stderr,
+"MPI: Ignoring an invalid setting for MPIO_DIRECT_READ_CHUNK_SIZE.\n"
+" It must be set to a positive integer value.\n");
+ } else {
+ xfs_direct_read_chunk_size = io;
+ }
+ } else {
+ xfs_direct_read_chunk_size = 0;
+ }
+
+ c = getenv("MPIO_DIRECT_WRITE_CHUNK_SIZE");
+ if (c) {
+ int io;
+ io = atoi(c);
+ if (io <= 0) {
+ fprintf(stderr,
+"MPI: Ignoring an invalid setting for MPIO_DIRECT_WRITE_CHUNK_SIZE.\n"
+" It must be set to a positive integer value.\n");
+ } else {
+ xfs_direct_write_chunk_size = io;
+ }
+ } else {
+ xfs_direct_write_chunk_size = 0;
+ }
+ }
+
+ if (!fd->hints->initialized) {
+ fd->hints->fs_hints.xfs.read_chunk_sz =
+ xfs_direct_read_chunk_size;
+ fd->hints->fs_hints.xfs.write_chunk_sz =
+ xfs_direct_write_chunk_size;
+ }
+
+ /* has user specified values for keys "direct_read" and "direct write"? */
if (users_info != MPI_INFO_NULL) {
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
- MPI_Info_get(users_info, "direct_read", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(users_info, "direct_read", MPI_MAX_INFO_VAL,
value, &flag);
if (flag && !strcmp(value, "true")) {
- MPI_Info_set(fd->info, "direct_read", "true");
+ ADIOI_Info_set(fd->info, "direct_read", "true");
fd->direct_read = 1;
}
- MPI_Info_get(users_info, "direct_write", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(users_info, "direct_write", MPI_MAX_INFO_VAL,
value, &flag);
if (flag && !strcmp(value, "true")) {
- MPI_Info_set(fd->info, "direct_write", "true");
+ ADIOI_Info_set(fd->info, "direct_write", "true");
fd->direct_write = 1;
}
@@ -47,8 +87,10 @@ void ADIOI_XFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
/* set the values for collective I/O and data sieving parameters */
ADIOI_GEN_SetInfo(fd, users_info, error_code);
+ /* Environment variables override MPI_Info hints */
if (ADIOI_Direct_read) fd->direct_read = 1;
if (ADIOI_Direct_write) fd->direct_write = 1;
+
/* environment variables checked in ADIO_Init */
*error_code = MPI_SUCCESS;
diff --git a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_iread.c b/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_iread.c
deleted file mode 100644
index a85062ba72..0000000000
--- a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_iread.c
+++ /dev/null
@@ -1,42 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*
- * Copyright (C) 1997 University of Chicago.
- * See COPYRIGHT notice in top-level directory.
- */
-
-#include "ad_xfs.h"
-
-void ADIOI_XFS_IreadContig(ADIO_File fd, void *buf, int count,
- MPI_Datatype datatype, int file_ptr_type,
- ADIO_Offset offset, ADIO_Request *request, int *error_code)
-{
- int len, typesize, aio_errno = 0;
- static char myname[] = "ADIOI_XFS_IREADCONTIG";
-
- (*request) = ADIOI_Malloc_request();
- (*request)->optype = ADIOI_READ;
- (*request)->fd = fd;
- (*request)->datatype = datatype;
-
- MPI_Type_size(datatype, &typesize);
- len = count * typesize;
-
- if (file_ptr_type == ADIO_INDIVIDUAL) offset = fd->fp_ind;
- aio_errno = ADIOI_XFS_aio(fd, buf, len, offset, 0, &((*request)->handle));
- if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += len;
-
- (*request)->queued = 1;
- ADIOI_Add_req_to_list(request);
-
- fd->fp_sys_posn = -1;
-
- /* --BEGIN ERROR HANDLING-- */
- if (aio_errno != 0) {
- MPIO_ERR_CREATE_CODE_ERRNO(myname, aio_errno, error_code);
- return;
- }
- /* --END ERROR HANDLING-- */
-
- *error_code = MPI_SUCCESS;
- fd->async_count++;
-}
diff --git a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_iwrite.c b/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_iwrite.c
deleted file mode 100644
index 61980621f7..0000000000
--- a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_iwrite.c
+++ /dev/null
@@ -1,145 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*
- *
- * Copyright (C) 1997 University of Chicago.
- * See COPYRIGHT notice in top-level directory.
- */
-
-#include "ad_xfs.h"
-
-void ADIOI_XFS_IwriteContig(ADIO_File fd, void *buf, int count,
- MPI_Datatype datatype, int file_ptr_type,
- ADIO_Offset offset, ADIO_Request *request,
- int *error_code)
-{
- int len, typesize, aio_errno = 0;
- static char myname[] = "ADIOI_XFS_IWRITECONTIG";
-
- *request = ADIOI_Malloc_request();
- (*request)->optype = ADIOI_WRITE;
- (*request)->fd = fd;
- (*request)->datatype = datatype;
-
- MPI_Type_size(datatype, &typesize);
- len = count * typesize;
-
- if (file_ptr_type == ADIO_INDIVIDUAL) offset = fd->fp_ind;
- aio_errno = ADIOI_XFS_aio(fd, buf, len, offset, 1, &((*request)->handle));
- if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += len;
-
- (*request)->queued = 1;
- ADIOI_Add_req_to_list(request);
-
- fd->fp_sys_posn = -1;
-
- /* --BEGIN ERROR HANDLING-- */
- if (aio_errno != 0) {
- MPIO_ERR_CREATE_CODE_ERRNO(myname, aio_errno, error_code);
- return;
- }
- /* --END ERROR HANDLING-- */
-
- *error_code = MPI_SUCCESS;
- fd->async_count++;
-}
-
-
-void ADIOI_XFS_IwriteStrided(ADIO_File fd, void *buf, int count,
- MPI_Datatype datatype, int file_ptr_type,
- ADIO_Offset offset, ADIO_Request *request, int
- *error_code)
-{
- ADIO_Status status;
-#ifdef HAVE_STATUS_SET_BYTES
- int typesize;
-#endif
-
- *request = ADIOI_Malloc_request();
- (*request)->optype = ADIOI_WRITE;
- (*request)->fd = fd;
- (*request)->datatype = datatype;
- (*request)->queued = 0;
- (*request)->handle = 0;
-
-/* call the blocking version. It is faster because it does data sieving. */
- ADIO_WriteStrided(fd, buf, count, datatype, file_ptr_type,
- offset, &status, error_code);
-
- fd->async_count++;
-
-#ifdef HAVE_STATUS_SET_BYTES
- if (*error_code == MPI_SUCCESS) {
- MPI_Type_size(datatype, &typesize);
- (*request)->nbytes = count * typesize;
- }
-#endif
-}
-
-
-/* This function is for implementation convenience. It is not user-visible.
- * It takes care of the differences in the interface for nonblocking I/O
- * on various Unix machines! If wr==1 write, wr==0 read.
- *
- * Returns 0 on success, -errno on failure.
- */
-int ADIOI_XFS_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset,
- int wr, void *handle)
-{
- int err, error_code;
- aiocb64_t *aiocbp;
-
- aiocbp = (aiocb64_t *) ADIOI_Calloc(sizeof(aiocb64_t), 1);
-
- if (((wr && fd->direct_write) || (!wr && fd->direct_read))
- && !(((long) buf) % fd->d_mem) && !(offset % fd->d_miniosz) &&
- !(len % fd->d_miniosz) && (len >= fd->d_miniosz) &&
- (len <= fd->d_maxiosz))
- aiocbp->aio_fildes = fd->fd_direct;
- else aiocbp->aio_fildes = fd->fd_sys;
-
- aiocbp->aio_offset = offset;
- aiocbp->aio_buf = buf;
- aiocbp->aio_nbytes = len;
- aiocbp->aio_reqprio = 0;
-
-#ifdef AIO_SIGNOTIFY_NONE
-/* SGI IRIX 6 */
- aiocbp->aio_sigevent.sigev_notify = SIGEV_NONE;
-#else
- aiocbp->aio_sigevent.sigev_signo = 0;
-#endif
-
- if (wr) err = aio_write64(aiocbp);
- else err = aio_read64(aiocbp);
-
- if (err != 0) {
- if (errno == EAGAIN) {
- /* exceeded the max. no. of outstanding requests.
- complete all previous async. requests and try again. */
-
- /* ADIOI_Complete_async(&error_code); */
- if (error_code != MPI_SUCCESS) return -EIO;
-
- if (wr) err = aio_write64(aiocbp);
- else err = aio_read64(aiocbp);
-
- while (err != 0) {
- if (errno == EAGAIN) {
- /* sleep and try again */
- sleep(1);
- if (wr) err = aio_write64(aiocbp);
- else err = aio_read64(aiocbp);
- }
- else {
- return -errno;
- }
- }
- }
- else {
- return -errno;
- }
- }
-
- *((aiocb64_t **) handle) = aiocbp;
- return 0;
-}
diff --git a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_open.c b/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_open.c
index b993644f46..fa073fb316 100644
--- a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_open.c
+++ b/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_open.c
@@ -5,22 +5,26 @@
* See COPYRIGHT notice in top-level directory.
*/
+#define _GNU_SOURCE // for O_DIRECT
+
#include "ad_xfs.h"
+#include <sys/ioctl.h>
#ifdef HAVE_STDDEF_H
#include <stddef.h>
#endif
-#if defined(MPISGI)
-#include
-#include
+#ifndef HAVE_LSEEK64
+#define lseek64 lseek
#endif
void ADIOI_XFS_Open(ADIO_File fd, int *error_code)
{
- int perm, amode, amode_direct;
+ int perm, amode, amode_direct, factor;
unsigned int old_mask;
struct dioattr st;
static char myname[] = "ADIOI_XFS_OPEN";
+ unsigned read_chunk_sz = fd->hints->fs_hints.xfs.read_chunk_sz;
+ unsigned write_chunk_sz = fd->hints->fs_hints.xfs.write_chunk_sz;
if (fd->perm == ADIO_PERM_NULL) {
old_mask = umask(022);
@@ -49,7 +53,7 @@ void ADIOI_XFS_Open(ADIO_File fd, int *error_code)
fd->fd_direct = open(fd->filename, amode_direct, perm);
if (fd->fd_direct != -1) {
-#if defined(LINUX) && defined(MPISGI)
+#if defined(MPISGI)
ioctl(fd->fd_direct, XFS_IOC_DIOINFO, &st);
#else
fcntl(fd->fd_direct, F_DIOINFO, &st);
@@ -57,7 +61,34 @@ void ADIOI_XFS_Open(ADIO_File fd, int *error_code)
fd->d_mem = st.d_mem;
fd->d_miniosz = st.d_miniosz;
- fd->d_maxiosz = st.d_maxiosz;
+
+ if (read_chunk_sz == 0) {
+ fd->hints->fs_hints.xfs.read_chunk_sz = st.d_maxiosz;
+ } else {
+ /*
+ * MPIO_DIRECT_READ_CHUNK_SIZE was set.
+ * Make read_chunk_sz a multiple of d_miniosz.
+ */
+ factor = read_chunk_sz / fd->d_miniosz;
+ if (factor == 0 || read_chunk_sz != fd->d_miniosz * factor) {
+ fd->hints->fs_hints.xfs.read_chunk_sz =
+ fd->d_miniosz * (factor + 1);
+ }
+ }
+
+ if (write_chunk_sz == 0) {
+ fd->hints->fs_hints.xfs.write_chunk_sz = st.d_maxiosz;
+ } else {
+ /*
+ * MPIO_DIRECT_WRITE_CHUNK_SIZE was set.
+ * Make write_chunk_sz a multiple of d_miniosz.
+ */
+ factor = write_chunk_sz / fd->d_miniosz;
+ if (factor == 0 || write_chunk_sz != fd->d_miniosz * factor) {
+ fd->hints->fs_hints.xfs.write_chunk_sz =
+ fd->d_miniosz * (factor + 1);
+ }
+ }
if (fd->d_mem > XFS_MEMALIGN) {
FPRINTF(stderr, "MPI: Run-time Direct-IO memory alignment, %d, does not match compile-time value, %d.\n",
diff --git a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_read.c b/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_read.c
index 8396dd87f5..38b28f0e3b 100644
--- a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_read.c
+++ b/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_read.c
@@ -63,7 +63,7 @@ void ADIOI_XFS_ReadContig(ADIO_File fd, void *buf, int count,
ADIOI_XFS_Aligned_Mem_File_Read(fd, newbuf, size, offset, &err);
if (err > 0) memcpy(buf, newbuf, err);
nbytes += err;
- free(newbuf);
+ ADIOI_Free(newbuf);
}
else nbytes += pread(fd->fd_sys, buf, size, offset);
}
@@ -77,7 +77,7 @@ void ADIOI_XFS_ReadContig(ADIO_File fd, void *buf, int count,
if (newbuf) {
ADIOI_XFS_Aligned_Mem_File_Read(fd, newbuf, len, offset, &err);
if (err > 0) memcpy(buf, newbuf, err);
- free(newbuf);
+ ADIOI_Free(newbuf);
}
else err = pread(fd->fd_sys, buf, len, offset);
}
@@ -102,6 +102,7 @@ void ADIOI_XFS_Aligned_Mem_File_Read(ADIO_File fd, void *buf, int len,
ADIO_Offset offset, int *err)
{
int ntimes, rem, newrem, i, size, nbytes;
+ unsigned read_chunk_sz = fd->hints->fs_hints.xfs.read_chunk_sz;
/* memory buffer is aligned, offset in file is aligned,
io_size may or may not be of the right size.
@@ -109,33 +110,33 @@ void ADIOI_XFS_Aligned_Mem_File_Read(ADIO_File fd, void *buf, int len,
use buffered I/O for remaining. */
if (!(len % fd->d_miniosz) &&
- (len >= fd->d_miniosz) && (len <= fd->d_maxiosz))
+ (len >= fd->d_miniosz) && (len <= read_chunk_sz))
*err = pread(fd->fd_direct, buf, len, offset);
else if (len < fd->d_miniosz)
*err = pread(fd->fd_sys, buf, len, offset);
- else if (len > fd->d_maxiosz) {
- ntimes = len/(fd->d_maxiosz);
- rem = len - ntimes * fd->d_maxiosz;
+ else if (len > read_chunk_sz) {
+ ntimes = len/(read_chunk_sz);
+ rem = len - ntimes * read_chunk_sz;
nbytes = 0;
for (i=0; i<ntimes; i++) {
- nbytes += pread(fd->fd_direct, ((char *)buf) + i * fd->d_maxiosz,
- fd->d_maxiosz, offset);
- offset += fd->d_maxiosz;
+ nbytes += pread(fd->fd_direct, ((char *)buf) + i * read_chunk_sz,
+ read_chunk_sz, offset);
+ offset += read_chunk_sz;
}
if (rem) {
if (!(rem % fd->d_miniosz))
nbytes += pread(fd->fd_direct,
- ((char *)buf) + ntimes * fd->d_maxiosz, rem, offset);
+ ((char *)buf) + ntimes * read_chunk_sz, rem, offset);
else {
newrem = rem % fd->d_miniosz;
size = rem - newrem;
if (size) {
nbytes += pread(fd->fd_direct,
- ((char *)buf) + ntimes * fd->d_maxiosz, size, offset);
+ ((char *)buf) + ntimes * read_chunk_sz, size, offset);
offset += size;
}
nbytes += pread(fd->fd_sys,
- ((char *)buf) + ntimes*fd->d_maxiosz + size, newrem, offset);
+ ((char *)buf) + ntimes * read_chunk_sz + size, newrem, offset);
}
}
*err = nbytes;
diff --git a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_wait.c b/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_wait.c
deleted file mode 100644
index 93a0bdbfce..0000000000
--- a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_wait.c
+++ /dev/null
@@ -1,73 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*
- *
- * Copyright (C) 1997 University of Chicago.
- * See COPYRIGHT notice in top-level directory.
- */
-
-#include "ad_xfs.h"
-
-void ADIOI_XFS_ReadComplete(ADIO_Request *request, ADIO_Status *status,
- int *error_code)
-{
- int err;
- static char myname[] = "ADIOI_XFS_READCOMPLETE";
-
- if (*request == ADIO_REQUEST_NULL) {
- *error_code = MPI_SUCCESS;
- return;
- }
-
- if ((*request)->queued) {
- do {
- err = aio_suspend64((const aiocb64_t **) &((*request)->handle), 1, 0);
- } while ((err == -1) && (errno == EINTR));
-
- if (err != -1) {
- err = aio_return64((aiocb64_t *) (*request)->handle);
- (*request)->nbytes = err;
- errno = aio_error64((aiocb64_t *) (*request)->handle);
- }
- else (*request)->nbytes = -1;
-
- if (err == -1) {
- *error_code = MPIO_Err_create_code(MPI_SUCCESS,
- MPIR_ERR_RECOVERABLE, myname,
- __LINE__, MPI_ERR_IO, "**io",
- "**io %s", strerror(errno));
- }
- else *error_code = MPI_SUCCESS;
- } /* if ((*request)->queued) */
- else *error_code = MPI_SUCCESS;
-
-#ifdef HAVE_STATUS_SET_BYTES
- if ((*request)->nbytes != -1)
- MPIR_Status_set_bytes(status, (*request)->datatype, (*request)->nbytes);
-#endif
-
- if ((*request)->queued != -1) {
-
- /* queued = -1 is an internal hack used when the request must
- be completed, but the request object should not be
- freed. This is used in ADIOI_Complete_async, because the user
- will call MPI_Wait later, which would require status to
- be filled. Ugly but works. queued = -1 should be used only
- in ADIOI_Complete_async.
- This should not affect the user in any way. */
-
- /* if request is still queued in the system, it is also there
- on ADIOI_Async_list. Delete it from there. */
- if ((*request)->queued) ADIOI_Del_req_from_list(request);
-
- (*request)->fd->async_count--;
- if ((*request)->handle) ADIOI_Free((*request)->handle);
- ADIOI_Free_request((ADIOI_Req_node *) (*request));
- *request = ADIO_REQUEST_NULL;
- }
-}
-
-
-void ADIOI_XFS_WriteComplete(ADIO_Request *request, ADIO_Status *status, int *error_code)
-{
- ADIOI_XFS_ReadComplete(request, status, error_code);
-}
diff --git a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_write.c b/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_write.c
index 0c01352314..ecb9c7b801 100644
--- a/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_write.c
+++ b/ompi/mca/io/romio/romio/adio/ad_xfs/ad_xfs_write.c
@@ -13,14 +13,15 @@
/* style: allow:free:2 sig:0 */
-static void ADIOI_XFS_Aligned_Mem_File_Write(ADIO_File fd, void *buf, int len,
- ADIO_Offset offset, int *err);
+static int ADIOI_XFS_Aligned_Mem_File_Write(ADIO_File fd, void *buf,
+ ADIO_Offset len, ADIO_Offset offset);
void ADIOI_XFS_WriteContig(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status, int *error_code)
{
- int err=-1, datatype_size, len, diff, size, nbytes;
+ int err=-1, datatype_size, diff, size;
+ ssize_t len;
void *newbuf;
static char myname[] = "ADIOI_XFS_WRITECONTIG";
@@ -31,44 +32,48 @@ void ADIOI_XFS_WriteContig(ADIO_File fd, void *buf, int count,
if (file_ptr_type == ADIO_INDIVIDUAL) offset = fd->fp_ind;
- if (!(fd->direct_write)) /* direct I/O not enabled */
+ if (!(fd->direct_write)) { /* direct I/O not enabled */
err = pwrite(fd->fd_sys, buf, len, offset);
- else { /* direct I/O enabled */
+ if (err < 0) {goto leaving;}
+ } else { /* direct I/O enabled */
/* (1) if mem_aligned && file_aligned
use direct I/O to write up to correct io_size
use buffered I/O for remaining */
- if (!(((long) buf) % fd->d_mem) && !(offset % fd->d_miniosz))
- ADIOI_XFS_Aligned_Mem_File_Write(fd, buf, len, offset, &err);
+ if (!(((long) buf) % fd->d_mem) && !(offset % fd->d_miniosz)) {
+ err = ADIOI_XFS_Aligned_Mem_File_Write(fd, buf, len, offset);
+ if (err < 0) {goto leaving;}
/* (2) if !file_aligned
use buffered I/O to write up to file_aligned
At that point, if still mem_aligned, use (1)
else copy into aligned buf and then use (1) */
- else if (offset % fd->d_miniosz) {
+ } else if (offset % fd->d_miniosz) {
diff = fd->d_miniosz - (offset % fd->d_miniosz);
diff = ADIOI_MIN(diff, len);
- nbytes = pwrite(fd->fd_sys, buf, diff, offset);
+ err = pwrite(fd->fd_sys, buf, diff, offset);
+ if (err < 0) {goto leaving;}
buf = ((char *) buf) + diff;
offset += diff;
size = len - diff;
if (!(((long) buf) % fd->d_mem)) {
- ADIOI_XFS_Aligned_Mem_File_Write(fd, buf, size, offset, &err);
- nbytes += err;
+ err = ADIOI_XFS_Aligned_Mem_File_Write(fd, buf, size, offset);
+ if (err < 0) {goto leaving;}
}
else {
newbuf = (void *) memalign(XFS_MEMALIGN, size);
if (newbuf) {
memcpy(newbuf, buf, size);
- ADIOI_XFS_Aligned_Mem_File_Write(fd, newbuf, size, offset, &err);
- nbytes += err;
- free(newbuf);
+ err = ADIOI_XFS_Aligned_Mem_File_Write(fd, newbuf, size, offset);
+ ADIOI_Free(newbuf);
+ if (err < 0) {goto leaving;}
+ } else {
+ err = pwrite(fd->fd_sys, buf, size, offset);
+ if (err < 0) {goto leaving;}
}
- else nbytes += pwrite(fd->fd_sys, buf, size, offset);
}
- err = nbytes;
}
/* (3) if !mem_aligned && file_aligned
@@ -77,19 +82,22 @@ void ADIOI_XFS_WriteContig(ADIO_File fd, void *buf, int count,
newbuf = (void *) memalign(XFS_MEMALIGN, len);
if (newbuf) {
memcpy(newbuf, buf, len);
- ADIOI_XFS_Aligned_Mem_File_Write(fd, newbuf, len, offset, &err);
- free(newbuf);
+ err = ADIOI_XFS_Aligned_Mem_File_Write(fd, newbuf, len, offset);
+ ADIOI_Free(newbuf);
+ } else {
+ err = pwrite(fd->fd_sys, buf, len, offset);
}
- else err = pwrite(fd->fd_sys, buf, len, offset);
+
+ if (err < 0) {goto leaving;}
}
}
- if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += err;
+ if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += len;
#ifdef HAVE_STATUS_SET_BYTES
- if (err != -1) MPIR_Status_set_bytes(status, datatype, err);
+ if (err != -1) MPIR_Status_set_bytes(status, datatype, len);
#endif
-
+leaving:
if (err == -1) {
*error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
myname, __LINE__, MPI_ERR_IO, "**io",
@@ -99,10 +107,13 @@ void ADIOI_XFS_WriteContig(ADIO_File fd, void *buf, int count,
}
-void ADIOI_XFS_Aligned_Mem_File_Write(ADIO_File fd, void *buf, int len,
- ADIO_Offset offset, int *err)
+static int
+ADIOI_XFS_Aligned_Mem_File_Write(ADIO_File fd, void *buf, ADIO_Offset len,
+ ADIO_Offset offset)
{
- int ntimes, rem, newrem, i, size, nbytes;
+ unsigned write_chunk_sz = fd->hints->fs_hints.xfs.write_chunk_sz;
+ ADIO_Offset nbytes, rem, newrem, size;
+ int ntimes, i;
/* memory buffer is aligned, offset in file is aligned,
io_size may or may not be of the right size.
@@ -110,42 +121,50 @@ void ADIOI_XFS_Aligned_Mem_File_Write(ADIO_File fd, void *buf, int len,
use buffered I/O for remaining. */
if (!(len % fd->d_miniosz) &&
- (len >= fd->d_miniosz) && (len <= fd->d_maxiosz))
- *err = pwrite(fd->fd_direct, buf, len, offset);
- else if (len < fd->d_miniosz)
- *err = pwrite(fd->fd_sys, buf, len, offset);
- else if (len > fd->d_maxiosz) {
- ntimes = len/(fd->d_maxiosz);
- rem = len - ntimes * fd->d_maxiosz;
+ (len >= fd->d_miniosz) && (len <= write_chunk_sz)) {
+ nbytes = pwrite(fd->fd_direct, buf, len, offset);
+ if (nbytes < 0) {return -1;}
+ } else if (len < fd->d_miniosz) {
+ nbytes = pwrite(fd->fd_sys, buf, len, offset);
+ if (nbytes < 0) {return -1;}
+ } else if (len > write_chunk_sz) {
+ ntimes = len/(write_chunk_sz);
+ rem = len - ntimes * write_chunk_sz;
nbytes = 0;
for (i=0; i<ntimes; i++) {
- nbytes += pwrite(fd->fd_direct, ((char *)buf) + i * fd->d_maxiosz,
- fd->d_maxiosz, offset);
- offset += fd->d_maxiosz;
+ nbytes = pwrite(fd->fd_direct, ((char *)buf) + i * write_chunk_sz,
+ write_chunk_sz, offset);
+ offset += write_chunk_sz;
+ if (nbytes < 0) {return -1;}
}
if (rem) {
- if (!(rem % fd->d_miniosz))
- nbytes += pwrite(fd->fd_direct,
- ((char *)buf) + ntimes * fd->d_maxiosz, rem, offset);
- else {
+ if (!(rem % fd->d_miniosz)) {
+ nbytes = pwrite(fd->fd_direct,
+ ((char *)buf) + ntimes * write_chunk_sz, rem, offset);
+ if (nbytes < 0) {return -1;}
+ } else {
newrem = rem % fd->d_miniosz;
size = rem - newrem;
if (size) {
- nbytes += pwrite(fd->fd_direct,
- ((char *)buf) + ntimes * fd->d_maxiosz, size, offset);
+ nbytes = pwrite(fd->fd_direct,
+ ((char *)buf) + ntimes * write_chunk_sz, size, offset);
offset += size;
+ if (nbytes < 0) {return -1;}
}
- nbytes += pwrite(fd->fd_sys,
- ((char *)buf) + ntimes*fd->d_maxiosz + size, newrem, offset);
+ nbytes = pwrite(fd->fd_sys,
+ ((char *)buf) + ntimes * write_chunk_sz + size, newrem, offset);
+ if (nbytes < 0) {return -1;}
}
}
- *err = nbytes;
}
else {
rem = len % fd->d_miniosz;
size = len - rem;
nbytes = pwrite(fd->fd_direct, buf, size, offset);
- nbytes += pwrite(fd->fd_sys, (char *)buf + size, rem, offset+size);
- *err = nbytes;
+ if (nbytes < 0) {return -1;}
+ nbytes = pwrite(fd->fd_sys, (char *)buf + size, rem, offset+size);
+ if (nbytes < 0) {return -1;}
}
+
+ return 0;
}
diff --git a/ompi/mca/io/romio/romio/adio/ad_zoidfs/Makefile.am b/ompi/mca/io/romio/romio/adio/ad_zoidfs/Makefile.am
new file mode 100644
index 0000000000..dfa5b419cd
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/ad_zoidfs/Makefile.am
@@ -0,0 +1,37 @@
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+# University Research and Technology
+# Corporation. All rights reserved.
+# Copyright (c) 2004-2005 The University of Tennessee and The University
+# of Tennessee Research Foundation. All rights
+# reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+# University of Stuttgart. All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+# All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+include $(top_srcdir)/Makefile.options
+
+EXTRA_DIST = README
+# _SOURCES must name sources and headers; automake derives the objects itself.
+noinst_LTLIBRARIES = libadio_zoidf.la
+libadio_zoidf_la_SOURCES = \
+    ad_zoidfs.c ad_zoidfs.h \
+    ad_zoidfs_close.c \
+    ad_zoidfs_common.c ad_zoidfs_common.h \
+    ad_zoidfs_delete.c \
+    ad_zoidfs_fcntl.c \
+    ad_zoidfs_flush.c \
+    ad_zoidfs_io.c \
+    ad_zoidfs_open.c \
+    ad_zoidfs_resize.c \
+    ad_zoidfs_features.c \
+    ad_zoidfs_read_list.c \
+    ad_zoidfs_write_list.c
+
diff --git a/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs.c b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs.c
new file mode 100644
index 0000000000..28b8ea54e6
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs.c
@@ -0,0 +1,42 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ *
+ * Copyright (C) 2003 University of Chicago.
+ * See COPYRIGHT notice in top-level directory.
+ */
+
+#include "ad_zoidfs.h"
+
+/* adioi.h has the ADIOI_Fns_struct define */
+#include "adioi.h"
+
+struct ADIOI_Fns_struct ADIO_ZOIDFS_operations = {
+ ADIOI_ZOIDFS_Open, /* Open */
+ ADIOI_SCALEABLE_OpenColl, /* OpenColl */
+ ADIOI_ZOIDFS_ReadContig, /* ReadContig */
+ ADIOI_ZOIDFS_WriteContig, /* WriteContig */
+ ADIOI_GEN_ReadStridedColl, /* ReadStridedColl */
+ ADIOI_GEN_WriteStridedColl, /* WriteStridedColl */
+ ADIOI_GEN_SeekIndividual, /* SeekIndividual */
+ ADIOI_ZOIDFS_Fcntl, /* Fcntl */
+ ADIOI_GEN_SetInfo, /* SetInfo */
+ ADIOI_GEN_ReadStrided, /* ReadStrided */
+ ADIOI_ZOIDFS_WriteStrided, /* WriteStrided */
+ ADIOI_ZOIDFS_Close, /* Close */
+ ADIOI_FAKE_IreadContig, /* IreadContig */
+ ADIOI_FAKE_IwriteContig, /* IwriteContig */
+ ADIOI_FAKE_IODone, /* ReadDone */
+ ADIOI_FAKE_IODone, /* WriteDone */
+ ADIOI_FAKE_IOComplete, /* ReadComplete */
+ ADIOI_FAKE_IOComplete, /* WriteComplete */
+ ADIOI_FAKE_IreadStrided, /* IreadStrided */
+ ADIOI_FAKE_IwriteStrided, /* IwriteStrided */
+ ADIOI_ZOIDFS_Flush, /* Flush */
+ ADIOI_ZOIDFS_Resize, /* Resize */
+ ADIOI_ZOIDFS_Delete, /* Delete */
+ ADIOI_ZOIDFS_Feature, /* Feature */
+};
+
+/*
+ * vim: ts=8 sts=4 sw=4 noexpandtab
+ */
diff --git a/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs.h b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs.h
new file mode 100644
index 0000000000..03b2a57662
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs.h
@@ -0,0 +1,44 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ *
+ * Copyright (C) 1997 University of Chicago.
+ * See COPYRIGHT notice in top-level directory.
+ */
+
+#ifndef AD_ZOIDFS_INCLUDE
+#define AD_ZOIDFS_INCLUDE
+
+#include "adio.h"
+#ifdef HAVE_ZOIDFS_H
+#include "zoidfs.h"
+#endif
+
+/* Driver state kept in fd->fs_ptr: the zoidfs handle of the open file. */
+typedef zoidfs_handle_t ADIOI_ZOIDFS_object;
+
+void ADIOI_ZOIDFS_Open(ADIO_File fd, int *error_code);
+void ADIOI_ZOIDFS_Close(ADIO_File fd, int *error_code);
+void ADIOI_ZOIDFS_ReadContig(ADIO_File fd, void *buf, int count,
+ MPI_Datatype datatype, int file_ptr_type,
+ ADIO_Offset offset, ADIO_Status *status, int
+ *error_code);
+void ADIOI_ZOIDFS_WriteContig(ADIO_File fd, void *buf, int count,
+ MPI_Datatype datatype, int file_ptr_type,
+ ADIO_Offset offset, ADIO_Status *status, int
+ *error_code);
+void ADIOI_ZOIDFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int
+ *error_code);
+void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count,
+ MPI_Datatype datatype, int file_ptr_type,
+ ADIO_Offset offset, ADIO_Status *status, int
+ *error_code);
+void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count,
+ MPI_Datatype datatype, int file_ptr_type,
+ ADIO_Offset offset, ADIO_Status *status, int
+ *error_code);
+void ADIOI_ZOIDFS_Flush(ADIO_File fd, int *error_code);
+void ADIOI_ZOIDFS_Delete(char *filename, int *error_code);
+void ADIOI_ZOIDFS_Resize(ADIO_File fd, ADIO_Offset size, int *error_code);
+void ADIOI_ZOIDFS_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code);
+int ADIOI_ZOIDFS_Feature(ADIO_File fd, int flag);
+#endif
diff --git a/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_close.c b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_close.c
new file mode 100644
index 0000000000..1bee6b83e9
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_close.c
@@ -0,0 +1,25 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ *
+ * Copyright (C) 1997 University of Chicago.
+ * See COPYRIGHT notice in top-level directory.
+ */
+
+#include "ad_zoidfs.h"
+
+void ADIOI_ZOIDFS_Close(ADIO_File fd, int *error_code)
+{
+ ADIOI_Free(fd->fs_ptr); /* storage obtained with ADIOI_Malloc in ADIOI_ZOIDFS_Open */
+ fd->fs_ptr = NULL; /* do not leave a dangling pointer in fd */
+
+ /* At some point or another it was decided that ROMIO would not
+ * explicitly flush (other than any local cache) on close, because
+ * there is no way to *avoid* that overhead if you implement it here
+ * and don't actually want it.
+ */
+ /* no zoidfs call is made here, so close always reports success */
+ *error_code = MPI_SUCCESS;
+}
+/*
+ * vim: ts=8 sts=4 sw=4 noexpandtab
+ */
diff --git a/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_common.c b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_common.c
new file mode 100644
index 0000000000..3437359da7
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_common.c
@@ -0,0 +1,126 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ * Copyright (C) 2003 University of Chicago.
+ * See COPYRIGHT notice in top-level directory.
+ */
+
+#include "ad_zoidfs.h"
+#include "ad_zoidfs_common.h"
+#include
+#include
+
+/* keyval hack to both tell us if we've already initialized zoidfs and also
+ * close it down when mpi exits */
+int ADIOI_ZOIDFS_Initialized = MPI_KEYVAL_INVALID;
+
+void ADIOI_ZOIDFS_End(int *error_code)
+{
+    static char myname[] = "ADIOI_ZOIDFS_END";
+    int mpi_result = MPI_SUCCESS;
+    const int rc = zoidfs_finalize();
+
+    /* a nonzero status from zoidfs_finalize() is reported as an MPI error */
+    if (rc != 0) {
+        /* --BEGIN ERROR HANDLING-- */
+        mpi_result = MPIO_Err_create_code(MPI_SUCCESS,
+                                          MPIR_ERR_RECOVERABLE,
+                                          myname, __LINE__,
+                                          ADIOI_ZOIDFS_error_convert(rc),
+                                          "Error in zoidfs_finalize", 0);
+        /* --END ERROR HANDLING-- */
+    }
+
+    /* single exit: hand back MPI_SUCCESS or the code built above */
+    *error_code = mpi_result;
+}
+
+int ADIOI_ZOIDFS_End_call(MPI_Comm comm, int keyval,
+ void *attribute_val, void *extra_state)
+{ /* registered by ADIOI_ZOIDFS_Init as the delete callback of its keyval */
+ int error_code; /* filled in by ADIOI_ZOIDFS_End and returned below */
+ ADIOI_ZOIDFS_End(&error_code); /* shuts the zoidfs interface down */
+ MPI_Keyval_free(&keyval); /* NOTE(review): acts on the by-value copy; ADIOI_ZOIDFS_Initialized is not reset here */
+ return error_code;
+}
+
+void ADIOI_ZOIDFS_Init(int rank, int *error_code )
+{
+    int ret;
+    static char myname[] = "ADIOI_ZOIDFS_INIT";
+    /* One-time zoidfs start-up; *error_code is assigned on every return path.
+     * Do nothing if we've already fired up the zoidfs interface. */
+    if (ADIOI_ZOIDFS_Initialized != MPI_KEYVAL_INVALID) {
+        *error_code = MPI_SUCCESS;
+        return;
+    }
+    /* any status other than ZFS_OK is a failure, as in the other zoidfs calls */
+    ret = zoidfs_init();
+    if (ret != ZFS_OK) {
+        *error_code = MPIO_Err_create_code(MPI_SUCCESS,
+                                           MPIR_ERR_RECOVERABLE,
+                                           myname, __LINE__,
+                                           ADIOI_ZOIDFS_error_convert(ret),
+                                           "Error in zoidfs_init",
+                                           0);
+        return;
+    }
+
+    MPI_Keyval_create(MPI_NULL_COPY_FN, ADIOI_ZOIDFS_End_call,
+                      &ADIOI_ZOIDFS_Initialized, (void *)0);
+    /* just like romio does, we make a dummy attribute so we get cleaned up */
+    MPI_Attr_put(MPI_COMM_SELF, ADIOI_ZOIDFS_Initialized, (void *)0);
+    *error_code = MPI_SUCCESS; /* callers test this right after the call */
+}
+
+void ADIOI_ZOIDFS_makeattribs(zoidfs_sattr_t * attribs)
+{
+ memset(attribs, 0, sizeof(zoidfs_sattr_t)); /* start from an all-zero attribute set */
+ /* the mask names the mode attribute only; the mode itself is 0644 */
+ attribs->mask = ZOIDFS_ATTR_MODE;
+ attribs->mode = 0644;
+}
+
+int ADIOI_ZOIDFS_error_convert(int error)
+{ /* translate a zoidfs status code into an MPI error class */
+ switch (error)
+ {
+ case ZFSERR_PERM: /* ??? */
+ case ZFSERR_ACCES:
+ return MPI_ERR_ACCESS;
+ case ZFSERR_NOENT:
+ case ZFSERR_NXIO: /* ??? */
+ case ZFSERR_NODEV: /* ??? */
+ return MPI_ERR_NO_SUCH_FILE;
+ case ZFSERR_IO:
+ return MPI_ERR_IO;
+ case ZFSERR_EXIST:
+ return MPI_ERR_FILE_EXISTS;
+ case ZFSERR_NOTDIR: /* ??? */
+ case ZFSERR_ISDIR: /* ??? */
+ case ZFSERR_NAMETOOLONG:
+ return MPI_ERR_BAD_FILE;
+ case ZFSERR_INVAL:
+ case ZFSERR_STALE:
+ return MPI_ERR_FILE;
+ case ZFSERR_FBIG: /* ??? */
+ case ZFSERR_NOSPC:
+ return MPI_ERR_NO_SPACE;
+ case ZFSERR_ROFS:
+ return MPI_ERR_READ_ONLY;
+ case ZFSERR_NOTIMPL:
+ return MPI_ERR_UNSUPPORTED_OPERATION;
+ case ZFSERR_DQUOT:
+ return MPI_ERR_QUOTA;
+ /* case ZFSERR_NOTEMPTY: (not mapped; handled by default) */
+ /* case ZFSERR_WFLUSH: (not mapped; handled by default) */
+ /* case ZFSERR_OTHER: (not mapped; handled by default) */
+ case ZFSERR_NOMEM:
+ return MPI_ERR_INTERN;
+ default: /* every code without a mapping above */
+ return MPI_UNDEFINED;
+ }
+}
+
+/*
+ * vim: ts=8 sts=4 sw=4 noexpandtab
+ */
diff --git a/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_common.h b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_common.h
new file mode 100644
index 0000000000..b519f791dd
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_common.h
@@ -0,0 +1,43 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*-
+ * vim: ts=8 sts=4 sw=4 noexpandtab
+ *
+ * Copyright (C) 1997 University of Chicago.
+ * See COPYRIGHT notice in top-level directory.
+ */
+
+#ifndef _AD_ZOIDFS_COMMON_H
+#define _AD_ZOIDFS_COMMON_H
+#include "ad_zoidfs.h"
+
+/* The ESTALE problem:
+ * The IO forwarding protocol can respond to any call with ESTALE, which means
+ * the handle upon which that call operates has expired from the metadata
+ * cache.  We thus wrap any zoidfs routine (expr) in this macro.
+ *
+ * ROMIO stores the filename in the ADIO_File structure (fd), so we can always
+ * re-lookup in response to ESTALE.  Note that (expr) is evaluated again after
+ * each successful re-lookup, so it must be safe to repeat. */
+#define NO_STALE(ret, fd, handle_p, expr) \
+    do { \
+        (ret) = (expr); \
+        while ((ret) == ZFSERR_STALE) { \
+            /* lookup again */ \
+            (ret) = zoidfs_lookup(NULL, NULL, (fd)->filename, \
+                    (zoidfs_handle_t*)((fd)->fs_ptr), ZOIDFS_NO_OP_HINT); \
+            if ((ret) == ZFS_OK) { \
+                *((ADIOI_ZOIDFS_object*)(handle_p)) \
+                    = *((ADIOI_ZOIDFS_object*)((fd)->fs_ptr)); \
+                /* re-execute the expr with new handle */ \
+                (ret) = (expr); \
+            } \
+        } \
+    } while (0)
+
+void ADIOI_ZOIDFS_Init(int rank, int *error_code );
+void ADIOI_ZOIDFS_makeattribs(zoidfs_sattr_t * attribs);
+void ADIOI_ZOIDFS_End(int *error_code);
+int ADIOI_ZOIDFS_End_call(MPI_Comm comm, int keyval,
+ void *attribute_val, void *extra_state);
+int ADIOI_ZOIDFS_error_convert(int error);
+
+#endif
diff --git a/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_delete.c b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_delete.c
new file mode 100644
index 0000000000..58d3bc0bb9
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_delete.c
@@ -0,0 +1,45 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ *
+ * Copyright (C) 2003 University of Chicago.
+ * See COPYRIGHT notice in top-level directory.
+ */
+
+#include "ad_zoidfs.h"
+#include "adio.h"
+
+#include "ad_zoidfs_common.h"
+
+void ADIOI_ZOIDFS_Delete(char *filename, int *error_code)
+{
+    static char myname[] = "ADIOI_ZOIDFS_DELETE";
+    int rc;
+
+    /* the zoidfs interface has to be up before anything can be removed */
+    ADIOI_ZOIDFS_Init(0, error_code);
+    /* --BEGIN ERROR HANDLING-- */
+    if (*error_code != MPI_SUCCESS) {
+        return; /* ADIOI_ZOIDFS_Init creates its own error codes */
+    }
+    /* --END ERROR HANDLING-- */
+
+    /* only the path name is supplied; the other name arguments are NULL */
+    rc = zoidfs_remove(NULL, NULL, filename, NULL, ZOIDFS_NO_OP_HINT);
+    if (rc != ZFS_OK) {
+        /* --BEGIN ERROR HANDLING-- */
+        *error_code = MPIO_Err_create_code(MPI_SUCCESS,
+                                           MPIR_ERR_RECOVERABLE,
+                                           myname, __LINE__,
+                                           ADIOI_ZOIDFS_error_convert(rc),
+                                           "Error in zoidfs_remove", 0);
+        /* --END ERROR HANDLING-- */
+    } else {
+        /* zoidfs_remove reported ZFS_OK, so all that is left is to tell
+         * the caller */
+        *error_code = MPI_SUCCESS;
+    }
+}
+
+/*
+ * vim: ts=8 sts=4 sw=4 noexpandtab
+ */
diff --git a/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_fcntl.c b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_fcntl.c
new file mode 100644
index 0000000000..04cd2b8f9c
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_fcntl.c
@@ -0,0 +1,60 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ *
+ * Copyright (C) 1997 University of Chicago.
+ * See COPYRIGHT notice in top-level directory.
+ */
+
+#include "ad_zoidfs.h"
+#include "adio_extern.h"
+#include "ad_zoidfs_common.h"
+
+void ADIOI_ZOIDFS_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct,
+                        int *error_code)
+{
+    int ret;
+    zoidfs_attr_t attr;
+    ADIOI_ZOIDFS_object *zoidfs_obj_ptr = (ADIOI_ZOIDFS_object*)fd->fs_ptr;
+    static char myname[] = "ADIOI_ZOIDFS_FCNTL";
+
+    /* Only the file-size query and preallocation are serviced; every other
+     * flag, ADIO_FCNTL_SET_ATOMICITY included, is rejected as MPI_ERR_ARG. */
+    switch(flag) {
+    case ADIO_FCNTL_GET_FSIZE:
+        attr.mask = ZOIDFS_ATTR_SIZE; /* request the size attribute only */
+        NO_STALE(ret, fd, zoidfs_obj_ptr,
+                 zoidfs_getattr(zoidfs_obj_ptr, &attr, ZOIDFS_NO_OP_HINT));
+        if ( !(attr.mask & ZOIDFS_ATTR_SIZE) || (ret != ZFS_OK ) ) {
+            /* --BEGIN ERROR HANDLING-- */
+            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
+                                               MPIR_ERR_RECOVERABLE,
+                                               myname, __LINE__,
+                                               ADIOI_ZOIDFS_error_convert(ret),
+                                               "Error in zoidfs_getattr", 0);
+            /* --END ERROR HANDLING-- */
+        }
+        else {
+            *error_code = MPI_SUCCESS;
+            fcntl_struct->fsize = attr.size; /* attr.size is valid only here */
+        }
+        return;
+
+    case ADIO_FCNTL_SET_DISKSPACE:
+        ADIOI_GEN_Prealloc(fd, fcntl_struct->diskspace, error_code);
+        break;
+
+    /* --BEGIN ERROR HANDLING-- */
+    case ADIO_FCNTL_SET_ATOMICITY:
+    default:
+        *error_code = MPIO_Err_create_code(MPI_SUCCESS,
+                                           MPIR_ERR_RECOVERABLE,
+                                           myname, __LINE__,
+                                           MPI_ERR_ARG,
+                                           "**flag", "**flag %d", flag);
+    /* --END ERROR HANDLING-- */
+    }
+}
+
+/*
+ * vim: ts=8 sts=4 sw=4 noexpandtab
+ */
diff --git a/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_features.c b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_features.c
new file mode 100644
index 0000000000..892e4ca548
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_features.c
@@ -0,0 +1,16 @@
+#include "adio.h"
+#include "ad_zoidfs.h"
+
+int ADIOI_ZOIDFS_Feature(ADIO_File fd, int flag)
+{
+    /* Scalable open is the only optional ADIO feature this driver claims.
+     * Shared file pointers, locks, sequential mode, data-sieving writes and
+     * any flag not listed here are all answered with 0. */
+    if (flag == ADIO_SCALABLE_OPEN) {
+        return 1;
+    }
+
+    /* ADIO_SHARED_FP, ADIO_LOCKS, ADIO_SEQUENTIAL,
+     * ADIO_DATA_SIEVING_WRITES and everything else */
+    return 0;
+}
diff --git a/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_flush.c b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_flush.c
new file mode 100644
index 0000000000..6191dada1d
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_flush.c
@@ -0,0 +1,52 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ *
+ * Copyright (C) 1997 University of Chicago.
+ * See COPYRIGHT notice in top-level directory.
+ */
+
+#include "ad_zoidfs.h"
+#include "ad_zoidfs_common.h"
+
+/* we want to be a bit clever here: at scale, if every client sends a
+ * flush request, it will stress the file system with redundant
+ * commit requests. Instead, one process should wait for
+ * everyone to catch up, do the sync, then broadcast the result.
+ */
+
+void ADIOI_ZOIDFS_Flush(ADIO_File fd, int *error_code)
+{
+ int ret, rank, dummy=0, dummy_in=0;
+ ADIOI_ZOIDFS_object *zoidfs_obj_ptr;
+ static char myname[] = "ADIOI_ZOIDFS_FLUSH";
+
+ *error_code = MPI_SUCCESS;
+
+ zoidfs_obj_ptr = (ADIOI_ZOIDFS_object*)fd->fs_ptr;
+
+ MPI_Comm_rank(fd->comm, &rank);
+
+ /* collective call to ensure no outstanding write requests. reduce is
+ * slightly less expensive than barrier */
+ MPI_Reduce(&dummy_in, &dummy, 1, MPI_INT, MPI_SUM,
+ fd->hints->ranklist[0], fd->comm);
+ /* only rank ranklist[0] commits; the other ranks get ret from the Bcast */
+ if (rank == fd->hints->ranklist[0]) {
+ ret = zoidfs_commit(zoidfs_obj_ptr, ZOIDFS_NO_OP_HINT);
+ }
+ MPI_Bcast(&ret, 1, MPI_INT, fd->hints->ranklist[0], fd->comm);
+
+ /* --BEGIN ERROR HANDLING-- */
+ if (ret != 0) {
+ *error_code = MPIO_Err_create_code(MPI_SUCCESS,
+ MPIR_ERR_RECOVERABLE,
+ myname, __LINE__,
+ ADIOI_ZOIDFS_error_convert(ret),
+ "Error in zoidfs_commit", 0);
+ }
+ /* --END ERROR HANDLING-- */
+}
+
+/*
+ * vim: ts=8 sts=4 sw=4 noexpandtab
+ */
diff --git a/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_io.c b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_io.c
new file mode 100644
index 0000000000..f785d0da84
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_io.c
@@ -0,0 +1,95 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*-
+ *
+ * Copyright (C) 1997 University of Chicago.
+ * See COPYRIGHT notice in top-level directory.
+ */
+
+#include "adio.h"
+#include "adio_extern.h"
+#include "ad_zoidfs.h"
+
+#include "ad_zoidfs_common.h"
+
+#define ZOIDFS_READ 0
+#define ZOIDFS_WRITE 1
+
+static void ZOIDFS_IOContig(ADIO_File fd, void * buf, int count,
+                            MPI_Datatype datatype, int file_ptr_type,
+                            ADIO_Offset offset, ADIO_Status *status,
+                            int flag, int *error_code)
+{
+    int ret, datatype_size;
+    uint64_t file_len;
+    size_t mem_len;
+    ADIOI_ZOIDFS_object *zoidfs_obj_ptr;
+    uint64_t file_offset = offset;
+    static char myname[] = "ADIOI_ZOIDFS_IOCONTIG";
+    /* Shared contiguous path: flag == ZOIDFS_READ reads, anything else writes. */
+    zoidfs_obj_ptr = (ADIOI_ZOIDFS_object*)fd->fs_ptr;
+
+    MPI_Type_size(datatype, &datatype_size);
+    /* multiply in size_t, not int: the int product overflows at 2 GiB */
+    file_len = mem_len = (size_t)datatype_size * (size_t)count;
+    if (file_ptr_type == ADIO_INDIVIDUAL) {
+        file_offset = fd->fp_ind; /* individual pointer overrides offset */
+    }
+
+    if (flag == ZOIDFS_READ) {
+        NO_STALE(ret, fd, zoidfs_obj_ptr,
+                 zoidfs_read(zoidfs_obj_ptr,
+                             1, &buf, &mem_len,
+                             1, &file_offset, &file_len, ZOIDFS_NO_OP_HINT));
+    } else {
+        NO_STALE(ret, fd, zoidfs_obj_ptr,
+                 zoidfs_write(zoidfs_obj_ptr,
+                              1, (const void **)&buf, &mem_len,
+                              1, &file_offset, &file_len, ZOIDFS_NO_OP_HINT));
+    }
+    /* --BEGIN ERROR HANDLING-- */
+    if (ret != ZFS_OK ) {
+        *error_code = MPIO_Err_create_code(MPI_SUCCESS,
+                                           MPIR_ERR_RECOVERABLE,
+                                           myname, __LINE__,
+                                           ADIOI_ZOIDFS_error_convert(ret),
+                                           "Error in ZOIDFS I/O", 0);
+        goto fn_exit;
+    }
+    /* --END ERROR HANDLING-- */
+
+    if (file_ptr_type == ADIO_INDIVIDUAL) {
+        fd->fp_ind += file_len;
+    }
+    fd->fp_sys_posn = file_offset + file_len;
+
+#ifdef HAVE_STATUS_SET_BYTES
+    MPIR_Status_set_bytes(status, datatype, file_len);
+#endif
+
+    *error_code = MPI_SUCCESS;
+
+fn_exit:
+    return;
+}
+
+void ADIOI_ZOIDFS_ReadContig(ADIO_File fd, void *buf, int count,
+ MPI_Datatype datatype, int file_ptr_type,
+ ADIO_Offset offset, ADIO_Status *status,
+ int *error_code)
+{ /* contiguous read: forwards every argument to ZOIDFS_IOContig with ZOIDFS_READ */
+ ZOIDFS_IOContig(fd, buf, count, datatype, file_ptr_type,
+ offset, status, ZOIDFS_READ, error_code);
+}
+
+void ADIOI_ZOIDFS_WriteContig(ADIO_File fd, void *buf, int count,
+ MPI_Datatype datatype, int file_ptr_type,
+ ADIO_Offset offset, ADIO_Status *status,
+ int *error_code)
+{ /* contiguous write: forwards every argument to ZOIDFS_IOContig with ZOIDFS_WRITE */
+ ZOIDFS_IOContig(fd, buf, count, datatype, file_ptr_type,
+ offset, status, ZOIDFS_WRITE, error_code);
+}
+
+
+/*
+ * vim: ts=8 sts=4 sw=4 noexpandtab
+ */
diff --git a/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_open.c b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_open.c
new file mode 100644
index 0000000000..f0d5484223
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_open.c
@@ -0,0 +1,153 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*-
+ * vim: ts=8 sts=4 sw=4 noexpandtab
+ *
+ * Copyright (C) 2007 University of Chicago.
+ * See COPYRIGHT notice in top-level directory.
+ */
+
+#include "ad_zoidfs.h"
+#include "ad_zoidfs_common.h"
+
+/* open_status is helpful for bcasting values around */
+struct open_status_s {
+ int error; /* zoidfs status of the create/lookup done by the opening rank */
+ zoidfs_handle_t handle; /* handle produced by that create/lookup */
+};
+typedef struct open_status_s open_status;
+
+static void fake_an_open(char *fname, int access_mode,
+ int nr_datafiles, MPI_Offset strip_size,
+ ADIOI_ZOIDFS_object *zoidfs_ptr,
+ open_status *o_status)
+{
+ int ret, created;
+ zoidfs_sattr_t attribs;
+ zoidfs_handle_t handle;
+ /* nr_datafiles, strip_size and zoidfs_ptr are accepted but never used here */
+ ADIOI_ZOIDFS_makeattribs(&attribs);
+
+ /* zoidfs_create succeeds even if a file already exists, so we can do
+ * our job with fewer calls than in other cases. However, we need to
+ * be careful with ADIO_EXCL.
+ */
+ if (access_mode & ADIO_CREATE) {
+ ret = zoidfs_create(NULL, NULL,
+ fname, &attribs, &handle, &created, ZOIDFS_NO_OP_HINT);
+ if ((ret == ZFS_OK) && !created && (access_mode & ADIO_EXCL)) {
+ /* lookup should not succeed if opened with EXCL */
+ o_status->error = ZFSERR_EXIST;
+ return;
+ }
+ }
+ else {
+ ret = zoidfs_lookup(NULL, NULL, fname, &handle, ZOIDFS_NO_OP_HINT);
+ }
+ /* NOTE(review): if the call above failed, handle may be uninitialized when copied */
+ o_status->error = ret;
+ o_status->handle = handle;
+ return;
+}
+
+
+/* ADIOI_ZOIDFS_Open:
+ * one process opens (or creates) the file, then broadcasts the result to the
+ * remaining processors.
+ *
+ * ADIO_Open used to perform an optimization when MPI_MODE_CREATE (and before
+ * that, MPI_MODE_EXCL) was set. Because ZoidFS handles file lookup and
+ * creation more scalably than traditional file systems, ADIO_Open now skips any
+ * special handling when CREATE is set. */
+void ADIOI_ZOIDFS_Open(ADIO_File fd, int *error_code)
+{
+ int rank;
+ static char myname[] = "ADIOI_ZOIDFS_OPEN";
+ ADIOI_ZOIDFS_object *zoidfs_obj_ptr;
+
+ /* since one process is doing the open, that means one process is also
+ * doing the error checking. define a struct for both the object reference
+ * and the error code to broadcast to all the processors */
+
+ open_status o_status;
+ MPI_Datatype open_status_type;
+ MPI_Datatype types[2] = {MPI_INT, MPI_BYTE}; /* error as one int, handle as raw bytes */
+ int lens[2] = {1, sizeof(ADIOI_ZOIDFS_object)};
+ MPI_Aint offsets[2];
+
+ memset(&o_status, 0, sizeof(o_status));
+ zoidfs_obj_ptr = (ADIOI_ZOIDFS_object *)
+ ADIOI_Malloc(sizeof(ADIOI_ZOIDFS_object));
+ /* --BEGIN ERROR HANDLING-- */
+ if (zoidfs_obj_ptr == NULL) {
+ *error_code = MPIO_Err_create_code(MPI_SUCCESS,
+ MPIR_ERR_RECOVERABLE,
+ myname, __LINE__,
+ MPI_ERR_UNKNOWN,
+ "Error allocating memory", 0);
+ return;
+ }
+ /* --END ERROR HANDLING-- */
+
+ MPI_Comm_rank(fd->comm, &rank);
+
+ ADIOI_ZOIDFS_Init(rank, error_code);
+ if (*error_code != MPI_SUCCESS)
+ {
+ /* ADIOI_ZOIDFS_INIT handles creating error codes on its own */
+ ADIOI_Free(zoidfs_obj_ptr);
+ return;
+ }
+
+ /* one process resolves name and will later bcast to others */
+#ifdef ADIOI_MPE_LOGGING
+ MPE_Log_event( ADIOI_MPE_open_a, 0, NULL );
+#endif
+ if (rank == fd->hints->ranklist[0] && fd->fs_ptr == NULL) {
+ fake_an_open(fd->filename, fd->access_mode,
+ fd->hints->striping_factor,
+ fd->hints->striping_unit,
+ zoidfs_obj_ptr, &o_status);
+ /* store credentials and object reference in fd */
+ *zoidfs_obj_ptr = o_status.handle;
+ fd->fs_ptr = zoidfs_obj_ptr;
+ }
+#ifdef ADIOI_MPE_LOGGING
+ MPE_Log_event( ADIOI_MPE_open_b, 0, NULL );
+#endif
+
+ /* broadcast status and (possibly valid) object reference */
+ MPI_Address(&o_status.error, &offsets[0]); /* absolute addresses, hence MPI_BOTTOM below */
+ MPI_Address(&o_status.handle, &offsets[1]);
+
+ MPI_Type_struct(2, lens, offsets, types, &open_status_type);
+ MPI_Type_commit(&open_status_type);
+
+ /* Assertion: if we hit this Bcast, then all processes collectively
+ * called this open.
+ *
+ * That's because deferred open never happens with this fs.
+ */
+ MPI_Bcast(MPI_BOTTOM, 1, open_status_type, fd->hints->ranklist[0],
+ fd->comm);
+ MPI_Type_free(&open_status_type);
+
+ /* --BEGIN ERROR HANDLING-- */
+ if (o_status.error != ZFS_OK)
+ {
+ ADIOI_Free(zoidfs_obj_ptr);
+ fd->fs_ptr = NULL;
+ *error_code = MPIO_Err_create_code(MPI_SUCCESS,
+ MPIR_ERR_RECOVERABLE,
+ myname, __LINE__,
+ ADIOI_ZOIDFS_error_convert(o_status.error),
+ "Unknown error", 0);
+ /* TODO: FIX STRING */
+ return;
+ }
+ /* --END ERROR HANDLING-- */
+ /* success: every rank, the opening one included, stores the broadcast handle */
+ *zoidfs_obj_ptr = o_status.handle;
+ fd->fs_ptr = zoidfs_obj_ptr;
+
+ *error_code = MPI_SUCCESS;
+ return;
+}
diff --git a/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_read_list.c b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_read_list.c
new file mode 100644
index 0000000000..ac3bf5d6bd
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_read_list.c
@@ -0,0 +1,826 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*-
+ * vim: ts=8 sts=4 sw=4 noexpandtab
+ *
+ * Copyright (C) 2008 University of Chicago.
+ * See COPYRIGHT notice in top-level directory.
+ */
+
+#include "adio.h"
+#include "adio_extern.h"
+#include "ad_zoidfs.h"
+
+#include "ad_zoidfs_common.h"
+
+/* Copied from ADIOI_PVFS2_OldReadStrided. It would be good to have fewer
+ * copies of this code... */
+void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count,
+ MPI_Datatype datatype, int file_ptr_type,
+ ADIO_Offset offset, ADIO_Status *status, int
+ *error_code)
+{
+ /* offset is in units of etype relative to the filetype. */
+ ADIOI_Flatlist_node *flat_buf, *flat_file;
+ int i, j, k, brd_size, frd_size=0, st_index=0;
+ int bufsize, sum, n_etypes_in_filetype, size_in_filetype;
+ int n_filetypes, etype_in_filetype;
+ ADIO_Offset abs_off_in_filetype=0;
+ int filetype_size, etype_size, buftype_size;
+ MPI_Aint filetype_extent, buftype_extent;
+ int buf_count, buftype_is_contig, filetype_is_contig;
+ ADIO_Offset off, disp, start_off, initial_off;
+ int flag, st_frd_size, st_n_filetypes;
+
+ size_t mem_list_count, file_list_count;
+ void ** mem_offsets;
+ uint64_t *file_offsets;
+ size_t *mem_lengths;
+ uint64_t *file_lengths;
+ int total_blks_to_read;
+
+ int max_mem_list, max_file_list;
+
+ int b_blks_read;
+ int f_data_read;
+ int size_read=0, n_read_lists, extra_blks;
+
+ int end_brd_size, end_frd_size;
+ int start_k, start_j, new_file_read, new_buffer_read;
+ int start_mem_offset;
+ ADIOI_ZOIDFS_object * zoidfs_obj_ptr;
+ int err_flag=0;
+ MPI_Offset total_bytes_read = 0;
+ static char myname[] = "ADIOI_ZOIDFS_ReadStrided";
+
+ /* note: I don't know what zoidfs will do if you pass it a super-long list,
+ * so let's keep with the PVFS limit for now */
+#define MAX_ARRAY_SIZE 64
+
+ *error_code = MPI_SUCCESS; /* changed below if error */
+
+ ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
+ ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
+
+ /* the HDF5 tests showed a bug in this list processing code (see many many
+ * lines down below). We added a workaround, but common HDF5 file types
+ * are actually contiguous and do not need the expensive workaround */
+ if (!filetype_is_contig) {
+ flat_file = ADIOI_Flatlist;
+ while (flat_file->type != fd->filetype) flat_file = flat_file->next;
+ if (flat_file->count == 1 && !buftype_is_contig)
+ filetype_is_contig = 1;
+ }
+
+ MPI_Type_size(fd->filetype, &filetype_size);
+ if ( ! filetype_size ) {
+ *error_code = MPI_SUCCESS;
+ return;
+ }
+
+ MPI_Type_extent(fd->filetype, &filetype_extent);
+ MPI_Type_size(datatype, &buftype_size);
+ MPI_Type_extent(datatype, &buftype_extent);
+ etype_size = fd->etype_size;
+
+ bufsize = buftype_size * count;
+
+ zoidfs_obj_ptr = (ADIOI_ZOIDFS_object *)fd->fs_ptr;
+
+ if (!buftype_is_contig && filetype_is_contig) {
+
+/* noncontiguous in memory, contiguous in file. */
+ uint64_t file_offsets;
+ uint64_t file_lengths;
+
+ ADIOI_Flatten_datatype(datatype);
+ flat_buf = ADIOI_Flatlist;
+ while (flat_buf->type != datatype) flat_buf = flat_buf->next;
+
+ off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind :
+ fd->disp + etype_size * offset;
+
+ file_list_count = 1;
+ file_offsets = off;
+ file_lengths = 0;
+ total_blks_to_read = count*flat_buf->count;
+ b_blks_read = 0;
+
+ /* allocate arrays according to max usage */
+ if (total_blks_to_read > MAX_ARRAY_SIZE)
+ mem_list_count = MAX_ARRAY_SIZE;
+ else mem_list_count = total_blks_to_read;
+ mem_offsets = (void*)ADIOI_Malloc(mem_list_count*sizeof(void*));
+ mem_lengths = (size_t*)ADIOI_Malloc(mem_list_count*sizeof(size_t));
+
+ /* TODO: CHECK RESULTS OF MEMORY ALLOCATION */
+
+ j = 0;
+ /* step through each block in memory, filling memory arrays */
+ while (b_blks_read < total_blks_to_read) {
+ for (i=0; icount; i++) {
+ mem_offsets[b_blks_read % MAX_ARRAY_SIZE] =
+ buf + j*buftype_extent + flat_buf->indices[i];
+ mem_lengths[b_blks_read % MAX_ARRAY_SIZE] =
+ flat_buf->blocklens[i];
+ file_lengths += flat_buf->blocklens[i];
+ b_blks_read++;
+ if (!(b_blks_read % MAX_ARRAY_SIZE) ||
+ (b_blks_read == total_blks_to_read)) {
+
+ /* in the case of the last read list call,
+ adjust mem_list_count */
+ if (b_blks_read == total_blks_to_read) {
+ mem_list_count = total_blks_to_read % MAX_ARRAY_SIZE;
+ /* in case last read list call fills max arrays */
+ if (!mem_list_count) mem_list_count = MAX_ARRAY_SIZE;
+ }
+#ifdef ADIOI_MPE_LOGGING
+ MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
+#endif
+ NO_STALE(err_flag, fd, zoidfs_obj_ptr,
+ zoidfs_read(zoidfs_obj_ptr,
+ mem_list_count,
+ mem_offsets, mem_lengths,
+ 1, &file_offsets, &file_lengths, ZOIDFS_NO_OP_HINT));
+#ifdef ADIOI_MPE_LOGGING
+ MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
+#endif
+ /* --BEGIN ERROR HANDLING-- */
+ if (err_flag != ZFS_OK) {
+ *error_code = MPIO_Err_create_code(MPI_SUCCESS,
+ MPIR_ERR_RECOVERABLE,
+ myname, __LINE__,
+ ADIOI_ZOIDFS_error_convert(err_flag),
+ "Error in zoidfs_read", 0);
+ goto error_state;
+ }
+ total_bytes_read += file_lengths;
+ /* --END ERROR HANDLING-- */
+
+ /* in the case of error or the last read list call,
+ * leave here */
+ if (err_flag || b_blks_read == total_blks_to_read) break;
+
+ file_offsets += file_lengths;
+ file_lengths = 0;
+ }
+ } /* for (i=0; icount; i++) */
+ j++;
+ } /* while (b_blks_read < total_blks_to_read) */
+ ADIOI_Free(mem_offsets);
+ ADIOI_Free(mem_lengths);
+
+ if (file_ptr_type == ADIO_INDIVIDUAL)
+ fd->fp_ind += total_bytes_read;
+
+ fd->fp_sys_posn = -1; /* set it to null. */
+
+#ifdef HAVE_STATUS_SET_BYTES
+ MPIR_Status_set_bytes(status, datatype, bufsize);
+ /* This is a temporary way of filling in status. The right way is to
+ keep track of how much data was actually read and placed in buf
+ by ADIOI_BUFFERED_READ. */
+#endif
+ ADIOI_Delete_flattened(datatype);
+
+ return;
+ } /* if (!buftype_is_contig && filetype_is_contig) */
+
+ /* know file is noncontiguous from above */
+ /* noncontiguous in file */
+
+ /* filetype already flattened in ADIO_Open */
+ flat_file = ADIOI_Flatlist;
+ while (flat_file->type != fd->filetype) flat_file = flat_file->next;
+
+ disp = fd->disp;
+ initial_off = offset;
+
+
+ /* for each case - ADIO_Individual pointer or explicit, find the file
+ offset in bytes (offset), n_filetypes (how many filetypes into
+ file to start), frd_size (remaining amount of data in present
+ file block), and st_index (start point in terms of blocks in
+ starting filetype) */
+ if (file_ptr_type == ADIO_INDIVIDUAL) {
+ offset = fd->fp_ind; /* in bytes */
+ n_filetypes = -1;
+ flag = 0;
+ while (!flag) {
+ n_filetypes++;
+ for (i=0; icount; i++) {
+ if (disp + flat_file->indices[i] +
+ ((ADIO_Offset) n_filetypes)*filetype_extent +
+ flat_file->blocklens[i] >= offset) {
+ st_index = i;
+ frd_size = disp + flat_file->indices[i] +
+ ((ADIO_Offset) n_filetypes)*filetype_extent
+ + flat_file->blocklens[i] - offset;
+ flag = 1;
+ break;
+ }
+ }
+ } /* while (!flag) */
+ } /* if (file_ptr_type == ADIO_INDIVIDUAL) */
+ else {
+ n_etypes_in_filetype = filetype_size/etype_size;
+ n_filetypes = (int) (offset / n_etypes_in_filetype);
+ etype_in_filetype = (int) (offset % n_etypes_in_filetype);
+ size_in_filetype = etype_in_filetype * etype_size;
+
+ sum = 0;
+ for (i=0; icount; i++) {
+ sum += flat_file->blocklens[i];
+ if (sum > size_in_filetype) {
+ st_index = i;
+ frd_size = sum - size_in_filetype;
+ abs_off_in_filetype = flat_file->indices[i] +
+ size_in_filetype - (sum - flat_file->blocklens[i]);
+ break;
+ }
+ }
+
+ /* abs. offset in bytes in the file */
+ offset = disp + ((ADIO_Offset) n_filetypes)*filetype_extent +
+ abs_off_in_filetype;
+ } /* else [file_ptr_type != ADIO_INDIVIDUAL] */
+
+ start_off = offset;
+ st_frd_size = frd_size;
+ st_n_filetypes = n_filetypes;
+
+ if (buftype_is_contig && !filetype_is_contig) {
+
+/* contiguous in memory, noncontiguous in file. should be the most
+ common case. */
+
+ /* only one memory off-len pair, so no array here */
+ size_t mem_lengths;
+ size_t mem_offsets;
+
+ i = 0;
+ j = st_index;
+ n_filetypes = st_n_filetypes;
+
+ mem_list_count = 1;
+
+ /* determine how many blocks in file to read */
+ f_data_read = ADIOI_MIN(st_frd_size, bufsize);
+ total_blks_to_read = 1;
+ if (j < (flat_file->count-1)) j++;
+ else {
+ j = 0;
+ n_filetypes++;
+ }
+ while (f_data_read < bufsize) {
+ f_data_read += flat_file->blocklens[j];
+ total_blks_to_read++;
+ if (j<(flat_file->count-1)) j++;
+ else j = 0;
+ }
+
+ j = st_index;
+ n_filetypes = st_n_filetypes;
+ n_read_lists = total_blks_to_read/MAX_ARRAY_SIZE;
+ extra_blks = total_blks_to_read%MAX_ARRAY_SIZE;
+
+ mem_offsets = (size_t)buf;
+ mem_lengths = 0;
+
+ /* if at least one full readlist, allocate file arrays
+ at max array size and don't free until very end */
+ if (n_read_lists) {
+ file_offsets = (int64_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
+ sizeof(int64_t));
+ file_lengths = (uint64_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
+ sizeof(uint64_t));
+ }
+ /* if there's no full readlist allocate file arrays according
+ to needed size (extra_blks) */
+ else {
+ file_offsets = (int64_t*)ADIOI_Malloc(extra_blks*
+ sizeof(int64_t));
+ file_lengths = (uint64_t*)ADIOI_Malloc(extra_blks*
+ sizeof(uint64_t));
+ }
+
+ /* for file arrays that are of MAX_ARRAY_SIZE, build arrays */
+ for (i=0; iindices[j];
+ file_lengths[k] = flat_file->blocklens[j];
+ mem_lengths += file_lengths[k];
+ }
+ if (j<(flat_file->count - 1)) j++;
+ else {
+ j = 0;
+ n_filetypes++;
+ }
+ } /* for (k=0; kindices[j];
+ if (k == (extra_blks - 1)) {
+ file_lengths[k] = bufsize - mem_lengths
+ - mem_offsets + (size_t)buf;
+ }
+ else file_lengths[k] = flat_file->blocklens[j];
+ } /* if(i || k) */
+ mem_lengths += file_lengths[k];
+ if (j<(flat_file->count - 1)) j++;
+ else {
+ j = 0;
+ n_filetypes++;
+ }
+ } /* for (k=0; ktype != datatype) flat_buf = flat_buf->next;
+
+ size_read = 0;
+ n_filetypes = st_n_filetypes;
+ frd_size = st_frd_size;
+ brd_size = flat_buf->blocklens[0];
+ buf_count = 0;
+ start_mem_offset = 0;
+ start_k = k = 0;
+ start_j = st_index;
+ max_mem_list = 0;
+ max_file_list = 0;
+
+ /* run through and file max_file_list and max_mem_list so that you
+ can allocate the file and memory arrays less than MAX_ARRAY_SIZE
+ if possible */
+
+ while (size_read < bufsize) {
+ k = start_k;
+ new_buffer_read = 0;
+ mem_list_count = 0;
+ while ((mem_list_count < MAX_ARRAY_SIZE) &&
+ (new_buffer_read < bufsize-size_read)) {
+ /* find mem_list_count and file_list_count such that both are
+ less than MAX_ARRAY_SIZE, the sum of their lengths are
+ equal, and the sum of all the data read and data to be
+ read in the next immediate read list is less than
+ bufsize */
+ if(mem_list_count) {
+ if((new_buffer_read + flat_buf->blocklens[k] +
+ size_read) > bufsize) {
+ end_brd_size = new_buffer_read +
+ flat_buf->blocklens[k] - (bufsize - size_read);
+ new_buffer_read = bufsize - size_read;
+ }
+ else {
+ new_buffer_read += flat_buf->blocklens[k];
+ end_brd_size = flat_buf->blocklens[k];
+ }
+ }
+ else {
+ if (brd_size > (bufsize - size_read)) {
+ new_buffer_read = bufsize - size_read;
+ brd_size = new_buffer_read;
+ }
+ else new_buffer_read = brd_size;
+ }
+ mem_list_count++;
+ k = (k + 1)%flat_buf->count;
+ } /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
+ (new_buffer_read < bufsize-size_read)) */
+ j = start_j;
+ new_file_read = 0;
+ file_list_count = 0;
+ while ((file_list_count < MAX_ARRAY_SIZE) &&
+ (new_file_read < new_buffer_read)) {
+ if(file_list_count) {
+ if((new_file_read + flat_file->blocklens[j]) >
+ new_buffer_read) {
+ end_frd_size = new_buffer_read - new_file_read;
+ new_file_read = new_buffer_read;
+ j--;
+ }
+ else {
+ new_file_read += flat_file->blocklens[j];
+ end_frd_size = flat_file->blocklens[j];
+ }
+ }
+ else {
+ if (frd_size > new_buffer_read) {
+ new_file_read = new_buffer_read;
+ frd_size = new_file_read;
+ }
+ else new_file_read = frd_size;
+ }
+ file_list_count++;
+ if (j < (flat_file->count - 1)) j++;
+ else j = 0;
+
+ k = start_k;
+ if ((new_file_read < new_buffer_read) &&
+ (file_list_count == MAX_ARRAY_SIZE)) {
+ new_buffer_read = 0;
+ mem_list_count = 0;
+ while (new_buffer_read < new_file_read) {
+ if(mem_list_count) {
+ if((new_buffer_read + flat_buf->blocklens[k]) >
+ new_file_read) {
+ end_brd_size = new_file_read - new_buffer_read;
+ new_buffer_read = new_file_read;
+ k--;
+ }
+ else {
+ new_buffer_read += flat_buf->blocklens[k];
+ end_brd_size = flat_buf->blocklens[k];
+ }
+ }
+ else {
+ new_buffer_read = brd_size;
+ if (brd_size > (bufsize - size_read)) {
+ new_buffer_read = bufsize - size_read;
+ brd_size = new_buffer_read;
+ }
+ }
+ mem_list_count++;
+ k = (k + 1)%flat_buf->count;
+ } /* while (new_buffer_read < new_file_read) */
+ } /* if ((new_file_read < new_buffer_read) && (file_list_count
+ == MAX_ARRAY_SIZE)) */
+ } /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
+ (new_buffer_read < bufsize-size_read)) */
+
+ /* fakes filling the readlist arrays of lengths found above */
+ k = start_k;
+ j = start_j;
+ for (i=0; iblocklens[k] == end_brd_size)
+ brd_size = flat_buf->blocklens[(k+1)%
+ flat_buf->count];
+ else {
+ brd_size = flat_buf->blocklens[k] - end_brd_size;
+ k--;
+ buf_count--;
+ }
+ }
+ }
+ buf_count++;
+ k = (k + 1)%flat_buf->count;
+ } /* for (i=0; iblocklens[j] == end_frd_size)
+ frd_size = flat_file->blocklens[(j+1)%
+ flat_file->count];
+ else {
+ frd_size = flat_file->blocklens[j] - end_frd_size;
+ j--;
+ }
+ }
+ }
+ if (j < flat_file->count - 1) j++;
+ else {
+ j = 0;
+ n_filetypes++;
+ }
+ } /* for (i=0; iblocklens[0] ) ) ||
+ ((mem_list_count == 1) &&
+ (new_buffer_read < flat_buf->blocklens[0]) ) ||
+ ((file_list_count == MAX_ARRAY_SIZE) &&
+ (new_file_read < flat_buf->blocklens[0]) ) ||
+ ( (mem_list_count == MAX_ARRAY_SIZE) &&
+ (new_buffer_read < flat_file->blocklens[0])) )
+ {
+
+ ADIOI_Delete_flattened(datatype);
+ ADIOI_GEN_ReadStrided_naive(fd, buf, count, datatype,
+ file_ptr_type, initial_off, status, error_code);
+ return;
+ }
+
+ mem_offsets = (void *)ADIOI_Malloc(max_mem_list*sizeof(void *));
+ mem_lengths = (size_t*)ADIOI_Malloc(max_mem_list*sizeof(size_t));
+ file_offsets = (uint64_t *)ADIOI_Malloc(max_file_list*sizeof(uint64_t));
+ file_lengths = (uint64_t *)ADIOI_Malloc(max_file_list*sizeof(uint64_t));
+
+ size_read = 0;
+ n_filetypes = st_n_filetypes;
+ frd_size = st_frd_size;
+ brd_size = flat_buf->blocklens[0];
+ buf_count = 0;
+ start_mem_offset = 0;
+ start_k = k = 0;
+ start_j = st_index;
+
+ /* this section calculates mem_list_count and file_list_count
+ and also finds the possibly odd sized last array elements
+ in new_frd_size and new_brd_size */
+
+ while (size_read < bufsize) {
+ k = start_k;
+ new_buffer_read = 0;
+ mem_list_count = 0;
+ while ((mem_list_count < MAX_ARRAY_SIZE) &&
+ (new_buffer_read < bufsize-size_read)) {
+ /* find mem_list_count and file_list_count such that both are
+ less than MAX_ARRAY_SIZE, the sum of their lengths are
+ equal, and the sum of all the data read and data to be
+ read in the next immediate read list is less than
+ bufsize */
+ if(mem_list_count) {
+ if((new_buffer_read + flat_buf->blocklens[k] +
+ size_read) > bufsize) {
+ end_brd_size = new_buffer_read +
+ flat_buf->blocklens[k] - (bufsize - size_read);
+ new_buffer_read = bufsize - size_read;
+ }
+ else {
+ new_buffer_read += flat_buf->blocklens[k];
+ end_brd_size = flat_buf->blocklens[k];
+ }
+ }
+ else {
+ if (brd_size > (bufsize - size_read)) {
+ new_buffer_read = bufsize - size_read;
+ brd_size = new_buffer_read;
+ }
+ else new_buffer_read = brd_size;
+ }
+ mem_list_count++;
+ k = (k + 1)%flat_buf->count;
+ } /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
+ (new_buffer_read < bufsize-size_read)) */
+ j = start_j;
+ new_file_read = 0;
+ file_list_count = 0;
+ while ((file_list_count < MAX_ARRAY_SIZE) &&
+ (new_file_read < new_buffer_read)) {
+ if(file_list_count) {
+ if((new_file_read + flat_file->blocklens[j]) >
+ new_buffer_read) {
+ end_frd_size = new_buffer_read - new_file_read;
+ new_file_read = new_buffer_read;
+ j--;
+ }
+ else {
+ new_file_read += flat_file->blocklens[j];
+ end_frd_size = flat_file->blocklens[j];
+ }
+ }
+ else {
+ if (frd_size > new_buffer_read) {
+ new_file_read = new_buffer_read;
+ frd_size = new_file_read;
+ }
+ else new_file_read = frd_size;
+ }
+ file_list_count++;
+ if (j < (flat_file->count - 1)) j++;
+ else j = 0;
+
+ k = start_k;
+ if ((new_file_read < new_buffer_read) &&
+ (file_list_count == MAX_ARRAY_SIZE)) {
+ new_buffer_read = 0;
+ mem_list_count = 0;
+ while (new_buffer_read < new_file_read) {
+ if(mem_list_count) {
+ if((new_buffer_read + flat_buf->blocklens[k]) >
+ new_file_read) {
+ end_brd_size = new_file_read - new_buffer_read;
+ new_buffer_read = new_file_read;
+ k--;
+ }
+ else {
+ new_buffer_read += flat_buf->blocklens[k];
+ end_brd_size = flat_buf->blocklens[k];
+ }
+ }
+ else {
+ new_buffer_read = brd_size;
+ if (brd_size > (bufsize - size_read)) {
+ new_buffer_read = bufsize - size_read;
+ brd_size = new_buffer_read;
+ }
+ }
+ mem_list_count++;
+ k = (k + 1)%flat_buf->count;
+ } /* while (new_buffer_read < new_file_read) */
+ } /* if ((new_file_read < new_buffer_read) && (file_list_count
+ == MAX_ARRAY_SIZE)) */
+ } /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
+ (new_buffer_read < bufsize-size_read)) */
+
+ /* fills the allocated readlist arrays */
+ k = start_k;
+ j = start_j;
+ for (i=0; icount) +
+ flat_buf->indices[k];
+ if(!i) {
+ mem_lengths[0] = brd_size;
+ mem_offsets[0] += flat_buf->blocklens[k] - brd_size;
+ }
+ else {
+ if (i == (mem_list_count - 1)) {
+ mem_lengths[i] = end_brd_size;
+ if (flat_buf->blocklens[k] == end_brd_size)
+ brd_size = flat_buf->blocklens[(k+1)%
+ flat_buf->count];
+ else {
+ brd_size = flat_buf->blocklens[k] - end_brd_size;
+ k--;
+ buf_count--;
+ }
+ }
+ else {
+ mem_lengths[i] = flat_buf->blocklens[k];
+ }
+ }
+ buf_count++;
+ k = (k + 1)%flat_buf->count;
+ } /* for (i=0; iindices[j] +
+ ((ADIO_Offset)n_filetypes) * filetype_extent;
+ if (!i) {
+ file_lengths[0] = frd_size;
+ file_offsets[0] += flat_file->blocklens[j] - frd_size;
+ }
+ else {
+ if (i == (file_list_count - 1)) {
+ file_lengths[i] = end_frd_size;
+ if (flat_file->blocklens[j] == end_frd_size)
+ frd_size = flat_file->blocklens[(j+1)%
+ flat_file->count];
+ else {
+ frd_size = flat_file->blocklens[j] - end_frd_size;
+ j--;
+ }
+ }
+ else file_lengths[i] = flat_file->blocklens[j];
+ }
+ if (j < flat_file->count - 1) j++;
+ else {
+ j = 0;
+ n_filetypes++;
+ }
+ } /* for (i=0; ifp_ind = file_offsets[file_list_count-1]+
+ file_lengths[file_list_count-1];
+ }
+
+ ADIOI_Free(file_offsets);
+ ADIOI_Free(file_lengths);
+
+ if (err_flag == 0) *error_code = MPI_SUCCESS;
+
+error_state:
+ fd->fp_sys_posn = -1; /* set it to null. */
+
+#ifdef HAVE_STATUS_SET_BYTES
+ MPIR_Status_set_bytes(status, datatype, bufsize);
+ /* This is a temporary way of filling in status. The right way is to
+ keep track of how much data was actually read and placed in buf
+ by ADIOI_BUFFERED_READ. */
+#endif
+
+ if (!buftype_is_contig) ADIOI_Delete_flattened(datatype);
+}
+
diff --git a/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_resize.c b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_resize.c
new file mode 100644
index 0000000000..de000f1e6e
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_resize.c
@@ -0,0 +1,53 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ *
+ * Copyright (C) 1997 University of Chicago.
+ * See COPYRIGHT notice in top-level directory.
+ */
+
+#include "ad_zoidfs.h"
+#include "ad_zoidfs_common.h"
+
+/* Collective resize (the MPI_File_set_size back end) for ZOIDFS.  Only the
+ * rank listed first in fd->hints->ranklist calls zoidfs_resize(); its return
+ * code is then broadcast so that every process reports the same outcome.
+ * On return *error_code is MPI_SUCCESS, or an MPI error code created from
+ * the zoidfs return value when that value is not ZFS_OK. */
+void ADIOI_ZOIDFS_Resize(ADIO_File fd, ADIO_Offset size, int *error_code)
+{
+    static char myname[] = "ADIOI_ZOIDFS_RESIZE";
+    ADIOI_ZOIDFS_object *handle = (ADIOI_ZOIDFS_object *)fd->fs_ptr;
+    int my_rank, root, rc;
+
+    *error_code = MPI_SUCCESS;
+
+    MPI_Comm_rank(fd->comm, &my_rank);
+    root = fd->hints->ranklist[0];
+
+    /* MPI-IO semantics treat conflicting MPI_File_set_size requests the
+     * same as conflicting write requests, so a resize issued by one
+     * process need not be visible to the others until a synchronization
+     * point is reached.  That is what lets a single process do the work. */
+    if (my_rank == root) {
+        /* presumably leaves the zoidfs_resize result in rc (tested below) */
+        NO_STALE(rc, fd, handle,
+                 zoidfs_resize(handle, size, ZOIDFS_NO_OP_HINT));
+    }
+
+    /* every rank, root included, takes part in the same broadcast */
+    MPI_Bcast(&rc, 1, MPI_INT, root, fd->comm);
+
+    /* --BEGIN ERROR HANDLING-- */
+    if (rc != ZFS_OK) {
+        *error_code = MPIO_Err_create_code(MPI_SUCCESS,
+                                           MPIR_ERR_RECOVERABLE,
+                                           myname, __LINE__,
+                                           ADIOI_ZOIDFS_error_convert(rc),
+                                           "Error in zoidfs_resize", 0);
+    }
+    /* --END ERROR HANDLING-- */
+}
+
+/*
+ * vim: ts=8 sts=4 sw=4 noexpandtab
+ */
diff --git a/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_write_list.c b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_write_list.c
new file mode 100644
index 0000000000..5ecf0c82ed
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/ad_zoidfs/ad_zoidfs_write_list.c
@@ -0,0 +1,857 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*-
+ * vim: ts=8 sts=4 sw=4 noexpandtab
+ *
+ * Copyright (C) 2008 University of Chicago.
+ * See COPYRIGHT notice in top-level directory.
+ */
+
+#include "adio.h"
+#include "adio_extern.h"
+#include "ad_zoidfs.h"
+
+#include "ad_zoidfs_common.h"
+
+/* Copied from ADIOI_PVFS2_OldWriteStrided. It would be good to have fewer
+ * copies of this code...
+ *
+ * Writes `count` items of `datatype` from `buf` through the file view of
+ * `fd` with zoidfs_write(), handing it memory and file offset/length lists
+ * of at most MAX_ARRAY_SIZE entries per call.
+ *
+ * file_ptr_type selects where the write starts: ADIO_EXPLICIT_OFFSET uses
+ * `offset` (counted in etypes), ADIO_INDIVIDUAL uses fd->fp_ind.
+ *
+ * Early exits visible below: atomic mode is rejected with an MPI_ERR_ARG
+ * based error code; a zero-size filetype returns MPI_SUCCESS at once; and
+ * for some list-size combinations the work is handed to
+ * ADIOI_GEN_WriteStrided_naive().
+ *
+ * *error_code receives MPI_SUCCESS or a code built by MPIO_Err_create_code.
+ * When HAVE_STATUS_SET_BYTES is defined, `status` is filled with the
+ * requested byte count (bufsize), not the number of bytes actually written. */
+void ADIOI_ZOIDFS_WriteStrided(ADIO_File fd, void *buf, int count,
+ MPI_Datatype datatype, int file_ptr_type,
+ ADIO_Offset offset, ADIO_Status *status,
+ int *error_code)
+{
+ /* as with all the other WriteStrided functions, offset is in units of
+ * etype relative to the filetype */
+
+ /* Since zoidfs does not support file locking, can't do buffered writes
+ as on Unix */
+
+ ADIOI_Flatlist_node *flat_buf, *flat_file;
+ int i, j, k, bwr_size, fwr_size=0, st_index=0;
+ int bufsize, sum, n_etypes_in_filetype, size_in_filetype;
+ int n_filetypes, etype_in_filetype;
+ ADIO_Offset abs_off_in_filetype=0;
+ int filetype_size, etype_size, buftype_size;
+ MPI_Aint filetype_extent, buftype_extent;
+ int buf_count, buftype_is_contig, filetype_is_contig;
+ ADIO_Offset off, disp, start_off, initial_off;
+ int flag, st_fwr_size, st_n_filetypes;
+ int err_flag=0;
+
+ size_t mem_list_count, file_list_count;
+ const void ** mem_offsets;
+ uint64_t *file_offsets;
+ size_t *mem_lengths;
+ uint64_t *file_lengths;
+ int total_blks_to_write;
+
+ int max_mem_list, max_file_list;
+
+ int b_blks_wrote;
+ int f_data_wrote;
+ int size_wrote=0, n_write_lists, extra_blks;
+
+ int end_bwr_size, end_fwr_size;
+ int start_k, start_j, new_file_write, new_buffer_write;
+ int start_mem_offset;
+ ADIOI_ZOIDFS_object *zoidfs_obj_ptr;
+ MPI_Offset total_bytes_written=0;
+ static char myname[] = "ADIOI_ZOIDFS_WRITESTRIDED";
+
+ /* note: I don't know what zoidfs will do if you pass it a super-long list,
+ * so let's keep with the PVFS limit for now */
+#define MAX_ARRAY_SIZE 64
+
+ /* --BEGIN ERROR HANDLING-- */
+ if (fd->atomicity) {
+ *error_code = MPIO_Err_create_code(MPI_SUCCESS,
+ MPIR_ERR_RECOVERABLE,
+ myname, __LINE__,
+ MPI_ERR_ARG,
+ "Atomic noncontiguous writes are not supported by ZOIDFS", 0);
+ return;
+ }
+ /* --END ERROR HANDLING-- */
+
+ ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
+ ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
+
+ /* the HDF5 tests showed a bug in this list processing code (see many many
+ * lines down below). We added a workaround, but common HDF5 file types
+ * are actually contiguous and do not need the expensive workaround */
+ if (!filetype_is_contig) {
+ flat_file = ADIOI_Flatlist;
+ while (flat_file->type != fd->filetype) flat_file = flat_file->next;
+ if (flat_file->count == 1 && !buftype_is_contig)
+ filetype_is_contig = 1; /* one-block filetype: take the contiguous-file path below */
+ }
+
+ MPI_Type_size(fd->filetype, &filetype_size);
+ if ( ! filetype_size ) {
+ *error_code = MPI_SUCCESS; /* zero-size filetype: succeed without writing */
+ return;
+ }
+
+ MPI_Type_extent(fd->filetype, &filetype_extent);
+ MPI_Type_size(datatype, &buftype_size);
+ MPI_Type_extent(datatype, &buftype_extent);
+ etype_size = fd->etype_size;
+
+ bufsize = buftype_size * count; /* total bytes described by (count, datatype) */
+
+ zoidfs_obj_ptr = (ADIOI_ZOIDFS_object*)fd->fs_ptr;
+
+ if (!buftype_is_contig && filetype_is_contig) {
+
+/* noncontiguous in memory, contiguous in file. */
+ uint64_t file_offsets;
+ uint64_t file_lengths;
+
+ ADIOI_Flatten_datatype(datatype);
+ flat_buf = ADIOI_Flatlist;
+ while (flat_buf->type != datatype) flat_buf = flat_buf->next;
+
+ if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
+ off = fd->disp + etype_size * offset; /* explicit offset is counted in etypes */
+ }
+ else off = fd->fp_ind;
+
+ file_list_count = 1;
+ file_offsets = off;
+ file_lengths = 0;
+ total_blks_to_write = count*flat_buf->count;
+ b_blks_wrote = 0;
+
+ /* allocate arrays according to max usage */
+ if (total_blks_to_write > MAX_ARRAY_SIZE)
+ mem_list_count = MAX_ARRAY_SIZE;
+ else mem_list_count = total_blks_to_write;
+ mem_offsets = (void*)ADIOI_Malloc(mem_list_count*sizeof(void*));
+ mem_lengths = (size_t*)ADIOI_Malloc(mem_list_count*sizeof(size_t));
+
+ j = 0;
+ /* step through each block in memory, filling memory arrays */
+ while (b_blks_wrote < total_blks_to_write) {
+ for (i=0; icount; i++) {
+ mem_offsets[b_blks_wrote % MAX_ARRAY_SIZE] =
+ buf + /* NOTE(review): arithmetic on void *buf; not standard C */
+ j*buftype_extent +
+ flat_buf->indices[i];
+ mem_lengths[b_blks_wrote % MAX_ARRAY_SIZE] =
+ flat_buf->blocklens[i];
+ file_lengths += flat_buf->blocklens[i];
+ b_blks_wrote++;
+ if (!(b_blks_wrote % MAX_ARRAY_SIZE) ||
+ (b_blks_wrote == total_blks_to_write)) {
+
+ /* in the case of the last write list call,
+ adjust mem_list_count */
+ if (b_blks_wrote == total_blks_to_write) {
+ mem_list_count = total_blks_to_write % MAX_ARRAY_SIZE;
+ /* in case last write list call fills max arrays */
+ if (!mem_list_count) mem_list_count = MAX_ARRAY_SIZE;
+ }
+#ifdef ADIOI_MPE_LOGGING
+ MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
+#endif
+ NO_STALE(err_flag, fd, zoidfs_obj_ptr,
+ zoidfs_write(zoidfs_obj_ptr,
+ mem_list_count,
+ mem_offsets, mem_lengths,
+ 1, &file_offsets, &file_lengths, ZOIDFS_NO_OP_HINT));
+
+ /* --BEGIN ERROR HANDLING-- */
+ if (err_flag != ZFS_OK) {
+ *error_code = MPIO_Err_create_code(MPI_SUCCESS,
+ MPIR_ERR_RECOVERABLE,
+ myname, __LINE__,
+ ADIOI_ZOIDFS_error_convert(err_flag),
+ "Error in zoidfs_write", 0);
+ break; /* NOTE(review): exits only the inner for loop; the enclosing while condition is not changed by the error, so writing appears to continue - confirm */
+ }
+#ifdef ADIOI_MPE_LOGGING
+ MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
+#endif
+ total_bytes_written += file_lengths;
+
+ /* in the case of error or the last write list call,
+ * leave here */
+ /* --BEGIN ERROR HANDLING-- */
+ if (err_flag) { /* NOTE(review): looks redundant with the ZFS_OK test above, assuming ZFS_OK is 0 - confirm */
+ *error_code = MPIO_Err_create_code(MPI_SUCCESS,
+ MPIR_ERR_RECOVERABLE,
+ myname, __LINE__,
+ ADIOI_ZOIDFS_error_convert(err_flag),
+ "Error in zoidfs_write", 0);
+ break;
+ }
+ /* --END ERROR HANDLING-- */
+ if (b_blks_wrote == total_blks_to_write) break;
+
+ file_offsets += file_lengths;
+ file_lengths = 0;
+ }
+ } /* for (i=0; icount; i++) */
+ j++;
+ } /* while (b_blks_wrote < total_blks_to_write) */
+ ADIOI_Free(mem_offsets);
+ ADIOI_Free(mem_lengths);
+
+ if (file_ptr_type == ADIO_INDIVIDUAL)
+ fd->fp_ind += total_bytes_written;
+
+ if (!err_flag) *error_code = MPI_SUCCESS;
+
+ fd->fp_sys_posn = -1; /* clear this. */
+
+#ifdef HAVE_STATUS_SET_BYTES
+ MPIR_Status_set_bytes(status, datatype, bufsize);
+/* This is a temporary way of filling in status. The right way is to
+ keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */
+#endif
+
+ ADIOI_Delete_flattened(datatype);
+ return;
+ } /* if (!buftype_is_contig && filetype_is_contig) */
+
+ /* already know that file is noncontiguous from above */
+ /* noncontiguous in file */
+
+/* filetype already flattened in ADIO_Open */
+ flat_file = ADIOI_Flatlist;
+ while (flat_file->type != fd->filetype) flat_file = flat_file->next;
+
+ disp = fd->disp;
+ initial_off = offset;
+
+ /* for each case - ADIO_Individual pointer or explicit, find offset
+ (file offset in bytes), n_filetypes (how many filetypes into file
+ to start), fwr_size (remaining amount of data in present file
+ block), and st_index (start point in terms of blocks in starting
+ filetype) */
+ if (file_ptr_type == ADIO_INDIVIDUAL) {
+ offset = fd->fp_ind; /* in bytes */
+ n_filetypes = -1; /* becomes 0 on the first pass of the loop below */
+ flag = 0;
+ while (!flag) {
+ n_filetypes++;
+ for (i=0; icount; i++) {
+ if (disp + flat_file->indices[i] +
+ ((ADIO_Offset) n_filetypes)*filetype_extent +
+ flat_file->blocklens[i] >= offset) {
+ st_index = i;
+ fwr_size = disp + flat_file->indices[i] +
+ ((ADIO_Offset) n_filetypes)*filetype_extent
+ + flat_file->blocklens[i] - offset;
+ flag = 1;
+ break;
+ }
+ }
+ } /* while (!flag) */
+ } /* if (file_ptr_type == ADIO_INDIVIDUAL) */
+ else {
+ n_etypes_in_filetype = filetype_size/etype_size;
+ n_filetypes = (int) (offset / n_etypes_in_filetype);
+ etype_in_filetype = (int) (offset % n_etypes_in_filetype);
+ size_in_filetype = etype_in_filetype * etype_size;
+
+ sum = 0;
+ for (i=0; icount; i++) {
+ sum += flat_file->blocklens[i];
+ if (sum > size_in_filetype) {
+ st_index = i;
+ fwr_size = sum - size_in_filetype;
+ abs_off_in_filetype = flat_file->indices[i] +
+ size_in_filetype - (sum - flat_file->blocklens[i]);
+ break;
+ }
+ }
+
+ /* abs. offset in bytes in the file */
+ offset = disp + ((ADIO_Offset) n_filetypes)*filetype_extent +
+ abs_off_in_filetype;
+ } /* else [file_ptr_type != ADIO_INDIVIDUAL] */
+
+ start_off = offset;
+ st_fwr_size = fwr_size;
+ st_n_filetypes = n_filetypes;
+
+ if (buftype_is_contig && !filetype_is_contig) {
+
+/* contiguous in memory, noncontiguous in file. should be the most
+ common case. */
+
+ /* only one memory off-len pair, so no array */
+ size_t mem_lengths;
+ size_t mem_offsets;
+
+ i = 0;
+ j = st_index;
+ off = offset;
+ n_filetypes = st_n_filetypes;
+
+ mem_list_count = 1;
+
+ /* determine how many blocks in file to write */
+ f_data_wrote = ADIOI_MIN(st_fwr_size, bufsize);
+ total_blks_to_write = 1;
+ if (j < (flat_file->count -1)) j++;
+ else {
+ j = 0;
+ n_filetypes++;
+ }
+ while (f_data_wrote < bufsize) {
+ f_data_wrote += flat_file->blocklens[j];
+ total_blks_to_write++;
+ if (j<(flat_file->count-1)) j++;
+ else j = 0;
+ }
+
+ j = st_index;
+ n_filetypes = st_n_filetypes;
+ n_write_lists = total_blks_to_write/MAX_ARRAY_SIZE;
+ extra_blks = total_blks_to_write%MAX_ARRAY_SIZE;
+
+ mem_offsets = (size_t)buf;
+ mem_lengths = 0;
+
+ /* if at least one full writelist, allocate file arrays
+ at max array size and don't free until very end */
+ if (n_write_lists) {
+ file_offsets = (int64_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
+ sizeof(int64_t)); /* NOTE(review): file_offsets is declared uint64_t *, the cast here is int64_t * */
+ file_lengths = (uint64_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
+ sizeof(uint64_t));
+ }
+ /* if there's no full writelist allocate file arrays according
+ to needed size (extra_blks) */
+ else {
+ file_offsets = (int64_t*)ADIOI_Malloc(extra_blks*
+ sizeof(int64_t));
+ file_lengths = (uint64_t*)ADIOI_Malloc(extra_blks*
+ sizeof(uint64_t));
+ }
+
+ /* for file arrays that are of MAX_ARRAY_SIZE, build arrays */
+ for (i=0; iindices[j];
+ file_lengths[k] = flat_file->blocklens[j];
+ mem_lengths += file_lengths[k];
+ }
+ if (j<(flat_file->count - 1)) j++;
+ else {
+ j = 0;
+ n_filetypes++;
+ }
+ } /* for (k=0; kindices[j];
+ /* XXX: double-check these casts */
+ if (k == (extra_blks - 1)) {
+ file_lengths[k] = bufsize
+ - mem_lengths - mem_offsets + (size_t)buf;
+ }
+ else file_lengths[k] = flat_file->blocklens[j];
+ } /* if(i || k) */
+ mem_lengths += file_lengths[k];
+ if (j<(flat_file->count - 1)) j++;
+ else {
+ j = 0;
+ n_filetypes++;
+ }
+ } /* for (k=0; ktype != datatype) flat_buf = flat_buf->next;
+
+ size_wrote = 0;
+ n_filetypes = st_n_filetypes;
+ fwr_size = st_fwr_size;
+ bwr_size = flat_buf->blocklens[0];
+ buf_count = 0;
+ start_mem_offset = 0;
+ start_k = k = 0;
+ start_j = st_index;
+ max_mem_list = 0;
+ max_file_list = 0;
+
+ /* run through and find max_file_list and max_mem_list so that you
+ can allocate the file and memory arrays less than MAX_ARRAY_SIZE
+ if possible */
+
+ while (size_wrote < bufsize) {
+ k = start_k;
+ new_buffer_write = 0;
+ mem_list_count = 0;
+ while ((mem_list_count < MAX_ARRAY_SIZE) &&
+ (new_buffer_write < bufsize-size_wrote)) {
+ /* find mem_list_count and file_list_count such that both are
+ less than MAX_ARRAY_SIZE, the sum of their lengths are
+ equal, and the sum of all the data written and data to be
+ written in the next immediate write list is less than
+ bufsize */
+ if(mem_list_count) {
+ if((new_buffer_write + flat_buf->blocklens[k] +
+ size_wrote) > bufsize) {
+ end_bwr_size = new_buffer_write +
+ flat_buf->blocklens[k] - (bufsize - size_wrote);
+ new_buffer_write = bufsize - size_wrote;
+ }
+ else {
+ new_buffer_write += flat_buf->blocklens[k];
+ end_bwr_size = flat_buf->blocklens[k];
+ }
+ }
+ else {
+ if (bwr_size > (bufsize - size_wrote)) {
+ new_buffer_write = bufsize - size_wrote;
+ bwr_size = new_buffer_write;
+ }
+ else new_buffer_write = bwr_size;
+ }
+ mem_list_count++;
+ k = (k + 1)%flat_buf->count;
+ } /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
+ (new_buffer_write < bufsize-size_wrote)) */
+ j = start_j;
+ new_file_write = 0;
+ file_list_count = 0;
+ while ((file_list_count < MAX_ARRAY_SIZE) &&
+ (new_file_write < new_buffer_write)) {
+ if(file_list_count) {
+ if((new_file_write + flat_file->blocklens[j]) >
+ new_buffer_write) {
+ end_fwr_size = new_buffer_write - new_file_write;
+ new_file_write = new_buffer_write;
+ j--;
+ }
+ else {
+ new_file_write += flat_file->blocklens[j];
+ end_fwr_size = flat_file->blocklens[j];
+ }
+ }
+ else {
+ if (fwr_size > new_buffer_write) {
+ new_file_write = new_buffer_write;
+ fwr_size = new_file_write;
+ }
+ else new_file_write = fwr_size;
+ }
+ file_list_count++;
+ if (j < (flat_file->count - 1)) j++;
+ else j = 0;
+
+ k = start_k;
+ if ((new_file_write < new_buffer_write) &&
+ (file_list_count == MAX_ARRAY_SIZE)) {
+ new_buffer_write = 0;
+ mem_list_count = 0;
+ while (new_buffer_write < new_file_write) {
+ if(mem_list_count) {
+ if((new_buffer_write + flat_buf->blocklens[k]) >
+ new_file_write) {
+ end_bwr_size = new_file_write -
+ new_buffer_write;
+ new_buffer_write = new_file_write;
+ k--;
+ }
+ else {
+ new_buffer_write += flat_buf->blocklens[k];
+ end_bwr_size = flat_buf->blocklens[k];
+ }
+ }
+ else {
+ new_buffer_write = bwr_size;
+ if (bwr_size > (bufsize - size_wrote)) {
+ new_buffer_write = bufsize - size_wrote;
+ bwr_size = new_buffer_write;
+ }
+ }
+ mem_list_count++;
+ k = (k + 1)%flat_buf->count;
+ } /* while (new_buffer_write < new_file_write) */
+ } /* if ((new_file_write < new_buffer_write) &&
+ (file_list_count == MAX_ARRAY_SIZE)) */
+ } /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
+ (new_buffer_write < bufsize-size_wrote)) */
+
+ /* fakes filling the writelist arrays of lengths found above */
+ k = start_k;
+ j = start_j;
+ for (i=0; iblocklens[k] == end_bwr_size)
+ bwr_size = flat_buf->blocklens[(k+1)%
+ flat_buf->count];
+ else {
+ bwr_size = flat_buf->blocklens[k] - end_bwr_size;
+ k--;
+ buf_count--;
+ }
+ }
+ }
+ buf_count++;
+ k = (k + 1)%flat_buf->count;
+ } /* for (i=0; iblocklens[j] == end_fwr_size)
+ fwr_size = flat_file->blocklens[(j+1)%
+ flat_file->count];
+ else {
+ fwr_size = flat_file->blocklens[j] - end_fwr_size;
+ j--;
+ }
+ }
+ }
+ if (j < flat_file->count - 1) j++;
+ else {
+ j = 0;
+ n_filetypes++;
+ }
+ } /* for (i=0; iblocklens[0] ) ) ||
+ ((mem_list_count == 1) &&
+ (new_buffer_write < flat_buf->blocklens[0]) ) ||
+ ((file_list_count == MAX_ARRAY_SIZE) &&
+ (new_file_write < flat_buf->blocklens[0]) ) ||
+ ( (mem_list_count == MAX_ARRAY_SIZE) &&
+ (new_buffer_write < flat_file->blocklens[0])) )
+ {
+ ADIOI_Delete_flattened(datatype);
+ ADIOI_GEN_WriteStrided_naive(fd, buf, count, datatype,
+ file_ptr_type, initial_off, status, error_code);
+ return;
+ }
+
+
+ mem_offsets = (void *)ADIOI_Malloc(max_mem_list*sizeof(void *));
+ mem_lengths = (size_t*)ADIOI_Malloc(max_mem_list*sizeof(size_t));
+ file_offsets = (uint64_t *)ADIOI_Malloc(max_file_list*sizeof(uint64_t));
+ file_lengths = (uint64_t*)ADIOI_Malloc(max_file_list*sizeof(uint64_t));
+
+ size_wrote = 0;
+ n_filetypes = st_n_filetypes;
+ fwr_size = st_fwr_size;
+ bwr_size = flat_buf->blocklens[0];
+ buf_count = 0;
+ start_mem_offset = 0;
+ start_k = k = 0;
+ start_j = st_index;
+
+ /* this section calculates mem_list_count and file_list_count
+ and also finds the possibly odd sized last array elements
+ in new_fwr_size and new_bwr_size */
+
+ while (size_wrote < bufsize) {
+ k = start_k;
+ new_buffer_write = 0;
+ mem_list_count = 0;
+ while ((mem_list_count < MAX_ARRAY_SIZE) &&
+ (new_buffer_write < bufsize-size_wrote)) {
+ /* find mem_list_count and file_list_count such that both are
+ less than MAX_ARRAY_SIZE, the sum of their lengths are
+ equal, and the sum of all the data written and data to be
+ written in the next immediate write list is less than
+ bufsize */
+ if(mem_list_count) {
+ if((new_buffer_write + flat_buf->blocklens[k] +
+ size_wrote) > bufsize) {
+ end_bwr_size = new_buffer_write +
+ flat_buf->blocklens[k] - (bufsize - size_wrote);
+ new_buffer_write = bufsize - size_wrote;
+ }
+ else {
+ new_buffer_write += flat_buf->blocklens[k];
+ end_bwr_size = flat_buf->blocklens[k];
+ }
+ }
+ else {
+ if (bwr_size > (bufsize - size_wrote)) {
+ new_buffer_write = bufsize - size_wrote;
+ bwr_size = new_buffer_write;
+ }
+ else new_buffer_write = bwr_size;
+ }
+ mem_list_count++;
+ k = (k + 1)%flat_buf->count;
+ } /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
+ (new_buffer_write < bufsize-size_wrote)) */
+ j = start_j;
+ new_file_write = 0;
+ file_list_count = 0;
+ while ((file_list_count < MAX_ARRAY_SIZE) &&
+ (new_file_write < new_buffer_write)) {
+ if(file_list_count) {
+ if((new_file_write + flat_file->blocklens[j]) >
+ new_buffer_write) {
+ end_fwr_size = new_buffer_write - new_file_write;
+ new_file_write = new_buffer_write;
+ j--;
+ }
+ else {
+ new_file_write += flat_file->blocklens[j];
+ end_fwr_size = flat_file->blocklens[j];
+ }
+ }
+ else {
+ if (fwr_size > new_buffer_write) {
+ new_file_write = new_buffer_write;
+ fwr_size = new_file_write;
+ }
+ else new_file_write = fwr_size;
+ }
+ file_list_count++;
+ if (j < (flat_file->count - 1)) j++;
+ else j = 0;
+
+ k = start_k;
+ if ((new_file_write < new_buffer_write) &&
+ (file_list_count == MAX_ARRAY_SIZE)) {
+ new_buffer_write = 0;
+ mem_list_count = 0;
+ while (new_buffer_write < new_file_write) {
+ if(mem_list_count) {
+ if((new_buffer_write + flat_buf->blocklens[k]) >
+ new_file_write) {
+ end_bwr_size = new_file_write -
+ new_buffer_write;
+ new_buffer_write = new_file_write;
+ k--;
+ }
+ else {
+ new_buffer_write += flat_buf->blocklens[k];
+ end_bwr_size = flat_buf->blocklens[k];
+ }
+ }
+ else {
+ new_buffer_write = bwr_size;
+ if (bwr_size > (bufsize - size_wrote)) {
+ new_buffer_write = bufsize - size_wrote;
+ bwr_size = new_buffer_write;
+ }
+ }
+ mem_list_count++;
+ k = (k + 1)%flat_buf->count;
+ } /* while (new_buffer_write < new_file_write) */
+ } /* if ((new_file_write < new_buffer_write) &&
+ (file_list_count == MAX_ARRAY_SIZE)) */
+ } /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
+ (new_buffer_write < bufsize-size_wrote)) */
+
+ /* fills the allocated writelist arrays */
+ k = start_k;
+ j = start_j;
+ for (i=0; icount) +
+ flat_buf->indices[k];
+
+ if(!i) {
+ mem_lengths[0] = bwr_size;
+ mem_offsets[0] += flat_buf->blocklens[k] - bwr_size;
+ }
+ else {
+ if (i == (mem_list_count - 1)) {
+ mem_lengths[i] = end_bwr_size;
+ if (flat_buf->blocklens[k] == end_bwr_size)
+ bwr_size = flat_buf->blocklens[(k+1)%
+ flat_buf->count];
+ else {
+ bwr_size = flat_buf->blocklens[k] - end_bwr_size;
+ k--;
+ buf_count--;
+ }
+ }
+ else {
+ mem_lengths[i] = flat_buf->blocklens[k];
+ }
+ }
+ buf_count++;
+ k = (k + 1)%flat_buf->count;
+ } /* for (i=0; iindices[j] +
+ ((ADIO_Offset)n_filetypes) * filetype_extent;
+ if (!i) {
+ file_lengths[0] = fwr_size;
+ file_offsets[0] += flat_file->blocklens[j] - fwr_size;
+ }
+ else {
+ if (i == (file_list_count - 1)) {
+ file_lengths[i] = end_fwr_size;
+ if (flat_file->blocklens[j] == end_fwr_size)
+ fwr_size = flat_file->blocklens[(j+1)%
+ flat_file->count];
+ else {
+ fwr_size = flat_file->blocklens[j] - end_fwr_size;
+ j--;
+ }
+ }
+ else file_lengths[i] = flat_file->blocklens[j];
+ }
+ if (j < flat_file->count - 1) j++;
+ else {
+ j = 0;
+ n_filetypes++;
+ }
+ } /* for (i=0; ifp_ind = file_offsets[file_list_count-1]+
+ file_lengths[file_list_count-1];
+ }
+ ADIOI_Free(file_offsets);
+ ADIOI_Free(file_lengths);
+
+ *error_code = MPI_SUCCESS;
+
+error_state:
+ fd->fp_sys_posn = -1; /* set it to null. */
+
+#ifdef HAVE_STATUS_SET_BYTES
+ MPIR_Status_set_bytes(status, datatype, bufsize);
+/* This is a temporary way of filling in status. The right way is to
+ keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */
+#endif
+
+ if (!buftype_is_contig) ADIOI_Delete_flattened(datatype);
+}
diff --git a/ompi/mca/io/romio/romio/adio/common/Makefile.am b/ompi/mca/io/romio/romio/adio/common/Makefile.am
index 9dca94a663..eebfa5dd7c 100644
--- a/ompi/mca/io/romio/romio/adio/common/Makefile.am
+++ b/ompi/mca/io/romio/romio/adio/common/Makefile.am
@@ -26,24 +26,32 @@ include $(top_srcdir)/Makefile.options
noinst_LTLIBRARIES = libadio_common.la
libadio_common_la_SOURCES = \
ad_aggregate.c \
+ ad_aggregate_new.c \
ad_close.c \
+ ad_coll_build_req_new.c \
+ ad_coll_exch_new.c \
ad_darray.c \
ad_delete.c \
ad_done.c \
ad_done_fake.c \
ad_end.c \
+ ad_features.c \
ad_fcntl.c \
ad_flush.c \
ad_fstype.c \
ad_get_sh_fp.c \
ad_hints.c \
ad_init.c \
+ ad_io_coll.c \
ad_iopen.c \
ad_iread.c \
ad_iread_fake.c \
ad_iwrite.c \
ad_iwrite_fake.c \
ad_open.c \
+ ad_opencoll.c \
+ ad_opencoll_failsafe.c \
+ ad_opencoll_scalable.c \
ad_prealloc.c \
ad_read.c \
ad_read_coll.c \
@@ -69,6 +77,7 @@ libadio_common_la_SOURCES = \
flatten.c \
get_fp_posn.c \
greq_fns.c \
+ heap-sort.c \
iscontig.c \
lock.c \
malloc.c \
diff --git a/ompi/mca/io/romio/romio/adio/common/ad_aggregate.c b/ompi/mca/io/romio/romio/adio/common/ad_aggregate.c
index 7252f50e2c..ab5d3636e4 100644
--- a/ompi/mca/io/romio/romio/adio/common/ad_aggregate.c
+++ b/ompi/mca/io/romio/romio/adio/common/ad_aggregate.c
@@ -7,6 +7,10 @@
#include "adio.h"
#include "adio_extern.h"
+#ifdef AGGREGATION_PROFILE
+#include "mpe.h"
+#endif
+
#undef AGG_DEBUG
/* This file contains four functions:
@@ -79,21 +83,26 @@ int ADIOI_Calc_aggregator(ADIO_File fd,
ADIOI_UNREFERENCED_ARG(fd_start);
-#ifdef AGG_DEBUG
-#if 0
- FPRINTF(stdout, "off = %lld, min_off = %lld, len = %lld, fd_size = %lld\n",
- off, min_off, *len, fd_size);
-#endif
-#endif
-
/* get an index into our array of aggregators */
rank_index = (int) ((off - min_off + fd_size)/ fd_size - 1);
+ if (fd->hints->striping_unit > 0) {
+ /* wkliao: implementation for file domain alignment
+ fd_start[] and fd_end[] have been aligned with file lock
+ boundaries when returned from ADIOI_Calc_file_domains() so cannot
+ just use simple arithmetic as above */
+ rank_index = 0;
+ while (off > fd_end[rank_index]) rank_index++;
+ }
+
/* we index into fd_end with rank_index, and fd_end was allocated to be no
* bigger than fd->hins->cb_nodes. If we ever violate that, we're
* overrunning arrays. Obviously, we should never ever hit this abort */
- if (rank_index >= fd->hints->cb_nodes)
- MPI_Abort(MPI_COMM_WORLD, 1);
+ if (rank_index >= fd->hints->cb_nodes || rank_index < 0) {
+ FPRINTF(stderr, "Error in ADIOI_Calc_aggregator(): rank_index(%d) >= fd->hints->cb_nodes (%d) fd_size=%lld off=%lld\n",
+ rank_index,fd->hints->cb_nodes,fd_size,off);
+ MPI_Abort(MPI_COMM_WORLD, 1);
+ }
/* remember here that even in Rajeev's original code it was the case that
* different aggregators could end up with different amounts of data to
@@ -119,19 +128,21 @@ void ADIOI_Calc_file_domains(ADIO_Offset *st_offsets, ADIO_Offset
*end_offsets, int nprocs, int nprocs_for_coll,
ADIO_Offset *min_st_offset_ptr,
ADIO_Offset **fd_start_ptr, ADIO_Offset
- **fd_end_ptr, ADIO_Offset *fd_size_ptr)
+ **fd_end_ptr, int min_fd_size,
+ ADIO_Offset *fd_size_ptr,
+ int striping_unit)
{
/* Divide the I/O workload among "nprocs_for_coll" processes. This is
done by (logically) dividing the file into file domains (FDs); each
process may directly access only its own file domain. */
- /* XXX: one idea: tweak the file domains so that no fd is smaller than
- * a threshold (one presumably well-suited to a file system). We don't
- * do that, but this routine would be the place for it */
-
ADIO_Offset min_st_offset, max_end_offset, *fd_start, *fd_end, fd_size;
int i;
+#ifdef AGGREGATION_PROFILE
+ MPE_Log_event (5004, 0, NULL);
+#endif
+
#ifdef AGG_DEBUG
FPRINTF(stderr, "ADIOI_Calc_file_domains: %d aggregator(s)\n",
nprocs_for_coll);
@@ -156,6 +167,14 @@ void ADIOI_Calc_file_domains(ADIO_Offset *st_offsets, ADIO_Offset
1)/nprocs_for_coll;
/* ceiling division as in HPF block distribution */
+ /* Tweak the file domains so that no fd is smaller than a threshold. We
+ * have to strike a balance between efficiency and parallelism: somewhere
+ * between 10k processes sending 32-byte requests and one process sending a
+ * 320k request is a (system-dependent) sweet spot */
+
+ if (fd_size < min_fd_size)
+ fd_size = min_fd_size;
+
*fd_start_ptr = (ADIO_Offset *)
ADIOI_Malloc(nprocs_for_coll*sizeof(ADIO_Offset));
*fd_end_ptr = (ADIO_Offset *)
@@ -164,12 +183,46 @@ void ADIOI_Calc_file_domains(ADIO_Offset *st_offsets, ADIO_Offset
fd_start = *fd_start_ptr;
fd_end = *fd_end_ptr;
- fd_start[0] = min_st_offset;
- fd_end[0] = min_st_offset + fd_size - 1;
+ /* Wei-keng Liao: implementation for file domain alignment to nearest file
+ * lock boundary (as specified by striping_unit hint). Could also
+ * experiment with other alignment strategies here */
+ if (striping_unit > 0) {
+ ADIO_Offset end_off;
+ int rem_front, rem_back;
- for (i=1; i
+/* #define DEBUG */
+
+/* Forward declarations for the file-realm helpers used by
+ * ADIOI_Calc_file_realms().  Each "Calc_file_realms_*" routine fills the
+ * caller-supplied file_realm_st_offs[] / file_realm_types[] arrays, one
+ * entry per aggregator (nprocs_for_coll entries). */
+
+/* Realms of a user-chosen size (fr_size bytes before alignment). */
+void ADIOI_Calc_file_realms_user_size (ADIO_File fd, int fr_size,
+ int nprocs_for_coll,
+ ADIO_Offset *file_realm_st_offs,
+ MPI_Datatype *file_realm_types);
+/* Realms obtained by splitting [min_st_offset, max_end_offset] evenly. */
+void ADIOI_Calc_file_realms_aar (ADIO_File fd, int nprocs_for_coll,
+ int pfr_enabled,
+ ADIO_Offset min_st_offset,
+ ADIO_Offset max_end_offset,
+ ADIO_Offset *file_realm_st_offs,
+ MPI_Datatype *file_realm_types);
+/* Realms obtained by splitting the (impending) file size evenly. */
+void ADIOI_Calc_file_realms_fsize (ADIO_File fd,
+ int nprocs_for_coll,
+ ADIO_Offset max_end_offset,
+ ADIO_Offset *file_realm_st_offs,
+ MPI_Datatype *file_realm_types);
+/* Builds the datatype shared by all realms: `size` bytes of data followed
+ * by an upper-bound marker at size*nprocs_for_coll. */
+void ADIOI_Create_fr_simpletype (int size, int nprocs_for_coll,
+ MPI_Datatype *simpletype);
+/* File-local alignment helper; its definition is not adjacent to these
+ * prototypes.  Judging by its call sites it writes an aligned size and an
+ * aligned start offset through the two output pointers. */
+static void align_fr (int fr_size, ADIO_Offset fr_off, int alignment,
+ int *aligned_fr_size, ADIO_Offset *aligned_fr_off);
+/* Intended consistency check for a set of file realms. */
+void ADIOI_Verify_fr (int nprocs_for_coll, ADIO_Offset *file_realm_st_offs,
+ MPI_Datatype *file_realm_types);
+
+/* ADIOI_Calc_file_realms - compute (or reuse) the file realms of fd.
+ *
+ * fd             file whose hints (cb_nodes, cb_fr_type, cb_pfr) drive the
+ *                computation; on return fd->file_realm_st_offs and
+ *                fd->file_realm_types describe one realm per aggregator
+ * min_st_offset  lowest file offset touched by the current access
+ * max_end_offset highest file offset touched by the current access
+ *
+ * When persistent file realms (cb_pfr) are disabled the realms are rebuilt
+ * on every call.  When they are enabled, realms that are already attached
+ * to fd are kept (except in the single-aggregator case, where the realm is
+ * cheap to rebuild and is reset every time).
+ */
+void ADIOI_Calc_file_realms (ADIO_File fd, ADIO_Offset min_st_offset,
+                             ADIO_Offset max_end_offset)
+{
+    int nprocs_for_coll;
+    int file_realm_calc_type;
+
+    MPI_Datatype *file_realm_types = NULL;
+    ADIO_Offset *file_realm_st_offs = NULL;
+
+#ifdef AGGREGATION_PROFILE
+    MPE_Log_event (5004, 0, NULL);
+#endif
+#ifdef DEBUG
+    printf ("ADIOI_Calc_file_realms\n");
+#endif
+
+    nprocs_for_coll = fd->hints->cb_nodes;
+    file_realm_calc_type = fd->hints->cb_fr_type;
+
+    /* If PFRs are disabled we know these pointers are not allocated */
+    if (fd->hints->cb_pfr != ADIOI_HINT_ENABLE) {
+        fd->file_realm_st_offs = NULL;
+        fd->file_realm_types = NULL;
+    }
+
+    /* Start from whatever is currently attached to fd.  Without this, a
+     * call that finds persistent realms already in place (and therefore
+     * takes neither branch below) would store NULL back into fd, dropping
+     * and leaking the persistent realms. */
+    file_realm_st_offs = fd->file_realm_st_offs;
+    file_realm_types = fd->file_realm_types;
+
+    if (nprocs_for_coll == 1) {
+        /* if there's only one aggregator, we can reset the file
+         * realms every single time */
+        if (file_realm_st_offs == NULL)
+        {
+            file_realm_st_offs = (ADIO_Offset *)
+                ADIOI_Malloc (sizeof(ADIO_Offset));
+            file_realm_types = (MPI_Datatype *)
+                ADIOI_Malloc (sizeof(MPI_Datatype));
+        }
+        /* NOTE(review): when the arrays are reused, the datatype created
+         * by the previous call is overwritten here without being freed -
+         * confirm whether it should be released first. */
+        *file_realm_st_offs = min_st_offset;
+        MPI_Type_contiguous ((max_end_offset - min_st_offset + 1), MPI_BYTE,
+                             file_realm_types);
+        MPI_Type_commit (file_realm_types);
+        ADIOI_Add_contig_flattened (*file_realm_types);
+    }
+    else if (file_realm_st_offs == NULL) {
+        file_realm_st_offs = (ADIO_Offset *)
+            ADIOI_Malloc (nprocs_for_coll * sizeof(ADIO_Offset));
+        file_realm_types = (MPI_Datatype *)
+            ADIOI_Malloc (nprocs_for_coll * sizeof(MPI_Datatype));
+
+        if (file_realm_calc_type == ADIOI_FR_AAR) {
+            ADIOI_Calc_file_realms_aar (fd, nprocs_for_coll,
+                                        fd->hints->cb_pfr,
+                                        min_st_offset, max_end_offset,
+                                        file_realm_st_offs, file_realm_types);
+            /* flatten file realm datatype for future use - only one
+             * because all are the same*/
+            ADIOI_Flatten_datatype (file_realm_types[0]);
+        }
+        else if (file_realm_calc_type == ADIOI_FR_FSZ) {
+            ADIOI_Calc_file_realms_fsize (fd, nprocs_for_coll, max_end_offset,
+                                          file_realm_st_offs,
+                                          file_realm_types);
+            /* flatten file realm datatype for future use - only one
+             * because all are the same*/
+            ADIOI_Flatten_datatype (file_realm_types[0]);
+        }
+        else if (file_realm_calc_type == ADIOI_FR_USR_REALMS) {
+            /* copy user provided realm datatypes and realm offsets in
+             * hints to file descriptor.  may also want to verify that
+             * the provided file realms are covering (for pfr at
+             * least) and non-overlapping.
+             * Not implemented: the freshly allocated arrays are left
+             * unfilled in this case. */
+        }
+        else if (file_realm_calc_type > 0) {
+            /* a positive cb_fr_type is interpreted as a realm size */
+            ADIOI_Calc_file_realms_user_size (fd, file_realm_calc_type,
+                                              nprocs_for_coll,
+                                              file_realm_st_offs,
+                                              file_realm_types);
+            /* flatten file realm datatype for future use - only one
+             * because all are the same */
+            ADIOI_Flatten_datatype (file_realm_types[0]);
+        }
+    }
+    fd->file_realm_st_offs = file_realm_st_offs;
+    fd->file_realm_types = file_realm_types;
+#ifdef AGGREGATION_PROFILE
+    MPE_Log_event (5005, 0, NULL);
+#endif
+}
+
+/* ADIOI_Calc_file_realms_user_size - lay out nprocs_for_coll equally sized
+ * file realms back to back.
+ *
+ * The requested fr_size is first passed through align_fr() together with
+ * the cb_fr_alignment hint; the aligned size is what is actually used.
+ * Realm 0 starts at byte 0 when persistent file realms are enabled and at
+ * the aligned offset reported by align_fr() otherwise.  Every realm shares
+ * the same datatype, created by ADIOI_Create_fr_simpletype().
+ */
+void ADIOI_Calc_file_realms_user_size (ADIO_File fd, int fr_size,
+                                       int nprocs_for_coll,
+                                       ADIO_Offset *file_realm_st_offs,
+                                       MPI_Datatype *file_realm_types)
+{
+    int realm_len;
+    ADIO_Offset realm_start;
+    MPI_Datatype realm_type;
+    int idx;
+
+    align_fr(fr_size, 0, fd->hints->cb_fr_alignment, &realm_len,
+             &realm_start);
+    ADIOI_Create_fr_simpletype (realm_len, nprocs_for_coll, &realm_type);
+
+    /* first realm: anchored at 0 for PFRs, otherwise at the aligned start */
+    file_realm_st_offs[0] =
+        (fd->hints->cb_pfr == ADIOI_HINT_ENABLE) ? 0 : realm_start;
+    file_realm_types[0] = realm_type;
+#ifdef DEBUG
+    printf ("file_realm[0] = (%lld, %d)\n", file_realm_st_offs[0],
+            realm_len);
+#endif
+
+    /* remaining realms follow contiguously, realm_len bytes apart */
+    for (idx = 1; idx < nprocs_for_coll; ++idx) {
+        file_realm_types[idx] = realm_type;
+        file_realm_st_offs[idx] = file_realm_st_offs[idx - 1] + realm_len;
+#ifdef DEBUG
+        printf ("file_realm[%d] = (%lld, %d)\n", idx, file_realm_st_offs[idx],
+                realm_len);
+#endif
+    }
+}
+
+/* ADIOI_Calc_file_realms_aar - split the accessed range
+ * [min_st_offset, max_end_offset] evenly among nprocs_for_coll realms.
+ *
+ * takes an extra romio_cb_pfr param (cb_pfr) to decide whether file realms
+ * should start at byte 0 of the file; otherwise realm 0 starts at the
+ * aligned offset reported by align_fr().  All realms share one datatype.
+ * When fd->hints->cb_pfr is enabled, the chosen realm size is also written
+ * to the "romio_cb_fr_type" info key of fd.
+ *
+ * NOTE(review): the realm size is computed in an int, so a very large
+ * accessed range per aggregator would be truncated - confirm the intended
+ * limits.
+ */
+void ADIOI_Calc_file_realms_aar (ADIO_File fd, int nprocs_for_coll, int cb_pfr,
+                                 ADIO_Offset min_st_offset,
+                                 ADIO_Offset max_end_offset,
+                                 ADIO_Offset *file_realm_st_offs,
+                                 MPI_Datatype *file_realm_types)
+{
+    int fr_size, aligned_fr_size, i;
+    MPI_Datatype simpletype;
+    ADIO_Offset aligned_start_off;
+    /* Big enough for any int printed with "%d": at most 3 decimal digits
+     * per byte, plus a sign and the terminating NUL.  The previous 9-byte
+     * buffer overflowed for realm sizes of 9 or more digits. */
+    char value[3 * sizeof(int) + 2];
+
+    /* ceiling-style division of the accessed range */
+    fr_size = (max_end_offset - min_st_offset + nprocs_for_coll) /
+        nprocs_for_coll;
+    align_fr(fr_size, min_st_offset, fd->hints->cb_fr_alignment,
+             &aligned_fr_size, &aligned_start_off);
+    fr_size = aligned_fr_size;
+    ADIOI_Create_fr_simpletype (fr_size, nprocs_for_coll, &simpletype);
+    if (cb_pfr == ADIOI_HINT_ENABLE)
+        file_realm_st_offs[0] = 0;
+    else
+        file_realm_st_offs[0] = aligned_start_off;
+    file_realm_types[0] = simpletype;
+
+#ifdef DEBUG
+    printf ("file_realm[0] = (%lld, %d)\n", file_realm_st_offs[0],
+            fr_size);
+#endif
+    for (i=1; i < nprocs_for_coll; i++)
+    {
+        file_realm_st_offs[i] = file_realm_st_offs[i-1] + fr_size;
+        file_realm_types[i] = simpletype;
+#ifdef DEBUG
+        printf ("file_realm[%d] = (%lld, %d)\n", i, file_realm_st_offs[i],
+                fr_size);
+#endif
+    }
+    if (fd->hints->cb_pfr == ADIOI_HINT_ENABLE) {
+        /* record the realm size so it is visible through the info object */
+        sprintf (value, "%d", fr_size);
+        ADIOI_Info_set (fd->info, "romio_cb_fr_type", value);
+    }
+}
+
+/* ADIOI_Calc_file_realms_fsize - split the whole file evenly among
+ * nprocs_for_coll realms, realm i starting at i * fr_size.
+ *
+ * The size used is the larger of the current file size (queried with
+ * ADIO_Fcntl) and max_end_offset+1, since a pending write may extend the
+ * file.  If the size query fails, only max_end_offset+1 is used.
+ *
+ * NOTE(review): align_fr() is called but, unlike the sibling routines, its
+ * results are not applied to fr_size or the start offsets - confirm
+ * whether that is intentional.  The per-realm size is still an int
+ * because the helpers take an int.
+ */
+void ADIOI_Calc_file_realms_fsize (ADIO_File fd, int nprocs_for_coll,
+                                   ADIO_Offset max_end_offset,
+                                   ADIO_Offset *file_realm_st_offs,
+                                   MPI_Datatype *file_realm_types)
+{
+    int fr_size, aligned_fr_size, error_code, i;
+    ADIO_Offset fsize;              /* kept wide: file sizes exceed int */
+    ADIO_Offset aligned_fr_off;
+    ADIO_Fcntl_t fcntl_struct;
+    MPI_Datatype simpletype;
+
+    ADIO_Fcntl (fd, ADIO_FCNTL_GET_FSIZE, &fcntl_struct, &error_code);
+
+    /* use impending file size since a write call may lengthen the file;
+     * only trust fcntl_struct.fsize when the query succeeded */
+    fsize = max_end_offset + 1;
+    if (error_code == MPI_SUCCESS)
+        fsize = ADIOI_MAX (fcntl_struct.fsize, fsize);
+    fr_size = (int) ((fsize + nprocs_for_coll - 1) / nprocs_for_coll);
+    align_fr(fr_size, 0, fd->hints->cb_fr_alignment,
+             &aligned_fr_size, &aligned_fr_off);
+    ADIOI_Create_fr_simpletype (fr_size, nprocs_for_coll, &simpletype);
+
+    for (i=0; i < nprocs_for_coll; i++)
+    {
+        /* widen before multiplying so the offset cannot overflow an int */
+        file_realm_st_offs[i] = (ADIO_Offset) fr_size * i;
+        file_realm_types[i] = simpletype;
+    }
+}
+
+/* creates a datatype with an empty trailing edge
+ *
+ * size            number of MPI_BYTE elements of data at displacement 0
+ * nprocs_for_coll number of realms; the upper-bound marker is placed at
+ *                 size * nprocs_for_coll
+ * simpletype      output: the committed struct datatype
+ */
+void ADIOI_Create_fr_simpletype (int size, int nprocs_for_coll,
+                                 MPI_Datatype *simpletype)
+{
+    int count=2, blocklens[2];
+    MPI_Aint indices[2];
+    MPI_Datatype old_types[2];
+
+    blocklens[0] = size;
+    blocklens[1] = 1;
+    indices[0] = 0;
+    /* widen before multiplying: size * nprocs_for_coll can exceed INT_MAX
+     * even when each factor fits in an int */
+    indices[1] = (MPI_Aint) size * nprocs_for_coll;
+    old_types[0] = MPI_BYTE;
+    old_types[1] = MPI_UB;
+
+    MPI_Type_struct (count, blocklens, indices, old_types, simpletype);
+
+    MPI_Type_commit (simpletype);
+}
+
+/* Verify that file realms are covering (PFRs) and non-overlapping.
+ *
+ * Placeholder: the body is empty, so no verification is performed and all
+ * three arguments are currently unused. */
+void ADIOI_Verify_fr (int nprocs_for_coll, ADIO_Offset *file_realm_st_offs,
+ MPI_Datatype *file_realm_types)
+{
+}
+
+int ADIOI_Agg_idx (int rank, ADIO_File fd) {
+ int i, cb_nodes, *ranklist;
+ cb_nodes = fd->hints->cb_nodes;
+ ranklist = fd->hints->ranklist;
+
+ for (i=0; ihints && fd->hints->ranklist) ADIOI_Free(fd->hints->ranklist);
if (fd->hints && fd->hints->cb_config_list) ADIOI_Free(fd->hints->cb_config_list);
+
+ /* Persistent File Realms */
+ if (fd->hints->cb_pfr == ADIOI_HINT_ENABLE) {
+ /* AAR, FSIZE, and User provided uniform File realms */
+ if (1) {
+ ADIOI_Delete_flattened (fd->file_realm_types[0]);
+ MPI_Type_free (&fd->file_realm_types[0]);
+ }
+ else {
+ for (i=0; ihints->cb_nodes; i++) {
+ ADIOI_Datatype_iscontig(fd->file_realm_types[i], &is_contig);
+ if (!is_contig)
+ ADIOI_Delete_flattened(fd->file_realm_types[i]);
+ MPI_Type_free (&fd->file_realm_types[i]);
+ }
+ }
+ ADIOI_Free(fd->file_realm_st_offs);
+ ADIOI_Free(fd->file_realm_types);
+ }
if (fd->hints) ADIOI_Free(fd->hints);
+
+
+
MPI_Comm_free(&(fd->comm));
/* deferred open: if we created an aggregator communicator, free it */
if (fd->agg_comm != MPI_COMM_NULL) {
diff --git a/ompi/mca/io/romio/romio/adio/common/ad_coll_build_req_new.c b/ompi/mca/io/romio/romio/adio/common/ad_coll_build_req_new.c
new file mode 100644
index 0000000000..3f3201bf1a
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/common/ad_coll_build_req_new.c
@@ -0,0 +1,2086 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ *
+ * Copyright (C) 1997 University of Chicago.
+ * See COPYRIGHT notice in top-level directory.
+ */
+
+#include <assert.h>
+#include "adio.h"
+#include "adio_extern.h"
+#ifdef AGGREGATION_PROFILE
+#include "mpe.h"
+#endif
+#include "heap-sort.h"
+
+/*
+#define DEBUG1
+#define DEBUG2
+#define DEBUG3
+*/
+/* #define DEBUG_HEAP */
+
+#define DTYPE_SKIP
+
+#ifdef DEBUG3
+static char *off_type_name[MAX_OFF_TYPE] = {"TEMP_OFFSETS",
+ "REAL_OFFSETS"};
+#endif
+
+/* Simple function to return the size of the view_state.
+ *
+ * op_type selects which flatten_state is read: TEMP_OFF -> tmp_state,
+ * REAL_OFF -> cur_state.  Returns that state's cur_sz, or -1 (after
+ * printing a diagnostic) when op_type is neither of the two. */
+static inline ADIO_Offset view_state_get_cur_sz(view_state *tmp_view_state_p,
+                                                int op_type)
+{
+    flatten_state *tmp_state_p = NULL;
+    switch(op_type)
+    {
+        case TEMP_OFF:
+            tmp_state_p = &(tmp_view_state_p->tmp_state);
+            break;
+        case REAL_OFF:
+            tmp_state_p = &(tmp_view_state_p->cur_state);
+            break;
+        default:
+            fprintf(stderr, "op_type invalid\n");
+            /* no state selected: bail out instead of dereferencing NULL */
+            return -1;
+    }
+    return tmp_state_p->cur_sz;
+}
+
+/* Simple function to return the len of the next piece of the view_state.
+ *
+ * op_type selects which flatten_state is read: TEMP_OFF -> tmp_state,
+ * REAL_OFF -> cur_state.  The result is what is left of the current
+ * flattened block: blocklens[idx] minus the part already consumed
+ * (cur_reg_off).  Returns -1 (after printing a diagnostic) when op_type is
+ * neither of the two. */
+static inline ADIO_Offset view_state_get_next_len(view_state *tmp_view_state_p,
+                                                  int op_type)
+{
+    flatten_state *tmp_state_p = NULL;
+    switch(op_type)
+    {
+        case TEMP_OFF:
+            tmp_state_p = &(tmp_view_state_p->tmp_state);
+            break;
+        case REAL_OFF:
+            tmp_state_p = &(tmp_view_state_p->cur_state);
+            break;
+        default:
+            fprintf(stderr, "op_type invalid\n");
+            /* no state selected: bail out instead of dereferencing NULL */
+            return -1;
+    }
+    return (ADIO_Offset)
+        tmp_view_state_p->flat_type_p->blocklens[tmp_state_p->idx] -
+        tmp_state_p->cur_reg_off;
+}
+
+/* Add up to a region of a file view and no larger than a max size.
+ * The view_state is always consistent with the abs_off and where the
+ * index and cur_reg_off point to.  The regions should be coalesced if
+ * possible later on.
+ *
+ * max_sz           upper bound on the number of bytes to consume
+ * tmp_view_state_p view whose tmp_state (TEMP_OFF) or cur_state (REAL_OFF)
+ *                  is advanced
+ * st_reg_p         output: abs_off of the state before it was advanced
+ * tmp_reg_sz_p     output: number of bytes actually consumed
+ * op_type          TEMP_OFF or REAL_OFF
+ *
+ * Returns 0 on success.  For an invalid op_type a diagnostic is printed,
+ * *tmp_reg_sz_p is set to 0 and -1 is returned. */
+static inline int view_state_add_region(
+    ADIO_Offset max_sz,
+    view_state *tmp_view_state_p,
+    ADIO_Offset *st_reg_p,
+    ADIO_Offset *tmp_reg_sz_p,
+    int op_type)
+{
+    ADIOI_Flatlist_node *tmp_flat_type_p = NULL;
+    flatten_state *tmp_state_p = NULL;
+    int64_t data_sz = 0;
+
+#ifdef AGGREGATION_PROFILE
+    /* MPE_Log_event (5020, 0, NULL); */
+#endif
+
+    switch(op_type)
+    {
+        case TEMP_OFF:
+            tmp_state_p = &(tmp_view_state_p->tmp_state);
+            break;
+        case REAL_OFF:
+            tmp_state_p = &(tmp_view_state_p->cur_state);
+            break;
+        default:
+            fprintf(stderr, "op_type invalid\n");
+            /* no state selected: report "nothing added" instead of
+             * dereferencing the NULL state pointer below */
+            *tmp_reg_sz_p = 0;
+            return -1;
+    }
+
+    tmp_flat_type_p = tmp_view_state_p->flat_type_p;
+
+    *st_reg_p = tmp_state_p->abs_off;
+
+    /* Should be looking at some data (or it's a zero len blocklens
+     * (i.e. placeholder). */
+    assert(tmp_state_p->cur_reg_off !=
+           tmp_flat_type_p->blocklens[tmp_state_p->idx]);
+    /* Shouldn't have been called if the view_state is done. */
+    assert(tmp_state_p->cur_sz != tmp_view_state_p->sz);
+
+    /* Make sure we are not in a zero-length region of the flat_type */
+    assert(tmp_flat_type_p->blocklens[tmp_state_p->idx] != 0);
+
+#ifdef DEBUG3
+    fprintf(stderr, "view_state:(blocklens[%Ld]=%d,cur_reg_off=%Ld,"
+            "max_sz=%Ld)\n", tmp_state_p->idx,
+            tmp_flat_type_p->blocklens[tmp_state_p->idx],
+            tmp_state_p->cur_reg_off, max_sz);
+#endif
+
+    /* Can it add the whole piece? */
+    if (tmp_flat_type_p->blocklens[tmp_state_p->idx] -
+        tmp_state_p->cur_reg_off <= max_sz)
+    {
+        data_sz = tmp_flat_type_p->blocklens[tmp_state_p->idx] -
+            tmp_state_p->cur_reg_off;
+
+        tmp_state_p->cur_sz += data_sz;
+
+        /* Advance the abs_off to the beginning of the next piece */
+        if (tmp_flat_type_p->count == 1)
+        {
+            assert(tmp_flat_type_p->blocklens[tmp_state_p->idx] != 0);
+            tmp_state_p->abs_off += data_sz;
+#ifdef DEBUG3
+            fprintf(stderr, "view_state_add_region: %s contig type "
+                    "(old abs_off=%Ld,abs_off=%Ld,cur_sz=%Ld,reg size=%Ld)\n",
+                    off_type_name[op_type], tmp_state_p->abs_off - data_sz,
+                    tmp_state_p->abs_off, tmp_state_p->cur_sz, data_sz);
+#endif
+        }
+        else
+        {
+            /* Is this the last region in the datatype? */
+            if (tmp_state_p->idx == (tmp_flat_type_p->count - 1))
+            {
+                /* step past the rest of this type instance, i.e. to
+                 * (start of this instance) + extent */
+                tmp_state_p->abs_off += data_sz -
+                    tmp_flat_type_p->indices[tmp_flat_type_p->count-1] -
+                    tmp_flat_type_p->blocklens[tmp_flat_type_p->count-1] +
+                    tmp_view_state_p->ext;
+#ifdef DEBUG3
+                fprintf(stderr, "view_state_add_region: %s last region for type "
+                        "(old abs_off=%Ld,abs_off=%Ld,cur_sz=%Ld,reg size=%Ld)\n",
+                        off_type_name[op_type], tmp_state_p->abs_off - data_sz,
+                        tmp_state_p->abs_off, tmp_state_p->cur_sz, data_sz);
+#endif
+            }
+            else
+            {
+                /* jump from the current position inside block idx to the
+                 * start of block idx+1 */
+                tmp_state_p->abs_off +=
+                    tmp_flat_type_p->indices[tmp_state_p->idx + 1] -
+                    (tmp_flat_type_p->indices[tmp_state_p->idx] +
+                     tmp_state_p->cur_reg_off);
+#ifdef DEBUG3
+                fprintf(stderr, "view_state_add_region: %s inner region type "
+                        "(old abs_off=%Ld,abs_off=%Ld,cur_sz=%Ld,reg size=%Ld)\n",
+                        off_type_name[op_type], tmp_state_p->abs_off -
+                        (tmp_flat_type_p->indices[tmp_state_p->idx + 1] -
+                         (tmp_flat_type_p->indices[tmp_state_p->idx] +
+                          tmp_state_p->cur_reg_off)), tmp_state_p->abs_off,
+                        tmp_state_p->cur_sz, data_sz);
+#endif
+            }
+            /* Increment idx to next non-zero region in the flat_type */
+            do {
+                tmp_state_p->idx =
+                    (tmp_state_p->idx + 1) % tmp_flat_type_p->count;
+            } while (tmp_flat_type_p->blocklens[tmp_state_p->idx] == 0);
+        }
+        tmp_state_p->cur_reg_off = 0;
+    }
+    else /* Add part of the piece */
+    {
+        data_sz = max_sz;
+        tmp_state_p->cur_reg_off += data_sz;
+        tmp_state_p->abs_off += data_sz;
+        tmp_state_p->cur_sz += data_sz;
+#ifdef DEBUG3
+        fprintf(stderr, "view_state_add_region: %s partial region type "
+                "(cur_reg_off=%Ld,abs_off=%Ld,cur_sz=%Ld,reg size=%Ld\n",
+                off_type_name[op_type], tmp_state_p->cur_reg_off,
+                tmp_state_p->abs_off, tmp_state_p->cur_sz, data_sz);
+#endif
+    }
+
+    *tmp_reg_sz_p = data_sz;
+#ifdef AGGREGATION_PROFILE
+    /* MPE_Log_event (5021, 0, NULL); */
+#endif
+    return 0;
+}
+
+/* Set up the abs_off, idx, and cur_reg_off of a view_state for the
+ * tmp_state or the cur_state.
+ *
+ * file_ptr_type  ADIO_INDIVIDUAL starts from each view's fp_ind; any other
+ *                value starts from disp, and ADIO_EXPLICIT_OFFSET also adds
+ *                the displacement of the first non-empty block
+ * nprocs         number of entries in view_state_arr
+ * view_state_arr views to initialise (each entry's idx is used as the
+ *                starting block index)
+ * op_type        TEMP_OFF (tmp_state) or REAL_OFF (cur_state)
+ *
+ * Each state is advanced byte_off bytes into its datatype and its cur_sz is
+ * then reset to 0.  Returns 0 on success, -1 for an invalid op_type. */
+int ADIOI_init_view_state(int file_ptr_type,
+                          int nprocs,
+                          view_state *view_state_arr,
+                          int op_type)
+{
+    ADIOI_Flatlist_node *tmp_flat_type_p = NULL;
+    ADIO_Offset tmp_off_used = 0, st_reg = 0, tmp_reg_sz = 0;
+    int i;
+    flatten_state *tmp_state_p = NULL;
+    view_state *tmp_view_p = NULL;
+
+    for (i = 0; i < nprocs; i++)
+    {
+        switch(op_type)
+        {
+            case TEMP_OFF:
+                tmp_state_p = &(view_state_arr[i].tmp_state);
+                break;
+            case REAL_OFF:
+                tmp_state_p = &(view_state_arr[i].cur_state);
+                break;
+            default:
+                fprintf(stderr, "op_type invalid\n");
+                /* no state selected: stop instead of dereferencing NULL */
+                return -1;
+        }
+
+        tmp_view_p = &(view_state_arr[i]);
+        tmp_flat_type_p = tmp_view_p->flat_type_p;
+
+        if (file_ptr_type == ADIO_INDIVIDUAL)
+            tmp_state_p->abs_off = tmp_view_p->fp_ind;
+        else
+            tmp_state_p->abs_off = tmp_view_p->disp;
+
+        tmp_off_used = 0;
+
+        /* initialize tmp_state idx */
+        while (tmp_flat_type_p->blocklens[tmp_state_p->idx] == 0)
+            tmp_state_p->idx = (tmp_state_p->idx + 1) % tmp_flat_type_p->count;
+        if (file_ptr_type == ADIO_EXPLICIT_OFFSET)
+            tmp_state_p->abs_off += tmp_flat_type_p->indices[tmp_state_p->idx];
+
+        /* Initialize the abs_off by moving into the datatype
+         * byte_off bytes.  Since we only do this in the beginning, we
+         * make the assumption that pieces are added whole until the last
+         * piece which MAY be partial. */
+        while (tmp_off_used != tmp_view_p->byte_off)
+        {
+            view_state_add_region(
+                tmp_view_p->byte_off - tmp_off_used,
+                &(view_state_arr[i]), &st_reg, &tmp_reg_sz,
+                op_type);
+            /* account for the piece just consumed; without this the loop
+             * condition never changes and the loop cannot finish */
+            tmp_off_used += tmp_reg_sz;
+        }
+
+        /* Re-initialize the cur_size so that the abs_off was set to
+         * the proper position while the actual size = 0.*/
+        tmp_state_p->cur_sz = 0;
+#ifdef DEBUG1
+        fprintf(stderr, "init_view_state: %s (idx=%d,byte_off=%Ld,"
+                "abs_off=%Ld,reg_off=%Ld,sz=%Ld)\n", off_type_name[op_type],
+                i, tmp_view_p->byte_off, tmp_state_p->abs_off,
+                tmp_state_p->cur_reg_off, tmp_view_p->sz);
+#endif
+
+    }
+    return 0;
+}
+
+/* Return the next file realm offset and length for this datatype state
+ * within a particular file realm.
+ *
+ * fd            unused here
+ * off           absolute file offset to locate
+ * fr_st_off     starting offset of the file realm
+ * fr_type_p     realm datatype; its flattened form is looked up in
+ *               ADIOI_Flatlist
+ * fr_next_off_p output: `off` itself when it falls inside a realm block,
+ *               otherwise the start of the next realm block after it
+ * fr_max_len_p  output: bytes available in that block from *fr_next_off_p
+ *
+ * Returns 0 when a block was found and -1 otherwise. */
+static inline int get_next_fr_off(ADIO_File fd,
+                                  ADIO_Offset off,
+                                  ADIO_Offset fr_st_off,
+                                  MPI_Datatype *fr_type_p,
+                                  ADIO_Offset *fr_next_off_p,
+                                  ADIO_Offset *fr_max_len_p)
+{
+    MPI_Aint fr_extent = -1;
+    ADIO_Offset tmp_off, off_rem;
+    ADIOI_Flatlist_node *fr_node_p = ADIOI_Flatlist;
+    int i = -1, fr_dtype_ct = 0;
+
+    /* Should have already been flattened in calc_file_realms().  Stop at
+     * the end of the list so that a missing entry reaches the assert
+     * instead of dereferencing NULL inside the loop condition. */
+    while (fr_node_p != NULL && fr_node_p->type != (*fr_type_p))
+        fr_node_p = fr_node_p->next;
+    assert(fr_node_p != NULL);
+
+    /* Did we get to the first region of the file realm? */
+    if (off - fr_st_off < 0)
+    {
+        *fr_next_off_p = fr_st_off + fr_node_p->indices[0];
+        *fr_max_len_p = fr_node_p->blocklens[0];
+        return 0;
+    }
+
+    /* Calculate how many times to loop through the fr_type
+     * and where the next fr_off is. */
+    MPI_Type_extent(*fr_type_p, &fr_extent);
+    tmp_off = off - fr_st_off;
+    fr_dtype_ct = tmp_off / fr_extent;
+    off_rem = tmp_off % fr_extent;
+    for (i = 0; i < fr_node_p->count; i++)
+    {
+        if (off_rem < fr_node_p->indices[i])
+        {
+            /* off lies in a gap: report the start of the next block */
+            *fr_next_off_p = fr_st_off +
+                (fr_dtype_ct * fr_extent) + fr_node_p->indices[i];
+            *fr_max_len_p = fr_node_p->blocklens[i];
+            return 0;
+        }
+        else if (off_rem < fr_node_p->indices[i] + fr_node_p->blocklens[i])
+        {
+            /* off lies inside block i: report what is left of the block */
+            *fr_next_off_p = off;
+            *fr_max_len_p = fr_node_p->blocklens[i] -
+                (off_rem - fr_node_p->indices[i]);
+            /* success is 0 on every path; returning the offset itself
+             * (narrowed to int) carried no usable information */
+            return 0;
+        }
+    }
+
+    /* Shouldn't get here. */
+    fprintf(stderr, "get_next_fr_off: Couldn't find the correct "
+            "location of the next offset for this file realm.\n");
+    return -1;
+}
+
+/* Look in all the view states for the first offset within a given
+ * file realm.  Report the end of a contiguous region within the file
+ * realm (possibly more than the actual view state may be able to
+ * process contiguously).
+ *
+ * fd                 passed through to get_next_fr_off()
+ * view_state_p       view whose tmp_state/cur_state is advanced
+ * fr_st_off          starting offset of the file realm
+ * fr_type_p          datatype describing the file realm
+ * op_type            TEMP_OFF or REAL_OFF
+ * cur_off_p          output: next offset of this view inside the realm, or
+ *                    -1 if the view has no data left
+ * cur_reg_max_len_p  output: bytes left in the realm block from that
+ *                    offset, or -1 if the view has no data left
+ *
+ * Returns 0; for an invalid op_type both outputs are set to -1 and -1 is
+ * returned. */
+static inline int find_next_off(ADIO_File fd,
+                                view_state *view_state_p,
+                                ADIO_Offset fr_st_off,
+                                MPI_Datatype *fr_type_p,
+                                int op_type,
+                                ADIO_Offset *cur_off_p,
+                                ADIO_Offset *cur_reg_max_len_p)
+{
+    ADIOI_Flatlist_node *tmp_flat_type_p = NULL;
+    ADIO_Offset tmp_off = -1, fr_next_off = -1, fr_max_len = -1,
+        tmp_fr_max_len = -1;
+    int ret = -1;
+    flatten_state *tmp_state_p = NULL;
+    ADIO_Offset tmp_st_off = 0, tmp_reg_sz = 0;
+#ifdef DTYPE_SKIP
+    int skip_type_ct;
+#endif
+
+#ifdef AGGREGATION_PROFILE
+    /* MPE_Log_event (5022, 0, NULL); */
+#endif
+
+    switch(op_type)
+    {
+        case TEMP_OFF:
+            tmp_state_p = &(view_state_p->tmp_state);
+            break;
+        case REAL_OFF:
+            tmp_state_p = &(view_state_p->cur_state);
+            break;
+        default:
+            fprintf(stderr, "op_type invalid\n");
+            /* no state selected: report "nothing found" instead of
+             * dereferencing the NULL state pointer below */
+            *cur_off_p = -1;
+            *cur_reg_max_len_p = -1;
+            return -1;
+    }
+
+    tmp_flat_type_p = view_state_p->flat_type_p;
+
+    /* Can we use this proc? */
+    if (tmp_state_p->cur_sz < view_state_p->sz) {
+        tmp_st_off = 0;
+        tmp_reg_sz = 0;
+        /* If the current region is not within the file realm, advance
+         * the state until it is and calculate the end of the next file
+         * realm in fr_max_len. */
+        ret = get_next_fr_off(fd,
+                              tmp_state_p->abs_off,
+                              fr_st_off,
+                              fr_type_p,
+                              &fr_next_off,
+                              &fr_max_len);
+
+        while ((tmp_state_p->abs_off < fr_next_off) &&
+               (tmp_state_p->cur_sz != view_state_p->sz))
+        {
+
+            /* While this might appear to be erroneous at first,
+             * view_state_add_region can only add a single piece at a
+             * time.  Therefore, it will never overshoot the beginning
+             * of the next file realm.  When it finally does enter the
+             * next file realm it will not be able to go beyond its
+             * first piece. */
+
+#ifdef DTYPE_SKIP
+            if (tmp_flat_type_p->count > 1) {
+                /* let's see if we can skip whole datatypes */
+                skip_type_ct = (fr_next_off - tmp_state_p->abs_off) /
+                    view_state_p->ext;
+                if (skip_type_ct > 0) {
+                    /* before we go on, let's check if we've actually
+                     * finished up already */
+                    tmp_state_p->cur_sz += skip_type_ct *
+                        view_state_p->type_sz;
+                    if (tmp_state_p->cur_sz >= view_state_p->sz) {
+                        tmp_state_p->cur_sz = view_state_p->sz;
+                        break;
+                    }
+                    tmp_state_p->abs_off += skip_type_ct * view_state_p->ext;
+                }
+            }
+#endif
+            view_state_add_region(
+                fr_next_off - tmp_state_p->abs_off,
+                view_state_p,
+                &tmp_st_off,
+                &tmp_reg_sz,
+                op_type);
+
+            ret = get_next_fr_off(fd,
+                                  tmp_state_p->abs_off,
+                                  fr_st_off,
+                                  fr_type_p,
+                                  &fr_next_off,
+                                  &fr_max_len);
+        }
+
+        if (tmp_state_p->cur_sz != view_state_p->sz) {
+            tmp_off = tmp_state_p->abs_off;
+            /* Calculate how much of the remaining file realm there is from the
+             * current offset */
+            tmp_fr_max_len = fr_next_off + fr_max_len - tmp_off;
+        }
+    }
+
+    *cur_off_p = tmp_off;
+    *cur_reg_max_len_p = tmp_fr_max_len;
+#ifdef AGGREGATION_PROFILE
+    /* MPE_Log_event (5023, 0, NULL); */
+#endif
+    return 0;
+}
+
+/* Upon completion of a full collective buffer, end of a file realm
+ * region (data sieving), or the end of all I/O for an aggregator, we
+ * should return a list of MPI_Datatypes that correspond to client
+ * communication into a collective buffer, a list of corresponding
+ * sizes, and an aggregate MPI_Datatype which will be used as a
+ * filetype in MPI_File_write/read on the aggregator. */
+int ADIOI_Build_agg_reqs(ADIO_File fd, int rw_type, int nprocs,
+ view_state *client_file_view_state_arr,
+ MPI_Datatype *client_comm_dtype_arr,
+ ADIO_Offset *client_comm_sz_arr,
+ ADIO_Offset *agg_dtype_offset_p,
+ MPI_Datatype *agg_dtype_p)
+{
+ MPI_Aint **client_disp_arr = NULL, *agg_disp_arr = NULL;
+ int **client_blk_arr = NULL, *agg_blk_arr = NULL;
+ ADIO_Offset tmp_coll_buf_sz = 0, st_reg = 0, act_reg_sz = 0;
+ ADIO_Offset cur_off = -1, cur_reg_max_len = -1;
+ ADIO_Offset ds_fr_end = -1;
+ ADIO_Offset *fr_st_off_arr = fd->file_realm_st_offs;
+ MPI_Datatype *fr_type_arr = fd->file_realm_types;
+ int *client_ol_ct_arr = NULL;
+ int *client_ol_cur_ct_arr = NULL;
+ int agg_ol_ct = 0, agg_ol_cur_ct = 0;
+ int cur_off_proc = -1;
+ int next_off_idx = -1;
+ int i = 0, j = 0, all_done = -1;
+ int agg_idx = fd->my_cb_nodes_index;
+ heap_t offset_heap;
+ ADIO_Offset next_off = -1, next_reg_max_len = -1;
+
+ /* Used for coalescing ol pairs next to each other. */
+ ADIO_Offset *client_comm_next_off_arr = NULL;
+ ADIO_Offset agg_next_off = -1;
+#ifdef AGGREGATION_PROFILE
+ MPE_Log_event (5016, 0, NULL);
+#endif
+
+ memset(client_comm_sz_arr, 0, nprocs*sizeof(ADIO_Offset));
+
+ if ((client_comm_next_off_arr = (ADIO_Offset *)
+ ADIOI_Malloc(nprocs*sizeof(ADIO_Offset))) == NULL)
+ {
+ fprintf(stderr, "ADIOI_Build_agg_reqs: malloc client_next_off_arr "
+ "failed\n");
+ return -1;
+ }
+
+ if ((client_ol_ct_arr = (int *) ADIOI_Calloc(nprocs, sizeof(int))) == NULL)
+ {
+ fprintf(stderr, "ADIOI_Build_agg_reqs: "
+ "malloc client_ol_ct_arr failed\n");
+ return -1;
+ }
+ if ((client_ol_cur_ct_arr =
+ (int *) ADIOI_Calloc(nprocs, sizeof(int))) == NULL)
+ {
+ fprintf(stderr, "ADIOI_Build_agg_reqs: "
+ "malloc client_ol_cur_ct_arr failed\n");
+ return -1;
+ }
+
+ /* On the first pass see how many offset-length pairs are
+ * necessary for each client. Then allocate the correct amount of
+ * offset-length pairs for describing the collective buffer. All
+ * data is processed in order by the aggregator's file realm. On
+ * the second pass, set the offset-length pairs to the correct
+ * values. */
+ for (i = 0; i < MAX_OFF_TYPE; i++)
+ {
+ memset(client_comm_next_off_arr, -1, nprocs*sizeof(ADIO_Offset));
+ tmp_coll_buf_sz = 0;
+ ds_fr_end = -1;
+
+ /* initialize heap */
+ ADIOI_Heap_create(&offset_heap, nprocs);
+ offset_heap.size = 0;
+
+ for (j=0; j 0)) {
+ ADIOI_Heap_insert(&offset_heap, cur_off, j, cur_reg_max_len);
+#ifdef DEBUG_HEAP
+ printf ("initial: inserting offset %lld with "
+ "cur_reg_max_len = %lld for p%d\n",
+ cur_off, cur_reg_max_len, j);
+#endif
+ }
+
+ }
+ if (!offset_heap.size)
+ ADIOI_Heap_insert(&offset_heap, -1, -1, -1);
+
+ while (tmp_coll_buf_sz < fd->hints->cb_buffer_size)
+ {
+ /* Find the next process with the next region within the
+ * file realm and the maximum amount that can be added for
+ * this particular file realm as a contiguous region. */
+ ADIOI_Heap_extract_min(&offset_heap, &cur_off, &cur_off_proc,
+ &cur_reg_max_len);
+#ifdef DEBUG_HEAP
+ printf ("extracted cur_off %lld from proc %d\n",
+ cur_off, cur_off_proc);
+#endif
+
+ if (cur_off == -1)
+ break;
+
+#ifdef DEBUG3
+ fprintf(stderr, "ADIOI_Build_agg_reqs: %s proc %d start/add to"
+ " list (max_reg_fr=%Ld,tmp_coll_buf_sz=%Ld,"
+ "cb_buffer_size=%d)\n", off_type_name[i], cur_off_proc,
+ cur_reg_max_len, tmp_coll_buf_sz,
+ fd->hints->cb_buffer_size);
+#endif
+
+ /* We process only contiguous file realm regions if we are
+ * using data sieving. Note that we only do this for
+ * writes since reads can be data sieved across each other
+ * without consistency issues. */
+ if ((fd->hints->ds_write == ADIOI_HINT_ENABLE ||
+ fd->hints->ds_write == ADIOI_HINT_AUTO) &&
+ rw_type == ADIOI_WRITE && fd->hints->cb_nodes > 1)
+ {
+#ifdef DEBUG2
+ fprintf(stderr, "ADIOI_Build_agg_reqs: "
+ "Warning - Data sieving writes on\n");
+#endif
+ if (ds_fr_end == -1)
+ {
+ ds_fr_end = cur_off + cur_reg_max_len;
+#ifdef DEBUG1
+ fprintf(stderr, "ADIOI_Build_agg_reqs: "
+ "cur_off=%Ld, cur_reg_max_len=%Ld\n"
+ "Data sieving file realm end initialized to %Ld\n",
+ cur_off,
+ cur_reg_max_len,
+ ds_fr_end);
+#endif
+ }
+ else
+ {
+ /* The next off switched file realms, so we will stop
+ * here. */
+ if (ds_fr_end != cur_off + cur_reg_max_len)
+ {
+#ifdef DEBUG1
+ fprintf(stderr, "ADIOI_Build_agg_reqs: "
+ "Data sieving file realm end changed from "
+ "%Ld to %Ld\n", ds_fr_end,
+ cur_off + cur_reg_max_len);
+#endif
+ break;
+ }
+ }
+ }
+
+ /* Add up to the end of the file realm or the collective
+ * buffer. */
+ if (cur_reg_max_len > (fd->hints->cb_buffer_size -
+ tmp_coll_buf_sz))
+ cur_reg_max_len = fd->hints->cb_buffer_size - tmp_coll_buf_sz;
+
+ view_state_add_region(
+ cur_reg_max_len,
+ &(client_file_view_state_arr[cur_off_proc]),
+ &st_reg, &act_reg_sz, i);
+
+ switch(i)
+ {
+ case TEMP_OFF:
+ /* Increment the ol list count for each proc and
+ * the used part of the collective buffer if the
+ * next region is not adjacent to the previous
+ * region. */
+ if (client_comm_next_off_arr[cur_off_proc] !=
+ tmp_coll_buf_sz)
+ {
+ (client_ol_ct_arr[cur_off_proc])++;
+ }
+ client_comm_next_off_arr[cur_off_proc] =
+ tmp_coll_buf_sz + act_reg_sz;
+
+ if (agg_next_off != st_reg)
+ agg_ol_ct++;
+ agg_next_off = st_reg + act_reg_sz;
+ break;
+ case REAL_OFF:
+ /* Add this region to the proper client ol list if
+ * the next region is not adjacent to the previous
+ * region. */
+ next_off_idx = client_ol_cur_ct_arr[cur_off_proc];
+ if (client_comm_next_off_arr[cur_off_proc] !=
+ tmp_coll_buf_sz)
+ {
+ client_disp_arr[cur_off_proc][next_off_idx] =
+ tmp_coll_buf_sz;
+ client_blk_arr[cur_off_proc][next_off_idx] =
+ act_reg_sz;
+ (client_ol_cur_ct_arr[cur_off_proc])++;
+ }
+ else
+ {
+ client_blk_arr[cur_off_proc][next_off_idx - 1]
+ += act_reg_sz;
+ }
+ client_comm_sz_arr[cur_off_proc] += act_reg_sz;
+ client_comm_next_off_arr[cur_off_proc] =
+ tmp_coll_buf_sz + act_reg_sz;
+
+ /* Add to the aggregator filetype if the next
+ * region is not adjacent to the previous
+ * region. */
+ if (agg_next_off != st_reg)
+ {
+ /* this will enable initial offsets much further into
+ * the file than an MPI_Aint */
+ if (!agg_ol_cur_ct)
+ *agg_dtype_offset_p = st_reg;
+ agg_disp_arr[agg_ol_cur_ct] = st_reg -
+ (MPI_Aint) *agg_dtype_offset_p;
+ agg_blk_arr[agg_ol_cur_ct] = act_reg_sz;
+ agg_ol_cur_ct++;
+ }
+ else
+ {
+ agg_blk_arr[agg_ol_cur_ct - 1] += act_reg_sz;
+ }
+ agg_next_off = st_reg + act_reg_sz;
+
+ break;
+ default:
+ fprintf(stderr, "ADIOI_Build_agg_reqs: Impossible type\n");
+ }
+ tmp_coll_buf_sz += act_reg_sz;
+
+ find_next_off(fd,
+ &client_file_view_state_arr[cur_off_proc],
+ fr_st_off_arr[agg_idx],
+ &(fr_type_arr[agg_idx]),
+ i,
+ &next_off,
+ &next_reg_max_len);
+
+ if ((next_off != -1) || (!offset_heap.size)) {
+ ADIOI_Heap_insert(&offset_heap, next_off, cur_off_proc,
+ next_reg_max_len);
+#ifdef DEBUG_HEAP
+ printf ("inserting offset %lld for p%d\n", next_off,
+ cur_off_proc);
+#endif
+ }
+ }
+
+ if (i == TEMP_OFF)
+ {
+ /* Allocate offset-length pairs for creating hindexed
+ * MPI_Datatypes for both the client and the aggregator. */
+ if ((client_disp_arr = (MPI_Aint **)
+ ADIOI_Malloc(nprocs*sizeof(MPI_Aint *))) == NULL)
+ {
+ fprintf(stderr, "ADIOI_Build_agg_reqs: malloc "
+ "client_disp_arr failed\n");
+ return -1;
+ }
+ if ((client_blk_arr = (int **) ADIOI_Malloc(
+ nprocs*sizeof(int *))) == NULL)
+ {
+ ADIOI_Free(client_disp_arr);
+ fprintf(stderr, "ADIOI_Build_agg_reqs: malloc "
+ "client_blk_arr failed\n");
+ return -1;
+ }
+ for (j = 0; j < nprocs; j++)
+ {
+ if ((client_disp_arr[j] = (MPI_Aint *) ADIOI_Malloc(
+ client_ol_ct_arr[j]*sizeof(MPI_Aint))) == NULL)
+ {
+ fprintf(stderr, "ADIOI_Build_agg_reqs: malloc "
+ "client_disp_arr[%d] failed\n", j);
+ return -1;
+ }
+ if ((client_blk_arr[j] = (int *)
+ ADIOI_Malloc(client_ol_ct_arr[j]*sizeof(int))) == NULL)
+ {
+ ADIOI_Free(client_disp_arr[j]);
+ fprintf(stderr, "ADIOI_Build_agg_reqs: malloc "
+ "client_blk_arr[%d] failed\n", j);
+ return -1;
+ }
+ }
+
+ if (agg_ol_ct > 0)
+ {
+ if ((agg_disp_arr = (MPI_Aint *) ADIOI_Malloc(
+ agg_ol_ct*sizeof(MPI_Aint))) == NULL)
+ {
+ fprintf(stderr,
+ "ADIOI_Build_agg_reqs: malloc disp_arr failed\n");
+ return -1;
+ }
+ if ((agg_blk_arr = (int *)
+ ADIOI_Malloc(agg_ol_ct*sizeof(int))) == NULL)
+ {
+ ADIOI_Free(agg_disp_arr);
+ fprintf(stderr,
+ "ADIOI_Build_agg_reqs: malloc blk_arr failed\n");
+ return -1;
+ }
+ }
+ }
+ ADIOI_Heap_free(&offset_heap);
+ }
+
+ /* Let the clients know if this aggregator is totally finished
+ * with all possible client requests. */
+ all_done = 1;
+ for (i = 0; i < nprocs; i++)
+ {
+ if ((client_file_view_state_arr[i].cur_state.cur_sz !=
+ client_file_view_state_arr[i].sz) ||
+ client_comm_sz_arr[i] != 0)
+ {
+ all_done = 0;
+ break;
+ }
+ }
+ if (all_done == 1)
+ {
+ for (i = 0; i < nprocs; i++)
+ {
+ client_comm_sz_arr[i] = -1;
+ }
+ }
+
+ /* Quick check to make sure we found all the ol pairs we thought
+ * we did */
+ for (i = 0; i < nprocs; i++)
+ {
+ if (client_ol_cur_ct_arr[i] != client_ol_ct_arr[i])
+ {
+ fprintf(stderr, "ADIOI_Build_agg_reqs: ERROR Process %d "
+ "processed only %d out of %d ol pairs\n", i,
+ client_ol_cur_ct_arr[i],
+ client_ol_ct_arr[i]);
+ return -1;
+ }
+ }
+#ifdef DEBUG1
+ fprintf(stderr, "ADIOI_Build_agg_reqs:(client,ol_pairs,size_req)=");
+ for (i = 0; i < nprocs; i++)
+ {
+ fprintf(stderr, "(%d,%d,%Ld)", i, client_ol_ct_arr[i],
+ client_comm_sz_arr[i]);
+ if (i != nprocs - 1)
+ fprintf(stderr, ",");
+ }
+ fprintf(stderr, "\n");
+#endif
+#ifdef DEBUG1
+ fprintf(stderr, "ADIOI_Build_agg_reqs: Generated %d of %d "
+ "aggregate offset-length pairs\n", agg_ol_cur_ct, agg_ol_ct);
+#endif
+#ifdef DEBUG2
+ for (i = 0; i < nprocs; i++)
+ {
+ if (client_ol_ct_arr[i] > 0)
+ {
+ fprintf(stderr, "ADIOI_Build_agg_reqs: p %d (off,len) = ", i);
+ for (j = 0; j < client_ol_ct_arr[i]; j++)
+ {
+ fprintf(stderr, "[%d](%d,%d) ", j,
+ client_disp_arr[i][j],
+ client_blk_arr[i][j]);
+ }
+ fprintf(stderr, "\n");
+ }
+ }
+ if (agg_ol_ct) {
+ fprintf(stderr, "ADIOI_Build_agg_reqs:agg_type(off,len)=");
+ for (i = 0; i < agg_ol_ct; i++)
+ {
+ fprintf(stderr, "[%d](%d,%d)",
+ i, agg_disp_arr[i], agg_blk_arr[i]);
+ if (i != agg_ol_ct - 1)
+ fprintf(stderr, ",");
+ }
+ fprintf(stderr, "\n");
+ }
+#endif
+
+ assert(agg_ol_cur_ct == agg_ol_ct);
+
+ /* Create all the client and aggregate MPI_Datatypes */
+ for (i = 0; i < nprocs; i++)
+ {
+ if (client_comm_sz_arr[i] > 0)
+ {
+ MPI_Type_hindexed(client_ol_ct_arr[i], client_blk_arr[i],
+ client_disp_arr[i], MPI_BYTE,
+ &(client_comm_dtype_arr[i]));
+ MPI_Type_commit(&(client_comm_dtype_arr[i]));
+ }
+ else
+ {
+ client_comm_dtype_arr[i] = MPI_BYTE;
+ }
+ ADIOI_Free(client_blk_arr[i]);
+ ADIOI_Free(client_disp_arr[i]);
+ }
+ ADIOI_Free(client_blk_arr);
+ ADIOI_Free(client_disp_arr);
+
+ if (agg_ol_ct > 0) {
+ if (agg_ol_ct == 1)
+ MPI_Type_contiguous (agg_blk_arr[0], MPI_BYTE, agg_dtype_p);
+ else if (agg_ol_ct > 1)
+ MPI_Type_hindexed(agg_ol_ct, agg_blk_arr, agg_disp_arr, MPI_BYTE,
+ agg_dtype_p);
+
+ MPI_Type_commit(agg_dtype_p);
+
+ ADIOI_Free(agg_disp_arr);
+ ADIOI_Free(agg_blk_arr);
+ }
+ ADIOI_Free(client_ol_ct_arr);
+ ADIOI_Free(client_ol_cur_ct_arr);
+ ADIOI_Free(client_comm_next_off_arr);
+#ifdef AGGREGATION_PROFILE
+ MPE_Log_event (5017, 0, NULL);
+#endif
+ return 0;
+}
+
+/* All sizes from all aggregators are gathered on the clients, which then
+ * call this function to build the memory datatype used for each aggregator
+ * (agg_comm_dtype_arr[i], or MPI_BYTE when agg_comm_sz_arr[i] <= 0) in the
+ * upcoming MPI_Alltoallw().  Returns 0 on success, -1 if an allocation fails. */
+int ADIOI_Build_client_reqs(ADIO_File fd,
+                            int nprocs,
+                            view_state *my_mem_view_state_arr,
+                            view_state *agg_file_view_state_arr,
+                            ADIO_Offset *agg_comm_sz_arr,
+                            MPI_Datatype *agg_comm_dtype_arr)
+{
+    MPI_Aint **agg_disp_arr = NULL;
+    int **agg_blk_arr = NULL;
+    view_state *tmp_mem_state_p = NULL, *tmp_file_state_p = NULL;
+    ADIO_Offset total_agg_comm_sz = 0, cur_total_agg_comm_sz = 0;
+    ADIO_Offset st_reg = 0, act_reg_sz = 0, tmp_reg_sz = 0;
+    ADIO_Offset cur_off = -1, cur_reg_max_len = -1;
+    ADIO_Offset tmp_cur_off = -1, tmp_cur_reg_max_len = -1;
+    ADIO_Offset agg_mem_st_reg = 0, agg_mem_act_reg_sz = 0;
+    ADIO_Offset *fr_st_off_arr = fd->file_realm_st_offs;
+    ADIO_Offset *agg_comm_cur_sz_arr = NULL;
+    MPI_Datatype *fr_type_arr = fd->file_realm_types;
+    int cb_node_ct = fd->hints->cb_nodes;
+    int *agg_ol_ct_arr = NULL;
+    int *agg_ol_cur_ct_arr = NULL;
+    int agg_fr_idx = -1, tmp_agg_fr_idx = -1;
+    int cur_off_proc = -1;
+    int i = 0, j = 0;
+    int agg_next_off_idx = -1;
+    /* Used for coalescing ol pairs next to each other. */
+    ADIO_Offset *agg_mem_next_off_arr = NULL;
+#ifdef AGGREGATION_PROFILE
+    MPE_Log_event (5018, 0, NULL);
+#endif
+
+#ifdef DEBUG
+    fprintf(stderr, "ADIOI_Build_client_reqs:(agg,size_req)=");
+    for (i = 0; i < nprocs; i++)
+    {
+        int tmp_agg_idx = ADIOI_Agg_idx(i, fd);
+        if (tmp_agg_idx >= 0)
+        {
+            fprintf(stderr, "(%d,%Ld)", i, agg_comm_sz_arr[i]);
+            if (tmp_agg_idx != fd->hints->cb_nodes - 1)
+                fprintf(stderr, ",");
+        }
+    }
+    fprintf(stderr, "\n");
+#endif
+
+    if ((agg_mem_next_off_arr = (ADIO_Offset *) ADIOI_Malloc(
+             nprocs*sizeof(ADIO_Offset))) == NULL)
+    {
+        fprintf(stderr, "ADIOI_Build_client_reqs: malloc agg_mem_next_off_arr"
+                " failed\n");
+        return -1;
+    }
+
+    if ((agg_comm_cur_sz_arr = (ADIO_Offset *)
+         ADIOI_Malloc(nprocs*sizeof(ADIO_Offset))) == NULL)
+    {
+        fprintf(stderr, "ADIOI_Build_client_reqs: malloc agg_comm_cur_sz_arr"
+                " failed\n");
+        return -1;
+    }
+    if ((agg_ol_ct_arr = (int *) ADIOI_Calloc(nprocs, sizeof(int)))
+        == NULL)
+    {
+        fprintf(stderr, "ADIOI_Build_client_reqs: "
+                "malloc agg_ol_ct_arr failed\n");
+        return -1;
+    }
+    if ((agg_ol_cur_ct_arr = (int *) ADIOI_Calloc(nprocs, sizeof(int)))
+        == NULL)
+    {
+        fprintf(stderr, "ADIOI_Build_client_reqs: "
+                "malloc agg_ol_cur_ct_arr failed\n");
+        return -1;
+    }
+
+    for (i = 0; i < nprocs; i++)
+    {
+        if (agg_comm_sz_arr[i] > 0)
+            total_agg_comm_sz += agg_comm_sz_arr[i];
+    }
+
+    /* On the first pass see how many offset-length pairs are
+     * necessary for each aggregator.  Then allocate the correct
+     * amount of offset-length pairs for handling each aggregator's
+     * particular data size.  On the last pass, we actually create the
+     * offset-length pairs. */
+    for (i = 0; i < MAX_OFF_TYPE; i++)
+    {
+        cur_total_agg_comm_sz = 0;
+        memset(agg_comm_cur_sz_arr, 0, nprocs*sizeof(ADIO_Offset));
+        memset(agg_mem_next_off_arr, -1, nprocs*sizeof(ADIO_Offset));
+        while (total_agg_comm_sz > cur_total_agg_comm_sz)
+        {
+            /* Look for the next aggregator offset among all the
+             * aggregators and their respective file realms. */
+            cur_off = -1;
+            for (j = 0; j < nprocs; j++)
+            {
+                tmp_agg_fr_idx = ADIOI_Agg_idx(j, fd);
+                assert(tmp_agg_fr_idx < cb_node_ct);
+
+                /* If this process is not an aggregator or we have
+                 * finished all the bytes for this aggregator, move
+                 * along. */
+                if (tmp_agg_fr_idx < 0 ||
+                    agg_comm_cur_sz_arr[j] == agg_comm_sz_arr[j])
+                {
+                    continue;
+                }
+
+                find_next_off(fd,
+                              &(agg_file_view_state_arr[j]),
+                              fr_st_off_arr[tmp_agg_fr_idx],
+                              &(fr_type_arr[tmp_agg_fr_idx]),
+                              i,
+                              &tmp_cur_off,
+                              &tmp_cur_reg_max_len);
+                if (tmp_cur_off == -1)
+                    continue;
+
+                if ((cur_off == -1) ||
+                    (cur_off > tmp_cur_off))
+                {
+                    agg_fr_idx = tmp_agg_fr_idx;
+                    cur_off_proc = j;
+                    cur_off = tmp_cur_off;
+                    cur_reg_max_len = tmp_cur_reg_max_len;
+                }
+            }
+
+            assert(cur_off != -1 && cur_off_proc != -1);
+
+            /* Add up to the end of the file realm or as many bytes
+             * are left for this particular aggregator in the client's
+             * filetype */
+            if (cur_reg_max_len > agg_comm_sz_arr[cur_off_proc] -
+                agg_comm_cur_sz_arr[cur_off_proc])
+            {
+                cur_reg_max_len = agg_comm_sz_arr[cur_off_proc] -
+                    agg_comm_cur_sz_arr[cur_off_proc];
+            }
+            assert(cur_reg_max_len > 0);
+
+            view_state_add_region(
+                cur_reg_max_len,
+                &(agg_file_view_state_arr[cur_off_proc]),
+                &st_reg, &act_reg_sz, i);
+
+#ifdef DEBUG2
+            fprintf(stderr, "ADIOI_Build_client_reqs: %s File region"
+                    " (proc=%d,off=%Ld,sz=%Ld)\n",
+                    off_type_name[i], cur_off_proc,
+                    cur_off, act_reg_sz);
+#endif
+
+            /* Before translating the file regions to memory regions,
+             * we first must advance to the proper point in the
+             * mem_view_state for this aggregator to match the
+             * file_view_state. */
+            tmp_file_state_p = &(agg_file_view_state_arr[cur_off_proc]);
+            tmp_mem_state_p = &(my_mem_view_state_arr[cur_off_proc]);
+            assert(view_state_get_cur_sz(tmp_file_state_p, i) - act_reg_sz >=
+                   view_state_get_cur_sz(tmp_mem_state_p, i));
+            while (view_state_get_cur_sz(tmp_file_state_p, i) - act_reg_sz !=
+                   view_state_get_cur_sz(tmp_mem_state_p, i))
+            {
+                ADIO_Offset fill_st_reg = -1, fill_reg_sz = -1;
+                view_state_add_region(
+                    view_state_get_cur_sz(tmp_file_state_p, i) - act_reg_sz -
+                    view_state_get_cur_sz(tmp_mem_state_p, i),
+                    tmp_mem_state_p,
+                    &fill_st_reg,
+                    &fill_reg_sz, i);
+            }
+
+            /* Based on how large the act_reg_sz is: 1. Figure out how
+             * many memory offset-length pairs are necessary.  2. Set
+             * the offset-length pairs. */
+            tmp_reg_sz = 0;
+            while (tmp_reg_sz != act_reg_sz)
+            {
+                view_state_add_region(
+                    act_reg_sz - tmp_reg_sz,
+                    tmp_mem_state_p,
+                    &agg_mem_st_reg, &agg_mem_act_reg_sz,
+                    i);
+                tmp_reg_sz += agg_mem_act_reg_sz;
+
+#ifdef DEBUG2
+                fprintf(stderr, "ADIOI_Build_client_reqs: Mem region %s"
+                        "(proc=%d,off=%Ld,sz=%Ld)\n",
+                        off_type_name[i], cur_off_proc,
+                        agg_mem_st_reg, agg_mem_act_reg_sz);
+#endif
+                agg_comm_cur_sz_arr[cur_off_proc] += agg_mem_act_reg_sz;
+                cur_total_agg_comm_sz += agg_mem_act_reg_sz;
+                switch(i)
+                {
+                    case TEMP_OFF:
+                        /* Increment the ol list count for a particular
+                         * aggregator if the next region is not adjacent
+                         * to the previous region. */
+                        if (agg_mem_next_off_arr[cur_off_proc] !=
+                            agg_mem_st_reg)
+                        {
+                            agg_ol_ct_arr[cur_off_proc]++;
+                        }
+                        agg_mem_next_off_arr[cur_off_proc] =
+                            agg_mem_st_reg + agg_mem_act_reg_sz;
+                        break;
+                    case REAL_OFF:
+                        /* Set the ol list for the memtypes that will
+                         * map to each aggregator, coalescing if
+                         * possible. */
+                        agg_next_off_idx = agg_ol_cur_ct_arr[cur_off_proc];
+                        if (agg_mem_next_off_arr[cur_off_proc] !=
+                            agg_mem_st_reg)
+                        {
+                            agg_disp_arr[cur_off_proc][agg_next_off_idx] =
+                                agg_mem_st_reg;
+                            agg_blk_arr[cur_off_proc][agg_next_off_idx] =
+                                agg_mem_act_reg_sz;
+                            (agg_ol_cur_ct_arr[cur_off_proc])++;
+                        }
+                        else
+                        {
+                            agg_blk_arr[cur_off_proc][agg_next_off_idx - 1]
+                                += agg_mem_act_reg_sz;
+                        }
+                        agg_mem_next_off_arr[cur_off_proc] =
+                            agg_mem_st_reg + agg_mem_act_reg_sz;
+                        break;
+                    default:
+                        fprintf(stderr, "ADIOI_Build_client_reqs: "
+                                "Impossible type\n");
+                }
+            }
+        }
+
+        /* On the first pass, allocate the memory structures for
+         * creating the MPI_hindexed type. */
+        if (i == TEMP_OFF)
+        {
+            /* Allocate offset-length pairs for creating hindexed
+             * MPI_Datatypes for each aggregator */
+            if ((agg_disp_arr = (MPI_Aint **)
+                 ADIOI_Malloc(nprocs*sizeof(MPI_Aint *))) == NULL)
+            {
+                fprintf(stderr,
+                        "ADIOI_Build_client_reqs: malloc agg_disp_arr failed\n");
+                return -1;
+            }
+            if ((agg_blk_arr = (int **) ADIOI_Malloc(nprocs*sizeof(int *)))
+                == NULL)
+            {
+                ADIOI_Free(agg_disp_arr);
+                fprintf(stderr,
+                        "ADIOI_Build_client_reqs: malloc agg_blk_arr failed\n");
+                return -1;
+            }
+            for (j = 0; j < nprocs; j++)
+            {
+                if ((agg_disp_arr[j] = (MPI_Aint *)
+                     ADIOI_Malloc(agg_ol_ct_arr[j]*sizeof(MPI_Aint))) == NULL)
+                {
+                    fprintf(stderr, "ADIOI_Build_client_reqs: malloc "
+                            "agg_disp_arr[%d] failed\n", j);
+                    return -1;
+                }
+                if ((agg_blk_arr[j] = (int *)
+                     ADIOI_Malloc(agg_ol_ct_arr[j]*sizeof(int))) == NULL)
+                {
+                    ADIOI_Free(agg_disp_arr[j]);
+                    fprintf(stderr, "ADIOI_Build_client_reqs: malloc "
+                            "agg_blk_arr[%d] failed\n", j);
+                    return -1;
+                }
+            }
+        }
+    }
+
+#ifdef DEBUG
+    fprintf(stderr, "ADIOI_Build_client_reqs:(agg,cur_ol_count=ol_count)=");
+    for (i = 0; i < nprocs; i++)
+    {
+        int tmp_agg_idx = ADIOI_Agg_idx(i, fd);
+        if (tmp_agg_idx >= 0)
+        {
+            fprintf(stderr, "(%d,%d=%d)", i, agg_ol_cur_ct_arr[i],
+                    agg_ol_ct_arr[i]);
+            assert(agg_ol_ct_arr[i] == agg_ol_cur_ct_arr[i]);
+            if (tmp_agg_idx != fd->hints->cb_nodes - 1)
+                fprintf(stderr, ",");
+        }
+    }
+    fprintf(stderr, "\n");
+#endif
+
+#ifdef DEBUG2
+    for (i = 0; i < nprocs; i++)
+    {
+        if (agg_ol_ct_arr[i] > 0)
+        {
+            fprintf(stderr, "ADIOI_Build_client_reqs: p %d (off,len) = ", i);
+            for (j = 0; j < agg_ol_ct_arr[i]; j++)
+            {
+                fprintf(stderr, "[%d](%d,%d) ", j,
+                        agg_disp_arr[i][j],
+                        agg_blk_arr[i][j]);
+            }
+            fprintf(stderr, "\n");
+        }
+    }
+#endif
+
+    /* Create all the aggregator MPI_Datatypes */
+    for (i = 0; i < nprocs; i++)
+    {
+        if (agg_comm_sz_arr[i] > 0)
+        {
+            MPI_Type_hindexed(agg_ol_ct_arr[i], agg_blk_arr[i],
+                              agg_disp_arr[i], MPI_BYTE,
+                              &(agg_comm_dtype_arr[i]));
+            MPI_Type_commit(&(agg_comm_dtype_arr[i]));
+        }
+        else
+        {
+            agg_comm_dtype_arr[i] = MPI_BYTE;
+        }
+        ADIOI_Free(agg_blk_arr[i]);
+        ADIOI_Free(agg_disp_arr[i]);
+    }
+    ADIOI_Free(agg_blk_arr);
+    ADIOI_Free(agg_disp_arr);
+
+    ADIOI_Free(agg_mem_next_off_arr);
+    ADIOI_Free(agg_comm_cur_sz_arr);
+    ADIOI_Free(agg_ol_ct_arr);
+    ADIOI_Free(agg_ol_cur_ct_arr);
+#ifdef AGGREGATION_PROFILE
+    MPE_Log_event (5019, 0, NULL);
+#endif
+    return 0;
+}
+/* ADIOI_Build_client_pre_req allows a client to precalculate the memtype
+ * offset-length pairs for one aggregator, up to a limit (max_pre_req_sz
+ * bytes or max_ol_ct pairs), so ADIOI_Build_client_req has less work to do.
+ * If it is called when some preprocessed memtype offset-length pairs
+ * already exist, it returns immediately if a limit has already been
+ * reached; otherwise it extends the old pairs up to the new limits.
+ * Returns 0 on success or when there is nothing to do, -1 on error. */
+
+int ADIOI_Build_client_pre_req(ADIO_File fd,
+                               int agg_rank, int agg_idx,
+                               view_state *my_mem_view_state_p,
+                               view_state *agg_file_view_state_p,
+                               ADIO_Offset max_pre_req_sz,
+                               int max_ol_ct)
+{
+    ADIO_Offset act_reg_sz = 0;
+    ADIO_Offset cur_off = -1, cur_reg_max_len = -1;
+    ADIO_Offset agg_mem_st_reg = 0, agg_mem_act_reg_sz = 0;
+    int agg_ol_ct = 0, agg_ol_cur_ct = 0;
+    int i, agg_next_off_idx = -1;
+
+    ADIO_Offset cur_sz = 0, max_sz = 0, agg_mem_next_off = -1;
+    ADIO_Offset fill_st_reg = -1, fill_reg_sz = -1;
+    ADIO_Offset *fr_st_off_arr = fd->file_realm_st_offs;
+    MPI_Datatype *fr_type_arr = fd->file_realm_types;
+    MPI_Aint *tmp_disp_arr = NULL;
+    int *tmp_blk_arr = NULL, exit_loop = -1;
+    flatten_state *tmp_mem_state_p = NULL, *tmp_file_state_p = NULL;
+#ifdef DTYPE_SKIP
+    int skip_type_ct;
+#endif
+    if (agg_idx < 0 || agg_idx >= fd->hints->cb_nodes)
+    {
+        fprintf(stderr, "ADIOI_Build_client_pre_req: Invalid agg_idx %d\n",
+                agg_idx);
+        return -1;
+    }
+
+    if (agg_file_view_state_p->cur_state.cur_sz ==
+        agg_file_view_state_p->sz || max_pre_req_sz <= 0 ||
+        max_ol_ct <= 0)
+    {
+#ifdef DEBUG1
+        fprintf(stderr,
+                "ADIOI_Build_client_pre_req: Nothing to preprocess\n");
+#endif
+        return 0;
+    }
+
+    /* The new limits have already been surpassed by what already
+     * exists.  Otherwise we will use the next restrictions */
+    if ((my_mem_view_state_p->pre_sz >= max_pre_req_sz) ||
+        (my_mem_view_state_p->pre_ol_ct >= max_ol_ct))
+    {
+#ifdef DEBUG1
+        fprintf(stderr,
+                "ADIOI_Build_client_pre_req: Old values surpass new "
+                "pre_req values\n");
+#endif
+        return 0;
+    }
+
+    /* General idea is to first advance the filetype to the file realm
+     * and then the memtype to the filetype.  The memtype is advanced
+     * further by peeking at the filetype and then the filetype is
+     * advanced. */
+    for (i = 0; i < MAX_OFF_TYPE; i++)
+    {
+        switch(i)
+        {
+            case TEMP_OFF:
+                tmp_mem_state_p = &(my_mem_view_state_p->tmp_state);
+                tmp_file_state_p = &(agg_file_view_state_p->tmp_state);
+                break;
+            case REAL_OFF:
+                tmp_mem_state_p = &(my_mem_view_state_p->cur_state);
+                tmp_file_state_p = &(agg_file_view_state_p->cur_state);
+                break;
+            default:
+                fprintf(stderr, "ADIOI_Build_client_pre_req: "
+                        "Invalid off type %d\n", i);
+        }
+
+        if (i == TEMP_OFF && my_mem_view_state_p->pre_sz > 0)
+        {
+            cur_sz = my_mem_view_state_p->pre_sz;
+            agg_ol_ct = my_mem_view_state_p->pre_ol_ct;
+            /* Save the old arrays */
+            tmp_disp_arr = my_mem_view_state_p->pre_disp_arr;
+            tmp_blk_arr = my_mem_view_state_p->pre_blk_arr;
+            my_mem_view_state_p->pre_disp_arr = NULL;
+            my_mem_view_state_p->pre_blk_arr = NULL;
+            agg_mem_next_off =
+                tmp_disp_arr[agg_ol_ct - 1] + tmp_blk_arr[agg_ol_ct - 1];
+        }
+        else if (i == REAL_OFF && my_mem_view_state_p->pre_sz > 0)
+        {
+            cur_sz = my_mem_view_state_p->pre_sz;
+            agg_ol_cur_ct = my_mem_view_state_p->pre_ol_ct;
+
+            /* Copy the old data to the new data, freeing the old
+             * arrays */
+            memcpy(my_mem_view_state_p->pre_disp_arr, tmp_disp_arr,
+                   my_mem_view_state_p->pre_ol_ct * sizeof(MPI_Aint));
+            memcpy(my_mem_view_state_p->pre_blk_arr, tmp_blk_arr,
+                   my_mem_view_state_p->pre_ol_ct * sizeof(int));
+
+            ADIOI_Free(tmp_disp_arr);
+            ADIOI_Free(tmp_blk_arr);
+
+            agg_mem_next_off =
+                my_mem_view_state_p->pre_disp_arr[agg_ol_cur_ct - 1] +
+                my_mem_view_state_p->pre_blk_arr[agg_ol_cur_ct - 1];
+        }
+        else
+        {
+            cur_sz = 0;
+        }
+
+        /* Max_pre_req_sz may be larger than the amount of data left
+         * to preprocess */
+        if (max_pre_req_sz - cur_sz >
+            agg_file_view_state_p->sz - tmp_file_state_p->cur_sz)
+        {
+            max_sz = cur_sz +
+                agg_file_view_state_p->sz - tmp_file_state_p->cur_sz;
+        }
+        else
+            max_sz = max_pre_req_sz;
+
+        assert(cur_sz != max_sz);
+#ifdef DEBUG1
+        fprintf(stderr,
+                "ADIOI_Build_client_pre_req: (cur_sz=%Ld,agg_ol_ct=%d,"
+                "agg_mem_next_off=%Ld,max_sz=%Ld,max_ol_ct=%d)\n",
+                cur_sz, agg_ol_ct, agg_mem_next_off, max_sz, max_ol_ct);
+#endif
+        while (cur_sz < max_sz)
+        {
+            find_next_off(fd, agg_file_view_state_p,
+                          fr_st_off_arr[agg_idx], /* realm arrays use agg_idx */
+                          &(fr_type_arr[agg_idx]),
+                          i,
+                          &cur_off,
+                          &cur_reg_max_len);
+
+            /* find_next_off may show that the file_view_state is done
+             * even if cur_sz != max_sz since find_next_off may
+             * advance the file_view_state to the end here and realize
+             * that it is done. */
+            if (cur_off == -1)
+                break;
+
+            assert(cur_off != -1);
+
+            /* Before translating the file regions to memory regions,
+             * we first must advance to the proper point in the
+             * mem_view_state for this aggregator to match the
+             * file_view_state. */
+            while (tmp_file_state_p->cur_sz != tmp_mem_state_p->cur_sz)
+            {
+#ifdef DTYPE_SKIP
+                if (my_mem_view_state_p->flat_type_p->count > 1) {
+                    /* let's see if we can skip whole memory datatypes */
+                    skip_type_ct =
+                        (tmp_file_state_p->cur_sz - tmp_mem_state_p->cur_sz) /
+                        my_mem_view_state_p->type_sz;
+                    if (skip_type_ct > 0) {
+                        tmp_mem_state_p->cur_sz +=
+                            skip_type_ct * my_mem_view_state_p->type_sz;
+                        tmp_mem_state_p->abs_off +=
+                            skip_type_ct * my_mem_view_state_p->ext;
+                        if (tmp_mem_state_p->cur_sz ==
+                            tmp_file_state_p->cur_sz)
+                            break;
+                    }
+                }
+#endif
+                view_state_add_region(
+                    tmp_file_state_p->cur_sz - tmp_mem_state_p->cur_sz,
+                    my_mem_view_state_p,
+                    &fill_st_reg,
+                    &fill_reg_sz, i);
+            }
+
+            /* Now that the filetype and memtype are advanced to the
+             * same position, add memtype ol-pairs while we have not
+             * overstepped the min(end of the current piece in the
+             * file view, end of the file realm, data left in
+             * max_sz) */
+
+            if (cur_reg_max_len >
+                view_state_get_next_len(agg_file_view_state_p, i))
+                cur_reg_max_len =
+                    view_state_get_next_len(agg_file_view_state_p, i);
+
+            if (cur_reg_max_len > max_sz - cur_sz)
+                cur_reg_max_len = max_sz - cur_sz;
+
+            assert(cur_reg_max_len > 0);
+
+            /* Add memtype ol pairs while we have not passed
+             * cur_reg_max_len or the max number of ol pairs
+             * allowed */
+            act_reg_sz = 0;
+            exit_loop = 0;
+            while ((act_reg_sz < cur_reg_max_len) &&
+                   (exit_loop == 0))
+            {
+                view_state_add_region(
+                    cur_reg_max_len - act_reg_sz,
+                    my_mem_view_state_p,
+                    &agg_mem_st_reg, &agg_mem_act_reg_sz,
+                    i);
+                act_reg_sz += agg_mem_act_reg_sz;
+
+#ifdef DEBUG2
+                fprintf(stderr, "ADIOI_Build_client_pre_req: %s Mem region"
+                        "(proc=%d,off=%Ld,sz=%Ld)\n",
+                        off_type_name[i], agg_rank, agg_mem_st_reg,
+                        agg_mem_act_reg_sz);
+#endif
+                switch(i)
+                {
+                    case TEMP_OFF:
+                        /* Increment the ol list count if the next
+                         * region is not adjacent to the previous
+                         * region. */
+                        if (agg_mem_next_off != agg_mem_st_reg)
+                        {
+                            agg_ol_ct++;
+                            if (agg_ol_ct == max_ol_ct)
+                                exit_loop = 1;
+                        }
+                        agg_mem_next_off =
+                            agg_mem_st_reg + agg_mem_act_reg_sz;
+                        break;
+                    case REAL_OFF:
+                        /* Set the ol list for the memtype that
+                         * will map to our aggregator, coalescing
+                         * if possible. */
+                        agg_next_off_idx = agg_ol_cur_ct;
+                        if (agg_mem_next_off != agg_mem_st_reg)
+                        {
+                            my_mem_view_state_p->
+                                pre_disp_arr[agg_next_off_idx] =
+                                agg_mem_st_reg;
+                            my_mem_view_state_p->
+                                pre_blk_arr[agg_next_off_idx] =
+                                agg_mem_act_reg_sz;
+                            agg_ol_cur_ct++;
+                            if (agg_ol_cur_ct == agg_ol_ct)
+                                exit_loop = 1;
+                        }
+                        else
+                        {
+                            my_mem_view_state_p->
+                                pre_blk_arr[agg_next_off_idx - 1]
+                                += agg_mem_act_reg_sz;
+                        }
+                        agg_mem_next_off =
+                            agg_mem_st_reg + agg_mem_act_reg_sz;
+                        break;
+                    default:
+                        fprintf(stderr, "ADIOI_Build_client_pre_req: "
+                                "Impossible type\n");
+                }
+            }
+
+            /* Advance the filetype flatten state appropriately to
+             * match the data advanced in the memtype flatten state.
+             * Should only take at most a single view_state_add_region
+             * call since the memtype cannot proceed beyond the end of
+             * a contig piece in the file type. */
+            view_state_add_region(act_reg_sz,
+                                  agg_file_view_state_p,
+                                  &fill_st_reg, &fill_reg_sz, i);
+#ifdef DEBUG2
+            fprintf(stderr, "ADIOI_Build_client_pre_req: %s File region"
+                    " (proc=%d,off=%Ld,sz=%Ld)\n",
+                    off_type_name[i], agg_rank, fill_st_reg, fill_reg_sz);
+#endif
+            if (fill_reg_sz != act_reg_sz)
+            {
+                fprintf(stderr, "ADIOI_Build_client_pre_req: "
+                        "view_state_add_region failed to match the memtype\n");
+                return -1;
+            }
+
+            cur_sz += act_reg_sz;
+        }
+
+        /* On the first pass, allocate the memory structures for
+         * storing the preprocessed information */
+        if (i == TEMP_OFF)
+        {
+            if ((my_mem_view_state_p->pre_disp_arr = (MPI_Aint *)
+                 ADIOI_Malloc(agg_ol_ct * sizeof(MPI_Aint))) == NULL)
+            {
+                fprintf(stderr, "ADIOI_Build_client_pre_req: malloc "
+                        "pre_disp_arr of size %ld failed\n",
+                        (long int)(agg_ol_ct * sizeof(MPI_Aint)));
+                return -1;
+            }
+            if ((my_mem_view_state_p->pre_blk_arr = (int *)
+                 ADIOI_Malloc(agg_ol_ct * sizeof(int))) == NULL)
+            {
+                ADIOI_Free(my_mem_view_state_p->pre_disp_arr);
+                fprintf(stderr, "ADIOI_Build_client_pre_req: malloc "
+                        "agg_blk_arr of size %ld failed\n",
+                        (long int)(agg_ol_ct * sizeof(int)));
+                return -1;
+            }
+        }
+    }
+
+    my_mem_view_state_p->pre_sz = cur_sz;
+    my_mem_view_state_p->pre_ol_ct = agg_ol_ct;
+
+#ifdef DEBUG1
+    fprintf(stderr, "ADIOI_Build_client_pre_req:(agg=%d,cur_ol_count=%d"
+            "=ol_count=%d)\n",
+            agg_rank, my_mem_view_state_p->pre_ol_ct, agg_ol_ct);
+#endif
+
+#ifdef DEBUG2
+    if (agg_ol_ct > 0)
+    {
+        fprintf(stderr, "ADIOI_Build_client_pre_req: agg=%d,pre_sz=%Ld "
+                "(off,len) = \n", agg_rank, my_mem_view_state_p->pre_sz);
+        for (i = 0; i < my_mem_view_state_p->pre_ol_ct; i++)
+        {
+            fprintf(stderr, "[%d](%d,%d) ", i,
+                    my_mem_view_state_p->pre_disp_arr[i],
+                    my_mem_view_state_p->pre_blk_arr[i]);
+            if (i % 5 == 0 && i != 0)
+                fprintf(stderr, "\n");
+        }
+        fprintf(stderr, "\n");
+    }
+#endif
+
+    return 0;
+}
+
+/* process_pre_req() lets ADIOI_Build_client_req reuse the pre_req data:
+ * TEMP_OFF sizes what fits in *agg_comm_sz_p; REAL_OFF copies those ol pairs
+ * out and drops them from the view state.  Returns 0, -1 on malloc failure. */
+
+static int process_pre_req(ADIO_File fd,
+                           int agg_rank,
+                           int agg_idx,
+                           view_state *my_mem_view_state_p,
+                           view_state *agg_file_view_state_p,
+                           ADIO_Offset agg_comm_sz,
+                           int off_type,
+                           MPI_Aint *agg_disp_arr,
+                           int *agg_blk_arr,
+                           ADIO_Offset *agg_comm_pre_sz_p,
+                           ADIO_Offset *agg_comm_cur_sz_p,
+                           ADIO_Offset *agg_comm_sz_p,
+                           int *agg_ol_cur_ct_p,
+                           int *agg_ol_ct_p,
+                           ADIO_Offset *agg_mem_next_off_p)
+{
+    int i, has_partial = 0;
+    MPI_Aint partial_disp = 0;
+    int partial_len = 0;
+    ADIO_Offset tmp_agg_comm_pre_sz = 0;
+
+    assert (my_mem_view_state_p->pre_sz > 0);
+    switch(off_type)
+    {
+        case TEMP_OFF:
+            /* Use only some of the precalculated data */
+            if (my_mem_view_state_p->pre_sz > *agg_comm_sz_p)
+            {
+                for (i = 0; i < my_mem_view_state_p->pre_ol_ct; i++)
+                {
+                    if ((my_mem_view_state_p->pre_blk_arr[i] +
+                         *agg_comm_pre_sz_p) > *agg_comm_sz_p)
+                    {
+                        has_partial = 1;
+                        partial_len = *agg_comm_sz_p - *agg_comm_pre_sz_p;
+                        *agg_comm_pre_sz_p = *agg_comm_sz_p;
+                        i++;
+                        break;
+                    }
+                    else if ((my_mem_view_state_p->pre_blk_arr[i] +
+                              *agg_comm_pre_sz_p) == *agg_comm_sz_p)
+                    {
+                        *agg_comm_pre_sz_p +=
+                            my_mem_view_state_p->pre_blk_arr[i];
+                        i++;
+                        break;
+                    }
+                    else
+                        *agg_comm_pre_sz_p +=
+                            my_mem_view_state_p->pre_blk_arr[i];
+                }
+
+                if (has_partial == 1)
+                {
+                    *agg_mem_next_off_p =
+                        my_mem_view_state_p->pre_disp_arr[i - 1] +
+                        partial_len;
+                }
+                else
+                {
+                    *agg_mem_next_off_p =
+                        my_mem_view_state_p->pre_disp_arr[i - 1] +
+                        my_mem_view_state_p->pre_blk_arr[i - 1];
+                }
+
+                *agg_comm_cur_sz_p = *agg_comm_pre_sz_p;
+                *agg_ol_ct_p = i;
+
+            }
+            else /* Use all the precalculated data */
+            {
+                *agg_comm_pre_sz_p = my_mem_view_state_p->pre_sz;
+                *agg_comm_cur_sz_p = *agg_comm_pre_sz_p;
+                *agg_ol_ct_p = my_mem_view_state_p->pre_ol_ct;
+                *agg_mem_next_off_p =
+                    my_mem_view_state_p->pre_disp_arr[
+                        my_mem_view_state_p->pre_ol_ct - 1] +
+                    my_mem_view_state_p->pre_blk_arr[
+                        my_mem_view_state_p->pre_ol_ct - 1];
+            }
+#ifdef DEBUG1
+            fprintf(stderr, "process_pre_req: TEMP_OFF "
+                    "agg_comm_pre_sz=%Ld,agg_comm_cur_sz=%Ld,agg_ol_ct=%d\n",
+                    *agg_comm_pre_sz_p, *agg_comm_cur_sz_p, *agg_ol_ct_p);
+#endif
+            assert(*agg_comm_cur_sz_p <= *agg_comm_sz_p);
+            break;
+        case REAL_OFF:
+            /* Set the ol list for the memtype that will map to our
+             * aggregator, coalescing if possible. */
+            for (i = 0; i < my_mem_view_state_p->pre_ol_ct; i++)
+            {
+                agg_disp_arr[i] = my_mem_view_state_p->pre_disp_arr[i];
+                agg_blk_arr[i] = my_mem_view_state_p->pre_blk_arr[i];
+
+                if ((my_mem_view_state_p->pre_blk_arr[i] +
+                     tmp_agg_comm_pre_sz) > *agg_comm_pre_sz_p)
+                {
+                    has_partial = 1;
+                    agg_blk_arr[i] = *agg_comm_pre_sz_p - tmp_agg_comm_pre_sz;
+                    tmp_agg_comm_pre_sz = *agg_comm_pre_sz_p;
+                    partial_disp = my_mem_view_state_p->pre_disp_arr[i] +
+                        agg_blk_arr[i];
+                    partial_len = my_mem_view_state_p->pre_blk_arr[i] -
+                        agg_blk_arr[i];
+                    i++;
+                    break;
+                }
+                else if ((my_mem_view_state_p->pre_blk_arr[i] +
+                          tmp_agg_comm_pre_sz) == *agg_comm_pre_sz_p)
+                {
+                    tmp_agg_comm_pre_sz +=
+                        my_mem_view_state_p->pre_blk_arr[i];
+                    i++;
+                    break;
+                }
+                else
+                    tmp_agg_comm_pre_sz +=
+                        my_mem_view_state_p->pre_blk_arr[i];
+            }
+            *agg_mem_next_off_p = agg_disp_arr[i - 1] + agg_blk_arr[i - 1];
+            *agg_ol_cur_ct_p = i;
+            *agg_comm_cur_sz_p = *agg_comm_pre_sz_p;
+
+            /* Clean up the ol pairs we used */
+            if ((i < my_mem_view_state_p->pre_ol_ct) || (has_partial == 1))
+            {
+                int remain_ol_ct =
+                    my_mem_view_state_p->pre_ol_ct - i + has_partial;
+                MPI_Aint *new_pre_disp_arr = NULL;
+                int *new_pre_blk_arr = NULL;
+
+                if ((new_pre_disp_arr = (MPI_Aint *)
+                     ADIOI_Malloc(remain_ol_ct * sizeof(MPI_Aint))) == NULL)
+                {
+                    fprintf(stderr, "process_pre_req: malloc "
+                            "new_pre_disp_arr failed\n");
+                    return -1;
+                }
+                if ((new_pre_blk_arr = (int *)
+                     ADIOI_Malloc(remain_ol_ct * sizeof(int))) == NULL)
+                {
+                    ADIOI_Free(new_pre_disp_arr);
+                    fprintf(stderr, "process_pre_req: malloc "
+                            "new_pre_blk_arr failed\n");
+                    return -1;
+                }
+
+                memcpy(new_pre_disp_arr,
+                       &(my_mem_view_state_p->pre_disp_arr[i - has_partial]),
+                       remain_ol_ct * sizeof(MPI_Aint));
+                memcpy(new_pre_blk_arr,
+                       &(my_mem_view_state_p->pre_blk_arr[i - has_partial]),
+                       remain_ol_ct * sizeof(int));
+
+                /* Set the partial len of the first piece */
+                if (has_partial == 1)
+                {
+                    new_pre_disp_arr[0] = partial_disp;
+                    new_pre_blk_arr[0] = partial_len;
+                }
+
+                ADIOI_Free(my_mem_view_state_p->pre_disp_arr);
+                ADIOI_Free(my_mem_view_state_p->pre_blk_arr);
+
+                my_mem_view_state_p->pre_disp_arr = new_pre_disp_arr;
+                my_mem_view_state_p->pre_blk_arr = new_pre_blk_arr;
+                my_mem_view_state_p->pre_ol_ct = remain_ol_ct;
+                my_mem_view_state_p->pre_sz -= *agg_comm_pre_sz_p;
+            }
+            else /* Used all the precalculated ol pairs */
+            {
+                ADIOI_Free(my_mem_view_state_p->pre_disp_arr);
+                ADIOI_Free(my_mem_view_state_p->pre_blk_arr);
+
+                my_mem_view_state_p->pre_disp_arr = NULL;
+                my_mem_view_state_p->pre_blk_arr = NULL;
+                my_mem_view_state_p->pre_ol_ct = 0;
+                my_mem_view_state_p->pre_sz = 0;
+            }
+#ifdef DEBUG1
+            fprintf(stderr, "process_pre_req: REAL_OFF "
+                    "agg_comm_pre_sz=%Ld,agg_comm_cur_sz=%Ld,agg_ol_ct=%d,"
+                    "agg_ol_cur_ct=%d\n",
+                    *agg_comm_pre_sz_p, *agg_comm_cur_sz_p, *agg_ol_ct_p,
+                    *agg_ol_cur_ct_p);
+#endif
+            break;
+        default:
+            fprintf(stderr, "process_pre_req: Invalid off_type %d\n",
+                    off_type);
+    }
+    return 0;
+}
+
+/* ADIOI_Build_client_req() creates a memory datatype to transfer data
+ * to/from a particular aggregator.
+ *
+ * The routine walks the client's view of the aggregator's file realm
+ * (agg_file_view_state_p) and the client's memory view
+ * (my_mem_view_state_p) in lock step until agg_comm_sz bytes have been
+ * covered, and records the matching memory regions as offset-length
+ * pairs.  It does so in two passes over the off types: the TEMP_OFF
+ * pass only counts the pairs, the arrays are then allocated, and the
+ * REAL_OFF pass fills them in.  The allocation is done at the end of
+ * the TEMP_OFF iteration, so the loop relies on TEMP_OFF being visited
+ * before REAL_OFF.
+ *
+ * Parameters:
+ *   fd                    - supplies file_realm_st_offs, file_realm_types
+ *                           and hints->cb_nodes
+ *   agg_rank              - rank of the aggregator; here it is only used
+ *                           in debug output and forwarded to
+ *                           process_pre_req()
+ *   agg_idx               - index of the aggregator in the file realm
+ *                           arrays; must be in [0, cb_nodes)
+ *   my_mem_view_state_p   - memory view state for this aggregator
+ *   agg_file_view_state_p - file view state for this aggregator
+ *   agg_comm_sz           - number of bytes to describe
+ *   agg_comm_dtype_p      - out: committed hindexed type of MPI_BYTE when
+ *                           agg_comm_sz > 0, otherwise plain MPI_BYTE
+ *
+ * Returns 0 on success and also when agg_idx is out of range (in that
+ * case *agg_comm_dtype_p is left untouched), -1 when one of the
+ * offset-length arrays cannot be allocated. */
+
+int ADIOI_Build_client_req(ADIO_File fd,
+ int agg_rank,
+ int agg_idx,
+ view_state *my_mem_view_state_p,
+ view_state *agg_file_view_state_p,
+ ADIO_Offset agg_comm_sz,
+ MPI_Datatype *agg_comm_dtype_p)
+{
+ MPI_Aint *agg_disp_arr = NULL;
+ int *agg_blk_arr = NULL;
+ ADIO_Offset st_reg = 0, act_reg_sz = 0, tmp_reg_sz = 0;
+ ADIO_Offset cur_off = -1, cur_reg_max_len = -1;
+ ADIO_Offset agg_mem_st_reg = 0, agg_mem_act_reg_sz = 0;
+ int agg_ol_ct = 0, agg_ol_cur_ct = 0;
+ int i = 0, agg_next_off_idx = -1;
+ ADIO_Offset agg_mem_next_off = 0, agg_comm_cur_sz = 0, agg_comm_pre_sz = 0;
+ ADIO_Offset *fr_st_off_arr = fd->file_realm_st_offs;
+ MPI_Datatype *fr_type_arr = fd->file_realm_types;
+ flatten_state *tmp_mem_state_p = NULL, *tmp_file_state_p = NULL;
+#ifdef DTYPE_SKIP
+ int skip_type_ct;
+#endif
+
+ /* agg_idx is used to index the file realm arrays below; anything
+ * outside [0, cb_nodes) is silently ignored. */
+ if (agg_idx < 0 || agg_idx >= fd->hints->cb_nodes)
+ {
+#ifdef DEBUG1
+ fprintf(stderr, "ADIOI_Build_client_req: agg_rank %d does not map "
+ "to a valid node in cb_node\n", agg_rank);
+#endif
+ return 0;
+ }
+
+#ifdef AGGREGATION_PROFILE
+ MPE_Log_event (5018, 0, NULL);
+#endif
+
+#ifdef DEBUG1
+ fprintf(stderr, "ADIOI_Build_client_req:(agg=%d,size_req=%Ld)\n",
+ agg_idx, agg_comm_sz);
+#endif
+
+ /* On the first pass see how many offset-length pairs are
+ * necessary for each aggregator. Then allocate the correct
+ * amount of offset-length pairs for handling each aggregator's
+ * particular data size. On the last pass, we actually create the
+ * offset-length pairs. */
+ for (i = 0; i < MAX_OFF_TYPE; i++)
+ {
+ /* Select the flatten_state pair (scratch or real) that this pass
+ * advances. */
+ switch(i)
+ {
+ case TEMP_OFF:
+ tmp_mem_state_p = &(my_mem_view_state_p->tmp_state);
+ tmp_file_state_p = &(agg_file_view_state_p->tmp_state);
+ break;
+ case REAL_OFF:
+ tmp_mem_state_p = &(my_mem_view_state_p->cur_state);
+ tmp_file_state_p = &(agg_file_view_state_p->cur_state);
+ break;
+ default:
+ fprintf(stderr, "ADIOI_Build_client_pre_req: "
+ "Invalid off type %d\n", i);
+ }
+
+ /* Restart the byte counter and the coalescing cursor for this
+ * pass. */
+ agg_comm_cur_sz = 0;
+ agg_mem_next_off = -1;
+
+ /* First try to preprocess anything we can */
+ if (my_mem_view_state_p->pre_sz > 0)
+ {
+ process_pre_req(fd,
+ agg_rank,
+ agg_idx,
+ my_mem_view_state_p,
+ agg_file_view_state_p,
+ agg_comm_sz,
+ i,
+ agg_disp_arr,
+ agg_blk_arr,
+ &agg_comm_pre_sz,
+ &agg_comm_cur_sz,
+ &agg_comm_sz,
+ &agg_ol_cur_ct,
+ &agg_ol_ct,
+ &agg_mem_next_off);
+ }
+
+ /* Keep consuming file regions of this aggregator's realm until the
+ * requested number of bytes has been described. */
+ while (agg_comm_cur_sz < agg_comm_sz)
+ {
+ find_next_off(fd, agg_file_view_state_p,
+ fr_st_off_arr[agg_idx],
+ &(fr_type_arr[agg_idx]),
+ i,
+ &cur_off,
+ &cur_reg_max_len);
+
+ assert(cur_off != -1);
+
+ /* Add up to the end of the file realm or as many bytes
+ * are left for this particular aggregator in the client's
+ * filetype */
+ if (cur_reg_max_len > (agg_comm_sz - agg_comm_cur_sz))
+ {
+ cur_reg_max_len = agg_comm_sz - agg_comm_cur_sz;
+ }
+ assert(cur_reg_max_len > 0);
+
+ view_state_add_region(
+ cur_reg_max_len,
+ agg_file_view_state_p,
+ &st_reg, &act_reg_sz, i);
+
+#ifdef DEBUG2
+ fprintf(stderr, "ADIOI_Build_client_req: %s File region"
+ " (proc=%d,off=%Ld,sz=%Ld)\n",
+ off_type_name[i], agg_rank, cur_off, act_reg_sz);
+#endif
+
+ /* Before translating the file regions to memory regions,
+ * we first must advance to the proper point in the
+ * mem_view_state for this aggregator to match the
+ * file_view_state. */
+
+ assert(tmp_file_state_p->cur_sz - act_reg_sz >=
+ tmp_mem_state_p->cur_sz);
+
+ /* Advance the memory view (discarding the regions) until it has
+ * consumed as many bytes as the file view had consumed before
+ * the region just added. */
+ while (tmp_file_state_p->cur_sz - act_reg_sz !=
+ tmp_mem_state_p->cur_sz)
+ {
+ ADIO_Offset fill_st_reg = -1, fill_reg_sz = -1;
+#ifdef DTYPE_SKIP
+ if (my_mem_view_state_p->flat_type_p->count > 1) {
+ /* let's see if we can skip whole memory datatypes */
+ skip_type_ct =
+ (tmp_file_state_p->cur_sz - act_reg_sz -
+ tmp_mem_state_p->cur_sz) /
+ my_mem_view_state_p->type_sz;
+ if (skip_type_ct > 0) {
+ tmp_mem_state_p->cur_sz +=
+ skip_type_ct * my_mem_view_state_p->type_sz;
+ tmp_mem_state_p->abs_off +=
+ skip_type_ct * my_mem_view_state_p->ext;
+ if ((tmp_mem_state_p->cur_sz - act_reg_sz) ==
+ tmp_file_state_p->cur_sz)
+ break;
+ }
+ }
+#endif
+ view_state_add_region(
+ tmp_file_state_p->cur_sz -
+ act_reg_sz - tmp_mem_state_p->cur_sz,
+ my_mem_view_state_p,
+ &fill_st_reg,
+ &fill_reg_sz, i);
+ }
+
+ /* Based on how large the act_reg_sz is, first figure
+ * out how many memory offset-length pairs are
+ * necessary and then set the offset-length pairs. */
+ tmp_reg_sz = 0;
+ while (tmp_reg_sz != act_reg_sz)
+ {
+ view_state_add_region(
+ act_reg_sz - tmp_reg_sz,
+ my_mem_view_state_p,
+ &agg_mem_st_reg, &agg_mem_act_reg_sz,
+ i);
+ tmp_reg_sz += agg_mem_act_reg_sz;
+
+#ifdef DEBUG2
+ fprintf(stderr, "ADIOI_Build_client_req: %s Mem region"
+ "(off=%Ld,sz=%Ld)\n",
+ off_type_name[i], agg_mem_st_reg,
+ agg_mem_act_reg_sz);
+#endif
+ agg_comm_cur_sz += agg_mem_act_reg_sz;
+ switch(i)
+ {
+ case TEMP_OFF:
+ /* Increment the ol list count if the next
+ * region is not adjacent to the previous
+ * region. */
+ if (agg_mem_next_off != agg_mem_st_reg)
+ {
+ agg_ol_ct++;
+ }
+ agg_mem_next_off =
+ agg_mem_st_reg + agg_mem_act_reg_sz;
+ break;
+ case REAL_OFF:
+ /* Set the ol list for the memtype that
+ * will map to our aggregator, coalescing
+ * if possible. */
+ agg_next_off_idx = agg_ol_cur_ct;
+ if (agg_mem_next_off != agg_mem_st_reg)
+ {
+ agg_disp_arr[agg_next_off_idx] =
+ agg_mem_st_reg;
+ agg_blk_arr[agg_next_off_idx] =
+ agg_mem_act_reg_sz;
+ agg_ol_cur_ct++;
+ }
+ else
+ {
+ /* The region starts where the previous one
+ * ended: grow the previous entry instead of
+ * adding a new one. */
+ agg_blk_arr[agg_next_off_idx - 1]
+ += agg_mem_act_reg_sz;
+ }
+ agg_mem_next_off =
+ agg_mem_st_reg + agg_mem_act_reg_sz;
+ break;
+ default:
+ fprintf(stderr, "ADIOI_Build_client_req: "
+ "Impossible type\n");
+ }
+ }
+ }
+
+ /* On the first pass, allocate the memory structures for
+ * creating the MPI_hindexed type. */
+ if (i == TEMP_OFF)
+ {
+ /* Allocate offset-length pairs for creating hindexed
+ * MPI_Datatypes for each aggregator */
+ if ((agg_disp_arr = (MPI_Aint *)
+ ADIOI_Malloc(agg_ol_ct * sizeof(MPI_Aint))) == NULL)
+ {
+ fprintf(stderr, "ADIOI_Build_client_req: malloc "
+ "agg_disp_arr of size %ld failed\n",
+ (long int)agg_ol_ct * sizeof(MPI_Aint));
+ return -1;
+ }
+ if ((agg_blk_arr = (int *)
+ ADIOI_Malloc(agg_ol_ct * sizeof(int))) == NULL)
+ {
+ ADIOI_Free(agg_disp_arr);
+ fprintf(stderr, "ADIOI_Build_client_req: malloc "
+ "agg_blk_arr of size %ld failed\n",
+ (long int)agg_ol_ct * sizeof(int));
+ return -1;
+ }
+ }
+ }
+
+ /* The counting pass and the filling pass must agree on the number of
+ * offset-length pairs. */
+ assert(agg_ol_ct == agg_ol_cur_ct);
+#ifdef DEBUG1
+ fprintf(stderr,
+ "ADIOI_Build_client_req:(agg=%d,cur_ol_count=%d=ol_count=%d)\n",
+ agg_rank, agg_ol_cur_ct, agg_ol_ct);
+#endif
+
+#ifdef DEBUG2
+ if (agg_ol_ct > 0)
+ {
+ fprintf(stderr, "ADIOI_Build_client_req: p %d (off,len) = ", agg_rank);
+ for (i = 0; i < agg_ol_ct; i++)
+ {
+ fprintf(stderr, "[%d](%d,%d) ", i,
+ agg_disp_arr[i], agg_blk_arr[i]);
+ if (i % 5 == 0 && i != 0)
+ fprintf(stderr, "\n");
+ }
+ fprintf(stderr, "\n");
+ }
+#endif
+#ifdef DEBUG1
+ fprintf(stderr,
+ "ADIOI_Build_client_req:(agg=%d,pre_ol_count=%d)\n",
+ agg_idx, my_mem_view_state_p->pre_ol_ct);
+#endif
+
+#ifdef DEBUG2
+ if (my_mem_view_state_p->pre_sz > 0)
+ {
+ fprintf(stderr, "ADIOI_Build_client_req: p %d pre(off,len) = ",
+ agg_idx);
+ for (i = 0; i < my_mem_view_state_p->pre_ol_ct; i++)
+ {
+ fprintf(stderr, "[%d](%d,%d) ", i,
+ my_mem_view_state_p->pre_disp_arr[i],
+ my_mem_view_state_p->pre_blk_arr[i]);
+ if (i % 5 == 0 && i != 0)
+ fprintf(stderr, "\n");
+ }
+ fprintf(stderr, "\n");
+ }
+#endif
+
+ /* Create the aggregator MPI_Datatype */
+ if (agg_comm_sz > 0)
+ {
+ MPI_Type_hindexed(agg_ol_ct, agg_blk_arr, agg_disp_arr, MPI_BYTE,
+ agg_comm_dtype_p);
+ MPI_Type_commit(agg_comm_dtype_p);
+ }
+ else
+ {
+ /* Nothing to transfer: hand back the predefined byte type
+ * rather than a derived one. */
+ *agg_comm_dtype_p = MPI_BYTE;
+ }
+
+ /* The offset-length arrays are only needed to build the datatype. */
+ ADIOI_Free(agg_blk_arr);
+ ADIOI_Free(agg_disp_arr);
+
+#ifdef AGGREGATION_PROFILE
+ MPE_Log_event (5019, 0, NULL);
+#endif
+ return 0;
+}
+
+
diff --git a/ompi/mca/io/romio/romio/adio/common/ad_coll_exch_new.c b/ompi/mca/io/romio/romio/adio/common/ad_coll_exch_new.c
new file mode 100644
index 0000000000..25473f6548
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/common/ad_coll_exch_new.c
@@ -0,0 +1,510 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ *
+ * Copyright (C) 1997 University of Chicago.
+ * See COPYRIGHT notice in top-level directory.
+ */
+
+#include <assert.h>
+#include "adio.h"
+#include "adio_extern.h"
+#ifdef AGGREGATION_PROFILE
+#include "mpe.h"
+#endif
+
+/*
+#define DEBUG
+#define DEBUG2
+*/
+
+/* Message tags used by ADIOI_Exch_file_views():
+ *   COUNT_EXCH - the amount_and_extra_data_t record of a client
+ *   BLOCK_LENS - the blocklens array of a flattened filetype
+ *   INDICES    - the indices array of a flattened filetype
+ * FPIND_DISP_OFF_SZ is not referenced by the code in this part of the
+ * file. */
+#define COUNT_EXCH 0
+#define BLOCK_LENS 1
+#define INDICES 2
+#define FPIND_DISP_OFF_SZ 3
+
+
+/* Record a client sends to an aggregator ahead of its flattened
+ * filetype.  ADIOI_Exch_file_views() copies the six ADIO_Offset members
+ * out with a single memcpy of 6*sizeof(ADIO_Offset) starting at fp_ind,
+ * so they must stay adjacent and in this order.
+ * NOTE(review): that memcpy presumably also requires view_state to
+ * declare the same six members in the same order - confirm against the
+ * view_state definition. */
+typedef struct {
+ int count; /* number of entries in the flattened filetype */
+ ADIO_Offset fp_ind; /* fd->fp_ind of the sender */
+ ADIO_Offset disp; /* fd->disp of the sender */
+ ADIO_Offset byte_off; /* the 'off' argument of the access */
+ ADIO_Offset sz; /* total access size: memtype size * count */
+ ADIO_Offset ext; /* filetype extent */
+ ADIO_Offset type_sz; /* filetype size */
+} amount_and_extra_data_t;
+
+/* Debugging function to print out an ADIOI_Flatlist_node.
+ *
+ * Writes the node's count followed by one "idx=(index,blocklen)" entry
+ * per flattened piece to stderr.  A newline is emitted after every
+ * entry whose index is a non-zero multiple of five, a blank after all
+ * others.  A NULL node is reported and otherwise ignored.
+ *
+ * The offsets are printed with "%lld" and an explicit cast to long long:
+ * the "L" length modifier is only defined for floating-point
+ * conversions, so "%Ld" is not portable. */
+void ADIOI_Print_flatlist_node(ADIOI_Flatlist_node *flatlist_node_p)
+{
+    int i;
+
+    if (flatlist_node_p == NULL)
+    {
+        fprintf(stderr, "print flatlist node of NULL ptr\n");
+        return;
+    }
+
+    fprintf(stderr, "print flatlist node count = %d (idx,blocklen)\n",
+            flatlist_node_p->count);
+    for (i = 0; i < flatlist_node_p->count; i++)
+    {
+        /* break the line after entries 5, 10, 15, ... */
+        const char sep = (i % 5 == 0 && i != 0) ? '\n' : ' ';
+
+        fprintf(stderr, "%d=(%lld,%lld)%c", i,
+                (long long) flatlist_node_p->indices[i],
+                (long long) flatlist_node_p->blocklens[i], sep);
+    }
+    fprintf(stderr, "\n");
+}
+
+/* Since ADIOI_Flatten_datatype won't add a contig datatype to the
+ * ADIOI_Flatlist, we can force it to do so with this function.
+ *
+ * Looks contig_type up in the global ADIOI_Flatlist and returns the
+ * existing node if there is one.  Otherwise a new node holding a single
+ * (index 0, blocklen = MPI_Type_size(contig_type)) entry is appended to
+ * the end of the list and returned.
+ *
+ * Returns NULL, leaving the list unchanged, when the list has no head
+ * node or when one of the allocations fails.  The new node is linked
+ * into the list only after it has been built completely, so a failure
+ * never leaves a half-initialized node reachable. */
+ADIOI_Flatlist_node * ADIOI_Add_contig_flattened(MPI_Datatype contig_type)
+{
+    int contig_type_sz = -1;
+    ADIOI_Flatlist_node *tail_p = ADIOI_Flatlist;
+    ADIOI_Flatlist_node *new_node_p = NULL;
+
+    if (tail_p == NULL)
+    {
+        fprintf(stderr, "ADIOI_Add_contig_flattened: ADIOI_Flatlist is "
+                "empty\n");
+        return NULL;
+    }
+
+    /* Add contig type to the end of the list if it doesn't already
+     * exist. */
+    for (;;)
+    {
+        if (tail_p->type == contig_type)
+            return tail_p;
+        if (tail_p->next == NULL)
+            break;
+        tail_p = tail_p->next;
+    }
+
+    MPI_Type_size(contig_type, &contig_type_sz);
+
+    new_node_p = (ADIOI_Flatlist_node *)
+        ADIOI_Malloc(sizeof(ADIOI_Flatlist_node));
+    if (new_node_p == NULL)
+    {
+        fprintf(stderr, "ADIOI_Add_contig_flattened: malloc next failed\n");
+        return NULL;
+    }
+
+    new_node_p->blocklens = (ADIO_Offset *) ADIOI_Malloc(sizeof(ADIO_Offset));
+    if (new_node_p->blocklens == NULL)
+    {
+        fprintf(stderr, "ADIOI_Flatlist_node: malloc blocklens failed\n");
+        ADIOI_Free(new_node_p);
+        return NULL;
+    }
+
+    new_node_p->indices = (ADIO_Offset *) ADIOI_Malloc(sizeof(ADIO_Offset));
+    if (new_node_p->indices == NULL)
+    {
+        fprintf(stderr, "ADIOI_Flatlist_node: malloc indices failed\n");
+        ADIOI_Free(new_node_p->blocklens);
+        ADIOI_Free(new_node_p);
+        return NULL;
+    }
+
+    new_node_p->type = contig_type;
+    new_node_p->blocklens[0] = contig_type_sz;
+    new_node_p->indices[0] = 0;
+    new_node_p->count = 1;
+    new_node_p->next = NULL;
+
+    /* publish the node only now that it is fully initialized */
+    tail_p->next = new_node_p;
+    return new_node_p;
+}
+
+/* ADIOI_Exch_file_views - Sends all the aggregators the file
+ * views and file view states of the clients.  It fills in the
+ * client_file_view_state_arr for the aggregators and the
+ * my_mem_view_state_arr for the client.  It also initializes the
+ * agg_file_view_state_arr for all clients, which is the view for each
+ * aggregator of a client's filetype.
+ *
+ * Parameters:
+ *   myrank        - not referenced by this routine
+ *   nprocs        - number of ranks exchanged with on fd->comm; also the
+ *                   length assumed for client_file_view_state_arr
+ *   file_ptr_type - forwarded to ADIOI_init_view_state()
+ *   fd            - file handle; supplies comm, filetype, fp_ind, disp,
+ *                   is_agg and the cb_nodes/ranklist/cb_alltoall hints
+ *   count         - number of 'datatype' elements in the access
+ *   datatype      - memory datatype of the access
+ *   off           - offset of the access (stored as byte_off)
+ *   my_mem_view_state_arr, agg_file_view_state_arr
+ *                 - out: indexed by aggregator rank (fd->hints->ranklist)
+ *   client_file_view_state_arr
+ *                 - out, aggregators only: one entry per client rank
+ *
+ * The exchange has two steps.  First every client tells every
+ * aggregator how many entries its flattened filetype has, together with
+ * the scalar file view state (either with MPI_Alltoall or, when the
+ * cb_alltoall hint is disabled, point to point).  Then the indices and
+ * blocklens arrays of the flattened filetype are sent point to point.
+ *
+ * The blocklens arrays hold ADIO_Offset values, so they are transferred
+ * as ADIO_OFFSET just like the indices arrays. */
+void ADIOI_Exch_file_views(int myrank, int nprocs, int file_ptr_type,
+                           ADIO_File fd, int count,
+                           MPI_Datatype datatype, ADIO_Offset off,
+                           view_state *my_mem_view_state_arr,
+                           view_state *agg_file_view_state_arr,
+                           view_state *client_file_view_state_arr)
+{
+    /* Convert my own fileview to an ADIOI_Flattened type and a
+     * disp.  MPI_Alltoall the count of ADIOI_Flatlist nodes.
+     * MPI_Isend/Irecv the block_lens, indices of ADIOI_Flatlist node
+     * to/from each of the aggregators with the rest of the file view
+     * state. */
+
+    int i = -1, j = -1;
+    amount_and_extra_data_t *send_count_arr = NULL;
+    amount_and_extra_data_t *recv_count_arr = NULL;
+    amount_and_extra_data_t my_count_rec;
+    int send_req_arr_sz = 0;
+    int recv_req_arr_sz = 0;
+    MPI_Request *send_req_arr = NULL, *recv_req_arr = NULL;
+    MPI_Status *statuses = NULL;
+    ADIO_Offset disp_off_sz_ext_typesz[6];
+    MPI_Aint memtype_extent, filetype_extent;
+    int ret = -1;
+    const int use_alltoall = (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE);
+
+    /* parameters for datatypes */
+    ADIOI_Flatlist_node *flat_mem_p = NULL, *flat_file_p = NULL;
+    int memtype_sz = -1;
+    /* must start out false: it is only ever set in the contiguous branch */
+    int memtype_is_contig = 0;
+    int filetype_sz = -1;
+    /* size of the whole access, computed in ADIO_Offset so that
+     * memtype_sz * count cannot overflow an int */
+    ADIO_Offset total_sz = 0;
+    ADIO_Offset mem_type_sz_off = 0, mem_ext_off = 0;
+    ADIO_Offset file_type_sz_off = 0, file_ext_off = 0;
+
+#ifdef AGGREGATION_PROFILE
+    MPE_Log_event (5014, 0, NULL);
+#endif
+    /* The memtype will be freed after the call.  The filetype will be
+     * freed in the close and should have been flattened in the file
+     * view. */
+    MPI_Type_size(datatype, &memtype_sz);
+    MPI_Type_extent(datatype, &memtype_extent);
+    total_sz = (ADIO_Offset) memtype_sz * count;
+    if (memtype_sz == memtype_extent) {
+        memtype_is_contig = 1;
+        flat_mem_p = ADIOI_Add_contig_flattened(datatype);
+        flat_mem_p->blocklens[0] = total_sz;
+    }
+    else {
+        ADIOI_Flatten_datatype(datatype);
+        flat_mem_p = ADIOI_Flatlist;
+        while (flat_mem_p->type != datatype)
+            flat_mem_p = flat_mem_p->next;
+    }
+
+    if (memtype_is_contig) {
+        /* if memory is contiguous, the memory type is treated as one
+         * block covering the full access size */
+        mem_type_sz_off = total_sz;
+        mem_ext_off = total_sz;
+    }
+    else {
+        mem_type_sz_off = (ADIO_Offset) memtype_sz;
+        mem_ext_off = (ADIO_Offset) memtype_extent;
+    }
+
+    MPI_Type_extent(fd->filetype, &filetype_extent);
+    MPI_Type_size(fd->filetype, &filetype_sz);
+    if (filetype_extent == filetype_sz) {
+        /* contiguous filetype: one block covering the full access */
+        flat_file_p = ADIOI_Add_contig_flattened(fd->filetype);
+        flat_file_p->blocklens[0] = total_sz;
+        file_ext_off = total_sz;
+        file_type_sz_off = total_sz;
+    }
+    else {
+        flat_file_p = ADIOI_Flatlist;
+        while (flat_file_p->type != fd->filetype)
+            flat_file_p = flat_file_p->next;
+        file_ext_off = (ADIO_Offset) filetype_extent;
+        file_type_sz_off = (ADIO_Offset) filetype_sz;
+    }
+
+    disp_off_sz_ext_typesz[0] = fd->fp_ind;
+    disp_off_sz_ext_typesz[1] = fd->disp;
+    disp_off_sz_ext_typesz[2] = off;
+    disp_off_sz_ext_typesz[3] = total_sz;
+    disp_off_sz_ext_typesz[4] = file_ext_off;
+    disp_off_sz_ext_typesz[5] = file_type_sz_off;
+
+    /* The record every aggregator gets from this client.  It is zeroed
+     * first so that padding bytes sent over the wire are defined. */
+    memset(&my_count_rec, 0, sizeof(my_count_rec));
+    my_count_rec.count = flat_file_p->count;
+    my_count_rec.fp_ind = disp_off_sz_ext_typesz[0];
+    my_count_rec.disp = disp_off_sz_ext_typesz[1];
+    my_count_rec.byte_off = disp_off_sz_ext_typesz[2];
+    my_count_rec.sz = disp_off_sz_ext_typesz[3];
+    my_count_rec.ext = disp_off_sz_ext_typesz[4];
+    my_count_rec.type_sz = disp_off_sz_ext_typesz[5];
+
+    if (use_alltoall) {
+        /* indexed by rank; entries of non-aggregators stay zero */
+        recv_count_arr = ADIOI_Calloc(nprocs, sizeof(amount_and_extra_data_t));
+        send_count_arr = ADIOI_Calloc(nprocs, sizeof(amount_and_extra_data_t));
+    } else {
+        /* indexed by position in the aggregator list */
+        send_count_arr = ADIOI_Calloc(fd->hints->cb_nodes,
+                                      sizeof(amount_and_extra_data_t));
+
+        /* only aggregators receive data */
+        if (fd->is_agg) {
+            recv_count_arr = ADIOI_Calloc(nprocs,
+                                          sizeof(amount_and_extra_data_t));
+            recv_req_arr = ADIOI_Malloc (nprocs * sizeof(MPI_Request));
+            for (i=0; i < nprocs; i++)
+                MPI_Irecv (&recv_count_arr[i], sizeof(amount_and_extra_data_t),
+                           MPI_BYTE, i, COUNT_EXCH, fd->comm, &recv_req_arr[i]);
+        }
+
+        /* only send data to aggregators */
+        send_req_arr = ADIOI_Calloc (fd->hints->cb_nodes, sizeof(MPI_Request));
+        for (i=0; i < fd->hints->cb_nodes; i++) {
+            send_count_arr[i] = my_count_rec;
+            MPI_Isend (&send_count_arr[i], sizeof(amount_and_extra_data_t),
+                       MPI_BYTE, fd->hints->ranklist[i], COUNT_EXCH, fd->comm,
+                       &send_req_arr[i]);
+        }
+    }
+
+
+    /* Every client has to build mem and file view_states for each aggregator.
+     * We initialize their values here. and we also initialize
+     * send_count_arr */
+
+    for (i = 0; i < fd->hints->cb_nodes; i++)
+    {
+        int tmp_agg_idx = fd->hints->ranklist[i];
+        memset(&(my_mem_view_state_arr[tmp_agg_idx]), 0, sizeof(view_state));
+        my_mem_view_state_arr[tmp_agg_idx].sz =
+            disp_off_sz_ext_typesz[3];
+        my_mem_view_state_arr[tmp_agg_idx].ext = mem_ext_off;
+        my_mem_view_state_arr[tmp_agg_idx].type_sz = mem_type_sz_off;
+        my_mem_view_state_arr[tmp_agg_idx].flat_type_p = flat_mem_p;
+        ADIOI_init_view_state(file_ptr_type,
+                              1,
+                              &(my_mem_view_state_arr[tmp_agg_idx]),
+                              TEMP_OFF);
+        ADIOI_init_view_state(file_ptr_type,
+                              1,
+                              &(my_mem_view_state_arr[tmp_agg_idx]),
+                              REAL_OFF);
+
+        memset(&(agg_file_view_state_arr[tmp_agg_idx]), 0, sizeof(view_state));
+        agg_file_view_state_arr[tmp_agg_idx].fp_ind =
+            disp_off_sz_ext_typesz[0];
+        agg_file_view_state_arr[tmp_agg_idx].disp =
+            disp_off_sz_ext_typesz[1];
+        agg_file_view_state_arr[tmp_agg_idx].byte_off =
+            disp_off_sz_ext_typesz[2];
+        agg_file_view_state_arr[tmp_agg_idx].sz =
+            disp_off_sz_ext_typesz[3];
+        agg_file_view_state_arr[tmp_agg_idx].ext =
+            disp_off_sz_ext_typesz[4];
+        agg_file_view_state_arr[tmp_agg_idx].type_sz =
+            disp_off_sz_ext_typesz[5];
+        agg_file_view_state_arr[tmp_agg_idx].flat_type_p = flat_file_p;
+
+        ADIOI_init_view_state(file_ptr_type,
+                              1,
+                              &(agg_file_view_state_arr[tmp_agg_idx]),
+                              TEMP_OFF);
+        ADIOI_init_view_state(file_ptr_type,
+                              1,
+                              &(agg_file_view_state_arr[tmp_agg_idx]),
+                              REAL_OFF);
+
+        if (use_alltoall) {
+            send_count_arr[tmp_agg_idx] = my_count_rec;
+        }
+    }
+
+#ifdef DEBUG2
+    fprintf(stderr, "my own flattened memtype: ");
+    ADIOI_Print_flatlist_node(flat_mem_p);
+    fprintf(stderr, "my own flattened filetype: ");
+    ADIOI_Print_flatlist_node(flat_file_p);
+#endif
+
+    if (use_alltoall) {
+        ret = MPI_Alltoall(send_count_arr, sizeof(amount_and_extra_data_t),
+                           MPI_BYTE,
+                           recv_count_arr, sizeof(amount_and_extra_data_t),
+                           MPI_BYTE, fd->comm);
+        if (ret != MPI_SUCCESS)
+        {
+            fprintf(stderr, "ADIOI_Exchange_file_views: MPI_Alltoall failed "
+                    "with error %d", ret);
+            /* do not leak the count arrays on the error path */
+            if (send_count_arr != NULL)
+                ADIOI_Free(send_count_arr);
+            if (recv_count_arr != NULL)
+                ADIOI_Free(recv_count_arr);
+            return;
+        }
+    } else {
+        statuses = (MPI_Status *) ADIOI_Malloc(1 + nprocs * sizeof(MPI_Status));
+        if (fd->is_agg) {
+            MPI_Waitall(nprocs, recv_req_arr, statuses);
+            ADIOI_Free(recv_req_arr);
+            recv_req_arr = NULL;
+        }
+        MPI_Waitall(fd->hints->cb_nodes, send_req_arr, statuses);
+        ADIOI_Free(statuses);
+        ADIOI_Free(send_req_arr);
+        statuses = NULL;
+        send_req_arr = NULL;
+    }
+#ifdef DEBUG2
+    if (use_alltoall) {
+        fprintf(stderr, "send_count_arr:");
+        for (i = 0; i < nprocs; i++)
+        {
+            fprintf(stderr, "[%d]=%d ", i, send_count_arr[i].count);
+        }
+        fprintf(stderr, "\n");
+        fprintf(stderr, "recv_count_arr:");
+        for (i = 0; i < nprocs; i++)
+        {
+            fprintf(stderr, "[%d]=%d ", i, recv_count_arr[i].count);
+        }
+        fprintf(stderr, "\n");
+    } else {
+        fprintf(stderr, "send_count_arr:");
+        for (i = 0; i < fd->hints->cb_nodes; i++)
+        {
+            fprintf(stderr, "[%d]=%d ", i, send_count_arr[i].count);
+        }
+        fprintf(stderr, "\n");
+        if (fd->is_agg) {
+            fprintf(stderr, "recv_count_arr:");
+            for (i = 0; i < nprocs; i++)
+            {
+                fprintf(stderr, "[%d]=%d ", i, recv_count_arr[i].count);
+            }
+            fprintf(stderr, "\n");
+        }
+    }
+#endif
+
+    /* Figure out how many counts to send/recv.  send_count_arr has
+     * nprocs entries in all-to-all mode and cb_nodes entries otherwise. */
+    if (!use_alltoall) {
+        for (i=0; i < fd->hints->cb_nodes; i++)
+            if (send_count_arr[i].count > 0)
+                send_req_arr_sz++;
+    }
+    for (i = 0; i < nprocs; i++)
+    {
+        if (use_alltoall) {
+            if (send_count_arr[i].count > 0)
+                send_req_arr_sz++;
+        }
+        /* Only aggregators should recv*/
+        if (fd->is_agg) {
+            if (recv_count_arr[i].count > 0)
+            {
+                if ((client_file_view_state_arr[i].flat_type_p =
+                     (ADIOI_Flatlist_node *) ADIOI_Malloc(
+                         sizeof(ADIOI_Flatlist_node))) == NULL)
+                {
+                    fprintf(stderr, "ADIOI_Exchange_file_views: malloc "
+                            "flat_type_p failed\n");
+                }
+                client_file_view_state_arr[i].flat_type_p->count =
+                    recv_count_arr[i].count;
+                client_file_view_state_arr[i].flat_type_p->indices =
+                    (ADIO_Offset *) ADIOI_Calloc(recv_count_arr[i].count,
+                                                 sizeof(ADIO_Offset));
+                client_file_view_state_arr[i].flat_type_p->blocklens =
+                    (ADIO_Offset *) ADIOI_Calloc(recv_count_arr[i].count,
+                                                 sizeof(ADIO_Offset));
+
+                /* Copy the extra data out of the stuff we Alltoall'd.
+                 * This copies the six ADIO_Offset members that follow
+                 * 'count' in one go and so depends on their order. */
+                memcpy (&client_file_view_state_arr[i].fp_ind,
+                        &recv_count_arr[i].fp_ind,
+                        6*sizeof(ADIO_Offset));
+
+                recv_req_arr_sz++;
+            }
+        }
+    }
+
+    /* Since ADIOI_Calloc may do other things we add the +1
+     * to avoid a 0-size malloc */
+    send_req_arr = (MPI_Request *) ADIOI_Calloc(2*(send_req_arr_sz)+1,
+                                                sizeof(MPI_Request));
+
+    /* Two requests per peer: one for the indices, one for the blocklens.
+     * Both arrays hold ADIO_Offset values. */
+    j = 0;
+    if (recv_req_arr_sz > 0) {
+        assert (fd->is_agg);
+        recv_req_arr = (MPI_Request *) ADIOI_Calloc(2*(recv_req_arr_sz),
+                                                    sizeof(MPI_Request));
+        for (i = 0; i < nprocs; i++) {
+            if (recv_count_arr[i].count > 0) {
+                MPI_Irecv(client_file_view_state_arr[i].flat_type_p->indices,
+                          recv_count_arr[i].count, ADIO_OFFSET, i,
+                          INDICES, fd->comm, &recv_req_arr[j]);
+                j++;
+                MPI_Irecv(client_file_view_state_arr[i].flat_type_p->blocklens,
+                          recv_count_arr[i].count, ADIO_OFFSET, i,
+                          BLOCK_LENS, fd->comm, &recv_req_arr[j]);
+                j++;
+            }
+        }
+    }
+
+    if (use_alltoall) {
+        j = 0;
+        for (i = 0; i < nprocs; i++) {
+            if (send_count_arr[i].count > 0) {
+                MPI_Isend(flat_file_p->indices,
+                          send_count_arr[i].count, ADIO_OFFSET, i,
+                          INDICES, fd->comm, &send_req_arr[j]);
+                j++;
+                MPI_Isend(flat_file_p->blocklens,
+                          send_count_arr[i].count, ADIO_OFFSET, i,
+                          BLOCK_LENS, fd->comm, &send_req_arr[j]);
+                j++;
+            }
+        }
+    } else {
+        j = 0;
+        for (i = 0; i < fd->hints->cb_nodes; i++) {
+            if (send_count_arr[i].count > 0) {
+                MPI_Isend(flat_file_p->indices,
+                          send_count_arr[i].count, ADIO_OFFSET,
+                          fd->hints->ranklist[i], INDICES, fd->comm,
+                          &send_req_arr[j]);
+                j++;
+                MPI_Isend(flat_file_p->blocklens,
+                          send_count_arr[i].count, ADIO_OFFSET,
+                          fd->hints->ranklist[i], BLOCK_LENS, fd->comm,
+                          &send_req_arr[j]);
+                j++;
+            }
+        }
+    }
+
+    /* Since ADIOI_Malloc may do other things we add the +1
+     * to avoid a 0-size malloc */
+    statuses = (MPI_Status *)
+        ADIOI_Malloc(1 + 2 * ADIOI_MAX(send_req_arr_sz,recv_req_arr_sz)
+                     * sizeof(MPI_Status));
+
+    if (send_req_arr_sz > 0) {
+        MPI_Waitall(2 * send_req_arr_sz, send_req_arr, statuses);
+    }
+    if (recv_req_arr_sz > 0) {
+        MPI_Waitall(2 * recv_req_arr_sz, recv_req_arr, statuses);
+        ADIOI_Free(recv_req_arr);
+    }
+    /* The request array and the count arrays exist even when no
+     * request was posted, so release them unconditionally.  The NULL
+     * checks cover ranks that never allocated recv_count_arr. */
+    ADIOI_Free(send_req_arr);
+    if (send_count_arr != NULL)
+        ADIOI_Free(send_count_arr);
+    if (recv_count_arr != NULL)
+        ADIOI_Free(recv_count_arr);
+    ADIOI_Free(statuses);
+
+    if (fd->is_agg == 1)
+    {
+        ADIOI_init_view_state(file_ptr_type,
+                              nprocs,
+                              client_file_view_state_arr,
+                              TEMP_OFF);
+        ADIOI_init_view_state(file_ptr_type,
+                              nprocs,
+                              client_file_view_state_arr,
+                              REAL_OFF);
+    }
+
+#ifdef DEBUG
+    if (fd->is_agg == 1)
+    {
+        ADIOI_Flatlist_node *fr_node_p = ADIOI_Flatlist;
+        for (i = 0; i < nprocs; i++)
+        {
+            fprintf(stderr, "client_file_view_state_arr[%d]=(fp_ind=%lld,"
+                    "disp=%lld,byte_off=%lld,sz=%lld,ext=%lld\n", i,
+                    (long long) client_file_view_state_arr[i].fp_ind,
+                    (long long) client_file_view_state_arr[i].disp,
+                    (long long) client_file_view_state_arr[i].byte_off,
+                    (long long) client_file_view_state_arr[i].sz,
+                    (long long) client_file_view_state_arr[i].ext);
+        }
+
+        /* stop at the end of the list so that the assert below can
+         * actually report a missing realm type */
+        while (fr_node_p != NULL &&
+               fr_node_p->type !=
+               fd->file_realm_types[fd->my_cb_nodes_index])
+            fr_node_p = fr_node_p->next;
+        assert(fr_node_p != NULL);
+
+        fprintf(stderr, "my file realm (idx=%d,st_off=%lld) ",
+                fd->my_cb_nodes_index,
+                (long long) fd->file_realm_st_offs[fd->my_cb_nodes_index]);
+        ADIOI_Print_flatlist_node(fr_node_p);
+    }
+#endif
+
+#ifdef DEBUG2
+    if (fd->is_agg == 1)
+    {
+        for (i = 0; i < nprocs; i++)
+        {
+            fprintf(stderr, "client_file_view_state_arr[%d]: ", i);
+            ADIOI_Print_flatlist_node(
+                client_file_view_state_arr[i].flat_type_p);
+        }
+    }
+#endif
+#ifdef AGGREGATION_PROFILE
+    MPE_Log_event (5015, 0, NULL);
+#endif
+}
diff --git a/ompi/mca/io/romio/romio/adio/common/ad_darray.c b/ompi/mca/io/romio/romio/adio/common/ad_darray.c
index b9b3c46d7d..faa2cf694d 100644
--- a/ompi/mca/io/romio/romio/adio/common/ad_darray.c
+++ b/ompi/mca/io/romio/romio/adio/common/ad_darray.c
@@ -81,7 +81,7 @@ int ADIO_Type_create_darray(int size, int rank, int ndims,
tmp_size = 1;
for (i=1; i=0; i--) {
tmp_size *= array_of_gsizes[i+1];
- disps[1] += tmp_size*st_offsets[i];
+ disps[1] += (MPI_Aint)tmp_size*st_offsets[i];
}
}
disps[1] *= orig_extent;
disps[2] = orig_extent;
- for (i=0; idim; i--) stride *= array_of_gsizes[i];
+ for (i=ndims-1; i>dim; i--) stride *= (MPI_Aint)array_of_gsizes[i];
MPI_Type_hvector(mysize, 1, stride, type_old, type_new);
}
}
- *st_offset = blksize * rank;
+ *st_offset = (MPI_Aint)blksize * (MPI_Aint)rank;
/* in terms of no. of elements of type oldtype in this dimension */
if (mysize == 0) *st_offset = 0;
@@ -241,10 +241,10 @@ static int MPIOI_Type_cyclic(int *array_of_gsizes, int dim, int ndims, int nproc
count = local_size/blksize;
rem = local_size % blksize;
- stride = nprocs*blksize*orig_extent;
+ stride = (MPI_Aint)nprocs*(MPI_Aint)blksize*orig_extent;
if (order == MPI_ORDER_FORTRAN)
- for (i=0; idim; i--) stride *= array_of_gsizes[i];
+ for (i=0; idim; i--) stride *= (MPI_Aint)array_of_gsizes[i];
MPI_Type_hvector(count, blksize, stride, type_old, type_new);
@@ -255,7 +255,7 @@ static int MPIOI_Type_cyclic(int *array_of_gsizes, int dim, int ndims, int nproc
types[0] = *type_new;
types[1] = type_old;
disps[0] = 0;
- disps[1] = count*stride;
+ disps[1] = (MPI_Aint)count*stride;
blklens[0] = 1;
blklens[1] = rem;
@@ -272,9 +272,9 @@ static int MPIOI_Type_cyclic(int *array_of_gsizes, int dim, int ndims, int nproc
types[0] = MPI_LB;
disps[0] = 0;
types[1] = *type_new;
- disps[1] = rank * blksize * orig_extent;
+ disps[1] = (MPI_Aint)rank * (MPI_Aint)blksize * orig_extent;
types[2] = MPI_UB;
- disps[2] = orig_extent * array_of_gsizes[dim];
+ disps[2] = orig_extent * (MPI_Aint)array_of_gsizes[dim];
blklens[0] = blklens[1] = blklens[2] = 1;
MPI_Type_struct(3, blklens, disps, types, &type_tmp);
MPI_Type_free(type_new);
@@ -284,7 +284,7 @@ static int MPIOI_Type_cyclic(int *array_of_gsizes, int dim, int ndims, int nproc
the struct above */
}
else {
- *st_offset = rank * blksize;
+ *st_offset = (MPI_Aint)rank * (MPI_Aint)blksize;
/* st_offset is in terms of no. of elements of type oldtype in
* this dimension */
}
diff --git a/ompi/mca/io/romio/romio/adio/common/ad_end.c b/ompi/mca/io/romio/romio/adio/common/ad_end.c
index 3b0778a659..9b05af3e3d 100644
--- a/ompi/mca/io/romio/romio/adio/common/ad_end.c
+++ b/ompi/mca/io/romio/romio/adio/common/ad_end.c
@@ -17,6 +17,15 @@ void ADIO_End(int *error_code)
/* FPRINTF(stderr, "reached end\n"); */
+ /* if a default errhandler was set on MPI_FILE_NULL then we need to ensure
+ * that our reference to that errhandler is released */
+/* Open MPI: The call to PMPI_File_set_errhandler has to be done in romio/src/io_romio_file_open.c
+ in routine mca_io_romio_file_close()
+*/
+#if 0
+ PMPI_File_set_errhandler(MPI_FILE_NULL, MPI_ERRORS_RETURN);
+#endif
+
/* delete the flattened datatype list */
curr = ADIOI_Flatlist;
while (curr) {
@@ -39,7 +48,7 @@ void ADIO_End(int *error_code)
datarep = ADIOI_Datarep_head;
while (datarep) {
datarep_next = datarep->next;
-#ifdef MPICH2
+#ifdef HAVE_MPIU_FUNCS
MPIU_Free(datarep->name);
#else
ADIOI_Free(datarep->name);
@@ -48,13 +57,18 @@ void ADIO_End(int *error_code)
datarep = datarep_next;
}
+ if( ADIOI_syshints != MPI_INFO_NULL)
+ MPI_Info_free(&ADIOI_syshints);
+
+ MPI_Op_free(&ADIO_same_amode);
+
*error_code = MPI_SUCCESS;
}
/* This is the delete callback function associated with
- ADIO_Init_keyval when MPI_COMM_WORLD is freed */
+ ADIO_Init_keyval when MPI_COMM_SELF is freed */
int ADIOI_End_call(MPI_Comm comm, int keyval, void *attribute_val, void
*extra_state)
@@ -62,10 +76,17 @@ int ADIOI_End_call(MPI_Comm comm, int keyval, void *attribute_val, void
int error_code;
ADIOI_UNREFERENCED_ARG(comm);
- ADIOI_UNREFERENCED_ARG(keyval);
ADIOI_UNREFERENCED_ARG(attribute_val);
ADIOI_UNREFERENCED_ARG(extra_state);
+ MPI_Keyval_free(&keyval);
+
+ /* The end call will be called after all possible uses of this keyval, even
+ * if a file was opened with MPI_COMM_SELF. Note, this assumes LIFO
+ * MPI_COMM_SELF attribute destruction behavior mandated by MPI-2.2. */
+ if (ADIOI_cb_config_list_keyval != MPI_KEYVAL_INVALID)
+ MPI_Keyval_free(&ADIOI_cb_config_list_keyval);
+
ADIO_End(&error_code);
return error_code;
}
diff --git a/ompi/mca/io/romio/romio/adio/common/ad_features.c b/ompi/mca/io/romio/romio/adio/common/ad_features.c
new file mode 100644
index 0000000000..a62be219a2
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/common/ad_features.c
@@ -0,0 +1,17 @@
+#include "adio.h"
+
+/* ADIOI_GEN_Feature - generic feature query.
+ *
+ * Returns 1 when 'flag' is one of ADIO_LOCKS, ADIO_SHARED_FP,
+ * ADIO_ATOMIC_MODE or ADIO_DATA_SIEVING_WRITES, and 0 for
+ * ADIO_SCALABLE_OPEN and for every other value.  'fd' is not
+ * consulted. */
+int ADIOI_GEN_Feature(ADIO_File fd, int flag)
+{
+    if (flag == ADIO_LOCKS ||
+        flag == ADIO_SHARED_FP ||
+        flag == ADIO_ATOMIC_MODE ||
+        flag == ADIO_DATA_SIEVING_WRITES)
+    {
+        return 1;
+    }
+
+    /* ADIO_SCALABLE_OPEN and anything unrecognized */
+    return 0;
+}
diff --git a/ompi/mca/io/romio/romio/adio/common/ad_fstype.c b/ompi/mca/io/romio/romio/adio/common/ad_fstype.c
index 286efc839b..b350effb85 100644
--- a/ompi/mca/io/romio/romio/adio/common/ad_fstype.c
+++ b/ompi/mca/io/romio/romio/adio/common/ad_fstype.c
@@ -26,6 +26,10 @@
#include "pvfs2.h"
#endif
+#ifdef HAVE_ZOIDFS_H
+#include "zoidfs.h"
+#endif
+
/* Notes on detection process:
*
* There are three more "general" mechanisms that we use for detecting
@@ -298,7 +302,7 @@ static void ADIO_FileSysType_fncall(char *filename, int *fstype, int *error_code
/* --END ERROR HANDLING-- */
#endif /* STATVFS APPROACH */
-#if defined(HAVE_STRUCT_STATFS) && defined(HAVE_STATFS)
+#ifdef HAVE_STRUCT_STATFS
do {
err = statfs(filename, &fsbuf);
} while (err && (errno == ESTALE));
@@ -342,14 +346,15 @@ static void ADIO_FileSysType_fncall(char *filename, int *fstype, int *error_code
}
# endif
-/*#if defined(LINUX) && defined(ROMIO_LUSTRE)*/
#ifdef ROMIO_LUSTRE
-#define LL_SUPER_MAGIC 0x0BD00BD0
+# ifndef LL_SUPER_MAGIC
+# define LL_SUPER_MAGIC 0x0BD00BD0
+# endif
if (fsbuf.f_type == LL_SUPER_MAGIC) {
*fstype = ADIO_LUSTRE;
return;
}
-# endif
+#endif
# ifdef PAN_KERNEL_FS_CLIENT_SUPER_MAGIC
if (fsbuf.f_type == PAN_KERNEL_FS_CLIENT_SUPER_MAGIC) {
@@ -386,6 +391,13 @@ static void ADIO_FileSysType_fncall(char *filename, int *fstype, int *error_code
}
# endif
+# ifdef XFS_SUPER_MAGIC
+ if (fsbuf.f_type == XFS_SUPER_MAGIC) {
+ *fstype = ADIO_XFS;
+ return;
+ }
+# endif
+
# ifdef ROMIO_UFS
/* if UFS support is enabled, default to that */
*fstype = ADIO_UFS;
@@ -460,6 +472,8 @@ static void ADIO_FileSysType_fncall_scalable(MPI_Comm comm, char *filename, int
*error_code = buf[1];
}
+
+
/*
ADIO_FileSysType_prefix - determines file system type for a file using
a prefix on the file name. upper layer should have already determined
@@ -511,6 +525,10 @@ static void ADIO_FileSysType_prefix(char *filename, int *fstype, int *error_code
else if (!strncmp(filename, "pvfs2:", 6)||!strncmp(filename, "PVFS2:", 6)) {
*fstype = ADIO_PVFS2;
}
+ else if (!strncmp(filename, "zoidfs:", 7)||
+ !strncmp(filename, "ZOIDFS:", 7)) {
+ *fstype = ADIO_ZOIDFS;
+ }
else if (!strncmp(filename, "testfs:", 7)
|| !strncmp(filename, "TESTFS:", 7))
{
@@ -801,6 +819,16 @@ void ADIO_ResolveFileType(MPI_Comm comm, char *filename, int *fstype,
return;
#else
*ops = &ADIO_LUSTRE_operations;
+#endif
+ }
+ if (file_system == ADIO_ZOIDFS) {
+#ifndef ROMIO_ZOIDFS
+ *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
+ myname, __LINE__, MPI_ERR_IO,
+ "**iofstypeunsupported", 0);
+ return;
+#else
+ *ops = &ADIO_ZOIDFS_operations;
#endif
}
*error_code = MPI_SUCCESS;
diff --git a/ompi/mca/io/romio/romio/adio/common/ad_hints.c b/ompi/mca/io/romio/romio/adio/common/ad_hints.c
index c221b6b4d3..cd755d4a7f 100644
--- a/ompi/mca/io/romio/romio/adio/common/ad_hints.c
+++ b/ompi/mca/io/romio/romio/adio/common/ad_hints.c
@@ -19,8 +19,18 @@ void ADIOI_GEN_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
MPI_Info info;
char *value;
int flag, intval, tmp_val, nprocs=0, nprocs_is_valid = 0, len;
+ int ok_to_override_cb_nodes=0;
static char myname[] = "ADIOI_GEN_SETINFO";
+
+ /* if we've already set up default hints and the user has not asked us to
+ * process any hints (MPI_INFO_NULL), then we can short-circuit hint
+ * processing */
+ if (fd->hints->initialized && fd->info == MPI_INFO_NULL) {
+ *error_code = MPI_SUCCESS;
+ return;
+ }
+
if (fd->info == MPI_INFO_NULL) MPI_Info_create(&(fd->info));
info = fd->info;
@@ -37,17 +47,18 @@ void ADIOI_GEN_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
* previously initialized
*/
if (!fd->hints->initialized) {
+
/* buffer size for collective I/O */
- MPI_Info_set(info, "cb_buffer_size", ADIOI_CB_BUFFER_SIZE_DFLT);
+ ADIOI_Info_set(info, "cb_buffer_size", ADIOI_CB_BUFFER_SIZE_DFLT);
fd->hints->cb_buffer_size = atoi(ADIOI_CB_BUFFER_SIZE_DFLT);
/* default is to let romio automatically decide when to use
* collective buffering
*/
- MPI_Info_set(info, "romio_cb_read", "automatic");
+ ADIOI_Info_set(info, "romio_cb_read", "automatic");
fd->hints->cb_read = ADIOI_HINT_AUTO;
- MPI_Info_set(info, "romio_cb_write", "automatic");
- fd->hints->cb_write = ADIOI_HINT_AUTO;
+ ADIOI_Info_set(info, "romio_cb_write", "automatic");
+ fd->hints->cb_write = ADIOI_HINT_AUTO;
fd->hints->cb_config_list = NULL;
@@ -55,37 +66,71 @@ void ADIOI_GEN_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
MPI_Comm_size(fd->comm, &nprocs);
nprocs_is_valid = 1;
ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", nprocs);
- MPI_Info_set(info, "cb_nodes", value);
+ ADIOI_Info_set(info, "cb_nodes", value);
fd->hints->cb_nodes = nprocs;
/* hint indicating that no indep. I/O will be performed on this file */
- MPI_Info_set(info, "romio_no_indep_rw", "false");
+ ADIOI_Info_set(info, "romio_no_indep_rw", "false");
fd->hints->no_indep_rw = 0;
- /* deferred_open derrived from no_indep_rw and cb_{read,write} */
+
+ /* hint instructing the use of persistent file realms */
+ ADIOI_Info_set(info, "romio_cb_pfr", "disable");
+ fd->hints->cb_pfr = ADIOI_HINT_DISABLE;
+
+ /* hint guiding the assignment of persistent file realms */
+ ADIOI_Info_set(info, "romio_cb_fr_types", "aar");
+ fd->hints->cb_fr_type = ADIOI_FR_AAR;
+
+ /* hint to align file realms with a certain byte value */
+ ADIOI_Info_set(info, "romio_cb_fr_alignment", "1");
+ fd->hints->cb_fr_alignment = 1;
+
+ /* hint to set a threshold percentage for a datatype's size/extent at
+ * which data sieving should be done in collective I/O */
+ ADIOI_Info_set(info, "romio_cb_ds_threshold", "0");
+ fd->hints->cb_ds_threshold = 0;
+
+ /* hint to switch between point-to-point or all-to-all for two-phase */
+ ADIOI_Info_set(info, "romio_cb_alltoall", "automatic");
+ fd->hints->cb_alltoall = ADIOI_HINT_AUTO;
+
+ /* deferred_open derived from no_indep_rw and cb_{read,write} */
fd->hints->deferred_open = 0;
/* buffer size for data sieving in independent reads */
- MPI_Info_set(info, "ind_rd_buffer_size", ADIOI_IND_RD_BUFFER_SIZE_DFLT);
+ ADIOI_Info_set(info, "ind_rd_buffer_size", ADIOI_IND_RD_BUFFER_SIZE_DFLT);
fd->hints->ind_rd_buffer_size = atoi(ADIOI_IND_RD_BUFFER_SIZE_DFLT);
/* buffer size for data sieving in independent writes */
- MPI_Info_set(info, "ind_wr_buffer_size", ADIOI_IND_WR_BUFFER_SIZE_DFLT);
+ ADIOI_Info_set(info, "ind_wr_buffer_size", ADIOI_IND_WR_BUFFER_SIZE_DFLT);
fd->hints->ind_wr_buffer_size = atoi(ADIOI_IND_WR_BUFFER_SIZE_DFLT);
/* default is to let romio automatically decide when to use data
* sieving
*/
- MPI_Info_set(info, "romio_ds_read", "automatic");
+ ADIOI_Info_set(info, "romio_ds_read", "automatic");
fd->hints->ds_read = ADIOI_HINT_AUTO;
- MPI_Info_set(info, "romio_ds_write", "automatic");
+ ADIOI_Info_set(info, "romio_ds_write", "automatic");
fd->hints->ds_write = ADIOI_HINT_AUTO;
+ /* still to do: tune this a bit for a variety of file systems. there's
+ * no good default value so just leave it unset */
+ fd->hints->min_fdomain_size = 0;
+ fd->hints->striping_unit = 0;
+
fd->hints->initialized = 1;
+
+ /* ADIO_Open sets up collective buffering arrays. If we are in this
+ * path from say set_file_view, then we don't want to adjust the
+ * array: we'll get a segfault during collective i/o. We only want to
+ * look at the users cb_nodes if it's open time */
+ ok_to_override_cb_nodes = 1;
+
}
/* add in user's info if supplied */
if (users_info != MPI_INFO_NULL) {
- MPI_Info_get(users_info, "cb_buffer_size", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(users_info, "cb_buffer_size", MPI_MAX_INFO_VAL,
value, &flag);
if (flag && ((intval=atoi(value)) > 0)) {
tmp_val = intval;
@@ -100,30 +145,104 @@ void ADIOI_GEN_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
}
/* --END ERROR HANDLING-- */
- MPI_Info_set(info, "cb_buffer_size", value);
+ ADIOI_Info_set(info, "cb_buffer_size", value);
fd->hints->cb_buffer_size = intval;
}
+ /* aligning file realms to certain sizes (e.g. stripe sizes)
+ * may benefit I/O performance */
+ ADIOI_Info_get(users_info, "romio_cb_fr_alignment", MPI_MAX_INFO_VAL,
+ value, &flag);
+ if (flag && ((intval=atoi(value)) > 0)) {
+ tmp_val = intval;
+
+ MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
+ /* --BEGIN ERROR HANDLING-- */
+ if (tmp_val != intval) {
+ MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
+ "romio_cb_fr_alignment",
+ error_code);
+ return;
+ }
+ /* --END ERROR HANDLING-- */
+
+ ADIOI_Info_set(info, "romio_cb_fr_alignment", value);
+ fd->hints->cb_fr_alignment = intval;
+
+ }
+
+ /* for collective I/O, try to be smarter about when to do data sieving
+ * using a specific threshold for the datatype size/extent
+ * (percentage 0-100%) */
+ ADIOI_Info_get(users_info, "romio_cb_ds_threshold", MPI_MAX_INFO_VAL,
+ value, &flag);
+ if (flag && ((intval=atoi(value)) > 0)) {
+ tmp_val = intval;
+
+ MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
+ /* --BEGIN ERROR HANDLING-- */
+ if (tmp_val != intval) {
+ MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
+ "romio_cb_ds_threshold",
+ error_code);
+ return;
+ }
+ /* --END ERROR HANDLING-- */
+
+ ADIOI_Info_set(info, "romio_cb_ds_threshold", value);
+ fd->hints->cb_ds_threshold = intval;
+
+ }
+ ADIOI_Info_get(users_info, "romio_cb_alltoall", MPI_MAX_INFO_VAL, value,
+ &flag);
+ if (flag) {
+ if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
+ ADIOI_Info_set(info, "romio_cb_alltoall", value);
+ fd->hints->cb_alltoall = ADIOI_HINT_ENABLE;
+ }
+ else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) {
+ ADIOI_Info_set(info, "romio_cb_alltoall", value);
+ fd->hints->cb_alltoall = ADIOI_HINT_DISABLE;
+ }
+ else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
+ {
+ ADIOI_Info_set(info, "romio_cb_alltoall", value);
+ fd->hints->cb_alltoall = ADIOI_HINT_AUTO;
+ }
+ /* any other value is ignored: cb_alltoall keeps its previous setting */
+ tmp_val = fd->hints->cb_alltoall;
+ /* every rank must end up with the same setting as rank 0 */
+ MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
+ /* --BEGIN ERROR HANDLING-- */
+ if (tmp_val != fd->hints->cb_alltoall) {
+ MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
+ "romio_cb_alltoall",
+ error_code);
+ return;
+ }
+ /* --END ERROR HANDLING-- */
+ }
/* new hints for enabling/disabling coll. buffering on
* reads/writes
*/
- MPI_Info_get(users_info, "romio_cb_read", MPI_MAX_INFO_VAL, value, &flag);
+ ADIOI_Info_get(users_info, "romio_cb_read", MPI_MAX_INFO_VAL, value,
+ &flag);
if (flag) {
if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
- MPI_Info_set(info, "romio_cb_read", value);
+ ADIOI_Info_set(info, "romio_cb_read", value);
fd->hints->cb_read = ADIOI_HINT_ENABLE;
}
else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) {
/* romio_cb_read overrides no_indep_rw */
- MPI_Info_set(info, "romio_cb_read", value);
- MPI_Info_set(info, "romio_no_indep_rw", "false");
+ ADIOI_Info_set(info, "romio_cb_read", value);
+ ADIOI_Info_set(info, "romio_no_indep_rw", "false");
fd->hints->cb_read = ADIOI_HINT_DISABLE;
fd->hints->no_indep_rw = ADIOI_HINT_DISABLE;
}
else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
{
- MPI_Info_set(info, "romio_cb_read", value);
+ ADIOI_Info_set(info, "romio_cb_read", value);
fd->hints->cb_read = ADIOI_HINT_AUTO;
}
@@ -139,25 +258,25 @@ void ADIOI_GEN_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
}
/* --END ERROR HANDLING-- */
}
- MPI_Info_get(users_info, "romio_cb_write", MPI_MAX_INFO_VAL, value,
+ ADIOI_Info_get(users_info, "romio_cb_write", MPI_MAX_INFO_VAL, value,
&flag);
if (flag) {
if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
- MPI_Info_set(info, "romio_cb_write", value);
+ ADIOI_Info_set(info, "romio_cb_write", value);
fd->hints->cb_write = ADIOI_HINT_ENABLE;
}
else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE"))
{
/* romio_cb_write overrides no_indep_rw, too */
- MPI_Info_set(info, "romio_cb_write", value);
- MPI_Info_set(info, "romio_no_indep_rw", "false");
+ ADIOI_Info_set(info, "romio_cb_write", value);
+ ADIOI_Info_set(info, "romio_no_indep_rw", "false");
fd->hints->cb_write = ADIOI_HINT_DISABLE;
fd->hints->no_indep_rw = ADIOI_HINT_DISABLE;
}
else if (!strcmp(value, "automatic") ||
!strcmp(value, "AUTOMATIC"))
{
- MPI_Info_set(info, "romio_cb_write", value);
+ ADIOI_Info_set(info, "romio_cb_write", value);
fd->hints->cb_write = ADIOI_HINT_AUTO;
}
@@ -174,24 +293,79 @@ void ADIOI_GEN_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
/* --END ERROR HANDLING-- */
}
+ /* enable/disable persistent file realms for collective I/O */
+ /* may want to check for no_indep_rdwr hint as well */
+ ADIOI_Info_get(users_info, "romio_cb_pfr", MPI_MAX_INFO_VAL, value,
+ &flag);
+ if (flag) {
+ if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
+ ADIOI_Info_set(info, "romio_cb_pfr", value);
+ fd->hints->cb_pfr = ADIOI_HINT_ENABLE;
+ }
+ else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) {
+ ADIOI_Info_set(info, "romio_cb_pfr", value);
+ fd->hints->cb_pfr = ADIOI_HINT_DISABLE;
+ }
+ else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
+ {
+ ADIOI_Info_set(info, "romio_cb_pfr", value);
+ fd->hints->cb_pfr = ADIOI_HINT_AUTO;
+ }
+
+ tmp_val = fd->hints->cb_pfr;
+
+ MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
+ /* --BEGIN ERROR HANDLING-- */
+ if (tmp_val != fd->hints->cb_pfr) {
+ MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
+ "romio_cb_pfr",
+ error_code);
+ return;
+ }
+ /* --END ERROR HANDLING-- */
+ }
+
+ /* file realm assignment types ADIOI_FR_AAR(0),
+ ADIOI_FR_FSZ(-1), ADIOI_FR_USR_REALMS(-2), all others specify
+ a regular fr size in bytes. probably not the best way... */
+ ADIOI_Info_get(users_info, "romio_cb_fr_type", MPI_MAX_INFO_VAL,
+ value, &flag);
+ if (flag && ((intval=atoi(value)) >= -2)) {
+ tmp_val = intval;
+
+ MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
+ /* --BEGIN ERROR HANDLING-- */
+ if (tmp_val != intval) {
+ MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
+ "romio_cb_fr_type",
+ error_code);
+ return;
+ }
+ /* --END ERROR HANDLING-- */
+
+ ADIOI_Info_set(info, "romio_cb_fr_type", value);
+ fd->hints->cb_fr_type = intval;
+
+ }
+
/* new hint for specifying no indep. read/write will be performed */
- MPI_Info_get(users_info, "romio_no_indep_rw", MPI_MAX_INFO_VAL, value,
+ ADIOI_Info_get(users_info, "romio_no_indep_rw", MPI_MAX_INFO_VAL, value,
&flag);
if (flag) {
if (!strcmp(value, "true") || !strcmp(value, "TRUE")) {
/* if 'no_indep_rw' set, also hint that we will do
* collective buffering: if we aren't doing independent io,
* then we have to do collective */
- MPI_Info_set(info, "romio_no_indep_rw", value);
- MPI_Info_set(info, "romio_cb_write", "enable");
- MPI_Info_set(info, "romio_cb_read", "enable");
+ ADIOI_Info_set(info, "romio_no_indep_rw", value);
+ ADIOI_Info_set(info, "romio_cb_write", "enable");
+ ADIOI_Info_set(info, "romio_cb_read", "enable");
fd->hints->no_indep_rw = 1;
fd->hints->cb_read = 1;
fd->hints->cb_write = 1;
tmp_val = 1;
}
else if (!strcmp(value, "false") || !strcmp(value, "FALSE")) {
- MPI_Info_set(info, "romio_no_indep_rw", value);
+ ADIOI_Info_set(info, "romio_no_indep_rw", value);
fd->hints->no_indep_rw = 0;
tmp_val = 0;
}
@@ -213,86 +387,91 @@ void ADIOI_GEN_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
/* new hints for enabling/disabling data sieving on
* reads/writes
*/
- MPI_Info_get(users_info, "romio_ds_read", MPI_MAX_INFO_VAL, value,
+ ADIOI_Info_get(users_info, "romio_ds_read", MPI_MAX_INFO_VAL, value,
&flag);
if (flag) {
if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
- MPI_Info_set(info, "romio_ds_read", value);
+ ADIOI_Info_set(info, "romio_ds_read", value);
fd->hints->ds_read = ADIOI_HINT_ENABLE;
}
else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) {
- MPI_Info_set(info, "romio_ds_read", value);
+ ADIOI_Info_set(info, "romio_ds_read", value);
fd->hints->ds_read = ADIOI_HINT_DISABLE;
}
else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
{
- MPI_Info_set(info, "romio_ds_read", value);
+ ADIOI_Info_set(info, "romio_ds_read", value);
fd->hints->ds_read = ADIOI_HINT_AUTO;
}
/* otherwise ignore */
}
- MPI_Info_get(users_info, "romio_ds_write", MPI_MAX_INFO_VAL, value,
+ ADIOI_Info_get(users_info, "romio_ds_write", MPI_MAX_INFO_VAL, value,
&flag);
if (flag) {
if (!strcmp(value, "enable") || !strcmp(value, "ENABLE")) {
- MPI_Info_set(info, "romio_ds_write", value);
+ ADIOI_Info_set(info, "romio_ds_write", value);
fd->hints->ds_write = ADIOI_HINT_ENABLE;
}
else if (!strcmp(value, "disable") || !strcmp(value, "DISABLE")) {
- MPI_Info_set(info, "romio_ds_write", value);
+ ADIOI_Info_set(info, "romio_ds_write", value);
fd->hints->ds_write = ADIOI_HINT_DISABLE;
}
else if (!strcmp(value, "automatic") || !strcmp(value, "AUTOMATIC"))
{
- MPI_Info_set(info, "romio_ds_write", value);
+ ADIOI_Info_set(info, "romio_ds_write", value);
fd->hints->ds_write = ADIOI_HINT_AUTO;
}
/* otherwise ignore */
}
- MPI_Info_get(users_info, "cb_nodes", MPI_MAX_INFO_VAL,
- value, &flag);
- if (flag && ((intval=atoi(value)) > 0)) {
- tmp_val = intval;
+ if (ok_to_override_cb_nodes) {
+ /* MPI_File_open path sets up some data structures that don't
+ * get resized in the MPI_File_set_view path, so ignore
+ * cb_nodes in the set_view case */
+ ADIOI_Info_get(users_info, "cb_nodes", MPI_MAX_INFO_VAL,
+ value, &flag);
+ if (flag && ((intval=atoi(value)) > 0)) {
+ tmp_val = intval;
- MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
- /* --BEGIN ERROR HANDLING-- */
- if (tmp_val != intval) {
+ MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
+ /* --BEGIN ERROR HANDLING-- */
+ if (tmp_val != intval) {
MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
"cb_nodes",
error_code);
return;
- }
- /* --END ERROR HANDLING-- */
+ }
+ /* --END ERROR HANDLING-- */
- if (!nprocs_is_valid) {
- /* if hints were already initialized, we might not
- * have already gotten this?
- */
- MPI_Comm_size(fd->comm, &nprocs);
- nprocs_is_valid = 1;
- }
- if (intval <= nprocs) {
- MPI_Info_set(info, "cb_nodes", value);
- fd->hints->cb_nodes = intval;
- }
- }
+ if (!nprocs_is_valid) {
+ /* if hints were already initialized, we might not
+ * have already gotten this?
+ */
+ MPI_Comm_size(fd->comm, &nprocs);
+ nprocs_is_valid = 1;
+ }
+ if (intval <= nprocs) {
+ ADIOI_Info_set(info, "cb_nodes", value);
+ fd->hints->cb_nodes = intval;
+ }
+ }
+ } /* if (ok_to_override_cb_nodes) */
- MPI_Info_get(users_info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(users_info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL,
value, &flag);
if (flag && ((intval = atoi(value)) > 0)) {
- MPI_Info_set(info, "ind_wr_buffer_size", value);
+ ADIOI_Info_set(info, "ind_wr_buffer_size", value);
fd->hints->ind_wr_buffer_size = intval;
}
- MPI_Info_get(users_info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(users_info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL,
value, &flag);
if (flag && ((intval = atoi(value)) > 0)) {
- MPI_Info_set(info, "ind_rd_buffer_size", value);
+ ADIOI_Info_set(info, "ind_rd_buffer_size", value);
fd->hints->ind_rd_buffer_size = intval;
}
- MPI_Info_get(users_info, "cb_config_list", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(users_info, "cb_config_list", MPI_MAX_INFO_VAL,
value, &flag);
if (flag) {
if (fd->hints->cb_config_list == NULL) {
@@ -301,7 +480,7 @@ void ADIOI_GEN_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
* the cb_config_list hint will be set at file open time
* either by the user or to the default
*/
- MPI_Info_set(info, "cb_config_list", value);
+ ADIOI_Info_set(info, "cb_config_list", value);
len = (strlen(value)+1) * sizeof(char);
fd->hints->cb_config_list = ADIOI_Malloc(len);
if (fd->hints->cb_config_list == NULL) {
@@ -314,13 +493,27 @@ void ADIOI_GEN_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
* info value with a cb_config_list value in it in a couple
* of calls, which would be irritating. */
}
+ ADIOI_Info_get(users_info, "romio_min_fdomain_size", MPI_MAX_INFO_VAL,
+ value, &flag);
+ if ( flag && ((intval = atoi(value)) > 0) ) {
+ ADIOI_Info_set(info, "romio_min_fdomain_size", value);
+ fd->hints->min_fdomain_size = intval;
+ }
+ /* Now we use striping unit in common code so we should
+ process hints for it. */
+ ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
+ value, &flag);
+ if ( flag && ((intval = atoi(value)) > 0) ) {
+ ADIOI_Info_set(info, "striping_unit", value);
+ fd->hints->striping_unit = intval;
+ }
}
/* handle cb_config_list default value here; avoids an extra
* free/alloc and insures it is always set
*/
if (fd->hints->cb_config_list == NULL) {
- MPI_Info_set(info, "cb_config_list", ADIOI_CB_CONFIG_LIST_DFLT);
+ ADIOI_Info_set(info, "cb_config_list", ADIOI_CB_CONFIG_LIST_DFLT);
len = (strlen(ADIOI_CB_CONFIG_LIST_DFLT)+1) * sizeof(char);
fd->hints->cb_config_list = ADIOI_Malloc(len);
if (fd->hints->cb_config_list == NULL) {
@@ -341,25 +534,24 @@ void ADIOI_GEN_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
* disable at the same time doesn't make sense. honor
* romio_cb_{read,write} and force the no_indep_rw hint to
* 'disable' */
- MPI_Info_set(info, "romio_no_indep_rw", "false");
+ ADIOI_Info_set(info, "romio_no_indep_rw", "false");
fd->hints->no_indep_rw = 0;
fd->hints->deferred_open = 0;
}
- if ((fd->file_system == ADIO_PIOFS) || (fd->file_system == ADIO_PVFS) ||
- (fd->file_system == ADIO_PVFS2) ) {
- /* no data sieving for writes in PIOFS, PVFS and PVFS2, because they do not
+ if (ADIO_Feature(fd, ADIO_DATA_SIEVING_WRITES) == 0) {
+ /* disable data sieving for fs that do not
support file locking */
- MPI_Info_get(info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL,
+ ADIOI_Info_get(info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL,
value, &flag);
if (flag) {
/* get rid of this value if it is set */
- MPI_Info_delete(info, "ind_wr_buffer_size");
+ ADIOI_Info_delete(info, "ind_wr_buffer_size");
}
/* note: leave ind_wr_buffer_size alone; used for other cases
* as well. -- Rob Ross, 04/22/2003
*/
- MPI_Info_set(info, "romio_ds_write", "disable");
+ ADIOI_Info_set(info, "romio_ds_write", "disable");
fd->hints->ds_write = ADIOI_HINT_DISABLE;
}
diff --git a/ompi/mca/io/romio/romio/adio/common/ad_init.c b/ompi/mca/io/romio/romio/adio/common/ad_init.c
index 1ca0cbb2f9..7ec0dac615 100644
--- a/ompi/mca/io/romio/romio/adio/common/ad_init.c
+++ b/ompi/mca/io/romio/romio/adio/common/ad_init.c
@@ -6,7 +6,6 @@
*/
#include "adio.h"
-#include "adio_extern.h"
ADIOI_Flatlist_node *ADIOI_Flatlist = NULL;
ADIOI_Datarep *ADIOI_Datarep_head = NULL;
@@ -22,6 +21,10 @@ MPI_Info *MPIR_Infotable = NULL;
int MPIR_Infotable_ptr = 0, MPIR_Infotable_max = 0;
#endif
+MPI_Info ADIOI_syshints = MPI_INFO_NULL;
+
+MPI_Op ADIO_same_amode=MPI_OP_NULL;
+
#if defined(ROMIO_XFS) || defined(ROMIO_LUSTRE)
int ADIOI_Direct_read = 0, ADIOI_Direct_write = 0;
#endif
@@ -30,6 +33,20 @@ int ADIO_Init_keyval=MPI_KEYVAL_INVALID;
MPI_Errhandler ADIOI_DFLT_ERR_HANDLER = MPI_ERRORS_RETURN;
+
+static void my_consensus(void *invec, void *inoutvec, int *len, MPI_Datatype *datatype)
+{
+ int i, *in, *inout;
+ in = (int*)invec;
+ inout = (int*)inoutvec;
+ /* compare the two int vectors; mark each differing slot as ADIO_AMODE_NOMATCH */
+ for (i=0; i< *len; i++) {
+ if (in[i] != inout[i])
+ inout[i] = ADIO_AMODE_NOMATCH;
+ }
+ return;
+}
+
void ADIO_Init(int *argc, char ***argv, int *error_code)
{
#if defined(ROMIO_XFS) || defined(ROMIO_LUSTRE)
@@ -57,6 +74,12 @@ void ADIO_Init(int *argc, char ***argv, int *error_code)
else ADIOI_Direct_write = 0;
#endif
+ /* Assume system-wide hints won't change between runs: move hint processing
+ * from ADIO_Open to here */
+ /* FIXME should be checking error code from MPI_Info_create here */
+ MPI_Info_create(&ADIOI_syshints);
+ ADIOI_process_system_hints(ADIOI_syshints);
+
#ifdef ADIOI_MPE_LOGGING
{
MPE_Log_get_state_eventIDs( &ADIOI_MPE_open_a, &ADIOI_MPE_open_b );
@@ -71,9 +94,12 @@ void ADIO_Init(int *argc, char ***argv, int *error_code)
MPE_Log_get_state_eventIDs( &ADIOI_MPE_unlock_a, &ADIOI_MPE_unlock_b );
MPE_Log_get_state_eventIDs( &ADIOI_MPE_postwrite_a,
&ADIOI_MPE_postwrite_b );
+ MPE_Log_get_state_eventIDs( &ADIOI_MPE_openinternal_a,
+ &ADIOI_MPE_openinternal_b);
+ MPE_Log_get_state_eventIDs( &ADIOI_MPE_stat_a, &ADIOI_MPE_stat_b);
int comm_world_rank;
- PMPI_Comm_rank( MPI_COMM_WORLD, &comm_world_rank );
+ MPI_Comm_rank( MPI_COMM_WORLD, &comm_world_rank );
if ( comm_world_rank == 0 ) {
MPE_Describe_state( ADIOI_MPE_open_a, ADIOI_MPE_open_b,
@@ -94,9 +120,12 @@ void ADIO_Init(int *argc, char ***argv, int *error_code)
"unlock", "purple" );
MPE_Describe_state( ADIOI_MPE_postwrite_a, ADIOI_MPE_postwrite_b,
"postwrite", "ivory" );
+ MPE_Describe_state( ADIOI_MPE_openinternal_a, ADIOI_MPE_openinternal_b, "open system", "blue");
+ MPE_Describe_state( ADIOI_MPE_stat_a, ADIOI_MPE_stat_b, "stat", "purple");
}
}
#endif
*error_code = MPI_SUCCESS;
+ MPI_Op_create(my_consensus, 1, &ADIO_same_amode);
}
diff --git a/ompi/mca/io/romio/romio/adio/common/ad_io_coll.c b/ompi/mca/io/romio/romio/adio/common/ad_io_coll.c
new file mode 100644
index 0000000000..6c2c275645
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/common/ad_io_coll.c
@@ -0,0 +1,1131 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ * Copyright (C) 2008 University of Chicago.
+ * See COPYRIGHT notice in top-level directory.
+ */
+
+#include "assert.h"
+#include "adio.h"
+#include "adio_extern.h"
+#ifdef AGGREGATION_PROFILE
+#include "mpe.h"
+#endif
+
+/* #define ALLTOALL */
+
+/* #define DEBUG */
+/* #define DEBUG2 */ /* print buffers */
+
+#define USE_PRE_REQ
+
+static void Exch_data_amounts (ADIO_File fd, int nprocs,
+ ADIO_Offset *client_comm_sz_arr,
+ ADIO_Offset *agg_comm_sz_arr,
+ int *client_alltoallw_counts,
+ int *agg_alltoallw_counts,
+ int *aggregators_done);
+static void post_aggregator_comm (MPI_Comm comm, int rw_type, int nproc,
+ void *cb_buf,
+ MPI_Datatype *client_comm_dtype_arr,
+ ADIO_Offset *client_comm_sz_arr,
+ MPI_Request **requests,
+ int *aggregators_client_count_p);
+
+static void post_client_comm (ADIO_File fd, int rw_type,
+ int agg_rank, void *buf,
+ MPI_Datatype agg_comm_dtype,
+ int agg_alltoallw_count,
+ MPI_Request *request);
+
+/* Avery Ching and Kenin Columa's reworked two-phase algorithm. Key features
+ * - persistent file domains
+ * - an option to use alltoall instead of point-to-point
+ */
+void ADIOI_IOStridedColl (ADIO_File fd, void *buf, int count, int rdwr,
+ MPI_Datatype datatype, int file_ptr_type,
+ ADIO_Offset offset, ADIO_Status *status,
+ int *error_code)
+{
+ ADIO_Offset min_st_offset=0, max_end_offset=0;
+ ADIO_Offset st_end_offset[2];
+ ADIO_Offset *all_st_end_offsets = NULL;
+ int filetype_is_contig, buftype_is_contig, is_contig;
+ ADIO_Offset orig_fp, off;
+ int interleave_count = 0, i, nprocs, myrank, nprocs_for_coll;
+ int cb_enable;
+ ADIO_Offset bufsize;
+ MPI_Aint extent, bufextent;
+ int size;
+ int agg_rank;
+
+ ADIO_Offset agg_disp; /* aggregated file offset */
+ MPI_Datatype agg_dtype; /* aggregated file datatype */
+
+ int aggregators_done = 0;
+ ADIO_Offset buffered_io_size = 0;
+
+ int *alltoallw_disps;
+
+ int *alltoallw_counts;
+ int *client_alltoallw_counts;
+ int *agg_alltoallw_counts;
+
+ char *cb_buf = NULL;
+
+ MPI_Datatype *client_comm_dtype_arr; /* aggregator perspective */
+ MPI_Datatype *agg_comm_dtype_arr; /* client perspective */
+ ADIO_Offset *client_comm_sz_arr; /* aggregator perspective */
+ ADIO_Offset *agg_comm_sz_arr; /* client perspective */
+
+ /* file views for each client and aggregator */
+ view_state *client_file_view_state_arr = NULL;
+ view_state *agg_file_view_state_arr = NULL;
+ /* mem views for local process */
+ view_state *my_mem_view_state_arr = NULL;
+
+ MPI_Status *agg_comm_statuses = NULL;
+ MPI_Request *agg_comm_requests = NULL;
+ MPI_Status *client_comm_statuses = NULL;
+ MPI_Request *client_comm_requests = NULL;
+ int aggs_client_count = 0;
+ int clients_agg_count = 0;
+
+ MPI_Comm_size (fd->comm, &nprocs);
+ MPI_Comm_rank (fd->comm, &myrank);
+#ifdef DEBUG
+ fprintf (stderr, "p%d: entering ADIOI_IOStridedColl\n", myrank);
+#endif
+#ifdef AGGREGATION_PROFILE
+ if (rdwr == ADIOI_READ)
+ MPE_Log_event (5010, 0, NULL);
+ else
+ MPE_Log_event (5012, 0, NULL);
+#endif
+
+ /* I need to check if there are any outstanding nonblocking writes
+ to the file, which could potentially interfere with the writes
+ taking place in this collective write call. Since this is not
+ likely to be common, let me do the simplest thing possible here:
+ Each process completes all pending nonblocking operations before
+ completing. */
+
+ nprocs_for_coll = fd->hints->cb_nodes;
+ orig_fp = fd->fp_ind;
+
+ if (rdwr == ADIOI_READ)
+ cb_enable = fd->hints->cb_read;
+ else
+ cb_enable = fd->hints->cb_write;
+
+ /* only check for interleaving if collective buffering isn't disabled */
+ if (cb_enable != ADIOI_HINT_DISABLE) {
+ /* find the starting and ending byte of my I/O access */
+ ADIOI_Calc_bounds (fd, count, datatype, file_ptr_type, offset,
+ &st_end_offset[0], &st_end_offset[1]);
+
+ /* allocate an array of start/end pairs */
+ all_st_end_offsets = (ADIO_Offset *)
+ ADIOI_Malloc (2*nprocs*sizeof(ADIO_Offset));
+ MPI_Allgather (st_end_offset, 2, ADIO_OFFSET, all_st_end_offsets, 2,
+ ADIO_OFFSET, fd->comm);
+
+ min_st_offset = all_st_end_offsets[0];
+ max_end_offset = all_st_end_offsets[1];
+
+ for (i=1; ifiletype, &filetype_is_contig);
+
+ if ((cb_enable == ADIOI_HINT_DISABLE
+ || (!interleave_count && (cb_enable == ADIOI_HINT_AUTO)))
+ && (fd->hints->cb_pfr != ADIOI_HINT_ENABLE)){
+ if (cb_enable != ADIOI_HINT_DISABLE) {
+ ADIOI_Free (all_st_end_offsets);
+ }
+
+ if (buftype_is_contig && filetype_is_contig) {
+ if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
+ off = fd->disp + (fd->etype_size) * offset;
+ if (rdwr == ADIOI_READ)
+ ADIO_ReadContig(fd, buf, count, datatype,
+ ADIO_EXPLICIT_OFFSET, off, status,
+ error_code);
+ else
+ ADIO_WriteContig(fd, buf, count, datatype,
+ ADIO_EXPLICIT_OFFSET, off, status,
+ error_code);
+ }
+ else {
+ if (rdwr == ADIOI_READ)
+ ADIO_ReadContig(fd, buf, count, datatype, ADIO_INDIVIDUAL,
+ 0, status, error_code);
+ else
+ ADIO_WriteContig(fd, buf, count, datatype, ADIO_INDIVIDUAL,
+ 0, status, error_code);
+ }
+ }
+ else {
+ if (rdwr == ADIOI_READ)
+ ADIO_ReadStrided(fd, buf, count, datatype, file_ptr_type,
+ offset, status, error_code);
+ else
+ ADIO_WriteStrided(fd, buf, count, datatype, file_ptr_type,
+ offset, status, error_code);
+ }
+ return;
+ }
+
+ MPI_Type_extent(datatype, &extent);
+ bufextent = extent * count;
+ MPI_Type_size(datatype, &size);
+ bufsize = size * count;
+
+ /* Calculate file realms */
+ if ((fd->hints->cb_pfr != ADIOI_HINT_ENABLE) ||
+ (fd->file_realm_types == NULL))
+ ADIOI_Calc_file_realms (fd, min_st_offset, max_end_offset);
+
+ my_mem_view_state_arr = (view_state *)
+ ADIOI_Calloc (1, nprocs * sizeof(view_state));
+ agg_file_view_state_arr = (view_state *)
+ ADIOI_Calloc (1, nprocs * sizeof(view_state));
+ client_comm_sz_arr = (ADIO_Offset *)
+ ADIOI_Calloc (1, nprocs * sizeof(ADIO_Offset));
+
+ if (fd->is_agg) {
+ client_file_view_state_arr = (view_state *)
+ ADIOI_Calloc (1, nprocs * sizeof(view_state));
+ }
+ else {
+ client_file_view_state_arr = NULL;
+ }
+
+ /* Alltoallw doesn't like a null array even if the counts are
+ * zero. If you do not include this code, it will fail. */
+ client_comm_dtype_arr = (MPI_Datatype *)
+ ADIOI_Calloc (1, nprocs * sizeof(MPI_Datatype));
+ if (!fd->is_agg)
+ for (i = 0; i < nprocs; i++)
+ client_comm_dtype_arr[i] = MPI_BYTE;
+
+ ADIOI_Exch_file_views (myrank, nprocs, file_ptr_type, fd, count,
+ datatype, offset, my_mem_view_state_arr,
+ agg_file_view_state_arr,
+ client_file_view_state_arr);
+
+ agg_comm_sz_arr = (ADIO_Offset *)
+ ADIOI_Calloc (1, nprocs * sizeof(ADIO_Offset));
+ agg_comm_dtype_arr = (MPI_Datatype *)
+ ADIOI_Malloc (nprocs * sizeof(MPI_Datatype));
+ if (fd->is_agg) {
+ ADIOI_Build_agg_reqs (fd, rdwr, nprocs,
+ client_file_view_state_arr,
+ client_comm_dtype_arr,
+ client_comm_sz_arr,
+ &agg_disp,
+ &agg_dtype);
+ buffered_io_size = 0;
+ for (i=0; i 0)
+ buffered_io_size += client_comm_sz_arr[i];
+ }
+ }
+#ifdef USE_PRE_REQ
+ else
+ {
+ /* Example use of ADIOI_Build_client_pre_req. to an
+ * appropriate section */
+
+ for (i = 0; i < fd->hints->cb_nodes; i++)
+ {
+ agg_rank = fd->hints->ranklist[(i+myrank)%fd->hints->cb_nodes];
+#ifdef AGGREGATION_PROFILE
+ MPE_Log_event (5040, 0, NULL);
+#endif
+ ADIOI_Build_client_pre_req(
+ fd, agg_rank, (i+myrank)%fd->hints->cb_nodes,
+ &(my_mem_view_state_arr[agg_rank]),
+ &(agg_file_view_state_arr[agg_rank]),
+ 2*1024*1024,
+ 64*1024);
+#ifdef AGGREGATION_PROFILE
+ MPE_Log_event (5041, 0, NULL);
+#endif
+ }
+ }
+#endif
+
+
+ if (fd->is_agg)
+ cb_buf = (char *) ADIOI_Malloc (fd->hints->cb_buffer_size);
+ alltoallw_disps = (int *) ADIOI_Calloc (nprocs, sizeof(int));
+ alltoallw_counts = client_alltoallw_counts = (int *)
+ ADIOI_Calloc (2*nprocs, sizeof(int));
+ agg_alltoallw_counts = &alltoallw_counts[nprocs];
+
+ if (fd->hints->cb_alltoall == ADIOI_HINT_DISABLE) {
+ /* aggregators pre-post all Irecv's for incoming data from clients */
+ if ((fd->is_agg) && (rdwr == ADIOI_WRITE))
+ post_aggregator_comm(fd->comm, rdwr, nprocs, cb_buf,
+ client_comm_dtype_arr,
+ client_comm_sz_arr,
+ &agg_comm_requests,
+ &aggs_client_count);
+ }
+ /* Aggregators send amounts for data requested to clients */
+ Exch_data_amounts (fd, nprocs, client_comm_sz_arr, agg_comm_sz_arr,
+ client_alltoallw_counts, agg_alltoallw_counts,
+ &aggregators_done);
+
+#ifdef DEBUG
+ fprintf (stderr, "client_alltoallw_counts[ ");
+ for (i=0; ihints->cb_alltoall == ADIOI_HINT_DISABLE) {
+ /* clients should build datatypes for local memory locations
+ for data communication with aggregators and post
+ communication as the datatypes are built */
+
+ client_comm_requests = (MPI_Request *)
+ ADIOI_Calloc (fd->hints->cb_nodes, sizeof(MPI_Request));
+
+ for (i = 0; i < fd->hints->cb_nodes; i++)
+ {
+ clients_agg_count = 0;
+ agg_rank = fd->hints->ranklist[(i+myrank)%fd->hints->cb_nodes];
+ if (agg_comm_sz_arr[agg_rank] > 0) {
+ ADIOI_Build_client_req(fd, agg_rank,
+ (i+myrank)%fd->hints->cb_nodes,
+ &(my_mem_view_state_arr[agg_rank]),
+ &(agg_file_view_state_arr[agg_rank]),
+ agg_comm_sz_arr[agg_rank],
+ &(agg_comm_dtype_arr[agg_rank]));
+
+#ifdef AGGREGATION_PROFILE
+ if (i == 0)
+ MPE_Log_event (5038, 0, NULL);
+#endif
+ post_client_comm (fd, rdwr, agg_rank, buf,
+ agg_comm_dtype_arr[agg_rank],
+ agg_alltoallw_counts[agg_rank],
+ &client_comm_requests[clients_agg_count]);
+ clients_agg_count++;
+ }
+ }
+#ifdef AGGREGATION_PROFILE
+ if (!clients_agg_count)
+ MPE_Log_event(5039, 0, NULL);
+#endif
+
+ if (rdwr == ADIOI_READ) {
+ if (fd->is_agg && buffered_io_size) {
+ ADIOI_IOFiletype (fd, cb_buf, buffered_io_size, MPI_BYTE,
+ ADIO_EXPLICIT_OFFSET, agg_disp, agg_dtype,
+ ADIOI_READ, status, error_code);
+ if (*error_code != MPI_SUCCESS) return;
+ MPI_Type_free (&agg_dtype);
+ }
+
+#ifdef DEBUG
+ fprintf (stderr, "expecting from [agg](disp,size,cnt)=");
+ for (i=0; i < nprocs; i++) {
+ MPI_Type_size (agg_comm_dtype_arr[i], &size);
+ fprintf (stderr, "[%d](%d,%d,%d)", i, alltoallw_disps[i],
+ size, agg_alltoallw_counts[i]);
+ if (i != nprocs - 1)
+ fprintf(stderr, ",");
+ }
+ fprintf (stderr, "]\n");
+ if (fd->is_agg) {
+ fprintf (stderr, "sending to [client](disp,size,cnt)=");
+ for (i=0; i < nprocs; i++) {
+ if (fd->is_agg)
+ MPI_Type_size (client_comm_dtype_arr[i], &size);
+ else
+ size = -1;
+
+ fprintf (stderr, "[%d](%d,%d,%d)", i, alltoallw_disps[i],
+ size, client_alltoallw_counts[i]);
+ if (i != nprocs - 1)
+ fprintf(stderr, ",");
+ }
+ fprintf (stderr,"\n");
+ }
+ fflush (NULL);
+#endif
+ /* aggregators post all Isends for outgoing data to clients */
+ if (fd->is_agg)
+ post_aggregator_comm(fd->comm, rdwr, nprocs, cb_buf,
+ client_comm_dtype_arr,
+ client_comm_sz_arr,
+ &agg_comm_requests,
+ &aggs_client_count);
+
+ if (fd->is_agg && aggs_client_count) {
+ agg_comm_statuses = ADIOI_Malloc(aggs_client_count *
+ sizeof(MPI_Status));
+ MPI_Waitall(aggs_client_count, agg_comm_requests,
+ agg_comm_statuses);
+#ifdef AGGREGATION_PROFILE
+ MPE_Log_event (5033, 0, NULL);
+#endif
+ ADIOI_Free (agg_comm_requests);
+ ADIOI_Free (agg_comm_statuses);
+ }
+
+ if (clients_agg_count) {
+ client_comm_statuses = ADIOI_Malloc(clients_agg_count *
+ sizeof(MPI_Status));
+ MPI_Waitall(clients_agg_count, client_comm_requests,
+ client_comm_statuses);
+#ifdef AGGREGATION_PROFILE
+ MPE_Log_event (5039, 0, NULL);
+#endif
+ ADIOI_Free (client_comm_requests);
+ ADIOI_Free (client_comm_statuses);
+ }
+
+#ifdef DEBUG2
+ fprintf (stderr, "buffered_io_size = %lld\n", buffered_io_size);
+ if (fd->is_agg && buffered_io_size) {
+ fprintf (stderr, "buf = [");
+ for (i=0; iis_agg)
+ MPI_Type_size (client_comm_dtype_arr[i], &size);
+ else
+ size = -1;
+
+ fprintf (stderr, "[%d](%d,%d,%d)", i, alltoallw_disps[i],
+ size, client_alltoallw_counts[i]);
+ if (i != nprocs - 1)
+ fprintf(stderr, ",");
+ }
+ fprintf (stderr,"\n");
+ fflush (NULL);
+#endif
+#ifdef DEBUG
+ fprintf (stderr, "buffered_io_size = %lld\n", buffered_io_size);
+#endif
+
+ if (clients_agg_count) {
+ client_comm_statuses = ADIOI_Malloc(clients_agg_count *
+ sizeof(MPI_Status));
+ MPI_Waitall(clients_agg_count, client_comm_requests,
+ client_comm_statuses);
+#ifdef AGGREGATION_PROFILE
+ MPE_Log_event (5039, 0, NULL);
+#endif
+ ADIOI_Free(client_comm_requests);
+ ADIOI_Free(client_comm_statuses);
+ }
+#ifdef DEBUG2
+ if (bufextent) {
+ fprintf (stderr, "buf = [");
+ for (i=0; iis_agg && buffered_io_size) {
+ assert (aggs_client_count != 0);
+ /* make sure we actually have the data to write out */
+ agg_comm_statuses = (MPI_Status *)
+ ADIOI_Malloc (aggs_client_count*sizeof(MPI_Status));
+
+ MPI_Waitall (aggs_client_count, agg_comm_requests,
+ agg_comm_statuses);
+#ifdef AGGREGATION_PROFILE
+ MPE_Log_event (5033, 0, NULL);
+#endif
+ ADIOI_Free (agg_comm_requests);
+ ADIOI_Free (agg_comm_statuses);
+#ifdef DEBUG2
+ fprintf (stderr, "cb_buf = [");
+ for (i=0; iis_agg && buffered_io_size) {
+ ADIOI_IOFiletype (fd, cb_buf, buffered_io_size, MPI_BYTE,
+ ADIO_EXPLICIT_OFFSET, agg_disp, agg_dtype,
+ ADIOI_READ, status, error_code);
+ if (*error_code != MPI_SUCCESS) return;
+ MPI_Type_free (&agg_dtype);
+ }
+
+#ifdef AGGREGATION_PROFILE
+ MPE_Log_event (5032, 0, NULL);
+#endif
+ MPI_Alltoallw (cb_buf, client_alltoallw_counts, alltoallw_disps,
+ client_comm_dtype_arr,
+ buf, agg_alltoallw_counts , alltoallw_disps,
+ agg_comm_dtype_arr,
+ fd->comm);
+#ifdef AGGREGATION_PROFILE
+ MPE_Log_event (5033, 0, NULL);
+#endif
+ }
+ else { /* Write Case */
+#ifdef AGGREGATION_PROFILE
+ MPE_Log_event (5032, 0, NULL);
+#endif
+ MPI_Alltoallw (buf, agg_alltoallw_counts, alltoallw_disps,
+ agg_comm_dtype_arr,
+ cb_buf, client_alltoallw_counts, alltoallw_disps,
+ client_comm_dtype_arr,
+ fd->comm);
+#ifdef AGGREGATION_PROFILE
+ MPE_Log_event (5033, 0, NULL);
+#endif
+ if (fd->is_agg && buffered_io_size) {
+ ADIOI_IOFiletype (fd, cb_buf, buffered_io_size, MPI_BYTE,
+ ADIO_EXPLICIT_OFFSET, agg_disp, agg_dtype,
+ ADIOI_WRITE, status, error_code);
+ if (*error_code != MPI_SUCCESS) return;
+ MPI_Type_free (&agg_dtype);
+ }
+ }
+ }
+
+ /* Free (uncommit) datatypes for reuse */
+ if (fd->is_agg) {
+ if (buffered_io_size > 0) {
+ for (i=0; i 0)
+ MPI_Type_free (&client_comm_dtype_arr[i]);
+ }
+ }
+ }
+ for (i=0; i 0)
+ MPI_Type_free (&agg_comm_dtype_arr[i]);
+ }
+
+ /* figure out next set up requests */
+ if (fd->is_agg) {
+ ADIOI_Build_agg_reqs (fd, rdwr, nprocs,
+ client_file_view_state_arr,
+ client_comm_dtype_arr,
+ client_comm_sz_arr,
+ &agg_disp,
+ &agg_dtype);
+ buffered_io_size = 0;
+ for (i=0; i 0)
+ buffered_io_size += client_comm_sz_arr[i];
+ }
+ }
+#ifdef USE_PRE_REQ
+ else {
+ /* Example use of ADIOI_Build_client_pre_req. to an
+ * appropriate section */
+ for (i = 0; i < fd->hints->cb_nodes; i++)
+ {
+ agg_rank = fd->hints->ranklist[(i+myrank)%fd->hints->cb_nodes];
+#ifdef AGGREGATION_PROFILE
+ MPE_Log_event (5040, 0, NULL);
+#endif
+ ADIOI_Build_client_pre_req(
+ fd, agg_rank, (i+myrank)%fd->hints->cb_nodes,
+ &(my_mem_view_state_arr[agg_rank]),
+ &(agg_file_view_state_arr[agg_rank]),
+ 2*1024*1024,
+ 64*1024);
+#ifdef AGGREGATION_PROFILE
+ MPE_Log_event (5041, 0, NULL);
+#endif
+ }
+ }
+#endif
+
+ /* aggregators pre-post all Irecv's for incoming data from
+ * clients. if nothing is needed, agg_comm_requests is not
+ * allocated */
+ if (fd->hints->cb_alltoall == ADIOI_HINT_DISABLE) {
+ if ((fd->is_agg) && (rdwr == ADIOI_WRITE))
+ post_aggregator_comm(fd->comm, rdwr, nprocs, cb_buf,
+ client_comm_dtype_arr,
+ client_comm_sz_arr,
+ &agg_comm_requests,
+ &aggs_client_count);
+ }
+
+ /* Aggregators send amounts for data requested to clients */
+ Exch_data_amounts (fd, nprocs, client_comm_sz_arr, agg_comm_sz_arr,
+ client_alltoallw_counts, agg_alltoallw_counts,
+ &aggregators_done);
+
+ }
+
+ /* Clean up */
+
+ if (fd->hints->cb_pfr != ADIOI_HINT_ENABLE) {
+ /* AAR, FSIZE, and User provided uniform File realms */
+ if (1) {
+ ADIOI_Delete_flattened (fd->file_realm_types[0]);
+ MPI_Type_free (&fd->file_realm_types[0]);
+ }
+ else {
+ for (i=0; ihints->cb_nodes; i++) {
+ ADIOI_Datatype_iscontig(fd->file_realm_types[i], &is_contig);
+ if (!is_contig)
+ ADIOI_Delete_flattened(fd->file_realm_types[i]);
+ MPI_Type_free (&fd->file_realm_types[i]);
+ }
+ }
+ ADIOI_Free (fd->file_realm_types);
+ ADIOI_Free (fd->file_realm_st_offs);
+ }
+
+ /* This memtype must be deleted from the ADIOI_Flatlist or else it
+ * will match incorrectly with other datatypes which use this
+ * pointer. */
+ ADIOI_Delete_flattened(datatype);
+ ADIOI_Delete_flattened(fd->filetype);
+
+ if (fd->is_agg) {
+ if (buffered_io_size > 0)
+ MPI_Type_free (&agg_dtype);
+ for (i=0; iindices);
+ ADIOI_Free (client_file_view_state_arr[i].flat_type_p->blocklens);
+ ADIOI_Free (client_file_view_state_arr[i].flat_type_p);
+ }
+ ADIOI_Free (client_file_view_state_arr);
+ ADIOI_Free (cb_buf);
+ }
+ for (i = 0; i 0)
+ MPI_Type_free (&agg_comm_dtype_arr[i]);
+
+ ADIOI_Free (client_comm_sz_arr);
+ ADIOI_Free (client_comm_dtype_arr);
+ ADIOI_Free (my_mem_view_state_arr);
+ ADIOI_Free (agg_file_view_state_arr);
+ ADIOI_Free (agg_comm_sz_arr);
+ ADIOI_Free (agg_comm_dtype_arr);
+ ADIOI_Free (alltoallw_disps);
+ ADIOI_Free (alltoallw_counts);
+ ADIOI_Free (all_st_end_offsets);
+
+#ifdef HAVE_STATUS_SET_BYTES
+ MPIR_Status_set_bytes(status, datatype, bufsize);
+ /* This is a temporary way of filling in status. The right way is
+ * to keep track of how much data was actually read and placed in
+ * buf during collective I/O. */
+#endif
+ fd->fp_sys_posn = -1; /* set it to null. */
+#ifdef AGGREGATION_PROFILE
+ if (rdwr == ADIOI_READ)
+ MPE_Log_event (5011, 0, NULL);
+ else
+ MPE_Log_event (5013, 0, NULL);
+#endif
+}
+
+
+/* Compute the first and last absolute file byte touched by an access of
+ * `count` elements of `buftype` made through the current file view of `fd`.
+ * Some of this code is from the old Calc_my_off_len() function.
+ *
+ *   fd            - file; its filetype, etype, disp and fp_ind are read
+ *   count         - number of buftype elements in the access
+ *   buftype       - memory datatype; only its size is used here
+ *   file_ptr_type - ADIO_INDIVIDUAL starts at fd->fp_ind; any other value
+ *                   starts `offset` etypes past fd->disp
+ *   offset        - starting offset in etypes (unused for ADIO_INDIVIDUAL)
+ *   st_offset     - out: first byte accessed
+ *   end_offset    - out: last byte accessed (inclusive)
+ *
+ * When `count` is zero the result is an empty range: *st_offset receives a
+ * large positive sentinel and *end_offset receives -1.
+ *
+ * For a non-contiguous filetype the flattened representation is looked up
+ * in ADIOI_Flatlist without a NULL check, so the filetype is assumed to
+ * have been flattened already. */
+void ADIOI_Calc_bounds (ADIO_File fd, int count, MPI_Datatype buftype,
+                        int file_ptr_type, ADIO_Offset offset,
+                        ADIO_Offset *st_offset, ADIO_Offset *end_offset)
+{
+    int filetype_size, buftype_size, etype_size;
+    int i, sum;
+    MPI_Aint filetype_extent;
+    ADIO_Offset total_io;
+    int filetype_is_contig;
+    int remainder;
+    ADIOI_Flatlist_node *flat_file;
+
+    ADIO_Offset st_byte_off, end_byte_off;
+
+#ifdef AGGREGATION_PROFILE
+    MPE_Log_event (5000, 0, NULL);
+#endif
+
+    if (!count) {
+        /* Intended as the max signed positive value for ADIO_Offset
+         * (arch. dependent?).  is there a better way?
+         * NOTE: filling every byte with 8 and halving gives 0x0404...04,
+         * which is large and positive but not the true maximum. */
+        memset (st_offset, 8, sizeof(ADIO_Offset));
+        *st_offset = *st_offset / 2;
+        *end_offset = -1;
+        return;
+    }
+
+    ADIOI_Datatype_iscontig (fd->filetype, &filetype_is_contig);
+
+    MPI_Type_size (fd->filetype, &filetype_size);
+    MPI_Type_extent (fd->filetype, &filetype_extent);
+    MPI_Type_size (fd->etype, &etype_size);
+    MPI_Type_size (buftype, &buftype_size);
+
+    /* widen before multiplying so large requests do not overflow int */
+    total_io = (ADIO_Offset) buftype_size * (ADIO_Offset) count;
+
+    if (filetype_is_contig) {
+        if (file_ptr_type == ADIO_INDIVIDUAL)
+            st_byte_off = fd->fp_ind;
+        else
+            st_byte_off = fd->disp + etype_size * offset;
+
+        end_byte_off = st_byte_off + total_io - 1;
+    }
+    else {
+        flat_file = ADIOI_Flatlist;
+        while (flat_file->type != fd->filetype) flat_file = flat_file->next;
+
+        /* we need to take care of some weirdness since fd->fp_ind
+           points at an accessible byte in file.  the first accessible
+           byte in the file is not necessarily the first byte, nor is
+           it necessarily the first off/len pair in the filetype. */
+        if (file_ptr_type == ADIO_INDIVIDUAL) {
+            st_byte_off = fd->fp_ind;
+            /* find end byte of I/O (may be in middle of an etype) */
+
+            /* calculate byte starting point of first filetype */
+            end_byte_off = (ADIO_Offset)
+                ((fd->fp_ind - fd->disp - flat_file->indices[0]) /
+                 filetype_extent) * filetype_extent + fd->disp +
+                flat_file->indices[0];
+            /* number of absolute bytes into first filetype */
+            remainder = (fd->fp_ind - fd->disp - flat_file->indices[0]) %
+                filetype_extent;
+            if (remainder) {
+                /* find how many file viewable bytes into first filetype */
+                sum = 0;
+                for (i=0; i<flat_file->count; i++) {
+                    sum += flat_file->blocklens[i];
+                    if ((flat_file->indices[i] - flat_file->indices[0] +
+                         flat_file->blocklens[i]) >= remainder) {
+                        sum -= (flat_file->blocklens[i] - (sum - remainder));
+                        break;
+                    }
+                }
+                total_io += sum;
+            }
+            /* byte starting point of last filetype */
+            end_byte_off += (total_io - 1) / filetype_size * filetype_extent;
+            /* number of bytes into last filetype */
+            remainder = total_io % filetype_size;
+            if (!remainder) {
+                /* the access ends on a filetype boundary: use the last
+                 * off/len pair with a non-zero length */
+                for (i=flat_file->count - 1; i>=0; i--) {
+                    if (flat_file->blocklens[i]) break;
+                }
+                assert (i > -1);
+                end_byte_off += flat_file->indices[i] +
+                    flat_file->blocklens[i] - 1;
+                end_byte_off -= flat_file->indices[0];
+            }
+            else {
+                sum = 0;
+                for (i=0; i<flat_file->count; i++) {
+                    sum += flat_file->blocklens[i];
+                    if (sum >= remainder) {
+                        end_byte_off += flat_file->indices[i] +
+                            flat_file->blocklens[i] - sum + remainder - 1;
+                        break;
+                    }
+                }
+                end_byte_off -= flat_file->indices[0];
+            }
+        }
+        else {
+            /* find starting byte of I/O (must be aligned with an etype) */
+            /* byte starting point of starting filetype */
+            st_byte_off = fd->disp + ((offset * etype_size) / filetype_size) *
+                filetype_extent;
+            /* number of file viewable bytes into starting filetype */
+            remainder = (etype_size * offset) % filetype_size;
+
+            sum = 0;
+            for (i=0; i<flat_file->count; i++) {
+                sum += flat_file->blocklens[i];
+                if (sum >= remainder) {
+                    /* landing exactly on the end of pair i means the
+                     * access starts at the beginning of pair i+1 */
+                    if (sum == remainder)
+                        st_byte_off += flat_file->indices[i+1];
+                    else
+                        st_byte_off += flat_file->indices[i] +
+                            flat_file->blocklens[i] - sum + remainder;
+                    break;
+                }
+            }
+
+            /* find end byte of I/O (may be in middle of an etype) */
+            /* byte starting point of last filetype */
+            end_byte_off = fd->disp + (offset * etype_size + total_io) /
+                filetype_size * filetype_extent;
+            /* number of bytes into last filetype */
+            remainder = (offset * etype_size + total_io) % filetype_size;
+
+            if (!remainder) {
+                /* the last non-zero off/len pair */
+                for (i=flat_file->count-1; i>=0; i--) {
+                    if (flat_file->blocklens[i]) break;
+                }
+                assert (i >= 0);
+                /* back up a whole filetype, and put back up to the
+                 * last byte of the last non-zero offlen pair */
+                /* end_byte_off = (end_byte_off - filetype_extent) +
+                   flat_file->indices[i] +
+                   flat_file->blocklens[i] - 1; */
+                /* equivalent of above commented out equation */
+                end_byte_off -= filetype_extent - flat_file->indices[i] -
+                    flat_file->blocklens[i] + 1;
+            }
+            else {
+                sum = 0;
+                for (i=0; i<flat_file->count; i++) {
+                    sum += flat_file->blocklens[i];
+                    if (sum >= remainder) {
+                        end_byte_off += flat_file->indices[i] +
+                            flat_file->blocklens[i] - sum + remainder - 1;
+                        break;
+                    }
+                }
+            }
+        }
+    }
+
+    *st_offset = st_byte_off;
+    *end_offset = end_byte_off;
+#ifdef DEBUG
+    printf ("st_offset = %lld\nend_offset = %lld\n",
+            st_byte_off, end_byte_off);
+#endif
+#ifdef AGGREGATION_PROFILE
+    MPE_Log_event (5001, 0, NULL);
+#endif
+}
+
+/* Wrapper around ADIO_ReadContig/WriteContig and ADIO_ReadStrided/
+ * WriteStrided, used by the new two-phase code to pass an arbitrary
+ * filetype straight to the I/O call without affecting existing code.
+ *
+ * The file view (filetype, etype, disp), the data-sieving hints and the
+ * independent I/O buffer sizes of `fd` are saved, overridden for the
+ * duration of the call, and restored before returning.  The temporary
+ * view uses `custom_ftype` as filetype and MPI_BYTE as etype.
+ *
+ *   buf, count, datatype - memory description handed to the I/O call
+ *   file_ptr_type        - forwarded unchanged to the I/O call
+ *   offset               - passed as the offset when both the filetype and
+ *                          the memory type are contiguous (disp is 0);
+ *                          otherwise it becomes the temporary disp and the
+ *                          strided call is made with offset 0
+ *   custom_ftype         - filetype to use for this one access
+ *   rdwr                 - ADIOI_READ selects the read calls, anything
+ *                          else the write calls
+ *   status, error_code   - forwarded to, and filled in by, the I/O call */
+void ADIOI_IOFiletype(ADIO_File fd, void *buf, int count,
+                      MPI_Datatype datatype, int file_ptr_type,
+                      ADIO_Offset offset, MPI_Datatype custom_ftype,
+                      int rdwr, ADIO_Status *status, int *error_code)
+{
+    MPI_Datatype user_filetype;
+    MPI_Datatype user_etype;
+    ADIO_Offset user_disp;
+    int user_ind_wr_buffer_size;
+    int user_ind_rd_buffer_size;
+    int f_is_contig, m_is_contig;
+    int user_ds_read, user_ds_write;
+    MPI_Aint f_extent;
+    int f_size;
+    int f_ds_percent; /* size/extent */
+
+#ifdef AGGREGATION_PROFILE
+    if (rdwr == ADIOI_READ)
+        MPE_Log_event(5006, 0, NULL);
+    else
+        MPE_Log_event(5008, 0, NULL);
+#endif
+    MPI_Type_extent(custom_ftype, &f_extent);
+    MPI_Type_size(custom_ftype, &f_size);
+    /* widen before multiplying: 100 * f_size overflows int once the
+     * filetype size exceeds roughly 21 MB */
+    f_ds_percent = (int) ((ADIO_Offset) 100 * f_size / f_extent);
+
+    /* temporarily store file view information */
+    user_filetype = fd->filetype;
+    user_etype = fd->etype;
+    user_disp = fd->disp;
+    user_ds_read = fd->hints->ds_read;
+    user_ds_write = fd->hints->ds_write;
+    /* temporarily override the independent I/O datasieve buffer size */
+    user_ind_wr_buffer_size = fd->hints->ind_wr_buffer_size;
+    user_ind_rd_buffer_size = fd->hints->ind_rd_buffer_size;
+
+    /* set new values for temporary file view */
+    fd->filetype = custom_ftype;
+    fd->etype = MPI_BYTE;
+    /* set new values for independent I/O datasieve buffer size */
+    fd->hints->ind_wr_buffer_size = fd->hints->cb_buffer_size;
+    fd->hints->ind_rd_buffer_size = fd->hints->cb_buffer_size;
+    /* decide whether or not to do datasieving */
+#ifdef DEBUG
+    printf ("f_ds_percent = %d cb_ds_threshold = %d\n", f_ds_percent,
+            fd->hints->cb_ds_threshold);
+#endif
+    if (f_ds_percent >= fd->hints->cb_ds_threshold) {
+        fd->hints->ds_read = ADIOI_HINT_ENABLE;
+        fd->hints->ds_write = ADIOI_HINT_ENABLE;
+    }
+    else {
+        fd->hints->ds_read = ADIOI_HINT_DISABLE;
+        fd->hints->ds_write = ADIOI_HINT_DISABLE;
+    }
+
+    /* flatten the new filetype since the strided calls expect it to
+     * have been flattened in set file view.  in the two phase code,
+     * the datatype passed down should always be MPI_BYTE, and
+     * therefore contiguous, but just for completeness sake, we'll
+     * check the memory datatype anyway */
+    ADIOI_Datatype_iscontig(custom_ftype, &f_is_contig);
+    ADIOI_Datatype_iscontig(datatype, &m_is_contig);
+    if (!f_is_contig)
+        ADIOI_Flatten_datatype (custom_ftype);
+
+    /* make appropriate Read/Write calls.  Let ROMIO figure out file
+     * system specific stuff. */
+    if (f_is_contig && m_is_contig) {
+        fd->disp = 0;
+        if (rdwr == ADIOI_READ)
+            ADIO_ReadContig(fd, buf, count, datatype, file_ptr_type, offset,
+                            status, error_code);
+        else
+            ADIO_WriteContig(fd, buf, count, datatype, file_ptr_type, offset,
+                             status, error_code);
+    }
+    else {
+        fd->disp = offset;
+        if (rdwr == ADIOI_READ)
+            ADIO_ReadStrided(fd, buf, count, datatype, file_ptr_type, 0,
+                             status, error_code);
+        else
+            ADIO_WriteStrided(fd, buf, count, datatype, file_ptr_type, 0,
+                              status, error_code);
+    }
+
+    /* Delete flattened temporary filetype */
+    if (!f_is_contig)
+        ADIOI_Delete_flattened (custom_ftype);
+
+    /* restore the user specified file view to cover our tracks */
+    fd->filetype = user_filetype;
+    fd->etype = user_etype;
+    fd->disp = user_disp;
+    fd->hints->ds_read = user_ds_read;
+    fd->hints->ds_write = user_ds_write;
+    fd->hints->ind_wr_buffer_size = user_ind_wr_buffer_size;
+    fd->hints->ind_rd_buffer_size = user_ind_rd_buffer_size;
+#ifdef AGGREGATION_PROFILE
+    if (rdwr == ADIOI_READ)
+        MPE_Log_event (5007, 0, NULL);
+    else
+        MPE_Log_event (5009, 0, NULL);
+#endif
+}
+
+/* Exchange, between aggregators and clients, the number of bytes each
+ * aggregator will move for each process in the coming round.
+ *
+ *   client_comm_sz_arr      - in: per-rank sizes this process sends out
+ *                             (nprocs entries)
+ *   agg_comm_sz_arr         - out: sizes received, indexed by sender rank
+ *   client_alltoallw_counts - out, aggregators only: 1 for every rank with
+ *                             a positive client size, else 0
+ *   agg_alltoallw_counts    - out: 1 for every sender that announced a
+ *                             positive size, 0 for a zero size; left
+ *                             untouched for a sender that announced -1
+ *   aggregators_done        - out: number of senders that announced -1
+ *
+ * Unless the cb_alltoall hint is ADIOI_HINT_DISABLE a single MPI_Alltoall
+ * over fd->comm is used.  Otherwise only the ranks in fd->hints->ranklist
+ * send, using nonblocking point-to-point messages tagged AMT_TAG. */
+static void Exch_data_amounts (ADIO_File fd, int nprocs,
+                               ADIO_Offset *client_comm_sz_arr,
+                               ADIO_Offset *agg_comm_sz_arr,
+                               int *client_alltoallw_counts,
+                               int *agg_alltoallw_counts,
+                               int *aggregators_done)
+{
+    int i;
+    int recv_idx;
+    MPI_Request *recv_requests;
+    MPI_Request *send_requests;
+    MPI_Status status;
+    MPI_Status *send_statuses;
+    /* Aggregators send amounts for data requested to clients */
+    if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) {
+        MPI_Alltoall (client_comm_sz_arr, sizeof(ADIO_Offset), MPI_BYTE,
+                      agg_comm_sz_arr, sizeof(ADIO_Offset), MPI_BYTE,
+                      fd->comm);
+
+        if (fd->is_agg) {
+            for (i=0; i<nprocs; i++)
+                if (client_comm_sz_arr[i] > 0)
+                    client_alltoallw_counts[i] = 1;
+                else
+                    client_alltoallw_counts[i] = 0;
+        }
+        *aggregators_done = 0;
+        for (i=0; i<nprocs; i++) {
+            /* same three-way classification as the point-to-point path */
+            if (agg_comm_sz_arr[i] == -1)
+                *aggregators_done = *aggregators_done + 1;
+            else if (agg_comm_sz_arr[i] > 0)
+                agg_alltoallw_counts[i] = 1;
+            else
+                agg_alltoallw_counts[i] = 0;
+        }
+    } else {
+        /* let's see if we can't reduce some communication as well as
+         * overlap some communication and work */
+
+        recv_requests = ADIOI_Malloc (fd->hints->cb_nodes * sizeof(MPI_Request));
+        /* post all receives - only receive from aggregators */
+        for (i = 0; i < fd->hints->cb_nodes; i++)
+            MPI_Irecv (&agg_comm_sz_arr[fd->hints->ranklist[i]],
+                       sizeof(ADIO_Offset), MPI_BYTE, fd->hints->ranklist[i],
+                       AMT_TAG, fd->comm, &recv_requests[i]);
+
+        /* Barrier is needed here if we're worried about unexpected
+         * messages being dropped */
+        /* MPI_Barrier (fd->comm); */
+        send_requests = NULL;
+        if (fd->is_agg) {
+            /* only aggregators send data */
+            send_requests = ADIOI_Malloc (nprocs * sizeof(MPI_Request));
+
+            /* post all sends */
+            for (i = 0; i < nprocs; i++) {
+                MPI_Isend (&client_comm_sz_arr[i], sizeof(ADIO_Offset),
+                           MPI_BYTE, i, AMT_TAG, fd->comm, &send_requests[i]);
+
+                if (client_comm_sz_arr[i] > 0)
+                    client_alltoallw_counts[i] = 1;
+                else
+                    client_alltoallw_counts[i] = 0;
+            }
+        }
+
+        *aggregators_done = 0;
+        /* classify each amount as soon as its receive completes */
+        for (i=0; i < fd->hints->cb_nodes; i++) {
+            MPI_Waitany (fd->hints->cb_nodes, recv_requests, &recv_idx, &status);
+            if (agg_comm_sz_arr[fd->hints->ranklist[recv_idx]] == -1)
+                *aggregators_done = *aggregators_done + 1;
+            else if (agg_comm_sz_arr[fd->hints->ranklist[recv_idx]] > 0)
+                agg_alltoallw_counts[fd->hints->ranklist[recv_idx]] = 1;
+            else
+                agg_alltoallw_counts[fd->hints->ranklist[recv_idx]] = 0;
+        }
+
+        ADIOI_Free (recv_requests);
+        if (fd->is_agg) {
+            /* wait for all sends to complete */
+            send_statuses = ADIOI_Malloc (nprocs * sizeof (MPI_Status));
+            MPI_Waitall (nprocs, send_requests, send_statuses);
+            ADIOI_Free (send_requests);
+            ADIOI_Free (send_statuses);
+        }
+    }
+}
+
+/* Aggregator side of the data exchange: post one nonblocking operation on
+ * `comm` (tag DATA_TAG) for every rank whose entry in client_comm_sz_arr
+ * is positive.  ADIOI_WRITE posts receives into cb_buf, any other rw_type
+ * posts sends from cb_buf; each uses one element of that rank's datatype
+ * from client_comm_dtype_arr.
+ *
+ * *aggs_client_count_p always receives the number of such ranks.
+ * *requests_p receives a newly allocated request array only when that
+ * number is non-zero; otherwise it is left untouched. */
+static void post_aggregator_comm (MPI_Comm comm, int rw_type,
+                                  int nproc, void *cb_buf,
+                                  MPI_Datatype *client_comm_dtype_arr,
+                                  ADIO_Offset *client_comm_sz_arr,
+                                  MPI_Request **requests_p,
+                                  int *aggs_client_count_p)
+{
+    MPI_Request *pending;
+    int n_clients = 0;
+    int slot = 0;
+    int rank;
+
+#ifdef DEBUG
+    printf ("posting aggregator communication\n");
+#endif
+
+    /* first pass: how many clients have data to exchange? */
+    rank = 0;
+    while (rank < nproc) {
+        if (client_comm_sz_arr[rank] > 0)
+            n_clients++;
+        rank++;
+    }
+#ifdef DEBUG
+    printf ("aggregator needs to talk to %d clients\n",
+            n_clients);
+#endif
+    *aggs_client_count_p = n_clients;
+    if (!n_clients)
+        return;
+
+    pending = (MPI_Request *)
+        ADIOI_Malloc (n_clients * sizeof(MPI_Request));
+#ifdef AGGREGATION_PROFILE
+    MPE_Log_event (5032, 0, NULL);
+#endif
+    /* second pass: post one request per participating client */
+    for (rank = 0; rank < nproc; rank++) {
+        if (!(client_comm_sz_arr[rank] > 0))
+            continue;
+
+        if (rw_type == ADIOI_WRITE)
+            MPI_Irecv (cb_buf, 1, client_comm_dtype_arr[rank], rank,
+                       DATA_TAG, comm,
+                       &pending[slot]);
+        else
+            MPI_Isend (cb_buf, 1, client_comm_dtype_arr[rank], rank,
+                       DATA_TAG, comm,
+                       &pending[slot]);
+
+        slot++;
+    }
+    *requests_p = pending;
+}
+
+/* Client side of the data exchange with aggregator `agg_rank`: when
+ * agg_alltoallw_count is non-zero, post a single nonblocking operation on
+ * fd->comm (tag DATA_TAG) using one element of agg_comm_dtype on `buf`.
+ * ADIOI_READ posts a receive, any other rw_type posts a send.  When the
+ * count is zero nothing is posted and *request is left untouched. */
+static void post_client_comm (ADIO_File fd, int rw_type,
+                              int agg_rank, void *buf,
+                              MPI_Datatype agg_comm_dtype,
+                              int agg_alltoallw_count,
+                              MPI_Request *request)
+{
+    if (!agg_alltoallw_count)
+        return;
+
+    if (rw_type != ADIOI_READ) {
+        MPI_Isend (buf, 1, agg_comm_dtype, agg_rank, DATA_TAG, fd->comm,
+                   request);
+        return;
+    }
+
+    MPI_Irecv (buf, 1, agg_comm_dtype, agg_rank, DATA_TAG, fd->comm,
+               request);
+}
+
+
+
diff --git a/ompi/mca/io/romio/romio/adio/common/ad_iread.c b/ompi/mca/io/romio/romio/adio/common/ad_iread.c
index 34fce2c1da..2a38295348 100644
--- a/ompi/mca/io/romio/romio/adio/common/ad_iread.c
+++ b/ompi/mca/io/romio/romio/adio/common/ad_iread.c
@@ -45,6 +45,7 @@ void ADIOI_GEN_IreadContig(ADIO_File fd, void *buf, int count,
static char myname[] = "ADIOI_GEN_IREADCONTIG";
MPI_Type_size(datatype, &typesize);
+ ADIOI_Assert((count * typesize) == ((ADIO_Offset)(unsigned)count * (ADIO_Offset)typesize));
len = count * typesize;
if (file_ptr_type == ADIO_INDIVIDUAL) offset = fd->fp_ind;
@@ -84,7 +85,7 @@ void ADIOI_GEN_IreadStrided(ADIO_File fd, void *buf, int count,
if (*error_code == MPI_SUCCESS) {
MPI_Type_size(datatype, &typesize);
- nbytes = count*typesize;
+ nbytes = (MPI_Offset)count*(MPI_Offset)typesize;
}
MPIO_Completed_request_create(&fd, nbytes, error_code, request);
}
diff --git a/ompi/mca/io/romio/romio/adio/common/ad_iread_fake.c b/ompi/mca/io/romio/romio/adio/common/ad_iread_fake.c
index 1a35164ccc..c6a2562bd8 100644
--- a/ompi/mca/io/romio/romio/adio/common/ad_iread_fake.c
+++ b/ompi/mca/io/romio/romio/adio/common/ad_iread_fake.c
@@ -21,12 +21,13 @@ void ADIOI_FAKE_IreadContig(ADIO_File fd, void *buf, int count,
MPI_Offset len;
MPI_Type_size(datatype, &typesize);
- len = count * typesize;
+ len = (MPI_Offset)count * (MPI_Offset)typesize;
/* Call the blocking function. It will create an error code
* if necessary.
*/
- ADIO_ReadContig(fd, buf, len, MPI_BYTE, file_ptr_type, offset,
+ ADIOI_Assert(len == (int) len); /* the count is an int parm */
+ ADIO_ReadContig(fd, buf, (int)len, MPI_BYTE, file_ptr_type, offset,
&status, error_code);
if (*error_code != MPI_SUCCESS) {
len=0;
@@ -54,7 +55,7 @@ void ADIOI_FAKE_IreadStrided(ADIO_File fd, void *buf, int count,
offset, &status, error_code);
if (*error_code == MPI_SUCCESS) {
MPI_Type_size(datatype, &typesize);
- nbytes = count*typesize;
+ nbytes = (MPI_Offset)count*(MPI_Offset)typesize;
}
MPIO_Completed_request_create(&fd, nbytes, error_code, request);
}
diff --git a/ompi/mca/io/romio/romio/adio/common/ad_iwrite.c b/ompi/mca/io/romio/romio/adio/common/ad_iwrite.c
index e97a062960..5a18198fb1 100644
--- a/ompi/mca/io/romio/romio/adio/common/ad_iwrite.c
+++ b/ompi/mca/io/romio/romio/adio/common/ad_iwrite.c
@@ -55,6 +55,7 @@ void ADIOI_GEN_IwriteContig(ADIO_File fd, void *buf, int count,
MPI_Type_size(datatype, &typesize);
len = count * typesize;
+ ADIOI_Assert(len == (int)((ADIO_Offset)count * (ADIO_Offset)typesize)); /* the count is an int parm */
if (file_ptr_type == ADIO_INDIVIDUAL) offset = fd->fp_ind;
aio_errno = ADIOI_GEN_aio(fd, buf, len, offset, 1, request);
@@ -85,10 +86,25 @@ int ADIOI_GEN_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset,
int error_code;
struct aiocb *aiocbp;
ADIOI_AIO_Request *aio_req;
-
+ MPI_Status status;
+#if defined(ROMIO_XFS)
+ unsigned maxiosz = wr ? fd->hints->fs_hints.xfs.write_chunk_sz :
+ fd->hints->fs_hints.xfs.read_chunk_sz;
+#endif /* ROMIO_XFS */
fd_sys = fd->fd_sys;
+#if defined(ROMIO_XFS)
+ /* Use Direct I/O if desired and properly aligned */
+ if (fd->fns == &ADIO_XFS_operations &&
+ ((wr && fd->direct_write) || (!wr && fd->direct_read)) &&
+ !(((long) buf) % fd->d_mem) && !(offset % fd->d_miniosz) &&
+ !(len % fd->d_miniosz) && (len >= fd->d_miniosz) &&
+ (len <= maxiosz)) {
+ fd_sys = fd->fd_direct;
+ }
+#endif /* ROMIO_XFS */
+
aio_req = (ADIOI_AIO_Request*)ADIOI_Calloc(sizeof(ADIOI_AIO_Request), 1);
aiocbp = (struct aiocb *) ADIOI_Calloc(sizeof(struct aiocb), 1);
aiocbp->aio_offset = offset;
@@ -133,10 +149,10 @@ int ADIOI_GEN_aio(ADIO_File fd, void *buf, int len, ADIO_Offset offset,
treat this as a blocking request and return. */
if (wr)
ADIO_WriteContig(fd, buf, len, MPI_BYTE,
- ADIO_EXPLICIT_OFFSET, offset, NULL, &error_code);
+ ADIO_EXPLICIT_OFFSET, offset, &status, &error_code);
else
ADIO_ReadContig(fd, buf, len, MPI_BYTE,
- ADIO_EXPLICIT_OFFSET, offset, NULL, &error_code);
+ ADIO_EXPLICIT_OFFSET, offset, &status, &error_code);
MPIO_Completed_request_create(&fd, len, &error_code, request);
return 0;
@@ -178,7 +194,7 @@ void ADIOI_GEN_IwriteStrided(ADIO_File fd, void *buf, int count,
if (*error_code == MPI_SUCCESS) {
MPI_Type_size(datatype, &typesize);
- nbytes = count * typesize;
+ nbytes = (MPI_Offset)count * (MPI_Offset)typesize;
}
MPIO_Completed_request_create(&fd, nbytes, error_code, request);
}
@@ -202,7 +218,6 @@ int ADIOI_GEN_aio_poll_fn(void *extra_state, MPI_Status *status)
} else if (errno == 0) {
int n = aio_return(aio_req->aiocbp);
aio_req->nbytes = n;
- MPIR_Nest_incr();
errcode = MPI_Grequest_complete(aio_req->req);
/* --BEGIN ERROR HANDLING-- */
if (errcode != MPI_SUCCESS) {
@@ -213,7 +228,6 @@ int ADIOI_GEN_aio_poll_fn(void *extra_state, MPI_Status *status)
0);
}
/* --END ERROR HANDLING-- */
- MPIR_Nest_decr();
}
return errcode;
}
@@ -263,7 +277,6 @@ int ADIOI_GEN_aio_wait_fn(int count, void ** array_of_states,
if (errno == 0) {
int n = aio_return(aio_reqlist[i]->aiocbp);
aio_reqlist[i]->nbytes = n;
- MPIR_Nest_incr();
errcode = MPI_Grequest_complete(aio_reqlist[i]->req);
if (errcode != MPI_SUCCESS) {
errcode = MPIO_Err_create_code(MPI_SUCCESS,
@@ -272,7 +285,6 @@ int ADIOI_GEN_aio_wait_fn(int count, void ** array_of_states,
__LINE__, MPI_ERR_IO,
"**mpi_grequest_complete", 0);
}
- MPIR_Nest_decr();
ADIOI_Free(aio_reqlist[i]->aiocbp);
aio_reqlist[i]->aiocbp = NULL;
cblist[i] = NULL;
@@ -289,27 +301,6 @@ int ADIOI_GEN_aio_wait_fn(int count, void ** array_of_states,
return errcode;
}
-int ADIOI_GEN_aio_query_fn(void *extra_state, MPI_Status *status)
-{
- ADIOI_AIO_Request *aio_req;
-
- aio_req = (ADIOI_AIO_Request *)extra_state;
-
-
- MPI_Status_set_elements(status, MPI_BYTE, aio_req->nbytes);
-
- /* do i need to nest_incr/nest_decr here? */
- /* can never cancel so always true */
- MPI_Status_set_cancelled(status, 0);
-
- /* choose not to return a value for this */
- status->MPI_SOURCE = MPI_UNDEFINED;
- /* tag has no meaning for this generalized request */
- status->MPI_TAG = MPI_UNDEFINED;
- /* this generalized request never fails */
- return MPI_SUCCESS;
-}
-
int ADIOI_GEN_aio_free_fn(void *extra_state)
{
ADIOI_AIO_Request *aio_req;
@@ -322,6 +313,26 @@ int ADIOI_GEN_aio_free_fn(void *extra_state)
return MPI_SUCCESS;
}
#endif /* working AIO */
+/* Generalized-request query callback: fills `status` from the ADIOI_AIO_Request passed as extra_state. */
+int ADIOI_GEN_aio_query_fn(void *extra_state, MPI_Status *status)
+{
+ ADIOI_AIO_Request *aio_req;
+
+ aio_req = (ADIOI_AIO_Request *)extra_state;
+
+ /* report the byte count recorded in the request (aio_req->nbytes) */
+ MPI_Status_set_elements(status, MPI_BYTE, aio_req->nbytes);
+
+ /* these requests are never cancelled, so the cancelled flag is always 0 (false) */
+ MPI_Status_set_cancelled(status, 0);
+
+ /* choose not to return a value for this */
+ status->MPI_SOURCE = MPI_UNDEFINED;
+ /* tag has no meaning for this generalized request */
+ status->MPI_TAG = MPI_UNDEFINED;
+ /* this generalized request never fails */
+ return MPI_SUCCESS;
+}
/*
* vim: ts=8 sts=4 sw=4 noexpandtab
*/
diff --git a/ompi/mca/io/romio/romio/adio/common/ad_iwrite_fake.c b/ompi/mca/io/romio/romio/adio/common/ad_iwrite_fake.c
index 5fce4e3291..1df7d9d436 100644
--- a/ompi/mca/io/romio/romio/adio/common/ad_iwrite_fake.c
+++ b/ompi/mca/io/romio/romio/adio/common/ad_iwrite_fake.c
@@ -18,20 +18,22 @@ void ADIOI_FAKE_IwriteContig(ADIO_File fd, void *buf, int count,
int *error_code)
{
ADIO_Status status;
- int len, typesize;
+ MPI_Offset len;
+ int typesize;
MPI_Offset nbytes=0;
MPI_Type_size(datatype, &typesize);
- len = count * typesize;
+ len = (MPI_Offset)count * (MPI_Offset)typesize;
/* Call the blocking function. It will create an error code
* if necessary.
*/
- ADIO_WriteContig(fd, buf, len, MPI_BYTE, file_ptr_type, offset,
+ ADIOI_Assert(len == (int) len); /* the count is an int parm */
+ ADIO_WriteContig(fd, buf, (int)len, MPI_BYTE, file_ptr_type, offset,
&status, error_code);
if (*error_code == MPI_SUCCESS) {
MPI_Type_size(datatype, &typesize);
- nbytes = count*typesize;
+ nbytes = (MPI_Offset)count*(MPI_Offset)typesize;
}
MPIO_Completed_request_create(&fd, nbytes, error_code, request);
@@ -57,7 +59,7 @@ void ADIOI_FAKE_IwriteStrided(ADIO_File fd, void *buf, int count,
offset, &status, error_code);
if (*error_code == MPI_SUCCESS) {
MPI_Type_size(datatype, &typesize);
- nbytes = count * typesize;
+ nbytes = (MPI_Offset)count * (MPI_Offset)typesize;
}
MPIO_Completed_request_create(&fd, nbytes, error_code, request);
}
diff --git a/ompi/mca/io/romio/romio/adio/common/ad_open.c b/ompi/mca/io/romio/romio/adio/common/ad_open.c
index d89f6ac64c..20a9eadd1c 100644
--- a/ompi/mca/io/romio/romio/adio/common/ad_open.c
+++ b/ompi/mca/io/romio/romio/adio/common/ad_open.c
@@ -27,7 +27,7 @@ MPI_File ADIO_Open(MPI_Comm orig_comm,
{
MPI_File mpi_fh;
ADIO_File fd;
- int orig_amode_excl, orig_amode_wronly, err, rank, procs;
+ int err, rank, procs;
static char myname[] = "ADIO_OPEN";
int max_error_code;
MPI_Info dupinfo;
@@ -59,6 +59,9 @@ MPI_File ADIO_Open(MPI_Comm orig_comm,
fd->filetype = filetype; /* MPI_BYTE by default */
fd->etype_size = 1; /* default etype is MPI_BYTE */
+ fd->file_realm_st_offs = NULL;
+ fd->file_realm_types = NULL;
+
fd->perm = perm;
fd->async_count = 0;
@@ -70,7 +73,7 @@ MPI_File ADIO_Open(MPI_Comm orig_comm,
MPI_Comm_rank(comm, &rank);
MPI_Comm_size(comm, &procs);
/* create and initialize info object */
- fd->hints = (ADIOI_Hints *)ADIOI_Malloc(sizeof(struct ADIOI_Hints_struct));
+ fd->hints = (ADIOI_Hints *)ADIOI_Calloc(1, sizeof(struct ADIOI_Hints_struct));
if (fd->hints == NULL) {
/* NEED TO HANDLE ENOMEM ERRORS */
}
@@ -79,18 +82,13 @@ MPI_File ADIO_Open(MPI_Comm orig_comm,
fd->hints->initialized = 0;
fd->info = MPI_INFO_NULL;
- if (info == MPI_INFO_NULL)
- *error_code = MPI_Info_create(&dupinfo);
- else
- *error_code = MPI_Info_dup(info, &dupinfo);
- if (*error_code != MPI_SUCCESS)
- goto fn_exit;
-
- ADIOI_process_system_hints(dupinfo);
+ ADIOI_incorporate_system_hints(info, ADIOI_syshints, &dupinfo);
ADIO_SetInfo(fd, dupinfo, &err);
- *error_code = MPI_Info_free(&dupinfo);
- if (*error_code != MPI_SUCCESS)
- goto fn_exit;
+ if (dupinfo != MPI_INFO_NULL) {
+ *error_code = MPI_Info_free(&dupinfo);
+ if (*error_code != MPI_SUCCESS)
+ goto fn_exit;
+ }
/* deferred open:
* we can only do this optimization if 'fd->hints->deferred_open' is set
@@ -103,9 +101,9 @@ MPI_File ADIO_Open(MPI_Comm orig_comm,
&& uses_generic_write(fd))) {
fd->hints->deferred_open = 0;
}
- if (fd->file_system == ADIO_PVFS2)
- /* disable deferred open on PVFS2 so that scalable broadcast will
- * always use the propper communicator */
+ if (ADIO_Feature(fd, ADIO_SCALABLE_OPEN))
+ /* disable deferred open on these fs so that scalable broadcast
+ * will always use the proper communicator */
fd->hints->deferred_open = 0;
@@ -123,134 +121,30 @@ MPI_File ADIO_Open(MPI_Comm orig_comm,
* IO */
fd->agg_comm = MPI_COMM_NULL;
fd->is_open = 0;
+ fd->my_cb_nodes_index = -2;
+ fd->is_agg = is_aggregator(rank, fd);
if (fd->hints->deferred_open) {
/* MPI_Comm_split will create a communication group of aggregators.
* for non-aggregators it will return MPI_COMM_NULL . we rely on
* fd->agg_comm == MPI_COMM_NULL for non-aggregators in several
* tests in the code */
- if (is_aggregator(rank, fd)) {
+ if (fd->is_agg) {
MPI_Comm_split(fd->comm, 1, 0, &aggregator_comm);
fd->agg_comm = aggregator_comm;
} else {
MPI_Comm_split(fd->comm, MPI_UNDEFINED, 0, &aggregator_comm);
fd->agg_comm = aggregator_comm;
}
+
}
- orig_amode_excl = access_mode;
+ /* actual opens start here */
+ /* generic open: one process opens to create the file, all others open */
+ /* nfs open: everybody opens or else you'll end up with "file not found"
+ * due to stupid nfs consistency semantics */
+ /* scalable open: one process opens and broadcasts results to everyone */
- /* optimization: by having just one process create a file, close it, then
- * have all N processes open it, we can possibly avoid contention for write
- * locks on a directory for some file systems.
- *
- * we used to special-case EXCL|CREATE, since when N processes are trying
- * to create a file exclusively, only 1 will succeed and the rest will
- * (spuriously) fail. Since we are now carrying out the CREATE on one
- * process anyway, the EXCL case falls out and we don't need to explicitly
- * worry about it, other than turning off both the EXCL and CREATE flags
- */
- /* pvfs2 handles opens specially, so it is actually more efficent for that
- * file system if we skip this optimization */
- /* NFS handles opens especially poorly, so we cannot use this optimization
- * on that FS */
- if (fd->file_system == ADIO_NFS) {
- /* no optimizations for NFS: */
- if ((access_mode & ADIO_CREATE) && (access_mode & ADIO_EXCL)) {
- /* the open should fail if the file exists. Only *1* process should
- check this. Otherwise, if all processes try to check and the file
- does not exist, one process will create the file and others who
- reach later will return error. */
- if(rank == fd->hints->ranklist[0]) {
- fd->access_mode = access_mode;
- (*(fd->fns->ADIOI_xxx_Open))(fd, error_code);
- MPI_Bcast(error_code, 1, MPI_INT, \
- fd->hints->ranklist[0], fd->comm);
- /* if no error, close the file and reopen normally below */
- if (*error_code == MPI_SUCCESS)
- (*(fd->fns->ADIOI_xxx_Close))(fd, error_code);
- }
- else MPI_Bcast(error_code, 1, MPI_INT,
- fd->hints->ranklist[0], fd->comm);
- if (*error_code != MPI_SUCCESS) {
- goto fn_exit;
- }
- else {
- /* turn off EXCL for real open */
- access_mode = access_mode ^ ADIO_EXCL;
- }
- }
- } else {
-
- /* the actual optimized create on one, open on all */
- if (access_mode & ADIO_CREATE && fd->file_system != ADIO_PVFS2) {
- if(rank == fd->hints->ranklist[0]) {
- /* remove delete_on_close flag if set */
- if (access_mode & ADIO_DELETE_ON_CLOSE)
- fd->access_mode = access_mode ^ ADIO_DELETE_ON_CLOSE;
- else
- fd->access_mode = access_mode;
-
- (*(fd->fns->ADIOI_xxx_Open))(fd, error_code);
- MPI_Bcast(error_code, 1, MPI_INT, \
- fd->hints->ranklist[0], fd->comm);
- /* if no error, close the file and reopen normally below */
- if (*error_code == MPI_SUCCESS)
- (*(fd->fns->ADIOI_xxx_Close))(fd, error_code);
-
- fd->access_mode = access_mode; /* back to original */
- }
- else MPI_Bcast(error_code, 1, MPI_INT, fd->hints->ranklist[0], fd->comm);
-
- if (*error_code != MPI_SUCCESS) {
- goto fn_exit;
- }
- else {
- /* turn off CREAT (and EXCL if set) for real multi-processor open */
- access_mode ^= ADIO_CREATE;
- if (access_mode & ADIO_EXCL)
- access_mode ^= ADIO_EXCL;
- }
- }
- }
-
- /* if we are doing deferred open, non-aggregators should return now */
- if (fd->hints->deferred_open ) {
- if (fd->agg_comm == MPI_COMM_NULL) {
- /* we might have turned off EXCL for the aggregators.
- * restore access_mode that non-aggregators get the right
- * value from get_amode */
- fd->access_mode = orig_amode_excl;
- *error_code = MPI_SUCCESS;
- goto fn_exit;
- }
- }
-
-/* For writing with data sieving, a read-modify-write is needed. If
- the file is opened for write_only, the read will fail. Therefore,
- if write_only, open the file as read_write, but record it as write_only
- in fd, so that get_amode returns the right answer. */
-
- orig_amode_wronly = access_mode;
- if (access_mode & ADIO_WRONLY) {
- access_mode = access_mode ^ ADIO_WRONLY;
- access_mode = access_mode | ADIO_RDWR;
- }
- fd->access_mode = access_mode;
-
- (*(fd->fns->ADIOI_xxx_Open))(fd, error_code);
-
- /* if error, may be it was due to the change in amode above.
- therefore, reopen with access mode provided by the user.*/
- fd->access_mode = orig_amode_wronly;
- if (*error_code != MPI_SUCCESS)
- (*(fd->fns->ADIOI_xxx_Open))(fd, error_code);
-
- /* if we turned off EXCL earlier, then we should turn it back on */
- if (fd->access_mode != orig_amode_excl) fd->access_mode = orig_amode_excl;
-
- /* for deferred open: this process has opened the file (because if we are
- * not an aggregaor and we are doing deferred open, we returned earlier)*/
- fd->is_open = 1;
+ ADIOI_OpenColl(fd, rank, access_mode, error_code);
fn_exit:
MPI_Allreduce(error_code, &max_error_code, 1, MPI_INT, MPI_MAX, comm);
@@ -300,10 +194,18 @@ MPI_File ADIO_Open(MPI_Comm orig_comm,
int is_aggregator(int rank, ADIO_File fd ) {
int i;
- for (i=0; i< fd->hints->cb_nodes; i++ ) {
- if ( rank == fd->hints->ranklist[i] )
- return 1;
+ if (fd->my_cb_nodes_index == -2) {
+ for (i=0; i< fd->hints->cb_nodes; i++ ) {
+ if ( rank == fd->hints->ranklist[i] ) {
+ fd->my_cb_nodes_index = i;
+ return 1;
+ }
+ }
+ fd->my_cb_nodes_index = -1;
}
+ else if (fd->my_cb_nodes_index != -1)
+ return 1;
+
return 0;
}
@@ -369,7 +271,7 @@ static int build_cb_config_list(ADIO_File fd,
/* TEMPORARY -- REMOVE WHEN NO LONGER UPDATING INFO FOR FS-INDEP. */
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", rank_ct);
- MPI_Info_set(fd->info, "cb_nodes", value);
+ ADIOI_Info_set(fd->info, "cb_nodes", value);
ADIOI_Free(value);
}
diff --git a/ompi/mca/io/romio/romio/adio/common/ad_opencoll.c b/ompi/mca/io/romio/romio/adio/common/ad_opencoll.c
new file mode 100644
index 0000000000..243fe84b84
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/common/ad_opencoll.c
@@ -0,0 +1,108 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ *
+ * Copyright (C) 2007 UChicago/Argonne LLC
+ * See COPYRIGHT notice in top-level directory.
+ */
+
+#include "adio.h"
+
+/* Generic "collective open" for file systems with ordinary consistency
+ * semantics (i.e. not NFS-like).  Takes the caller's rank and the
+ * requested access_mode; the outcome is reported through *error_code.
+ *
+ * Optimization: when ADIO_CREATE is set, only rank ranklist[0] creates
+ * the file, closes it again and broadcasts the result over fd->comm; the
+ * open that follows is then done with CREATE (and EXCL) turned off.  This
+ * can avoid contention for write locks on a directory on some file systems.
+ *
+ * Happy side-effect: exclusive create (error if the file already exists)
+ * just falls out.  Note: this is not a "scalable open" (cf. "The impact
+ * of file systems on MPI-IO scalability").
+ */
+
+void ADIOI_GEN_OpenColl(ADIO_File fd, int rank,
+ int access_mode, int *error_code)
+{
+ int orig_amode_excl, orig_amode_wronly;
+ MPI_Comm tmp_comm;
+
+ orig_amode_excl = access_mode;
+
+ if (access_mode & ADIO_CREATE ){
+ if(rank == fd->hints->ranklist[0]) {
+ /* drop DELETE_ON_CLOSE for the create (presumably so the close below keeps the file) */
+ if (access_mode & ADIO_DELETE_ON_CLOSE)
+ fd->access_mode = access_mode ^ ADIO_DELETE_ON_CLOSE;
+ else
+ fd->access_mode = access_mode;
+
+ tmp_comm = fd->comm;
+ fd->comm = MPI_COMM_SELF; /* the create-open runs with a single-process communicator */
+ (*(fd->fns->ADIOI_xxx_Open))(fd, error_code);
+ fd->comm = tmp_comm;
+ MPI_Bcast(error_code, 1, MPI_INT, \
+ fd->hints->ranklist[0], fd->comm);
+ /* if no error, close the file and reopen normally below */
+ if (*error_code == MPI_SUCCESS)
+ (*(fd->fns->ADIOI_xxx_Close))(fd, error_code);
+
+ fd->access_mode = access_mode; /* back to the caller's access mode */
+ }
+ else MPI_Bcast(error_code, 1, MPI_INT, fd->hints->ranklist[0], fd->comm);
+
+ if (*error_code != MPI_SUCCESS) {
+ return;
+ }
+ else {
+ /* turn off CREATE (and EXCL if set) for the real multi-process open */
+ access_mode ^= ADIO_CREATE;
+ if (access_mode & ADIO_EXCL)
+ access_mode ^= ADIO_EXCL;
+ }
+ }
+
+ /* if we are doing deferred open, non-aggregators should return now */
+ if (fd->hints->deferred_open ) {
+ if (fd->agg_comm == MPI_COMM_NULL) {
+ /* CREATE/EXCL may have been turned off in the local copy above.
+ * restore access_mode so that non-aggregators get the right
+ * value from get_amode */
+ fd->access_mode = orig_amode_excl;
+ *error_code = MPI_SUCCESS;
+ return;
+ }
+ }
+
+/* For writing with data sieving, a read-modify-write is needed. If
+ the file is opened for write_only, the read will fail. Therefore,
+ if write_only, open the file as read_write, but record it as write_only
+ in fd, so that get_amode returns the right answer. */
+
+ orig_amode_wronly = access_mode;
+ if (access_mode & ADIO_WRONLY) {
+ access_mode = access_mode ^ ADIO_WRONLY;
+ access_mode = access_mode | ADIO_RDWR;
+ }
+ fd->access_mode = access_mode;
+
+ (*(fd->fns->ADIOI_xxx_Open))(fd, error_code);
+
+ /* if error, maybe it was due to the change in amode above;
+ therefore, retry the open with the access mode before that change.*/
+ fd->access_mode = orig_amode_wronly;
+ if (*error_code != MPI_SUCCESS)
+ (*(fd->fns->ADIOI_xxx_Open))(fd, error_code);
+
+ /* record the access mode originally passed in (CREATE/EXCL may be off) */
+ if (fd->access_mode != orig_amode_excl) fd->access_mode = orig_amode_excl;
+
+ /* for deferred open: this process has opened the file (because if we are
+ * not an aggregator and we are doing deferred open, we returned earlier)*/
+ fd->is_open = 1;
+
+}
+
+/*
+ * vim: ts=8 sts=4 sw=4 noexpandtab
+ */
diff --git a/ompi/mca/io/romio/romio/adio/common/ad_opencoll_failsafe.c b/ompi/mca/io/romio/romio/adio/common/ad_opencoll_failsafe.c
new file mode 100644
index 0000000000..4dcdfc729d
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/common/ad_opencoll_failsafe.c
@@ -0,0 +1,89 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ *
+ * Copyright (C) 2007 UChicago/Argonne LLC
+ * See COPYRIGHT notice in top-level directory.
+ */
+
+#include "adio.h"
+
+
+/* "Collective" open for file systems such as NFS, where a create from one
+ * client might not be immediately visible on another.  Only CREATE|EXCL is
+ * pre-checked on rank ranklist[0]; the outcome is left in *error_code. */
+
+void ADIOI_FAILSAFE_OpenColl(ADIO_File fd, int rank,
+ int access_mode, int *error_code)
+{
+ int orig_amode_excl, orig_amode_wronly;
+
+ orig_amode_excl = access_mode;
+ if ((access_mode & ADIO_CREATE) && (access_mode & ADIO_EXCL)) {
+ /* the open should fail if the file exists. Only *1* process
+ * should check this. Otherwise, if all processes try to check
+ * and the file does not exist, one process will create the file
+ * and the others, arriving later, will return an error. */
+ if(rank == fd->hints->ranklist[0]) {
+ fd->access_mode = access_mode;
+ (*(fd->fns->ADIOI_xxx_Open))(fd, error_code);
+ MPI_Bcast(error_code, 1, MPI_INT, \
+ fd->hints->ranklist[0], fd->comm);
+ /* if no error, close the file and reopen normally below */
+ if (*error_code == MPI_SUCCESS)
+ (*(fd->fns->ADIOI_xxx_Close))(fd, error_code);
+ }
+ else MPI_Bcast(error_code, 1, MPI_INT,
+ fd->hints->ranklist[0], fd->comm);
+ if (*error_code != MPI_SUCCESS) {
+ return;
+ }
+ else {
+ /* turn off EXCL (CREATE stays set) for the real open */
+ access_mode = access_mode ^ ADIO_EXCL;
+ }
+ }
+ /* if we are doing deferred open, non-aggregators should return now */
+ if (fd->hints->deferred_open ) {
+ if (fd->agg_comm == MPI_COMM_NULL) {
+ /* EXCL may have been turned off in the local copy above.
+ * restore access_mode so that non-aggregators get the right
+ * value from get_amode */
+ fd->access_mode = orig_amode_excl;
+ *error_code = MPI_SUCCESS;
+ return;
+ }
+ }
+
+/* For writing with data sieving, a read-modify-write is needed. If
+ the file is opened for write_only, the read will fail. Therefore,
+ if write_only, open the file as read_write, but record it as write_only
+ in fd, so that get_amode returns the right answer. */
+
+ orig_amode_wronly = access_mode;
+ if (access_mode & ADIO_WRONLY) {
+ access_mode = access_mode ^ ADIO_WRONLY;
+ access_mode = access_mode | ADIO_RDWR;
+ }
+ fd->access_mode = access_mode;
+
+ (*(fd->fns->ADIOI_xxx_Open))(fd, error_code);
+
+ /* if error, maybe it was due to the change in amode above;
+ therefore, retry the open with the access mode before that change.*/
+ fd->access_mode = orig_amode_wronly;
+ if (*error_code != MPI_SUCCESS)
+ (*(fd->fns->ADIOI_xxx_Open))(fd, error_code);
+
+ /* if we turned off EXCL earlier, then we should turn it back on */
+ if (fd->access_mode != orig_amode_excl) fd->access_mode = orig_amode_excl;
+
+ /* for deferred open: this process has opened the file (because if we are
+ * not an aggregator and we are doing deferred open, we returned earlier)*/
+ fd->is_open = 1;
+}
+
+
+
+/*
+ * vim: ts=8 sts=4 sw=4 noexpandtab
+ */
diff --git a/ompi/mca/io/romio/romio/adio/common/ad_opencoll_scalable.c b/ompi/mca/io/romio/romio/adio/common/ad_opencoll_scalable.c
new file mode 100644
index 0000000000..eee8520c6d
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/common/ad_opencoll_scalable.c
@@ -0,0 +1,57 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ *
+ * Copyright (C) 2007 UChicago/Argonne LLC
+ * See COPYRIGHT notice in top-level directory.
+ */
+
+#include "adio.h"
+
+/*
+ * Scalable open: for file systems capable of having one process
+ * create/open a file and broadcast the result to everyone else.
+ * - No separate create step and no special handling for CREATE|EXCL here:
+ *   this routine only calls the fs open hook; status goes to *error_code.
+ */
+void ADIOI_SCALEABLE_OpenColl(ADIO_File fd, int rank,
+ int access_mode, int *error_code)
+{
+ int orig_amode_wronly;
+
+ /* if we are doing deferred open, non-aggregators should return now */
+ if (fd->hints->deferred_open ) {
+ if (fd->agg_comm == MPI_COMM_NULL) {
+ *error_code = MPI_SUCCESS;
+ return;
+ }
+ }
+
+ /* For writing with data sieving, a read-modify-write is needed. If
+ the file is opened for write_only, the read will fail. Therefore,
+ if write_only, open the file as read_write, but record it as
+ write_only in fd, so that get_amode returns the right answer. */
+
+ orig_amode_wronly = access_mode;
+ if (access_mode & ADIO_WRONLY) {
+ access_mode = access_mode ^ ADIO_WRONLY;
+ access_mode = access_mode | ADIO_RDWR;
+ }
+ fd->access_mode = access_mode;
+
+ (*(fd->fns->ADIOI_xxx_Open))(fd, error_code);
+
+ /* if error, maybe it was due to the change in amode above;
+ therefore, retry the open with the access mode provided by the user.*/
+ fd->access_mode = orig_amode_wronly;
+ if (*error_code != MPI_SUCCESS)
+ (*(fd->fns->ADIOI_xxx_Open))(fd, error_code);
+
+ /* for deferred open: this process has opened the file (because if we are
+ * not an aggregator and we are doing deferred open, we returned earlier)*/
+ fd->is_open = 1;
+
+}
+
+/*
+ * vim: ts=8 sts=4 sw=4 noexpandtab
+ */
diff --git a/ompi/mca/io/romio/romio/adio/common/ad_prealloc.c b/ompi/mca/io/romio/romio/adio/common/ad_prealloc.c
index 4b44ccb3c3..cd18dbb283 100644
--- a/ompi/mca/io/romio/romio/adio/common/ad_prealloc.c
+++ b/ompi/mca/io/romio/romio/adio/common/ad_prealloc.c
@@ -47,7 +47,10 @@ void ADIOI_GEN_Prealloc(ADIO_File fd, ADIO_Offset diskspace, int *error_code)
for (i=0; i
#endif
+#ifdef AGGREGATION_PROFILE
+#include "mpe.h"
+#endif
void ADIOI_GEN_ReadContig(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status,
int *error_code)
{
- int err = -1, datatype_size, len;
+ int err = -1, datatype_size;
+ ADIO_Offset len;
static char myname[] = "ADIOI_GEN_READCONTIG";
+#ifdef AGGREGATION_PROFILE
+ MPE_Log_event (5034, 0, NULL);
+#endif
MPI_Type_size(datatype, &datatype_size);
- len = datatype_size * count;
+ len = (ADIO_Offset)datatype_size * (ADIO_Offset)count;
+ ADIOI_Assert(len == (unsigned int) len); /* read takes an unsigned int parm */
if (file_ptr_type == ADIO_INDIVIDUAL) {
offset = fd->fp_ind;
@@ -50,7 +58,7 @@ void ADIOI_GEN_ReadContig(ADIO_File fd, void *buf, int count,
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
#endif
- err = read(fd->fd_sys, buf, len);
+ err = read(fd->fd_sys, buf, (unsigned int)len);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
#endif
@@ -77,4 +85,7 @@ void ADIOI_GEN_ReadContig(ADIO_File fd, void *buf, int count,
#endif
*error_code = MPI_SUCCESS;
+#ifdef AGGREGATION_PROFILE
+ MPE_Log_event (5035, 0, NULL);
+#endif
}
diff --git a/ompi/mca/io/romio/romio/adio/common/ad_read_coll.c b/ompi/mca/io/romio/romio/adio/common/ad_read_coll.c
index 303fb53ba5..ddbcd9157d 100644
--- a/ompi/mca/io/romio/romio/adio/common/ad_read_coll.c
+++ b/ompi/mca/io/romio/romio/adio/common/ad_read_coll.c
@@ -8,18 +8,25 @@
#include "adio.h"
#include "adio_extern.h"
+#ifdef USE_DBG_LOGGING
+ #define RDCOLL_DEBUG 1
+#endif
+#ifdef AGGREGATION_PROFILE
+#include "mpe.h"
+#endif
+
/* prototypes of functions used for collective reads only. */
static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype
datatype, int nprocs,
int myrank, ADIOI_Access
*others_req, ADIO_Offset *offset_list,
- int *len_list, int contig_access_count,
+ ADIO_Offset *len_list, int contig_access_count,
ADIO_Offset
min_st_offset, ADIO_Offset fd_size,
ADIO_Offset *fd_start, ADIO_Offset *fd_end,
int *buf_idx, int *error_code);
static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node
- *flat_buf, ADIO_Offset *offset_list, int
+ *flat_buf, ADIO_Offset *offset_list, ADIO_Offset
*len_list, int *send_size, int *recv_size,
int *count, int *start_pos,
int *partial_send,
@@ -34,8 +41,8 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node
MPI_Aint buftype_extent, int *buf_idx);
static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
*flat_buf, char **recv_buf, ADIO_Offset
- *offset_list, int *len_list,
- int *recv_size,
+ *offset_list, ADIO_Offset *len_list,
+ unsigned *recv_size,
MPI_Request *requests, MPI_Status *statuses,
int *recd_from_proc, int nprocs,
int contig_access_count,
@@ -70,12 +77,20 @@ void ADIOI_GEN_ReadStridedColl(ADIO_File fd, void *buf, int count,
ADIO_Offset start_offset, end_offset, orig_fp, fd_size, min_st_offset, off;
ADIO_Offset *offset_list = NULL, *st_offsets = NULL, *fd_start = NULL,
*fd_end = NULL, *end_offsets = NULL;
- int *len_list = NULL, *buf_idx = NULL;
+ ADIO_Offset *len_list = NULL;
+ int *buf_idx = NULL;
#ifdef HAVE_STATUS_SET_BYTES
int bufsize, size;
#endif
+ if (fd->hints->cb_pfr != ADIOI_HINT_DISABLE) {
+ ADIOI_IOStridedColl (fd, buf, count, ADIOI_READ, datatype,
+ file_ptr_type, offset, status, error_code);
+ return;
+ }
+
+
MPI_Comm_size(fd->comm, &nprocs);
MPI_Comm_rank(fd->comm, &myrank);
@@ -85,24 +100,26 @@ void ADIOI_GEN_ReadStridedColl(ADIO_File fd, void *buf, int count,
/* only check for interleaving if cb_read isn't disabled */
if (fd->hints->cb_read != ADIOI_HINT_DISABLE) {
- /* For this process's request, calculate the list of offsets and
- lengths in the file and determine the start and end offsets. */
+ /* For this process's request, calculate the list of offsets and
+ lengths in the file and determine the start and end offsets. */
- /* Note: end_offset points to the last byte-offset that will be accessed.
- e.g., if start_offset=0 and 100 bytes to be read, end_offset=99*/
+ /* Note: end_offset points to the last byte-offset that will be accessed.
+ e.g., if start_offset=0 and 100 bytes to be read, end_offset=99*/
ADIOI_Calc_my_off_len(fd, count, datatype, file_ptr_type, offset,
&offset_list, &len_list, &start_offset,
&end_offset, &contig_access_count);
- /* for (i=0; ihints->min_fdomain_size, &fd_size,
+ fd->hints->striping_unit);
/* calculate where the portions of the access requests of this process
* are located in terms of the file domains. this could be on the same
@@ -257,20 +276,30 @@ void ADIOI_GEN_ReadStridedColl(ADIO_File fd, void *buf, int count,
void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype
datatype, int file_ptr_type, ADIO_Offset
- offset, ADIO_Offset **offset_list_ptr, int
+ offset, ADIO_Offset **offset_list_ptr, ADIO_Offset
**len_list_ptr, ADIO_Offset *start_offset_ptr,
ADIO_Offset *end_offset_ptr, int
*contig_access_count_ptr)
{
- int filetype_size, buftype_size, etype_size;
- int i, j, k, frd_size=0, old_frd_size=0, st_index=0;
- int n_filetypes, etype_in_filetype;
+ int filetype_size, etype_size;
+ unsigned buftype_size;
+ int i, j, k;
+ ADIO_Offset i_offset;
+ ADIO_Offset frd_size=0, old_frd_size=0;
+ int st_index=0;
+ ADIO_Offset n_filetypes, etype_in_filetype;
ADIO_Offset abs_off_in_filetype=0;
- int bufsize, sum, n_etypes_in_filetype, size_in_filetype;
- int contig_access_count, *len_list, flag, filetype_is_contig;
+ ADIO_Offset bufsize;
+ ADIO_Offset sum, n_etypes_in_filetype, size_in_filetype;
+ int contig_access_count, filetype_is_contig;
+ ADIO_Offset *len_list;
MPI_Aint filetype_extent, filetype_lb;
ADIOI_Flatlist_node *flat_file;
ADIO_Offset *offset_list, off, end_offset=0, disp;
+
+#ifdef AGGREGATION_PROFILE
+ MPE_Log_event (5028, 0, NULL);
+#endif
/* For this process's request, calculate the list of offsets and
lengths in the file and determine the start and end offsets. */
@@ -280,19 +309,19 @@ void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype
MPI_Type_size(fd->filetype, &filetype_size);
MPI_Type_extent(fd->filetype, &filetype_extent);
MPI_Type_lb(fd->filetype, &filetype_lb);
- MPI_Type_size(datatype, &buftype_size);
+ MPI_Type_size(datatype, (int*)&buftype_size);
etype_size = fd->etype_size;
if ( ! filetype_size ) {
*contig_access_count_ptr = 0;
*offset_list_ptr = (ADIO_Offset *) ADIOI_Malloc(2*sizeof(ADIO_Offset));
- *len_list_ptr = (int *) ADIOI_Malloc(2*sizeof(int));
+ *len_list_ptr = (ADIO_Offset *) ADIOI_Malloc(2*sizeof(ADIO_Offset));
/* 2 is for consistency. everywhere I malloc one more than needed */
offset_list = *offset_list_ptr;
len_list = *len_list_ptr;
offset_list[0] = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind :
- fd->disp + etype_size * offset;
+ fd->disp + (ADIO_Offset)etype_size * offset;
len_list[0] = 0;
*start_offset_ptr = offset_list[0];
*end_offset_ptr = offset_list[0] + len_list[0] - 1;
@@ -303,14 +332,14 @@ void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype
if (filetype_is_contig) {
*contig_access_count_ptr = 1;
*offset_list_ptr = (ADIO_Offset *) ADIOI_Malloc(2*sizeof(ADIO_Offset));
- *len_list_ptr = (int *) ADIOI_Malloc(2*sizeof(int));
+ *len_list_ptr = (ADIO_Offset *) ADIOI_Malloc(2*sizeof(ADIO_Offset));
/* 2 is for consistency. everywhere I malloc one more than needed */
offset_list = *offset_list_ptr;
len_list = *len_list_ptr;
offset_list[0] = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind :
- fd->disp + etype_size * offset;
- len_list[0] = bufcount * buftype_size;
+ fd->disp + (ADIO_Offset)etype_size * offset;
+ len_list[0] = (ADIO_Offset)bufcount * (ADIO_Offset)buftype_size;
*start_offset_ptr = offset_list[0];
*end_offset_ptr = offset_list[0] + len_list[0] - 1;
@@ -327,31 +356,47 @@ void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype
while (flat_file->type != fd->filetype) flat_file = flat_file->next;
disp = fd->disp;
+#ifdef RDCOLL_DEBUG
+ {
+ int ii;
+ DBG_FPRINTF(stderr, "flattened %3d : ", flat_file->count );
+ for (ii=0; ii<flat_file->count; ii++) {
+ DBG_FPRINTF(stderr, "%16qd:%-16qd", flat_file->indices[ii], flat_file->blocklens[ii] );
+ }
+ DBG_FPRINTF(stderr, "\n" );
+ }
+#endif
if (file_ptr_type == ADIO_INDIVIDUAL) {
- offset = fd->fp_ind; /* in bytes */
- n_filetypes = -1;
- flag = 0;
- while (!flag) {
- n_filetypes++;
- for (i=0; i<flat_file->count; i++) {
- if (disp + flat_file->indices[i] +
- (ADIO_Offset) n_filetypes*filetype_extent +
- flat_file->blocklens[i] >= offset)
- {
- st_index = i;
- frd_size = (int) (disp + flat_file->indices[i] +
- (ADIO_Offset) n_filetypes*filetype_extent
- + flat_file->blocklens[i] - offset);
- flag = 1;
+ /* Wei-keng reworked type processing to be a bit more efficient */
+ offset = fd->fp_ind - disp;
+ n_filetypes = (offset - flat_file->indices[0]) / filetype_extent;
+ offset -= (ADIO_Offset)n_filetypes * filetype_extent;
+ /* now offset is local to this extent */
+
+ /* find the block where offset is located, skip blocklens[i]==0 */
+ for (i=0; i<flat_file->count; i++) {
+ ADIO_Offset dist;
+ if (flat_file->blocklens[i] == 0) continue;
+ dist = flat_file->indices[i] + flat_file->blocklens[i] - offset;
+ /* frd_size is from offset to the end of block i */
+ if (dist == 0) {
+ i++;
+ offset = flat_file->indices[i];
+ frd_size = flat_file->blocklens[i];
break;
- }
+ }
+ if (dist > 0) {
+ frd_size = dist;
+ break;
}
}
- }
+ st_index = i; /* starting index in flat_file->indices[] */
+ offset += disp + (ADIO_Offset)n_filetypes*filetype_extent;
+ }
else {
n_etypes_in_filetype = filetype_size/etype_size;
- n_filetypes = (int) (offset / n_etypes_in_filetype);
- etype_in_filetype = (int) (offset % n_etypes_in_filetype);
+ n_filetypes = offset / n_etypes_in_filetype;
+ etype_in_filetype = offset % n_etypes_in_filetype;
size_in_filetype = etype_in_filetype * etype_size;
sum = 0;
@@ -367,29 +412,29 @@ void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype
}
/* abs. offset in bytes in the file */
- offset = disp + (ADIO_Offset) n_filetypes*filetype_extent +
+ offset = disp + n_filetypes* (ADIO_Offset)filetype_extent +
abs_off_in_filetype;
}
/* calculate how much space to allocate for offset_list, len_list */
old_frd_size = frd_size;
- contig_access_count = i = 0;
+ contig_access_count = i_offset = 0;
j = st_index;
- bufsize = buftype_size * bufcount;
+ bufsize = (ADIO_Offset)buftype_size * (ADIO_Offset)bufcount;
frd_size = ADIOI_MIN(frd_size, bufsize);
- while (i < bufsize) {
+ while (i_offset < bufsize) {
if (frd_size) contig_access_count++;
- i += frd_size;
+ i_offset += frd_size;
j = (j + 1) % flat_file->count;
- frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
+ frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset);
}
/* allocate space for offset_list and len_list */
*offset_list_ptr = (ADIO_Offset *)
ADIOI_Malloc((contig_access_count+1)*sizeof(ADIO_Offset));
- *len_list_ptr = (int *) ADIOI_Malloc((contig_access_count+1)*sizeof(int));
+ *len_list_ptr = (ADIO_Offset *) ADIOI_Malloc((contig_access_count+1)*sizeof(ADIO_Offset));
/* +1 to avoid a 0-size malloc */
offset_list = *offset_list_ptr;
@@ -399,17 +444,17 @@ void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype
*start_offset_ptr = offset; /* calculated above */
- i = k = 0;
+ i_offset = k = 0;
j = st_index;
off = offset;
frd_size = ADIOI_MIN(old_frd_size, bufsize);
- while (i < bufsize) {
+ while (i_offset < bufsize) {
if (frd_size) {
offset_list[k] = off;
len_list[k] = frd_size;
k++;
}
- i += frd_size;
+ i_offset += frd_size;
end_offset = off + frd_size - 1;
/* Note: end_offset points to the last byte-offset that will be accessed.
@@ -417,7 +462,7 @@ void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype
if (off + frd_size < disp + flat_file->indices[j] +
flat_file->blocklens[j] +
- (ADIO_Offset) n_filetypes*filetype_extent)
+ n_filetypes* (ADIO_Offset)filetype_extent)
{
off += frd_size;
/* did not reach end of contiguous block in filetype.
@@ -425,17 +470,17 @@ void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype
*/
}
else {
- if (j < (flat_file->count - 1)) j++;
- else {
- /* hit end of flattened filetype;
- * start at beginning again
- */
- j = 0;
- n_filetypes++;
+ j = (j+1) % flat_file->count;
+ n_filetypes += (j == 0) ? 1 : 0;
+ while (flat_file->blocklens[j]==0) {
+ j = (j+1) % flat_file->count;
+ n_filetypes += (j == 0) ? 1 : 0;
+ /* hit end of flattened filetype; start at beginning
+ * again */
}
off = disp + flat_file->indices[j] +
- (ADIO_Offset) n_filetypes*filetype_extent;
- frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
+ n_filetypes* (ADIO_Offset)filetype_extent;
+ frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset);
}
}
@@ -443,15 +488,18 @@ void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype
if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;
*contig_access_count_ptr = contig_access_count;
- *end_offset_ptr = end_offset;
+ *end_offset_ptr = end_offset;
}
+#ifdef AGGREGATION_PROFILE
+ MPE_Log_event (5029, 0, NULL);
+#endif
}
static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype
datatype, int nprocs,
int myrank, ADIOI_Access
*others_req, ADIO_Offset *offset_list,
- int *len_list, int contig_access_count, ADIO_Offset
+ ADIO_Offset *len_list, int contig_access_count, ADIO_Offset
min_st_offset, ADIO_Offset fd_size,
ADIO_Offset *fd_start, ADIO_Offset *fd_end,
int *buf_idx, int *error_code)
@@ -466,12 +514,14 @@ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype
array from a file, where each local array is 8Mbytes, requiring
at least another 8Mbytes of temp space is unacceptable. */
- int i, j, m, size, ntimes, max_ntimes, buftype_is_contig;
+ int i, j, m, ntimes, max_ntimes, buftype_is_contig;
ADIO_Offset st_loc=-1, end_loc=-1, off, done, real_off, req_off;
char *read_buf = NULL, *tmp_buf;
int *curr_offlen_ptr, *count, *send_size, *recv_size;
- int *partial_send, *recd_from_proc, *start_pos, for_next_iter;
- int real_size, req_len, flag, for_curr_iter, rank;
+ int *partial_send, *recd_from_proc, *start_pos;
+ /* Not convinced end_loc-st_loc couldn't be > int, so make these offsets*/
+ ADIO_Offset real_size, size, for_curr_iter, for_next_iter;
+ int req_len, flag, rank;
MPI_Status status;
ADIOI_Flatlist_node *flat_buf=NULL;
MPI_Aint buftype_extent;
@@ -601,7 +651,7 @@ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype
minus what was satisfied in previous iteration
req_size = size corresponding to req_off */
- size = (int) (ADIOI_MIN(coll_bufsize, end_loc-st_loc+1-done));
+ size = ADIOI_MIN((unsigned)coll_bufsize, end_loc-st_loc+1-done);
real_off = off - for_curr_iter;
real_size = size + for_curr_iter;
@@ -609,7 +659,9 @@ static void ADIOI_Read_and_exch(ADIO_File fd, void *buf, MPI_Datatype
for_next_iter = 0;
for (i=0; icomm, requests+j);
j++;
- /* FPRINTF(stderr, "node %d, recv_size %d, tag %d \n",
- myrank, recv_size[i], myrank+i+100*iter); */
+#ifdef RDCOLL_DEBUG
+ DBG_FPRINTF(stderr, "node %d, recv_size %d, tag %d \n",
+ myrank, recv_size[i], myrank+i+100*iter);
+#endif
}
}
@@ -822,7 +883,7 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node
/* if noncontiguous, to the copies from the recv buffers */
if (!buftype_is_contig)
ADIOI_Fill_user_buffer(fd, buf, flat_buf, recv_buf,
- offset_list, len_list, recv_size,
+ offset_list, len_list, (unsigned*)recv_size,
requests, statuses, recd_from_proc,
nprocs, contig_access_count,
min_st_offset, fd_size, fd_start, fd_end,
@@ -840,9 +901,11 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node
if (recv_size[i]) ADIOI_Free(recv_buf[i]);
ADIOI_Free(recv_buf);
}
+#ifdef AGGREGATION_PROFILE
+ MPE_Log_event (5033, 0, NULL);
+#endif
}
-
#define ADIOI_BUF_INCR \
{ \
while (buf_incr) { \
@@ -856,7 +919,7 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node
n_buftypes++; \
} \
user_buf_idx = flat_buf->indices[flat_buf_idx] + \
- n_buftypes*buftype_extent; \
+ (ADIO_Offset)n_buftypes*(ADIO_Offset)buftype_extent; \
flat_buf_sz = flat_buf->blocklens[flat_buf_idx]; \
} \
buf_incr -= size_in_buf; \
@@ -868,9 +931,11 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node
{ \
while (size) { \
size_in_buf = ADIOI_MIN(size, flat_buf_sz); \
+ ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + user_buf_idx) == (ADIO_Offset)(MPIR_Upint)((MPIR_Upint)buf + user_buf_idx)); \
+ ADIOI_Assert(size_in_buf == (size_t)size_in_buf); \
memcpy(((char *) buf) + user_buf_idx, \
&(recv_buf[p][recv_buf_idx[p]]), size_in_buf); \
- recv_buf_idx[p] += size_in_buf; \
+ recv_buf_idx[p] += size_in_buf; /* already tested (size_t)size_in_buf*/ \
user_buf_idx += size_in_buf; \
flat_buf_sz -= size_in_buf; \
if (!flat_buf_sz) { \
@@ -880,7 +945,7 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node
n_buftypes++; \
} \
user_buf_idx = flat_buf->indices[flat_buf_idx] + \
- n_buftypes*buftype_extent; \
+ (ADIO_Offset)n_buftypes*(ADIO_Offset)buftype_extent; \
flat_buf_sz = flat_buf->blocklens[flat_buf_idx]; \
} \
size -= size_in_buf; \
@@ -889,11 +954,10 @@ static void ADIOI_R_Exchange_data(ADIO_File fd, void *buf, ADIOI_Flatlist_node
ADIOI_BUF_INCR \
}
-
static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
*flat_buf, char **recv_buf, ADIO_Offset
- *offset_list, int *len_list,
- int *recv_size,
+ *offset_list, ADIO_Offset *len_list,
+ unsigned *recv_size,
MPI_Request *requests, MPI_Status *statuses,
int *recd_from_proc, int nprocs,
int contig_access_count,
@@ -902,12 +966,15 @@ static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
ADIO_Offset *fd_end,
MPI_Aint buftype_extent)
{
+
/* this function is only called if buftype is not contig */
- int i, p, flat_buf_idx, size, buf_incr;
- int flat_buf_sz, size_in_buf, n_buftypes;
+ int i, p, flat_buf_idx;
+ ADIO_Offset flat_buf_sz, size_in_buf, buf_incr, size;
+ int n_buftypes;
ADIO_Offset off, len, rem_len, user_buf_idx;
- int *curr_from_proc, *done_from_proc, *recv_buf_idx;
+ /* Not sure unsigned is necessary, but it makes the math safer */
+ unsigned *curr_from_proc, *done_from_proc, *recv_buf_idx;
ADIOI_UNREFERENCED_ARG(requests);
ADIOI_UNREFERENCED_ARG(statuses);
@@ -918,9 +985,9 @@ static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
filled into user buffer in previous iterations
user_buf_idx = current location in user buffer
recv_buf_idx[p] = current location in recv_buf of proc. p */
- curr_from_proc = (int *) ADIOI_Malloc(nprocs * sizeof(int));
- done_from_proc = (int *) ADIOI_Malloc(nprocs * sizeof(int));
- recv_buf_idx = (int *) ADIOI_Malloc(nprocs * sizeof(int));
+ curr_from_proc = (unsigned *) ADIOI_Malloc(nprocs * sizeof(unsigned));
+ done_from_proc = (unsigned *) ADIOI_Malloc(nprocs * sizeof(unsigned));
+ recv_buf_idx = (unsigned *) ADIOI_Malloc(nprocs * sizeof(unsigned));
for (i=0; i < nprocs; i++) {
recv_buf_idx[i] = curr_from_proc[i] = 0;
@@ -938,7 +1005,7 @@ static void ADIOI_Fill_user_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
for (i=0; i done_from_proc[p]) {
if (done_from_proc[p] > curr_from_proc[p]) {
- size = (int)ADIOI_MIN(curr_from_proc[p] + len -
+ size = ADIOI_MIN(curr_from_proc[p] + len -
done_from_proc[p], recv_size[p]-recv_buf_idx[p]);
buf_incr = done_from_proc[p] - curr_from_proc[p];
ADIOI_BUF_INCR
- buf_incr = (int)(curr_from_proc[p]+len-done_from_proc[p]);
+ buf_incr = curr_from_proc[p]+len-done_from_proc[p];
+ ADIOI_Assert((done_from_proc[p] + size) == (unsigned)((ADIO_Offset)done_from_proc[p] + size));
curr_from_proc[p] = done_from_proc[p] + size;
ADIOI_BUF_COPY
}
else {
- size = (int)ADIOI_MIN(len,recv_size[p]-recv_buf_idx[p]);
- buf_incr = (int)len;
- curr_from_proc[p] += size;
+ size = ADIOI_MIN(len,recv_size[p]-recv_buf_idx[p]);
+ buf_incr = len;
+ ADIOI_Assert((curr_from_proc[p] + size) == (unsigned)((ADIO_Offset)curr_from_proc[p] + size));
+ curr_from_proc[p] += (unsigned) size;
ADIOI_BUF_COPY
}
}
else {
- curr_from_proc[p] += (int)len;
- buf_incr = (int)len;
+ ADIOI_Assert((curr_from_proc[p] + len) == (unsigned)((ADIO_Offset)curr_from_proc[p] + len));
+ curr_from_proc[p] += (unsigned) len;
+ buf_incr = len;
ADIOI_BUF_INCR
}
}
else {
- buf_incr = (int)len;
+ buf_incr = len;
ADIOI_BUF_INCR
}
off += len;
diff --git a/ompi/mca/io/romio/romio/adio/common/ad_read_str.c b/ompi/mca/io/romio/romio/adio/common/ad_read_str.c
index a387ab970a..61d3c73abe 100644
--- a/ompi/mca/io/romio/romio/adio/common/ad_read_str.c
+++ b/ompi/mca/io/romio/romio/adio/common/ad_read_str.c
@@ -12,12 +12,13 @@
{ \
if (req_off >= readbuf_off + readbuf_len) { \
readbuf_off = req_off; \
- readbuf_len = (int) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));\
+ readbuf_len = (unsigned) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));\
ADIO_ReadContig(fd, readbuf, readbuf_len, MPI_BYTE, \
ADIO_EXPLICIT_OFFSET, readbuf_off, &status1, error_code); \
if (*error_code != MPI_SUCCESS) return; \
} \
while (req_len > readbuf_off + readbuf_len - req_off) { \
+ ADIOI_Assert((readbuf_off + readbuf_len - req_off) == (int) (readbuf_off + readbuf_len - req_off));\
partial_read = (int) (readbuf_off + readbuf_len - req_off); \
tmp_buf = (char *) ADIOI_Malloc(partial_read); \
memcpy(tmp_buf, readbuf+readbuf_len-partial_read, partial_read); \
@@ -26,13 +27,14 @@
memcpy(readbuf, tmp_buf, partial_read); \
ADIOI_Free(tmp_buf); \
readbuf_off += readbuf_len-partial_read; \
- readbuf_len = (int) (partial_read + ADIOI_MIN(max_bufsize, \
+ readbuf_len = (unsigned) (partial_read + ADIOI_MIN(max_bufsize, \
end_offset-readbuf_off+1)); \
ADIO_ReadContig(fd, readbuf+partial_read, readbuf_len-partial_read, \
MPI_BYTE, ADIO_EXPLICIT_OFFSET, readbuf_off+partial_read, \
&status1, error_code); \
if (*error_code != MPI_SUCCESS) return; \
} \
+ ADIOI_Assert(req_len == (size_t)req_len); \
memcpy((char *)buf + userbuf_off, readbuf+req_off-readbuf_off, req_len); \
}
@@ -42,21 +44,25 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count,
ADIO_Offset offset, ADIO_Status *status, int
*error_code)
{
+
+
/* offset is in units of etype relative to the filetype. */
ADIOI_Flatlist_node *flat_buf, *flat_file;
- int i, j, k, brd_size, frd_size=0, st_index=0;
- int bufsize, num, size, sum, n_etypes_in_filetype, size_in_filetype;
- int n_filetypes, etype_in_filetype;
- ADIO_Offset abs_off_in_filetype=0;
- int filetype_size, etype_size, buftype_size, req_len, partial_read;
+ ADIO_Offset i_offset, new_brd_size, brd_size, size;
+ int i, j, k, st_index=0;
+ unsigned num, bufsize;
+ int n_etypes_in_filetype;
+ ADIO_Offset n_filetypes, etype_in_filetype, st_n_filetypes, size_in_filetype;
+ ADIO_Offset abs_off_in_filetype=0, new_frd_size, frd_size=0, st_frd_size;
+ int filetype_size, etype_size, buftype_size, partial_read;
MPI_Aint filetype_extent, buftype_extent;
int buf_count, buftype_is_contig, filetype_is_contig;
- ADIO_Offset userbuf_off;
+ ADIO_Offset userbuf_off, req_len, sum;
ADIO_Offset off, req_off, disp, end_offset=0, readbuf_off, start_off;
char *readbuf, *tmp_buf, *value;
- int flag, st_frd_size, st_n_filetypes, readbuf_len;
- int new_brd_size, new_frd_size, info_flag, max_bufsize;
+ int info_flag;
+ unsigned max_bufsize, readbuf_len;
ADIO_Status status1;
if (fd->hints->ds_read == ADIOI_HINT_DISABLE) {
@@ -90,12 +96,13 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count,
MPI_Type_extent(datatype, &buftype_extent);
etype_size = fd->etype_size;
+ ADIOI_Assert((buftype_size * count) == ((ADIO_Offset)(unsigned)buftype_size * (ADIO_Offset)count));
bufsize = buftype_size * count;
/* get max_bufsize from the info object. */
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
- MPI_Info_get(fd->info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, value,
+ ADIOI_Info_get(fd->info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, value,
&info_flag);
max_bufsize = atoi(value);
ADIOI_Free(value);
@@ -110,16 +117,16 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count,
while (flat_buf->type != datatype) flat_buf = flat_buf->next;
off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind :
- fd->disp + etype_size * offset;
+ fd->disp + (ADIO_Offset)etype_size * offset;
start_off = off;
end_offset = off + bufsize - 1;
readbuf_off = off;
readbuf = (char *) ADIOI_Malloc(max_bufsize);
- readbuf_len = (int) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));
+ readbuf_len = (unsigned) (ADIOI_MIN(max_bufsize, end_offset-readbuf_off+1));
/* if atomicity is true, lock (exclusive) the region to be accessed */
- if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) && (fd->file_system != ADIO_PVFS))
+ if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS))
ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
ADIO_ReadContig(fd, readbuf, readbuf_len, MPI_BYTE,
@@ -127,15 +134,17 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count,
if (*error_code != MPI_SUCCESS) return;
for (j=0; j<count; j++)
- for (i=0; i<flat_buf->count; i++) {
- userbuf_off = j*buftype_extent + flat_buf->indices[i];
- req_off = off;
- req_len = flat_buf->blocklens[i];
- ADIOI_BUFFERED_READ
- off += flat_buf->blocklens[i];
- }
+ {
+ for (i=0; i<flat_buf->count; i++) {
+ userbuf_off = (ADIO_Offset)j*(ADIO_Offset)buftype_extent + flat_buf->indices[i];
+ req_off = off;
+ req_len = flat_buf->blocklens[i];
+ ADIOI_BUFFERED_READ
+ off += flat_buf->blocklens[i];
+ }
+ }
- if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) && (fd->file_system != ADIO_PVFS))
+ if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS))
ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;
@@ -151,29 +160,36 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count,
disp = fd->disp;
if (file_ptr_type == ADIO_INDIVIDUAL) {
- offset = fd->fp_ind; /* in bytes */
- n_filetypes = -1;
- flag = 0;
- while (!flag) {
- n_filetypes++;
- for (i=0; i<flat_file->count; i++) {
- if (disp + flat_file->indices[i] +
- (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i]
- >= offset) {
- st_index = i;
- frd_size = (int) (disp + flat_file->indices[i] +
- (ADIO_Offset) n_filetypes*filetype_extent
- + flat_file->blocklens[i] - offset);
- flag = 1;
- break;
- }
+ /* Wei-keng reworked type processing to be a bit more efficient */
+ offset = fd->fp_ind - disp;
+ n_filetypes = (offset - flat_file->indices[0]) / filetype_extent;
+ offset -= (ADIO_Offset)n_filetypes * filetype_extent;
+ /* now offset is local to this extent */
+
+ /* find the block where offset is located, skip blocklens[i]==0 */
+ for (i=0; i<flat_file->count; i++) {
+ ADIO_Offset dist;
+ if (flat_file->blocklens[i] == 0) continue;
+ dist = flat_file->indices[i] + flat_file->blocklens[i] - offset;
+ /* frd_size is from offset to the end of block i */
+ if (dist == 0) {
+ i++;
+ offset = flat_file->indices[i];
+ frd_size = flat_file->blocklens[i];
+ break;
+ }
+ if (dist > 0) {
+ frd_size = dist;
+ break;
}
}
- }
+ st_index = i; /* starting index in flat_file->indices[] */
+ offset += disp + (ADIO_Offset)n_filetypes*filetype_extent;
+ }
else {
n_etypes_in_filetype = filetype_size/etype_size;
- n_filetypes = (int) (offset / n_etypes_in_filetype);
- etype_in_filetype = (int) (offset % n_etypes_in_filetype);
+ n_filetypes = offset / n_etypes_in_filetype;
+ etype_in_filetype = offset % n_etypes_in_filetype;
size_in_filetype = etype_in_filetype * etype_size;
sum = 0;
@@ -189,36 +205,67 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count,
}
/* abs. offset in bytes in the file */
- offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype;
+ offset = disp + (ADIO_Offset) n_filetypes*filetype_extent +
+ abs_off_in_filetype;
}
start_off = offset;
+ /* Wei-keng Liao: read request is within a single flat_file contig
+ * block e.g. with subarray types that actually describe the whole
+ * array */
+ if (buftype_is_contig && bufsize <= frd_size) {
+ ADIO_ReadContig(fd, buf, bufsize, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
+ offset, status, error_code);
+
+ if (file_ptr_type == ADIO_INDIVIDUAL) {
+ /* update MPI-IO file pointer to point to the first byte that
+ * can be accessed in the fileview. */
+ fd->fp_ind = offset + bufsize;
+ if (bufsize == frd_size) {
+ do {
+ st_index++;
+ if (st_index == flat_file->count) {
+ st_index = 0;
+ n_filetypes++;
+ }
+ } while (flat_file->blocklens[st_index] == 0);
+ fd->fp_ind = disp + flat_file->indices[st_index]
+ + n_filetypes*filetype_extent;
+ }
+ }
+ fd->fp_sys_posn = -1; /* set it to null. */
+#ifdef HAVE_STATUS_SET_BYTES
+ MPIR_Status_set_bytes(status, datatype, bufsize);
+#endif
+ return;
+ }
+
/* Calculate end_offset, the last byte-offset that will be accessed.
e.g., if start_offset=0 and 100 bytes to be read, end_offset=99*/
st_frd_size = frd_size;
st_n_filetypes = n_filetypes;
- i = 0;
+ i_offset = 0;
j = st_index;
off = offset;
frd_size = ADIOI_MIN(st_frd_size, bufsize);
- while (i < bufsize) {
- i += frd_size;
+ while (i_offset < bufsize) {
+ i_offset += frd_size;
end_offset = off + frd_size - 1;
- if (j < (flat_file->count - 1)) j++;
- else {
- j = 0;
- n_filetypes++;
+ j = (j+1) % flat_file->count;
+ n_filetypes += (j == 0) ? 1 : 0;
+ while (flat_file->blocklens[j]==0) {
+ j = (j+1) % flat_file->count;
+ n_filetypes += (j == 0) ? 1 : 0;
}
-
- off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent;
- frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
+ off = disp + flat_file->indices[j] + n_filetypes*(ADIO_Offset)filetype_extent;
+ frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset);
}
/* if atomicity is true, lock (exclusive) the region to be accessed */
- if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) && (fd->file_system != ADIO_PVFS))
+ if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS))
ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
readbuf_off = 0;
@@ -230,12 +277,12 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count,
/* contiguous in memory, noncontiguous in file. should be the most
common case. */
- i = 0;
+ i_offset = 0;
j = st_index;
off = offset;
n_filetypes = st_n_filetypes;
frd_size = ADIOI_MIN(st_frd_size, bufsize);
- while (i < bufsize) {
+ while (i_offset < bufsize) {
if (frd_size) {
/* TYPE_UB and TYPE_LB can result in
frd_size = 0. save system call in such cases */
@@ -244,25 +291,26 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count,
req_off = off;
req_len = frd_size;
- userbuf_off = i;
+ userbuf_off = i_offset;
ADIOI_BUFFERED_READ
}
- i += frd_size;
+ i_offset += frd_size;
if (off + frd_size < disp + flat_file->indices[j] +
- flat_file->blocklens[j] + (ADIO_Offset) n_filetypes*filetype_extent)
+ flat_file->blocklens[j] + n_filetypes*(ADIO_Offset)filetype_extent)
off += frd_size;
/* did not reach end of contiguous block in filetype.
no more I/O needed. off is incremented by frd_size. */
else {
- if (j < (flat_file->count - 1)) j++;
- else {
- j = 0;
- n_filetypes++;
- }
+ j = (j+1) % flat_file->count;
+ n_filetypes += (j == 0) ? 1 : 0;
+ while (flat_file->blocklens[j]==0) {
+ j = (j+1) % flat_file->count;
+ n_filetypes += (j == 0) ? 1 : 0;
+ }
off = disp + flat_file->indices[j] +
- (ADIO_Offset) n_filetypes*filetype_extent;
- frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
+ n_filetypes*(ADIO_Offset)filetype_extent;
+ frd_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset);
}
}
}
@@ -274,7 +322,7 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count,
while (flat_buf->type != datatype) flat_buf = flat_buf->next;
k = num = buf_count = 0;
- i = (int) (flat_buf->indices[0]);
+ i_offset = flat_buf->indices[0];
j = st_index;
off = offset;
n_filetypes = st_n_filetypes;
@@ -289,7 +337,7 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count,
req_off = off;
req_len = size;
- userbuf_off = i;
+ userbuf_off = i_offset;
ADIOI_BUFFERED_READ
}
@@ -298,18 +346,18 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count,
if (size == frd_size) {
/* reached end of contiguous block in file */
- if (j < (flat_file->count - 1)) j++;
- else {
- j = 0;
- n_filetypes++;
- }
-
+ j = (j+1) % flat_file->count;
+ n_filetypes += (j == 0) ? 1 : 0;
+ while (flat_file->blocklens[j]==0) {
+ j = (j+1) % flat_file->count;
+ n_filetypes += (j == 0) ? 1 : 0;
+ }
off = disp + flat_file->indices[j] +
- (ADIO_Offset) n_filetypes*filetype_extent;
+ n_filetypes*(ADIO_Offset)filetype_extent;
new_frd_size = flat_file->blocklens[j];
if (size != brd_size) {
- i += size;
+ i_offset += size;
new_brd_size -= size;
}
}
@@ -319,7 +367,7 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count,
k = (k + 1)%flat_buf->count;
buf_count++;
- i = (int) (buftype_extent*(buf_count/flat_buf->count) +
+ i_offset = ((ADIO_Offset)buftype_extent*(ADIO_Offset)(buf_count/flat_buf->count) +
flat_buf->indices[k]);
new_brd_size = flat_buf->blocklens[k];
if (size != frd_size) {
@@ -327,13 +375,14 @@ void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count,
new_frd_size -= size;
}
}
+ ADIOI_Assert(((ADIO_Offset)num + size) == (unsigned)(num + size));
num += size;
frd_size = new_frd_size;
brd_size = new_brd_size;
}
}
- if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) && (fd->file_system != ADIO_PVFS))
+ if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS))
ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off;
diff --git a/ompi/mca/io/romio/romio/adio/common/ad_read_str_naive.c b/ompi/mca/io/romio/romio/adio/common/ad_read_str_naive.c
index 26d00e3e13..e2d74c4064 100644
--- a/ompi/mca/io/romio/romio/adio/common/ad_read_str_naive.c
+++ b/ompi/mca/io/romio/romio/adio/common/ad_read_str_naive.c
@@ -16,11 +16,13 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count,
/* offset is in units of etype relative to the filetype. */
ADIOI_Flatlist_node *flat_buf, *flat_file;
- int brd_size, frd_size=0, b_index;
- int bufsize, size, sum, n_etypes_in_filetype, size_in_filetype;
- int n_filetypes, etype_in_filetype;
+ ADIO_Offset size, brd_size, frd_size=0, req_len, sum;
+ int b_index;
+ int n_etypes_in_filetype;
+ ADIO_Offset n_filetypes, etype_in_filetype;
ADIO_Offset abs_off_in_filetype=0;
- int filetype_size, etype_size, buftype_size, req_len;
+ unsigned bufsize, filetype_size, buftype_size, size_in_filetype;
+ int etype_size;
MPI_Aint filetype_extent, buftype_extent;
int buf_count, buftype_is_contig, filetype_is_contig;
ADIO_Offset userbuf_off;
@@ -32,17 +34,18 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count,
ADIOI_Datatype_iscontig(buftype, &buftype_is_contig);
ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
- MPI_Type_size(fd->filetype, &filetype_size);
+ MPI_Type_size(fd->filetype, (int*)&filetype_size);
if ( ! filetype_size ) {
*error_code = MPI_SUCCESS;
return;
}
MPI_Type_extent(fd->filetype, &filetype_extent);
- MPI_Type_size(buftype, &buftype_size);
+ MPI_Type_size(buftype,(int*) &buftype_size);
MPI_Type_extent(buftype, &buftype_extent);
etype_size = fd->etype_size;
+ ADIOI_Assert((buftype_size * count) == ((ADIO_Offset)buftype_size * (ADIO_Offset)count));
bufsize = buftype_size * count;
/* contiguous in buftype and filetype is handled elsewhere */
@@ -62,8 +65,7 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count,
end_offset = off + bufsize - 1;
/* if atomicity is true, lock (exclusive) the region to be accessed */
- if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) &&
- (fd->file_system != ADIO_PVFS) && (fd->file_system != ADIO_PVFS2))
+ if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS))
{
ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
}
@@ -73,11 +75,13 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count,
*/
for (b_count=0; b_count < count; b_count++) {
for (b_index=0; b_index < flat_buf->count; b_index++) {
- userbuf_off = b_count*buftype_extent +
+ userbuf_off = (ADIO_Offset)b_count*(ADIO_Offset)buftype_extent +
flat_buf->indices[b_index];
req_off = off;
req_len = flat_buf->blocklens[b_index];
+ ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + userbuf_off) == (ADIO_Offset)(MPIR_Upint)((MPIR_Upint)buf + userbuf_off));
+ ADIOI_Assert(req_len == (int) req_len);
ADIO_ReadContig(fd,
(char *) buf + userbuf_off,
req_len,
@@ -93,8 +97,7 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count,
}
}
- if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) &&
- (fd->file_system != ADIO_PVFS) && (fd->file_system != ADIO_PVFS2))
+ if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS))
{
ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
}
@@ -104,7 +107,9 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count,
}
else { /* noncontiguous in file */
- int f_index, st_frd_size, st_index = 0, st_n_filetypes;
+ int f_index, st_index = 0;
+ ADIO_Offset st_n_filetypes;
+ ADIO_Offset st_frd_size;
int flag;
/* First we're going to calculate a set of values for use in all
@@ -134,15 +139,15 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count,
n_filetypes++;
for (f_index=0; f_index < flat_file->count; f_index++) {
if (disp + flat_file->indices[f_index] +
- (ADIO_Offset) n_filetypes*filetype_extent +
+ n_filetypes*(ADIO_Offset)filetype_extent +
flat_file->blocklens[f_index] >= start_off)
{
/* this block contains our starting position */
st_index = f_index;
- frd_size = (int) (disp + flat_file->indices[f_index] +
- (ADIO_Offset) n_filetypes*filetype_extent +
- flat_file->blocklens[f_index] - start_off);
+ frd_size = disp + flat_file->indices[f_index] +
+ n_filetypes*(ADIO_Offset)filetype_extent +
+ flat_file->blocklens[f_index] - start_off;
flag = 1;
break;
}
@@ -151,9 +156,9 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count,
}
else {
n_etypes_in_filetype = filetype_size/etype_size;
- n_filetypes = (int) (offset / n_etypes_in_filetype);
+ n_filetypes = offset / n_etypes_in_filetype;
etype_in_filetype = (int) (offset % n_etypes_in_filetype);
- size_in_filetype = etype_in_filetype * etype_size;
+ size_in_filetype = (unsigned)etype_in_filetype * (unsigned)etype_size;
sum = 0;
for (f_index=0; f_index < flat_file->count; f_index++) {
@@ -169,7 +174,7 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count,
}
/* abs. offset in bytes in the file */
- start_off = disp + (ADIO_Offset) n_filetypes*filetype_extent +
+ start_off = disp + n_filetypes*(ADIO_Offset)filetype_extent +
abs_off_in_filetype;
}
@@ -198,9 +203,9 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count,
}
off = disp + flat_file->indices[f_index] +
- (ADIO_Offset) n_filetypes*filetype_extent;
+ n_filetypes*(ADIO_Offset)filetype_extent;
frd_size = ADIOI_MIN(flat_file->blocklens[f_index],
- bufsize-(int)userbuf_off);
+ bufsize-(unsigned)userbuf_off);
}
/* End of calculations. At this point the following values have
@@ -213,8 +218,7 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count,
*/
/* if atomicity is true, lock (exclusive) the region to be accessed */
- if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) &&
- (fd->file_system != ADIO_PVFS) && (fd->file_system != ADIO_PVFS2))
+ if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS))
{
ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
}
@@ -238,6 +242,8 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count,
req_off = off;
req_len = frd_size;
+ ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + userbuf_off) == (ADIO_Offset)(MPIR_Upint)((MPIR_Upint)buf + userbuf_off));
+ ADIOI_Assert(req_len == (int) req_len);
ADIO_ReadContig(fd,
(char *) buf + userbuf_off,
req_len,
@@ -252,7 +258,7 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count,
if (off + frd_size < disp + flat_file->indices[f_index] +
flat_file->blocklens[f_index] +
- (ADIO_Offset) n_filetypes*filetype_extent)
+ n_filetypes*(ADIO_Offset)filetype_extent)
{
/* important that this value be correct, as it is
* used to set the offset in the fd near the end of
@@ -270,14 +276,14 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count,
n_filetypes++;
}
off = disp + flat_file->indices[f_index] +
- (ADIO_Offset) n_filetypes*filetype_extent;
+ n_filetypes*(ADIO_Offset)filetype_extent;
frd_size = ADIOI_MIN(flat_file->blocklens[f_index],
- bufsize-(int)userbuf_off);
+ bufsize-(unsigned)userbuf_off);
}
}
}
else {
- int i, tmp_bufsize = 0;
+ ADIO_Offset i_offset, tmp_bufsize = 0;
/* noncontiguous in memory as well as in file */
ADIOI_Flatten_datatype(buftype);
@@ -285,7 +291,7 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count,
while (flat_buf->type != buftype) flat_buf = flat_buf->next;
b_index = buf_count = 0;
- i = (int) (flat_buf->indices[0]);
+ i_offset = flat_buf->indices[0];
f_index = st_index;
off = start_off;
n_filetypes = st_n_filetypes;
@@ -294,14 +300,16 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count,
/* while we haven't read size * count bytes, keep going */
while (tmp_bufsize < bufsize) {
- int new_brd_size = brd_size, new_frd_size = frd_size;
+ ADIO_Offset new_brd_size = brd_size, new_frd_size = frd_size;
size = ADIOI_MIN(frd_size, brd_size);
if (size) {
req_off = off;
req_len = size;
- userbuf_off = i;
+ userbuf_off = i_offset;
+ ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + userbuf_off) == (ADIO_Offset)(MPIR_Upint)((MPIR_Upint)buf + userbuf_off));
+ ADIOI_Assert(req_len == (int) req_len);
ADIO_ReadContig(fd,
(char *) buf + userbuf_off,
req_len,
@@ -322,11 +330,11 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count,
}
off = disp + flat_file->indices[f_index] +
- (ADIO_Offset) n_filetypes*filetype_extent;
+ n_filetypes*(ADIO_Offset)filetype_extent;
new_frd_size = flat_file->blocklens[f_index];
if (size != brd_size) {
- i += size;
+ i_offset += size;
new_brd_size -= size;
}
}
@@ -336,8 +344,8 @@ void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count,
b_index = (b_index + 1)%flat_buf->count;
buf_count++;
- i = (int) (buftype_extent*(buf_count/flat_buf->count) +
- flat_buf->indices[b_index]);
+ i_offset = buftype_extent*(buf_count/flat_buf->count) +
+ flat_buf->indices[b_index];
new_brd_size = flat_buf->blocklens[b_index];
if (size != frd_size) {
off += size;
diff --git a/ompi/mca/io/romio/romio/adio/common/ad_resize.c b/ompi/mca/io/romio/romio/adio/common/ad_resize.c
index 18e1d1ff17..bf38296c58 100644
--- a/ompi/mca/io/romio/romio/adio/common/ad_resize.c
+++ b/ompi/mca/io/romio/romio/adio/common/ad_resize.c
@@ -20,7 +20,8 @@ void ADIOI_GEN_Resize(ADIO_File fd, ADIO_Offset size, int *error_code)
/* first aggregator performs ftruncate() */
if (rank == fd->hints->ranklist[0]) {
- err = ftruncate(fd->fd_sys, size);
+ ADIOI_Assert(size == (off_t) size);
+ err = ftruncate(fd->fd_sys, (off_t)size);
}
/* bcast return value */
diff --git a/ompi/mca/io/romio/romio/adio/common/ad_seek.c b/ompi/mca/io/romio/romio/adio/common/ad_seek.c
index 18482f065c..2fc19c54f4 100644
--- a/ompi/mca/io/romio/romio/adio/common/ad_seek.c
+++ b/ompi/mca/io/romio/romio/adio/common/ad_seek.c
@@ -22,10 +22,12 @@ ADIO_Offset ADIOI_GEN_SeekIndividual(ADIO_File fd, ADIO_Offset offset,
ADIO_Offset off;
ADIOI_Flatlist_node *flat_file;
- int i, n_etypes_in_filetype, n_filetypes, etype_in_filetype;
+ int i;
+ ADIO_Offset n_etypes_in_filetype, n_filetypes, etype_in_filetype;
ADIO_Offset abs_off_in_filetype=0;
- int size_in_filetype, sum;
- int filetype_size, etype_size, filetype_is_contig;
+ ADIO_Offset size_in_filetype, sum;
+ unsigned filetype_size;
+ int etype_size, filetype_is_contig;
MPI_Aint filetype_extent;
ADIOI_UNREFERENCED_ARG(whence);
@@ -33,13 +35,13 @@ ADIO_Offset ADIOI_GEN_SeekIndividual(ADIO_File fd, ADIO_Offset offset,
ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
etype_size = fd->etype_size;
- if (filetype_is_contig) off = fd->disp + etype_size * offset;
+ if (filetype_is_contig) off = fd->disp + (ADIO_Offset)etype_size * offset;
else {
flat_file = ADIOI_Flatlist;
while (flat_file->type != fd->filetype) flat_file = flat_file->next;
MPI_Type_extent(fd->filetype, &filetype_extent);
- MPI_Type_size(fd->filetype, &filetype_size);
+ MPI_Type_size(fd->filetype, (int*)&filetype_size);
if ( ! filetype_size ) {
/* Since offset relative to the filetype size, we can't
do compute the offset when that result is zero.
@@ -49,8 +51,8 @@ ADIO_Offset ADIOI_GEN_SeekIndividual(ADIO_File fd, ADIO_Offset offset,
}
n_etypes_in_filetype = filetype_size/etype_size;
- n_filetypes = (int) (offset / n_etypes_in_filetype);
- etype_in_filetype = (int) (offset % n_etypes_in_filetype);
+ n_filetypes = offset / n_etypes_in_filetype;
+ etype_in_filetype = offset % n_etypes_in_filetype;
size_in_filetype = etype_in_filetype * etype_size;
sum = 0;
@@ -64,7 +66,7 @@ ADIO_Offset ADIOI_GEN_SeekIndividual(ADIO_File fd, ADIO_Offset offset,
}
/* abs. offset in bytes in the file */
- off = fd->disp + (ADIO_Offset) n_filetypes * filetype_extent +
+ off = fd->disp + n_filetypes * filetype_extent +
abs_off_in_filetype;
}
diff --git a/ompi/mca/io/romio/romio/adio/common/ad_subarray.c b/ompi/mca/io/romio/romio/adio/common/ad_subarray.c
index 4f99ae2535..f9a32e3c02 100644
--- a/ompi/mca/io/romio/romio/adio/common/ad_subarray.c
+++ b/ompi/mca/io/romio/romio/adio/common/ad_subarray.c
@@ -32,9 +32,9 @@ int ADIO_Type_create_subarray(int ndims,
array_of_subsizes[0],
array_of_sizes[0], oldtype, &tmp1);
- size = array_of_sizes[0]*extent;
+ size = (MPI_Aint)array_of_sizes[0]*extent;
for (i=2; i=0; i--) {
- size *= array_of_sizes[i+1];
+ size *= (MPI_Aint)array_of_sizes[i+1];
MPI_Type_hvector(array_of_subsizes[i], 1, size, tmp1, &tmp2);
MPI_Type_free(&tmp1);
tmp1 = tmp2;
@@ -74,15 +74,15 @@ int ADIO_Type_create_subarray(int ndims,
disps[1] = array_of_starts[ndims-1];
size = 1;
for (i=ndims-2; i>=0; i--) {
- size *= array_of_sizes[i+1];
- disps[1] += size*array_of_starts[i];
+ size *= (MPI_Aint)array_of_sizes[i+1];
+ disps[1] += size*(MPI_Aint)array_of_starts[i];
}
}
disps[1] *= extent;
disps[2] = extent;
- for (i=0; i
#endif
+#ifdef AGGREGATION_PROFILE
+#include "mpe.h"
+#endif
void ADIOI_GEN_WriteContig(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status,
int *error_code)
{
- int err = -1, datatype_size, len;
+ int err = -1, datatype_size;
+ ADIO_Offset len;
static char myname[] = "ADIOI_GEN_WRITECONTIG";
+#ifdef AGGREGATION_PROFILE
+ MPE_Log_event (5036, 0, NULL);
+#endif
+
MPI_Type_size(datatype, &datatype_size);
- len = datatype_size * count;
+ len = (ADIO_Offset)datatype_size * (ADIO_Offset)count;
+ ADIOI_Assert(len == (unsigned int) len); /* read takes an unsigned int parm */
if (file_ptr_type == ADIO_INDIVIDUAL) {
offset = fd->fp_ind;
@@ -50,7 +59,7 @@ void ADIOI_GEN_WriteContig(ADIO_File fd, void *buf, int count,
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
- err = write(fd->fd_sys, buf, len);
+ err = write(fd->fd_sys, buf, (unsigned int)len);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
@@ -77,4 +86,7 @@ void ADIOI_GEN_WriteContig(ADIO_File fd, void *buf, int count,
#endif
*error_code = MPI_SUCCESS;
+#ifdef AGGREGATION_PROFILE
+ MPE_Log_event (5037, 0, NULL);
+#endif
}
diff --git a/ompi/mca/io/romio/romio/adio/common/ad_write_coll.c b/ompi/mca/io/romio/romio/adio/common/ad_write_coll.c
index f71ec67860..c547b2a367 100644
--- a/ompi/mca/io/romio/romio/adio/common/ad_write_coll.c
+++ b/ompi/mca/io/romio/romio/adio/common/ad_write_coll.c
@@ -8,18 +8,22 @@
#include "adio.h"
#include "adio_extern.h"
+#ifdef AGGREGATION_PROFILE
+#include "mpe.h"
+#endif
+
/* prototypes of functions used for collective writes only. */
static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype
datatype, int nprocs, int myrank,
ADIOI_Access
*others_req, ADIO_Offset *offset_list,
- int *len_list, int contig_access_count, ADIO_Offset
+ ADIO_Offset *len_list, int contig_access_count, ADIO_Offset
min_st_offset, ADIO_Offset fd_size,
ADIO_Offset *fd_start, ADIO_Offset *fd_end,
int *buf_idx, int *error_code);
static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
ADIOI_Flatlist_node *flat_buf, ADIO_Offset
- *offset_list, int *len_list, int *send_size,
+ *offset_list, ADIO_Offset *len_list, int *send_size,
int *recv_size, ADIO_Offset off, int size,
int *count, int *start_pos, int *partial_recv,
int *sent_to_proc, int nprocs,
@@ -33,7 +37,7 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
MPI_Aint buftype_extent, int *buf_idx, int *error_code);
static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
*flat_buf, char **send_buf, ADIO_Offset
- *offset_list, int *len_list, int *send_size,
+ *offset_list, ADIO_Offset *len_list, int *send_size,
MPI_Request *requests, int *sent_to_proc,
int nprocs, int myrank,
int contig_access_count, ADIO_Offset
@@ -42,7 +46,7 @@ static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
int *send_buf_idx, int *curr_to_proc,
int *done_to_proc, int iter,
MPI_Aint buftype_extent);
-static void ADIOI_Heap_merge(ADIOI_Access *others_req, int *count,
+void ADIOI_Heap_merge(ADIOI_Access *others_req, int *count,
ADIO_Offset *srt_off, int *srt_len, int *start_pos,
int nprocs, int nprocs_recv, int total_elements);
@@ -72,9 +76,15 @@ void ADIOI_GEN_WriteStridedColl(ADIO_File fd, void *buf, int count,
ADIO_Offset orig_fp, start_offset, end_offset, fd_size, min_st_offset, off;
ADIO_Offset *offset_list = NULL, *st_offsets = NULL, *fd_start = NULL,
*fd_end = NULL, *end_offsets = NULL;
- int *buf_idx = NULL, *len_list = NULL;
+ int *buf_idx = NULL;
+ ADIO_Offset *len_list = NULL;
int old_error, tmp_error;
+ if (fd->hints->cb_pfr != ADIOI_HINT_DISABLE) {
+ ADIOI_IOStridedColl (fd, buf, count, ADIOI_WRITE, datatype,
+ file_ptr_type, offset, status, error_code);
+ return;
+ }
MPI_Comm_size(fd->comm, &nprocs);
MPI_Comm_rank(fd->comm, &myrank);
@@ -136,7 +146,7 @@ void ADIOI_GEN_WriteStridedColl(ADIO_File fd, void *buf, int count,
if (buftype_is_contig && filetype_is_contig) {
if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
- off = fd->disp + (fd->etype_size) * offset;
+ off = fd->disp + (ADIO_Offset)(fd->etype_size) * offset;
ADIO_WriteContig(fd, buf, count, datatype,
ADIO_EXPLICIT_OFFSET,
off, status, error_code);
@@ -156,7 +166,9 @@ void ADIOI_GEN_WriteStridedColl(ADIO_File fd, void *buf, int count,
ADIOI_Calc_file_domains(st_offsets, end_offsets, nprocs,
nprocs_for_coll, &min_st_offset,
- &fd_start, &fd_end, &fd_size);
+ &fd_start, &fd_end,
+ fd->hints->min_fdomain_size, &fd_size,
+ fd->hints->striping_unit);
/* calculate what portions of the access requests of this process are
@@ -225,6 +237,9 @@ void ADIOI_GEN_WriteStridedColl(ADIO_File fd, void *buf, int count,
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_postwrite_b, 0, NULL );
#endif
+#ifdef AGGREGATION_PROFILE
+ MPE_Log_event (5012, 0, NULL);
+#endif
if ( (old_error != MPI_SUCCESS) && (old_error != MPI_ERR_IO) )
*error_code = old_error;
@@ -264,6 +279,9 @@ void ADIOI_GEN_WriteStridedColl(ADIO_File fd, void *buf, int count,
#endif
fd->fp_sys_posn = -1; /* set it to null. */
+#ifdef AGGREGATION_PROFILE
+ MPE_Log_event (5013, 0, NULL);
+#endif
}
@@ -276,9 +294,8 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype
int myrank,
ADIOI_Access
*others_req, ADIO_Offset *offset_list,
- int *len_list, int contig_access_count,
- ADIO_Offset
- min_st_offset, ADIO_Offset fd_size,
+ ADIO_Offset *len_list, int contig_access_count,
+ ADIO_Offset min_st_offset, ADIO_Offset fd_size,
ADIO_Offset *fd_start, ADIO_Offset *fd_end,
int *buf_idx, int *error_code)
{
@@ -291,7 +308,9 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype
array to a file, where each local array is 8Mbytes, requiring
at least another 8Mbytes of temp space is unacceptable. */
- int hole, i, j, m, size=0, ntimes, max_ntimes, buftype_is_contig;
+ /* Not convinced end_loc-st_loc couldn't be > int, so make these offsets*/
+ ADIO_Offset size=0;
+ int hole, i, j, m, ntimes, max_ntimes, buftype_is_contig;
ADIO_Offset st_loc=-1, end_loc=-1, off, done, req_off;
char *write_buf=NULL;
int *curr_offlen_ptr, *count, *send_size, req_len, *recv_size;
@@ -312,7 +331,7 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype
That gives the no. of communication phases as well. */
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
- MPI_Info_get(fd->info, "cb_buffer_size", MPI_MAX_INFO_VAL, value,
+ ADIOI_Info_get(fd->info, "cb_buffer_size", MPI_MAX_INFO_VAL, value,
&info_flag);
coll_bufsize = atoi(value);
ADIOI_Free(value);
@@ -421,7 +440,7 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype
for (i=0; i < nprocs; i++) count[i] = recv_size[i] = 0;
- size = (int) (ADIOI_MIN(coll_bufsize, end_loc-st_loc+1-done));
+ size = ADIOI_MIN((unsigned)coll_bufsize, end_loc-st_loc+1-done);
for (i=0; i < nprocs; i++) {
if (others_req[i].count) {
@@ -445,12 +464,14 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype
}
if (req_off < off + size) {
count[i]++;
+ ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)write_buf)+req_off-off) == (ADIO_Offset)(MPIR_Upint)(write_buf+req_off-off));
MPI_Address(write_buf+req_off-off,
&(others_req[i].mem_ptrs[j]));
- recv_size[i] += (int)(ADIOI_MIN(off + (ADIO_Offset)size -
- req_off, req_len));
+ ADIOI_Assert((off + size - req_off) == (int)(off + size - req_off));
+ recv_size[i] += (int)(ADIOI_MIN(off + size - req_off,
+ (unsigned)req_len));
- if (off+size-req_off < req_len)
+ if (off+size-req_off < (unsigned)req_len)
{
partial_recv[i] = (int) (off + size - req_off);
@@ -494,7 +515,8 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype
if (count[i]) flag = 1;
if (flag) {
- ADIO_WriteContig(fd, write_buf, size, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
+ ADIOI_Assert(size == (int)size);
+ ADIO_WriteContig(fd, write_buf, (int)size, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
off, &status, error_code);
if (*error_code != MPI_SUCCESS) return;
}
@@ -537,7 +559,7 @@ static void ADIOI_Exch_and_write(ADIO_File fd, void *buf, MPI_Datatype
*/
static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
ADIOI_Flatlist_node *flat_buf, ADIO_Offset
- *offset_list, int *len_list, int *send_size,
+ *offset_list, ADIO_Offset *len_list, int *send_size,
int *recv_size, ADIO_Offset off, int size,
int *count, int *start_pos,
int *partial_recv,
@@ -558,7 +580,7 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
MPI_Request *requests, *send_req;
MPI_Datatype *recv_types;
MPI_Status *statuses, status;
- int *srt_len, sum, sum_recv;
+ int *srt_len, sum;
ADIO_Offset *srt_off;
static char myname[] = "ADIOI_W_EXCHANGE_DATA";
@@ -617,26 +639,27 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
}
ADIOI_Free(tmp_len);
-/* check if there are any holes */
+ /* check if there are any holes. If yes, must do read-modify-write.
+ * holes can be in three places. 'middle' is what you'd expect: the
+ * processes are operating on noncontiguous data. But holes can also show
+ * up at the beginning or end of the file domain (see John Bent ROMIO REQ
+ * #835). Missing these holes would result in us writing more data than
+ * received by everyone else. */
+
*hole = 0;
- for (i=0; i srt_len[0]) srt_len[0] = new_len;
+ }
+ else
+ break;
}
- if (size > sum_recv) *hole = 1;
+ if (i < sum || size != srt_len[0]) /* hole in middle or end */
+ *hole = 1;
}
ADIOI_Free(srt_off);
@@ -687,6 +710,9 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
/* post sends. if buftype_is_contig, data can be directly sent from
user buf at location given by buf_idx. else use send_buf. */
+#ifdef AGGREGATION_PROFILE
+ MPE_Log_event (5032, 0, NULL);
+#endif
if (buftype_is_contig) {
j = 0;
for (i=0; i < nprocs; i++)
@@ -761,6 +787,9 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
MPI_Waitall(nprocs_send+nprocs_recv, requests, statuses);
#endif
+#ifdef AGGREGATION_PROFILE
+ MPE_Log_event (5033, 0, NULL);
+#endif
ADIOI_Free(statuses);
ADIOI_Free(requests);
if (!buftype_is_contig && nprocs_send) {
@@ -770,7 +799,6 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
}
}
-
#define ADIOI_BUF_INCR \
{ \
while (buf_incr) { \
@@ -784,7 +812,7 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
n_buftypes++; \
} \
user_buf_idx = flat_buf->indices[flat_buf_idx] + \
- n_buftypes*buftype_extent; \
+ (ADIO_Offset)n_buftypes*(ADIO_Offset)buftype_extent; \
flat_buf_sz = flat_buf->blocklens[flat_buf_idx]; \
} \
buf_incr -= size_in_buf; \
@@ -796,6 +824,8 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
{ \
while (size) { \
size_in_buf = ADIOI_MIN(size, flat_buf_sz); \
+ ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + user_buf_idx) == (ADIO_Offset)(MPIR_Upint)((MPIR_Upint)buf + user_buf_idx)); \
+ ADIOI_Assert(size_in_buf == (size_t)size_in_buf); \
memcpy(&(send_buf[p][send_buf_idx[p]]), \
((char *) buf) + user_buf_idx, size_in_buf); \
send_buf_idx[p] += size_in_buf; \
@@ -808,7 +838,7 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
n_buftypes++; \
} \
user_buf_idx = flat_buf->indices[flat_buf_idx] + \
- n_buftypes*buftype_extent; \
+ (ADIO_Offset)n_buftypes*(ADIO_Offset)buftype_extent; \
flat_buf_sz = flat_buf->blocklens[flat_buf_idx]; \
} \
size -= size_in_buf; \
@@ -819,9 +849,11 @@ static void ADIOI_W_Exchange_data(ADIO_File fd, void *buf, char *write_buf,
+
+
static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
*flat_buf, char **send_buf, ADIO_Offset
- *offset_list, int *len_list, int *send_size,
+ *offset_list, ADIO_Offset *len_list, int *send_size,
MPI_Request *requests, int *sent_to_proc,
int nprocs, int myrank,
int contig_access_count,
@@ -833,8 +865,9 @@ static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
{
/* this function is only called if buftype is not contig */
- int i, p, flat_buf_idx, size;
- int flat_buf_sz, buf_incr, size_in_buf, jj, n_buftypes;
+ int i, p, flat_buf_idx;
+ ADIO_Offset flat_buf_sz, size_in_buf, buf_incr, size;
+ int jj, n_buftypes;
ADIO_Offset off, len, rem_len, user_buf_idx;
/* curr_to_proc[p] = amount of data sent to proc. p that has already
@@ -861,7 +894,7 @@ static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
for (i=0; i done_to_proc[p]) {
if (done_to_proc[p] > curr_to_proc[p]) {
- size = (int)ADIOI_MIN(curr_to_proc[p] + len -
+ size = ADIOI_MIN(curr_to_proc[p] + len -
done_to_proc[p], send_size[p]-send_buf_idx[p]);
buf_incr = done_to_proc[p] - curr_to_proc[p];
ADIOI_BUF_INCR
- buf_incr = (int)(curr_to_proc[p] + len - done_to_proc[p]);
+ ADIOI_Assert((curr_to_proc[p] + len - done_to_proc[p]) == (unsigned)(curr_to_proc[p] + len - done_to_proc[p]));
+ buf_incr = curr_to_proc[p] + len - done_to_proc[p];
+ ADIOI_Assert((done_to_proc[p] + size) == (unsigned)(done_to_proc[p] + size));
curr_to_proc[p] = done_to_proc[p] + size;
ADIOI_BUF_COPY
}
else {
- size = (int)ADIOI_MIN(len,send_size[p]-send_buf_idx[p]);
- buf_incr = (int)len;
+ size = ADIOI_MIN(len,send_size[p]-send_buf_idx[p]);
+ buf_incr = len;
+ ADIOI_Assert((curr_to_proc[p] + size) == (unsigned)((ADIO_Offset)curr_to_proc[p] + size));
curr_to_proc[p] += size;
ADIOI_BUF_COPY
}
@@ -902,13 +938,14 @@ static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
}
}
else {
- curr_to_proc[p] += (int)len;
- buf_incr = (int)len;
+ ADIOI_Assert((curr_to_proc[p] + len) == (unsigned)((ADIO_Offset)curr_to_proc[p] + len));
+ curr_to_proc[p] += len;
+ buf_incr = len;
ADIOI_BUF_INCR
}
}
else {
- buf_incr = (int)len;
+ buf_incr = len;
ADIOI_BUF_INCR
}
off += len;
@@ -921,7 +958,7 @@ static void ADIOI_Fill_send_buffer(ADIO_File fd, void *buf, ADIOI_Flatlist_node
-static void ADIOI_Heap_merge(ADIOI_Access *others_req, int *count,
+void ADIOI_Heap_merge(ADIOI_Access *others_req, int *count,
ADIO_Offset *srt_off, int *srt_len, int *start_pos,
int nprocs, int nprocs_recv, int total_elements)
{
diff --git a/ompi/mca/io/romio/romio/adio/common/ad_write_nolock.c b/ompi/mca/io/romio/romio/adio/common/ad_write_nolock.c
index e36996202a..091b8ec42b 100644
--- a/ompi/mca/io/romio/romio/adio/common/ad_write_nolock.c
+++ b/ompi/mca/io/romio/romio/adio/common/ad_write_nolock.c
@@ -27,16 +27,18 @@ void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, void *buf, int count,
/* offset is in units of etype relative to the filetype. */
ADIOI_Flatlist_node *flat_buf, *flat_file;
- int i, j, k, err=-1, bwr_size, fwr_size=0, st_index=0;
- int bufsize, num, size, sum, n_etypes_in_filetype, size_in_filetype;
- int n_filetypes, etype_in_filetype;
- ADIO_Offset abs_off_in_filetype=0;
+ int j, k, err=-1, st_index=0;
+ ADIO_Offset fwr_size=0, bwr_size, new_bwr_size, new_fwr_size, i_offset, num;
+ unsigned bufsize;
+ int n_etypes_in_filetype;
+ ADIO_Offset n_filetypes, etype_in_filetype, size, sum;
+ ADIO_Offset abs_off_in_filetype=0, size_in_filetype;
int filetype_size, etype_size, buftype_size;
MPI_Aint filetype_extent, buftype_extent, indx;
int buf_count, buftype_is_contig, filetype_is_contig;
ADIO_Offset off, disp;
- int flag, new_bwr_size, new_fwr_size, err_flag=0;
- static char myname[] = "ADIOI_PVFS_WRITESTRIDED";
+ int flag, err_flag=0;
+ static char myname[] = "ADIOI_NOLOCK_WRITESTRIDED";
#ifdef IO_DEBUG
int rank,nprocs;
#endif
@@ -70,6 +72,7 @@ void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, void *buf, int count,
MPI_Type_extent(datatype, &buftype_extent);
etype_size = fd->etype_size;
+ ADIOI_Assert((buftype_size * count) == ((ADIO_Offset)(unsigned)buftype_size * (ADIO_Offset)count));
bufsize = buftype_size * count;
if (!buftype_is_contig && filetype_is_contig) {
@@ -100,6 +103,7 @@ void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, void *buf, int count,
* is also handled.
*/
for (j=0; j<count; j++) {
+ int i;
for (i=0; i<flat_buf->count; i++) {
if (flat_buf->blocklens[i] > combine_buf_remain && combine_buf != combine_buf_ptr) {
/* there is data in the buffer; write out the buffer so far */
@@ -134,12 +138,14 @@ void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, void *buf, int count,
rank, nprocs, off,
flat_buf->blocklens[i]);
#endif
+ ADIOI_Assert(flat_buf->blocklens[i] == (unsigned)flat_buf->blocklens[i]);
+ ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + (ADIO_Offset)j*(ADIO_Offset)buftype_extent + flat_buf->indices[i]) == (ADIO_Offset)((MPIR_Upint)buf + (ADIO_Offset)j*(ADIO_Offset)buftype_extent + flat_buf->indices[i]));
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
err = write(fd->fd_sys,
- ((char *) buf) + j*buftype_extent + flat_buf->indices[i],
- flat_buf->blocklens[i]);
+ ((char *) buf) + (ADIO_Offset)j*(ADIO_Offset)buftype_extent + flat_buf->indices[i],
+ (unsigned)flat_buf->blocklens[i]);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
@@ -206,14 +212,15 @@ void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, void *buf, int count,
n_filetypes = -1;
flag = 0;
while (!flag) {
+ int i;
n_filetypes++;
for (i=0; i<flat_file->count; i++) {
if (disp + flat_file->indices[i] +
- (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i]
+ n_filetypes*(ADIO_Offset)filetype_extent + flat_file->blocklens[i]
>= offset) {
st_index = i;
fwr_size = disp + flat_file->indices[i] +
- (ADIO_Offset) n_filetypes*filetype_extent
+ n_filetypes*(ADIO_Offset)filetype_extent
+ flat_file->blocklens[i] - offset;
flag = 1;
break;
@@ -222,9 +229,10 @@ void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, void *buf, int count,
}
}
else {
+ int i;
n_etypes_in_filetype = filetype_size/etype_size;
- n_filetypes = (int) (offset / n_etypes_in_filetype);
- etype_in_filetype = (int) (offset % n_etypes_in_filetype);
+ n_filetypes = offset / n_etypes_in_filetype;
+ etype_in_filetype = offset % n_etypes_in_filetype;
size_in_filetype = etype_in_filetype * etype_size;
sum = 0;
@@ -240,7 +248,7 @@ void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, void *buf, int count,
}
/* abs. offset in bytes in the file */
- offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype;
+ offset = disp + n_filetypes*(ADIO_Offset)filetype_extent + abs_off_in_filetype;
}
if (buftype_is_contig && !filetype_is_contig) {
@@ -248,11 +256,11 @@ void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, void *buf, int count,
/* contiguous in memory, noncontiguous in file. should be the most
common case. */
- i = 0;
+ i_offset = 0;
j = st_index;
off = offset;
fwr_size = ADIOI_MIN(fwr_size, bufsize);
- while (i < bufsize) {
+ while (i_offset < bufsize) {
if (fwr_size) {
/* TYPE_UB and TYPE_LB can result in
fwr_size = 0. save system call in such cases */
@@ -271,16 +279,16 @@ void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, void *buf, int count,
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event(ADIOI_MPE_write_a, 0, NULL);
#endif
- err = write(fd->fd_sys, ((char *) buf) + i, fwr_size);
+ err = write(fd->fd_sys, ((char *) buf) + i_offset, fwr_size);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event(ADIOI_MPE_write_b, 0, NULL);
#endif
if (err == -1) err_flag = 1;
}
- i += fwr_size;
+ i_offset += fwr_size;
if (off + fwr_size < disp + flat_file->indices[j] +
- flat_file->blocklens[j] + (ADIO_Offset) n_filetypes*filetype_extent)
+ flat_file->blocklens[j] + n_filetypes*(ADIO_Offset)filetype_extent)
off += fwr_size;
/* did not reach end of contiguous block in filetype.
no more I/O needed. off is incremented by fwr_size. */
@@ -291,8 +299,8 @@ void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, void *buf, int count,
n_filetypes++;
}
off = disp + flat_file->indices[j] +
- (ADIO_Offset) n_filetypes*filetype_extent;
- fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
+ n_filetypes*(ADIO_Offset)filetype_extent;
+ fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset);
}
}
}
@@ -327,6 +335,8 @@ void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, void *buf, int count,
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
+ ADIOI_Assert(size == (size_t) size);
+ ADIOI_Assert(off == (off_t) off);
err = write(fd->fd_sys, ((char *) buf) + indx, size);
#ifdef ADIOI_MPE_LOGGING
MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
@@ -346,7 +356,7 @@ void ADIOI_NOLOCK_WriteStrided(ADIO_File fd, void *buf, int count,
}
off = disp + flat_file->indices[j] +
- (ADIO_Offset) n_filetypes*filetype_extent;
+ n_filetypes*(ADIO_Offset)filetype_extent;
new_fwr_size = flat_file->blocklens[j];
if (size != bwr_size) {
diff --git a/ompi/mca/io/romio/romio/adio/common/ad_write_str.c b/ompi/mca/io/romio/romio/adio/common/ad_write_str.c
index 8ad385a221..b13fb183c0 100644
--- a/ompi/mca/io/romio/romio/adio/common/ad_write_str.c
+++ b/ompi/mca/io/romio/romio/adio/common/ad_write_str.c
@@ -24,7 +24,7 @@
} \
} \
writebuf_off = req_off; \
- writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
+ writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
ADIO_ReadContig(fd, writebuf, writebuf_len, MPI_BYTE, \
ADIO_EXPLICIT_OFFSET, writebuf_off, &status1, error_code); \
@@ -36,7 +36,8 @@
return; \
} \
} \
- write_sz = (int) (ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \
+ write_sz = (unsigned) (ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \
+ ADIOI_Assert((ADIO_Offset)write_sz == ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off));\
memcpy(writebuf+req_off-writebuf_off, (char *)buf +userbuf_off, write_sz);\
while (write_sz != req_len) { \
ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, \
@@ -52,7 +53,7 @@
req_len -= write_sz; \
userbuf_off += write_sz; \
writebuf_off += writebuf_len; \
- writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
+ writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
ADIO_ReadContig(fd, writebuf, writebuf_len, MPI_BYTE, \
ADIO_EXPLICIT_OFFSET, writebuf_off, &status1, error_code); \
@@ -84,9 +85,10 @@
return; \
} \
writebuf_off = req_off; \
- writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
+ writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
} \
- write_sz = (int) (ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \
+ write_sz = (unsigned) (ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \
+ ADIOI_Assert((ADIO_Offset)write_sz == ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off));\
memcpy(writebuf+req_off-writebuf_off, (char *)buf +userbuf_off, write_sz);\
while (write_sz != req_len) { \
ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, \
@@ -101,40 +103,41 @@
req_len -= write_sz; \
userbuf_off += write_sz; \
writebuf_off += writebuf_len; \
- writebuf_len = (int) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
+ writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize,end_offset-writebuf_off+1));\
write_sz = ADIOI_MIN(req_len, writebuf_len); \
memcpy(writebuf, (char *)buf + userbuf_off, write_sz);\
} \
}
-
-
void ADIOI_GEN_WriteStrided(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status, int
*error_code)
{
+
/* offset is in units of etype relative to the filetype. */
ADIOI_Flatlist_node *flat_buf, *flat_file;
- int i, j, k, bwr_size, fwr_size=0, st_index=0;
- int bufsize, num, size, sum, n_etypes_in_filetype, size_in_filetype;
- int n_filetypes, etype_in_filetype;
+ ADIO_Offset i_offset, sum, size_in_filetype;
+ int i, j, k, st_index=0;
+ int n_etypes_in_filetype;
+ ADIO_Offset num, size, n_filetypes, etype_in_filetype, st_n_filetypes;
ADIO_Offset abs_off_in_filetype=0;
- int filetype_size, etype_size, buftype_size, req_len;
+ int filetype_size, etype_size, buftype_size;
MPI_Aint filetype_extent, buftype_extent;
int buf_count, buftype_is_contig, filetype_is_contig;
ADIO_Offset userbuf_off;
ADIO_Offset off, req_off, disp, end_offset=0, writebuf_off, start_off;
char *writebuf;
- int flag, st_fwr_size, st_n_filetypes, writebuf_len, write_sz;
+ unsigned bufsize, writebuf_len, max_bufsize, write_sz;
ADIO_Status status1;
- int new_bwr_size, new_fwr_size, max_bufsize;
+ ADIO_Offset new_bwr_size, new_fwr_size, st_fwr_size, fwr_size=0, bwr_size, req_len;
static char myname[] = "ADIOI_GEN_WriteStrided";
if (fd->hints->ds_write == ADIOI_HINT_DISABLE) {
/* if user has disabled data sieving on reads, use naive
* approach instead.
*/
+
ADIOI_GEN_WriteStrided_naive(fd,
buf,
count,
@@ -146,6 +149,7 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, void *buf, int count,
return;
}
+
*error_code = MPI_SUCCESS; /* changed below if error */
ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
@@ -162,6 +166,7 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, void *buf, int count,
MPI_Type_extent(datatype, &buftype_extent);
etype_size = fd->etype_size;
+ ADIOI_Assert((buftype_size * count) == ((ADIO_Offset)(unsigned)buftype_size * (ADIO_Offset)count));
bufsize = buftype_size * count;
/* get max_bufsize from the info object. */
@@ -177,26 +182,28 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, void *buf, int count,
while (flat_buf->type != datatype) flat_buf = flat_buf->next;
off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind :
- fd->disp + etype_size * offset;
+ fd->disp + (ADIO_Offset)etype_size * offset;
start_off = off;
end_offset = off + bufsize - 1;
writebuf_off = off;
writebuf = (char *) ADIOI_Malloc(max_bufsize);
- writebuf_len = (int) (ADIOI_MIN(max_bufsize, end_offset-writebuf_off+1));
+ writebuf_len = (unsigned) (ADIOI_MIN(max_bufsize, end_offset-writebuf_off+1));
/* if atomicity is true, lock the region to be accessed */
if (fd->atomicity)
ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
for (j=0; j<count; j++)
- for (i=0; i<flat_buf->count; i++) {
- userbuf_off = j*buftype_extent + flat_buf->indices[i];
- req_off = off;
- req_len = flat_buf->blocklens[i];
- ADIOI_BUFFERED_WRITE_WITHOUT_READ
- off += flat_buf->blocklens[i];
- }
+ {
+ for (i=0; i<flat_buf->count; i++) {
+ userbuf_off = (ADIO_Offset)j*(ADIO_Offset)buftype_extent + flat_buf->indices[i];
+ req_off = off;
+ req_len = flat_buf->blocklens[i];
+ ADIOI_BUFFERED_WRITE_WITHOUT_READ
+ off += flat_buf->blocklens[i];
+ }
+ }
/* write the buffer out finally */
ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
@@ -220,29 +227,36 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, void *buf, int count,
disp = fd->disp;
if (file_ptr_type == ADIO_INDIVIDUAL) {
- offset = fd->fp_ind; /* in bytes */
- n_filetypes = -1;
- flag = 0;
- while (!flag) {
- n_filetypes++;
- for (i=0; i<flat_file->count; i++) {
- if (disp + flat_file->indices[i] +
- (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i]
- >= offset) {
- st_index = i;
- fwr_size = (int) (disp + flat_file->indices[i] +
- (ADIO_Offset) n_filetypes*filetype_extent
- + flat_file->blocklens[i] - offset);
- flag = 1;
- break;
- }
- }
- }
- }
+ /* Wei-keng reworked type processing to be a bit more efficient */
+ offset = fd->fp_ind - disp;
+ n_filetypes = (offset - flat_file->indices[0]) / filetype_extent;
+ offset -= (ADIO_Offset)n_filetypes * filetype_extent;
+ /* now offset is local to this extent */
+
+ /* find the block where offset is located, skip blocklens[i]==0 */
+ for (i=0; i<flat_file->count; i++) {
+ ADIO_Offset dist;
+ if (flat_file->blocklens[i] == 0) continue;
+ dist = flat_file->indices[i] + flat_file->blocklens[i] - offset;
+ /* fwr_size is from offset to the end of block i */
+ if (dist == 0) {
+ i++;
+ offset = flat_file->indices[i];
+ fwr_size = flat_file->blocklens[i];
+ break;
+ }
+ if (dist > 0) {
+ fwr_size = dist;
+ break;
+ }
+ }
+ st_index = i; /* starting index in flat_file->indices[] */
+ offset += disp + (ADIO_Offset)n_filetypes*filetype_extent;
+ }
else {
n_etypes_in_filetype = filetype_size/etype_size;
- n_filetypes = (int) (offset / n_etypes_in_filetype);
- etype_in_filetype = (int) (offset % n_etypes_in_filetype);
+ n_filetypes = offset / n_etypes_in_filetype;
+ etype_in_filetype = offset % n_etypes_in_filetype;
size_in_filetype = etype_in_filetype * etype_size;
sum = 0;
@@ -258,32 +272,65 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, void *buf, int count,
}
/* abs. offset in bytes in the file */
- offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype;
+ offset = disp + (ADIO_Offset) n_filetypes*filetype_extent +
+ abs_off_in_filetype;
}
start_off = offset;
+ /* Wei-keng Liao:write request is within single flat_file contig block*/
+ /* this could happen, for example, with subarray types that are
+ * actually fairly contiguous */
+ if (buftype_is_contig && bufsize <= fwr_size) {
+ ADIO_WriteContig(fd, buf, bufsize, MPI_BYTE, ADIO_EXPLICIT_OFFSET,
+ offset, status, error_code);
+
+ if (file_ptr_type == ADIO_INDIVIDUAL) {
+ /* update MPI-IO file pointer to point to the first byte
+ * that can be accessed in the fileview. */
+ fd->fp_ind = offset + bufsize;
+ if (bufsize == fwr_size) {
+ do {
+ st_index++;
+ if (st_index == flat_file->count) {
+ st_index = 0;
+ n_filetypes++;
+ }
+ } while (flat_file->blocklens[st_index] == 0);
+ fd->fp_ind = disp + flat_file->indices[st_index]
+ + (ADIO_Offset)n_filetypes*filetype_extent;
+ }
+ }
+ fd->fp_sys_posn = -1; /* set it to null. */
+#ifdef HAVE_STATUS_SET_BYTES
+ MPIR_Status_set_bytes(status, datatype, bufsize);
+#endif
+ return;
+ }
+
/* Calculate end_offset, the last byte-offset that will be accessed.
e.g., if start_offset=0 and 100 bytes to be write, end_offset=99*/
st_fwr_size = fwr_size;
st_n_filetypes = n_filetypes;
- i = 0;
+ i_offset = 0;
j = st_index;
off = offset;
fwr_size = ADIOI_MIN(st_fwr_size, bufsize);
- while (i < bufsize) {
- i += fwr_size;
+ while (i_offset < bufsize) {
+ i_offset += fwr_size;
end_offset = off + fwr_size - 1;
- if (j < (flat_file->count - 1)) j++;
- else {
- j = 0;
- n_filetypes++;
- }
+ j = (j+1) % flat_file->count;
+ n_filetypes += (j == 0) ? 1 : 0;
+ while (flat_file->blocklens[j]==0) {
+ j = (j+1) % flat_file->count;
+ n_filetypes += (j == 0) ? 1 : 0;
+ }
- off = disp + flat_file->indices[j] + (ADIO_Offset) n_filetypes*filetype_extent;
- fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
+ off = disp + flat_file->indices[j] +
+ n_filetypes*(ADIO_Offset)filetype_extent;
+ fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i_offset);
}
/* if atomicity is true, lock the region to be accessed */
@@ -300,39 +347,41 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, void *buf, int count,
/* contiguous in memory, noncontiguous in file. should be the most
common case. */
- i = 0;
+ i_offset = 0;
j = st_index;
off = offset;
n_filetypes = st_n_filetypes;
fwr_size = ADIOI_MIN(st_fwr_size, bufsize);
- while (i < bufsize) {
+ while (i_offset < bufsize) {
if (fwr_size) {
/* TYPE_UB and TYPE_LB can result in
fwr_size = 0. save system call in such cases */
/* lseek(fd->fd_sys, off, SEEK_SET);
- err = write(fd->fd_sys, ((char *) buf) + i, fwr_size);*/
+ err = write(fd->fd_sys, ((char *) buf) + i_offset, fwr_size);*/
req_off = off;
req_len = fwr_size;
- userbuf_off = i;
+ userbuf_off = i_offset;
ADIOI_BUFFERED_WRITE
}
- i += fwr_size;
+ i_offset += fwr_size;
if (off + fwr_size < disp + flat_file->indices[j] +
- flat_file->blocklens[j] + (ADIO_Offset) n_filetypes*filetype_extent)
+ flat_file->blocklens[j] + n_filetypes*(ADIO_Offset)filetype_extent)
off += fwr_size;
/* did not reach end of contiguous block in filetype.
no more I/O needed. off is incremented by fwr_size. */
else {
- if (j < (flat_file->count - 1)) j++;
- else {
- j = 0;
- n_filetypes++;
- }
+ j = (j+1) % flat_file->count;
+ n_filetypes += (j == 0) ? 1 : 0;
+ while (flat_file->blocklens[j]==0) {
+ j = (j+1) % flat_file->count;
+ n_filetypes += (j == 0) ? 1 : 0;
+ }
off = disp + flat_file->indices[j] +
- (ADIO_Offset) n_filetypes*filetype_extent;
- fwr_size = ADIOI_MIN(flat_file->blocklens[j], bufsize-i);
+ n_filetypes*(ADIO_Offset)filetype_extent;
+ fwr_size = ADIOI_MIN(flat_file->blocklens[j],
+ bufsize-i_offset);
}
}
}
@@ -344,7 +393,7 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, void *buf, int count,
while (flat_buf->type != datatype) flat_buf = flat_buf->next;
k = num = buf_count = 0;
- i = (int) (flat_buf->indices[0]);
+ i_offset = flat_buf->indices[0];
j = st_index;
off = offset;
n_filetypes = st_n_filetypes;
@@ -355,11 +404,11 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, void *buf, int count,
size = ADIOI_MIN(fwr_size, bwr_size);
if (size) {
/* lseek(fd->fd_sys, off, SEEK_SET);
- err = write(fd->fd_sys, ((char *) buf) + i, size); */
+ err = write(fd->fd_sys, ((char *) buf) + i_offset, size); */
req_off = off;
req_len = size;
- userbuf_off = i;
+ userbuf_off = i_offset;
ADIOI_BUFFERED_WRITE
}
@@ -368,18 +417,19 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, void *buf, int count,
if (size == fwr_size) {
/* reached end of contiguous block in file */
- if (j < (flat_file->count - 1)) j++;
- else {
- j = 0;
- n_filetypes++;
+ j = (j+1) % flat_file->count;
+ n_filetypes += (j == 0) ? 1 : 0;
+ while (flat_file->blocklens[j]==0) {
+ j = (j+1) % flat_file->count;
+ n_filetypes += (j == 0) ? 1 : 0;
}
off = disp + flat_file->indices[j] +
- (ADIO_Offset) n_filetypes*filetype_extent;
+ n_filetypes*(ADIO_Offset)filetype_extent;
new_fwr_size = flat_file->blocklens[j];
if (size != bwr_size) {
- i += size;
+ i_offset += size;
new_bwr_size -= size;
}
}
@@ -389,8 +439,8 @@ void ADIOI_GEN_WriteStrided(ADIO_File fd, void *buf, int count,
k = (k + 1)%flat_buf->count;
buf_count++;
- i = (int) (buftype_extent*(buf_count/flat_buf->count) +
- flat_buf->indices[k]);
+ i_offset = (ADIO_Offset)buftype_extent*(ADIO_Offset)(buf_count/flat_buf->count) +
+ flat_buf->indices[k];
new_bwr_size = flat_buf->blocklens[k];
if (size != fwr_size) {
off += size;
diff --git a/ompi/mca/io/romio/romio/adio/common/ad_write_str_naive.c b/ompi/mca/io/romio/romio/adio/common/ad_write_str_naive.c
index c8247fb888..6cd859e952 100644
--- a/ompi/mca/io/romio/romio/adio/common/ad_write_str_naive.c
+++ b/ompi/mca/io/romio/romio/adio/common/ad_write_str_naive.c
@@ -17,11 +17,13 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, void *buf, int count,
ADIOI_Flatlist_node *flat_buf, *flat_file;
/* bwr == buffer write; fwr == file write */
- int bwr_size, fwr_size=0, b_index;
- int bufsize, size, sum, n_etypes_in_filetype, size_in_filetype;
- int n_filetypes, etype_in_filetype;
- ADIO_Offset abs_off_in_filetype=0;
- int filetype_size, etype_size, buftype_size, req_len;
+ ADIO_Offset bwr_size, fwr_size=0, sum, size_in_filetype;
+ int b_index;
+ unsigned bufsize;
+ int n_etypes_in_filetype;
+ ADIO_Offset size, n_filetypes, etype_in_filetype;
+ ADIO_Offset abs_off_in_filetype=0, req_len;
+ int filetype_size, etype_size, buftype_size;
MPI_Aint filetype_extent, buftype_extent;
int buf_count, buftype_is_contig, filetype_is_contig;
ADIO_Offset userbuf_off;
@@ -44,6 +46,7 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, void *buf, int count,
MPI_Type_extent(buftype, &buftype_extent);
etype_size = fd->etype_size;
+ ADIOI_Assert((buftype_size * count) == ((ADIO_Offset)(unsigned)buftype_size * (ADIO_Offset)count));
bufsize = buftype_size * count;
/* contiguous in buftype and filetype is handled elsewhere */
@@ -57,14 +60,13 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, void *buf, int count,
while (flat_buf->type != buftype) flat_buf = flat_buf->next;
off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind :
- fd->disp + etype_size * offset;
+ fd->disp + (ADIO_Offset)etype_size * offset;
start_off = off;
end_offset = off + bufsize - 1;
/* if atomicity is true, lock (exclusive) the region to be accessed */
- if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) &&
- (fd->file_system != ADIO_PVFS) && (fd->file_system != ADIO_PVFS2))
+ if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS))
{
ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
}
@@ -74,14 +76,16 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, void *buf, int count,
*/
for (b_count=0; b_count < count; b_count++) {
for (b_index=0; b_index < flat_buf->count; b_index++) {
- userbuf_off = b_count*buftype_extent +
+ userbuf_off = (ADIO_Offset)b_count*(ADIO_Offset)buftype_extent +
flat_buf->indices[b_index];
req_off = off;
req_len = flat_buf->blocklens[b_index];
+ ADIOI_Assert(req_len == (int) req_len);
+ ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + userbuf_off) == (ADIO_Offset)(MPIR_Upint)((MPIR_Upint)buf + userbuf_off));
ADIO_WriteContig(fd,
(char *) buf + userbuf_off,
- req_len,
+ (int)req_len,
MPI_BYTE,
ADIO_EXPLICIT_OFFSET,
req_off,
@@ -94,8 +98,7 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, void *buf, int count,
}
}
- if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) &&
- (fd->file_system != ADIO_PVFS) && (fd->file_system != ADIO_PVFS2))
+ if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS))
{
ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
}
@@ -105,7 +108,8 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, void *buf, int count,
}
else { /* noncontiguous in file */
- int f_index, st_fwr_size, st_index = 0, st_n_filetypes;
+ int f_index, st_index = 0;
+ ADIO_Offset st_fwr_size, st_n_filetypes;
int flag;
/* First we're going to calculate a set of values for use in all
@@ -135,15 +139,15 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, void *buf, int count,
n_filetypes++;
for (f_index=0; f_index < flat_file->count; f_index++) {
if (disp + flat_file->indices[f_index] +
- (ADIO_Offset) n_filetypes*filetype_extent +
+ n_filetypes*(ADIO_Offset)filetype_extent +
flat_file->blocklens[f_index] >= start_off)
{
/* this block contains our starting position */
st_index = f_index;
- fwr_size = (int) (disp + flat_file->indices[f_index] +
- (ADIO_Offset) n_filetypes*filetype_extent +
- flat_file->blocklens[f_index] - start_off);
+ fwr_size = disp + flat_file->indices[f_index] +
+ n_filetypes*(ADIO_Offset)filetype_extent +
+ flat_file->blocklens[f_index] - start_off;
flag = 1;
break;
}
@@ -152,8 +156,8 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, void *buf, int count,
}
else {
n_etypes_in_filetype = filetype_size/etype_size;
- n_filetypes = (int) (offset / n_etypes_in_filetype);
- etype_in_filetype = (int) (offset % n_etypes_in_filetype);
+ n_filetypes = offset / n_etypes_in_filetype;
+ etype_in_filetype = offset % n_etypes_in_filetype;
size_in_filetype = etype_in_filetype * etype_size;
sum = 0;
@@ -170,7 +174,7 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, void *buf, int count,
}
/* abs. offset in bytes in the file */
- start_off = disp + (ADIO_Offset) n_filetypes*filetype_extent +
+ start_off = disp + n_filetypes*(ADIO_Offset)filetype_extent +
abs_off_in_filetype;
}
@@ -199,9 +203,9 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, void *buf, int count,
}
off = disp + flat_file->indices[f_index] +
- (ADIO_Offset) n_filetypes*filetype_extent;
+ n_filetypes*(ADIO_Offset)filetype_extent;
fwr_size = ADIOI_MIN(flat_file->blocklens[f_index],
- bufsize-(int)userbuf_off);
+ bufsize-(unsigned)userbuf_off);
}
/* End of calculations. At this point the following values have
@@ -214,8 +218,7 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, void *buf, int count,
*/
/* if atomicity is true, lock (exclusive) the region to be accessed */
- if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) &&
- (fd->file_system != ADIO_PVFS) && (fd->file_system != ADIO_PVFS2))
+ if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS))
{
ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
}
@@ -239,9 +242,11 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, void *buf, int count,
req_off = off;
req_len = fwr_size;
+ ADIOI_Assert(req_len == (int) req_len);
+ ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + userbuf_off) == (ADIO_Offset)(MPIR_Upint)((MPIR_Upint)buf + userbuf_off));
ADIO_WriteContig(fd,
(char *) buf + userbuf_off,
- req_len,
+ (int)req_len,
MPI_BYTE,
ADIO_EXPLICIT_OFFSET,
req_off,
@@ -253,7 +258,7 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, void *buf, int count,
if (off + fwr_size < disp + flat_file->indices[f_index] +
flat_file->blocklens[f_index] +
- (ADIO_Offset) n_filetypes*filetype_extent)
+ n_filetypes*(ADIO_Offset)filetype_extent)
{
/* important that this value be correct, as it is
* used to set the offset in the fd near the end of
@@ -271,14 +276,14 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, void *buf, int count,
n_filetypes++;
}
off = disp + flat_file->indices[f_index] +
- (ADIO_Offset) n_filetypes*filetype_extent;
+ n_filetypes*(ADIO_Offset)filetype_extent;
fwr_size = ADIOI_MIN(flat_file->blocklens[f_index],
- bufsize-(int)userbuf_off);
+ bufsize-(unsigned)userbuf_off);
}
}
}
else {
- int i, tmp_bufsize = 0;
+ ADIO_Offset i_offset, tmp_bufsize = 0;
/* noncontiguous in memory as well as in file */
ADIOI_Flatten_datatype(buftype);
@@ -286,7 +291,7 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, void *buf, int count,
while (flat_buf->type != buftype) flat_buf = flat_buf->next;
b_index = buf_count = 0;
- i = (int) (flat_buf->indices[0]);
+ i_offset = flat_buf->indices[0];
f_index = st_index;
off = start_off;
n_filetypes = st_n_filetypes;
@@ -295,17 +300,19 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, void *buf, int count,
/* while we haven't read size * count bytes, keep going */
while (tmp_bufsize < bufsize) {
- int new_bwr_size = bwr_size, new_fwr_size = fwr_size;
+ ADIO_Offset new_bwr_size = bwr_size, new_fwr_size = fwr_size;
size = ADIOI_MIN(fwr_size, bwr_size);
if (size) {
req_off = off;
req_len = size;
- userbuf_off = i;
+ userbuf_off = i_offset;
+ ADIOI_Assert(req_len == (int) req_len);
+ ADIOI_Assert((((ADIO_Offset)(MPIR_Upint)buf) + userbuf_off) == (ADIO_Offset)(MPIR_Upint)((MPIR_Upint)buf + userbuf_off));
ADIO_WriteContig(fd,
(char *) buf + userbuf_off,
- req_len,
+ (int)req_len,
MPI_BYTE,
ADIO_EXPLICIT_OFFSET,
req_off,
@@ -323,11 +330,11 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, void *buf, int count,
}
off = disp + flat_file->indices[f_index] +
- (ADIO_Offset) n_filetypes*filetype_extent;
+ n_filetypes*(ADIO_Offset)filetype_extent;
new_fwr_size = flat_file->blocklens[f_index];
if (size != bwr_size) {
- i += size;
+ i_offset += size;
new_bwr_size -= size;
}
}
@@ -337,8 +344,8 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, void *buf, int count,
b_index = (b_index + 1)%flat_buf->count;
buf_count++;
- i = (int) (buftype_extent*(buf_count/flat_buf->count) +
- flat_buf->indices[b_index]);
+ i_offset = (ADIO_Offset)buftype_extent*(ADIO_Offset)(buf_count/flat_buf->count) +
+ flat_buf->indices[b_index];
new_bwr_size = flat_buf->blocklens[b_index];
if (size != fwr_size) {
off += size;
@@ -352,8 +359,7 @@ void ADIOI_GEN_WriteStrided_naive(ADIO_File fd, void *buf, int count,
}
/* unlock the file region if we locked it */
- if ((fd->atomicity) && (fd->file_system != ADIO_PIOFS) &&
- (fd->file_system != ADIO_PVFS) && (fd->file_system != ADIO_PVFS2))
+ if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS))
{
ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1);
}
diff --git a/ompi/mca/io/romio/romio/adio/common/byte_offset.c b/ompi/mca/io/romio/romio/adio/common/byte_offset.c
index e23acf34e9..ce88cf1893 100644
--- a/ompi/mca/io/romio/romio/adio/common/byte_offset.c
+++ b/ompi/mca/io/romio/romio/adio/common/byte_offset.c
@@ -14,10 +14,10 @@
void ADIOI_Get_byte_offset(ADIO_File fd, ADIO_Offset offset, ADIO_Offset *disp)
{
ADIOI_Flatlist_node *flat_file;
- int i, sum, n_etypes_in_filetype, size_in_filetype;
- int n_filetypes, etype_in_filetype;
- ADIO_Offset abs_off_in_filetype=0;
- int filetype_size, etype_size, filetype_is_contig;
+ int i;
+ ADIO_Offset n_filetypes, etype_in_filetype, sum, abs_off_in_filetype=0, size_in_filetype;
+ unsigned n_etypes_in_filetype, filetype_size, etype_size;
+ int filetype_is_contig;
MPI_Aint filetype_extent;
ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
@@ -29,10 +29,10 @@ void ADIOI_Get_byte_offset(ADIO_File fd, ADIO_Offset offset, ADIO_Offset *disp)
flat_file = ADIOI_Flatlist;
while (flat_file->type != fd->filetype) flat_file = flat_file->next;
- MPI_Type_size(fd->filetype, &filetype_size);
+ MPI_Type_size(fd->filetype, (int*)&filetype_size);
n_etypes_in_filetype = filetype_size/etype_size;
- n_filetypes = (int) (offset / n_etypes_in_filetype);
- etype_in_filetype = (int) (offset % n_etypes_in_filetype);
+ n_filetypes = offset / n_etypes_in_filetype;
+ etype_in_filetype = offset % n_etypes_in_filetype;
size_in_filetype = etype_in_filetype * etype_size;
sum = 0;
@@ -47,6 +47,6 @@ void ADIOI_Get_byte_offset(ADIO_File fd, ADIO_Offset offset, ADIO_Offset *disp)
/* abs. offset in bytes in the file */
MPI_Type_extent(fd->filetype, &filetype_extent);
- *disp = fd->disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype;
+ *disp = fd->disp + n_filetypes * ADIOI_AINT_CAST_TO_OFFSET filetype_extent + abs_off_in_filetype;
}
}
diff --git a/ompi/mca/io/romio/romio/adio/common/cb_config_list.c b/ompi/mca/io/romio/romio/adio/common/cb_config_list.c
index 1f18f6a0cc..bb2c1eeff9 100644
--- a/ompi/mca/io/romio/romio/adio/common/cb_config_list.c
+++ b/ompi/mca/io/romio/romio/adio/common/cb_config_list.c
@@ -35,7 +35,7 @@
#undef CB_CONFIG_LIST_DEBUG
/* a couple of globals keep things simple */
-static int cb_config_list_keyval = MPI_KEYVAL_INVALID;
+int ADIOI_cb_config_list_keyval = MPI_KEYVAL_INVALID;
static char *yylval;
static char *token_ptr;
@@ -83,7 +83,7 @@ int ADIOI_cb_bcast_rank_map(ADIO_File fd)
* FS-INDEP. */
value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char));
ADIOI_Snprintf(value, MPI_MAX_INFO_VAL+1, "%d", fd->hints->cb_nodes);
- MPI_Info_set(fd->info, "cb_nodes", value);
+ ADIOI_Info_set(fd->info, "cb_nodes", value);
ADIOI_Free(value);
return 0;
@@ -111,14 +111,16 @@ int ADIOI_cb_gather_name_array(MPI_Comm comm,
ADIO_cb_name_array array = NULL;
int alloc_size;
- if (cb_config_list_keyval == MPI_KEYVAL_INVALID) {
+ if (ADIOI_cb_config_list_keyval == MPI_KEYVAL_INVALID) {
+ /* cleaned up by ADIOI_End_call */
MPI_Keyval_create((MPI_Copy_function *) ADIOI_cb_copy_name_array,
(MPI_Delete_function *) ADIOI_cb_delete_name_array,
- &cb_config_list_keyval, NULL);
+ &ADIOI_cb_config_list_keyval, NULL);
}
else {
- MPI_Attr_get(comm, cb_config_list_keyval, (void *) &array, &found);
+ MPI_Attr_get(comm, ADIOI_cb_config_list_keyval, (void *) &array, &found);
if (found) {
+ ADIOI_Assert(array != NULL);
*arrayp = array;
return 0;
}
@@ -231,8 +233,8 @@ int ADIOI_cb_gather_name_array(MPI_Comm comm,
* it next time an open is performed on this same comm, and on the
* dupcomm, so we can use it in I/O operations.
*/
- MPI_Attr_put(comm, cb_config_list_keyval, array);
- MPI_Attr_put(dupcomm, cb_config_list_keyval, array);
+ MPI_Attr_put(comm, ADIOI_cb_config_list_keyval, array);
+ MPI_Attr_put(dupcomm, ADIOI_cb_config_list_keyval, array);
*arrayp = array;
return 0;
}
@@ -362,7 +364,7 @@ int ADIOI_cb_config_list_parse(char *config_list,
/* ADIOI_cb_copy_name_array() - attribute copy routine
*/
int ADIOI_cb_copy_name_array(MPI_Comm comm,
- int *keyval,
+ int keyval,
void *extra,
void *attr_in,
void **attr_out,
@@ -371,11 +373,11 @@ int ADIOI_cb_copy_name_array(MPI_Comm comm,
ADIO_cb_name_array array;
ADIOI_UNREFERENCED_ARG(comm);
- ADIOI_UNREFERENCED_ARG(keyval);
+ ADIOI_UNREFERENCED_ARG(keyval);
ADIOI_UNREFERENCED_ARG(extra);
array = (ADIO_cb_name_array) attr_in;
- array->refct++;
+ if (array != NULL) array->refct++;
*attr_out = attr_in;
*flag = 1; /* make a copy in the new communicator */
@@ -386,17 +388,17 @@ int ADIOI_cb_copy_name_array(MPI_Comm comm,
/* ADIOI_cb_delete_name_array() - attribute destructor
*/
int ADIOI_cb_delete_name_array(MPI_Comm comm,
- int *keyval,
+ int keyval,
void *attr_val,
void *extra)
{
ADIO_cb_name_array array;
ADIOI_UNREFERENCED_ARG(comm);
- ADIOI_UNREFERENCED_ARG(keyval);
ADIOI_UNREFERENCED_ARG(extra);
array = (ADIO_cb_name_array) attr_val;
+ ADIOI_Assert(array != NULL);
array->refct--;
if (array->refct <= 0) {
@@ -411,7 +413,6 @@ int ADIOI_cb_delete_name_array(MPI_Comm comm,
if (array->names != NULL) ADIOI_Free(array->names);
ADIOI_Free(array);
}
-
return MPI_SUCCESS;
}
@@ -679,19 +680,32 @@ static int get_max_procs(int cb_nodes)
*
* Returns a token of types defined at top of this file.
*/
+#ifdef ROMIO_BGL
+/* On BlueGene, the ',' character shows up in get_processor_name, so we have to
+ * use a different delimiter */
+#define COLON ':'
+#define COMMA ';'
+#define DELIMS ":;"
+#else
+/* these tokens work for every other platform */
+#define COLON ':'
+#define COMMA ','
+#define DELIMS ":,"
+#endif
+
static int cb_config_list_lex(void)
{
int slen;
if (*token_ptr == '\0') return AGG_EOS;
- slen = (int)strcspn(token_ptr, ":,");
+ slen = (int)strcspn(token_ptr, DELIMS);
- if (*token_ptr == ':') {
+ if (*token_ptr == COLON) {
token_ptr++;
return AGG_COLON;
}
- if (*token_ptr == ',') {
+ if (*token_ptr == COMMA) {
token_ptr++;
return AGG_COMMA;
}
diff --git a/ompi/mca/io/romio/romio/adio/common/eof_offset.c b/ompi/mca/io/romio/romio/adio/common/eof_offset.c
index e8ddbd3072..981efa087a 100644
--- a/ompi/mca/io/romio/romio/adio/common/eof_offset.c
+++ b/ompi/mca/io/romio/romio/adio/common/eof_offset.c
@@ -13,9 +13,10 @@
void ADIOI_Get_eof_offset(ADIO_File fd, ADIO_Offset *eof_offset)
{
- int error_code, filetype_is_contig, etype_size, filetype_size;
- ADIO_Offset fsize, disp, sum=0, size_in_file;
- int n_filetypes, flag, i, rem;
+ unsigned filetype_size;
+ int error_code, filetype_is_contig, etype_size;
+ ADIO_Offset fsize, disp, sum=0, size_in_file, n_filetypes, rem;
+ int flag, i;
ADIO_Fcntl_t *fcntl_struct;
MPI_Aint filetype_extent;
ADIOI_Flatlist_node *flat_file;
@@ -43,7 +44,7 @@ void ADIOI_Get_eof_offset(ADIO_File fd, ADIO_Offset *eof_offset)
while (flat_file->type != fd->filetype)
flat_file = flat_file->next;
- MPI_Type_size(fd->filetype, &filetype_size);
+ MPI_Type_size(fd->filetype, (int*)&filetype_size);
MPI_Type_extent(fd->filetype, &filetype_extent);
disp = fd->disp;
@@ -55,14 +56,14 @@ void ADIOI_Get_eof_offset(ADIO_File fd, ADIO_Offset *eof_offset)
for (i=0; i<flat_file->count; i++) {
sum += flat_file->blocklens[i];
if (disp + flat_file->indices[i] +
- (ADIO_Offset) n_filetypes*filetype_extent +
+ n_filetypes* ADIOI_AINT_CAST_TO_OFFSET filetype_extent +
flat_file->blocklens[i] >= fsize) {
if (disp + flat_file->indices[i] +
- (ADIO_Offset) n_filetypes*filetype_extent >= fsize)
+ n_filetypes * ADIOI_AINT_CAST_TO_OFFSET filetype_extent >= fsize)
sum -= flat_file->blocklens[i];
else {
- rem = (int) (disp + flat_file->indices[i] +
- (ADIO_Offset) n_filetypes*filetype_extent
+ rem = (disp + flat_file->indices[i] +
+ n_filetypes* ADIOI_AINT_CAST_TO_OFFSET filetype_extent
+ flat_file->blocklens[i] - fsize);
sum -= rem;
}
@@ -71,7 +72,7 @@ void ADIOI_Get_eof_offset(ADIO_File fd, ADIO_Offset *eof_offset)
}
}
}
- size_in_file = (ADIO_Offset) n_filetypes*filetype_size + sum;
+ size_in_file = n_filetypes*(ADIO_Offset)filetype_size + sum;
*eof_offset = (size_in_file+etype_size-1)/etype_size; /* ceiling division */
}
}
diff --git a/ompi/mca/io/romio/romio/adio/common/flatten.c b/ompi/mca/io/romio/romio/adio/common/flatten.c
index e644d2b138..06a456cc41 100644
--- a/ompi/mca/io/romio/romio/adio/common/flatten.c
+++ b/ompi/mca/io/romio/romio/adio/common/flatten.c
@@ -1,8 +1,5 @@
-/* -*- Mode: C; c-basic-offset:4 ; -*-
- * vim: ts=8 sts=4 sw=4 noexpandtab
- *
- * $Id: flatten.c,v 1.24 2006/07/05 20:40:13 robl Exp $
- *
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
* Copyright (C) 1997 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*/
@@ -12,14 +9,15 @@
/* #ifdef MPISGI
#include "mpisgi2.h"
#endif */
+#ifdef ROMIO_INSIDE_MPICH2
+#include "mpid_datatype.h"
+#endif
+
+#ifdef USE_DBG_LOGGING
+ #define FLATTEN_DEBUG 1
+#endif
void ADIOI_Optimize_flattened(ADIOI_Flatlist_node *flat_type);
-void ADIOI_Flatten_copy_type(ADIOI_Flatlist_node *flat,
- int old_type_start,
- int old_type_end,
- int new_type_start,
- ADIO_Offset offset_adjustment);
-
/* flatten datatype and add it to Flatlist */
void ADIOI_Flatten_datatype(MPI_Datatype datatype)
{
@@ -29,16 +27,25 @@ void ADIOI_Flatten_datatype(MPI_Datatype datatype)
int curr_index=0, is_contig;
ADIOI_Flatlist_node *flat, *prev=0;
+#ifdef ROMIO_INSIDE_MPICH2
+ if(MPIU_DBG_SELECTED(DATATYPE,TYPICAL)) MPIDU_Datatype_debug(datatype, 4); /* use -env MPICH_DBG_OUTPUT=stdout */
+#endif
/* check if necessary to flatten. */
/* is it entirely contiguous? */
ADIOI_Datatype_iscontig(datatype, &is_contig);
+ #ifdef FLATTEN_DEBUG
+ DBG_FPRINTF(stderr,"ADIOI_Flatten_datatype:: is_contig %#X\n",is_contig);
+ #endif
if (is_contig) return;
/* has it already been flattened? */
flat = ADIOI_Flatlist;
while (flat) {
if (flat->type == datatype) {
+ #ifdef FLATTEN_DEBUG
+ DBG_FPRINTF(stderr,"ADIOI_Flatten_datatype:: found datatype %#X\n", datatype);
+ #endif
return;
}
else {
@@ -58,39 +65,42 @@ void ADIOI_Flatten_datatype(MPI_Datatype datatype)
flat->indices = NULL;
flat->count = ADIOI_Count_contiguous_blocks(datatype, &curr_index);
-#if 0
- FPRINTF(stderr, "cur_idx = %d\n", curr_index);
+#ifdef FLATTEN_DEBUG
+ DBG_FPRINTF(stderr,"ADIOI_Flatten_datatype:: count %#X, cur_idx = %#X\n",flat->count,curr_index);
#endif
-/* FPRINTF(stderr, "%d\n", flat->count);*/
+/* DBG_FPRINTF(stderr, "%d\n", flat->count);*/
if (flat->count) {
- flat->blocklens = (int *) ADIOI_Malloc(flat->count * sizeof(int));
- flat->indices = (ADIO_Offset *) ADIOI_Malloc(flat->count * \
- sizeof(ADIO_Offset));
+ flat->blocklens = (ADIO_Offset *) ADIOI_Malloc(flat->count * sizeof(ADIO_Offset));
+ flat->indices = (ADIO_Offset *) ADIOI_Malloc(flat->count * sizeof(ADIO_Offset));
}
curr_index = 0;
#ifdef HAVE_MPIR_TYPE_FLATTEN
flatten_idx = (MPI_Aint) flat->count;
MPIR_Type_flatten(datatype, flat->indices, flat->blocklens, &flatten_idx);
+ #ifdef FLATTEN_DEBUG
+ DBG_FPRINTF(stderr,"ADIOI_Flatten_datatype:: MPIR_Type_flatten\n");
+ #endif
#else
ADIOI_Flatten(datatype, flat, 0, &curr_index);
+ #ifdef FLATTEN_DEBUG
+ DBG_FPRINTF(stderr,"ADIOI_Flatten_datatype:: ADIOI_Flatten\n");
+ #endif
ADIOI_Optimize_flattened(flat);
#endif
/* debug */
-#if 0
+#ifdef FLATTEN_DEBUG
{
int i;
- FPRINTF(stderr, "blens: ");
for (i=0; i<flat->count; i++)
- FPRINTF(stderr, "%d ", flat->blocklens[i]);
- FPRINTF(stderr, "\n\n");
- FPRINTF(stderr, "indices: ");
- for (i=0; i<flat->count; i++)
- FPRINTF(stderr, "%ld ", (long) flat->indices[i]);
- FPRINTF(stderr, "\n\n");
- }
+ DBG_FPRINTF(stderr,"ADIOI_Flatten_datatype:: i %#X, blocklens %#llX, indices %#llX\n",
+ i,
+ flat->blocklens[i],
+ flat->indices[i]
+ );
+ }
#endif
}
@@ -103,22 +113,46 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat,
ADIO_Offset st_offset, int *curr_index)
{
int i, j, k, m, n, num, basic_num, prev_index;
- int top_count, combiner, old_combiner, old_is_contig;
- int old_size, nints, nadds, ntypes, old_nints, old_nadds, old_ntypes;
- MPI_Aint old_extent;
+ int combiner, old_combiner, old_is_contig;
+ int nints, nadds, ntypes, old_nints, old_nadds, old_ntypes;
+ /* By using ADIO_Offset we preserve +/- sign and
+ avoid >2G integer arithmetic problems */
+ ADIO_Offset top_count;
+ /* By using unsigned we avoid >2G integer arithmetic problems */
+ unsigned old_size;
+ MPI_Aint old_extent;/* Assume extents are non-negative */
int *ints;
- MPI_Aint *adds;
+ MPI_Aint *adds; /* Make no assumptions about +/- sign on these */
MPI_Datatype *types;
-
MPI_Type_get_envelope(datatype, &nints, &nadds, &ntypes, &combiner);
ints = (int *) ADIOI_Malloc((nints+1)*sizeof(int));
adds = (MPI_Aint *) ADIOI_Malloc((nadds+1)*sizeof(MPI_Aint));
types = (MPI_Datatype *) ADIOI_Malloc((ntypes+1)*sizeof(MPI_Datatype));
MPI_Type_get_contents(datatype, nints, nadds, ntypes, ints, adds, types);
+ #ifdef FLATTEN_DEBUG
+ DBG_FPRINTF(stderr,"ADIOI_Flatten:: st_offset %#llX, curr_index %#X\n",st_offset,*curr_index);
+ DBG_FPRINTF(stderr,"ADIOI_Flatten:: nints %#X, nadds %#X, ntypes %#X\n",nints, nadds, ntypes);
+ for(i=0; i< nints; ++i)
+ {
+ DBG_FPRINTF(stderr,"ADIOI_Flatten:: ints[%d]=%#X\n",i,ints[i]);
+ }
+ for(i=0; i< nadds; ++i)
+ {
+ DBG_FPRINTF(stderr,"ADIOI_Flatten:: adds[%d]="MPI_AINT_FMT_HEX_SPEC"\n",i,adds[i]);
+ }
+ for(i=0; i< ntypes; ++i)
+ {
+ DBG_FPRINTF(stderr,"ADIOI_Flatten:: types[%d]=%#llX\n",i,(unsigned long long)(unsigned long)types[i]);
+ }
+ if(MPIU_DBG_SELECTED(DATATYPE,TYPICAL)) MPIDU_Datatype_debug(datatype, 4); /* use -env MPICH_DBG_OUTPUT=stdout */
+ #endif
switch (combiner) {
#ifdef MPIIMPL_HAVE_MPI_COMBINER_DUP
case MPI_COMBINER_DUP:
+ #ifdef FLATTEN_DEBUG
+ DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_DUP\n");
+ #endif
MPI_Type_get_envelope(types[0], &old_nints, &old_nadds,
&old_ntypes, &old_combiner);
ADIOI_Datatype_iscontig(types[0], &old_is_contig);
@@ -131,6 +165,9 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat,
{
int dims = ints[0];
MPI_Datatype stype;
+ #ifdef FLATTEN_DEBUG
+ DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_SUBARRAY\n");
+ #endif
ADIO_Type_create_subarray(dims,
&ints[1], /* sizes */
@@ -149,6 +186,9 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat,
{
int dims = ints[2];
MPI_Datatype dtype;
+ #ifdef FLATTEN_DEBUG
+ DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_DARRAY\n");
+ #endif
ADIO_Type_create_darray(ints[0], /* size */
ints[1], /* rank */
@@ -160,12 +200,23 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat,
ints[4*dims+3], /* order */
types[0],
&dtype);
+ #ifdef FLATTEN_DEBUG
+ DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_DARRAY indices[%#X] %#llX, flat->blocklens[%#X] %#llX, st_offset %#llX, curr_index %#X);\n",
+ 0, flat->indices[0], 0, flat->blocklens[0], st_offset, *curr_index);
+ #endif
ADIOI_Flatten(dtype, flat, st_offset, curr_index);
+ #ifdef FLATTEN_DEBUG
+ DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_DARRAY >ADIOI_Flatten(dtype, flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX, st_offset %#llX, curr_index %#X);\n",
+ 0, flat->indices[0], 0, flat->blocklens[0], st_offset, *curr_index);
+ #endif
MPI_Type_free(&dtype);
}
break;
#endif
case MPI_COMBINER_CONTIGUOUS:
+ #ifdef FLATTEN_DEBUG
+ DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_CONTIGUOUS\n");
+ #endif
top_count = ints[0];
MPI_Type_get_envelope(types[0], &old_nints, &old_nadds,
&old_ntypes, &old_combiner);
@@ -179,8 +230,11 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat,
/* simplest case, made up of basic or contiguous types */
j = *curr_index;
flat->indices[j] = st_offset;
- MPI_Type_size(types[0], &old_size);
+ MPI_Type_size(types[0], (int*)&old_size);
flat->blocklens[j] = top_count * old_size;
+ #ifdef FLATTEN_DEBUG
+ DBG_FPRINTF(stderr,"ADIOI_Flatten:: simple flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX\n",j, flat->indices[j], j, flat->blocklens[j]);
+ #endif
(*curr_index)++;
}
else {
@@ -192,8 +246,11 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat,
MPI_Type_extent(types[0], &old_extent);
for (m=1; m<top_count; m++) {
for (i=0; i<num; i++) {
- flat->indices[j] = flat->indices[j-num] + old_extent;
+ flat->indices[j] = flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
flat->blocklens[j] = flat->blocklens[j-num];
+ #ifdef FLATTEN_DEBUG
+ DBG_FPRINTF(stderr,"ADIOI_Flatten:: derived flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX\n",j, flat->indices[j], j, flat->blocklens[j]);
+ #endif
j++;
}
}
@@ -202,6 +259,9 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat,
break;
case MPI_COMBINER_VECTOR:
+ #ifdef FLATTEN_DEBUG
+ DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_VECTOR\n");
+ #endif
top_count = ints[0];
MPI_Type_get_envelope(types[0], &old_nints, &old_nadds,
&old_ntypes, &old_combiner);
@@ -213,19 +273,24 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat,
if (prev_index == *curr_index) {
/* simplest case, vector of basic or contiguous types */
+ /* By using ADIO_Offset we preserve +/- sign and
+ avoid >2G integer arithmetic problems */
+ ADIO_Offset blocklength = ints[1], stride = ints[2];
j = *curr_index;
flat->indices[j] = st_offset;
- MPI_Type_size(types[0], &old_size);
- flat->blocklens[j] = ints[1] * old_size;
+ MPI_Type_size(types[0], (int*)&old_size);
+ flat->blocklens[j] = blocklength * old_size;
for (i=j+1; i<j+top_count; i++) {
- flat->indices[i] = flat->indices[i-1] +
- (unsigned) ints[2] * (unsigned) old_size;
+ flat->indices[i] = flat->indices[i-1] + stride * old_size;
flat->blocklens[i] = flat->blocklens[j];
}
*curr_index = i;
}
else {
/* vector of noncontiguous derived types */
+ /* By using ADIO_Offset we preserve +/- sign and
+ avoid >2G integer arithmetic problems */
+ ADIO_Offset blocklength = ints[1], stride = ints[2];
j = *curr_index;
num = *curr_index - prev_index;
@@ -233,9 +298,9 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat,
/* The noncontiguous types have to be replicated blocklen times
and then strided. Replicate the first one. */
MPI_Type_extent(types[0], &old_extent);
- for (m=1; m<ints[1]; m++) {
+ for (m=1; m<blocklength; m++) {
for (i=0; i<num; i++) {
- flat->indices[j] = flat->indices[j-num] + old_extent;
+ flat->indices[j] = flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
flat->blocklens[j] = flat->blocklens[j-num];
j++;
}
@@ -246,8 +311,7 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat,
num = *curr_index - prev_index;
for (i=1; i<top_count; i++) {
for (m=0; m<num; m++) {
- flat->indices[j] = flat->indices[j-num] + ints[2]
- *old_extent;
+ flat->indices[j] = flat->indices[j-num] + stride * ADIOI_AINT_CAST_TO_OFFSET old_extent;
flat->blocklens[j] = flat->blocklens[j-num];
j++;
}
@@ -258,6 +322,9 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat,
case MPI_COMBINER_HVECTOR:
case MPI_COMBINER_HVECTOR_INTEGER:
+ #ifdef FLATTEN_DEBUG
+ DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_HVECTOR_INTEGER\n");
+ #endif
top_count = ints[0];
MPI_Type_get_envelope(types[0], &old_nints, &old_nadds,
&old_ntypes, &old_combiner);
@@ -269,10 +336,13 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat,
if (prev_index == *curr_index) {
/* simplest case, vector of basic or contiguous types */
+ /* By using ADIO_Offset we preserve +/- sign and
+ avoid >2G integer arithmetic problems */
+ ADIO_Offset blocklength = ints[1];
j = *curr_index;
flat->indices[j] = st_offset;
- MPI_Type_size(types[0], &old_size);
- flat->blocklens[j] = ints[1] * old_size;
+ MPI_Type_size(types[0], (int*)&old_size);
+ flat->blocklens[j] = blocklength * old_size;
for (i=j+1; i<j+top_count; i++) {
flat->indices[i] = flat->indices[i-1] + adds[0];
flat->blocklens[i] = flat->blocklens[j];
@@ -281,6 +351,9 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat,
}
else {
/* vector of noncontiguous derived types */
+ /* By using ADIO_Offset we preserve +/- sign and
+ avoid >2G integer arithmetic problems */
+ ADIO_Offset blocklength = ints[1];
j = *curr_index;
num = *curr_index - prev_index;
@@ -288,9 +361,9 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat,
/* The noncontiguous types have to be replicated blocklen times
and then strided. Replicate the first one. */
MPI_Type_extent(types[0], &old_extent);
- for (m=1; m<ints[1]; m++) {
+ for (m=1; m<blocklength; m++) {
for (i=0; i<num; i++) {
- flat->indices[j] = flat->indices[j-num] + old_extent;
+ flat->indices[j] = flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
flat->blocklens[j] = flat->blocklens[j-num];
j++;
}
@@ -311,6 +384,9 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat,
break;
case MPI_COMBINER_INDEXED:
+ #ifdef FLATTEN_DEBUG
+ DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_INDEXED\n");
+ #endif
top_count = ints[0];
MPI_Type_get_envelope(types[0], &old_nints, &old_nadds,
&old_ntypes, &old_combiner);
@@ -319,15 +395,23 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat,
prev_index = *curr_index;
if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
- ADIOI_Flatten(types[0], flat,
- st_offset+ints[top_count+1]*old_extent, curr_index);
+ {
+ /* By using ADIO_Offset we preserve +/- sign and
+ avoid >2G integer arithmetic problems */
+ ADIO_Offset stride = ints[top_count+1];
+ ADIOI_Flatten(types[0], flat,
+ st_offset+stride* ADIOI_AINT_CAST_TO_OFFSET old_extent, curr_index);
+ }
if (prev_index == *curr_index) {
/* simplest case, indexed type made up of basic or contiguous types */
j = *curr_index;
for (i=j; i<j+top_count; i++) {
- flat->indices[i] = st_offset + ints[top_count+1+i-j]*old_extent;
- flat->blocklens[i] = (int) (ints[1+i-j]*old_extent);
+ /* By using ADIO_Offset we preserve +/- sign and
+ avoid >2G integer arithmetic problems */
+ ADIO_Offset blocklength = ints[1+i-j], stride = ints[top_count+1+i-j];
+ flat->indices[i] = st_offset + stride* ADIOI_AINT_CAST_TO_OFFSET old_extent;
+ flat->blocklens[i] = blocklength* ADIOI_AINT_CAST_TO_OFFSET old_extent;
}
*curr_index = i;
}
@@ -342,7 +426,7 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat,
and then strided. Replicate the first one. */
for (m=1; m<ints[1]; m++) {
for (i=0; i<num; i++) {
- flat->indices[j] = flat->indices[j-num] + old_extent;
+ flat->indices[j] = flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
flat->blocklens[j] = flat->blocklens[j-num];
j++;
}
@@ -354,15 +438,17 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat,
num = *curr_index - prev_index;
prev_index = *curr_index;
for (m=0; m<num; m++) {
- flat->indices[j] = flat->indices[j-num] +
- (ints[top_count+1+i]-ints[top_count+i])*old_extent;
+ /* By using ADIO_Offset we preserve +/- sign and
+ avoid >2G integer arithmetic problems */
+ ADIO_Offset stride = ints[top_count+1+i]-ints[top_count+i];
+ flat->indices[j] = flat->indices[j-num] + stride* ADIOI_AINT_CAST_TO_OFFSET old_extent;
flat->blocklens[j] = flat->blocklens[j-num];
j++;
}
*curr_index = j;
for (m=1; m<ints[1+i]; m++) {
for (k=0; k<basic_num; k++) {
- flat->indices[j] = flat->indices[j-basic_num] + old_extent;
+ flat->indices[j] = flat->indices[j-basic_num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
flat->blocklens[j] = flat->blocklens[j-basic_num];
j++;
}
@@ -373,6 +459,9 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat,
break;
case MPI_COMBINER_INDEXED_BLOCK:
+ #ifdef FLATTEN_DEBUG
+ DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_INDEXED_BLOCK\n");
+ #endif
top_count = ints[0];
MPI_Type_get_envelope(types[0], &old_nints, &old_nadds,
&old_ntypes, &old_combiner);
@@ -381,15 +470,23 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat,
prev_index = *curr_index;
if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
- ADIOI_Flatten(types[0], flat,
- st_offset+ints[1+1]*old_extent, curr_index);
+ {
+ /* By using ADIO_Offset we preserve +/- sign and
+ avoid >2G integer arithmetic problems */
+ ADIO_Offset stride = ints[1+1];
+ ADIOI_Flatten(types[0], flat,
+ st_offset+stride* ADIOI_AINT_CAST_TO_OFFSET old_extent, curr_index);
+ }
if (prev_index == *curr_index) {
/* simplest case, indexed type made up of basic or contiguous types */
j = *curr_index;
for (i=j; i<j+top_count; i++) {
- flat->indices[i] = st_offset + ints[1+1+i-j]*old_extent;
- flat->blocklens[i] = (int) (ints[1]*old_extent);
+ /* By using ADIO_Offset we preserve +/- sign and
+ avoid >2G integer arithmetic problems */
+ ADIO_Offset blocklength = ints[1], stride = ints[1+1+i-j];
+ flat->indices[i] = st_offset + stride* ADIOI_AINT_CAST_TO_OFFSET old_extent;
+ flat->blocklens[i] = blocklength* ADIOI_AINT_CAST_TO_OFFSET old_extent;
}
*curr_index = i;
}
@@ -403,7 +500,7 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat,
and then strided. Replicate the first one. */
for (m=1; mindices[j] = flat->indices[j-num] + old_extent;
+ flat->indices[j] = flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
flat->blocklens[j] = flat->blocklens[j-num];
j++;
}
@@ -414,7 +511,10 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat,
num = *curr_index - prev_index;
for (i=1; iindices[j] = flat->indices[j-num] + (ints[2+i]-ints[1+i])*old_extent;
+ /* By using ADIO_Offset we preserve +/- sign and
+ avoid >2G integer arithmetic problems */
+ ADIO_Offset stride = ints[2+i]-ints[1+i];
+ flat->indices[j] = flat->indices[j-num] + stride* ADIOI_AINT_CAST_TO_OFFSET old_extent;
flat->blocklens[j] = flat->blocklens[j-num];
j++;
}
@@ -425,6 +525,9 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat,
case MPI_COMBINER_HINDEXED:
case MPI_COMBINER_HINDEXED_INTEGER:
+ #ifdef FLATTEN_DEBUG
+ DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_HINDEXED_INTEGER\n");
+ #endif
top_count = ints[0];
MPI_Type_get_envelope(types[0], &old_nints, &old_nadds,
&old_ntypes, &old_combiner);
@@ -432,15 +535,20 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat,
prev_index = *curr_index;
if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig))
- ADIOI_Flatten(types[0], flat, st_offset+adds[0], curr_index);
+ {
+ ADIOI_Flatten(types[0], flat, st_offset+adds[0], curr_index);
+ }
if (prev_index == *curr_index) {
/* simplest case, indexed type made up of basic or contiguous types */
j = *curr_index;
- MPI_Type_size(types[0], &old_size);
+ MPI_Type_size(types[0], (int*)&old_size);
for (i=j; i2G integer arithmetic problems */
+ ADIO_Offset blocklength = ints[1+i-j];
flat->indices[i] = st_offset + adds[i-j];
- flat->blocklens[i] = ints[1+i-j]*old_size;
+ flat->blocklens[i] = blocklength*old_size;
}
*curr_index = i;
}
@@ -456,7 +564,7 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat,
MPI_Type_extent(types[0], &old_extent);
for (m=1; mindices[j] = flat->indices[j-num] + old_extent;
+ flat->indices[j] = flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
flat->blocklens[j] = flat->blocklens[j-num];
j++;
}
@@ -475,7 +583,7 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat,
*curr_index = j;
for (m=1; mindices[j] = flat->indices[j-basic_num] + old_extent;
+ flat->indices[j] = flat->indices[j-basic_num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
flat->blocklens[j] = flat->blocklens[j-basic_num];
j++;
}
@@ -487,6 +595,9 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat,
case MPI_COMBINER_STRUCT:
case MPI_COMBINER_STRUCT_INTEGER:
+ #ifdef FLATTEN_DEBUG
+ DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_STRUCT_INTEGER\n");
+ #endif
top_count = ints[0];
for (n=0; n2G integer arithmetic problems */
+ ADIO_Offset blocklength = ints[1+n];
j = *curr_index;
flat->indices[j] = st_offset + adds[n];
- MPI_Type_size(types[n], &old_size);
- flat->blocklens[j] = ints[1+n] * old_size;
+ MPI_Type_size(types[n], (int*)&old_size);
+ flat->blocklens[j] = blocklength * old_size;
+ #ifdef FLATTEN_DEBUG
+ DBG_FPRINTF(stderr,"ADIOI_Flatten:: simple adds[%#X] "MPI_AINT_FMT_HEX_SPEC", flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX\n",n,adds[n],j, flat->indices[j], j, flat->blocklens[j]);
+ #endif
(*curr_index)++;
}
else {
@@ -515,8 +632,11 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat,
MPI_Type_extent(types[n], &old_extent);
for (m=1; mindices[j] = flat->indices[j-num] + old_extent;
+ flat->indices[j] = flat->indices[j-num] + ADIOI_AINT_CAST_TO_OFFSET old_extent;
flat->blocklens[j] = flat->blocklens[j-num];
+ #ifdef FLATTEN_DEBUG
+ DBG_FPRINTF(stderr,"ADIOI_Flatten:: simple old_extent "MPI_AINT_FMT_HEX_SPEC", flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX\n",old_extent,j, flat->indices[j], j, flat->blocklens[j]);
+ #endif
j++;
}
}
@@ -525,9 +645,63 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat,
}
break;
+ case MPI_COMBINER_RESIZED:
+ #ifdef FLATTEN_DEBUG
+ DBG_FPRINTF(stderr,"ADIOI_Flatten:: MPI_COMBINER_RESIZED\n");
+ #endif
+
+ /* This is done similar to a type_struct with an lb, datatype, ub */
+
+ /* handle the Lb */
+ j = *curr_index;
+ flat->indices[j] = st_offset + adds[0];
+ flat->blocklens[j] = 0;
+
+ #ifdef FLATTEN_DEBUG
+ DBG_FPRINTF(stderr,"ADIOI_Flatten:: simple adds[%#X] "MPI_AINT_FMT_HEX_SPEC", flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX\n",0,adds[0],j, flat->indices[j], j, flat->blocklens[j]);
+ #endif
+
+ (*curr_index)++;
+
+ /* handle the datatype */
+
+ MPI_Type_get_envelope(types[0], &old_nints, &old_nadds,
+ &old_ntypes, &old_combiner);
+ ADIOI_Datatype_iscontig(types[0], &old_is_contig);
+
+ if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig)) {
+ ADIOI_Flatten(types[0], flat, st_offset+adds[0], curr_index);
+ }
+ else {
+ /* current type is basic or contiguous */
+ j = *curr_index;
+ flat->indices[j] = st_offset;
+ MPI_Type_size(types[0], (int*)&old_size);
+ flat->blocklens[j] = old_size;
+
+ #ifdef FLATTEN_DEBUG
+ DBG_FPRINTF(stderr,"ADIOI_Flatten:: simple adds[%#X] "MPI_AINT_FMT_HEX_SPEC", flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX\n",0,adds[0],j, flat->indices[j], j, flat->blocklens[j]);
+ #endif
+
+ (*curr_index)++;
+ }
+
+ /* take care of the extent as a UB */
+ j = *curr_index;
+ flat->indices[j] = st_offset + adds[0] + adds[1];
+ flat->blocklens[j] = 0;
+
+ #ifdef FLATTEN_DEBUG
+ DBG_FPRINTF(stderr,"ADIOI_Flatten:: simple adds[%#X] "MPI_AINT_FMT_HEX_SPEC", flat->indices[%#X] %#llX, flat->blocklens[%#X] %#llX\n",1,adds[1],j, flat->indices[j], j, flat->blocklens[j]);
+ #endif
+
+ (*curr_index)++;
+
+ break;
+
default:
/* TODO: FIXME (requires changing prototypes to return errors...) */
- FPRINTF(stderr, "Error: Unsupported datatype passed to ADIOI_Flatten\n");
+ DBG_FPRINTF(stderr, "Error: Unsupported datatype passed to ADIOI_Flatten\n");
MPI_Abort(MPI_COMM_WORLD, 1);
}
@@ -545,6 +719,10 @@ void ADIOI_Flatten(MPI_Datatype datatype, ADIOI_Flatlist_node *flat,
ADIOI_Free(adds);
ADIOI_Free(types);
+ #ifdef FLATTEN_DEBUG
+ DBG_FPRINTF(stderr,"ADIOI_Flatten:: return st_offset %#llX, curr_index %#X\n",st_offset,*curr_index);
+ #endif
+
}
/********************************************************/
@@ -569,7 +747,7 @@ int ADIOI_Count_contiguous_blocks(MPI_Datatype datatype, int *curr_index)
int top_count, combiner, old_combiner, old_is_contig;
int nints, nadds, ntypes, old_nints, old_nadds, old_ntypes;
int *ints;
- MPI_Aint *adds;
+ MPI_Aint *adds; /* Make no assumptions about +/- sign on these */
MPI_Datatype *types;
MPI_Type_get_envelope(datatype, &nints, &nadds, &ntypes, &combiner);
@@ -789,9 +967,32 @@ int ADIOI_Count_contiguous_blocks(MPI_Datatype datatype, int *curr_index)
}
}
break;
+
+ case MPI_COMBINER_RESIZED:
+ /* treat it as a struct with lb, type, ub */
+
+ /* add 2 for lb and ub */
+ (*curr_index) += 2;
+ count += 2;
+
+ /* add for datatype */
+ MPI_Type_get_envelope(types[0], &old_nints, &old_nadds,
+ &old_ntypes, &old_combiner);
+ ADIOI_Datatype_iscontig(types[0], &old_is_contig);
+
+ if ((old_combiner != MPI_COMBINER_NAMED) && (!old_is_contig)) {
+ count += ADIOI_Count_contiguous_blocks(types[0], curr_index);
+ }
+ else {
+ /* basic or contiguous type */
+ count++;
+ (*curr_index)++;
+ }
+ break;
+
default:
/* TODO: FIXME */
- FPRINTF(stderr, "Error: Unsupported datatype passed to ADIOI_Count_contiguous_blocks, combiner = %d\n", combiner);
+ DBG_FPRINTF(stderr, "Error: Unsupported datatype passed to ADIOI_Count_contiguous_blocks, combiner = %d\n", combiner);
MPI_Abort(MPI_COMM_WORLD, 1);
}
@@ -812,6 +1013,53 @@ int ADIOI_Count_contiguous_blocks(MPI_Datatype datatype, int *curr_index)
#endif /* HAVE_MPIR_TYPE_GET_CONTIG_BLOCKS */
}
+/* removezeros() makes a second pass over the
+ * flattened type, knocking out zero-length blocks but leaving the first and
+ * last alone (they mark LB and UB) */
+
+static void removezeros(ADIOI_Flatlist_node *flat_type)
+{
+ int i,j,opt_blocks;
+ ADIO_Offset *opt_blocklens;
+ ADIO_Offset *opt_indices;
+
+ /* short-circuit: there is nothing to do if there are
+ * - 1 block: what can we remove?
+ * - 2 blocks: either both blocks are data (and not zero)
+ * or one block is the UB or LB */
+ if (flat_type->count <= 2) return;
+
+ opt_blocks = 2; /* LB and UB */
+ for (i=1; i < flat_type->count -1; i++) {
+ if(flat_type->blocklens[i] != 0)
+ opt_blocks++;
+ }
+ /* no optimization possible */
+ if (opt_blocks == flat_type->count) return;
+ opt_blocklens = (ADIO_Offset *) ADIOI_Malloc(opt_blocks * sizeof(ADIO_Offset));
+ opt_indices = (ADIO_Offset *)ADIOI_Malloc(opt_blocks*sizeof(ADIO_Offset));
+
+ /* fill in new blocklists, keeping first and last no matter what */
+ opt_blocklens[0] = flat_type->blocklens[0];
+ opt_indices[0] = flat_type->indices[0];
+ j = 1; /* always two entries: one for LB and UB ([0] and [j])*/
+ for (i=1; i< flat_type->count -1; i++) {
+ if( flat_type->blocklens[i] != 0) {
+ opt_indices[j] = flat_type->indices[i];
+ opt_blocklens[j] = flat_type->blocklens[i];
+ j++;
+ }
+ }
+ opt_indices[j] = flat_type->indices[flat_type->count -1];
+ opt_blocklens[j] = flat_type->blocklens[flat_type->count -1];
+
+ flat_type->count = opt_blocks;
+ ADIOI_Free(flat_type->blocklens);
+ ADIOI_Free(flat_type->indices);
+ flat_type->blocklens = opt_blocklens;
+ flat_type->indices = opt_indices;
+ return;
+}
/****************************************************************/
@@ -822,14 +1070,14 @@ int ADIOI_Count_contiguous_blocks(MPI_Datatype datatype, int *curr_index)
* contiguous operations).
*
* NOTE: a further optimization would be to remove zero length blocks. However,
- * we do not do this as parts of the code use the presence of zero length
- * blocks to indicate UB and LB.
+ * the first and last blocks must remain, as a zero-length first or last block
+ * marks the LB or UB.
*
*/
void ADIOI_Optimize_flattened(ADIOI_Flatlist_node *flat_type)
{
int i, j, opt_blocks;
- int *opt_blocklens;
+ ADIO_Offset *opt_blocklens;
ADIO_Offset *opt_indices;
opt_blocks = 1;
@@ -844,7 +1092,7 @@ void ADIOI_Optimize_flattened(ADIOI_Flatlist_node *flat_type)
/* if we can't reduce the number of blocks, quit now */
if (opt_blocks == flat_type->count) return;
- opt_blocklens = (int *) ADIOI_Malloc(opt_blocks * sizeof(int));
+ opt_blocklens = (ADIO_Offset *) ADIOI_Malloc(opt_blocks * sizeof(ADIO_Offset));
opt_indices = (ADIO_Offset *)ADIOI_Malloc(opt_blocks*sizeof(ADIO_Offset));
/* fill in new blocklists */
@@ -866,6 +1114,7 @@ void ADIOI_Optimize_flattened(ADIOI_Flatlist_node *flat_type)
ADIOI_Free(flat_type->indices);
flat_type->blocklens = opt_blocklens;
flat_type->indices = opt_indices;
+ removezeros(flat_type);
return;
}
@@ -885,26 +1134,3 @@ void ADIOI_Delete_flattened(MPI_Datatype datatype)
ADIOI_Free(flat);
}
}
-
-/* ADIOI_Flatten_copy_type()
- * flat - pointer to flatlist node holding offset and lengths
- * start - starting index of src type in arrays
- * end - one larger than ending index of src type (makes loop clean)
- * offset_adjustment - amount to add to "indices" (offset) component
- * of each off/len pair copied
- */
-void ADIOI_Flatten_copy_type(ADIOI_Flatlist_node *flat,
- int old_type_start,
- int old_type_end,
- int new_type_start,
- ADIO_Offset offset_adjustment)
-{
- int i, out_index = new_type_start;
-
- for (i=old_type_start; i < old_type_end; i++) {
- flat->indices[out_index] = flat->indices[i] + offset_adjustment;
- flat->blocklens[out_index] = flat->blocklens[i];
- out_index++;
- }
-}
-
diff --git a/ompi/mca/io/romio/romio/adio/common/get_fp_posn.c b/ompi/mca/io/romio/romio/adio/common/get_fp_posn.c
index a5a422a7c7..1d7cab8c4b 100644
--- a/ompi/mca/io/romio/romio/adio/common/get_fp_posn.c
+++ b/ompi/mca/io/romio/romio/adio/common/get_fp_posn.c
@@ -7,6 +7,7 @@
#include "adio.h"
#include "adio_extern.h"
+#include "adioi.h"
/* returns the current position of the individual file pointer
in etype units relative to the current view. */
@@ -14,10 +15,11 @@
void ADIOI_Get_position(ADIO_File fd, ADIO_Offset *offset)
{
ADIOI_Flatlist_node *flat_file;
- int i, n_filetypes, flag, frd_size;
- int filetype_size, etype_size, filetype_is_contig;
+ int i, flag;
+ unsigned filetype_size;
+ int etype_size, filetype_is_contig;
MPI_Aint filetype_extent;
- ADIO_Offset disp, byte_offset, sum=0, size_in_file;
+ ADIO_Offset disp, byte_offset, sum=0, size_in_file, n_filetypes, frd_size;
ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
etype_size = fd->etype_size;
@@ -28,7 +30,7 @@ void ADIOI_Get_position(ADIO_File fd, ADIO_Offset *offset)
flat_file = ADIOI_Flatlist;
while (flat_file->type != fd->filetype) flat_file = flat_file->next;
- MPI_Type_size(fd->filetype, &filetype_size);
+ MPI_Type_size(fd->filetype, (int*)&filetype_size);
MPI_Type_extent(fd->filetype, &filetype_extent);
disp = fd->disp;
@@ -41,18 +43,18 @@ void ADIOI_Get_position(ADIO_File fd, ADIO_Offset *offset)
for (i=0; icount; i++) {
sum += flat_file->blocklens[i];
if (disp + flat_file->indices[i] +
- (ADIO_Offset) n_filetypes*filetype_extent + flat_file->blocklens[i]
+ n_filetypes* ADIOI_AINT_CAST_TO_OFFSET filetype_extent + flat_file->blocklens[i]
>= byte_offset) {
- frd_size = (int) (disp + flat_file->indices[i] +
- (ADIO_Offset) n_filetypes*filetype_extent
- + flat_file->blocklens[i] - byte_offset);
+ frd_size = disp + flat_file->indices[i] +
+ n_filetypes * ADIOI_AINT_CAST_TO_OFFSET filetype_extent
+ + flat_file->blocklens[i] - byte_offset;
sum -= frd_size;
flag = 1;
break;
}
}
}
- size_in_file = (ADIO_Offset) n_filetypes*filetype_size + sum;
+ size_in_file = n_filetypes * (ADIO_Offset)filetype_size + sum;
*offset = size_in_file/etype_size;
}
}
diff --git a/ompi/mca/io/romio/romio/adio/common/heap-sort.c b/ompi/mca/io/romio/romio/adio/common/heap-sort.c
new file mode 100644
index 0000000000..63b6c1c8b5
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/common/heap-sort.c
@@ -0,0 +1,133 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ *
+ * Copyright (C) 2008 University of Chicago.
+ * See COPYRIGHT notice in top-level directory.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <math.h>
+#include "heap-sort.h"
+
+#define NOEXP2
+
+static void heapify(heap_t *heap, int i);
+
+/* From Introduction To Algorithms by Cormen, Leiserson, and Rivest */
+
+static inline int parent(int i) {
+ return (i/2);
+}
+
+static inline int left(int i) {
+ return (2*i);
+}
+
+static inline int right(int i) {
+ return (2*i + 1);
+}
+
+int ADIOI_Heap_create(heap_t *heap, int size) {
+ heap->size = size;
+ heap->nodes = (heap_node_t *) ADIOI_Calloc (size, sizeof(heap_node_t));
+ if (heap->nodes == NULL)
+ return 1;
+ else
+ return 0;
+}
+
+void ADIOI_Heap_free(heap_t *heap) {
+ ADIOI_Free(heap->nodes);
+}
+
+/* should suppress unused warnings on GCC */
+static void build_heap(heap_t *heap) ATTRIBUTE((unused, used));
+/* Turn nodes[0 .. size-1] into a min-heap (ordered by offset), bottom-up. */
+static void build_heap(heap_t *heap)
+{
+    int i;   /* children of i are 2i and 2i+1, so size/2 is the last possible parent */
+    for (i=(heap->size/2); i >= 0; i--)
+        heapify(heap, i);
+}
+
+static void heapify(heap_t *heap, int i) {   /* sift nodes[i] down to restore min-heap order */
+    int l, r, smallest;
+    heap_node_t *nodes;
+    heap_node_t tmp_node;
+
+    nodes = heap->nodes;
+    /* valid slots are nodes[0 .. heap->size-1]; a child index must be < size */
+    l = left(i);
+    r = right(i);
+
+    if ((l < heap->size) && (nodes[l].offset < nodes[i].offset))
+        smallest = l;
+    else
+        smallest = i;
+
+    if ((r < heap->size) && (nodes[r].offset < nodes[smallest].offset))
+        smallest = r;
+    /* swap with the smaller child and keep sifting from its new slot */
+    if (smallest != i) {
+        tmp_node = nodes[i];
+        nodes[i] = nodes[smallest];
+        nodes[smallest] = tmp_node;
+        heapify(heap, smallest);
+    }
+}
+
+void ADIOI_Heap_insert(heap_t *heap, ADIO_Offset offset, int proc,
+ ADIO_Offset reg_max_len) {
+ heap_node_t *nodes;
+ int i;
+ nodes = heap->nodes;
+ i = ++heap->size - 1;
+ while ((i > 0) && (nodes[parent(i)].offset > offset)) {
+ nodes[i] = nodes[parent(i)];
+ i = parent(i);
+ }
+ nodes[i].offset = offset;
+ nodes[i].proc = proc;
+ nodes[i].reg_max_len = reg_max_len;
+}
+
+void ADIOI_Heap_extract_min(heap_t *heap, ADIO_Offset* offset, int *proc,
+ ADIO_Offset *reg_max_len) {
+ heap_node_t *nodes;
+ nodes = heap->nodes;
+
+ assert (heap->size > 0);
+ *offset = nodes[0].offset;
+ *proc = nodes[0].proc;
+ *reg_max_len = nodes[0].reg_max_len;
+ nodes[0] = nodes[heap->size-1];
+ heap->size--;
+ heapify(heap, 0);
+}
+
+/* should suppress unused warnings on GCC */
+static void print_heap(heap_t *heap) ATTRIBUTE((unused, used));
+
+static void print_heap(heap_t *heap)
+{
+#ifndef NOEXP2
+ int i;
+ double level = 0;
+ int next_level_idx = 1;
+
+ printf ("heap->size = %d\n", heap->size);
+ printf ("offsets:\n");
+ for (i=0; i < heap->size; i++) {
+ printf ("%lld ", heap->nodes[i].offset);
+
+ if ((i+1) == next_level_idx) {
+ printf ("\n");
+ next_level_idx += (int) exp2(level+1);
+ level++;
+ }
+ }
+ printf ("\n");
+#endif
+}
diff --git a/ompi/mca/io/romio/romio/adio/common/iscontig.c b/ompi/mca/io/romio/romio/adio/common/iscontig.c
index a96a8d8808..1b47035cc6 100644
--- a/ompi/mca/io/romio/romio/adio/common/iscontig.c
+++ b/ompi/mca/io/romio/romio/adio/common/iscontig.c
@@ -60,7 +60,8 @@ void ADIOI_Datatype_iscontig(MPI_Datatype datatype, int *flag)
#elif defined(OMPI_BUILDING) && OMPI_BUILDING
-/* This function is included in Open MPI source code */
+/* void ADIOI_Datatype_iscontig(MPI_Datatype datatype, int *flag) is defined
+ * and implemented in OpenMPI itself */
#else
diff --git a/ompi/mca/io/romio/romio/adio/common/lock.c b/ompi/mca/io/romio/romio/adio/common/lock.c
index d21ec4d0ed..fda652c524 100644
--- a/ompi/mca/io/romio/romio/adio/common/lock.c
+++ b/ompi/mca/io/romio/romio/adio/common/lock.c
@@ -93,7 +93,7 @@ int ADIOI_Set_lock(FDTYPE fd, int cmd, int type, ADIO_Offset offset, int whence,
int ADIOI_Set_lock(FDTYPE fd, int cmd, int type, ADIO_Offset offset, int whence,
ADIO_Offset len)
{
- int err, error_code;
+ int err, error_code, err_count = 0, sav_errno;
struct flock lock;
if (len == 0) return MPI_SUCCESS;
@@ -120,16 +120,42 @@ int ADIOI_Set_lock(FDTYPE fd, int cmd, int type, ADIO_Offset offset, int whence,
lock.l_len = len;
#endif
+ sav_errno = errno; /* save previous errno in case we recover from retryable errors */
errno = 0;
do {
err = fcntl(fd, cmd, &lock);
- } while (err && (errno == EINTR));
+#ifdef USE_DBG_LOGGING
+/* if (MPIU_DBG_SELECTED(ROMIO,TERSE)) */
+ {
+ if (err && ((errno == EINTR) || (errno == EINPROGRESS)))
+ {
+ if((err_count < 5) || (err_count > 9995))
+ {
+ fprintf(stderr, "File locking failed in ADIOI_Set_lock(fd %#X,cmd %s/%#X,type %s/%#X,whence %#X) with return value %#X and errno %#X. Retry (%d).\n",
+ fd,
+ ((cmd == F_GETLK )? "F_GETLK" :
+ ((cmd == F_SETLK )? "F_SETLK" :
+ ((cmd == F_SETLKW )? "F_SETLKW" : "UNEXPECTED"))),
+ cmd,
+ ((type == F_RDLCK )? "F_RDLCK" :
+ ((type == F_WRLCK )? "F_WRLCK" :
+ ((type == F_UNLCK )? "F_UNLOCK" : "UNEXPECTED"))),
+ type,
+ whence, err, errno, err_count);
+ perror("ADIOI_Set_lock:");
+ fprintf(stderr,"ADIOI_Set_lock:offset %#llx, length %#llx\n",(unsigned long long)offset, (unsigned long long)len);
+ }
+ }
+ }
+#endif
+ } while (err && ((errno == EINTR) || ((errno == EINPROGRESS) && (++err_count < 10000))));
if (err && (errno != EBADF)) {
/* FIXME: This should use the error message system,
especially for MPICH2 */
FPRINTF(stderr, "File locking failed in ADIOI_Set_lock(fd %X,cmd %s/%X,type %s/%X,whence %X) with return value %X and errno %X.\n"
- "If the file system is NFS, you need to use NFS version 3, ensure that the lockd daemon is running on all the machines, and mount the directory with the 'noac' option (no attribute caching).\n",
+ "- If the file system is NFS, you need to use NFS version 3, ensure that the lockd daemon is running on all the machines, and mount the directory with the 'noac' option (no attribute caching).\n"
+ "- If the file system is LUSTRE, ensure that the directory is mounted with the 'flock' option.\n",
fd,
((cmd == F_GETLK )? "F_GETLK" :
((cmd == F_SETLK )? "F_SETLK" :
@@ -145,6 +171,9 @@ int ADIOI_Set_lock(FDTYPE fd, int cmd, int type, ADIO_Offset offset, int whence,
MPI_Abort(MPI_COMM_WORLD, 1);
}
+ if(!err) /* report fcntl failure errno's (EBADF), otherwise */
+ errno = sav_errno; /* restore previous errno in case we recovered from retryable errors */
+
error_code = (err == 0) ? MPI_SUCCESS : MPI_ERR_UNKNOWN;
return error_code;
}
diff --git a/ompi/mca/io/romio/romio/adio/common/malloc.c b/ompi/mca/io/romio/romio/adio/common/malloc.c
index ee9546528f..55306842bd 100644
--- a/ompi/mca/io/romio/romio/adio/common/malloc.c
+++ b/ompi/mca/io/romio/romio/adio/common/malloc.c
@@ -14,16 +14,11 @@
Later on, add some tracing and error checking, similar to
MPID_trmalloc. */
-/* can't include adio.h here, because of the macro, so
- * include romioconf.h to make sure config-time defines get included */
-
-#include "romioconf.h"
+#include "adio.h"
#include "mpi.h"
#include <stdlib.h>
#include <stdio.h>
#include "mpipr.h"
-/* Open MPI: This seemes to have been missing */
-#include "adio.h"
#ifdef HAVE_MALLOC_H
#include <malloc.h>
@@ -35,66 +30,84 @@
/* style: allow:calloc:1 sig:0 */
/* style: allow:realloc:1 sig:0 */
-
#define FPRINTF fprintf
-void *ADIOI_Malloc_fn(size_t size, int lineno, char *fname);
-void *ADIOI_Calloc_fn(size_t nelem, size_t elsize, int lineno, char *fname);
-void *ADIOI_Realloc_fn(void *ptr, size_t size, int lineno, char *fname);
-void ADIOI_Free_fn(void *ptr, int lineno, char *fname);
-void *ADIOI_Malloc_fn(size_t size, int lineno, char *fname)
+void *ADIOI_Malloc_fn(size_t size, int lineno, const char *fname);
+void *ADIOI_Calloc_fn(size_t nelem, size_t elsize, int lineno, const char *fname);
+void *ADIOI_Realloc_fn(void *ptr, size_t size, int lineno, const char *fname);
+void ADIOI_Free_fn(void *ptr, int lineno, const char *fname);
+
+void *ADIOI_Malloc_fn(size_t size, int lineno, const char *fname)
{
void *new;
#ifdef ROMIO_XFS
new = (void *) memalign(XFS_MEMALIGN, size);
+#else
+#ifdef HAVE_MPIU_FUNCS
+ new = (void *) MPIU_Malloc(size);
#else
new = (void *) malloc(size);
+#endif
#endif
if (!new) {
FPRINTF(stderr, "Out of memory in file %s, line %d\n", fname, lineno);
MPI_Abort(MPI_COMM_WORLD, 1);
}
-
+ DBG_FPRINTF(stderr, "ADIOI_Malloc %s:<%d> %p (%#zX)\n", fname, lineno, new, size);
return new;
}
-void *ADIOI_Calloc_fn(size_t nelem, size_t elsize, int lineno, char *fname)
+void *ADIOI_Calloc_fn(size_t nelem, size_t elsize, int lineno, const char *fname)
{
void *new;
+#ifdef HAVE_MPIU_FUNCS
+ new = (void *) MPIU_Calloc(nelem, elsize);
+#else
new = (void *) calloc(nelem, elsize);
+#endif
if (!new) {
FPRINTF(stderr, "Out of memory in file %s, line %d\n", fname, lineno);
MPI_Abort(MPI_COMM_WORLD, 1);
}
-
+ DBG_FPRINTF(stderr, "ADIOI_Calloc %s:<%d> %p\n", fname, lineno, new);
return new;
}
-void *ADIOI_Realloc_fn(void *ptr, size_t size, int lineno, char *fname)
+void *ADIOI_Realloc_fn(void *ptr, size_t size, int lineno, const char *fname)
{
void *new;
+#ifdef HAVE_MPIU_FUNCS
+ new = (void *) MPIU_Realloc(ptr, size);
+#else
new = (void *) realloc(ptr, size);
+#endif
if (!new) {
FPRINTF(stderr, "realloc failed in file %s, line %d\n", fname, lineno);
MPI_Abort(MPI_COMM_WORLD, 1);
}
+ DBG_FPRINTF(stderr, "ADIOI_Realloc %s:<%d> %p\n", fname, lineno, new);
return new;
}
-void ADIOI_Free_fn(void *ptr, int lineno, char *fname)
+void ADIOI_Free_fn(void *ptr, int lineno, const char *fname)
{
+ DBG_FPRINTF(stderr, "ADIOI_Free %s:<%d> %p\n", fname, lineno, ptr);
if (!ptr) {
FPRINTF(stderr, "Attempt to free null pointer in file %s, line %d\n", fname, lineno);
MPI_Abort(MPI_COMM_WORLD, 1);
}
+#ifdef HAVE_MPIU_FUNCS
+ MPIU_Free(ptr);
+#else
free(ptr);
+#endif
}
diff --git a/ompi/mca/io/romio/romio/adio/common/system_hints.c b/ompi/mca/io/romio/romio/adio/common/system_hints.c
index 361f16addb..bd01d3b005 100644
--- a/ompi/mca/io/romio/romio/adio/common/system_hints.c
+++ b/ompi/mca/io/romio/romio/adio/common/system_hints.c
@@ -40,6 +40,28 @@
#define ROMIO_HINT_DEFAULT_CFG "/etc/romio-hints"
#define ROMIO_HINT_ENV_VAR "ROMIO_HINTS"
+ /* should suppress unused warnings on GCC */
+static void dump_keys(MPI_Info info) ATTRIBUTE((unused, used));
+
+/* debug function: a routine I want in the library to make my life easier when
+ * using a source debugger. please ignore any "defined but not used" warnings
+ */
+static void dump_keys(MPI_Info info)
+{
+ int i, nkeys, flag;
+ char key[MPI_MAX_INFO_KEY];
+ char value[MPI_MAX_INFO_VAL];
+
+ MPI_Info_get_nkeys(info, &nkeys);
+
+ for (i=0; ifns->ADIOI_xxx_OpenColl))(fd, rank, access_mode, error_code)
+
#define ADIO_ReadContig(fd,buf,count,datatype,file_ptr_type,offset,status,error_code) \
(*(fd->fns->ADIOI_xxx_ReadContig))(fd,buf,count,datatype,file_ptr_type,offset,status,error_code)
@@ -269,19 +287,31 @@ struct ADIOI_Fns_struct {
#define ADIO_SetInfo(fd, users_info, error_code) \
(*(fd->fns->ADIOI_xxx_SetInfo))(fd, users_info, error_code)
+#define ADIO_Feature(fd, flag) \
+ (*(fd->fns->ADIOI_xxx_Feature))(fd, flag)
+
/* structure for storing access info of this process's request
from the file domain of other processes, and vice-versa. used
as array of structures indexed by process number. */
typedef struct {
ADIO_Offset *offsets; /* array of offsets */
- int *lens; /* array of lengths */
+ int *lens; /* array of lengths */
+ /* consider aints or offsets for lens? Seems to be used as in-memory
+ buffer lengths, so it should be < 2G and ok as an int */
MPI_Aint *mem_ptrs; /* array of pointers. used in the read/write
phase to indicate where the data
is stored in memory */
int count; /* size of above arrays */
} ADIOI_Access;
+/* structure for storing generic offset/length pairs. used to describe
+ file realms among other things */
+typedef struct {
+ ADIO_Offset *offsets; /* array of offsets */
+ int *lens; /* array of lengths */
+ int count; /* size of above arrays */
+} ADIOI_Offlen;
/* prototypes for ADIO internal functions */
@@ -292,21 +322,29 @@ void ADIOI_Flatten(MPI_Datatype type, ADIOI_Flatlist_node *flat,
void ADIOI_Delete_flattened(MPI_Datatype datatype);
int ADIOI_Count_contiguous_blocks(MPI_Datatype type, int *curr_index);
void ADIOI_Complete_async(int *error_code);
-void *ADIOI_Malloc_fn(size_t size, int lineno, char *fname);
-void *ADIOI_Calloc_fn(size_t nelem, size_t elsize, int lineno, char *fname);
-void *ADIOI_Realloc_fn(void *ptr, size_t size, int lineno, char *fname);
-void ADIOI_Free_fn(void *ptr, int lineno, char *fname);
+void *ADIOI_Malloc_fn(size_t size, int lineno, const char *fname);
+void *ADIOI_Calloc_fn(size_t nelem, size_t elsize, int lineno, const char *fname);
+void *ADIOI_Realloc_fn(void *ptr, size_t size, int lineno, const char *fname);
+void ADIOI_Free_fn(void *ptr, int lineno, const char *fname);
void ADIOI_Datatype_iscontig(MPI_Datatype datatype, int *flag);
void ADIOI_Get_position(ADIO_File fd, ADIO_Offset *offset);
void ADIOI_Get_eof_offset(ADIO_File fd, ADIO_Offset *eof_offset);
void ADIOI_Get_byte_offset(ADIO_File fd, ADIO_Offset offset,
ADIO_Offset *disp);
void ADIOI_process_system_hints(MPI_Info info);
+void ADIOI_incorporate_system_hints(MPI_Info info, MPI_Info sysinfo,
+ MPI_Info *new_info);
void ADIOI_GEN_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct,
int *error_code);
void ADIOI_GEN_Flush(ADIO_File fd, int *error_code);
+void ADIOI_GEN_OpenColl(ADIO_File fd, int rank,
+ int access_mode, int *error_code);
+void ADIOI_SCALEABLE_OpenColl(ADIO_File fd, int rank,
+ int access_mode, int *error_code);
+void ADIOI_FAILSAFE_OpenColl(ADIO_File fd, int rank,
+ int access_mode, int *error_code);
void ADIOI_GEN_Delete(char *filename, int *error_code);
void ADIOI_GEN_ReadContig(ADIO_File fd, void *buf, int count,
MPI_Datatype datatype, int file_ptr_type,
@@ -347,6 +385,8 @@ int ADIOI_GEN_aio_wait_fn(int count, void **array_of_states, double timeout,
ADIO_Status *status);
int ADIOI_GEN_aio_query_fn(void *extra_state, ADIO_Status *status);
int ADIOI_GEN_aio_free_fn(void *extra_state);
+int ADIOI_GEN_Feature(ADIO_File fd, int feature);
+
void ADIOI_GEN_ReadStrided_naive(ADIO_File fd, void *buf, int count,
MPI_Datatype buftype, int file_ptr_type,
ADIO_Offset offset, ADIO_Status *status, int
@@ -373,7 +413,7 @@ void ADIOI_GEN_WriteStridedColl(ADIO_File fd, void *buf, int count,
*error_code);
void ADIOI_Calc_my_off_len(ADIO_File fd, int bufcount, MPI_Datatype
datatype, int file_ptr_type, ADIO_Offset
- offset, ADIO_Offset **offset_list_ptr, int
+ offset, ADIO_Offset **offset_list_ptr, ADIO_Offset
**len_list_ptr, ADIO_Offset *start_offset_ptr,
ADIO_Offset *end_offset_ptr, int
*contig_access_count_ptr);
@@ -381,7 +421,9 @@ void ADIOI_Calc_file_domains(ADIO_Offset *st_offsets, ADIO_Offset
*end_offsets, int nprocs, int nprocs_for_coll,
ADIO_Offset *min_st_offset_ptr,
ADIO_Offset **fd_start_ptr, ADIO_Offset
- **fd_end_ptr, ADIO_Offset *fd_size_ptr);
+ **fd_end_ptr, int min_fd_size,
+ ADIO_Offset *fd_size_ptr,
+ int striping_unit);
int ADIOI_Calc_aggregator(ADIO_File fd,
ADIO_Offset off,
ADIO_Offset min_off,
@@ -390,7 +432,7 @@ int ADIOI_Calc_aggregator(ADIO_File fd,
ADIO_Offset *fd_start,
ADIO_Offset *fd_end);
void ADIOI_Calc_my_req(ADIO_File fd, ADIO_Offset *offset_list,
- int *len_list, int
+ ADIO_Offset *len_list, int
contig_access_count, ADIO_Offset
min_st_offset, ADIO_Offset *fd_start,
ADIO_Offset *fd_end, ADIO_Offset fd_size,
@@ -405,6 +447,107 @@ void ADIOI_Calc_others_req(ADIO_File fd, int count_my_req_procs,
int nprocs, int myrank,
int *count_others_req_procs_ptr,
ADIOI_Access **others_req_ptr);
+
+/* KC && AC - New Collective I/O internals*/
+
+#define TEMP_OFF 0
+#define REAL_OFF 1
+#define MAX_OFF_TYPE 2
+
+/* Communication Tags */
+#define DATA_TAG 30
+#define AMT_TAG 31
+
+/* cb_fr_type user size is non-zero */
+#define ADIOI_FR_AAR 0
+#define ADIOI_FR_FSZ -1
+#define ADIOI_FR_USR_REALMS -2
+
+typedef struct flatten_state
+{
+ ADIO_Offset abs_off;
+ ADIO_Offset cur_sz;
+ ADIO_Offset idx;
+ ADIO_Offset cur_reg_off;
+} flatten_state;
+
+typedef struct view_state
+{
+ ADIO_Offset fp_ind; /* file view params*/
+ ADIO_Offset disp; /* file view params*/
+ ADIO_Offset byte_off;
+ ADIO_Offset sz;
+ ADIO_Offset ext; /* preserved extent from MPI_Type_extent */
+ ADIO_Offset type_sz;
+
+ /* Current state */
+ flatten_state cur_state;
+ /* Scratch state for counting up ol pairs */
+ flatten_state tmp_state;
+
+ /* Preprocessed data amount and ol pairs */
+ ADIO_Offset pre_sz;
+ int pre_ol_ct;
+ MPI_Aint *pre_disp_arr;
+ int *pre_blk_arr;
+
+ ADIOI_Flatlist_node *flat_type_p;
+} view_state;
+
+void ADIOI_Calc_bounds (ADIO_File fd, int count, MPI_Datatype buftype,
+ int file_ptr_type, ADIO_Offset offset,
+ ADIO_Offset *st_offset, ADIO_Offset *end_offset);
+int ADIOI_Agg_idx (int rank, ADIO_File fd);
+void ADIOI_Calc_file_realms (ADIO_File fd, ADIO_Offset min_st_offset,
+ ADIO_Offset max_end_offset);
+void ADIOI_IOFiletype(ADIO_File fd, void *buf, int count,
+ MPI_Datatype datatype, int file_ptr_type,
+ ADIO_Offset offset, MPI_Datatype custom_ftype,
+ int rdwr, ADIO_Status *status, int
+ *error_code);
+void ADIOI_IOStridedColl(ADIO_File fd, void *buf, int count, int rdwr,
+ MPI_Datatype datatype, int file_ptr_type,
+ ADIO_Offset offset, ADIO_Status *status, int
+ *error_code);
+void ADIOI_Print_flatlist_node(ADIOI_Flatlist_node *flatlist_node_p);
+ADIOI_Flatlist_node * ADIOI_Add_contig_flattened(MPI_Datatype contig_type);
+void ADIOI_Exch_file_views(int myrank, int nprocs, int file_ptr_type,
+ ADIO_File fd, int count,
+ MPI_Datatype datatype, ADIO_Offset off,
+ view_state *my_mem_view_state_arr,
+ view_state *agg_file_view_state_arr,
+ view_state *client_file_view_state_arr);
+int ADIOI_init_view_state(int file_ptr_type,
+ int nprocs,
+ view_state *view_state_arr,
+ int op_type);
+int ADIOI_Build_agg_reqs(ADIO_File fd, int rw_type, int nprocs,
+ view_state *client_file_view_state_arr,
+ MPI_Datatype *client_comm_dtype_arr,
+ ADIO_Offset *client_comm_sz_arr,
+ ADIO_Offset *agg_dtype_offset_p,
+ MPI_Datatype *agg_dtype_p);
+int ADIOI_Build_client_reqs(ADIO_File fd,
+ int nprocs,
+ view_state *my_mem_view_state_arr,
+ view_state *agg_file_view_state_arr,
+ ADIO_Offset *agg_comm_sz_arr,
+ MPI_Datatype *agg_comm_dtype_arr);
+int ADIOI_Build_client_pre_req(ADIO_File fd,
+ int agg_rank,
+ int agg_idx,
+ view_state *my_mem_view_state_p,
+ view_state *agg_file_view_state_p,
+ ADIO_Offset max_pre_req_sz,
+ int max_ol_ct);
+int ADIOI_Build_client_req(ADIO_File fd,
+ int agg_rank,
+ int agg_idx,
+ view_state *my_mem_view_state_p,
+ view_state *agg_file_view_state_p,
+ ADIO_Offset agg_comm_sz,
+ MPI_Datatype *agg_comm_dtype_p);
+
ADIO_Offset ADIOI_GEN_SeekIndividual(ADIO_File fd, ADIO_Offset offset,
int whence, int *error_code);
void ADIOI_GEN_Resize(ADIO_File fd, ADIO_Offset size, int *error_code);
@@ -585,6 +728,23 @@ int ADIOI_Strncpy( char *outstr, const char *instr, size_t maxlen );
int ADIOI_Strnapp( char *, const char *, size_t );
char *ADIOI_Strdup( const char * );
+/* the current MPI standard is not const-correct, and modern compilers warn
+ * about the following sort of code:
+ *
+ * MPI_Info_set(info, "key", "val");
+ *
+ * reminding us that "key" and "val" are const. We use the following macros to
+ * cast away the const and suppress the warning. */
+#define ADIOI_Info_set(info_,key_str_,val_) \
+ MPI_Info_set((info_),((char*)key_str_),(char*)(val_))
+#define ADIOI_Info_get(info_,key_str_,val_len_,val_,flag_) \
+ MPI_Info_get((info_),((char*)key_str_),(val_len_),(val_),(flag_))
+#define ADIOI_Info_get_valuelen(info_,key_str_,val_len_,flag_) \
+ MPI_Info_get_valuelen((info_),((char*)key_str_),(val_len_),(flag_))
+#define ADIOI_Info_delete(info_,key_str_) \
+ MPI_Info_delete((info_),((char*)key_str_))
+
+
/* Provide a fallback snprintf for systems that do not have one */
/* Define attribute as empty if it has no definition */
#ifndef ATTRIBUTE
@@ -644,7 +804,55 @@ int ADIOI_MPE_unlock_a;
int ADIOI_MPE_unlock_b;
int ADIOI_MPE_postwrite_a;
int ADIOI_MPE_postwrite_b;
+int ADIOI_MPE_openinternal_a;
+int ADIOI_MPE_openinternal_b;
+int ADIOI_MPE_stat_a;
+int ADIOI_MPE_stat_b;
#endif
+#ifdef ROMIO_INSIDE_MPICH2
+/* Assert that this MPI_Aint value can be cast to a ptr value without problem.*/
+/* Basic idea is the value should be unchanged after casting
+ (no loss of (meaningful) high order bytes in 8 byte MPI_Aint
+ to (possible) 4 byte ptr cast) */
+/* Should work even on 64bit or old 32bit configs */
+ /* Use MPID_Ensure_Aint_fits_in_pointer from mpiutil.h and
+ MPI_AINT_CAST_TO_VOID_PTR from configure (mpi.h) */
+ #include "mpiimpl.h"
+
+ #define ADIOI_AINT_CAST_TO_VOID_PTR (void*)(MPIR_Pint)
+ /* The next two casts are only used when you don't want sign extension
+ when casting a (possible 4 byte) aint to a (8 byte) long long or offset */
+ #define ADIOI_AINT_CAST_TO_LONG_LONG (long long)
+ #define ADIOI_AINT_CAST_TO_OFFSET ADIOI_AINT_CAST_TO_LONG_LONG
+
+ #define ADIOI_ENSURE_AINT_FITS_IN_PTR(aint_value) MPID_Ensure_Aint_fits_in_pointer(aint_value)
+ #define ADIOI_Assert MPIU_Assert
+#else
+ #include <assert.h>
+ #define ADIOI_AINT_CAST_TO_VOID_PTR (void*)
+ #define ADIOI_AINT_CAST_TO_LONG_LONG (long long)
+ #define ADIOI_AINT_CAST_TO_OFFSET ADIOI_AINT_CAST_TO_LONG_LONG
+ #define ADIOI_ENSURE_AINT_FITS_IN_PTR(aint_value)
+ #define ADIOI_Assert assert
+ #define MPIR_Upint unsigned int
+ #define MPIU_THREADPRIV_DECL
+#endif
+
+#ifdef USE_DBG_LOGGING /*todo fix dependency on mpich?*/
+/* DBGT_FPRINTF terse level printing */
+#define DBGT_FPRINTF if (MPIU_DBG_SELECTED(ROMIO,VERBOSE)) fprintf(stderr,"%s:%d:",__FILE__,__LINE__); \
+if (MPIU_DBG_SELECTED(ROMIO,TERSE)) fprintf
+/* DBG_FPRINTF default (typical level) printing */
+#define DBG_FPRINTF if (MPIU_DBG_SELECTED(ROMIO,VERBOSE)) fprintf(stderr,"%s:%d:",__FILE__,__LINE__); \
+if (MPIU_DBG_SELECTED(ROMIO,TYPICAL)) fprintf
+/* DBGV_FPRINTF verbose level printing */
+#define DBGV_FPRINTF if (MPIU_DBG_SELECTED(ROMIO,VERBOSE)) fprintf(stderr,"%s:%d:",__FILE__,__LINE__); \
+ if (MPIU_DBG_SELECTED(ROMIO,VERBOSE)) fprintf
+#else /* compile it out */
+#define DBGT_FPRINTF if (0) fprintf
+#define DBG_FPRINTF if (0) fprintf
+#define DBGV_FPRINTF if (0) fprintf
+#endif
#endif
diff --git a/ompi/mca/io/romio/romio/adio/include/adioi_errmsg.h b/ompi/mca/io/romio/romio/adio/include/adioi_errmsg.h
index e17c1b01ca..79f9c9e1cd 100644
--- a/ompi/mca/io/romio/romio/adio/include/adioi_errmsg.h
+++ b/ompi/mca/io/romio/romio/adio/include/adioi_errmsg.h
@@ -1,6 +1,5 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/* $Id: adioi_errmsg.h,v 1.5 2005/05/23 23:27:49 rross Exp $
- *
+/*
* (C) 2001 by Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
diff --git a/ompi/mca/io/romio/romio/adio/include/adioi_error.h b/ompi/mca/io/romio/romio/adio/include/adioi_error.h
index 448acf340d..d7c3ad233a 100644
--- a/ompi/mca/io/romio/romio/adio/include/adioi_error.h
+++ b/ompi/mca/io/romio/romio/adio/include/adioi_error.h
@@ -1,6 +1,5 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/* $Id: adioi_error.h,v 1.12 2006/01/05 23:53:58 robl Exp $
- *
+/*
* (C) 2001 by Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
@@ -33,6 +32,17 @@ if (count < 0) { \
goto fn_exit; \
}
+#define MPIO_CHECK_COUNT_SIZE(fh, count, datatype_size, myname, error_code) \
+if (count*datatype_size != (ADIO_Offset)(unsigned)count*(ADIO_Offset)(unsigned)datatype_size) { \
+ error_code = MPIO_Err_create_code(MPI_SUCCESS, \
+ MPIR_ERR_RECOVERABLE, \
+ myname, __LINE__, \
+ MPI_ERR_ARG, \
+ "**iobadcount", 0); \
+ error_code = MPIO_Err_return_file(fh, error_code); \
+ goto fn_exit; \
+}
+
#define MPIO_CHECK_DATATYPE(fh, datatype, myname, error_code) \
if (datatype == MPI_DATATYPE_NULL) { \
error_code = MPIO_Err_create_code(MPI_SUCCESS, \
diff --git a/ompi/mca/io/romio/romio/adio/include/adioi_fs_proto.h b/ompi/mca/io/romio/romio/adio/include/adioi_fs_proto.h
index 2fc7f7f0f8..88c3a838fc 100644
--- a/ompi/mca/io/romio/romio/adio/include/adioi_fs_proto.h
+++ b/ompi/mca/io/romio/romio/adio/include/adioi_fs_proto.h
@@ -89,4 +89,9 @@ extern struct ADIOI_Fns_struct ADIO_BGLOCKLESS_operations;
extern struct ADIOI_Fns_struct ADIO_GRIDFTP_operations;
#endif
+#ifdef ROMIO_ZOIDFS
+/* prototypes are in adio/ad_zoidfs/ad_zoidfs.h */
+extern struct ADIOI_Fns_struct ADIO_ZOIDFS_operations;
+#endif
+
#endif
diff --git a/ompi/mca/io/romio/romio/adio/include/heap-sort.h b/ompi/mca/io/romio/romio/adio/include/heap-sort.h
new file mode 100644
index 0000000000..903a1e8060
--- /dev/null
+++ b/ompi/mca/io/romio/romio/adio/include/heap-sort.h
@@ -0,0 +1,22 @@
+#include "adio.h"
+
+typedef struct {
+ ADIO_Offset offset;
+ int proc;
+ ADIO_Offset reg_max_len;
+} heap_node_t;
+
+typedef struct {
+ heap_node_t *nodes;
+ int size;
+} heap_t;
+
+/*static inline int parent(heap_t *heap, int i);
+static inline int left(heap_t *heap, int i);
+static inline int right(heap_t *heap, int i); */
+void ADIOI_Heap_free(heap_t *heap);
+int ADIOI_Heap_create(heap_t *heap, int size);
+void ADIOI_Heap_insert(heap_t *heap, ADIO_Offset offset, int proc,
+ ADIO_Offset reg_max_len);
+void ADIOI_Heap_extract_min(heap_t *heap, ADIO_Offset* key, int *proc,
+ ADIO_Offset *reg_max_len);
diff --git a/ompi/mca/io/romio/romio/adio/include/mpio_error.h b/ompi/mca/io/romio/romio/adio/include/mpio_error.h
index 66c7a10433..2a5e524cfa 100644
--- a/ompi/mca/io/romio/romio/adio/include/mpio_error.h
+++ b/ompi/mca/io/romio/romio/adio/include/mpio_error.h
@@ -1,6 +1,5 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/* $Id: mpio_error.h,v 1.6 2005/05/23 23:27:50 rross Exp $
- *
+/*
* (C) 2001 by Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
diff --git a/ompi/mca/io/romio/romio/adio/include/mpipr.h b/ompi/mca/io/romio/romio/adio/include/mpipr.h
index f14fe22f23..a609f7bf45 100644
--- a/ompi/mca/io/romio/romio/adio/include/mpipr.h
+++ b/ompi/mca/io/romio/romio/adio/include/mpipr.h
@@ -24,6 +24,8 @@
#define MPI_Alltoall PMPI_Alltoall
#undef MPI_Alltoallv
#define MPI_Alltoallv PMPI_Alltoallv
+#undef MPI_Alltoallw
+#define MPI_Alltoallw PMPI_Alltoallw
#undef MPI_Attr_delete
#define MPI_Attr_delete PMPI_Attr_delete
#undef MPI_Attr_get
@@ -150,26 +152,6 @@
#define MPI_Group_union PMPI_Group_union
#undef MPI_Ibsend
#define MPI_Ibsend PMPI_Ibsend
-#if 0
-#undef MPI_Info_create
-#define MPI_Info_create PMPI_Info_create
-#undef MPI_Info_delete
-#define MPI_Info_delete PMPI_Info_delete
-#undef MPI_Info_dup
-#define MPI_Info_dup PMPI_Info_dup
-#undef MPI_Info_free
-#define MPI_Info_free PMPI_Info_free
-#undef MPI_Info_get
-#define MPI_Info_get PMPI_Info_get
-#undef MPI_Info_get_nkeys
-#define MPI_Info_get_nkeys PMPI_Info_get_nkeys
-#undef MPI_Info_get_nthkey
-#define MPI_Info_get_nthkey PMPI_Info_get_nthkey
-#undef MPI_Info_get_valuelen
-#define MPI_Info_get_valuelen PMPI_Info_get_valuelen
-#undef MPI_Info_set
-#define MPI_Info_set PMPI_Info_set
-#endif /* only conditionally set the info */
#undef MPI_Init
#define MPI_Init PMPI_Init
#undef MPI_Initialized
@@ -392,4 +374,13 @@
#define MPI_File_f2c PMPI_File_f2c
#endif
+#undef MPI_Type_get_attr
+#define MPI_Type_get_attr PMPI_Type_get_attr
+#undef MPI_Type_set_attr
+#define MPI_Type_set_attr PMPI_Type_set_attr
+#undef MPI_Comm_set_attr
+#define MPI_Comm_set_attr PMPI_Comm_set_attr
+#undef MPI_Type_create_keyval
+#define MPI_Type_create_keyval PMPI_Type_create_keyval
+
#endif
diff --git a/ompi/mca/io/romio/romio/autogen.sh b/ompi/mca/io/romio/romio/autogen.sh
new file mode 100644
index 0000000000..458232464a
--- /dev/null
+++ b/ompi/mca/io/romio/romio/autogen.sh
@@ -0,0 +1,2 @@
+:
+autoreconf -ivf -I confdb
diff --git a/ompi/mca/io/romio/romio/common/dataloop/.state-cache b/ompi/mca/io/romio/romio/common/dataloop/.state-cache
deleted file mode 100644
index 37fee8c743..0000000000
--- a/ompi/mca/io/romio/romio/common/dataloop/.state-cache
+++ /dev/null
@@ -1,64 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/ompi/mca/io/romio/romio/common/dataloop/darray_support.c b/ompi/mca/io/romio/romio/common/dataloop/darray_support.c
index da1270ac06..6a7d323429 100644
--- a/ompi/mca/io/romio/romio/common/dataloop/darray_support.c
+++ b/ompi/mca/io/romio/romio/common/dataloop/darray_support.c
@@ -34,7 +34,7 @@ int PREPEND_PREFIX(Type_convert_darray)(int size,
int procs, tmp_rank, i, tmp_size, blklens[3], *coords;
MPI_Aint *st_offsets, orig_extent, disps[3];
- PMPI_Type_extent(oldtype, &orig_extent);
+ MPI_Type_extent(oldtype, &orig_extent);
/* calculate position in Cartesian grid as MPI would (row-major
ordering) */
@@ -78,7 +78,7 @@ int PREPEND_PREFIX(Type_convert_darray)(int size,
st_offsets+i);
break;
}
- if (i) PMPI_Type_free(&type_old);
+ if (i) MPI_Type_free(&type_old);
type_old = type_new;
}
@@ -116,7 +116,7 @@ int PREPEND_PREFIX(Type_convert_darray)(int size,
type_old, &type_new, st_offsets+i);
break;
}
- if (i != ndims-1) PMPI_Type_free(&type_old);
+ if (i != ndims-1) MPI_Type_free(&type_old);
type_old = type_new;
}
@@ -140,9 +140,9 @@ int PREPEND_PREFIX(Type_convert_darray)(int size,
types[1] = type_new;
types[2] = MPI_UB;
- PMPI_Type_struct(3, blklens, disps, types, newtype);
+ MPI_Type_struct(3, blklens, disps, types, newtype);
- PMPI_Type_free(&type_new);
+ MPI_Type_free(&type_new);
DLOOP_Free(st_offsets);
DLOOP_Free(coords);
return MPI_SUCCESS;
@@ -187,18 +187,18 @@ static int MPIOI_Type_block(int *array_of_gsizes, int dim, int ndims, int nprocs
stride = orig_extent;
if (order == MPI_ORDER_FORTRAN) {
if (dim == 0)
- PMPI_Type_contiguous(mysize, type_old, type_new);
+ MPI_Type_contiguous(mysize, type_old, type_new);
else {
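for (i=0; i<dim; i++) stride *= array_of_gsizes[i];
- PMPI_Type_hvector(mysize, 1, stride, type_old, type_new);
+ MPI_Type_hvector(mysize, 1, stride, type_old, type_new);
}
}
else {
if (dim == ndims-1)
- PMPI_Type_contiguous(mysize, type_old, type_new);
+ MPI_Type_contiguous(mysize, type_old, type_new);
else {
for (i=ndims-1; i>dim; i--) stride *= array_of_gsizes[i];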
- PMPI_Type_hvector(mysize, 1, stride, type_old, type_new);
+ MPI_Type_hvector(mysize, 1, stride, type_old, type_new);
}
}
@@ -252,7 +252,7 @@ static int MPIOI_Type_cyclic(int *array_of_gsizes, int dim, int ndims, int nproc
for (i=0; i<dim; i++) stride *= array_of_gsizes[i];
else for (i=ndims-1; i>dim; i--) stride *= array_of_gsizes[i];
- PMPI_Type_hvector(count, blksize, stride, type_old, type_new);
+ MPI_Type_hvector(count, blksize, stride, type_old, type_new);
if (rem) {
/* if the last block is of size less than blksize, include
@@ -265,9 +265,9 @@ static int MPIOI_Type_cyclic(int *array_of_gsizes, int dim, int ndims, int nproc
blklens[0] = 1;
blklens[1] = rem;
- PMPI_Type_struct(2, blklens, disps, types, &type_tmp);
+ MPI_Type_struct(2, blklens, disps, types, &type_tmp);
- PMPI_Type_free(type_new);
+ MPI_Type_free(type_new);
*type_new = type_tmp;
}
@@ -282,8 +282,8 @@ static int MPIOI_Type_cyclic(int *array_of_gsizes, int dim, int ndims, int nproc
types[2] = MPI_UB;
disps[2] = orig_extent * array_of_gsizes[dim];
blklens[0] = blklens[1] = blklens[2] = 1;
- PMPI_Type_struct(3, blklens, disps, types, &type_tmp);
- PMPI_Type_free(type_new);
+ MPI_Type_struct(3, blklens, disps, types, &type_tmp);
+ MPI_Type_free(type_new);
*type_new = type_tmp;
*st_offset = 0; /* set it to 0 because it is taken care of in
diff --git a/ompi/mca/io/romio/romio/common/dataloop/dataloop_create.c b/ompi/mca/io/romio/romio/common/dataloop/dataloop_create.c
index 91217d00f1..36edb4d280 100644
--- a/ompi/mca/io/romio/romio/common/dataloop/dataloop_create.c
+++ b/ompi/mca/io/romio/romio/common/dataloop/dataloop_create.c
@@ -38,7 +38,7 @@ void PREPEND_PREFIX(Dataloop_create)(MPI_Datatype type,
MPI_Aint stride;
MPI_Aint *disps;
- PMPI_Type_get_envelope(type, &nr_ints, &nr_aints, &nr_types, &combiner);
+ MPI_Type_get_envelope(type, &nr_ints, &nr_aints, &nr_types, &combiner);
/* some named types do need dataloops; handle separately. */
if (combiner == MPI_COMBINER_NAMED) {
@@ -93,7 +93,7 @@ void PREPEND_PREFIX(Dataloop_create)(MPI_Datatype type,
* note: in the struct case below we'll handle any additional
* types "below" the current one.
*/
- PMPI_Type_get_envelope(types[0], &dummy1, &dummy2, &dummy3,
+ MPI_Type_get_envelope(types[0], &dummy1, &dummy2, &dummy3,
&type0_combiner);
if (type0_combiner != MPI_COMBINER_NAMED)
{
@@ -228,7 +228,7 @@ void PREPEND_PREFIX(Dataloop_create)(MPI_Datatype type,
case MPI_COMBINER_STRUCT:
for (i = 1; i < ints[0]; i++) {
int type_combiner;
- PMPI_Type_get_envelope(types[i], &dummy1, &dummy2, &dummy3,
+ MPI_Type_get_envelope(types[i], &dummy1, &dummy2, &dummy3,
&type_combiner);
if (type_combiner != MPI_COMBINER_NAMED) {
@@ -288,7 +288,7 @@ void PREPEND_PREFIX(Dataloop_create)(MPI_Datatype type,
dldepth_p,
flag);
- PMPI_Type_free(&tmptype);
+ MPI_Type_free(&tmptype);
break;
case MPI_COMBINER_DARRAY:
ndims = ints[2];
@@ -309,7 +309,7 @@ void PREPEND_PREFIX(Dataloop_create)(MPI_Datatype type,
dldepth_p,
flag);
- PMPI_Type_free(&tmptype);
+ MPI_Type_free(&tmptype);
break;
case MPI_COMBINER_F90_REAL:
case MPI_COMBINER_F90_COMPLEX:
diff --git a/ompi/mca/io/romio/romio/common/dataloop/dataloop_create.h b/ompi/mca/io/romio/romio/common/dataloop/dataloop_create.h
index 3fbd42ee4a..db645d5a90 100644
--- a/ompi/mca/io/romio/romio/common/dataloop/dataloop_create.h
+++ b/ompi/mca/io/romio/romio/common/dataloop/dataloop_create.h
@@ -80,16 +80,4 @@ int PREPEND_PREFIX(Type_convert_darray)(int size,
MPI_Datatype oldtype,
MPI_Datatype *newtype);
-#if 0
-/* Helper functions for accessing datatype contents */
-void PREPEND_PREFIX(Type_access_contents)(MPI_Datatype type,
- int **ints_p,
- MPI_Aint **aints_p,
- MPI_Datatype **types_p);
-void PREPEND_PREFIX(Type_release_contents)(MPI_Datatype type,
- int **ints_p,
- MPI_Aint **aints_p,
- MPI_Datatype **types_p);
-#endif
-
#endif
diff --git a/ompi/mca/io/romio/romio/common/dataloop/dataloop_parts.h b/ompi/mca/io/romio/romio/common/dataloop/dataloop_parts.h
index 3f2a1c3509..5894dad2d0 100644
--- a/ompi/mca/io/romio/romio/common/dataloop/dataloop_parts.h
+++ b/ompi/mca/io/romio/romio/common/dataloop/dataloop_parts.h
@@ -204,9 +204,6 @@ typedef struct DLOOP_Dataloop_common {
this union, 'count', allows quick access to the shared 'count' field in the
five dataloop structure.
. extent - The extent of the dataloop
-#if 0
-- handle - handle for the corresponding 'MPI_Datatype'.
-#endif
Module:
Datatype
diff --git a/ompi/mca/io/romio/romio/common/dataloop/romio_dataloop.c b/ompi/mca/io/romio/romio/common/dataloop/romio_dataloop.c
index 00731bc9ad..5dbc7a42a5 100644
--- a/ompi/mca/io/romio/romio/common/dataloop/romio_dataloop.c
+++ b/ompi/mca/io/romio/romio/common/dataloop/romio_dataloop.c
@@ -80,7 +80,7 @@ void MPIO_Datatype_init_dataloop(MPI_Datatype type)
MPIO_Datatype_initialize();
}
- mpi_errno = PMPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag);
+ mpi_errno = MPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag);
DLOOP_Assert(mpi_errno == MPI_SUCCESS);
if (!attrflag) {
@@ -107,7 +107,7 @@ void MPIO_Datatype_get_size(MPI_Datatype type, MPI_Offset *size_p)
MPIO_Datatype_initialize();
}
- mpi_errno = PMPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag);
+ mpi_errno = MPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag);
DLOOP_Assert(mpi_errno == MPI_SUCCESS);
if (!attrflag) {
@@ -131,7 +131,7 @@ void MPIO_Datatype_get_extent(MPI_Datatype type, MPI_Offset *extent_p)
MPIO_Datatype_initialize();
}
- mpi_errno = PMPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag);
+ mpi_errno = MPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag);
DLOOP_Assert(mpi_errno == MPI_SUCCESS);
if (!attrflag) {
@@ -163,7 +163,7 @@ void MPIO_Datatype_get_block_info(MPI_Datatype type,
int mpi_errno, attrflag;
int nr_ints, nr_aints, nr_types, combiner;
- mpi_errno = PMPI_Type_get_envelope(type, &nr_ints, &nr_aints,
+ mpi_errno = MPI_Type_get_envelope(type, &nr_ints, &nr_aints,
&nr_types, &combiner);
DLOOP_Assert(mpi_errno == MPI_SUCCESS);
@@ -183,7 +183,7 @@ void MPIO_Datatype_get_block_info(MPI_Datatype type,
MPIO_Segment *segp;
MPI_Offset bytes;
- mpi_errno = PMPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp,
+ mpi_errno = MPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp,
&attrflag);
DLOOP_Assert(mpi_errno == MPI_SUCCESS);
if (!attrflag) {
@@ -231,7 +231,7 @@ void MPIO_Datatype_get_el_type(MPI_Datatype type,
int mpi_errno;
int nr_ints, nr_aints, nr_types, combiner;
- mpi_errno = PMPI_Type_get_envelope(type, &nr_ints, &nr_aints,
+ mpi_errno = MPI_Type_get_envelope(type, &nr_ints, &nr_aints,
&nr_types, &combiner);
DLOOP_Assert(mpi_errno == MPI_SUCCESS);
@@ -273,7 +273,7 @@ void MPIO_Datatype_get_loopptr(MPI_Datatype type,
MPIO_Datatype_initialize();
}
- mpi_errno = PMPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag);
+ mpi_errno = MPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag);
DLOOP_Assert(mpi_errno == MPI_SUCCESS);
if (!(dtp->valid & MPIO_DATATYPE_VALID_DLOOP_PTR))
@@ -293,7 +293,7 @@ void MPIO_Datatype_get_loopsize(MPI_Datatype type, int *size_p, int flag)
MPIO_Datatype_initialize();
}
- mpi_errno = PMPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag);
+ mpi_errno = MPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag);
DLOOP_Assert(mpi_errno == MPI_SUCCESS);
if (!(dtp->valid & MPIO_DATATYPE_VALID_DLOOP_SIZE))
@@ -313,7 +313,7 @@ void MPIO_Datatype_get_loopdepth(MPI_Datatype type, int *depth_p, int flag)
MPIO_Datatype_initialize();
}
- mpi_errno = PMPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag);
+ mpi_errno = MPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag);
DLOOP_Assert(mpi_errno == MPI_SUCCESS);
if (!(dtp->valid & MPIO_DATATYPE_VALID_DLOOP_DEPTH))
@@ -333,7 +333,7 @@ void MPIO_Datatype_set_loopptr(MPI_Datatype type, MPIO_Dataloop *ptr, int flag)
MPIO_Datatype_initialize();
}
- mpi_errno = PMPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag);
+ mpi_errno = MPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag);
DLOOP_Assert(mpi_errno == MPI_SUCCESS);
if (!attrflag) {
dtp = MPIO_Datatype_allocate(type);
@@ -355,7 +355,7 @@ void MPIO_Datatype_set_loopsize(MPI_Datatype type, int size, int flag)
MPIO_Datatype_initialize();
}
- mpi_errno = PMPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag);
+ mpi_errno = MPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag);
DLOOP_Assert(mpi_errno == MPI_SUCCESS);
if (!attrflag) {
dtp = MPIO_Datatype_allocate(type);
@@ -375,7 +375,7 @@ void MPIO_Datatype_set_loopdepth(MPI_Datatype type, int depth, int flag)
MPIO_Datatype_initialize();
}
- mpi_errno = PMPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag);
+ mpi_errno = MPI_Type_get_attr(type, MPIO_Datatype_keyval, &dtp, &attrflag);
DLOOP_Assert(mpi_errno == MPI_SUCCESS);
if (!attrflag) {
dtp = MPIO_Datatype_allocate(type);
@@ -390,7 +390,7 @@ int MPIO_Datatype_is_nontrivial(MPI_Datatype type)
{
int nr_ints, nr_aints, nr_types, combiner;
- PMPI_Type_get_envelope(type, &nr_ints, &nr_aints, &nr_types, &combiner);
+ MPI_Type_get_envelope(type, &nr_ints, &nr_aints, &nr_types, &combiner);
if (combiner != MPI_COMBINER_NAMED ||
type == MPI_FLOAT_INT ||
type == MPI_DOUBLE_INT ||
@@ -409,20 +409,20 @@ static int MPIO_Datatype_initialize(void)
DLOOP_Assert(MPIO_Datatype_keyval == MPI_KEYVAL_INVALID);
/* create keyval for dataloop storage */
- mpi_errno = PMPI_Type_create_keyval(MPIO_Datatype_copy_attr_function,
+ mpi_errno = MPI_Type_create_keyval(MPIO_Datatype_copy_attr_function,
MPIO_Datatype_delete_attr_function,
&MPIO_Datatype_keyval,
NULL);
DLOOP_Assert(mpi_errno == MPI_SUCCESS);
/* create keyval to hook to COMM_WORLD for finalize */
- mpi_errno = PMPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,
+ mpi_errno = MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,
MPIO_Datatype_finalize,
&MPIO_Datatype_finalize_keyval,
NULL);
DLOOP_Assert(mpi_errno == MPI_SUCCESS);
- mpi_errno = PMPI_Comm_set_attr(MPI_COMM_WORLD,
+ mpi_errno = MPI_Comm_set_attr(MPI_COMM_WORLD,
MPIO_Datatype_finalize_keyval,
NULL);
DLOOP_Assert(mpi_errno == MPI_SUCCESS);
@@ -444,10 +444,10 @@ static int MPIO_Datatype_finalize(MPI_Comm comm,
DLOOP_Assert(MPIO_Datatype_keyval != MPI_KEYVAL_INVALID);
/* remove keyvals */
- mpi_errno = PMPI_Type_free_keyval(&MPIO_Datatype_keyval);
+ mpi_errno = MPI_Type_free_keyval(&MPIO_Datatype_keyval);
DLOOP_Assert(mpi_errno == MPI_SUCCESS);
- mpi_errno = PMPI_Type_free_keyval(&MPIO_Datatype_finalize_keyval);
+ mpi_errno = MPI_Type_free_keyval(&MPIO_Datatype_finalize_keyval);
DLOOP_Assert(mpi_errno == MPI_SUCCESS);
printf("freed keyvals\n");
@@ -468,7 +468,7 @@ static MPIO_Datatype *MPIO_Datatype_allocate(MPI_Datatype type)
dtp->dloop_size = -1;
dtp->dloop_depth = -1;
- mpi_errno = PMPI_Type_set_attr(type, MPIO_Datatype_keyval, dtp);
+ mpi_errno = MPI_Type_set_attr(type, MPIO_Datatype_keyval, dtp);
DLOOP_Assert(mpi_errno == MPI_SUCCESS);
printf("allocated attr struct\n");
@@ -496,13 +496,13 @@ static void MPIO_Datatype_set_szext(MPI_Datatype type, MPIO_Datatype *dtp)
int size;
MPI_Aint lb, extent, true_lb, true_extent;
- mpi_errno = PMPI_Type_size(type, &size);
+ mpi_errno = MPI_Type_size(type, &size);
DLOOP_Assert(mpi_errno == MPI_SUCCESS);
- mpi_errno = PMPI_Type_get_extent(type, &lb, &extent);
+ mpi_errno = MPI_Type_get_extent(type, &lb, &extent);
DLOOP_Assert(mpi_errno == MPI_SUCCESS);
- mpi_errno = PMPI_Type_get_true_extent(type, &true_lb, &true_extent);
+ mpi_errno = MPI_Type_get_true_extent(type, &true_lb, &true_extent);
dtp->size = (MPI_Offset) size;
dtp->extent = (MPI_Offset) extent;
diff --git a/ompi/mca/io/romio/romio/common/dataloop/romio_dataloop.h b/ompi/mca/io/romio/romio/common/dataloop/romio_dataloop.h
index 610fd4ac5c..5c9bbaed7b 100644
--- a/ompi/mca/io/romio/romio/common/dataloop/romio_dataloop.h
+++ b/ompi/mca/io/romio/romio/common/dataloop/romio_dataloop.h
@@ -12,8 +12,12 @@
#include
/* romioconf.h must be included *before* mpi.h to avoid some redeclarations */
+#ifdef HAVE_MPITYPEDEFS_H
#include "mpitypedefs.h"
+#endif
+#ifdef HAVE_MPICHCONF_H
#include "mpichconf.h"
+#endif
#include "romioconf.h"
#include <mpi.h>
diff --git a/ompi/mca/io/romio/romio/common/dataloop/segment_ops.c b/ompi/mca/io/romio/romio/common/dataloop/segment_ops.c
index d90c4f5adf..04b1603db3 100644
--- a/ompi/mca/io/romio/romio/common/dataloop/segment_ops.c
+++ b/ompi/mca/io/romio/romio/common/dataloop/segment_ops.c
@@ -548,16 +548,7 @@ static int DLOOP_Segment_index_count_block(DLOOP_Offset *blocks_p,
* DLOOP_Count i and DLOOP_Offset size would need to be
* declared above.
*/
-#if 0
- last_loc = rel_off * offsetarray[0] + blockarray[0] * el_size;
- for (i=1; i < count; i++) {
- if (last_loc == rel_off + offsetarray[i]) new_blk_count--;
-
- last_loc = rel_off + offsetarray[i] + blockarray[i] * el_size;
- }
-#else
last_loc = rel_off + offsetarray[count-1] + blockarray[count-1] * el_size;
-#endif
paramp->last_loc = last_loc;
paramp->count += new_blk_count;
@@ -690,19 +681,6 @@ static int DLOOP_Segment_vector_mpi_flatten(DLOOP_Offset *blocks_p,
DLOOP_Handle_get_size_macro(el_type, el_size);
blocks_left = *blocks_p;
-#if 0
- MPIU_DBG_MSG_FMT(DATATYPE,VERBOSE,(MPIU_DBG_FDEST,
- "\t[vector to vec: do=%d, dp=%x, len=%d, ind=%d, ct=%d, blksz=%d, str=%d, blks=%d]\n",
- (unsigned) rel_off,
- (unsigned) (MPI_Aint)bufp,
- paramp->u.pack_vector.length,
- paramp->u.pack_vector.index,
- count,
- blksz,
- stride,
- (int) *blocks_p));
-#endif
-
for (i=0; i < count && blocks_left > 0; i++) {
int last_idx;
char *last_end = NULL;
diff --git a/ompi/mca/io/romio/romio/common/dataloop/subarray_support.c b/ompi/mca/io/romio/romio/common/dataloop/subarray_support.c
index becce18349..a29d3fbd07 100644
--- a/ompi/mca/io/romio/romio/common/dataloop/subarray_support.c
+++ b/ompi/mca/io/romio/romio/common/dataloop/subarray_support.c
@@ -21,23 +21,23 @@ int PREPEND_PREFIX(Type_convert_subarray)(int ndims,
int i, blklens[3];
MPI_Datatype tmp1, tmp2, types[3];
- PMPI_Type_extent(oldtype, &extent);
+ MPI_Type_extent(oldtype, &extent);
if (order == MPI_ORDER_FORTRAN) {
/* dimension 0 changes fastest */
if (ndims == 1) {
- PMPI_Type_contiguous(array_of_subsizes[0], oldtype, &tmp1);
+ MPI_Type_contiguous(array_of_subsizes[0], oldtype, &tmp1);
}
else {
- PMPI_Type_vector(array_of_subsizes[1],
+ MPI_Type_vector(array_of_subsizes[1],
array_of_subsizes[0],
array_of_sizes[0], oldtype, &tmp1);
size = array_of_sizes[0]*extent;
for (i=2; i=0; i--) {
size *= array_of_sizes[i+1];
- PMPI_Type_hvector(array_of_subsizes[i], 1, size, tmp1, &tmp2);
- PMPI_Type_free(&tmp1);
+ MPI_Type_hvector(array_of_subsizes[i], 1, size, tmp1, &tmp2);
+ MPI_Type_free(&tmp1);
tmp1 = tmp2;
}
}
@@ -91,9 +91,9 @@ int PREPEND_PREFIX(Type_convert_subarray)(int ndims,
types[1] = tmp1;
types[2] = MPI_UB;
- PMPI_Type_struct(3, blklens, disps, types, newtype);
+ MPI_Type_struct(3, blklens, disps, types, newtype);
- PMPI_Type_free(&tmp1);
+ MPI_Type_free(&tmp1);
return MPI_SUCCESS;
}
diff --git a/ompi/mca/io/romio/romio/common/dataloop/typesize_support.c b/ompi/mca/io/romio/romio/common/dataloop/typesize_support.c
index 4a3dc60f73..b1b96c465f 100644
--- a/ompi/mca/io/romio/romio/common/dataloop/typesize_support.c
+++ b/ompi/mca/io/romio/romio/common/dataloop/typesize_support.c
@@ -162,7 +162,7 @@ void PREPEND_PREFIX(Type_calc_footprint)(MPI_Datatype type,
int ndims;
MPI_Datatype tmptype;
- mpi_errno = PMPI_Type_get_envelope(type, &nr_ints, &nr_aints,
+ mpi_errno = MPI_Type_get_envelope(type, &nr_ints, &nr_aints,
&nr_types, &combiner);
DLOOP_Assert(mpi_errno == MPI_SUCCESS);
@@ -170,8 +170,8 @@ void PREPEND_PREFIX(Type_calc_footprint)(MPI_Datatype type,
int mpisize;
MPI_Aint mpiextent;
- PMPI_Type_size(type, &mpisize);
- PMPI_Type_extent(type, &mpiextent);
+ MPI_Type_size(type, &mpisize);
+ MPI_Type_extent(type, &mpiextent);
tfp->size = (DLOOP_Offset) mpisize;
tfp->lb = 0;
tfp->ub = (DLOOP_Offset) mpiextent;
@@ -369,7 +369,7 @@ void PREPEND_PREFIX(Type_calc_footprint)(MPI_Datatype type,
types[0],
&tmptype);
PREPEND_PREFIX(Type_calc_footprint)(tmptype, tfp);
- PMPI_Type_free(&tmptype);
+ MPI_Type_free(&tmptype);
break;
case MPI_COMBINER_DARRAY:
ndims = ints[2];
@@ -386,7 +386,7 @@ void PREPEND_PREFIX(Type_calc_footprint)(MPI_Datatype type,
&tmptype);
PREPEND_PREFIX(Type_calc_footprint)(tmptype, tfp);
- PMPI_Type_free(&tmptype);
+ MPI_Type_free(&tmptype);
break;
case MPI_COMBINER_F90_REAL:
case MPI_COMBINER_F90_COMPLEX:
@@ -437,7 +437,7 @@ static void DLOOP_Type_calc_footprint_struct(MPI_Datatype type,
/* skip zero blocklength elements */
if (ints[i+1] == 0) continue;
- PMPI_Type_get_envelope(types[i], &nr_ints, &nr_aints, &nr_types,
+ MPI_Type_get_envelope(types[i], &nr_ints, &nr_aints, &nr_types,
&combiner);
/* opt: could just inline assignments for combiner == NAMED case */
@@ -530,10 +530,6 @@ static void DLOOP_Type_calc_footprint_struct(MPI_Datatype type,
}
}
-#if 0
- printf("size = %d, extent = %d\n", (int) tmp_size, (int) tmp_extent);
-#endif
-
tfp->size = tmp_size;
tfp->lb = min_lb;
tfp->ub = max_ub;
@@ -576,7 +572,7 @@ static int DLOOP_Named_type_alignsize(MPI_Datatype type, MPI_Aint disp)
if (type == MPI_LB || type == MPI_UB)
return 0;
- PMPI_Type_size(type, &alignsize);
+ MPI_Type_size(type, &alignsize);
switch(type)
{
@@ -882,46 +878,3 @@ static int DLOOP_Structalign_llint_position()
if (padding_varies_by_pos) return 1;
else return 0;
}
-
-#if 0
-/* from MPICH2 PAC_C_DOUBLE_ALIGNMENT_EXCEPTION test:
- *
- * Other tests assume that there is potentially a maximum alignment
- * and that if there is no maximum alignment, or a type is smaller than
- * that value, then we align on the size of the value, with the exception
- * of the "position-based alignment" rules we test for separately.
- *
- * It turns out that these assumptions have fallen short in at least one
- * case, on MacBook Pros, where doubles are aligned on 4-byte boundaries
- * even when long doubles are aligned on 16-byte boundaries. So this test
- * is here specifically to handle this case.
- *
- * Return value is 4 or 0.
-*/
-static int double_align_exception()
-{
- struct { char a; double b; } char_double;
- struct { double b; char a; } double_char;
- int extent1, extent2, align_4 = 0;
-
- extent1 = sizeof(char_double);
- extent2 = sizeof(double_char);
-
- /* we're interested in the largest value, will let separate test
- * deal with position-based issues.
- */
- if (extent1 < extent2) extent1 = extent2;
- if ((sizeof(double) == 8) && (extent1 % 8) != 0) {
- if (extent1 % 4 == 0) {
-#ifdef HAVE_MAX_FP_ALIGNMENT
- if (HAVE_MAX_FP_ALIGNMENT >= 8) align_4 = 1;
-#else
- align_4 = 1;
-#endif
- }
- }
-
- if (align_4) return 4;
- else return 0;
-}
-#endif
diff --git a/ompi/mca/io/romio/romio/confdb/aclocal_am.m4 b/ompi/mca/io/romio/romio/confdb/aclocal_am.m4
new file mode 100644
index 0000000000..4d3b0de225
--- /dev/null
+++ b/ompi/mca/io/romio/romio/confdb/aclocal_am.m4
@@ -0,0 +1,6 @@
+dnl AM_IGNORE is an extension that tells (a patched) automake not to
+dnl include the specified AC_SUBST variable in the Makefile.in that
+dnl automake generates. We don't use AC_DEFUN, since aclocal will
+dnl then complain that AM_IGNORE is a duplicate (if you are using the
+dnl patched automake/aclocal).
+m4_ifdef([AM_IGNORE],[],[m4_define([AM_IGNORE],[])])
diff --git a/ompi/mca/io/romio/romio/confdb/aclocal_atomic.m4 b/ompi/mca/io/romio/romio/confdb/aclocal_atomic.m4
new file mode 100644
index 0000000000..4e96cfbf36
--- /dev/null
+++ b/ompi/mca/io/romio/romio/confdb/aclocal_atomic.m4
@@ -0,0 +1,227 @@
+dnl /*D PAC_C_MEMATOMIC - Try and determine how to implement memory-atomic
+dnl operations with the selected C compiler
+dnl
+dnl Synopsis:
+dnl PAC_C_MEMATOMIC
+dnl
+dnl Notes:
+dnl Defines names of the following form
+dnl + HAVE_GCC_ASM_AND_X86_{MFENCE,LFENCE,SFENCE} - gcc __asm__ will issue
+dnl mfence, lfence, or sfence
+dnl . HAVE___ASM_AND_X86_{MFENCE,LFENCE,SFENCE} - __asm _emit will issue
+dnl mfence, lfence, or sfence
+dnl . HAVE_ASM_AND_X86_{MFENCE,LFENCE,SFENCE} - asm("...") will issue
+dnl mfence, lfence, or sfence
+dnl . HAVE__INTERLOCKEDEXCHANGE - _InterlockedExchange intrinsic is available
+dnl (IA64)
+dnl . HAVE_GCC_ASM_SPARC_MEMBAR - gcc __asm__ will issue SPARC architecture
+dnl memory barrier instruction
+dnl . HAVE_SOLARIS_ASM_SPARC_MEMBAR - Solaris asm() will issue SPARC
+dnl architecture memory barrier instruction
+dnl . HAVE_GCC_ASM_SPARC_STBAR - gcc __asm__ will issue stbar
+dnl - HAVE_SOLARIS_ASM_SPARC_STBAR - Solaris __asm() will issue stbar
+dnl
+dnl D*/
+AC_DEFUN([PAC_C_MEMATOMIC],[
+AC_CACHE_CHECK([for x86 mfence instruction using __asm__],
+ pac_cv_have_gcc_asm_and_x86_mfence,[
+AC_TRY_RUN([
+int main(int argc, char **argv)
+{
+ __asm__ __volatile__ ( ".byte 0x0f, 0xae, 0xf0" ::: "memory" );
+ return 0;
+}
+],
+pac_cv_have_gcc_asm_and_x86_mfence=yes,pac_cv_have_gcc_asm_and_x86_mfence=no)])
+
+if test "$pac_cv_have_gcc_asm_and_x86_mfence" = "yes" ; then
+ AC_DEFINE(HAVE_GCC_ASM_AND_X86_MFENCE, 1, [Define if using gcc on a x86 system with the mfence instruction])
+fi
+
+AC_CACHE_CHECK([for x86 sfence instruction using __asm__],
+ pac_cv_have_gcc_asm_and_x86_sfence,[
+AC_TRY_RUN([
+int main(int argc, char **argv)
+{
+ __asm__ __volatile__ ( ".byte 0x0f, 0xae, 0xf8" ::: "memory" );
+ return 0;
+}
+],
+pac_cv_have_gcc_asm_and_x86_sfence=yes,pac_cv_have_gcc_asm_and_x86_sfence=no)])
+
+if test "$pac_cv_have_gcc_asm_and_x86_sfence" = "yes" ; then
+ AC_DEFINE(HAVE_GCC_ASM_AND_X86_SFENCE, 1, [Define if using gcc on a x86 system with the sfence instruction])
+fi
+
+AC_CACHE_CHECK([for x86 lfence instruction using __asm__],
+ pac_cv_have_gcc_asm_and_x86_lfence,[
+AC_TRY_RUN([
+int main(int argc, char **argv)
+{
+ __asm__ __volatile__ ( ".byte 0x0f, 0xae, 0xe8" ::: "memory" );
+ return 0;
+}
+],
+pac_cv_have_gcc_asm_and_x86_lfence=yes,pac_cv_have_gcc_asm_and_x86_lfence=no)])
+
+if test "$pac_cv_have_gcc_asm_and_x86_lfence" = "yes" ; then
+ AC_DEFINE(HAVE_GCC_ASM_AND_X86_LFENCE, 1, [Define if using gcc on a x86 system with the lfence instruction])
+fi
+
+dnl Some compilers, like icc, may want __asm _emit
+AC_CACHE_CHECK([for x86 mfence instruction using __asm],
+ pac_cv_have___asm_and_x86_mfence,[
+AC_TRY_RUN([
+int main(int argc, char **argv)
+{
+ __asm _emit 0x0f __asm _emit 0xae __asm _emit 0xf0 ;
+ return 0;
+}
+],
+pac_cv_have___asm_and_x86_mfence=yes,pac_cv_have___asm_and_x86_mfence=no)])
+
+if test "$pac_cv_have___asm_and_x86_mfence" = "yes" ; then
+ AC_DEFINE(HAVE___ASM_AND_X86_MFENCE, 1, [Define if using __asm on a x86 system with the mfence instruction])
+fi
+
+AC_CACHE_CHECK([for x86 sfence instruction using __asm],
+ pac_cv_have___asm_and_x86_sfence,[
+AC_TRY_RUN([
+int main(int argc, char **argv)
+{
+ __asm sfence ;
+ return 0;
+}
+],
+pac_cv_have___asm_and_x86_sfence=yes,pac_cv_have___asm_and_x86_sfence=no)])
+
+if test "$pac_cv_have___asm_and_x86_sfence" = "yes" ; then
+ AC_DEFINE(HAVE___ASM_AND_X86_SFENCE, 1, [Define if using __asm on a x86 system with the sfence instruction])
+fi
+
+AC_CACHE_CHECK([for x86 lfence instruction using __asm],
+ pac_cv_have___asm_and_x86_lfence,[
+AC_TRY_RUN([
+int main(int argc, char **argv)
+{
+ __asm _emit 0x0f __asm _emit 0xae __asm _emit 0xe8 ;
+ return 0;
+}
+],
+pac_cv_have___asm_and_x86_lfence=yes,pac_cv_have___asm_and_x86_lfence=no)])
+
+if test "$pac_cv_have___asm_and_x86_lfence" = "yes" ; then
+ AC_DEFINE(HAVE___ASM_AND_X86_LFENCE, 1, [Define if using __asm on a x86 system with the lfence instruction])
+fi
+
+dnl
+dnl Some compilers, such as pgcc, may require additional arguments.
+dnl pgcc may need -Masmkeyword flag. We may want to try this with and
+dnl without adding -Masmkeyword to CFLAGS
+
+AC_CACHE_CHECK([for x86 mfence instruction using asm()],
+ pac_cv_have_asm_and_x86_mfence,[
+AC_TRY_RUN([
+int main(int argc, char **argv)
+{
+ asm("_emit 0x0f __asm _emit 0xae __asm _emit 0xf0");
+ return 0;
+}
+],
+pac_cv_have_asm_and_x86_mfence=yes,pac_cv_have_asm_and_x86_mfence=no)])
+
+if test "$pac_cv_have_asm_and_x86_mfence" = "yes" ; then
+ AC_DEFINE(HAVE_ASM_AND_X86_MFENCE, 1, [Define if using asm() on a x86 system with the mfence instruction])
+fi
+
+AC_CACHE_CHECK([for x86 sfence instruction using asm()],
+ pac_cv_have_asm_and_x86_sfence,[
+AC_TRY_RUN([
+int main(int argc, char **argv)
+{
+ asm("sfence");
+ return 0;
+}
+],
+pac_cv_have_asm_and_x86_sfence=yes,pac_cv_have_asm_and_x86_sfence=no)])
+
+if test "$pac_cv_have_asm_and_x86_sfence" = "yes" ; then
+ AC_DEFINE(HAVE_ASM_AND_X86_SFENCE, 1, [Define if using asm() on a x86 system with the sfence instruction])
+fi
+
+AC_CACHE_CHECK([for x86 lfence instruction using asm()],
+ pac_cv_have_asm_and_x86_lfence,[
+AC_TRY_RUN([
+int main(int argc, char **argv)
+{
+ asm("_emit 0x0f __asm _emit 0xae __asm _emit 0xe8");
+ return 0;
+}
+],
+pac_cv_have_asm_and_x86_lfence=yes,pac_cv_have_asm_and_x86_lfence=no)])
+
+if test "$pac_cv_have_asm_and_x86_lfence" = "yes" ; then
+ AC_DEFINE(HAVE_ASM_AND_X86_LFENCE, 1, [Define if using asm() on a x86 system with the lfence instruction])
+fi
+
+AC_CACHE_CHECK([for _InterlockedExchange intrinsic],
+ pac_cv_have__InterlockedExchange,[
+AC_TRY_RUN([
+int main(int argc, char **argv)
+{
+ unsigned long lock, *lock_ptr;
+ lock_ptr = &lock;
+ _InterlockedExchange(lock_ptr, 1);
+ return 0;
+}
+],
+pac_cv_have__InterlockedExchange=yes,pac_cv_have__InterlockedExchange=no)])
+
+if test "$pac_cv_have__InterlockedExchange" = "yes" ; then
+ AC_DEFINE(HAVE__INTERLOCKEDEXCHANGE, 1, [Define if _InterlockedExchange intrinsic is available])
+fi
+
+AC_CACHE_CHECK([for SPARC membar instruction with gcc],
+ pac_cv_gcc_sparc_membar,[
+AC_TRY_RUN([
+int main(int argc, char **argv){
+ __asm__ __volatile__ ( "membar #StoreLoad | #StoreStore" : : : "memory" );
+ return 0;
+}],pac_cv_gcc_sparc_membar=yes,pac_cv_gcc_sparc_membar=no)])
+if test "$pac_cv_gcc_sparc_membar" = yes ; then
+ AC_DEFINE(HAVE_GCC_ASM_SPARC_MEMBAR,1,[Define if gcc asm membar supported])
+fi
+
+AC_CACHE_CHECK([for SPARC membar instruction with Solaris C],
+ pac_cv_solaris_sparc_membar,[
+AC_TRY_RUN([
+int main(int argc, char **argv){
+ __asm ( "membar #StoreLoad | #StoreStore");
+ return 0;
+}],pac_cv_solaris_sparc_membar=yes,pac_cv_solaris_sparc_membar=no)])
+if test "$pac_cv_solaris_sparc_membar" = yes ; then
+ AC_DEFINE(HAVE_SOLARIS_ASM_SPARC_MEMBAR,1,[Define if solaris asm membar supported])
+fi
+
+AC_CACHE_CHECK([for SPARC stbar instruction with gcc],
+ pac_cv_gcc_sparc_stbar,[
+AC_TRY_RUN([
+int main(int argc, char **argv){
+ __asm__ __volatile__ ( "stbar" : : : "memory" );
+ return 0;
+}],pac_cv_gcc_sparc_stbar=yes,pac_cv_gcc_sparc_stbar=no)])
+if test "$pac_cv_gcc_sparc_stbar" = yes ; then
+ AC_DEFINE(HAVE_GCC_ASM_SPARC_STBAR,1,[Define if gcc asm stbar supported])
+fi
+
+AC_CACHE_CHECK([for SPARC stbar instruction with Solaris C],
+ pac_cv_solaris_sparc_stbar,[
+AC_TRY_RUN([
+int main(int argc, char **argv){
+ __asm ( "stbar" );
+ return 0;
+}],pac_cv_solaris_sparc_stbar=yes,pac_cv_solaris_sparc_stbar=no)])
+if test "$pac_cv_solaris_sparc_stbar" = yes ; then
+ AC_DEFINE(HAVE_SOLARIS_ASM_SPARC_STBAR,1,[Define if solaris asm stbar supported])
+fi
+])
\ No newline at end of file
diff --git a/ompi/mca/io/romio/romio/confdb/aclocal_attr_alias.m4 b/ompi/mca/io/romio/romio/confdb/aclocal_attr_alias.m4
new file mode 100644
index 0000000000..da49328105
--- /dev/null
+++ b/ompi/mca/io/romio/romio/confdb/aclocal_attr_alias.m4
@@ -0,0 +1,504 @@
+dnl
+dnl Check for BSD or POSIX style global symbol lister, nm.
+dnl If found, pac_path_NM_G contains the absolute pathname of nm + options
+dnl pac_path_NM_G_type will be either POSIX or BSD. NM_G will be
+dnl pac_path_NM_G without the absolute path. Preference is BSD style.
+dnl
+dnl The test checks if nm accepts the known options and also if nm produces
+dnl the expected BSD or POSIX output format.
+dnl
+AC_DEFUN([PAC_PATH_NM_G],[
+AC_MSG_CHECKING([for BSD/POSIX style global symbol lister])
+AC_LANG_PUSH(C)
+AC_PATH_PROGS_FEATURE_CHECK(NM_G, nm, [
+ # Check if nm accepts -g and BSD or POSIX compatible flag.
+ # Use the `sed 1q' to avoid HP-UX's unknown option message:
+ # nm: unknown option "B" ignored
+ # Tru64's nm complains that /dev/null is an invalid object file
+ #
+ # AIX's sed does not accept \+, 1) instead of doing 's|a\+||', do 's|aa*||'
+ # or 2) instead of 's|A \+B|AB|g', do 's|A *B|AB|g'
+
+ # Check if nm accepts -g
+ case `${ac_path_NM_G} -g /dev/null 2>&1 | sed '1q'` in
+ */dev/null* | *'Invalid file or object type'*)
+ ac_path_NM_G="${ac_path_NM_G} -g"
+ # Check if nm accepts -B
+ case `${ac_path_NM_G} -B /dev/null 2>&1 | sed '1q'` in
+ */dev/null* | *'Invalid file or object type'*)
+ AC_COMPILE_IFELSE([
+ AC_LANG_SOURCE([int iglobal;])
+ ],[
+ changequote(,)
+ case `${ac_path_NM_G} -B conftest.$OBJEXT | sed -e 's|[0-9][0-9]* *[A-Z] *iglobal|XXXX|g'` in
+ *XXXX*)
+ pac_path_NM_G="${ac_path_NM_G} -B"
+ pac_path_NM_G_type="BSD"
+ ;;
+ esac
+ changequote([,])
+ ])
+ ;;
+ *)
+ # Check if nm accepts -P
+ case `${ac_path_NM_G} -P /dev/null 2>&1 | sed '1q'` in
+ */dev/null* | *'Invalid file or object type'*)
+ AC_COMPILE_IFELSE([
+ AC_LANG_SOURCE([int iglobal;])
+ ],[
+ changequote(,)
+ case `${ac_path_NM_G} -P conftest.$OBJEXT | sed -e 's|iglobal *[A-Z] *[0-9][0-9]*|XXXX|g'` in
+ *XXXX*)
+ pac_path_NM_G="${ac_path_NM_G} -P"
+ pac_path_NM_G_type="POSIX"
+ ;;
+ esac
+ changequote([,])
+ ])
+ ;;
+ esac # Endof case `${ac_path_NM_G} -P
+ ;;
+ esac # Endof case `${ac_path_NM_G} -B
+ ;;
+ esac # Endof case `${ac_path_NM_G} -g
+ if test "X$pac_path_NM_G" != "X" ; then
+ AC_MSG_RESULT([$pac_path_NM_G_type style,$pac_path_NM_G])
+ NM_G="`echo $pac_path_NM_G | sed -e 's|^.*nm |nm |g'`"
+ else
+ AC_MSG_RESULT(no)
+ fi
+ ac_cv_path_NM_G=${ac_path_NM_G}
+ ac_path_NM_G_found=:
+], [AC_MSG_RESULT(no)],
+[$PATH$PATH_SEPARATOR/usr/ccs/bin/elf$PATH_SEPARATOR/usr/ccs/bin$PATH_SEPARATOR/usr/ucb$PATH_SEPARATOR/bin])
+AC_LANG_POP(C)
+]) dnl Endof AC_DEFUN([PAC_PATH_NM_G]
+dnl
+dnl PAC_C_MULTI_ATTR_ALIAS()
+dnl
+dnl This checks if multiple __attribute__((alias)) is available
+dnl If the multiple __attribute__((alias)) support is found,
+dnl pac_c_multi_attr_alias=yes is set.
+dnl
+dnl The default is to do a runtime test. When cross_compiling=yes,
+dnl pac_path_NM_G will be used to determine the test result.
+dnl If CFLAGS(or CPPFLAGS) contains ATTR_ALIAS_DEBUG, the runtime will print
+dnl out addresses of struct(s) for debugging purpose.
+dnl
+dnl
+AC_DEFUN([PAC_C_MULTI_ATTR_ALIAS],[
+AC_REQUIRE([PAC_PATH_NM_G])
+AC_LANG_PUSH(C)
+AC_CHECK_HEADERS([stdio.h])
+AC_MSG_CHECKING([for multiple __attribute__((alias)) support])
+
+#Compile the "other" __attribute__ object file.
+AC_COMPILE_IFELSE([
+ AC_LANG_SOURCE([
+#if defined(HAVE_STDIO_H) || defined(STDC_HEADERS)
+#include <stdio.h>
+#endif
+
+struct mpif_cmblk_t_ { int imember; };
+typedef struct mpif_cmblk_t_ mpif_cmblk_t;
+
+/* Do NOT initialize these structures so they appear in BSS or as COMMON symbols */
+mpif_cmblk_t mpifcmb;
+mpif_cmblk_t MPIFCMB;
+
+/*
+ Do the test in this file instead in the file
+ where __attribute__((alias)) is used.
+ This is needed for pgcc since pgcc seems to
+ define aliased symbols if they are in the same file.
+*/
+/*
+ We can't do the following comparison in one test:
+
+ ilogical = (( &mpifcmb == ptr && &MPIFCMB == ptr ) ? TRUE : FALSE) ;
+
+ because some compiler, like gcc 4.4.2's -O* optimizes the code
+ such that the ilogical expression is FALSE. The likely reason is that
+ mpifcmb and MPIFCMB are defined in the same scope in which C optimizer
+ may have treated them as different objects (with different addresses),
+ &mpifcmb != &MPIFCMB, before actually running the test and hence the
+ ilogical expression is assumed to be always FALSE. The solution taken
+ here is to prevent the optimizer the opportunity to equate &mpifcmb and
+ &MPIFCMB (in same scope), e.g. using 2 separate tests and combine the
+ test results in a different scope.
+*/
+int same_addrs1( void *ptr );
+int same_addrs1( void *ptr )
+{
+#if defined(ATTR_ALIAS_DEBUG)
+ printf( "others: addr(mpifcmb)=%p, addr(input ptr)=%p\n", &mpifcmb, ptr );
+#endif
+ return ( &mpifcmb == ptr ? 1 : 0 );
+}
+
+int same_addrs2( void *ptr );
+int same_addrs2( void *ptr )
+{
+#if defined(ATTR_ALIAS_DEBUG)
+ printf( "others: addr(MPIFCMB)=%p, addr(input ptr)=%p\n", &MPIFCMB, ptr );
+#endif
+ return ( &MPIFCMB == ptr ? 1 : 0 );
+}
+
+ ])
+],[
+ rm -f pac_conftest_other.$OBJEXT
+ PAC_RUNLOG([cp conftest.$OBJEXT pac_conftest_other.$OBJEXT])
+ test -s pac_conftest_other.$OBJEXT && pac_c_attr_alias_other=yes
+dnl cp conftest.$ac_ext pac_conftest_other.$ac_ext
+dnl echo
+dnl echo "pac_conftest_other.$OBJEXT"
+dnl nm -P -g pac_conftest_other.$OBJEXT | grep -i "mpifcmb"
+],[
+ pac_c_attr_alias_other=no
+]) dnl Endof AC_COMPILE_IFELSE
+
+pac_c_attr_alias_main=no
+if test "$pac_c_attr_alias_other" = "yes" ; then
+
+# Save LIBS for later restoration.
+ saved_LIBS="$LIBS"
+ LIBS="pac_conftest_other.$OBJEXT $LIBS"
+
+# Link the "other" __attribute__ object file.
+ AC_LINK_IFELSE([
+ AC_LANG_PROGRAM([
+#if defined(HAVE_STDIO_H) || defined(STDC_HEADERS)
+#include <stdio.h>
+#endif
+
+struct mpif_cmblk_t_ { int imember; };
+typedef struct mpif_cmblk_t_ mpif_cmblk_t;
+
+mpif_cmblk_t mpifcmbr = {0};
+extern mpif_cmblk_t MPIFCMB __attribute__ ((alias("mpifcmbr")));
+extern mpif_cmblk_t mpifcmb __attribute__ ((alias("mpifcmbr")));
+
+extern int same_addrs1( void *ptr );
+extern int same_addrs2( void *ptr );
+
+ ],[
+ int iaddr = 0;
+#if defined(ATTR_ALIAS_DEBUG)
+ printf( "main: addr(mpifcmbr) = %p\n", &mpifcmbr );
+ printf( "main: addr(mpifcmb) = %p\n", &mpifcmb );
+ printf( "main: addr(MPIFCMB) = %p\n", &MPIFCMB );
+#endif
+ iaddr = same_addrs1( &mpifcmbr ) && same_addrs2( &mpifcmbr );
+ FILE *file = fopen( "pac_conftestval", "w" );
+ if (!file) return 1;
+ fprintf( file, "%d\n", iaddr );
+ ])
+ ],[
+ rm -f pac_conftest_main$EXEEXT
+ PAC_RUNLOG([cp conftest$EXEEXT pac_conftest_main$EXEEXT])
+ test -x pac_conftest_main$EXEEXT && pac_c_attr_alias_main=yes
+dnl cp conftest.$ac_ext pac_conftest_main.$ac_ext
+dnl echo
+dnl echo "pac_conftest_main$EXEEXT"
+dnl nm -P -g pac_conftest_main$EXEEXT | grep -i "mpifcmb"
+ ],[
+ pac_c_attr_alias_main=no
+dnl cp conftest.$ac_ext pac_conftest_main.$ac_ext
+ ]) dnl Endof AC_LINK_IFELSE
+
+# Restore the previously saved LIBS
+ LIBS="$saved_LIBS"
+ rm -f pac_conftest_other.$OBJEXT
+fi dnl Endof if test "$pac_c_attr_alias_other" = "yes"
+
+if test "$pac_c_attr_alias_main" = "yes" ; then
+ if test "$cross_compiling" = "yes" ; then
+ changequote(,)
+ # echo "PAC CROSS-COMPILING" dnl
+ # POSIX NM = nm -P format dnl
+ if test "$pac_path_NM_G_type" = "POSIX" ; then
+ addrs=`${pac_path_NM_G} ./pac_conftest_main$EXEEXT | grep -i mpifcmb | sed -e 's% *[a-zA-Z][a-zA-Z]* *[a-zA-Z] *\([0-9abcdef][0-9abcdef]*\).*%\1%g'`
+ fi
+
+ # BSD NM = nm -B format dnl
+ if test "$pac_path_NM_G_type" = "BSD" ; then
+ addrs=`${pac_path_NM_G} -g ./pac_conftest_main$EXEEXT | grep -i mpifcmb | sed -e "s% *\([0-9abcdef][0-9abcdef]*\) *[a-zA-Z] *[a-zA-Z][a-zA-Z]*.*%\1%g"`
+ fi
+ changequote([,])
+
+ cmp_addr=""
+ diff_addrs=no
+ for addr in ${addrs} ; do
+ if test "X${cmp_addr}" != "X" ; then
+ if test "${cmp_addr}" != "${addr}" ; then
+ diff_addrs=yes
+ break
+ fi
+ else
+ cmp_addr=${addr}
+ fi
+ done
+
+ if test "$diff_addrs" != "yes" ; then
+ dnl echo "Same addresses. Multiple aliases support"
+ AC_MSG_RESULT([${NM_G} says yes])
+ pac_c_multi_attr_alias=yes
+ else
+ dnl echo "Different addresses. No multiple aliases support."
+ AC_MSG_RESULT([${NM_G} says no])
+ pac_c_multi_attr_alias=no
+ fi
+
+ else # if test "$cross_compiling" != "yes"
+ rm -f pac_conftestval
+ ac_try="./pac_conftest_main$EXEEXT"
+ if AC_TRY_EVAL(ac_try) ; then
+ pac_c_attr_alias_val=0
+ if test -s pac_conftestval ; then
+ eval pac_c_attr_alias_val=`cat pac_conftestval`
+ fi
+ if test "$pac_c_attr_alias_val" -eq 1 ; then
+ AC_MSG_RESULT(yes)
+ pac_c_multi_attr_alias=yes
+ else
+ AC_MSG_RESULT(no)
+ pac_c_multi_attr_alias=no
+ fi
+ rm -f pac_conftestval
+ fi
+ fi
+ dnl Endof if test "$cross_compiling" = "yes"
+ rm -f pac_conftest_main$EXEEXT
+else
+ AC_MSG_RESULT(no! link failure)
+ pac_c_multi_attr_alias=no
+fi dnl Endof if test "$pac_c_attr_alias_main" = "yes"
+
+AC_LANG_POP(C)
+
+]) dnl Endof AC_DEFUN([PAC_C_MULTI_ATTR_ALIAS]
+dnl
+dnl PAC_C_ATTR_ALIGNED()
+dnl
+dnl Check whether the C compiler supports __attribute__((aligned)).
+dnl Sets pac_c_attr_aligned=yes if the test program links, no otherwise.
+dnl
+dnl Do a link test instead of compile test to check if the linker
+dnl would emit an error.
+dnl
+AC_DEFUN([PAC_C_ATTR_ALIGNED],[
+AC_LANG_PUSH(C)
+AC_MSG_CHECKING([for __attribute__((aligned)) support])
+#Link the __attribute__ object file.
+AC_LINK_IFELSE([
+ AC_LANG_PROGRAM([
+struct mpif_cmblk_t_ { int imembers[5]; };
+typedef struct mpif_cmblk_t_ mpif_cmblk_t;
+mpif_cmblk_t mpifcmbr __attribute__((aligned)) = {0};
+ ],[])
+],[pac_c_attr_aligned=yes], [pac_c_attr_aligned=no])
+AC_MSG_RESULT([$pac_c_attr_aligned])
+AC_LANG_POP(C)
+])
+dnl
+dnl PAC_F2C_ATTR_ALIGNED_SIZE(ARRAY_SIZE, [OUTPUT_VAR], [MIN_ALIGNMENT])
+dnl
+dnl ARRAY_SIZE : Size of the integer array within the fortran common block.
+dnl OUTPUT_VAR : Optional variable to be set.
+dnl if test succeeds, set OUTPUT_VAR=$pac_f2c_attr_aligned_str.
+dnl if no alignment works, OUTPUT_VAR=""; if the reference link fails, it is not set.
+dnl MIN_ALIGNMENT : Optional value.
+dnl Minimum alignment size to be used in OUTPUT_VAR.
+dnl pac_f2c_attr_aligned_str won't be modified.
+dnl
+dnl "pac_f2c_attr_aligned_str" will be set with
+dnl 1) __attribute__((aligned(ALIGNMENT_SIZE))),
+dnl 2) __attribute__((aligned)).
+dnl 3) "", i.e. empty string.
+dnl
+dnl 2) means the test can't find a good alignment value, but both the Fortran
+dnl and C compilers are OK with "aligned" which in principle means the C
+dnl compiler will pick the maximum useful alignment supported by the
+dnl architecture.
+dnl 3) means that the test has failed to find the alignment.
+dnl
+AC_DEFUN([PAC_F2C_ATTR_ALIGNED_SIZE],[
+cmblksize=$1
+AC_MSG_CHECKING([the minimum alignment of Fortran common block of $cmblksize integers])
+dnl To find the minimum alignment of Fortran common block (of integer array)
+dnl as seen by C object file, C object files of various (typical) alignments
+dnl are linked to the Fortran code using the common block of integer array.
+#
+dnl Since an incorrect alignment results only in a warning message from the
+dnl fortran compiler(or linker), we use "diff" to compare the fortran
+dnl compiler/linker output. We cannot use AC_LANG_WERROR,
+dnl i.e. ac_fc_werror_flag=yes, because compilers like pgf77 (at version 10.x)
+dnl have non-zero stderr output if a fortran program is used in the linking.
+dnl The stderr contains the name of fortran program even if the linking is
+dnl successful. We could avoid the non-zero stderr output in pgf77 by
+dnl compiling everything into object files and linking all the object files
+dnl with pgf77. Doing that would need us to use AC_TRY_EVAL instead of
+dnl AC_LINK_IFELSE, so "diff" approach is used instead.
+#
+dnl Using diff of compiler(linker) output requires a reference output file
+dnl as the base of diff. The process of creating this reference output file
+dnl has to be exactly the same as the testing process, because pgf77 has
+dnl the following weird behaviour
+dnl pgf77 -o ftest ftest.f => when $?=0 with zero stderr output
+dnl pgf77 -o ftest ftest.f dummy.o => when $?=0 with non-zero stderr output.
+dnl stderr has "ftest.f:".
+dnl
+# First create a fortran CONFTEST which will be used repeatedly.
+AC_LANG_PUSH([Fortran]) dnl AC_LANG_PUSH([Fortran 77])
+AC_LANG_CONFTEST([
+ AC_LANG_SOURCE([
+ program fconftest
+ integer isize
+ parameter (isize=$cmblksize)
+ integer status_array(isize)
+ common /mpifcmb/ status_array
+ save /mpifcmb/
+ end
+ ])
+])
+AC_LANG_POP([Fortran]) dnl AC_LANG_POP([Fortran 77])
+dnl
+dnl
+dnl
+# Compile a C dummy.$OBJEXT and link with Fortran test program to create
+# a reference linker output file, pac_align0.log, as the base of "diff".
+AC_LANG_PUSH([C])
+AC_COMPILE_IFELSE([AC_LANG_SOURCE([])],[
+ cp conftest.$ac_ext pac_conftest.c
+ PAC_RUNLOG([mv conftest.$OBJEXT pac_conftest.$OBJEXT])
+ saved_LIBS="$LIBS"
+ LIBS="pac_conftest.$OBJEXT $LIBS"
+ AC_LANG_PUSH([Fortran]) dnl AC_LANG_PUSH([Fortran 77])
+ saved_ac_link="$ac_link"
+ ac_link="`echo $saved_ac_link | sed -e 's|>.*$|> $pac_logfile 2>\&1|g'`"
+ pac_logfile="pac_align0.log"
+ rm -f $pac_logfile
+ AC_LINK_IFELSE([],[
+ pac_f2c_alignedn_diffbase=yes
+ ],[
+ pac_f2c_alignedn_diffbase=no
+ ])
+ # Be sure NOT to remove the conftest.f which is still needed for later use.
+ # rm -f conftest.$ac_ext
+ # Restore everything in autoconf that has been overwritten
+ ac_link="$saved_ac_link"
+ # restore previously saved LIBS
+ LIBS="$saved_LIBS"
+ AC_LANG_POP([Fortran]) dnl AC_LANG_POP([Fortran 77])
+],[
+ pac_f2c_alignedn_diffbase=no
+])
+AC_LANG_POP([C])
+dnl
+dnl
+if test "$pac_f2c_alignedn_diffbase" = "yes" ; then
+ # Initialize pac_result_str to empty string since part of the test
+ # depends on whether pac_result_str is an empty or non-empty string.
+ pac_result_str=""
+ # Initialize pac_f2c_attr_aligned_str to empty string and
+ # it will remain as empty string if the following test fails.
+ pac_f2c_attr_aligned_str=""
+ for asize in 4 8 16 32 64 128 max ; do
+ if test "$asize" != "max" ; then
+ pac_attr_aligned_str="__attribute__((aligned($asize)))"
+ else
+ pac_attr_aligned_str="__attribute__((aligned))"
+ fi
+ AC_LANG_PUSH([C])
+ #Compile the __attribute__ object file.
+ AC_COMPILE_IFELSE([
+ AC_LANG_SOURCE([
+changequote(,)
+struct mpif_cmblk_t_ { $MPI_FINT imembers[$cmblksize]; };
+changequote([,])
+typedef struct mpif_cmblk_t_ mpif_cmblk_t;
+mpif_cmblk_t mpifcmbr $pac_attr_aligned_str = {0};
+
+extern mpif_cmblk_t _CMPIFCMB __attribute__ ((alias("mpifcmbr")));
+extern mpif_cmblk_t MPIFCMB __attribute__ ((alias("mpifcmbr")));
+extern mpif_cmblk_t MPIFCMB_ __attribute__ ((alias("mpifcmbr")));
+extern mpif_cmblk_t _Cmpifcmb __attribute__ ((alias("mpifcmbr")));
+extern mpif_cmblk_t mpifcmb __attribute__ ((alias("mpifcmbr")));
+extern mpif_cmblk_t mpifcmb_ __attribute__ ((alias("mpifcmbr")));
+ ])
+ ],[
+ cp conftest.$ac_ext pac_conftest.c
+ PAC_RUNLOG([mv conftest.$OBJEXT pac_conftest.$OBJEXT])
+ saved_LIBS="$LIBS"
+ LIBS="pac_conftest.$OBJEXT $LIBS"
+ AC_LANG_PUSH([Fortran]) dnl AC_LANG_PUSH([Fortran 77])
+ saved_ac_link="$ac_link"
+ ac_link="`echo $saved_ac_link | sed -e 's|>.*$|> $pac_logfile 2>\&1|g'`"
+ pac_logfile="pac_align1.log"
+ rm -f $pac_logfile
+ # Use conftest.f created in CONFTEST.
+ AC_LINK_IFELSE([],[
+ PAC_RUNLOG_IFELSE([diff -b pac_align0.log pac_align1.log],[
+ pac_attr_alignedn=yes
+ ],[
+ pac_attr_alignedn=no
+ cat $pac_logfile >&AS_MESSAGE_LOG_FD
+ echo "failed C program was:" >&AS_MESSAGE_LOG_FD
+ cat pac_conftest.c >&AS_MESSAGE_LOG_FD
+ echo "failed Fortran program was:" >&AS_MESSAGE_LOG_FD
+ cat conftest.$ac_ext >&AS_MESSAGE_LOG_FD
+ ])
+ ],[
+ pac_attr_alignedn=no
+ ])
+ # Restore everything in autoconf that has been overwritten
+ ac_link="$saved_ac_link"
+ # restore previously saved LIBS
+ LIBS="$saved_LIBS"
+ AC_LANG_POP([Fortran]) dnl AC_LANG_POP([Fortran 77])
+ # remove previously generated object file and C file.
+ rm -f pac_conftest.$OBJEXT pac_conftest.c
+ rm -f $pac_logfile
+ if test "$pac_attr_alignedn" = yes ; then
+ ifelse([$3],[],[
+ pac_result_str="$asize"
+ pac_f2c_attr_aligned_str="$pac_attr_aligned_str"
+ break
+ ],[
+ if test "$asize" != "max" && test "$asize" -lt "$3" ; then
+ if test "X$pac_result_str" = "X" ; then
+ pac_result_str="$asize"
+ pac_f2c_attr_aligned_str="$pac_attr_aligned_str"
+ fi
+ continue
+ else
+ pac_f2c_attr_aligned_str="$pac_attr_aligned_str"
+ if test "X$pac_result_str" != "X" ; then
+ pac_result_str="$pac_result_str, too small! reset to $asize"
+ else
+ pac_result_str="$asize"
+ fi
+ break
+ fi
+ ])
+ fi
+ ], [
+ pac_attr_alignedn=no
+ ])
+ AC_LANG_POP([C])
+ done
+ ifelse([$2],[],[],[$2="$pac_f2c_attr_aligned_str"])
+else
+ pac_result_str=""
+ # Since the test fails, set pac_f2c_attr_aligned_str to empty string.
+ pac_f2c_attr_aligned_str=""
+fi
+if test "X$pac_result_str" != "X" ; then
+ AC_MSG_RESULT([$pac_result_str])
+else
+ AC_MSG_RESULT([unknown])
+fi
+rm -f pac_align0.log
+])
+dnl
diff --git a/ompi/mca/io/romio/romio/confdb/aclocal_bugfix.m4 b/ompi/mca/io/romio/romio/confdb/aclocal_bugfix.m4
new file mode 100644
index 0000000000..f580d3919a
--- /dev/null
+++ b/ompi/mca/io/romio/romio/confdb/aclocal_bugfix.m4
@@ -0,0 +1,75 @@
+dnl The autoconf 2.52 version of this internal macro fails to work properly
+dnl with OTHER internal macros. This replacement generates no "ignoring
+dnl PROLOGUE" warning when the prologue is [], and still warns otherwise.
+m4_define([AC_LANG_PROGRAM(Fortran 77)],
+[m4_if([$1],[[[]]],,[m4_ifval([$1],
+ [m4_warn([syntax], [$0: ignoring PROLOGUE: $1])])])dnl
+ program main
+$2
+ end])
+
+
+dnl/*D
+dnl PAC_PROG_CHECK_INSTALL_WORKS - Check whether the install program in INSTALL
+dnl works.
+dnl
+dnl Synopsis:
+dnl PAC_PROG_CHECK_INSTALL_WORKS
+dnl
+dnl Output Effect:
+dnl Sets the variable 'INSTALL' to the value of 'ac_install_sh' if
+dnl a file cannot be installed into a local directory with the 'INSTALL'
+dnl program
+dnl
+dnl Notes:
+dnl The 'AC_PROG_INSTALL' script tries to avoid broken versions of
+dnl install by avoiding directories such as '/usr/sbin' where some
+dnl systems are known to have bad versions of 'install'. Unfortunately,
+dnl this is exactly the sort of test-on-name instead of test-on-capability
+dnl that 'autoconf' is meant to eliminate. The test in this script
+dnl is very simple but has been adequate for working around problems
+dnl on Solaris, where the '/usr/sbin/install' program (known by
+dnl autoconf to be bad because it is in /usr/sbin) is also reached by a
+dnl soft link through /bin, which autoconf believes is good.
+dnl
+dnl No variables are cached to ensure that we do not make a mistake in
+dnl our choice of install program.
+dnl
+dnl The Solaris configure requires the directory name to immediately
+dnl follow the '-c' argument, rather than the more common
+dnl.vb
+dnl args sourcefiles destination-dir
+dnl.ve
+dnl D*/
+AC_DEFUN([PAC_PROG_CHECK_INSTALL_WORKS],[
+if test -z "$INSTALL" ; then
+ AC_MSG_RESULT([No install program available])
+else
+ # first make any "confdb/install-sh -c" into an absolute path
+ # this is a hack, but it's still much cleaner than anything else I could
+ # come up with (see tt#1007) [goodell@]
+ AS_CASE(["$INSTALL"],
+ [/*],[:],
+ [*install-sh*],[INSTALL="$master_top_srcdir/$INSTALL"])
+
+ # Check that this install really works
+ rm -f conftest
+ echo "Test file" > conftest
+ if test ! -d .conftest ; then mkdir .conftest ; fi
+ AC_MSG_CHECKING([whether install works])
+ if $INSTALL conftest .conftest >/dev/null 2>&1 ; then
+ installOk=yes
+ else
+ installOk=no
+ fi
+ rm -rf .conftest conftest
+ AC_MSG_RESULT($installOk)
+ if test "$installOk" = no ; then
+ if test -n "$ac_install_sh" ; then
+ INSTALL=$ac_install_sh
+ else
+ AC_MSG_ERROR([Unable to find working install])
+ fi
+ fi
+fi
+])
diff --git a/ompi/mca/io/romio/romio/confdb/aclocal_cache.m4 b/ompi/mca/io/romio/romio/confdb/aclocal_cache.m4
new file mode 100644
index 0000000000..9d02c41638
--- /dev/null
+++ b/ompi/mca/io/romio/romio/confdb/aclocal_cache.m4
@@ -0,0 +1,372 @@
+dnl
+dnl/*D
+dnl AC_CACHE_LOAD - Replacement for autoconf cache load
+dnl
+dnl Notes:
+dnl Caching in autoconf is broken (at least through version 2.13).
+dnl The problem is that the cache is read
+dnl without any check for whether it makes any sense to read it.
+dnl A common problem is a build on a shared file system; connecting to
+dnl a different computer and then building within the same directory will
+dnl lead to at best error messages from configure and at worst a build that
+dnl is wrong but fails only at run time (e.g., wrong datatype sizes used).
+dnl Later versions of autoconf do include some checks for changes in the
+dnl environment that impact the choices, but still miss problems with
+dnl multiple different systems.
+dnl
+dnl This fixes that by requiring the user to explicitly enable caching
+dnl before the cache file will be loaded.
+dnl
+dnl To use this version of 'AC_CACHE_LOAD', you need to include
+dnl 'aclocal_cache.m4' in your 'aclocal.m4' file. The sowing 'aclocal.m4'
+dnl file includes this file.
+dnl
+dnl If no --enable-cache or --disable-cache option is selected, the
+dnl command causes configure to keep track of the system being configured
+dnl in a config.system file; if the current system matches the value stored
+dnl in that file (or there is neither a config.cache nor config.system file),
+dnl configure will enable caching. In order to ensure that the configure
+dnl tests make sense, the values of CC, F77, F90, and CXX are also included
+dnl in the config.system file. In addition, the value of PATH is included
+dnl to ensure that changes in the PATH that might select a different version
+dnl of a program with the same name (such as a native make versus gnumake)
+dnl are detected.
+dnl
+dnl Bugs:
+dnl This does not work with the Cygnus configure because the enable arguments
+dnl are processed *after* AC_CACHE_LOAD (!). To address this, we avoid
+dnl changing the value of enable_cache, and use real_enable_cache, duplicating
+dnl the "notgiven" value.
+dnl
+dnl The environment variable CONFIGURE_DEBUG_CACHE, if set to yes,
+dnl will cause additional data to be written out during the configure process.
+dnl This can be helpful in debugging the cache file process.
+dnl
+dnl See Also:
+dnl PAC_ARG_CACHING
+dnl D*/
+define([AC_CACHE_LOAD],
+[if test "$CONFIGURE_DEBUG_CACHE" = yes ; then
+ oldopts="$-"
+ clearMinusX=no
+ set -x
+ if test "$oldopts" != "$-" ; then
+ clearMinusX=yes
+ fi
+fi
+# Work out the name of the file that records which system the cache
+# belongs to; it lives next to the cache file when there is one
+if test "X$cache_system" = "X" ; then
+ # A default file name, just in case
+ cache_system="config.system"
+ if test "$cache_file" != "/dev/null" ; then
+ # Get the directory for the cache file, if any
+ changequote(,)
+ dnl Be careful to ensure that there is no doubled slash
+ cache_system=`echo $cache_file | sed -e 's%^\(.*/\)[^/]*%\1config.system%'`
+ changequote([,])
+ test "x$cache_system" = "x$cache_file" && cache_system="config.system"
+# else
+# We must *not* set enable_cache to no because we need to know if
+# enable_cache was not set.
+# enable_cache=no
+ fi
+fi
+dnl
+dnl The "action-if-not-given" part of AC_ARG_ENABLE is not executed until
+dnl after the AC_CACHE_LOAD is executed (!). Thus, the value of
+dnl enable_cache if neither --enable-cache or --disable-cache is selected
+dnl is null. Just in case autoconf ever fixes this, we test both cases.
+dnl
+dnl Include PATH in the cache.system file since changing the path can
+dnl change which versions of programs are found (such as vendor make
+dnl or GNU make).
+dnl
+#
+# Get a test value and flag whether we should remove/replace the
+# cache_system file (do so unless cache_system_ok is yes)
+# FC and F77 should be synonyms. Save both in case
+# We include the xxxFLAGS in case the user is using the flags to change
+# the language (either input or output) of the compiler. E.g.,
+# using -xarch=v9 on Solaris to select 64 bit output or using -D_BSD_SOURCE
+# with gcc to get different header files on input.
+cleanargs=`echo "$CC $F77 $FC $CXX $F90 $CFLAGS $FFLAGS $CXXFLAGS $F90FLAGS $PATH" | tr '"' ' '`
+if uname -srm >/dev/null 2>&1 ; then
+ cache_system_text="`uname -srm` $cleanargs"
+else
+ cache_system_text="-no-uname- $cleanargs"
+fi
+cache_system_ok=no
+#
+if test -z "$real_enable_cache" ; then
+ real_enable_cache=$enable_cache
+ if test -z "$real_enable_cache" ; then real_enable_cache="notgiven" ; fi
+fi
+if test "X$real_enable_cache" = "Xnotgiven" ; then
+ # check for valid cache file
+ if test -z "$cache_system" ; then cache_system="config.system" ; fi
+ if uname -srm >/dev/null 2>&1 ; then
+ if test -f "$cache_system" -a -n "$cache_system_text" ; then
+ # The recorded description must equal the current one exactly
+ if test "$cache_system_text" = "`cat $cache_system`" ; then
+ real_enable_cache="yes"
+ cache_system_ok=yes
+ fi
+ elif test ! -f "$cache_system" -a -n "$cache_system_text" ; then
+ # remove the cache file because it may not correspond to our
+ # system
+ if test "$cache_file" != "/dev/null" ; then
+ rm -f $cache_file
+ fi
+ real_enable_cache="yes"
+ fi
+ fi
+fi
+# Caching cannot be enabled without a real cache file
+if test "X$real_enable_cache" = "Xyes" -a "$cache_file" = "/dev/null" ; then
+ real_enable_cache=no
+fi
+if test "X$real_enable_cache" = "Xyes" ; then
+ if test -r "$cache_file" ; then
+ echo "loading cache $cache_file"
+ if test -w "$cache_file" ; then
+ # Clean the cache file (ergh)
+ PAC_CACHE_CLEAN
+ fi
+ . $cache_file
+ else
+ echo "Configure in `pwd` creating cache $cache_file"
+ > $cache_file
+ rm -f $cache_system
+ fi
+else
+ cache_file="/dev/null"
+fi
+# Remember our location and the name of the cachefile
+pac_cv_my_conf_dir=`pwd`
+dnl do not include the cachefile name, since this may contain the process
+dnl number and cause comparisons looking for changes to the cache file
+dnl to detect a change that isn't real.
+dnl pac_cv_my_cachefile=$cachefile
+#
+# Update the cache_system file if necessary.  The text is quoted so that it
+# is stored exactly as it will be compared on the next run; unquoted, the
+# shell would collapse the runs of blanks left by empty variables and the
+# stored text would never match cache_system_text again
+if test "$cache_system_ok" != yes ; then
+ if test -n "$cache_system" ; then
+ rm -f "$cache_system"
+ echo "$cache_system_text" > "$cache_system"
+ fi
+fi
+if test "$clearMinusX" = yes ; then
+ set +x
+fi
+])
+dnl
+dnl/*D
+dnl PAC_ARG_CACHING - Enable caching of results from a configure execution
+dnl
+dnl Synopsis:
+dnl PAC_ARG_CACHING
+dnl
+dnl Output Effects:
+dnl Adds '--enable-cache' and '--disable-cache' to the command line arguments
+dnl accepted by 'configure'.
+dnl
+dnl See Also:
+dnl AC_CACHE_LOAD
+dnl D*/
+dnl Add this call alongside the other AC_ARG_ENABLE calls. Note that the
+dnl values set here are redundant; AC_CACHE_LOAD relies on the way autoconf
+dnl initially processes AC_ARG_ENABLE commands. When neither --enable-cache
+dnl nor --disable-cache is given, enable_cache is set to "notgiven".
+AC_DEFUN([PAC_ARG_CACHING],[
+AC_ARG_ENABLE(cache,
+ AC_HELP_STRING([--enable-cache], [Turn on configure caching]),,
+ [enable_cache="notgiven"])
+])
+
+dnl Clean the cache of extraneous quotes that AC_CACHE_SAVE may add
+dnl
+dnl The file named by cache_file is filtered into the scratch file confcache
+dnl and is rewritten only when the filtered text differs and the file is
+dnl writable.  Setting DEBUG_AUTOCONF_CACHE=yes prints the resulting file.
+AC_DEFUN([PAC_CACHE_CLEAN],[
+ rm -f confcache
+ # Strip the backslash-escaped single-quote sequences from the cache file
+ sed -e "s/'\\\\''//g" -e "s/'\\\\/'/" -e "s/\\\\'/'/" \
+ -e "s/'\\\\''//g" $cache_file > confcache
+ if cmp -s $cache_file confcache ; then
+ :
+ else
+ if test -w $cache_file ; then
+ echo "updating cache $cache_file"
+ # The contents are copied over the existing file rather than moved
+ cat confcache > $cache_file
+ else
+ echo "not updating unwritable cache $cache_file"
+ fi
+ fi
+ rm -f confcache
+ if test "$DEBUG_AUTOCONF_CACHE" = "yes" ; then
+ echo "Results of cleaned cache file:"
+ echo "--------------------------------------------------------"
+ cat $cache_file
+ echo "--------------------------------------------------------"
+ fi
+])
+
+dnl/*D
+dnl PAC_SUBDIR_CACHE - Create a cache file before ac_output for subdirectory
+dnl configures.
+dnl
+dnl Synopsis:
+dnl PAC_SUBDIR_CACHE(when)
+dnl
+dnl Input Parameter:
+dnl . when - Indicates when the cache should be created (optional)
+dnl If 'always', create a new cache file. This option
+dnl should be used if any of the cache parameters (such as
+dnl CFLAGS or LDFLAGS) may have changed.
+dnl
+dnl Output Effects:
+dnl
+dnl Create a cache file before ac_output so that subdir configures don't
+dnl make mistakes.
+dnl We can't use OUTPUT_COMMANDS to remove the cache file, because those
+dnl commands are executed *before* the subdir configures.
+dnl
+dnl D*/
+dnl PAC_SUBDIR_CACHE expands to nothing; the implementation that follows is
+dnl defined under the separate name PAC_SUBDIR_CACHE_OLD.
+AC_DEFUN([PAC_SUBDIR_CACHE],[])
+AC_DEFUN([PAC_SUBDIR_CACHE_OLD],[
+dnl Create a cache file for subdirectory configures when the argument is
+dnl "always", or when no cache file is in use and real_enable_cache is
+dnl "notgiven".  The compiler-related variables are exported in every case.
+if test "x$1" = "xalways" -o \( "$cache_file" = "/dev/null" -a "X$real_enable_cache" = "Xnotgiven" \) ; then
+ # Use an absolute directory to help keep the subdir configures from getting
+ # lost
+ cache_file=`pwd`/$$conf.cache
+ touch $cache_file
+ dnl
+ dnl For Autoconf 2.52+, we should ensure that the environment is set
+ dnl for the cache. Make sure that we get the values and set the
+ dnl xxx_set variables properly
+ ac_cv_env_CC_set=set
+ ac_cv_env_CC_value=$CC
+ ac_cv_env_CFLAGS_set=${CFLAGS+set}
+ ac_cv_env_CFLAGS_value=$CFLAGS
+ ac_cv_env_CPP_set=set
+ ac_cv_env_CPP_value=$CPP
+ ac_cv_env_CPPFLAGS_set=${CPPFLAGS+set}
+ ac_cv_env_CPPFLAGS_value=$CPPFLAGS
+ ac_cv_env_LDFLAGS_set=${LDFLAGS+set}
+ ac_cv_env_LDFLAGS_value=$LDFLAGS
+ ac_cv_env_LIBS_set=${LIBS+set}
+ ac_cv_env_LIBS_value=$LIBS
+ ac_cv_env_FC_set=${FC+set}
+ ac_cv_env_FC_value=$FC
+ ac_cv_env_F77_set=${F77+set}
+ ac_cv_env_F77_value=$F77
+ ac_cv_env_F90_set=${F90+set}
+ ac_cv_env_F90_value=$F90
+ ac_cv_env_FFLAGS_set=${FFLAGS+set}
+ ac_cv_env_FFLAGS_value=$FFLAGS
+ ac_cv_env_CXX_set=${CXX+set}
+ ac_cv_env_CXX_value=$CXX
+
+ # The same values are recorded again under the ac_env_ prefix
+ ac_env_CC_set=set
+ ac_env_CC_value=$CC
+ ac_env_CFLAGS_set=${CFLAGS+set}
+ ac_env_CFLAGS_value=$CFLAGS
+ ac_env_CPP_set=set
+ ac_env_CPP_value=$CPP
+ ac_env_CPPFLAGS_set=${CPPFLAGS+set}
+ ac_env_CPPFLAGS_value=$CPPFLAGS
+ ac_env_LDFLAGS_set=${LDFLAGS+set}
+ ac_env_LDFLAGS_value=$LDFLAGS
+ ac_env_LIBS_set=${LIBS+set}
+ ac_env_LIBS_value=$LIBS
+ ac_env_FC_set=${FC+set}
+ ac_env_FC_value=$FC
+ ac_env_F77_set=${F77+set}
+ ac_env_F77_value=$F77
+ ac_env_F90_set=${F90+set}
+ ac_env_F90_value=$F90
+ ac_env_FFLAGS_set=${FFLAGS+set}
+ ac_env_FFLAGS_value=$FFLAGS
+ ac_env_CXX_set=${CXX+set}
+ ac_env_CXX_value=$CXX
+
+ dnl other parameters are
+ dnl build_alias, host_alias, target_alias
+
+ # It turns out that A C CACHE_SAVE can't be invoked more than once
+ # with data that contains blanks. What happens is that the quotes
+ # that it adds get quoted and then added again. To avoid this,
+ # we strip off the outer quotes for all cached variables
+ dnl We add pac_cv_my_conf_dir to give the source of this cachefile,
+ dnl and pac_cv_my_cachefile to indicate how it chose the cachefile.
+ pac_cv_my_conf_dir=`pwd`
+ # NOTE(review): cachefile is not assigned anywhere in this macro, which
+ # otherwise uses cache_file - confirm which name was intended
+ pac_cv_my_cachefile=$cachefile
+ AC_CACHE_SAVE
+ PAC_CACHE_CLEAN
+ # Ask the subdirectory configures to use the cache just written
+ ac_configure_args="$ac_configure_args -enable-cache"
+fi
+dnl Unconditionally export these values. Subdir configures break otherwise
+export CC
+export CFLAGS
+export LDFLAGS
+export LIBS
+export CPPFLAGS
+export CPP
+export FC
+export F77
+export F90
+export CXX
+export FFLAGS
+export CCFLAGS
+])
+dnl PAC_SUBDIR_CACHE_CLEANUP expands to nothing; the implementation that
+dnl follows is defined under the separate name PAC_SUBDIR_CACHE_CLEANUP_OLD.
+AC_DEFUN([PAC_SUBDIR_CACHE_CLEANUP],[])
+AC_DEFUN([PAC_SUBDIR_CACHE_CLEANUP_OLD],[
+dnl Delete the cache file and point cache_file at /dev/null, but only while
+dnl real_enable_cache is still "notgiven" and a real cache file is in use.
+case "X$real_enable_cache" in
+ Xnotgiven)
+ if test "$cache_file" != "/dev/null" ; then
+ rm -f $cache_file
+ cache_file=/dev/null
+ fi
+ ;;
+esac
+])
+dnl
+dnl The following three macros support the sharing of configure results
+dnl by configure scripts, including ones that are not run with
+dnl AC_CONFIG_SUBDIRS (the cachefiles managed by --enable-cache can
+dnl only be used with AC_CONFIG_SUBDIRS; creating an autoconf-style
+dnl cachefile before the end of the autoconf process will often
+dnl cause problems).
+dnl
+AC_DEFUN([PAC_CREATE_BASE_CACHE],[
+AC_ARG_ENABLE(base-cache,
+ AC_HELP_STRING([--enable-base-cache],
+ [Enable the use of a simple cache for the subsidiary
+ configure scripts]),,enable_base_cache=default)
+# Without an explicit option, CONF_USE_CACHEFILE=yes turns the base cache on
+# and any other value leaves it off
+if test "$enable_base_cache" = "default" ; then
+ enable_base_cache=no
+ test "$CONF_USE_CACHEFILE" = yes && enable_base_cache=yes
+fi
+case "$enable_base_cache" in
+ no)
+ ;;
+ *)
+ # yes selects the default file name; any other value names the file
+ basecachefile=cache.base
+ test "$enable_base_cache" = yes || basecachefile=$enable_base_cache
+ basecachefile=`pwd`/$basecachefile
+ # Seed the file with every shell variable setting that mentions ac_cv
+ set | grep ac_cv > $basecachefile
+ # Tell other configures to load this file
+ echo "Creating and exporting the base cache file $basecachefile"
+ CONF_BASE_CACHEFILE=$basecachefile
+ export CONF_BASE_CACHEFILE
+ ;;
+esac
+])
+AC_DEFUN([PAC_LOAD_BASE_CACHE],[
+dnl Source the base cache file named by CONF_BASE_CACHEFILE when that
+dnl variable is set and the file is not empty, and keep the variable exported.
+if test -n "$CONF_BASE_CACHEFILE" && test -s "$CONF_BASE_CACHEFILE" ; then
+ echo "Loading base cachefile $CONF_BASE_CACHEFILE"
+ . $CONF_BASE_CACHEFILE
+ export CONF_BASE_CACHEFILE
+fi
+])
+AC_DEFUN([PAC_UPDATE_BASE_CACHE],[
+dnl Refresh the base cache file named by CONF_BASE_CACHEFILE with the
+dnl current ac_cv settings.  The file is replaced only when its contents
+dnl would change; nothing is done when the variable is unset or the file
+dnl is empty.
+if test -n "$CONF_BASE_CACHEFILE" -a -s "$CONF_BASE_CACHEFILE" ; then
+ set | grep ac_cv > $CONF_BASE_CACHEFILE.new
+ if cmp -s $CONF_BASE_CACHEFILE.new $CONF_BASE_CACHEFILE ; then
+ # Nothing changed: discard the scratch copy instead of leaving it behind
+ rm -f $CONF_BASE_CACHEFILE.new
+ else
+ echo "Replacing $CONF_BASE_CACHEFILE"
+ mv $CONF_BASE_CACHEFILE.new $CONF_BASE_CACHEFILE
+ fi
+fi
+])
diff --git a/ompi/mca/io/romio/romio/confdb/aclocal_cc.m4 b/ompi/mca/io/romio/romio/confdb/aclocal_cc.m4
new file mode 100644
index 0000000000..91b26de324
--- /dev/null
+++ b/ompi/mca/io/romio/romio/confdb/aclocal_cc.m4
@@ -0,0 +1,1596 @@
+dnl AC_PROG_CC_GNU - define an empty placeholder macro of this name when
+dnl no macro of this name is already defined
+ifdef([AC_PROG_CC_GNU],,[AC_DEFUN([AC_PROG_CC_GNU],)])
+
+dnl PAC_PROG_CC - reprioritize the C compiler search order
+dnl The candidate compilers are passed to AC_PROG_CC in the order listed.
+dnl CFLAGS is pushed before the call and popped after it (presumably so that
+dnl any CFLAGS value chosen by AC_PROG_CC does not persist - confirm against
+dnl the definitions of PAC_PUSH_FLAG and PAC_POP_FLAG).
+AC_DEFUN([PAC_PROG_CC],[
+ PAC_PUSH_FLAG([CFLAGS])
+ AC_PROG_CC([gcc icc pgcc xlc xlC pathcc cc])
+ PAC_POP_FLAG([CFLAGS])
+])
+
+dnl
+dnl/*D
+dnl PAC_C_CHECK_COMPILER_OPTION - Check that a compiler option is accepted
+dnl without warning messages
+dnl
+dnl Synopsis:
+dnl PAC_C_CHECK_COMPILER_OPTION(optionname,action-if-ok,action-if-fail)
+dnl
+dnl Output Effects:
+dnl
+dnl If no actions are specified, a working value is added to 'COPTIONS'
+dnl
+dnl Notes:
+dnl This is now careful to check that the output is different, since
+dnl some compilers are noisy.
+dnl
+dnl We are extra careful to prototype the functions in case compiler options
+dnl that complain about poor code are in effect.
+dnl
+dnl Because this is a long script, we have ensured that you can pass a
+dnl variable containing the option name as the first argument.
+dnl
+dnl gcc 4.2.4 on 32-bit does not complain about the -Wno-type-limits option
+dnl even though it doesn't support it. However, when another warning is
+dnl triggered, it gives an error that the option is not recognized. So we
+dnl need to test with a conftest file that will generate warnings
+dnl D*/
+AC_DEFUN([PAC_C_CHECK_COMPILER_OPTION],[
+AC_MSG_CHECKING([whether C compiler accepts option $1])
+# CFLAGS is extended with the option for the duration of the test and
+# restored from pccco_save_CFLAGS afterwards
+pccco_save_CFLAGS="$CFLAGS"
+CFLAGS="$1 $CFLAGS"
+rm -f conftest.out
+pac_success=no
+# conftest3.c has an invalid prototype to ensure we generate warnings
+echo 'int main(){}' > conftest3.c
+echo 'int foo(void);int foo(void){return 0;}' > conftest2.c
+echo 'int main(void);int main(void){return 0;}' > conftest.c
+# Step 1: conftest3.c must build with the option, and conftest.c must build
+# without it; the output of the latter is the baseline in conftest.bas
+if ${CC-cc} $CFLAGS $CPPFLAGS -o conftest conftest3.c $LDFLAGS >/dev/null 2>&1 &&
+ ${CC-cc} $pccco_save_CFLAGS $CPPFLAGS -o conftest conftest.c $LDFLAGS >conftest.bas 2>&1 ; then
+ # Step 2: conftest.c built with the option must produce the same output
+ # as the baseline
+ if ${CC-cc} $CFLAGS $CPPFLAGS -o conftest conftest.c $LDFLAGS >conftest.out 2>&1 ; then
+ if diff -b conftest.out conftest.bas >/dev/null 2>&1 ; then
+ AC_MSG_RESULT(yes)
+ AC_MSG_CHECKING([whether routines compiled with $1 can be linked with ones compiled without $1])
+ rm -f conftest.out
+ rm -f conftest.bas
+ # Step 3: compile conftest2.c without the option and link the object
+ # with conftest.c compiled with the option
+ if ${CC-cc} -c $pccco_save_CFLAGS $CPPFLAGS conftest2.c >conftest2.out 2>&1 ; then
+ # NOTE(review): the next two commands differ only in the output file,
+ # so the diff below compares two runs of the same link - confirm
+ # whether the first was meant to use pccco_save_CFLAGS
+ if ${CC-cc} $CFLAGS $CPPFLAGS -o conftest conftest2.o conftest.c $LDFLAGS >conftest.bas 2>&1 ; then
+ if ${CC-cc} $CFLAGS $CPPFLAGS -o conftest conftest2.o conftest.c $LDFLAGS >conftest.out 2>&1 ; then
+ if diff -b conftest.out conftest.bas >/dev/null 2>&1 ; then
+ pac_success=yes
+ else
+ :
+ fi
+ else
+ :
+ fi
+ else
+ # Could not link with the option!
+ :
+ fi
+ else
+ if test -s conftest2.out ; then
+ cat conftest2.out >&AC_FD_CC
+ fi
+ fi
+ else
+ :
+ fi
+ else
+ :
+ fi
+else
+ # Could not compile without the option!
+ :
+fi
+CFLAGS="$pccco_save_CFLAGS"
+# Run the caller's action for the outcome; with no success action the
+# option is appended to COPTIONS
+if test "$pac_success" = yes ; then
+ AC_MSG_RESULT(yes)
+ ifelse($2,,COPTIONS="$COPTIONS $1",$2)
+else
+ AC_MSG_RESULT(no)
+ if test -s conftest.out ; then cat conftest.out >&AC_FD_CC ; fi
+ $3
+fi
+# This is needed for Mac OSX 10.5
+rm -rf conftest.dSYM
+rm -f conftest*
+])
+dnl
+dnl/*D
+dnl PAC_C_OPTIMIZATION - Determine C options for producing optimized code
+dnl
+dnl Synopsis
+dnl PAC_C_OPTIMIZATION([action if found])
+dnl
+dnl Output Effect:
+dnl Adds options to 'COPTIONS' if no other action is specified
+dnl
+dnl Notes:
+dnl This is a temporary stand-in for compiler optimization.
+dnl It should try to match known systems to known compilers (checking, of
+dnl course), and then fall back to some common defaults.
+dnl Note that many compilers will complain about -g and aggressive
+dnl optimization.
+dnl D*/
+AC_DEFUN([PAC_C_OPTIMIZATION],[
+ # Try the candidates in order and stop at the first one the compiler
+ # accepts
+ for copt in "-O4 -Ofast" "-Ofast" "-fast" "-O3" "-xO3" "-O" ; do
+ PAC_C_CHECK_COMPILER_OPTION($copt,found_opt=yes,found_opt=no)
+ if test "$found_opt" = "yes" ; then
+ ifelse($1,,COPTIONS="$COPTIONS $copt",$1)
+ break
+ fi
+ done
+ # When ac_cv_prog_gcc is yes, also test each of these options and act on
+ # every one that is accepted
+ if test "$ac_cv_prog_gcc" = "yes" ; then
+ for copt in "-fomit-frame-pointer" "-finline-functions" \
+ "-funroll-loops" ; do
+ PAC_C_CHECK_COMPILER_OPTION($copt,found_opt=yes,found_opt=no)
+ if test "$found_opt" = "yes" ; then
+ ifelse($1,,COPTIONS="$COPTIONS $copt",$1)
+ # no break because we're trying to add them all
+ fi
+ done
+ # We could also look for architecture-specific gcc options
+ fi
+
+])
+
+dnl/*D
+dnl PAC_PROG_C_UNALIGNED_DOUBLES - Check that the C compiler allows unaligned
+dnl doubles
+dnl
+dnl Synopsis:
+dnl PAC_PROG_C_UNALIGNED_DOUBLES(action-if-true,action-if-false,
+dnl action-if-unknown)
+dnl
+dnl Notes:
+dnl 'action-if-unknown' is used in the case of cross-compilation.
+dnl D*/
+AC_DEFUN([PAC_PROG_C_UNALIGNED_DOUBLES],[
+AC_CACHE_CHECK([whether C compiler allows unaligned doubles],
+pac_cv_prog_c_unaligned_doubles,[
+AC_TRY_RUN([
+void fetch_double( v )
+double *v;
+{
+*v = 1.0;
+}
+int main( argc, argv )
+int argc;
+char **argv;
+{
+int p[4];
+double *p_val;
+fetch_double( (double *)&(p[0]) );
+p_val = (double *)&(p[0]);
+if (*p_val != 1.0) return 1;
+fetch_double( (double *)&(p[1]) );
+p_val = (double *)&(p[1]);
+if (*p_val != 1.0) return 1;
+return 0;
+}
+],pac_cv_prog_c_unaligned_doubles="yes",pac_cv_prog_c_unaligned_doubles="no",
+pac_cv_prog_c_unaligned_doubles="unknown")])
+dnl Run the action that matches the result: yes when the test program ran
+dnl successfully, no when it failed, unknown when it could not be run.
+dnl Both operands of each comparison carry the X prefix; without it on the
+dnl right-hand side no action could ever be selected.
+ifelse($1,,,if test "X$pac_cv_prog_c_unaligned_doubles" = "Xyes" ; then
+$1
+fi)
+ifelse($2,,,if test "X$pac_cv_prog_c_unaligned_doubles" = "Xno" ; then
+$2
+fi)
+ifelse($3,,,if test "X$pac_cv_prog_c_unaligned_doubles" = "Xunknown" ; then
+$3
+fi)
+])
+
+dnl/*D
+dnl PAC_PROG_C_WEAK_SYMBOLS - Test whether C supports weak alias symbols.
+dnl
+dnl Synopsis
+dnl PAC_PROG_C_WEAK_SYMBOLS(action-if-true,action-if-false)
+dnl
+dnl Output Effect:
+dnl Defines one of the following if a weak symbol pragma is found:
+dnl.vb
+dnl HAVE_PRAGMA_WEAK - #pragma weak
+dnl HAVE_PRAGMA_HP_SEC_DEF - #pragma _HP_SECONDARY_DEF
+dnl HAVE_PRAGMA_CRI_DUP - #pragma _CRI duplicate x as y
+dnl.ve
+dnl May also define
+dnl.vb
+dnl HAVE_WEAK_ATTRIBUTE
+dnl.ve
+dnl if functions can be declared as 'int foo(...) __attribute__ ((weak));'
+dnl sets the shell variable pac_cv_attr_weak to yes.
+dnl Also checks for __attribute__((weak_import)) which is supported by
+dnl Apple in Mac OSX (at least in Darwin). Note that this provides only
+dnl weak symbols, not weak aliases
+dnl
+dnl D*/
+AC_DEFUN([PAC_PROG_C_WEAK_SYMBOLS],[
+pragma_extra_message=""
+AC_CACHE_CHECK([for type of weak symbol alias support],
+pac_cv_prog_c_weak_symbols,[
+# Test for weak symbol support...
+# We can't put # in the message because it causes autoconf to generate
+# incorrect code
+AC_TRY_LINK([
+extern int PFoo(int);
+#pragma weak PFoo = Foo
+int Foo(int a) { return a; }
+],[return PFoo(1);],has_pragma_weak=yes)
+#
+# Some systems (Linux ia64 and ecc, for example), support weak symbols
+# only within a single object file! This tests that case.
+# Note that there is an extern int PFoo declaration before the
+# pragma. Some compilers require this in order to make the weak symbol
+# externally visible.
+if test "$has_pragma_weak" = yes ; then
+ # This is needed for Mac OSX 10.5
+ rm -rf conftest.dSYM
+ rm -f conftest*
+dnl NOTE(review): the next line looks truncated - the here-document bodies
+dnl for conftest1.c and conftest2.c and the definition of ac_link2 are not
+dnl visible here; confirm against the upstream macro before relying on it.
+ cat >>conftest1.c <>conftest2.c <>conftest1.c <>conftest2.c <> config.log
+ echo "Failed program was" >> config.log
+ cat conftest1.c >>config.log
+ cat conftest2.c >>config.log
+ if test -s conftest.out ; then cat conftest.out >> config.log ; fi
+ has_pragma_weak=0
+ pragma_extra_message="pragma weak accepted but does not work (probably creates two non-weak entries)"
+ fi
+ else
+ echo "$ac_link2" >>config.log
+ echo "Failed program was" >>config.log
+ cat conftest1.c >>config.log
+ cat conftest2.c >>config.log
+ if test -s conftest.out ; then cat conftest.out >> config.log ; fi
+ has_pragma_weak=0
+ pragma_extra_message="pragma weak does not work outside of a file"
+ fi
+ # This is needed for Mac OSX 10.5
+ rm -rf conftest.dSYM
+ rm -f conftest*
+fi
+dnl
+if test -z "$pac_cv_prog_c_weak_symbols" ; then
+ AC_TRY_LINK([
+extern int PFoo(int);
+#pragma _HP_SECONDARY_DEF Foo PFoo
+int Foo(int a) { return a; }
+],[return PFoo(1);],pac_cv_prog_c_weak_symbols="pragma _HP_SECONDARY_DEF")
+fi
+dnl
+if test -z "$pac_cv_prog_c_weak_symbols" ; then
+ AC_TRY_LINK([
+extern int PFoo(int);
+#pragma _CRI duplicate PFoo as Foo
+int Foo(int a) { return a; }
+],[return PFoo(1);],pac_cv_prog_c_weak_symbols="pragma _CRI duplicate x as y")
+fi
+dnl
+if test -z "$pac_cv_prog_c_weak_symbols" ; then
+ pac_cv_prog_c_weak_symbols="no"
+fi
+dnl
+dnl If there is an extra explanatory message, echo it now so that it
+dnl doesn't interfere with the cache result value
+if test -n "$pragma_extra_message" ; then
+ echo $pragma_extra_message
+fi
+dnl
+])
+if test "$pac_cv_prog_c_weak_symbols" = "no" ; then
+ ifelse([$2],,:,[$2])
+else
+dnl The HP and Cray patterns are the exact strings assigned to
+dnl pac_cv_prog_c_weak_symbols by the checks above; a shorter pattern
+dnl would never match and the corresponding define would be skipped.
+ case "$pac_cv_prog_c_weak_symbols" in
+ "pragma weak") AC_DEFINE(HAVE_PRAGMA_WEAK,1,[Supports weak pragma])
+ ;;
+ "pragma _HP_SECONDARY_DEF") AC_DEFINE(HAVE_PRAGMA_HP_SEC_DEF,1,[HP style weak pragma])
+ ;;
+ "pragma _CRI duplicate x as y") AC_DEFINE(HAVE_PRAGMA_CRI_DUP,1,[Cray style weak pragma])
+ ;;
+ esac
+ ifelse([$1],,:,[$1])
+fi
+dnl The attribute checks below only compile a declaration; they set cache
+dnl variables and run neither of the caller's actions.
+AC_CACHE_CHECK([whether __attribute__ ((weak)) allowed],
+pac_cv_attr_weak,[
+AC_TRY_COMPILE([int foo(int) __attribute__ ((weak));],[int a;],
+pac_cv_attr_weak=yes,pac_cv_attr_weak=no)])
+# Note that being able to compile with weak_import doesn't mean that
+# it works.
+AC_CACHE_CHECK([whether __attribute__ ((weak_import)) allowed],
+pac_cv_attr_weak_import,[
+AC_TRY_COMPILE([int foo(int) __attribute__ ((weak_import));],[int a;],
+pac_cv_attr_weak_import=yes,pac_cv_attr_weak_import=no)])
+# Check if the alias option for weak attributes is allowed
+AC_CACHE_CHECK([whether __attribute__((weak,alias(...))) allowed],
+pac_cv_attr_weak_alias,[
+AC_TRY_COMPILE([int foo(int) __attribute__((weak,alias("__foo")));],[int a;],
+pac_cv_attr_weak_alias=yes,pac_cv_attr_weak_alias=no)])
+])
+
+#
+# This is a replacement that checks that FAILURES are signaled as well
+# (later configure macros look for the .o file, not just success from the
+# compiler, but they should not HAVE to)
+#
+dnl --- insert 2.52 compatibility here ---
+dnl 2.52 does not have AC_PROG_CC_WORKS, so define an empty macro of that
+dnl name when none is already defined
+ifdef([AC_PROG_CC_WORKS],,[AC_DEFUN([AC_PROG_CC_WORKS],)])
+dnl
+AC_DEFUN([PAC_PROG_CC_WORKS],
+[AC_PROG_CC_WORKS
+AC_MSG_CHECKING([whether the C compiler sets its return status correctly])
+AC_LANG_SAVE
+AC_LANG_C
+dnl The test program is deliberately invalid, so a successful compile means
+dnl the compiler does not report failure through its exit status.
+AC_TRY_COMPILE(,[int a = bzzzt;],notbroken=no,notbroken=yes)
+dnl Undo the AC_LANG_SAVE above so the caller's language selection is kept.
+AC_LANG_RESTORE
+AC_MSG_RESULT($notbroken)
+if test "$notbroken" = "no" ; then
+ AC_MSG_ERROR([installation or configuration problem: C compiler does not
+correctly set error code when a fatal error occurs])
+fi
+])
+
+dnl/*D
+dnl PAC_PROG_C_MULTIPLE_WEAK_SYMBOLS - Test whether C and the
+dnl linker allow multiple weak symbols.
+dnl
+dnl Synopsis
+dnl PAC_PROG_C_MULTIPLE_WEAK_SYMBOLS(action-if-true,action-if-false)
+dnl
+dnl
+dnl D*/
+AC_DEFUN([PAC_PROG_C_MULTIPLE_WEAK_SYMBOLS],[
+AC_CACHE_CHECK([for multiple weak symbol support],
+pac_cv_prog_c_multiple_weak_symbols,[
+# Test for multiple weak symbol support...
+#
+# This is needed for Mac OSX 10.5
+rm -rf conftest.dSYM
+rm -f conftest*
+dnl NOTE(review): the next line looks truncated - the here-document bodies
+dnl for conftest1.c and conftest2.c and the start of the ac_link2 definition
+dnl are not visible here; confirm against the upstream macro.
+cat >>conftest1.c <>conftest2.c <conftest.out 2>&1'
+# The result is yes only when the link command in ac_link2 succeeds; on
+# failure the command, both test files and any output go to config.log
+if eval $ac_link2 ; then
+ pac_cv_prog_c_multiple_weak_symbols="yes"
+else
+ echo "$ac_link2" >>config.log
+ echo "Failed program was" >>config.log
+ cat conftest1.c >>config.log
+ cat conftest2.c >>config.log
+ if test -s conftest.out ; then cat conftest.out >> config.log ; fi
+fi
+# This is needed for Mac OSX 10.5
+rm -rf conftest.dSYM
+rm -f conftest*
+dnl
+])
+dnl Run the first action when the result is yes and the second otherwise;
+dnl an omitted action becomes the shell no-op.
+if test "$pac_cv_prog_c_multiple_weak_symbols" = "yes" ; then
+ ifelse([$1],,:,[$1])
+else
+ ifelse([$2],,:,[$2])
+fi
+])
+
+dnl Use the value of enable-strict to update CFLAGS
+dnl pac_cc_strict_flags contains the strict flags.
+dnl
+dnl -std=c89 is used to select the C89 version of the ANSI/ISO C standard.
+dnl As of this writing, many C compilers still accepted only this version,
+dnl not the later C99 version. When all compilers accept C99, this
+dnl should be changed to the appropriate standard level. Note that we've
+dnl had trouble with gcc 2.95.3 accepting -std=c89 but then trying to
+dnl compile programs with an invalid set of options
+dnl (-D __STRICT_ANSI__-trigraphs)
+AC_DEFUN([PAC_CC_STRICT],[
+dnl The macro argument is a list of keywords separated by colons or commas:
+dnl c89, c99, posix, noposix, opt, noopt, all, yes or no.  On return
+dnl pac_cc_strict_flags holds the strict flags that the compiler accepted;
+dnl it is empty when no keyword other than no was given.
+# enable_strict_done is exported, and the flags are only computed while it
+# is not yes
+export enable_strict_done
+if test "$enable_strict_done" != "yes" ; then
+
+ # Some comments on strict warning options.
+ # These were added to reduce warnings:
+ # -Wno-missing-field-initializers -- We want to allow a struct to be
+ # initialized to zero using "struct x y = {0};" and not require
+ # each field to be initialized individually.
+ # -Wno-unused-parameter -- For portability, some parameters go unused
+ # when we have different implementations of functions for
+ # different platforms
+ # -Wno-unused-label -- We add fn_exit: and fn_fail: on all functions,
+ # but fn_fail may not be used if the function doesn't return an
+ # error.
+ # -Wno-sign-compare -- read() and write() return bytes read/written
+ # as a signed value, but we often compare this to size_t (or
+ # msg_sz_t) variables.
+ # -Wno-format-zero-length -- this warning is irritating and useless, since
+ # a zero-length format string is very well defined
+ # -Wno-type-limits -- There are places where we compare an unsigned to
+ # a constant that happens to be zero e.g., if x is unsigned and
+ # MIN_VAL is zero, we'd like to do "MPIU_Assert(x >= MIN_VAL);".
+ # Note this option is not supported by gcc 4.2. This needs to be added
+ # after most other warning flags, so that we catch a gcc bug on 32-bit
+ # that doesn't give a warning that this is unsupported, unless another
+ # warning is triggered, and then it gives an error.
+ # These were removed to reduce warnings:
+ # -Wcast-qual -- Sometimes we need to cast "volatile char*" to
+ # "char*", e.g., for memcpy.
+ # -Wpadded -- We catch struct padding with asserts when we need to
+ # -Wredundant-decls -- Having redundant declarations is benign and the
+ # code already has some.
+ # -Waggregate-return -- This seems to be a performance-related warning
+ # aggregate return values are legal in ANSI C, but they may be returned
+ # in memory rather than through a register. We do use aggregate return
+ # values, but they are structs of a single basic type (used to enforce
+ # type checking for relative vs. absolute ptrs), and with optimization
+ # the aggregate value is converted to a scalar.
+ # the embedded newlines in this string are safe because we evaluate each
+ # argument in the for-loop below and append them to the CFLAGS with a space
+ # as the separator instead
+ pac_common_strict_flags="
+ -Wall
+ -Wextra
+ -Wno-missing-field-initializers
+ -Wstrict-prototypes
+ -Wmissing-prototypes
+ -DGCC_WALL
+ -Wno-unused-parameter
+ -Wno-unused-label
+ -Wshadow
+ -Wmissing-declarations
+ -Wno-long-long
+ -Wfloat-equal
+ -Wdeclaration-after-statement
+ -Wundef
+ -Wno-endif-labels
+ -Wpointer-arith
+ -Wbad-function-cast
+ -Wcast-align
+ -Wwrite-strings
+ -Wno-sign-compare
+ -Wold-style-definition
+ -Wno-multichar
+ -Wno-deprecated-declarations
+ -Wpacked
+ -Wnested-externs
+ -Winvalid-pch
+ -Wno-pointer-sign
+ -Wvariadic-macros
+ -Wno-format-zero-length
+ -Wno-type-limits
+ "
+
+ # Defaults, used unless a keyword in the argument overrides them
+ enable_c89=yes
+ enable_c99=no
+ enable_posix=yes
+ enable_opt=yes
+ # Turn the colon or comma separated argument into a blank separated list
+ flags="`echo $1 | sed -e 's/:/ /g' -e 's/,/ /g'`"
+ for flag in ${flags}; do
+ case "$flag" in
+ c89)
+ enable_strict_done="yes"
+ enable_c89=yes
+ ;;
+ c99)
+ enable_strict_done="yes"
+ enable_c99=yes
+ ;;
+ posix)
+ enable_strict_done="yes"
+ enable_posix=yes
+ ;;
+ noposix)
+ enable_strict_done="yes"
+ enable_posix=no
+ ;;
+ opt)
+ enable_strict_done="yes"
+ enable_opt=yes
+ ;;
+ noopt)
+ enable_strict_done="yes"
+ enable_opt=no
+ ;;
+ all|yes)
+ enable_strict_done="yes"
+ enable_c89=yes
+ enable_posix=yes
+ enable_opt=yes
+ ;;
+ no)
+ # Accept and ignore this value
+ :
+ ;;
+ *)
+ if test -n "$flag" ; then
+ AC_MSG_WARN([Unrecognized value for enable-strict:$flag])
+ fi
+ ;;
+ esac
+ done
+
+ # Assemble the candidate flags; the list stays empty unless one of the
+ # keywords above set enable_strict_done
+ pac_cc_strict_flags=""
+ if test "${enable_strict_done}" = "yes" ; then
+ if test "${enable_opt}" = "yes" ; then
+ pac_cc_strict_flags="-O2"
+ fi
+ pac_cc_strict_flags="$pac_cc_strict_flags $pac_common_strict_flags"
+ if test "${enable_posix}" = "yes" ; then
+ PAC_APPEND_FLAG([-D_POSIX_C_SOURCE=199506L],[pac_cc_strict_flags])
+ fi
+ # We only allow one of strict-C99 or strict-C89 to be
+ # enabled. If C99 is enabled, we automatically disable C89.
+ if test "${enable_c99}" = "yes" ; then
+ PAC_APPEND_FLAG([-std=c99],[pac_cc_strict_flags])
+ elif test "${enable_c89}" = "yes" ; then
+ PAC_APPEND_FLAG([-std=c89],[pac_cc_strict_flags])
+ fi
+ fi
+
+ # See if the above options work with the compiler
+ # Each flag is tested with the flags accepted so far added to CFLAGS;
+ # CFLAGS itself is restored after every test
+ accepted_flags=""
+ for flag in $pac_cc_strict_flags ; do
+ PAC_PUSH_FLAG([CFLAGS])
+ CFLAGS="$CFLAGS $accepted_flags"
+ PAC_C_CHECK_COMPILER_OPTION([$flag],[accepted_flags="$accepted_flags $flag"],)
+ PAC_POP_FLAG([CFLAGS])
+ done
+ pac_cc_strict_flags=$accepted_flags
+fi
+])
+
+dnl/*D
+dnl PAC_ARG_STRICT - Add --enable-strict to configure.
+dnl
+dnl Synopsis:
+dnl PAC_ARG_STRICT
+dnl
+dnl Output effects:
+dnl Adds '--enable-strict' to the command line.
+dnl
+dnl D*/
+AC_DEFUN([PAC_ARG_STRICT],[
+dnl Register the option, hand its value to PAC_CC_STRICT, then append the
+dnl resulting pac_cc_strict_flags to CFLAGS and export CFLAGS.
+AC_ARG_ENABLE(strict,
+ AC_HELP_STRING([--enable-strict], [Turn on strict compilation testing]))
+PAC_CC_STRICT($enable_strict)
+CFLAGS="$CFLAGS $pac_cc_strict_flags"
+export CFLAGS
+])
+
+dnl Return the integer structure alignment in pac_cv_c_max_integer_align
+dnl Possible values include
+dnl packed
+dnl two
+dnl four
+dnl eight
+dnl
+dnl In addition, a "Could not determine alignment" and a "error!"
+dnl return is possible.
+AC_DEFUN([PAC_C_MAX_INTEGER_ALIGN],[
+AC_CACHE_CHECK([for max C struct integer alignment],
+pac_cv_c_max_integer_align,[
+AC_TRY_RUN([
+#include <stdio.h>
+#define DBG(a,b,c) /* debug hook: expands to nothing */
+int main( int argc, char *argv[] )
+{
+    FILE *cf;
+    int is_packed = 1;
+    int is_two = 1;
+    int is_four = 1;
+    int is_eight = 1;
+    struct { char a; int b; } char_int;
+    struct { char a; short b; } char_short;
+    struct { char a; long b; } char_long;
+    struct { char a; int b; char c; } char_int_char;
+    struct { char a; short b; char c; } char_short_char;
+#ifdef HAVE_LONG_LONG_INT
+    struct { long long int a; char b; } lli_c;
+    struct { char a; long long int b; } c_lli;
+#endif
+    int size, extent, extent2; /* size: sum of member sizes; extent: sizeof the struct */
+
+    /* assume max integer alignment isn't 8 if we don't have
+     * an eight-byte value :)
+     */
+#ifdef HAVE_LONG_LONG_INT
+    if (sizeof(int) < 8 && sizeof(long) < 8 && sizeof(long long int) < 8)
+        is_eight = 0;
+#else
+    if (sizeof(int) < 8 && sizeof(long) < 8) is_eight = 0;
+#endif
+
+    size = sizeof(char) + sizeof(int);
+    extent = sizeof(char_int);
+    if (size != extent) is_packed = 0;
+    if ( (extent % 2) != 0) is_two = 0;
+    if ( (extent % 4) != 0) is_four = 0;
+    if (sizeof(int) == 8 && (extent % 8) != 0) is_eight = 0;
+    DBG("char_int",size,extent);
+
+    size = sizeof(char) + sizeof(short);
+    extent = sizeof(char_short);
+    if (size != extent) is_packed = 0;
+    if ( (extent % 2) != 0) is_two = 0;
+    if (sizeof(short) == 4 && (extent % 4) != 0) is_four = 0;
+    if (sizeof(short) == 8 && (extent % 8) != 0) is_eight = 0;
+    DBG("char_short",size,extent);
+
+    size = sizeof(char) + sizeof(long);
+    extent = sizeof(char_long);
+    if (size != extent) is_packed = 0;
+    if ( (extent % 2) != 0) is_two = 0;
+    if ( (extent % 4) != 0) is_four = 0;
+    if (sizeof(long) == 8 && (extent % 8) != 0) is_eight = 0;
+    DBG("char_long",size,extent);
+
+#ifdef HAVE_LONG_LONG_INT
+    size = sizeof(char) + sizeof(long long int);
+    extent = sizeof(lli_c);
+    extent2 = sizeof(c_lli);
+    if (size != extent) is_packed = 0;
+    if ( (extent % 2) != 0 && (extent2 % 2) != 0) is_two = 0;
+    if ( (extent % 4) != 0 && (extent2 % 4) != 0) is_four = 0;
+    if (sizeof(long long int) >= 8 && (extent % 8) != 0 && (extent2 % 8) != 0)
+        is_eight = 0;
+#endif
+
+    size = sizeof(char) + sizeof(int) + sizeof(char);
+    extent = sizeof(char_int_char);
+    if (size != extent) is_packed = 0;
+    if ( (extent % 2) != 0) is_two = 0;
+    if ( (extent % 4) != 0) is_four = 0;
+    if (sizeof(int) == 8 && (extent % 8) != 0) is_eight = 0;
+    DBG("char_int_char",size,extent);
+
+    size = sizeof(char) + sizeof(short) + sizeof(char);
+    extent = sizeof(char_short_char);
+    if (size != extent) is_packed = 0;
+    if ( (extent % 2) != 0) is_two = 0;
+    if (sizeof(short) == 4 && (extent % 4) != 0) is_four = 0;
+    if (sizeof(short) == 8 && (extent % 8) != 0) is_eight = 0;
+    DBG("char_short_char",size,extent);
+
+    /* Keep only the strongest surviving rule: mod 8 implies mod 4 and mod 2 */
+    if (is_eight) { is_four = 0; is_two = 0; }
+
+    if (is_four) is_two = 0;
+
+    /* Tabulate the results: exactly one flag should remain set */
+    cf = fopen( "ctest.out", "w" );
+    if (is_packed + is_two + is_four + is_eight == 0) {
+        fprintf( cf, "Could not determine alignment\n" );
+    }
+    else {
+        if (is_packed + is_two + is_four + is_eight != 1) {
+            fprintf( cf, "error!\n" );
+        }
+        else {
+            if (is_packed) fprintf( cf, "packed\n" );
+            if (is_two) fprintf( cf, "two\n" );
+            if (is_four) fprintf( cf, "four\n" );
+            if (is_eight) fprintf( cf, "eight\n" );
+        }
+    }
+    fclose( cf );
+    return 0;
+}],
+pac_cv_c_max_integer_align=`cat ctest.out`,
+pac_cv_c_max_integer_align="unknown",
+pac_cv_c_max_integer_align="$CROSS_ALIGN_STRUCT_INT")
+rm -f ctest.out
+])
+if test -z "$pac_cv_c_max_integer_align" ; then
+    pac_cv_c_max_integer_align="unknown"
+fi
+])
+
+dnl Return the floating point structure alignment in
+dnl pac_cv_c_max_fp_align.
+dnl
+dnl Possible values include:
+dnl packed
+dnl two
+dnl four
+dnl eight
+dnl sixteen
+dnl
+dnl In addition, a "Could not determine alignment" and a "error!"
+dnl return is possible.
+AC_DEFUN([PAC_C_MAX_FP_ALIGN],[
+AC_CACHE_CHECK([for max C struct floating point alignment],
+pac_cv_c_max_fp_align,[
+AC_TRY_RUN([
+#include <stdio.h>
+#define DBG(a,b,c) /* debug hook: expands to nothing */
+int main( int argc, char *argv[] )
+{
+    FILE *cf;
+    int is_packed = 1;
+    int is_two = 1;
+    int is_four = 1;
+    int is_eight = 1;
+    int is_sixteen = 1;
+    struct { char a; float b; } char_float;
+    struct { float b; char a; } float_char;
+    struct { char a; double b; } char_double;
+    struct { double b; char a; } double_char;
+#ifdef HAVE_LONG_DOUBLE
+    struct { char a; long double b; } char_long_double;
+    struct { long double b; char a; } long_double_char;
+    struct { long double a; int b; char c; } long_double_int_char;
+#endif
+    int size, extent1, extent2; /* extent1/extent2: sizeof for the two member orders */
+
+    size = sizeof(char) + sizeof(float);
+    extent1 = sizeof(char_float);
+    extent2 = sizeof(float_char);
+    if (size != extent1) is_packed = 0;
+    if ( (extent1 % 2) != 0 && (extent2 % 2) != 0) is_two = 0;
+    if ( (extent1 % 4) != 0 && (extent2 % 4) != 0) is_four = 0;
+    if (sizeof(float) == 8 && (extent1 % 8) != 0 && (extent2 % 8) != 0)
+        is_eight = 0;
+    DBG("char_float",size,extent1);
+
+    size = sizeof(char) + sizeof(double);
+    extent1 = sizeof(char_double);
+    extent2 = sizeof(double_char);
+    if (size != extent1) is_packed = 0;
+    if ( (extent1 % 2) != 0 && (extent2 % 2) != 0) is_two = 0;
+    if ( (extent1 % 4) != 0 && (extent2 % 4) != 0) is_four = 0;
+    if (sizeof(double) == 8 && (extent1 % 8) != 0 && (extent2 % 8) != 0)
+        is_eight = 0;
+    DBG("char_double",size,extent1);
+
+#ifdef HAVE_LONG_DOUBLE
+    size = sizeof(char) + sizeof(long double);
+    extent1 = sizeof(char_long_double);
+    extent2 = sizeof(long_double_char);
+    if (size != extent1) is_packed = 0;
+    if ( (extent1 % 2) != 0 && (extent2 % 2) != 0) is_two = 0;
+    if ( (extent1 % 4) != 0 && (extent2 % 4) != 0) is_four = 0;
+    if (sizeof(long double) >= 8 && (extent1 % 8) != 0 && (extent2 % 8) != 0)
+        is_eight = 0;
+    if (sizeof(long double) > 8 && (extent1 % 16) != 0
+        && (extent2 % 16) != 0) is_sixteen = 0;
+    DBG("char_long-double",size,extent1);
+
+    extent1 = sizeof(long_double_int_char);
+    if ( (extent1 % 2) != 0) is_two = 0;
+    if ( (extent1 % 4) != 0) is_four = 0;
+    if (sizeof(long double) >= 8 && (extent1 % 8) != 0) is_eight = 0;
+    if (sizeof(long double) > 8 && (extent1 % 16) != 0) is_sixteen = 0;
+#else
+    is_sixteen = 0;
+#endif
+
+    if (is_sixteen) { is_eight = 0; is_four = 0; is_two = 0; }
+
+    if (is_eight) { is_four = 0; is_two = 0; }
+
+    if (is_four) is_two = 0;
+
+    /* Tabulate the results: exactly one flag should remain set */
+    cf = fopen( "ctest.out", "w" );
+    if (is_packed + is_two + is_four + is_eight + is_sixteen == 0) {
+        fprintf( cf, "Could not determine alignment\n" );
+    }
+    else {
+        if (is_packed + is_two + is_four + is_eight + is_sixteen != 1) {
+            fprintf( cf, "error!\n" );
+        }
+        else {
+            if (is_packed) fprintf( cf, "packed\n" );
+            if (is_two) fprintf( cf, "two\n" );
+            if (is_four) fprintf( cf, "four\n" );
+            if (is_eight) fprintf( cf, "eight\n" );
+            if (is_sixteen) fprintf( cf, "sixteen\n" );
+        }
+    }
+    fclose( cf );
+    return 0;
+}],
+pac_cv_c_max_fp_align=`cat ctest.out`,
+pac_cv_c_max_fp_align="unknown",
+pac_cv_c_max_fp_align="$CROSS_ALIGN_STRUCT_FP")
+rm -f ctest.out
+])
+if test -z "$pac_cv_c_max_fp_align" ; then
+    pac_cv_c_max_fp_align="unknown"
+fi
+])
+
+dnl Return the floating point structure alignment in
+dnl pac_cv_c_max_double_fp_align.
+dnl
+dnl Possible values include:
+dnl packed
+dnl two
+dnl four
+dnl eight
+dnl
+dnl In addition, a "Could not determine alignment" and a "error!"
+dnl return is possible.
+AC_DEFUN([PAC_C_MAX_DOUBLE_FP_ALIGN],[
+AC_CACHE_CHECK([for max C struct alignment of structs with doubles],
+pac_cv_c_max_double_fp_align,[
+AC_TRY_RUN([
+#include <stdio.h>
+#define DBG(a,b,c) /* debug hook: expands to nothing */
+int main( int argc, char *argv[] )
+{
+    FILE *cf;
+    int is_packed = 1;
+    int is_two = 1;
+    int is_four = 1;
+    int is_eight = 1;
+    struct { char a; float b; } char_float;
+    struct { float b; char a; } float_char;
+    struct { char a; double b; } char_double;
+    struct { double b; char a; } double_char;
+    int size, extent1, extent2; /* extent1/extent2: sizeof for the two member orders */
+
+    size = sizeof(char) + sizeof(float);
+    extent1 = sizeof(char_float);
+    extent2 = sizeof(float_char);
+    if (size != extent1) is_packed = 0;
+    if ( (extent1 % 2) != 0 && (extent2 % 2) != 0) is_two = 0;
+    if ( (extent1 % 4) != 0 && (extent2 % 4) != 0) is_four = 0;
+    if (sizeof(float) == 8 && (extent1 % 8) != 0 && (extent2 % 8) != 0)
+        is_eight = 0;
+    DBG("char_float",size,extent1);
+
+    size = sizeof(char) + sizeof(double);
+    extent1 = sizeof(char_double);
+    extent2 = sizeof(double_char);
+    if (size != extent1) is_packed = 0;
+    if ( (extent1 % 2) != 0 && (extent2 % 2) != 0) is_two = 0;
+    if ( (extent1 % 4) != 0 && (extent2 % 4) != 0) is_four = 0;
+    if (sizeof(double) == 8 && (extent1 % 8) != 0 && (extent2 % 8) != 0)
+        is_eight = 0;
+    DBG("char_double",size,extent1);
+
+    if (is_eight) { is_four = 0; is_two = 0; }
+
+    if (is_four) is_two = 0;
+
+    /* Tabulate the results: exactly one flag should remain set */
+    cf = fopen( "ctest.out", "w" );
+    if (is_packed + is_two + is_four + is_eight == 0) {
+        fprintf( cf, "Could not determine alignment\n" );
+    }
+    else {
+        if (is_packed + is_two + is_four + is_eight != 1) {
+            fprintf( cf, "error!\n" );
+        }
+        else {
+            if (is_packed) fprintf( cf, "packed\n" );
+            if (is_two) fprintf( cf, "two\n" );
+            if (is_four) fprintf( cf, "four\n" );
+            if (is_eight) fprintf( cf, "eight\n" );
+        }
+    }
+    fclose( cf );
+    return 0;
+}],
+pac_cv_c_max_double_fp_align=`cat ctest.out`,
+pac_cv_c_max_double_fp_align="unknown",
+pac_cv_c_max_double_fp_align="$CROSS_ALIGN_STRUCT_DOUBLE_FP")
+rm -f ctest.out
+])
+if test -z "$pac_cv_c_max_double_fp_align" ; then
+    pac_cv_c_max_double_fp_align="unknown"
+fi
+])
+AC_DEFUN([PAC_C_MAX_LONGDOUBLE_FP_ALIGN],[
+AC_CACHE_CHECK([for max C struct floating point alignment with long doubles],
+pac_cv_c_max_longdouble_fp_align,[
+AC_TRY_RUN([
+#include <stdio.h>
+#define DBG(a,b,c) /* debug hook: expands to nothing */
+int main( int argc, char *argv[] )
+{
+    FILE *cf;
+    int is_packed = 1;
+    int is_two = 1;
+    int is_four = 1;
+    int is_eight = 1;
+    int is_sixteen = 1;
+    struct { char a; long double b; } char_long_double;
+    struct { long double b; char a; } long_double_char;
+    struct { long double a; int b; char c; } long_double_int_char;
+    int size, extent1, extent2; /* extent1/extent2: sizeof for the two member orders */
+
+    size = sizeof(char) + sizeof(long double);
+    extent1 = sizeof(char_long_double);
+    extent2 = sizeof(long_double_char);
+    if (size != extent1) is_packed = 0;
+    if ( (extent1 % 2) != 0 && (extent2 % 2) != 0) is_two = 0;
+    if ( (extent1 % 4) != 0 && (extent2 % 4) != 0) is_four = 0;
+    if (sizeof(long double) >= 8 && (extent1 % 8) != 0 && (extent2 % 8) != 0)
+        is_eight = 0;
+    if (sizeof(long double) > 8 && (extent1 % 16) != 0
+        && (extent2 % 16) != 0) is_sixteen = 0;
+    DBG("char_long-double",size,extent1);
+
+    extent1 = sizeof(long_double_int_char);
+    if ( (extent1 % 2) != 0) is_two = 0;
+    if ( (extent1 % 4) != 0) is_four = 0;
+    if (sizeof(long double) >= 8 && (extent1 % 8) != 0) is_eight = 0;
+    if (sizeof(long double) > 8 && (extent1 % 16) != 0) is_sixteen = 0;
+
+    if (is_sixteen) { is_eight = 0; is_four = 0; is_two = 0; }
+
+    if (is_eight) { is_four = 0; is_two = 0; }
+
+    if (is_four) is_two = 0;
+
+    /* Tabulate the results: exactly one flag should remain set */
+    cf = fopen( "ctest.out", "w" );
+    if (is_packed + is_two + is_four + is_eight + is_sixteen == 0) {
+        fprintf( cf, "Could not determine alignment\n" );
+    }
+    else {
+        if (is_packed + is_two + is_four + is_eight + is_sixteen != 1) {
+            fprintf( cf, "error!\n" );
+        }
+        else {
+            if (is_packed) fprintf( cf, "packed\n" );
+            if (is_two) fprintf( cf, "two\n" );
+            if (is_four) fprintf( cf, "four\n" );
+            if (is_eight) fprintf( cf, "eight\n" );
+            if (is_sixteen) fprintf( cf, "sixteen\n" );
+        }
+    }
+    fclose( cf );
+    return 0;
+}],
+pac_cv_c_max_longdouble_fp_align=`cat ctest.out`,
+pac_cv_c_max_longdouble_fp_align="unknown",
+pac_cv_c_max_longdouble_fp_align="$CROSS_ALIGN_STRUCT_LONGDOUBLE_FP")
+rm -f ctest.out
+])
+if test -z "$pac_cv_c_max_longdouble_fp_align" ; then
+    pac_cv_c_max_longdouble_fp_align="unknown"
+fi
+])
+
+dnl Other tests assume that there is potentially a maximum alignment
+dnl and that if there is no maximum alignment, or a type is smaller than
+dnl that value, then we align on the size of the value, with the exception
+dnl of the "position-based alignment" rules we test for separately.
+dnl
+dnl It turns out that these assumptions have fallen short in at least one
+dnl case, on MacBook Pros, where doubles are aligned on 4-byte boundaries
+dnl even when long doubles are aligned on 16-byte boundaries. So this test
+dnl is here specifically to handle this case.
+dnl
+dnl Puts result in pac_cv_c_double_alignment_exception.
+dnl
+dnl Possible values currently include no and four.
+dnl
+AC_DEFUN([PAC_C_DOUBLE_ALIGNMENT_EXCEPTION],[
+AC_CACHE_CHECK([if double alignment breaks rules, find actual alignment],
+pac_cv_c_double_alignment_exception,[
+AC_TRY_RUN([
+#include <stdio.h>
+#define DBG(a,b,c) /* debug hook: expands to nothing */
+int main( int argc, char *argv[] )
+{
+    FILE *cf;
+    struct { char a; double b; } char_double;
+    struct { double b; char a; } double_char;
+    int extent1, extent2, align_4 = 0;
+
+    extent1 = sizeof(char_double);
+    extent2 = sizeof(double_char);
+
+    /* we're interested in the largest value, will let separate test
+     * deal with position-based issues.
+     */
+    if (extent1 < extent2) extent1 = extent2;
+    if ((sizeof(double) == 8) && (extent1 % 8) != 0) {
+        if (extent1 % 4 == 0) {
+#ifdef HAVE_MAX_FP_ALIGNMENT
+            if (HAVE_MAX_FP_ALIGNMENT >= 8) align_4 = 1;
+#else
+            align_4 = 1;
+#endif
+        }
+    }
+
+    cf = fopen( "ctest.out", "w" );
+
+    if (align_4) fprintf( cf, "four\n" );
+    else fprintf( cf, "no\n" );
+
+    fclose( cf );
+    return 0;
+}],
+pac_cv_c_double_alignment_exception=`cat ctest.out`,
+pac_cv_c_double_alignment_exception="unknown",
+pac_cv_c_double_alignment_exception="$CROSS_ALIGN_DOUBLE_EXCEPTION")
+rm -f ctest.out
+])
+if test -z "$pac_cv_c_double_alignment_exception" ; then
+    pac_cv_c_double_alignment_exception="unknown"
+fi
+])
+
+dnl Test for odd struct alignment rule that only applies max.
+dnl padding when double value is at front of type.
+dnl Puts result in pac_cv_c_double_pos_align.
+dnl
+dnl Search for "Power alignment mode" for more details.
+dnl
+dnl Possible values include yes, no, and unknown.
+dnl
+AC_DEFUN([PAC_C_DOUBLE_POS_ALIGN],[
+AC_CACHE_CHECK([if alignment of structs with doubles is based on position],
+pac_cv_c_double_pos_align,[
+AC_TRY_RUN([
+#include <stdio.h>
+#define DBG(a,b,c) /* debug hook: expands to nothing */
+int main( int argc, char *argv[] )
+{
+    FILE *cf;
+    int padding_varies_by_pos = 0;
+    struct { char a; double b; } char_double;
+    struct { double b; char a; } double_char;
+    int extent1, extent2;
+
+    extent1 = sizeof(char_double);
+    extent2 = sizeof(double_char);
+    if (extent1 != extent2) padding_varies_by_pos = 1; /* member order changes the size */
+
+    cf = fopen( "ctest.out", "w" );
+    if (padding_varies_by_pos) fprintf( cf, "yes\n" );
+    else fprintf( cf, "no\n" );
+
+    fclose( cf );
+    return 0;
+}],
+pac_cv_c_double_pos_align=`cat ctest.out`,
+pac_cv_c_double_pos_align="unknown",
+pac_cv_c_double_pos_align="$CROSS_ALIGN_DOUBLE_POS")
+rm -f ctest.out
+])
+if test -z "$pac_cv_c_double_pos_align" ; then
+    pac_cv_c_double_pos_align="unknown"
+fi
+])
+
+dnl Test for odd struct alignment rule that only applies max.
+dnl padding when long long int value is at front of type.
+dnl Puts result in pac_cv_c_llint_pos_align.
+dnl
+dnl Search for "Power alignment mode" for more details.
+dnl
+dnl Possible values include yes, no, and unknown.
+dnl
+AC_DEFUN([PAC_C_LLINT_POS_ALIGN],[
+AC_CACHE_CHECK([if alignment of structs with long long ints is based on position],
+pac_cv_c_llint_pos_align,[
+AC_TRY_RUN([
+#include <stdio.h>
+#define DBG(a,b,c) /* debug hook: expands to nothing */
+int main( int argc, char *argv[] )
+{
+    FILE *cf;
+    int padding_varies_by_pos = 0;
+#ifdef HAVE_LONG_LONG_INT
+    struct { char a; long long int b; } char_llint;
+    struct { long long int b; char a; } llint_char;
+    int extent1, extent2;
+
+    extent1 = sizeof(char_llint);
+    extent2 = sizeof(llint_char);
+    if (extent1 != extent2) padding_varies_by_pos = 1; /* member order changes the size */
+#endif
+
+    cf = fopen( "ctest.out", "w" );
+    if (padding_varies_by_pos) fprintf( cf, "yes\n" );
+    else fprintf( cf, "no\n" );
+
+    fclose( cf );
+    return 0;
+}],
+pac_cv_c_llint_pos_align=`cat ctest.out`,
+pac_cv_c_llint_pos_align="unknown",
+pac_cv_c_llint_pos_align="$CROSS_ALIGN_LLINT_POS")
+rm -f ctest.out
+])
+if test -z "$pac_cv_c_llint_pos_align" ; then
+    pac_cv_c_llint_pos_align="unknown"
+fi
+])
+
+dnl/*D
+dnl PAC_FUNC_NEEDS_DECL - Set NEEDS_<funcname>_DECL if a declaration is needed
+dnl
+dnl Synopsis:
+dnl PAC_FUNC_NEEDS_DECL(headerfiles,funcname)
+dnl
+dnl Output Effect:
+dnl Sets 'NEEDS_<funcname>_DECL' if 'funcname' is not declared by the
+dnl headerfiles.
+dnl
+dnl Approach:
+dnl Try to compile a program with the function, but passed with an incorrect
+dnl calling sequence. If the compilation fails, then the declaration
+dnl is provided within the header files. If the compilation succeeds,
+dnl the declaration is required.
+dnl
+dnl We use a 'double' as the first argument to try and catch varargs
+dnl routines that may use an int or pointer as the first argument.
+dnl
+dnl There is one difficulty - if the compiler has been instructed to
+dnl fail on implicitly defined functions, then this test will always
+dnl fail.
+dnl
+dnl D*/
+AC_DEFUN([PAC_FUNC_NEEDS_DECL],[
+AC_CACHE_CHECK([whether $2 needs a declaration],
+pac_cv_func_decl_$2,[
+AC_TRY_COMPILE([$1
+int $2(double, int, double, const char *);],[int a=$2(1.0,27,1.0,"foo");],
+pac_cv_func_decl_$2=yes,pac_cv_func_decl_$2=no)])
+if test "$pac_cv_func_decl_$2" = "yes" ; then  # the conflicting prototype compiled so no header declares it
+changequote(<<,>>)dnl
+define(<<PAC_FUNC_NAME>>, translit(NEEDS_$2_DECL, [a-z *], [A-Z__]))dnl
+changequote([, ])dnl
+    AC_DEFINE_UNQUOTED(PAC_FUNC_NAME,1,[Define if $2 needs a declaration])
+undefine([PAC_FUNC_NAME])
+fi
+])
+
+dnl PAC_C_GNU_ATTRIBUTE - See if the GCC __attribute__ specifier is allowed.
+dnl Use the following
+dnl #ifndef HAVE_GCC_ATTRIBUTE
+dnl #define __attribute__(a)
+dnl #endif
+dnl If *not*, define __attribute__(a) as null
+dnl
+dnl We start by requiring Gcc. Some other compilers accept __attribute__
+dnl but generate warning messages, or have different interpretations
+dnl (which seems to make __attribute__ just as bad as #pragma)
+dnl For example, the Intel icc compiler accepts __attribute__ and
+dnl __attribute__((pure)) but generates warnings for __attribute__((format...))
+dnl
+AC_DEFUN([PAC_C_GNU_ATTRIBUTE],[
+AC_REQUIRE([AC_PROG_CC_GNU])
+if test "$ac_cv_prog_gcc" = "yes" ; then
+    AC_CACHE_CHECK([whether __attribute__ allowed],
+        [pac_cv_gnu_attr_pure],[
+        AC_TRY_COMPILE([int foo(int) __attribute__ ((pure));],[int a;],
+            [pac_cv_gnu_attr_pure=yes],[pac_cv_gnu_attr_pure=no])])
+    AC_CACHE_CHECK([whether __attribute__((format)) allowed],
+        [pac_cv_gnu_attr_format],[
+        AC_TRY_COMPILE([int foo(char *,...) __attribute__ ((format(printf,1,2)));],[int a;],
+            [pac_cv_gnu_attr_format=yes],[pac_cv_gnu_attr_format=no])])
+    if test "$pac_cv_gnu_attr_pure" = "yes" && test "$pac_cv_gnu_attr_format" = "yes" ; then
+        AC_DEFINE(HAVE_GCC_ATTRIBUTE,1,[Define if GNU __attribute__ is supported])
+    fi
+fi
+])
+dnl
+dnl Check for a broken install (fails to preserve file modification times,
+dnl thus breaking libraries).
+dnl
+dnl Create a library, install it, and then try to link against it.
+AC_DEFUN([PAC_PROG_INSTALL_BREAKS_LIBS],[
+AC_CACHE_CHECK([whether install breaks libraries],
+ac_cv_prog_install_breaks_libs,[
+AC_REQUIRE([AC_PROG_RANLIB])
+AC_REQUIRE([AC_PROG_INSTALL])
+AC_REQUIRE([AC_PROG_CC])
+ac_cv_prog_install_breaks_libs=yes
+# Pessimistic default above; this cleanup is needed for Mac OSX 10.5
+rm -rf conftest.dSYM
+rm -f libconftest* conftest*
+echo 'int foo(int);int foo(int a){return a;}' > conftest1.c
+echo 'extern int foo(int); int main( int argc, char **argv){ return foo(0); }' > conftest2.c
+if ${CC-cc} $CFLAGS -c conftest1.c >conftest.out 2>&1 ; then
+ if ${AR-ar} cr libconftest.a conftest1.o >/dev/null 2>&1 ; then
+ if ${RANLIB-:} libconftest.a >/dev/null 2>&1 ; then
+ # Anything less than sleep 10, and Mac OS/X (Darwin)
+ # will claim that install works because ranlib won't complain
+ sleep 10
+ libinstall="$INSTALL_DATA"
+ eval "libinstall=\"$libinstall\""
+ if ${libinstall} libconftest.a libconftest1.a >/dev/null 2>&1 ; then
+ if ${CC-cc} $CFLAGS -o conftest conftest2.c $LDFLAGS libconftest1.a >>conftest.out 2>&1 && test -x conftest ; then
+ # Success! The installed copy still links so install works as is
+ ac_cv_prog_install_breaks_libs=no
+ else
+ # Failure! Does install -p work?
+ rm -f libconftest1.a
+ if ${libinstall} -p libconftest.a libconftest1.a >/dev/null 2>&1 ; then
+ if ${CC-cc} $CFLAGS -o conftest conftest2.c $LDFLAGS libconftest1.a >>conftest.out 2>&1 && test -x conftest ; then
+ # Success! Install works when given -p
+ ac_cv_prog_install_breaks_libs="no, with -p"
+ fi
+ fi
+ fi
+ fi
+ fi
+ fi
+fi
+# This is needed for Mac OSX 10.5
+rm -rf conftest.dSYM
+rm -f conftest* libconftest*])
+
+if test -z "$RANLIB_AFTER_INSTALL" ; then
+ RANLIB_AFTER_INSTALL=no
+fi
+case "$ac_cv_prog_install_breaks_libs" in
+ yes)
+ RANLIB_AFTER_INSTALL=yes
+ ;;
+ "no, with -p")
+ INSTALL_DATA="$INSTALL_DATA -p"
+ ;;
+ *)
+ # Any other result: leave RANLIB_AFTER_INSTALL and INSTALL_DATA alone
+ :
+ ;;
+esac
+AC_SUBST(RANLIB_AFTER_INSTALL)
+])
+
+#
+# determine if the compiler defines a symbol containing the function name
+# Inspired by checks within the src/mpid/globus/configure.in file in MPICH2
+#
+# These tests check not only that the compiler defines some symbol, such
+# as __FUNCTION__, but that the symbol correctly names the function.
+#
+# Defines
+# HAVE__FUNC__ (if __func__ defined)
+# HAVE_CAP__FUNC__ (if __FUNC__ defined)
+# HAVE__FUNCTION__ (if __FUNCTION__ defined)
+#
+AC_DEFUN([PAC_CC_FUNCTION_NAME_SYMBOL],[
+AC_CACHE_CHECK([whether the compiler defines __func__],
+pac_cv_have__func__,[
+tmp_am_cross=no
+AC_RUN_IFELSE([
+AC_LANG_SOURCE([
+#include <string.h>
+int foo(void);
+int foo(void)
+{
+    return (strcmp(__func__, "foo") == 0);
+}
+int main(int argc, char ** argv)
+{
+    return (foo() ? 0 : 1);
+}
+])
+], pac_cv_have__func__=yes, pac_cv_have__func__=no,tmp_am_cross=yes)
+if test "$tmp_am_cross" = yes ; then  # cannot run when cross-compiling: settle for a link test
+    AC_LINK_IFELSE([
+    AC_LANG_SOURCE([
+#include <string.h>
+int foo(void);
+int foo(void)
+{
+    return (strcmp(__func__, "foo") == 0);
+}
+int main(int argc, char ** argv)
+{
+    return (foo() ? 0 : 1);
+}
+    ])
+], pac_cv_have__func__=yes, pac_cv_have__func__=no)
+fi
+])
+
+if test "$pac_cv_have__func__" = "yes" ; then
+    AC_DEFINE(HAVE__FUNC__,,[define if the compiler defines __func__])
+fi
+
+AC_CACHE_CHECK([whether the compiler defines __FUNC__],
+pac_cv_have_cap__func__,[
+tmp_am_cross=no
+AC_RUN_IFELSE([
+AC_LANG_SOURCE([
+#include <string.h>
+int foo(void);
+int foo(void)
+{
+    return (strcmp(__FUNC__, "foo") == 0);
+}
+int main(int argc, char ** argv)
+{
+    return (foo() ? 0 : 1);
+}
+])
+], pac_cv_have_cap__func__=yes, pac_cv_have_cap__func__=no,tmp_am_cross=yes)
+if test "$tmp_am_cross" = yes ; then  # cannot run when cross-compiling: settle for a link test
+    AC_LINK_IFELSE([
+    AC_LANG_SOURCE([
+#include <string.h>
+int foo(void);
+int foo(void)
+{
+    return (strcmp(__FUNC__, "foo") == 0);
+}
+int main(int argc, char ** argv)
+{
+    return (foo() ? 0 : 1);
+}
+    ])
+], pac_cv_have_cap__func__=yes, pac_cv_have_cap__func__=no)
+fi
+])
+
+if test "$pac_cv_have_cap__func__" = "yes" ; then
+    AC_DEFINE(HAVE_CAP__FUNC__,,[define if the compiler defines __FUNC__])
+fi
+
+AC_CACHE_CHECK([whether the compiler sets __FUNCTION__],
+pac_cv_have__function__,[
+tmp_am_cross=no
+AC_RUN_IFELSE([
+AC_LANG_SOURCE([
+#include <string.h>
+int foo(void);
+int foo(void)
+{
+    return (strcmp(__FUNCTION__, "foo") == 0);
+}
+int main(int argc, char ** argv)
+{
+    return (foo() ? 0 : 1);
+}
+])
+], pac_cv_have__function__=yes, pac_cv_have__function__=no,tmp_am_cross=yes)
+if test "$tmp_am_cross" = yes ; then  # cannot run when cross-compiling: settle for a link test
+    AC_LINK_IFELSE([
+    AC_LANG_SOURCE([
+#include <string.h>
+int foo(void);
+int foo(void)
+{
+    return (strcmp(__FUNCTION__, "foo") == 0);
+}
+int main(int argc, char ** argv)
+{
+    return (foo() ? 0 : 1);
+}
+    ])
+], pac_cv_have__function__=yes, pac_cv_have__function__=no)
+fi
+])
+
+if test "$pac_cv_have__function__" = "yes" ; then
+    AC_DEFINE(HAVE__FUNCTION__,,[define if the compiler defines __FUNCTION__])
+fi
+
+])
+
+
+dnl Check structure alignment
+AC_DEFUN([PAC_STRUCT_ALIGNMENT],[
+    # Every candidate rule starts as possible; the struct sizes below rule them out
+    is_packed=1
+    is_two=1
+    is_four=1
+    is_eight=1
+    is_largest=1
+
+    # Probe for long double support
+    AC_TRY_COMPILE(,[long double a;],have_long_double=yes,have_long_double=no)
+
+    # Sizes of the basic types used as struct members
+    AC_CHECK_SIZEOF(char)
+    AC_CHECK_SIZEOF(int)
+    AC_CHECK_SIZEOF(short)
+    AC_CHECK_SIZEOF(long)
+    AC_CHECK_SIZEOF(float)
+    AC_CHECK_SIZEOF(double)
+    AC_CHECK_SIZEOF(long double)
+
+    # struct { char; int }
+    AC_CHECK_SIZEOF(char_int, 0, [typedef struct { char a; int b; } char_int; ])
+    size=`expr $ac_cv_sizeof_char + $ac_cv_sizeof_int`
+    extent=$ac_cv_sizeof_char_int
+    if test "$size" != "$extent" ; then is_packed=0 ; fi
+    if test "`expr $extent % $ac_cv_sizeof_int`" != "0" ; then is_largest=0 ; fi
+    if test "`expr $extent % 2`" != "0" ; then is_two=0 ; fi
+    if test "`expr $extent % 4`" != "0" ; then is_four=0 ; fi
+    if test "$ac_cv_sizeof_int" = "8" -a "`expr $extent % 8`" != "0" ; then
+        is_eight=0
+    fi
+
+    # struct { char; short }
+    AC_CHECK_SIZEOF(char_short, 0, [typedef struct { char a; short b; } char_short; ])
+    size=`expr $ac_cv_sizeof_char + $ac_cv_sizeof_short`
+    extent=$ac_cv_sizeof_char_short
+    if test "$size" != "$extent" ; then is_packed=0 ; fi
+    if test "`expr $extent % $ac_cv_sizeof_short`" != "0" ; then is_largest=0 ; fi
+    if test "`expr $extent % 2`" != "0" ; then is_two=0 ; fi
+    if test "$ac_cv_sizeof_short" = "4" -a "`expr $extent % 4`" != "0" ; then
+        is_four=0
+    fi
+    if test "$ac_cv_sizeof_short" = "8" -a "`expr $extent % 8`" != "0" ; then
+        is_eight=0
+    fi
+
+    # struct { char; long }
+    AC_CHECK_SIZEOF(char_long, 0, [typedef struct { char a; long b; } char_long; ])
+    size=`expr $ac_cv_sizeof_char + $ac_cv_sizeof_long`
+    extent=$ac_cv_sizeof_char_long
+    if test "$size" != "$extent" ; then is_packed=0 ; fi
+    if test "`expr $extent % $ac_cv_sizeof_long`" != "0" ; then is_largest=0 ; fi
+    if test "`expr $extent % 2`" != "0" ; then is_two=0 ; fi
+    if test "`expr $extent % 4`" != "0" ; then is_four=0 ; fi
+    if test "$ac_cv_sizeof_long" = "8" -a "`expr $extent % 8`" != "0" ; then
+        is_eight=0
+    fi
+
+    # struct { char; float }
+    AC_CHECK_SIZEOF(char_float, 0, [typedef struct { char a; float b; } char_float; ])
+    size=`expr $ac_cv_sizeof_char + $ac_cv_sizeof_float`
+    extent=$ac_cv_sizeof_char_float
+    if test "$size" != "$extent" ; then is_packed=0 ; fi
+    if test "`expr $extent % $ac_cv_sizeof_float`" != "0" ; then is_largest=0 ; fi
+    if test "`expr $extent % 2`" != "0" ; then is_two=0 ; fi
+    if test "`expr $extent % 4`" != "0" ; then is_four=0 ; fi
+    if test "$ac_cv_sizeof_float" = "8" -a "`expr $extent % 8`" != "0" ; then
+        is_eight=0
+    fi
+
+    # struct { char; double }
+    AC_CHECK_SIZEOF(char_double, 0, [typedef struct { char a; double b; } char_double; ])
+    size=`expr $ac_cv_sizeof_char + $ac_cv_sizeof_double`
+    extent=$ac_cv_sizeof_char_double
+    if test "$size" != "$extent" ; then is_packed=0 ; fi
+    if test "`expr $extent % $ac_cv_sizeof_double`" != "0" ; then is_largest=0 ; fi
+    if test "`expr $extent % 2`" != "0" ; then is_two=0 ; fi
+    if test "`expr $extent % 4`" != "0" ; then is_four=0 ; fi
+    if test "$ac_cv_sizeof_double" = "8" -a "`expr $extent % 8`" != "0" ; then
+        is_eight=0
+    fi
+
+    # struct { char; long double } - only when the compiler accepted long double
+    if test "$have_long_double" = "yes"; then
+        AC_CHECK_SIZEOF(char_long_double, 0, [
+            typedef struct {
+                char a;
+                long double b;
+            } char_long_double;
+        ])
+        size=`expr $ac_cv_sizeof_char + $ac_cv_sizeof_long_double`
+        extent=$ac_cv_sizeof_char_long_double
+        if test "$size" != "$extent" ; then is_packed=0 ; fi
+        if test "`expr $extent % $ac_cv_sizeof_long_double`" != "0" ; then is_largest=0 ; fi
+        if test "`expr $extent % 2`" != "0" ; then is_two=0 ; fi
+        if test "`expr $extent % 4`" != "0" ; then is_four=0 ; fi
+        if test "$ac_cv_sizeof_long_double" = "8" -a "`expr $extent % 8`" != "0" ; then
+            is_eight=0
+        fi
+    fi
+
+    # struct { char; int; char }
+    AC_CHECK_SIZEOF(char_int_char, 0, [
+        typedef struct {
+            char a;
+            int b;
+            char c;
+        } char_int_char;
+    ])
+    size=`expr $ac_cv_sizeof_char + $ac_cv_sizeof_int + $ac_cv_sizeof_char`
+    extent=$ac_cv_sizeof_char_int_char
+    if test "$size" != "$extent" ; then is_packed=0 ; fi
+    if test "`expr $extent % $ac_cv_sizeof_int`" != "0" ; then is_largest=0 ; fi
+    if test "`expr $extent % 2`" != "0" ; then is_two=0 ; fi
+    if test "`expr $extent % 4`" != "0" ; then is_four=0 ; fi
+    if test "$ac_cv_sizeof_int" = "8" -a "`expr $extent % 8`" != "0" ; then
+        is_eight=0
+    fi
+
+    # struct { char; short; char }
+    AC_CHECK_SIZEOF(char_short_char, 0, [
+        typedef struct {
+            char a;
+            short b;
+            char c;
+        } char_short_char;
+    ])
+    size=`expr $ac_cv_sizeof_char + $ac_cv_sizeof_short + $ac_cv_sizeof_char`
+    extent=$ac_cv_sizeof_char_short_char
+    if test "$size" != "$extent" ; then is_packed=0 ; fi
+    if test "`expr $extent % $ac_cv_sizeof_short`" != "0" ; then is_largest=0 ; fi
+    if test "`expr $extent % 2`" != "0" ; then is_two=0 ; fi
+    if test "$ac_cv_sizeof_short" = "4" -a "`expr $extent % 4`" != "0" ; then
+        is_four=0
+    fi
+    if test "$ac_cv_sizeof_short" = "8" -a "`expr $extent % 8`" != "0" ; then
+        is_eight=0
+    fi
+
+    # A stronger modulus implies the weaker ones so keep only the strongest
+    if test $is_eight = 1 ; then is_four=0 ; is_two=0 ; fi
+    if test $is_four = 1 ; then is_two=0 ; fi
+
+    # The largest-member rule takes precedence over mod 8
+    if test $is_largest = 1 ; then is_eight=0 ; fi
+
+    # Exactly one surviving rule names the alignment; zero or several give unknown
+    if test "`expr $is_packed + $is_largest + $is_two + $is_four + $is_eight`" = "0" ; then
+        pac_cv_struct_alignment="unknown"
+    elif test "`expr $is_packed + $is_largest + $is_two + $is_four + $is_eight`" != "1" ; then
+        pac_cv_struct_alignment="unknown"
+    elif test $is_packed = 1 ; then
+        pac_cv_struct_alignment="packed"
+    elif test $is_largest = 1 ; then
+        pac_cv_struct_alignment="largest"
+    elif test $is_two = 1 ; then
+        pac_cv_struct_alignment="two"
+    elif test $is_four = 1 ; then
+        pac_cv_struct_alignment="four"
+    elif test $is_eight = 1 ; then
+        pac_cv_struct_alignment="eight"
+    fi
+])
+dnl
+dnl PAC_C_MACRO_VA_ARGS
+dnl
+dnl will AC_DEFINE([HAVE_MACRO_VA_ARGS]) if the compiler supports C99 variable
+dnl length argument lists in macros (#define foo(...) bar(__VA_ARGS__))
+AC_DEFUN([PAC_C_MACRO_VA_ARGS],[
+    AC_MSG_CHECKING([for variable argument list macro functionality])
+    AC_LINK_IFELSE([AC_LANG_PROGRAM([
+        #include <stdio.h>
+        #define conftest_va_arg_macro(...) printf(__VA_ARGS__)
+    ],
+    [conftest_va_arg_macro("a test %d", 3);])],
+    [AC_DEFINE([HAVE_MACRO_VA_ARGS],[1],[Define if C99-style variable argument list macro functionality])
+     AC_MSG_RESULT([yes])],
+    [AC_MSG_RESULT([no])])
+])dnl
diff --git a/ompi/mca/io/romio/romio/confdb/aclocal_cxx.m4 b/ompi/mca/io/romio/romio/confdb/aclocal_cxx.m4
new file mode 100644
index 0000000000..c43b64f0ab
--- /dev/null
+++ b/ompi/mca/io/romio/romio/confdb/aclocal_cxx.m4
@@ -0,0 +1,174 @@
+dnl PAC_PROG_CXX - reprioritize the C++ compiler search order
+AC_DEFUN([PAC_PROG_CXX],[
+PAC_PUSH_FLAG([CXXFLAGS])
+AC_PROG_CXX([g++ icpc pgCC xlC pathCC cl])
+PAC_POP_FLAG([CXXFLAGS])
+])
+
+dnl This is from crypt.to/autoconf-archive, slightly modified.
+dnl It defines bool as int if it is not available
+dnl
+AC_DEFUN([AC_CXX_BOOL],
+[AC_CACHE_CHECK(whether the compiler recognizes bool as a built-in type,
+ac_cv_cxx_bool,
+[AC_LANG_SAVE
+ AC_LANG_CPLUSPLUS
+ AC_TRY_COMPILE([
+int f(int x){return 1;}
+int f(char x){return 1;}
+int f(bool x){return 1;}
+],[bool b = true; return f(b);],
+ ac_cv_cxx_bool=yes, ac_cv_cxx_bool=no)
+ AC_LANG_RESTORE
+])
+if test "$ac_cv_cxx_bool" != yes; then  # only reached when the overload test failed to compile
+  AC_DEFINE(bool,int,[define to int if bool is not a built-in type])
+fi
+])
+
+dnl This is from crypt.to/autoconf-archive, slightly modified (name defined)
+dnl
+AC_DEFUN([AC_CXX_EXCEPTIONS],
+[AC_CACHE_CHECK([whether the compiler supports exceptions],
+    [ac_cv_cxx_exceptions],
+[AC_LANG_SAVE
+    AC_LANG_CPLUSPLUS
+    AC_TRY_COMPILE(,[try { throw 1; } catch (int i) { return i; }],
+        [ac_cv_cxx_exceptions=yes], [ac_cv_cxx_exceptions=no])
+    AC_LANG_RESTORE
+])
+if test "x$ac_cv_cxx_exceptions" = xyes; then
+    AC_DEFINE(HAVE_CXX_EXCEPTIONS,,[define if the compiler supports exceptions])
+fi
+])
+
+dnl This is from crypt.to/autoconf-archive
+dnl
+AC_DEFUN([AC_CXX_NAMESPACES],
+[AC_CACHE_CHECK([whether the compiler implements namespaces],
+    [ac_cv_cxx_namespaces],
+[AC_LANG_SAVE
+    AC_LANG_CPLUSPLUS
+    AC_TRY_COMPILE([namespace Outer { namespace Inner { int i = 0; }}],
+        [using namespace Outer::Inner; return i;],
+        [ac_cv_cxx_namespaces=yes], [ac_cv_cxx_namespaces=no])
+    AC_LANG_RESTORE
+])
+if test "x$ac_cv_cxx_namespaces" = xyes; then
+    AC_DEFINE(HAVE_NAMESPACES,,[define if the compiler implements namespaces])
+fi
+])
+
+dnl Some compilers support namespaces but don't know about std
+dnl
+AC_DEFUN([AC_CXX_NAMESPACE_STD],
+[AC_REQUIRE([AC_CXX_NAMESPACES])
+AC_CACHE_CHECK(whether the compiler implements the namespace std,
+ac_cv_cxx_namespace_std,
+[ac_cv_cxx_namespace_std=no
+if test "$ac_cv_cxx_namespaces" = yes ; then  # std cannot exist without namespace support
+   AC_LANG_SAVE
+   AC_LANG_CPLUSPLUS
+   AC_TRY_COMPILE([
+#include <iostream>
+using namespace std;],
+                  [cout << "message\n";],
+                  ac_cv_cxx_namespace_std=yes, ac_cv_cxx_namespace_std=no)
+   AC_LANG_RESTORE
+fi
+])
+if test "$ac_cv_cxx_namespace_std" = yes; then
+   AC_DEFINE(HAVE_NAMESPACE_STD,,[define if the compiler implements namespace std])
+fi
+])
+
+dnl/*D
+dnl PAC_CXX_CHECK_COMPILER_OPTION - Check that a C++ compiler option is
+dnl accepted without warning messages
+dnl
+dnl Synopsis:
+dnl PAC_CXX_CHECK_COMPILER_OPTION(optionname,action-if-ok,action-if-fail)
+dnl
+dnl Output Effects:
+dnl
+dnl If no actions are specified, a working value is added to 'CXXOPTIONS'
+dnl
+dnl Notes:
+dnl This is now careful to check that the output is different, since
+dnl some compilers are noisy.
+dnl
+dnl We are extra careful to prototype the functions in case compiler options
+dnl that complain about poor code are in effect.
+dnl
+dnl Because this is a long script, we have ensured that you can pass a
+dnl variable containing the option name as the first argument.
+dnl D*/
+AC_DEFUN([PAC_CXX_CHECK_COMPILER_OPTION],[
+AC_MSG_CHECKING([whether C++ compiler accepts option $1])
+save_CXXFLAGS="$CXXFLAGS"
+CXXFLAGS="$1 $CXXFLAGS"
+rm -f conftest.out
+echo 'int foo(void);int foo(void){return 0;}' > conftest2.cpp
+echo 'int main(void);int main(void){return 0;}' > conftest.cpp
+if ${CXX-g++} $save_CXXFLAGS $CPPFLAGS -o conftest conftest.cpp $LDFLAGS >conftest.bas 2>&1 ; then
+    if ${CXX-g++} $CXXFLAGS $CPPFLAGS -o conftest conftest.cpp $LDFLAGS >conftest.out 2>&1 ; then
+        if diff -b conftest.out conftest.bas >/dev/null 2>&1 ; then
+            AC_MSG_RESULT(yes)
+            AC_MSG_CHECKING([whether routines compiled with $1 can be linked with ones compiled without $1])
+            rm -f conftest.out
+            rm -f conftest.bas
+            if ${CXX-g++} -c $save_CXXFLAGS $CPPFLAGS conftest2.cpp >conftest2.out 2>&1 ; then
+                if ${CXX-g++} $CXXFLAGS $CPPFLAGS -o conftest conftest2.o conftest.cpp $LDFLAGS >conftest.bas 2>&1 ; then
+                    if ${CXX-g++} $CXXFLAGS $CPPFLAGS -o conftest conftest2.o conftest.cpp $LDFLAGS >conftest.out 2>&1 ; then
+                        if diff -b conftest.out conftest.bas >/dev/null 2>&1 ; then
+                            AC_MSG_RESULT(yes)
+                            CXXFLAGS="$save_CXXFLAGS"
+                            ifelse($2,,CXXOPTIONS="$CXXOPTIONS $1",$2)
+                        elif test -s conftest.out ; then
+                            cat conftest.out >&AC_FD_CC
+                            AC_MSG_RESULT(no)
+                            CXXFLAGS="$save_CXXFLAGS"
+                            $3
+                        else
+                            AC_MSG_RESULT(no)
+                            CXXFLAGS="$save_CXXFLAGS"
+                            $3
+                        fi
+                    else
+                        if test -s conftest.out ; then
+                            cat conftest.out >&AC_FD_CC
+                        fi
+                        AC_MSG_RESULT(no)
+                        CXXFLAGS="$save_CXXFLAGS"
+                        $3
+                    fi
+                else
+                    CXXFLAGS="$save_CXXFLAGS" ; AC_MSG_RESULT(no)  # Could not link with the option!
+                    $3
+                fi
+            else
+                if test -s conftest2.out ; then
+                    cat conftest2.out >&AC_FD_CC
+                fi
+                AC_MSG_RESULT(no)
+                CXXFLAGS="$save_CXXFLAGS"
+                $3
+            fi
+        else
+            cat conftest.out >&AC_FD_CC
+            AC_MSG_RESULT(no)
+            CXXFLAGS="$save_CXXFLAGS"
+            $3
+        fi
+    else
+        AC_MSG_RESULT(no)
+        if test -s conftest.out ; then cat conftest.out >&AC_FD_CC ; fi
+        CXXFLAGS="$save_CXXFLAGS"
+        $3
+    fi
+else
+    # Could not compile without the option!
+    CXXFLAGS="$save_CXXFLAGS" ; AC_MSG_RESULT(no)
+fi
+rm -f conftest*
+])
diff --git a/ompi/mca/io/romio/romio/confdb/aclocal_f77.m4 b/ompi/mca/io/romio/romio/confdb/aclocal_f77.m4
new file mode 100644
index 0000000000..46bd25e2e0
--- /dev/null
+++ b/ompi/mca/io/romio/romio/confdb/aclocal_f77.m4
@@ -0,0 +1,1640 @@
+dnl PAC_PROG_F77 - reprioritize the F77 compiler search order: invokes AC_PROG_F77 with an explicit candidate list (searched in the order given), wrapped in PAC_PUSH_FLAG/PAC_POP_FLAG on FFLAGS -- presumably so the caller's FFLAGS is left unchanged by the check; TODO confirm against the PAC_PUSH_FLAG definition
+AC_DEFUN([PAC_PROG_F77],[
+PAC_PUSH_FLAG([FFLAGS])
+AC_PROG_F77([ifort pgf77 af77 xlf frt cf77 fort77 fl32 fort ifc efc ftn gfortran f77 g77])
+PAC_POP_FLAG([FFLAGS])
+])
+
+dnl
+dnl/*D
+dnl PAC_PROG_F77_NAME_MANGLE - Determine how the Fortran compiler mangles
+dnl names
+dnl
+dnl Synopsis:
+dnl PAC_PROG_F77_NAME_MANGLE([action])
+dnl
+dnl Output Effect:
+dnl If no action is specified, one of the following names is defined:
+dnl.vb
+dnl If fortran names are mapped:
+dnl lower -> lower F77_NAME_LOWER
+dnl lower -> lower_ F77_NAME_LOWER_USCORE
+dnl lower -> UPPER F77_NAME_UPPER
+dnl lower_lower -> lower__ F77_NAME_LOWER_2USCORE
+dnl mixed -> mixed F77_NAME_MIXED
+dnl mixed -> mixed_ F77_NAME_MIXED_USCORE
+dnl mixed -> UPPER@STACK_SIZE F77_NAME_UPPER_STDCALL
+dnl.ve
+dnl If an action is specified, it is executed instead.
+dnl
+dnl Notes:
+dnl We assume that if lower -> lower (any underscore), upper -> upper with the
+dnl same underscore behavior. Previous versions did this by
+dnl compiling a Fortran program and running strings -a over it. Depending on
+dnl strings is a bad idea, so instead we try compiling and linking with a
+dnl C program, since that is why we are doing this anyway. A similar approach
+dnl is used by FFTW, though without some of the cases we check (specifically,
+dnl mixed name mangling). STD_CALL not only specifies a particular name
+dnl mangling convention (adding the size of the calling stack into the function
+dnl name), but also the stack management convention (callee cleans the stack,
+dnl and arguments are pushed onto the stack from right to left).
+dnl
+dnl One additional problem is that some Fortran implementations include
+dnl references to the runtime (like pgf90_compiled for the pgf90 compiler
+dnl used as the "Fortran 77" compiler). This is not yet solved.
+dnl
+dnl D*/
+dnl
+AC_DEFUN([PAC_PROG_F77_NAME_MANGLE],[
+AC_CACHE_CHECK([for Fortran 77 name mangling],
+pac_cv_prog_f77_name_mangle,
+[
+ # Check for strange behavior of Fortran. For example, some FreeBSD
+ # systems use f2c to implement f77, and the version of f2c that they
+ # use generates TWO (!!!) trailing underscores
+ # Currently, WDEF is not used but could be...
+ #
+ # Eventually, we want to be able to override the choices here and
+ # force a particular form. This is particularly useful in systems
+ # where a Fortran compiler option is used to force a particular
+ # external name format (rs6000 xlf, for example).
+ # This is needed for Mac OSX 10.5
+ rm -rf conftest.dSYM
+ rm -f conftest*
+ cat > conftest.f <&AC_FD_CC
+ cat conftest.f >&AC_FD_CC
+ fi
+
+ AC_LANG_SAVE
+ AC_LANG_C
+ save_LIBS="$LIBS"
+ dnl FLIBS comes from AC_F77_LIBRARY_LDFLAGS
+ LIBS="fconftestf.o $FLIBS $LIBS"
+ AC_TRY_LINK([extern void my_name(int);],my_name(0);,pac_cv_prog_f77_name_mangle="lower")
+ if test "X$pac_cv_prog_f77_name_mangle" = "X" ; then
+ AC_TRY_LINK([extern void my_name_(int);],my_name_(0);,pac_cv_prog_f77_name_mangle="lower underscore")
+ fi
+ if test "X$pac_cv_prog_f77_name_mangle" = "X" ; then
+ AC_TRY_LINK([void __stdcall MY_NAME(int);],MY_NAME(0);,pac_cv_prog_f77_name_mangle="upper stdcall")
+ fi
+ if test "X$pac_cv_prog_f77_name_mangle" = "X" ; then
+ AC_TRY_LINK([extern void MY_NAME(int);],MY_NAME(0);,pac_cv_prog_f77_name_mangle="upper")
+ fi
+ if test "X$pac_cv_prog_f77_name_mangle" = "X" ; then
+ AC_TRY_LINK([extern void my_name__(int);],my_name__(0);,
+ pac_cv_prog_f77_name_mangle="lower doubleunderscore")
+ fi
+ if test "X$pac_cv_prog_f77_name_mangle" = "X" ; then
+ AC_TRY_LINK([extern void MY_name(int);],MY_name(0);,pac_cv_prog_f77_name_mangle="mixed")
+ fi
+ if test "X$pac_cv_prog_f77_name_mangle" = "X" ; then
+ AC_TRY_LINK([extern void MY_name_(int);],MY_name_(0);,pac_cv_prog_f77_name_mangle="mixed underscore")
+ fi
+ LIBS="$save_LIBS"
+ AC_LANG_RESTORE
+ # If we got to this point, it may be that the programs have to be
+ # linked with the Fortran, not the C, compiler. Try reversing
+ # the language used for the test
+ dnl Note that the definition of AC_TRY_LINK and AC_LANG_PROGRAM
+ dnl is broken in autoconf and will generate spurious warning messages
+ dnl To fix this, we use
+ dnl AC _LINK_IFELSE([AC _LANG_PROGRAM(,[[body]])],action-if-true)
+ dnl instead of AC _TRY_LINK(,body,action-if-true)
+ if test "X$pac_cv_prog_f77_name_mangle" = "X" ; then
+ AC_LANG_SAVE
+ AC_LANG_FORTRAN77
+ save_LIBS="$LIBS"
+ LIBS="conftestc.o $LIBS"
+ if test "X$ac_ccompile" = "X" ; then
+ ac_ccompile='${CC-cc} -c $CFLAGS conftest.c 1>&AC_FD_CC'
+ fi
+ # This is needed for Mac OSX 10.5
+ rm -rf conftest.dSYM
+ rm -f conftest*
+ cat > conftest.c <&AC_FD_CC
+ cat conftest.c >&AC_FD_CC
+ fi
+
+ AC_LINK_IFELSE([AC_LANG_PROGRAM(,[[ call my_name(0)]])],
+ pac_cv_prog_f77_name_mangle="lower")
+
+ if test "X$pac_cv_prog_f77_name_mangle" = "X" ; then
+ # This is needed for Mac OSX 10.5
+ rm -rf conftest.dSYM
+ rm -f conftest*
+ cat > conftest.c <